WebSVN - LCARS - Blame - Rev 47 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
8	PointedEar	1	#!/usr/bin/env perl
5	PointedEar	2	use strict;
6	PointedEar	3	use warnings;
23	PointedEar	4	require 5.004;
22	PointedEar	5
		6	#use diagnostics;
6	PointedEar	7	use utf8;
		8
30	PointedEar	9	## NOTE:
		10	## Enable and remove binmode when utf8::all has actually become lexically scoped
		11	# use utf8:all;
		12
22	PointedEar	13	use constant DEBUG => 0;
		14
24	PointedEar	15	## newsstat.pl
		16	## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
		17	## Based on work by Garry Knight et al.
31	PointedEar	18	##
24	PointedEar	19	## This program is free software: you can redistribute it and/or modify
		20	## it under the terms of the GNU General Public License as published by
		21	## the Free Software Foundation, either version 3 of the License, or
		22	## (at your option) any later version.
31	PointedEar	23	##
24	PointedEar	24	## This program is distributed in the hope that it will be useful,
		25	## but WITHOUT ANY WARRANTY; without even the implied warranty of
		26	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		27	## GNU General Public License for more details.
31	PointedEar	28	##
24	PointedEar	29	## You should have received a copy of the GNU General Public License
		30	## along with this program. If not, see <http://www.gnu.org/licenses/>.
		31
13	PointedEar	32	## Print out all text to STDOUT UTF-8 encoded
		33	binmode STDOUT, ':encoding(UTF-8)';
22	PointedEar	34	binmode STDERR, ':encoding(UTF-8)';
5	PointedEar	35
23	PointedEar	36	## L10n
		37	use locale ':not_characters';
		38
		39	# setlocale( LC_MESSAGES, '' );
		40	require Number::Format;
		41
		42	## i18n
		43	## FIXME: Automatically include resolved '.' in @INC
22	PointedEar	44	# print join "\n", @INC;
5	PointedEar	45
22	PointedEar	46	use Locale::TextDomain ('de.pointedears.newsstat');
23	PointedEar	47	use POSIX ('locale_h');
22	PointedEar	48	use Locale::Messages qw (bind_textdomain_filter
23	PointedEar	49	bind_textdomain_codeset
		50	turn_utf_8_on);
		51
		52	bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
22	PointedEar	53	bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23	PointedEar	54
22	PointedEar	55	require Mail::Message;
		56	require DateTime;
		57	require DateTime::Format::Mail;
23	PointedEar	58
		59	# See comments in previous example
		60	my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
		61	@{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
		62	'decimal_point' };
		63
		64	# Apply defaults if values are missing
		65	$thousands_sep = $mon_thousands_sep unless $thousands_sep;
		66	$thousands_sep = ' ' unless $thousands_sep;
		67
		68	# grouping and mon_grouping are packed lists
		69	# of small integers (characters) telling the
		70	# grouping (thousand_seps and mon_thousand_seps
		71	# being the group dividers) of numbers and
		72	# monetary quantities. The integers' meanings:
		73	# 255 means no more grouping, 0 means repeat
		74	# the previous grouping, 1-254 means use that
		75	# as the current grouping. Grouping goes from
		76	# right to left (low to high digits). In the
		77	# below we cheat slightly by never using anything
		78	# else than the first grouping (whatever that is).
		79	my @grouping;
		80	if ($grouping)
		81	{
		82	@grouping = unpack( "C*", $grouping );
		83	}
		84	else
		85	{
		86	@grouping = (3);
		87	}
		88
		89	## FIXME: Why don't the defaults work already?
		90	my $formatter = new Number::Format(
		91	-decimal_point => $decimal_point,
		92	-thousands_sep => $thousands_sep,
		93
		94	# -grouping => $grouping[0]
		95	);
		96
5	PointedEar	97	###################### USER CONFIGURATIONS ############################
		98
13	PointedEar	99	## The name of the group to do stats for
5	PointedEar	100	my $newsgroup_name = $ARGV[0];
31	PointedEar	101	$newsgroup_name // usage();
5	PointedEar	102
13	PointedEar	103	## Check for removal flags
5	PointedEar	104	my $ix;
		105	my $j;
		106	my %skipSec;
		107	my @skiplist;
		108	my $args = @ARGV;
7	PointedEar	109	for ( $ix = 1 ; $ix < $args ; $ix++ )
		110	{
		111	$j = $ix + 1;
		112	if ( $ARGV[$ix] eq "-x" )
		113	{
		114	@skiplist = split( ",", $ARGV[$j] );
		115	}
		116	elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
		117	{
		118	@skiplist = split( ",", $1 );
		119	}
5	PointedEar	120	}
7	PointedEar	121	foreach (@skiplist)
		122	{
5	PointedEar	123	$skipSec{$_} = 1;
		124	}
		125
13	PointedEar	126	## Leafnode users will want /var/spool/news for this variable.
5	PointedEar	127	my $news = "/var/spool/news/";
		128
22	PointedEar	129	## Number of top or bottom posters to show
		130	my $topposters = 20;
5	PointedEar	131
13	PointedEar	132	## Number of threads we want to know about
5	PointedEar	133	my $topthreads = 20;
		134
13	PointedEar	135	## Number of cross-posted threads to show
5	PointedEar	136	my $topcrossposts = 10;
		137
22	PointedEar	138	## Number of agents we list
		139	my $topagents = 10;
		140
13	PointedEar	141	## Number of time zones to show
5	PointedEar	142	my $toptz = 10;
		143
		144	###################### DATA STRUCTURES ######################
		145	my $group = $newsgroup_name;
		146	$group =~ s!\.!/!g;
22	PointedEar	147	my %data; # name, count, agent, total, orig, quoted
		148	my $totsize = 0; # holds total sizes of all files
		149	my %crossposts; # group, count
		150	my %threads; # subject, count
		151	my $replies = 0; # total no. of replies
		152	my $origposts = 0; # total no. of original posts
		153	my %tz; # timezones by count
		154	my $earliest; # earliest article we have found
		155	my $latest; # latest article we have found
		156	my $totheader = 0; # total size of header material
		157	my $totbody = 0; # total size of body material
		158	my $totsig = 0; # total size of sig material
		159	my $totquoted = 0; # total size of quoted material
		160	my $totorig = 0; # total size of original material
		161	my $totalposts; # total no. of posts considered
5	PointedEar	162	my %distinct_agent;
13	PointedEar	163
		164	## Used to hold counts of User Agents used
		165	my %agents = (
		166	"Compuserver" => 0,
		167	"Foorum" => 0,
7	PointedEar	168	"Forte Agent" => 0,
		169	"Forte Free Agent" => 0,
13	PointedEar	170	"Gnus" => 0,
		171	"KNode" => 0,
		172	"MacSOUP" => 0,
		173	"MT-NewsWatcher" => 0,
7	PointedEar	174	"MicroPlanet Gravity" => 0,
		175	"Microsoft Outlook Express" => 0,
13	PointedEar	176	"Microsoft Windows Mail" => 0,
		177	"Mozilla" => 0,
		178	"News Rover" => 0,
		179	"NN" => 0,
		180	"Pan" => 0,
		181	"rn" => 0,
7	PointedEar	182	"slrn" => 0,
13	PointedEar	183	"Sylpheed" => 0,
7	PointedEar	184	"tin" => 0,
13	PointedEar	185	"VSoup" => 0,
7	PointedEar	186	"WebTV" => 0,
22	PointedEar	187	"Xnews" => 0,
13	PointedEar	188	);
5	PointedEar	189
22	PointedEar	190	my $datetime_parser = DateTime::Format::Mail->new();
		191	$datetime_parser->loose();
5	PointedEar	192
22	PointedEar	193	my $today = DateTime->today( time_zone => 'UTC' );
		194	my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
		195	my $start = int $prev_month->strftime('%s');
		196	my $numdays = int DateTime->last_day_of_month(
		197	year => $prev_month->year(),
		198	month => $prev_month->month(),
		199	time_zone => $prev_month->time_zone(),
		200	)->day();
		201	my $end = int $today->clone()->set_day(1)->strftime('%s');
		202
		203	dmsg( $start, " to ", $end ) if DEBUG;
		204
		205	chdir("$news$group")
		206	or die __x(
		207	"Can't cd to {newsgroup}: {error}\n",
		208	newsgroup => "$news$group",
		209	error => $!
		210	);
		211	opendir( DIR, "." )
		212	or die __x(
		213	"Can't open {newsgroup}: {error}\n",
		214	newsgroup => "$news$group",
		215	error => $!
		216	);
		217
		218	while ( defined( my $filename = readdir(DIR) ) )
7	PointedEar	219	{
22	PointedEar	220	next unless -f $filename; # only want real files
		221	next if ( $filename eq ".overview" ); # real articles only
		222
		223	get_article($filename); # read in the article
5	PointedEar	224	}
22	PointedEar	225	closedir(DIR); # finished with the directory
7	PointedEar	226
22	PointedEar	227	dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG;
		228
13	PointedEar	229	## Post-processing
22	PointedEar	230	count_agents(); # count agents, collapsing versions
		231	fix_percent();
5	PointedEar	232
22	PointedEar	233	write_data();
		234	display_results();
5	PointedEar	235
13	PointedEar	236	########################################
22	PointedEar	237	## Get current article's header and body
13	PointedEar	238	########################################
22	PointedEar	239	sub get_article
7	PointedEar	240	{
22	PointedEar	241	my $filename = shift;
5	PointedEar	242
22	PointedEar	243	open( my $FILE, '<', $filename )
		244	or
		245	die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
		246	my $msg = Mail::Message->read($FILE);
		247	my $timestamp = $msg->timestamp();
		248	my $date = $msg->study('Date');
5	PointedEar	249
22	PointedEar	250	## Disregard article if timestamp is not in range
		251	dmsg($timestamp) if DEBUG;
31	PointedEar	252	if ( $timestamp < $start \|\| $timestamp >= $end )
7	PointedEar	253	{
22	PointedEar	254	dmsg("Posting on $date ignored.") if DEBUG;
		255	return;
7	PointedEar	256	}
5	PointedEar	257
22	PointedEar	258	$totalposts++; # bump count of articles considered
5	PointedEar	259
22	PointedEar	260	## DEBUG
		261	dmsg($date) if DEBUG;
13	PointedEar	262
22	PointedEar	263	## get stats about the file itself
		264	my $filesize = -s $filename; # get total size of file
		265	$totsize += $filesize; # bump total sizes of all files
5	PointedEar	266
31	PointedEar	267	if ( ( not defined $earliest ) \|\| $timestamp < $earliest )
7	PointedEar	268	{
22	PointedEar	269	$earliest = $timestamp;
7	PointedEar	270	}
31	PointedEar	271	elsif ( ( not defined $latest ) \|\| $timestamp > $latest )
7	PointedEar	272	{
22	PointedEar	273	$latest = $timestamp;
7	PointedEar	274	}
13	PointedEar	275
22	PointedEar	276	#print "timestamp: $timestamp\n";
5	PointedEar	277
22	PointedEar	278	## count header size
		279	$totheader += $msg->head()->size();
5	PointedEar	280
22	PointedEar	281	## get the poster's name (MIME-decoded, in UTF-8)
		282	my $poster = $msg->study('From');
		283	if ( defined $poster )
7	PointedEar	284	{
22	PointedEar	285	## Convert old to new format
		286	$poster =~ s/^\s(.+?\@.+?)\s$(.+?)$\s*$/$2 <$1>/;
5	PointedEar	287
22	PointedEar	288	## Collapse whitespace
		289	$poster =~ s/\s+/ /g;
5	PointedEar	290
22	PointedEar	291	## Remove outer quotes; TODO: observe RFC 5322 strictly
		292	$poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5	PointedEar	293
22	PointedEar	294	## DEBUG
		295	dmsg($poster) if DEBUG;
7	PointedEar	296
22	PointedEar	297	## seen this one before?
		298	if ( !defined( $data{$poster} ) )
		299	{
		300	$data{$poster}{'agent'} = __ 'unknown'; # comes after For: field
		301	$data{$poster}{'orig'} = 0;
		302	$data{$poster}{'quoted'} = 0;
		303	}
		304	$data{$poster}{'count'}++; # bump count for this poster
		305	$data{$poster}{'size'} += $filesize; # total size of file
5	PointedEar	306
22	PointedEar	307	## The User-Agent and/or X-Newsreader fields
		308	## for User-Agent by poster
31	PointedEar	309	my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader');
22	PointedEar	310	if ( defined $ua )
7	PointedEar	311	{
22	PointedEar	312	$data{$poster}{'agent'} = $ua;
		313
		314	## DEBUG
		315	dmsg($ua) if DEBUG;
7	PointedEar	316	}
5	PointedEar	317
22	PointedEar	318	## The User Agent for User-Agent by number of posts
		319	get_agent($msg);
5	PointedEar	320
22	PointedEar	321	## Get all cross-posted newsgroups
		322	for ( split( /,/, $msg->study('Newsgroups') ) )
		323	{
		324	$crossposts{$_}++; # bump count for each
		325	}
5	PointedEar	326
22	PointedEar	327	## Get threads
		328	my $thread = $msg->study('Subject');
33	PointedEar	329	$thread =~ s/^\sre:\s//i; # Remove Re: or re: at the start
		330	$thread =~ s/\s$was:\s.$\s$//i; # Remove (was: ...) at the end
22	PointedEar	331	$thread =~ s/\s+/ /g; # collapse whitespace
		332	$threads{$thread}{'count'}++; # bump count of this subject
		333	$threads{$thread}{'size'} += $filesize; # bump bytes for this thread
5	PointedEar	334
22	PointedEar	335	## Is this an original post or a reply?
		336	if ( defined $msg->study('References') )
7	PointedEar	337	{
22	PointedEar	338	$replies++;
7	PointedEar	339	}
22	PointedEar	340	else
7	PointedEar	341	{
22	PointedEar	342	$origposts++;
7	PointedEar	343	}
22	PointedEar	344
		345	## Get the time zone
		346	my $datetime = $datetime_parser->parse_datetime($date);
		347	my $tz = $datetime->strftime('%z');
		348	$tz = "UTC" if $tz =~ m{^(?:GMT\|0000)$}o;
		349	$tz{$tz}++;
		350
		351	## DEBUG
		352	dmsg($tz) if DEBUG;
		353
		354	#### Now analyse the body text ####
		355	my $body = $msg->body();
		356
		357	my $insig = 0;
		358	my @body = $body->lines;
		359	for (@body)
7	PointedEar	360	{
22	PointedEar	361	$totbody += length($_); # bump total body size
		362	next if (m{^$>}o); # don't count blank lines in body
		363	if ( $insig == 1 )
		364	{
		365
		366	# bump total sig size
		367	$totsig += length($_);
		368	}
		369	## are we in a quote line?
		370	## Bill Unruh uses ] quotes, and another poster uses ::
31	PointedEar	371	elsif ( m{^\s[>\]]}o \|\| m{^\s::}o )
22	PointedEar	372	{
		373	## bump count of quoted chrs
		374	$data{$poster}{'quoted'} += length($_);
		375	$totquoted += length($_);
		376	}
		377	elsif (/^-- $/)
		378	{
		379	$insig = 1;
		380	}
		381	else
		382	{
		383	## We must be processing an original line
		384	$data{$poster}{'orig'} += length($_); # bump count of original chrs
		385	$totorig += length($_);
		386	}
7	PointedEar	387	}
22	PointedEar	388
		389	# end for (@body)
		390	}
		391
		392	close($FILE);
		393	}
		394
		395	sub get_agent
		396	{
		397	my $msg = shift;
		398
31	PointedEar	399	my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader')
		400	// $msg->study('X-Mailer');
		401
22	PointedEar	402	if ( not defined $ua )
		403	{
		404	my $org = $msg->study('Organization');
		405	if ( defined $org
		406	and $org =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/ )
7	PointedEar	407	{
22	PointedEar	408	$ua = $org;
7	PointedEar	409	}
22	PointedEar	410	elsif ( $msg->study('Message-ID') =~ /pine/i )
7	PointedEar	411	{
22	PointedEar	412	$ua = "Pine";
		413	}
7	PointedEar	414	}
5	PointedEar	415
22	PointedEar	416	## Hopefully found UA, else set to unknown
		417	if ( not defined $ua )
		418	{
		419	$ua = __ "unknown";
		420	}
5	PointedEar	421
22	PointedEar	422	$ua = clean($ua);
		423
		424	my $raw = $ua;
		425	my $agent = $raw;
		426
		427	## strip http
		428	if ( $raw =~ /.http./ )
7	PointedEar	429	{
22	PointedEar	430	$raw =~ s!posted via!!i;
		431	$raw =~ s!http://!!g;
		432	$raw =~ s!/!!g;
		433	$raw =~ s! !!g;
		434	}
5	PointedEar	435
22	PointedEar	436	## Fix Outlook from Mac
		437	if ( $raw =~ /^microsoft/i )
		438	{
		439	$raw =~ s/-/ /g;
		440	}
5	PointedEar	441
22	PointedEar	442	## Pick out the popular agents
		443	if ( $raw =~ /(outlook express)/i
		444	\|\| $raw =~ /(windows mail)/i
		445	\|\| $raw =~ /(microplanet gravity)/i
		446	\|\| $raw =~ /(news rover)/i
		447	\|\| $raw =~ /(forte agent)/i
		448	\|\| $raw =~ /(forte free agent)/i )
		449	{
		450	$agent = $1;
		451	}
		452	elsif (
		453	$raw =~ /^(
5	PointedEar	454	pan
		455	\|sylpheed
		456	\|slrn
		457	\|mozilla
		458	\|knode
		459	\|tin
		460	\|hamster
		461	\|xrn
		462	\|xnews
		463	\|aol
		464	\|gnus
		465	\|krn
		466	\|macsoup
		467	\|messenger
		468	\|openxp
		469	\|pine
		470	\|thoth
		471	\|turnpike
		472	\|winvn
		473	\|vsoup
		474	\|google
		475	\|supernews
		476	\|nn
		477	\|rn
		478	\|007
		479	\|webtv
		480	\|compuserve
7	PointedEar	481	)/ix
22	PointedEar	482	)
		483	{
		484	$agent = $1;
		485	}
		486	else
		487	{
		488	## Clean up unknown agents
		489	if ( $raw =~ m!^(.*?)/! )
7	PointedEar	490	{
		491	$agent = $1;
		492	}
22	PointedEar	493	elsif ( $raw =~ /^(\w)\d./ )
7	PointedEar	494	{
22	PointedEar	495	$agent = $1;
		496	}
		497	}
		498
		499	$distinct_agent{$agent}++;
		500	return $agent;
		501	}
		502	## get_agent
		503
		504	#########################################
		505	## Count the User-Agents used, collapsing
		506	## different versions into one per agent.
		507	#########################################
		508	sub count_agents
		509	{
		510	POSTER:
		511	foreach my $poster ( keys %data )
		512	{
		513	foreach my $agent_name ( keys %distinct_agent )
		514	{ # check against known ones
		515	if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7	PointedEar	516	{
22	PointedEar	517	$agents{$agent_name}++;
		518	next POSTER;
7	PointedEar	519	}
		520	}
22	PointedEar	521	$agents{ $data{$poster}{'agent'} }++;
		522	}
		523	} # count_agents
7	PointedEar	524
22	PointedEar	525	#############################################
		526	## Set orig/total percentages for all posters
		527	#############################################
		528	sub fix_percent
		529	{
		530	foreach my $poster ( keys %data )
		531	{
		532	my $percent = 100;
		533	if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
		534	{
		535	$percent =
		536	$data{$poster}{'orig'} * 100 /
		537	( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/
		538	}
		539	elsif ( $data{$poster}{'orig'} == 0 )
		540	{
		541	$percent = 0;
		542	}
		543	$data{$poster}{'percent'} = $percent;
5	PointedEar	544	}
22	PointedEar	545	}
		546	## fix_percent
5	PointedEar	547
22	PointedEar	548	##################################
		549	## Write data structures to a file
		550	##################################
		551	sub write_data
		552	{
		553	open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
		554	or die __x( "Can't create XDATA: {error}\n", error => $! );
		555	print $OUTF "Data collected from $newsgroup_name\n\n";
		556	print $OUTF
		557	"Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		558	foreach my $name ( keys %data )
7	PointedEar	559	{
22	PointedEar	560	print $OUTF
		561	"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7	PointedEar	562	}
22	PointedEar	563	print $OUTF
		564	"============================================================================\n";
		565	print $OUTF "Thread subjects\n";
		566	print $OUTF
		567	"----------------------------------------------------------------------------\n";
		568	foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
		569	{
		570	print $OUTF
		571	"$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
		572	}
		573	print $OUTF
		574	"============================================================================\n";
		575	print $OUTF "Cross-posts\n";
		576	print $OUTF
		577	"----------------------------------------------------------------------------\n";
		578	foreach my $name ( sort keys %crossposts )
		579	{
		580	print $OUTF "$name : $crossposts{$name}\n";
		581	}
		582	print $OUTF
		583	"============================================================================\n";
		584	print $OUTF "User agents\n";
		585	print $OUTF
		586	"----------------------------------------------------------------------------\n";
		587	foreach my $name ( sort keys %agents )
		588	{
		589	print $OUTF "$name : $agents{$name}\n";
		590	}
		591	print $OUTF
		592	"============================================================================\n";
		593	print $OUTF "Time zones\n";
		594	print $OUTF
		595	"----------------------------------------------------------------------------\n";
		596	foreach my $name ( sort keys %tz )
		597	{
		598	print $OUTF "$name : $tz{$name}\n";
		599	}
		600	close $OUTF;
		601	} # write_data
5	PointedEar	602
22	PointedEar	603	sub display_results
		604	{
		605	#################### DISPLAY RESULTS #####################
45	PointedEar	606	print "=" x 76, "\n";
22	PointedEar	607	printf "%s\n",
		608	centred(
		609	__x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
		610	76 );
45	PointedEar	611	print "=" x 76, "\n";
22	PointedEar	612	printf "%s\n",
		613	centred(
23	PointedEar	614	__(
		615	"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
		616	),
22	PointedEar	617	76
		618	);
		619	print "\n\n";
23	PointedEar	620	printf __"Total posts considered: %s over %d days\n",
		621	$formatter->format_number($totalposts),
		622	$formatter->format_number($numdays);
		623	my $time_locale = setlocale(LC_TIME);
22	PointedEar	624	my $earliest_datetime = DateTime->from_epoch(
23	PointedEar	625	epoch => $earliest,
		626	locale => $time_locale,
22	PointedEar	627	time_zone => 'UTC',
		628	);
		629	my $latest_datetime = DateTime->from_epoch(
23	PointedEar	630	epoch => $latest,
		631	locale => $time_locale,
22	PointedEar	632	time_zone => 'UTC',
		633	);
		634	my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23	PointedEar	635	printf __"Earliest article: %s\n",
		636	$earliest_datetime->strftime($datetime_format);
		637	printf __"Latest article: %s\n",
		638	$latest_datetime->strftime($datetime_format);
		639	printf __"Original articles: %s; replies: %s\n",
		640	$formatter->format_number($origposts),
		641	$formatter->format_number($replies);
		642	printf __"Total size of posts: %s bytes (%s)" . "\n",
		643	$formatter->format_number($totsize),
		644	$formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
		645	printf __"Average %s articles per day, %s per day, %s bytes per article\n",
		646	$formatter->format_number( int( $totalposts / $numdays ) ),
		647	$formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
		648	$formatter->format_number( int( $totsize / $totalposts ) );
		649
22	PointedEar	650	my $count = keys %data;
23	PointedEar	651	printf __"Total headers: %s; bodies: %s\n",
		652	$formatter->format_bytes(
		653	$totheader, ( 'precision' => 1, 'mode' => 'iec' )
		654	),
		655	$formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	656	printf __
23	PointedEar	657	"Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
		658	$formatter->format_bytes(
		659	$totquoted, ( 'precision' => 1, 'mode' => 'iec' )
		660	),
		661	$formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
		662	$formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
		663	$formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
		664	printf __"Total number of posters: %s, average %s per poster\n",
		665	$formatter->format_number($count),
		666	$formatter->format_bytes( $totsize / $count,
		667	( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	668	$count = keys %threads;
23	PointedEar	669	printf __"Total number of threads: %s, average %s per thread\n",
		670	$formatter->format_number($count),
		671	$formatter->format_bytes( $totsize / $count,
		672	( 'precision' => 1, 'mode' => 'iec' ) );
		673	printf __"Total number of user agents: %d\n",
		674	$formatter->format_number( scalar keys %agents );
31	PointedEar	675	print "\n", "=" x 76, "\n";
22	PointedEar	676	########################################
		677	## Show posters by article count Sec 1;
		678	########################################
		679	unless ( $skipSec{1} )
7	PointedEar	680	{
22	PointedEar	681	if ( keys %data < $topposters )
		682	{
		683	$count = keys %data;
		684	}
		685	else
		686	{
		687	$count = $topposters;
		688	}
		689	printf "%s\n",
		690	centred(
23	PointedEar	691	__x( "Top {count} posters by number of articles", count => $topposters ),
		692	76
		693	);
22	PointedEar	694	print "=" x 76, "\n";
		695	my $i = 0;
		696	foreach
		697	my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
		698	{
		699	my $name = substr( $poster, 0, 65 );
25	PointedEar	700	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	701	$data{$poster}{count};
		702	last if ( ++$i == $count );
		703	}
		704	print "\n", "=" x 76, "\n";
7	PointedEar	705	}
22	PointedEar	706
		707	######################################
		708	## Show posters by size in KiB Sec 2;
		709	######################################
		710	unless ( $skipSec{2} )
7	PointedEar	711	{
22	PointedEar	712	if ( keys %data < $topposters )
		713	{
		714	$count = keys %data;
		715	}
		716	else
		717	{
		718	$count = $topposters;
		719	}
		720	printf "%s\n",
		721	centred(
		722	__x( "Top {count} posters by article size in KiB", count => $topposters ),
23	PointedEar	723	76
		724	);
22	PointedEar	725	print "=" x 76, "\n";
		726	my $i = 0;
		727	foreach
		728	my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
		729	{
		730	my $name = substr( $poster, 0, 62 );
25	PointedEar	731	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	732	$data{$poster}{size} / 1024; #/
		733	last if ( ++$i == $count );
		734	}
		735	print "\n", "=" x 76, "\n";
7	PointedEar	736	}
5	PointedEar	737
22	PointedEar	738	#####################################
		739	## Show top posters for original text
		740	#####################################
31	PointedEar	741	my $topposters_real = 0;
		742
22	PointedEar	743	unless ( $skipSec{3} )
7	PointedEar	744	{
22	PointedEar	745	if ( keys %data < $topposters )
		746	{
		747	$count = keys %data;
		748	}
		749	else
		750	{
		751	$count = $topposters;
		752	}
31	PointedEar	753
22	PointedEar	754	printf "%s\n",
		755	centred(
		756	__x(
		757	"Top {count} responders by original text (> 5 posts)",
		758	count => $topposters
		759	),
		760	76
		761	);
		762	print "=" x 76, "\n";
		763	foreach my $poster (
		764	sort { $data{$b}{percent} <=> $data{$a}{percent} }
		765	keys %data
		766	)
		767	{
		768	next if $data{$poster}{quoted} == 0;
		769	next if $data{$poster}{count} < 5;
		770	my $name = substr( $poster, 0, 63 );
31	PointedEar	771	printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1,
		772	rpad( $poster, 63, "." ),
22	PointedEar	773	$data{$poster}{percent};
31	PointedEar	774	last if ( ++$topposters_real == $count );
22	PointedEar	775	}
		776	print "\n", "=" x 76, "\n";
7	PointedEar	777	}
5	PointedEar	778
22	PointedEar	779	########################################
		780	## Show bottom posters for original text
		781	########################################
31	PointedEar	782
		783	$skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4};
		784
22	PointedEar	785	unless ( $skipSec{4} )
7	PointedEar	786	{
22	PointedEar	787	if ( keys %data < $topposters )
7	PointedEar	788	{
22	PointedEar	789	$count = keys %data;
		790	}
		791	else
		792	{
		793	$count = $topposters;
		794	}
31	PointedEar	795
22	PointedEar	796	printf "%s\n",
		797	centred(
		798	__x(
		799	"Bottom {count} responders by original text (> 5 posts)",
		800	count => $topposters
		801	),
		802	76
		803	);
		804	print "=" x 76, "\n";
		805	my $i = 0;
		806	foreach my $poster (
		807	sort { $data{$a}{percent} <=> $data{$b}{percent} }
		808	keys %data
		809	)
		810	{
		811	next if $data{$poster}{quoted} == 0;
		812	next if $data{$poster}{count} < 5;
		813	my $name = substr( $poster, 0, 63 );
25	PointedEar	814	printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	815	$data{$poster}{percent};
		816	last if ( ++$i == $count );
		817	}
		818	print "\n", "=" x 76, "\n";
		819	}
5	PointedEar	820
22	PointedEar	821	#####################################
		822	## Show threads by number of articles
		823	#####################################
		824	unless ( $skipSec{5} )
		825	{
		826	if ( keys %threads < $topthreads )
		827	{
		828	$count = keys %threads;
7	PointedEar	829	}
22	PointedEar	830	else
		831	{
		832	$count = $topthreads;
7	PointedEar	833	}
22	PointedEar	834	printf "%s\n",
23	PointedEar	835	centred(
		836	__x( "Top {count} threads by no. of articles", count => $topthreads ),
22	PointedEar	837	76 );
		838	print "=" x 76, "\n";
		839	my $i = 0;
		840	foreach my $thread (
		841	sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
		842	keys %threads
		843	)
7	PointedEar	844	{
22	PointedEar	845	my $name = substr( $thread, 0, 65 );
25	PointedEar	846	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	847	$threads{$thread}{'count'};
		848	last if ( ++$i == $count );
7	PointedEar	849	}
22	PointedEar	850	print "\n", "=" x 76, "\n";
		851	}
		852
		853	##############################
		854	## Show threads by size in KiB
		855	##############################
		856	unless ( $skipSec{6} )
		857	{
		858	if ( keys %threads < $topthreads )
		859	{
		860	$count = keys %threads;
		861	}
7	PointedEar	862	else
		863	{
22	PointedEar	864	$count = $topthreads;
		865	}
		866	printf "%s\n",
23	PointedEar	867	centred(
		868	__x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22	PointedEar	869	print "=" x 76, "\n";
		870	my $i = 0;
		871	foreach my $thread (
		872	sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
		873	keys %threads
		874	)
		875	{
		876	my $name = substr( $thread, 0, 65 );
25	PointedEar	877	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	878	$threads{$thread}{'size'} / 1024; #/
		879	last if ( ++$i == $count );
		880	}
		881	print "\n", "=" x 76, "\n";
		882	}
5	PointedEar	883
22	PointedEar	884	##################################
		885	## Show top 10 cross-posted groups
		886	##################################
		887	unless ( $skipSec{7} )
		888	{
		889	delete $crossposts{"$newsgroup_name"}; # don't include ours
		890	if ( keys %crossposts < $topcrossposts )
		891	{
		892	$count = keys %crossposts;
7	PointedEar	893	}
22	PointedEar	894	else
		895	{
		896	$count = $topcrossposts;
		897	}
		898	printf "%s\n",
23	PointedEar	899	centred(
		900	__x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22	PointedEar	901	print "=" x 76, "\n";
		902	my $i = 0;
		903	foreach
		904	my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
		905	{
25	PointedEar	906	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	907	$crossposts{$name};
		908	last if ( ++$i == $count );
		909	}
		910	print "\n", "=" x 76, "\n";
		911	}
5	PointedEar	912
22	PointedEar	913	#########################
		914	## Show agents and counts
		915	#########################
		916	unless ( $skipSec{8} )
		917	{
		918	if ( keys %agents < $topagents )
		919	{
		920	$count = keys %agents;
		921	}
		922	else
		923	{
		924	$count = $topagents;
		925	}
		926	printf "%s\n",
		927	centred( __x( "Top {count} user agents by poster", count => $topagents ),
		928	76 );
		929	print "=" x 76, "\n";
		930	my $i = 0;
		931	foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
		932	{
25	PointedEar	933	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
22	PointedEar	934	$agents{$agent};
		935	last if ( ++$i == $count );
		936	}
		937	print "\n", "=" x 76, "\n";
		938	}
5	PointedEar	939
22	PointedEar	940	#######################
		941	## Show distinct agents
		942	#######################
		943	unless ( $skipSec{9} )
7	PointedEar	944	{
22	PointedEar	945	if ( keys %distinct_agent < $topagents )
		946	{
		947	$count = keys %distinct_agent;
7	PointedEar	948	}
22	PointedEar	949	else
		950	{
		951	$count = $topagents;
		952	}
		953	printf "%s\n",
		954	centred(
		955	__x( "Top {count} user agents by number of posts", count => $topagents ),
23	PointedEar	956	76
		957	);
22	PointedEar	958	print "=" x 76, "\n";
		959	my $i = 0;
		960	foreach my $agent (
		961	sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
		962	keys %distinct_agent
		963	)
		964	{
25	PointedEar	965	printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
22	PointedEar	966	$distinct_agent{$agent},
		967	( ( $distinct_agent{$agent} / $totalposts ) * 100 );
		968	last if ( ++$i == $count );
		969	}
		970	print "\n", "=" x 76, "\n";
7	PointedEar	971	}
5	PointedEar	972
22	PointedEar	973	############################
		974	## Show timezones and counts
		975	############################
		976	unless ( $skipSec{10} )
7	PointedEar	977	{
22	PointedEar	978	if ( keys %tz < $toptz )
7	PointedEar	979	{
22	PointedEar	980	$count = keys %tz;
7	PointedEar	981	}
22	PointedEar	982	else
7	PointedEar	983	{
22	PointedEar	984	$count = $toptz;
7	PointedEar	985	}
23	PointedEar	986	printf "%s\n",
		987	centred( __x( "Top {count} time zones", count => $toptz ), 76 );
22	PointedEar	988	print "=" x 76, "\n";
		989	my $i = 0;
		990	foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
		991	{
25	PointedEar	992	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
22	PointedEar	993	last if ( ++$i == $count );
		994	}
		995	print "\n", "=" x 76, "\n";
7	PointedEar	996	}
5	PointedEar	997	}
		998
22	PointedEar	999	## helper subs
		1000
13	PointedEar	1001	###############################
		1002	## Right pad a string with '.'s
		1003	###############################
7	PointedEar	1004	sub rpad
		1005	{
13	PointedEar	1006	## Get text to pad, length to pad, pad chr
7	PointedEar	1007	my ( $text, $pad_len, $pad_chr ) = @_;
11	PointedEar	1008
13	PointedEar	1009	## DEBUG
22	PointedEar	1010	printf( "\|%s\| = %d\n", $text, length($text) ) if DEBUG > 1;
11	PointedEar	1011
7	PointedEar	1012	if ( length($text) > $pad_len )
		1013	{
		1014	$text = substr( $text, 0, $pad_len );
		1015	}
		1016	my $padded = $text . $pad_chr x ( $pad_len - length($text) );
		1017	return $padded;
5	PointedEar	1018	}
		1019
13	PointedEar	1020	##################
		1021	## Centre a string
		1022	##################
7	PointedEar	1023	sub centred
		1024	{
		1025	my ( $text, $width ) = @_; # text to centre, size of field to centre in
		1026	my $pad_len = ( $width - length($text) ) / 2; #/
		1027	my $centred = " " x $pad_len . $text;
		1028	return $centred;
5	PointedEar	1029	}
		1030
13	PointedEar	1031	###########################
		1032	## Put commas into a number
		1033	###########################
7	PointedEar	1034	sub commify
		1035	{
13	PointedEar	1036	local $_ = shift;
23	PointedEar	1037	my $number = $_;
		1038	$_ = int; # Chop non-integer part
22	PointedEar	1039	1 while
23	PointedEar	1040	s/([-+]?\d)(\d{$grouping[0]}($\|\Q$thousands_sep\E))/$1$thousands_sep$2/;
		1041	my $int_part = $_;
		1042	my $real_part = '';
		1043	if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
		1044	{
		1045	$real_part = $1;
		1046	}
		1047	return $int_part . $real_part;
5	PointedEar	1048	}
		1049
13	PointedEar	1050	################################################################
		1051	## Returns a string with leading and trailing whitespace removed
		1052	################################################################
7	PointedEar	1053	sub clean
		1054	{
		1055	my $dirty = shift;
		1056	my $clean = $dirty;
14	PointedEar	1057	$clean =~ s/^\s+\|\s+$//g;
5	PointedEar	1058
7	PointedEar	1059	return $clean;
5	PointedEar	1060	}
		1061
7	PointedEar	1062	sub usage
		1063	{
45	PointedEar	1064	print __ "usage: newsstat.pl NEWS.GROUP", "\n";
7	PointedEar	1065	exit 1;
5	PointedEar	1066	}
		1067
22	PointedEar	1068	sub dmsg
7	PointedEar	1069	{
22	PointedEar	1070	print STDERR @_, "\n";
		1071	}
		1072
		1073	sub dmsg2
		1074	{
		1075	my ( $level, @msg ) = @_;
		1076	print STDERR @msg, "\n" if $level >= DEBUG;
45	PointedEar	1077	}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl - Rev 47