WebSVN - LCARS - Blame - Rev 23 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
8	PointedEar	1	#!/usr/bin/env perl
5	PointedEar	2	use strict;
6	PointedEar	3	use warnings;
23	PointedEar	4	require 5.004;
22	PointedEar	5
		6	#use diagnostics;
6	PointedEar	7	use utf8;
		8	use Encode;
		9
22	PointedEar	10	use constant DEBUG => 0;
		11
13	PointedEar	12	## Print out all text to STDOUT UTF-8 encoded
		13	binmode STDOUT, ':encoding(UTF-8)';
22	PointedEar	14	binmode STDERR, ':encoding(UTF-8)';
5	PointedEar	15
23	PointedEar	16	## L10n
		17	use locale ':not_characters';
		18
		19	# setlocale( LC_MESSAGES, '' );
		20	require Number::Format;
		21
		22	## i18n
		23	## FIXME: Automatically include resolved '.' in @INC
22	PointedEar	24	# print join "\n", @INC;
5	PointedEar	25
22	PointedEar	26	use Locale::TextDomain ('de.pointedears.newsstat');
23	PointedEar	27	use POSIX ('locale_h');
22	PointedEar	28	use Locale::Messages qw (bind_textdomain_filter
23	PointedEar	29	bind_textdomain_codeset
		30	turn_utf_8_on);
		31
		32	bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
22	PointedEar	33	bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23	PointedEar	34
22	PointedEar	35	require Mail::Message;
		36	require DateTime;
		37	require DateTime::Format::Mail;
23	PointedEar	38
		39	# See comments in previous example
		40	my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
		41	@{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
		42	'decimal_point' };
		43
		44	# Apply defaults if values are missing
		45	$thousands_sep = $mon_thousands_sep unless $thousands_sep;
		46	$thousands_sep = ' ' unless $thousands_sep;
		47
		48	# grouping and mon_grouping are packed lists
		49	# of small integers (characters) telling the
		50	# grouping (thousand_seps and mon_thousand_seps
		51	# being the group dividers) of numbers and
		52	# monetary quantities. The integers' meanings:
		53	# 255 means no more grouping, 0 means repeat
		54	# the previous grouping, 1-254 means use that
		55	# as the current grouping. Grouping goes from
		56	# right to left (low to high digits). In the
		57	# below we cheat slightly by never using anything
		58	# else than the first grouping (whatever that is).
		59	my @grouping;
		60	if ($grouping)
		61	{
		62	@grouping = unpack( "C*", $grouping );
		63	}
		64	else
		65	{
		66	@grouping = (3);
		67	}
		68
		69	## FIXME: Why don't the defaults work already?
		70	my $formatter = new Number::Format(
		71	-decimal_point => $decimal_point,
		72	-thousands_sep => $thousands_sep,
		73
		74	# -grouping => $grouping[0]
		75	);
		76
5	PointedEar	77	###################### USER CONFIGURATIONS ############################
		78
13	PointedEar	79	## The name of the group to do stats for
5	PointedEar	80	my $newsgroup_name = $ARGV[0];
22	PointedEar	81	$newsgroup_name or usage();
5	PointedEar	82
13	PointedEar	83	## Check for removal flags
5	PointedEar	84	my $ix;
		85	my $j;
		86	my %skipSec;
		87	my @skiplist;
		88	my $args = @ARGV;
7	PointedEar	89	for ( $ix = 1 ; $ix < $args ; $ix++ )
		90	{
		91	$j = $ix + 1;
		92	if ( $ARGV[$ix] eq "-x" )
		93	{
		94	@skiplist = split( ",", $ARGV[$j] );
		95	}
		96	elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
		97	{
		98	@skiplist = split( ",", $1 );
		99	}
5	PointedEar	100	}
7	PointedEar	101	foreach (@skiplist)
		102	{
5	PointedEar	103	$skipSec{$_} = 1;
		104	}
		105
13	PointedEar	106	## Leafnode users will want /var/spool/news for this variable.
5	PointedEar	107	my $news = "/var/spool/news/";
		108
22	PointedEar	109	## Number of top or bottom posters to show
		110	my $topposters = 20;
5	PointedEar	111
13	PointedEar	112	## Number of threads we want to know about
5	PointedEar	113	my $topthreads = 20;
		114
13	PointedEar	115	## Number of cross-posted threads to show
5	PointedEar	116	my $topcrossposts = 10;
		117
22	PointedEar	118	## Number of agents we list
		119	my $topagents = 10;
		120
13	PointedEar	121	## Number of time zones to show
5	PointedEar	122	my $toptz = 10;
		123
		124	###################### DATA STRUCTURES ######################
		125	my $group = $newsgroup_name;
		126	$group =~ s!\.!/!g;
22	PointedEar	127	my %data; # name, count, agent, total, orig, quoted
		128	my $totsize = 0; # holds total sizes of all files
		129	my %crossposts; # group, count
		130	my %threads; # subject, count
		131	my $replies = 0; # total no. of replies
		132	my $origposts = 0; # total no. of original posts
		133	my %tz; # timezones by count
		134	my $earliest; # earliest article we have found
		135	my $latest; # latest article we have found
		136	my $totheader = 0; # total size of header material
		137	my $totbody = 0; # total size of body material
		138	my $totsig = 0; # total size of sig material
		139	my $totquoted = 0; # total size of quoted material
		140	my $totorig = 0; # total size of original material
		141	my $totalposts; # total no. of posts considered
5	PointedEar	142	my %distinct_agent;
13	PointedEar	143
		144	## Used to hold counts of User Agents used
		145	my %agents = (
		146	"Compuserver" => 0,
		147	"Foorum" => 0,
7	PointedEar	148	"Forte Agent" => 0,
		149	"Forte Free Agent" => 0,
13	PointedEar	150	"Gnus" => 0,
		151	"KNode" => 0,
		152	"MacSOUP" => 0,
		153	"MT-NewsWatcher" => 0,
7	PointedEar	154	"MicroPlanet Gravity" => 0,
		155	"Microsoft Outlook Express" => 0,
13	PointedEar	156	"Microsoft Windows Mail" => 0,
		157	"Mozilla" => 0,
		158	"News Rover" => 0,
		159	"NN" => 0,
		160	"Pan" => 0,
		161	"rn" => 0,
7	PointedEar	162	"slrn" => 0,
13	PointedEar	163	"Sylpheed" => 0,
7	PointedEar	164	"tin" => 0,
13	PointedEar	165	"VSoup" => 0,
7	PointedEar	166	"WebTV" => 0,
22	PointedEar	167	"Xnews" => 0,
13	PointedEar	168	);
5	PointedEar	169
22	PointedEar	170	my $datetime_parser = DateTime::Format::Mail->new();
		171	$datetime_parser->loose();
5	PointedEar	172
22	PointedEar	173	my $today = DateTime->today( time_zone => 'UTC' );
		174	my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
		175	my $start = int $prev_month->strftime('%s');
		176	my $numdays = int DateTime->last_day_of_month(
		177	year => $prev_month->year(),
		178	month => $prev_month->month(),
		179	time_zone => $prev_month->time_zone(),
		180	)->day();
		181	my $end = int $today->clone()->set_day(1)->strftime('%s');
		182
		183	dmsg( $start, " to ", $end ) if DEBUG;
		184
		185	chdir("$news$group")
		186	or die __x(
		187	"Can't cd to {newsgroup}: {error}\n",
		188	newsgroup => "$news$group",
		189	error => $!
		190	);
		191	opendir( DIR, "." )
		192	or die __x(
		193	"Can't open {newsgroup}: {error}\n",
		194	newsgroup => "$news$group",
		195	error => $!
		196	);
		197
		198	while ( defined( my $filename = readdir(DIR) ) )
7	PointedEar	199	{
22	PointedEar	200	next unless -f $filename; # only want real files
		201	next if ( $filename eq ".overview" ); # real articles only
		202
		203	get_article($filename); # read in the article
5	PointedEar	204	}
22	PointedEar	205	closedir(DIR); # finished with the directory
7	PointedEar	206
22	PointedEar	207	dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG;
		208
13	PointedEar	209	## Post-processing
22	PointedEar	210	count_agents(); # count agents, collapsing versions
		211	fix_percent();
5	PointedEar	212
22	PointedEar	213	write_data();
		214	display_results();
5	PointedEar	215
13	PointedEar	216	########################################
22	PointedEar	217	## Get current article's header and body
13	PointedEar	218	########################################
22	PointedEar	219	sub get_article
7	PointedEar	220	{
22	PointedEar	221	my $filename = shift;
5	PointedEar	222
22	PointedEar	223	open( my $FILE, '<', $filename )
		224	or
		225	die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
		226	my $msg = Mail::Message->read($FILE);
		227	my $timestamp = $msg->timestamp();
		228	my $date = $msg->study('Date');
5	PointedEar	229
22	PointedEar	230	## Disregard article if timestamp is not in range
		231	dmsg($timestamp) if DEBUG;
		232	if ( $timestamp < $start or $timestamp >= $end )
7	PointedEar	233	{
22	PointedEar	234	dmsg("Posting on $date ignored.") if DEBUG;
		235	return;
7	PointedEar	236	}
5	PointedEar	237
22	PointedEar	238	$totalposts++; # bump count of articles considered
5	PointedEar	239
22	PointedEar	240	## DEBUG
		241	dmsg($date) if DEBUG;
13	PointedEar	242
22	PointedEar	243	## get stats about the file itself
		244	my $filesize = -s $filename; # get total size of file
		245	$totsize += $filesize; # bump total sizes of all files
5	PointedEar	246
22	PointedEar	247	if ( ( not defined $earliest ) or $timestamp < $earliest )
7	PointedEar	248	{
22	PointedEar	249	$earliest = $timestamp;
7	PointedEar	250	}
22	PointedEar	251	elsif ( ( not defined $latest ) or $timestamp > $latest )
7	PointedEar	252	{
22	PointedEar	253	$latest = $timestamp;
7	PointedEar	254	}
13	PointedEar	255
22	PointedEar	256	#print "timestamp: $timestamp\n";
5	PointedEar	257
22	PointedEar	258	## count header size
		259	$totheader += $msg->head()->size();
5	PointedEar	260
22	PointedEar	261	## get the poster's name (MIME-decoded, in UTF-8)
		262	my $poster = $msg->study('From');
		263	if ( defined $poster )
7	PointedEar	264	{
22	PointedEar	265	## Convert old to new format
		266	$poster =~ s/^\s(.+?\@.+?)\s$(.+?)$\s*$/$2 <$1>/;
5	PointedEar	267
22	PointedEar	268	## Collapse whitespace
		269	$poster =~ s/\s+/ /g;
5	PointedEar	270
22	PointedEar	271	## Remove outer quotes; TODO: observe RFC 5322 strictly
		272	$poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5	PointedEar	273
22	PointedEar	274	## DEBUG
		275	dmsg($poster) if DEBUG;
7	PointedEar	276
22	PointedEar	277	## seen this one before?
		278	if ( !defined( $data{$poster} ) )
		279	{
		280	$data{$poster}{'agent'} = __ 'unknown'; # comes after For: field
		281	$data{$poster}{'orig'} = 0;
		282	$data{$poster}{'quoted'} = 0;
		283	}
		284	$data{$poster}{'count'}++; # bump count for this poster
		285	$data{$poster}{'size'} += $filesize; # total size of file
5	PointedEar	286
22	PointedEar	287	## The User-Agent and/or X-Newsreader fields
		288	## for User-Agent by poster
		289	my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
		290	if ( defined $ua )
7	PointedEar	291	{
22	PointedEar	292	$data{$poster}{'agent'} = $ua;
		293
		294	## DEBUG
		295	dmsg($ua) if DEBUG;
7	PointedEar	296	}
5	PointedEar	297
22	PointedEar	298	## The User Agent for User-Agent by number of posts
		299	get_agent($msg);
5	PointedEar	300
22	PointedEar	301	## Get all cross-posted newsgroups
		302	for ( split( /,/, $msg->study('Newsgroups') ) )
		303	{
		304	$crossposts{$_}++; # bump count for each
		305	}
5	PointedEar	306
22	PointedEar	307	## Get threads
		308	my $thread = $msg->study('Subject');
		309	$thread =~ s/^re:\s+//i; # Remove Re: or re: at start
		310	$thread =~ s/\s+/ /g; # collapse whitespace
		311	$threads{$thread}{'count'}++; # bump count of this subject
		312	$threads{$thread}{'size'} += $filesize; # bump bytes for this thread
5	PointedEar	313
22	PointedEar	314	## Is this an original post or a reply?
		315	if ( defined $msg->study('References') )
7	PointedEar	316	{
22	PointedEar	317	$replies++;
7	PointedEar	318	}
22	PointedEar	319	else
7	PointedEar	320	{
22	PointedEar	321	$origposts++;
7	PointedEar	322	}
22	PointedEar	323
		324	## Get the time zone
		325	my $datetime = $datetime_parser->parse_datetime($date);
		326	my $tz = $datetime->strftime('%z');
		327	$tz = "UTC" if $tz =~ m{^(?:GMT\|0000)$}o;
		328	$tz{$tz}++;
		329
		330	## DEBUG
		331	dmsg($tz) if DEBUG;
		332
		333	#### Now analyse the body text ####
		334	my $body = $msg->body();
		335
		336	my $insig = 0;
		337	my @body = $body->lines;
		338	for (@body)
7	PointedEar	339	{
22	PointedEar	340	$totbody += length($_); # bump total body size
		341	next if (m{^$>}o); # don't count blank lines in body
		342	if ( $insig == 1 )
		343	{
		344
		345	# bump total sig size
		346	$totsig += length($_);
		347	}
		348	## are we in a quote line?
		349	## Bill Unruh uses ] quotes, and another poster uses ::
		350	elsif ( m{^\s[>\]]}o or m{^\s::}o )
		351	{
		352	## bump count of quoted chrs
		353	$data{$poster}{'quoted'} += length($_);
		354	$totquoted += length($_);
		355	}
		356	elsif (/^-- $/)
		357	{
		358	$insig = 1;
		359	}
		360	else
		361	{
		362	## We must be processing an original line
		363	$data{$poster}{'orig'} += length($_); # bump count of original chrs
		364	$totorig += length($_);
		365	}
7	PointedEar	366	}
22	PointedEar	367
		368	# end for (@body)
		369	}
		370
		371	close($FILE);
		372	}
		373
		374	sub get_agent
		375	{
		376	my $msg = shift;
		377
		378	my $ua =
		379	$msg->study('User-Agent')
		380	or $msg->study('X-Newsreader')
		381	or $msg->study('X-Mailer');
		382	if ( not defined $ua )
		383	{
		384	my $org = $msg->study('Organization');
		385	if ( defined $org
		386	and $org =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/ )
7	PointedEar	387	{
22	PointedEar	388	$ua = $org;
7	PointedEar	389	}
22	PointedEar	390	elsif ( $msg->study('Message-ID') =~ /pine/i )
7	PointedEar	391	{
22	PointedEar	392	$ua = "Pine";
		393	}
7	PointedEar	394	}
5	PointedEar	395
22	PointedEar	396	## Hopefully found UA, else set to unknown
		397	if ( not defined $ua )
		398	{
		399	$ua = __ "unknown";
		400	}
5	PointedEar	401
22	PointedEar	402	$ua = clean($ua);
		403
		404	my $raw = $ua;
		405	my $agent = $raw;
		406
		407	## strip http
		408	if ( $raw =~ /.http./ )
7	PointedEar	409	{
22	PointedEar	410	$raw =~ s!posted via!!i;
		411	$raw =~ s!http://!!g;
		412	$raw =~ s!/!!g;
		413	$raw =~ s! !!g;
		414	}
5	PointedEar	415
22	PointedEar	416	## Fix Outlook from Mac
		417	if ( $raw =~ /^microsoft/i )
		418	{
		419	$raw =~ s/-/ /g;
		420	}
5	PointedEar	421
22	PointedEar	422	## Pick out the popular agents
		423	if ( $raw =~ /(outlook express)/i
		424	\|\| $raw =~ /(windows mail)/i
		425	\|\| $raw =~ /(microplanet gravity)/i
		426	\|\| $raw =~ /(news rover)/i
		427	\|\| $raw =~ /(forte agent)/i
		428	\|\| $raw =~ /(forte free agent)/i )
		429	{
		430	$agent = $1;
		431	}
		432	elsif (
		433	$raw =~ /^(
5	PointedEar	434	pan
		435	\|sylpheed
		436	\|slrn
		437	\|mozilla
		438	\|knode
		439	\|tin
		440	\|hamster
		441	\|xrn
		442	\|xnews
		443	\|aol
		444	\|gnus
		445	\|krn
		446	\|macsoup
		447	\|messenger
		448	\|openxp
		449	\|pine
		450	\|thoth
		451	\|turnpike
		452	\|winvn
		453	\|vsoup
		454	\|google
		455	\|supernews
		456	\|nn
		457	\|rn
		458	\|007
		459	\|webtv
		460	\|compuserve
7	PointedEar	461	)/ix
22	PointedEar	462	)
		463	{
		464	$agent = $1;
		465	}
		466	else
		467	{
		468	## Clean up unknown agents
		469	if ( $raw =~ m!^(.*?)/! )
7	PointedEar	470	{
		471	$agent = $1;
		472	}
22	PointedEar	473	elsif ( $raw =~ /^(\w)\d./ )
7	PointedEar	474	{
22	PointedEar	475	$agent = $1;
		476	}
		477	}
		478
		479	$distinct_agent{$agent}++;
		480	return $agent;
		481	}
		482	## get_agent
		483
		484	#########################################
		485	## Count the User-Agents used, collapsing
		486	## different versions into one per agent.
		487	#########################################
		488	sub count_agents
		489	{
		490	POSTER:
		491	foreach my $poster ( keys %data )
		492	{
		493	foreach my $agent_name ( keys %distinct_agent )
		494	{ # check against known ones
		495	if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7	PointedEar	496	{
22	PointedEar	497	$agents{$agent_name}++;
		498	next POSTER;
7	PointedEar	499	}
		500	}
22	PointedEar	501	$agents{ $data{$poster}{'agent'} }++;
		502	}
		503	} # count_agents
7	PointedEar	504
22	PointedEar	505	#############################################
		506	## Set orig/total percentages for all posters
		507	#############################################
		508	sub fix_percent
		509	{
		510	foreach my $poster ( keys %data )
		511	{
		512	my $percent = 100;
		513	if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
		514	{
		515	$percent =
		516	$data{$poster}{'orig'} * 100 /
		517	( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/
		518	}
		519	elsif ( $data{$poster}{'orig'} == 0 )
		520	{
		521	$percent = 0;
		522	}
		523	$data{$poster}{'percent'} = $percent;
5	PointedEar	524	}
22	PointedEar	525	}
		526	## fix_percent
5	PointedEar	527
22	PointedEar	528	##################################
		529	## Write data structures to a file
		530	##################################
		531	sub write_data
		532	{
		533	open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
		534	or die __x( "Can't create XDATA: {error}\n", error => $! );
		535	print $OUTF "Data collected from $newsgroup_name\n\n";
		536	print $OUTF
		537	"Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		538	foreach my $name ( keys %data )
7	PointedEar	539	{
22	PointedEar	540	print $OUTF
		541	"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7	PointedEar	542	}
22	PointedEar	543	print $OUTF
		544	"============================================================================\n";
		545	print $OUTF "Thread subjects\n";
		546	print $OUTF
		547	"----------------------------------------------------------------------------\n";
		548	foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
		549	{
		550	print $OUTF
		551	"$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
		552	}
		553	print $OUTF
		554	"============================================================================\n";
		555	print $OUTF "Cross-posts\n";
		556	print $OUTF
		557	"----------------------------------------------------------------------------\n";
		558	foreach my $name ( sort keys %crossposts )
		559	{
		560	print $OUTF "$name : $crossposts{$name}\n";
		561	}
		562	print $OUTF
		563	"============================================================================\n";
		564	print $OUTF "User agents\n";
		565	print $OUTF
		566	"----------------------------------------------------------------------------\n";
		567	foreach my $name ( sort keys %agents )
		568	{
		569	print $OUTF "$name : $agents{$name}\n";
		570	}
		571	print $OUTF
		572	"============================================================================\n";
		573	print $OUTF "Time zones\n";
		574	print $OUTF
		575	"----------------------------------------------------------------------------\n";
		576	foreach my $name ( sort keys %tz )
		577	{
		578	print $OUTF "$name : $tz{$name}\n";
		579	}
		580	close $OUTF;
		581	} # write_data
5	PointedEar	582
22	PointedEar	583	sub display_results
		584	{
		585	#################### DISPLAY RESULTS #####################
23	PointedEar	586	println( "=" x 76 );
22	PointedEar	587	printf "%s\n",
		588	centred(
		589	__x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
		590	76 );
23	PointedEar	591	println( "=" x 76 );
22	PointedEar	592	printf "%s\n",
		593	centred(
23	PointedEar	594	__(
		595	"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
		596	),
22	PointedEar	597	76
		598	);
		599	print "\n\n";
23	PointedEar	600	printf __"Total posts considered: %s over %d days\n",
		601	$formatter->format_number($totalposts),
		602	$formatter->format_number($numdays);
		603	my $time_locale = setlocale(LC_TIME);
22	PointedEar	604	my $earliest_datetime = DateTime->from_epoch(
23	PointedEar	605	epoch => $earliest,
		606	locale => $time_locale,
22	PointedEar	607	time_zone => 'UTC',
		608	);
		609	my $latest_datetime = DateTime->from_epoch(
23	PointedEar	610	epoch => $latest,
		611	locale => $time_locale,
22	PointedEar	612	time_zone => 'UTC',
		613	);
		614	my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23	PointedEar	615	printf __"Earliest article: %s\n",
		616	$earliest_datetime->strftime($datetime_format);
		617	printf __"Latest article: %s\n",
		618	$latest_datetime->strftime($datetime_format);
		619	printf __"Original articles: %s; replies: %s\n",
		620	$formatter->format_number($origposts),
		621	$formatter->format_number($replies);
		622	printf __"Total size of posts: %s bytes (%s)" . "\n",
		623	$formatter->format_number($totsize),
		624	$formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
		625	printf __"Average %s articles per day, %s per day, %s bytes per article\n",
		626	$formatter->format_number( int( $totalposts / $numdays ) ),
		627	$formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
		628	$formatter->format_number( int( $totsize / $totalposts ) );
		629
22	PointedEar	630	my $count = keys %data;
23	PointedEar	631	printf __"Total headers: %s; bodies: %s\n",
		632	$formatter->format_bytes(
		633	$totheader, ( 'precision' => 1, 'mode' => 'iec' )
		634	),
		635	$formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	636	printf __
23	PointedEar	637	"Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
		638	$formatter->format_bytes(
		639	$totquoted, ( 'precision' => 1, 'mode' => 'iec' )
		640	),
		641	$formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
		642	$formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
		643	$formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
		644	printf __"Total number of posters: %s, average %s per poster\n",
		645	$formatter->format_number($count),
		646	$formatter->format_bytes( $totsize / $count,
		647	( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	648	$count = keys %threads;
23	PointedEar	649	printf __"Total number of threads: %s, average %s per thread\n",
		650	$formatter->format_number($count),
		651	$formatter->format_bytes( $totsize / $count,
		652	( 'precision' => 1, 'mode' => 'iec' ) );
		653	printf __"Total number of user agents: %d\n",
		654	$formatter->format_number( scalar keys %agents );
		655	print "\n", "=" x 76, "\n" ;
22	PointedEar	656	########################################
		657	## Show posters by article count Sec 1;
		658	########################################
		659	unless ( $skipSec{1} )
7	PointedEar	660	{
22	PointedEar	661	if ( keys %data < $topposters )
		662	{
		663	$count = keys %data;
		664	}
		665	else
		666	{
		667	$count = $topposters;
		668	}
		669	printf "%s\n",
		670	centred(
23	PointedEar	671	__x( "Top {count} posters by number of articles", count => $topposters ),
		672	76
		673	);
22	PointedEar	674	print "=" x 76, "\n";
		675	my $i = 0;
		676	foreach
		677	my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
		678	{
		679	my $name = substr( $poster, 0, 65 );
		680	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
		681	$data{$poster}{count};
		682	last if ( ++$i == $count );
		683	}
		684	print "\n", "=" x 76, "\n";
7	PointedEar	685	}
22	PointedEar	686
		687	######################################
		688	## Show posters by size in KiB Sec 2;
		689	######################################
		690	unless ( $skipSec{2} )
7	PointedEar	691	{
22	PointedEar	692	if ( keys %data < $topposters )
		693	{
		694	$count = keys %data;
		695	}
		696	else
		697	{
		698	$count = $topposters;
		699	}
		700	printf "%s\n",
		701	centred(
		702	__x( "Top {count} posters by article size in KiB", count => $topposters ),
23	PointedEar	703	76
		704	);
22	PointedEar	705	print "=" x 76, "\n";
		706	my $i = 0;
		707	foreach
		708	my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
		709	{
		710	my $name = substr( $poster, 0, 62 );
		711	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
		712	$data{$poster}{size} / 1024; #/
		713	last if ( ++$i == $count );
		714	}
		715	print "\n", "=" x 76, "\n";
7	PointedEar	716	}
5	PointedEar	717
22	PointedEar	718	#####################################
		719	## Show top posters for original text
		720	#####################################
		721	unless ( $skipSec{3} )
7	PointedEar	722	{
22	PointedEar	723	if ( keys %data < $topposters )
		724	{
		725	$count = keys %data;
		726	}
		727	else
		728	{
		729	$count = $topposters;
		730	}
		731	printf "%s\n",
		732	centred(
		733	__x(
		734	"Top {count} responders by original text (> 5 posts)",
		735	count => $topposters
		736	),
		737	76
		738	);
		739	print "=" x 76, "\n";
		740	my $i = 0;
		741	foreach my $poster (
		742	sort { $data{$b}{percent} <=> $data{$a}{percent} }
		743	keys %data
		744	)
		745	{
		746	next if $data{$poster}{quoted} == 0;
		747	next if $data{$poster}{count} < 5;
		748	my $name = substr( $poster, 0, 63 );
		749	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
		750	$data{$poster}{percent};
		751	last if ( ++$i == $count );
		752	}
		753	print "\n", "=" x 76, "\n";
7	PointedEar	754	}
5	PointedEar	755
22	PointedEar	756	########################################
		757	## Show bottom posters for original text
		758	########################################
		759	unless ( $skipSec{4} )
7	PointedEar	760	{
22	PointedEar	761	if ( keys %data < $topposters )
7	PointedEar	762	{
22	PointedEar	763	$count = keys %data;
		764	}
		765	else
		766	{
		767	$count = $topposters;
		768	}
		769	printf "%s\n",
		770	centred(
		771	__x(
		772	"Bottom {count} responders by original text (> 5 posts)",
		773	count => $topposters
		774	),
		775	76
		776	);
		777	print "=" x 76, "\n";
		778	my $i = 0;
		779	foreach my $poster (
		780	sort { $data{$a}{percent} <=> $data{$b}{percent} }
		781	keys %data
		782	)
		783	{
		784	next if $data{$poster}{quoted} == 0;
		785	next if $data{$poster}{count} < 5;
		786	my $name = substr( $poster, 0, 63 );
		787	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
		788	$data{$poster}{percent};
		789	last if ( ++$i == $count );
		790	}
		791	print "\n", "=" x 76, "\n";
		792	}
5	PointedEar	793
22	PointedEar	794	#####################################
		795	## Show threads by number of articles
		796	#####################################
		797	unless ( $skipSec{5} )
		798	{
		799	if ( keys %threads < $topthreads )
		800	{
		801	$count = keys %threads;
7	PointedEar	802	}
22	PointedEar	803	else
		804	{
		805	$count = $topthreads;
7	PointedEar	806	}
22	PointedEar	807	printf "%s\n",
23	PointedEar	808	centred(
		809	__x( "Top {count} threads by no. of articles", count => $topthreads ),
22	PointedEar	810	76 );
		811	print "=" x 76, "\n";
		812	my $i = 0;
		813	foreach my $thread (
		814	sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
		815	keys %threads
		816	)
7	PointedEar	817	{
22	PointedEar	818	my $name = substr( $thread, 0, 65 );
		819	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		820	$threads{$thread}{'count'};
		821	last if ( ++$i == $count );
7	PointedEar	822	}
22	PointedEar	823	print "\n", "=" x 76, "\n";
		824	}
		825
		826	##############################
		827	## Show threads by size in KiB
		828	##############################
		829	unless ( $skipSec{6} )
		830	{
		831	if ( keys %threads < $topthreads )
		832	{
		833	$count = keys %threads;
		834	}
7	PointedEar	835	else
		836	{
22	PointedEar	837	$count = $topthreads;
		838	}
		839	printf "%s\n",
23	PointedEar	840	centred(
		841	__x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22	PointedEar	842	print "=" x 76, "\n";
		843	my $i = 0;
		844	foreach my $thread (
		845	sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
		846	keys %threads
		847	)
		848	{
		849	my $name = substr( $thread, 0, 65 );
		850	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		851	$threads{$thread}{'size'} / 1024; #/
		852	last if ( ++$i == $count );
		853	}
		854	print "\n", "=" x 76, "\n";
		855	}
5	PointedEar	856
22	PointedEar	857	##################################
		858	## Show top 10 cross-posted groups
		859	##################################
		860	unless ( $skipSec{7} )
		861	{
		862	delete $crossposts{"$newsgroup_name"}; # don't include ours
		863	if ( keys %crossposts < $topcrossposts )
		864	{
		865	$count = keys %crossposts;
7	PointedEar	866	}
22	PointedEar	867	else
		868	{
		869	$count = $topcrossposts;
		870	}
		871	printf "%s\n",
23	PointedEar	872	centred(
		873	__x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22	PointedEar	874	print "=" x 76, "\n";
		875	my $i = 0;
		876	foreach
		877	my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
		878	{
		879	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		880	$crossposts{$name};
		881	last if ( ++$i == $count );
		882	}
		883	print "\n", "=" x 76, "\n";
		884	}
5	PointedEar	885
22	PointedEar	886	#########################
		887	## Show agents and counts
		888	#########################
		889	unless ( $skipSec{8} )
		890	{
		891	if ( keys %agents < $topagents )
		892	{
		893	$count = keys %agents;
		894	}
		895	else
		896	{
		897	$count = $topagents;
		898	}
		899	printf "%s\n",
		900	centred( __x( "Top {count} user agents by poster", count => $topagents ),
		901	76 );
		902	print "=" x 76, "\n";
		903	my $i = 0;
		904	foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
		905	{
		906	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
		907	$agents{$agent};
		908	last if ( ++$i == $count );
		909	}
		910	print "\n", "=" x 76, "\n";
		911	}
5	PointedEar	912
22	PointedEar	913	#######################
		914	## Show distinct agents
		915	#######################
		916	unless ( $skipSec{9} )
7	PointedEar	917	{
22	PointedEar	918	if ( keys %distinct_agent < $topagents )
		919	{
		920	$count = keys %distinct_agent;
7	PointedEar	921	}
22	PointedEar	922	else
		923	{
		924	$count = $topagents;
		925	}
		926	printf "%s\n",
		927	centred(
		928	__x( "Top {count} user agents by number of posts", count => $topagents ),
23	PointedEar	929	76
		930	);
22	PointedEar	931	print "=" x 76, "\n";
		932	my $i = 0;
		933	foreach my $agent (
		934	sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
		935	keys %distinct_agent
		936	)
		937	{
		938	printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
		939	$distinct_agent{$agent},
		940	( ( $distinct_agent{$agent} / $totalposts ) * 100 );
		941	last if ( ++$i == $count );
		942	}
		943	print "\n", "=" x 76, "\n";
7	PointedEar	944	}
5	PointedEar	945
22	PointedEar	946	############################
		947	## Show timezones and counts
		948	############################
		949	unless ( $skipSec{10} )
7	PointedEar	950	{
22	PointedEar	951	if ( keys %tz < $toptz )
7	PointedEar	952	{
22	PointedEar	953	$count = keys %tz;
7	PointedEar	954	}
22	PointedEar	955	else
7	PointedEar	956	{
22	PointedEar	957	$count = $toptz;
7	PointedEar	958	}
23	PointedEar	959	printf "%s\n",
		960	centred( __x( "Top {count} time zones", count => $toptz ), 76 );
22	PointedEar	961	print "=" x 76, "\n";
		962	my $i = 0;
		963	foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
		964	{
		965	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
		966	last if ( ++$i == $count );
		967	}
		968	print "\n", "=" x 76, "\n";
7	PointedEar	969	}
5	PointedEar	970	}
		971
22	PointedEar	972	## helper subs
		973
13	PointedEar	974	###############################
		975	## Right pad a string with '.'s
		976	###############################
7	PointedEar	977	sub rpad
		978	{
13	PointedEar	979	## Get text to pad, length to pad, pad chr
7	PointedEar	980	my ( $text, $pad_len, $pad_chr ) = @_;
11	PointedEar	981
13	PointedEar	982	## DEBUG
22	PointedEar	983	printf( "\|%s\| = %d\n", $text, length($text) ) if DEBUG > 1;
11	PointedEar	984
7	PointedEar	985	if ( length($text) > $pad_len )
		986	{
		987	$text = substr( $text, 0, $pad_len );
		988	}
		989	my $padded = $text . $pad_chr x ( $pad_len - length($text) );
		990	return $padded;
5	PointedEar	991	}
		992
13	PointedEar	993	##################
		994	## Centre a string
		995	##################
7	PointedEar	996	sub centred
		997	{
		998	my ( $text, $width ) = @_; # text to centre, size of field to centre in
		999	my $pad_len = ( $width - length($text) ) / 2; #/
		1000	my $centred = " " x $pad_len . $text;
		1001	return $centred;
5	PointedEar	1002	}
		1003
13	PointedEar	1004	###########################
		1005	## Put commas into a number
		1006	###########################
7	PointedEar	1007	sub commify
		1008	{
13	PointedEar	1009	local $_ = shift;
23	PointedEar	1010	my $number = $_;
		1011	$_ = int; # Chop non-integer part
22	PointedEar	1012	1 while
23	PointedEar	1013	s/([-+]?\d)(\d{$grouping[0]}($\|\Q$thousands_sep\E))/$1$thousands_sep$2/;
		1014	my $int_part = $_;
		1015	my $real_part = '';
		1016	if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
		1017	{
		1018	$real_part = $1;
		1019	}
		1020	return $int_part . $real_part;
5	PointedEar	1021	}
		1022
13	PointedEar	1023	################################################################
		1024	## Returns a string with leading and trailing whitespace removed
		1025	################################################################
7	PointedEar	1026	sub clean
		1027	{
		1028	my $dirty = shift;
		1029	my $clean = $dirty;
14	PointedEar	1030	$clean =~ s/^\s+\|\s+$//g;
5	PointedEar	1031
7	PointedEar	1032	return $clean;
5	PointedEar	1033	}
		1034
7	PointedEar	1035	sub usage
		1036	{
23	PointedEar	1037	println( __ "usage: newsstat.pl NEWS.GROUP" );
7	PointedEar	1038	exit 1;
5	PointedEar	1039	}
		1040
22	PointedEar	1041	sub dmsg
7	PointedEar	1042	{
22	PointedEar	1043	print STDERR @_, "\n";
		1044	}
		1045
		1046	sub dmsg2
		1047	{
		1048	my ( $level, @msg ) = @_;
		1049	print STDERR @msg, "\n" if $level >= DEBUG;
		1050	}
23	PointedEar	1051
		1052	sub println
		1053	{
		1054	print @_, "\n";
		1055	}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl - Rev 23