WebSVN - LCARS - Blame - Rev 22 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
8	PointedEar	1	#!/usr/bin/env perl
5	PointedEar	2	use strict;
6	PointedEar	3	use warnings;
22	PointedEar	4
		5	#use diagnostics;
6	PointedEar	6	use utf8;
		7	use Encode;
		8
22	PointedEar	9	use constant DEBUG => 0;
		10
13	PointedEar	11	## Print out all text to STDOUT UTF-8 encoded
		12	binmode STDOUT, ':encoding(UTF-8)';
22	PointedEar	13	binmode STDERR, ':encoding(UTF-8)';
5	PointedEar	14
22	PointedEar	15	# FIXME: Automatically include resolved '.' in @INC
		16	# print join "\n", @INC;
5	PointedEar	17
22	PointedEar	18	use locale ':not_characters';
		19	use Locale::TextDomain ('de.pointedears.newsstat');
		20	use POSIX ('locale_h');
		21	use Locale::Messages qw (bind_textdomain_filter
		22	bind_textdomain_codeset
		23	turn_utf_8_on);
		24	#setlocale( LC_MESSAGES, '' );
		25	bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
		26	bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
		27
		28	require Mail::Message;
		29	require DateTime;
		30	require DateTime::Format::Mail;
		31
5	PointedEar	32	###################### USER CONFIGURATIONS ############################
		33
13	PointedEar	34	## The name of the group to do stats for
5	PointedEar	35	my $newsgroup_name = $ARGV[0];
22	PointedEar	36	$newsgroup_name or usage();
5	PointedEar	37
13	PointedEar	38	## Check for removal flags
5	PointedEar	39	my $ix;
		40	my $j;
		41	my %skipSec;
		42	my @skiplist;
		43	my $args = @ARGV;
7	PointedEar	44	for ( $ix = 1 ; $ix < $args ; $ix++ )
		45	{
		46	$j = $ix + 1;
		47	if ( $ARGV[$ix] eq "-x" )
		48	{
		49	@skiplist = split( ",", $ARGV[$j] );
		50	}
		51	elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
		52	{
		53	@skiplist = split( ",", $1 );
		54	}
5	PointedEar	55	}
7	PointedEar	56	foreach (@skiplist)
		57	{
5	PointedEar	58	$skipSec{$_} = 1;
		59	}
		60
13	PointedEar	61	## Leafnode users will want /var/spool/news for this variable.
5	PointedEar	62	my $news = "/var/spool/news/";
		63
22	PointedEar	64	## Number of top or bottom posters to show
		65	my $topposters = 20;
5	PointedEar	66
13	PointedEar	67	## Number of threads we want to know about
5	PointedEar	68	my $topthreads = 20;
		69
13	PointedEar	70	## Number of cross-posted threads to show
5	PointedEar	71	my $topcrossposts = 10;
		72
22	PointedEar	73	## Number of agents we list
		74	my $topagents = 10;
		75
13	PointedEar	76	## Number of time zones to show
5	PointedEar	77	my $toptz = 10;
		78
		79	###################### DATA STRUCTURES ######################
		80	my $group = $newsgroup_name;
		81	$group =~ s!\.!/!g;
22	PointedEar	82	my %data; # name, count, agent, total, orig, quoted
		83	my $totsize = 0; # holds total sizes of all files
		84	my %crossposts; # group, count
		85	my %threads; # subject, count
		86	my $replies = 0; # total no. of replies
		87	my $origposts = 0; # total no. of original posts
		88	my %tz; # timezones by count
		89	my $earliest; # earliest article we have found
		90	my $latest; # latest article we have found
		91	my $totheader = 0; # total size of header material
		92	my $totbody = 0; # total size of body material
		93	my $totsig = 0; # total size of sig material
		94	my $totquoted = 0; # total size of quoted material
		95	my $totorig = 0; # total size of original material
		96	my $totalposts; # total no. of posts considered
5	PointedEar	97	my %distinct_agent;
13	PointedEar	98
		99	## Used to hold counts of User Agents used
		100	my %agents = (
		101	"Compuserver" => 0,
		102	"Foorum" => 0,
7	PointedEar	103	"Forte Agent" => 0,
		104	"Forte Free Agent" => 0,
13	PointedEar	105	"Gnus" => 0,
		106	"KNode" => 0,
		107	"MacSOUP" => 0,
		108	"MT-NewsWatcher" => 0,
7	PointedEar	109	"MicroPlanet Gravity" => 0,
		110	"Microsoft Outlook Express" => 0,
13	PointedEar	111	"Microsoft Windows Mail" => 0,
		112	"Mozilla" => 0,
		113	"News Rover" => 0,
		114	"NN" => 0,
		115	"Pan" => 0,
		116	"rn" => 0,
7	PointedEar	117	"slrn" => 0,
13	PointedEar	118	"Sylpheed" => 0,
7	PointedEar	119	"tin" => 0,
13	PointedEar	120	"VSoup" => 0,
7	PointedEar	121	"WebTV" => 0,
22	PointedEar	122	"Xnews" => 0,
13	PointedEar	123	);
5	PointedEar	124
22	PointedEar	125	my $datetime_parser = DateTime::Format::Mail->new();
		126	$datetime_parser->loose();
5	PointedEar	127
22	PointedEar	128	my $today = DateTime->today( time_zone => 'UTC' );
		129	my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
		130	my $start = int $prev_month->strftime('%s');
		131	my $numdays = int DateTime->last_day_of_month(
		132	year => $prev_month->year(),
		133	month => $prev_month->month(),
		134	time_zone => $prev_month->time_zone(),
		135	)->day();
		136	my $end = int $today->clone()->set_day(1)->strftime('%s');
		137
		138	dmsg( $start, " to ", $end ) if DEBUG;
		139
		140	chdir("$news$group")
		141	or die __x(
		142	"Can't cd to {newsgroup}: {error}\n",
		143	newsgroup => "$news$group",
		144	error => $!
		145	);
		146	opendir( DIR, "." )
		147	or die __x(
		148	"Can't open {newsgroup}: {error}\n",
		149	newsgroup => "$news$group",
		150	error => $!
		151	);
		152
		153	while ( defined( my $filename = readdir(DIR) ) )
7	PointedEar	154	{
22	PointedEar	155	next unless -f $filename; # only want real files
		156	next if ( $filename eq ".overview" ); # real articles only
		157
		158	get_article($filename); # read in the article
5	PointedEar	159	}
22	PointedEar	160	closedir(DIR); # finished with the directory
7	PointedEar	161
22	PointedEar	162	dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG;
		163
13	PointedEar	164	## Post-processing
22	PointedEar	165	count_agents(); # count agents, collapsing versions
		166	fix_percent();
5	PointedEar	167
22	PointedEar	168	write_data();
		169	display_results();
5	PointedEar	170
13	PointedEar	171	########################################
22	PointedEar	172	## Get current article's header and body
13	PointedEar	173	########################################
22	PointedEar	174	sub get_article
7	PointedEar	175	{
22	PointedEar	176	my $filename = shift;
5	PointedEar	177
22	PointedEar	178	open( my $FILE, '<', $filename )
		179	or
		180	die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
		181	my $msg = Mail::Message->read($FILE);
		182	my $timestamp = $msg->timestamp();
		183	my $date = $msg->study('Date');
5	PointedEar	184
22	PointedEar	185	## Disregard article if timestamp is not in range
		186	dmsg($timestamp) if DEBUG;
		187	if ( $timestamp < $start or $timestamp >= $end )
7	PointedEar	188	{
22	PointedEar	189	dmsg("Posting on $date ignored.") if DEBUG;
		190	return;
7	PointedEar	191	}
5	PointedEar	192
22	PointedEar	193	$totalposts++; # bump count of articles considered
5	PointedEar	194
22	PointedEar	195	## DEBUG
		196	dmsg($date) if DEBUG;
13	PointedEar	197
22	PointedEar	198	## get stats about the file itself
		199	my $filesize = -s $filename; # get total size of file
		200	$totsize += $filesize; # bump total sizes of all files
5	PointedEar	201
22	PointedEar	202	if ( ( not defined $earliest ) or $timestamp < $earliest )
7	PointedEar	203	{
22	PointedEar	204	$earliest = $timestamp;
7	PointedEar	205	}
22	PointedEar	206	elsif ( ( not defined $latest ) or $timestamp > $latest )
7	PointedEar	207	{
22	PointedEar	208	$latest = $timestamp;
7	PointedEar	209	}
13	PointedEar	210
22	PointedEar	211	#print "timestamp: $timestamp\n";
5	PointedEar	212
22	PointedEar	213	## count header size
		214	$totheader += $msg->head()->size();
5	PointedEar	215
22	PointedEar	216	## get the poster's name (MIME-decoded, in UTF-8)
		217	my $poster = $msg->study('From');
		218	if ( defined $poster )
7	PointedEar	219	{
22	PointedEar	220	## Convert old to new format
		221	$poster =~ s/^\s(.+?\@.+?)\s$(.+?)$\s*$/$2 <$1>/;
5	PointedEar	222
22	PointedEar	223	## Collapse whitespace
		224	$poster =~ s/\s+/ /g;
5	PointedEar	225
22	PointedEar	226	## Remove outer quotes; TODO: observe RFC 5322 strictly
		227	$poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5	PointedEar	228
22	PointedEar	229	## DEBUG
		230	dmsg($poster) if DEBUG;
7	PointedEar	231
22	PointedEar	232	## seen this one before?
		233	if ( !defined( $data{$poster} ) )
		234	{
		235	$data{$poster}{'agent'} = __ 'unknown'; # comes after For: field
		236	$data{$poster}{'orig'} = 0;
		237	$data{$poster}{'quoted'} = 0;
		238	}
		239	$data{$poster}{'count'}++; # bump count for this poster
		240	$data{$poster}{'size'} += $filesize; # total size of file
5	PointedEar	241
22	PointedEar	242	## The User-Agent and/or X-Newsreader fields
		243	## for User-Agent by poster
		244	my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
		245	if ( defined $ua )
7	PointedEar	246	{
22	PointedEar	247	$data{$poster}{'agent'} = $ua;
		248
		249	## DEBUG
		250	dmsg($ua) if DEBUG;
7	PointedEar	251	}
5	PointedEar	252
22	PointedEar	253	## The User Agent for User-Agent by number of posts
		254	get_agent($msg);
5	PointedEar	255
22	PointedEar	256	## Get all cross-posted newsgroups
		257	for ( split( /,/, $msg->study('Newsgroups') ) )
		258	{
		259	$crossposts{$_}++; # bump count for each
		260	}
5	PointedEar	261
22	PointedEar	262	## Get threads
		263	my $thread = $msg->study('Subject');
		264	$thread =~ s/^re:\s+//i; # Remove Re: or re: at start
		265	$thread =~ s/\s+/ /g; # collapse whitespace
		266	$threads{$thread}{'count'}++; # bump count of this subject
		267	$threads{$thread}{'size'} += $filesize; # bump bytes for this thread
5	PointedEar	268
22	PointedEar	269	## Is this an original post or a reply?
		270	if ( defined $msg->study('References') )
7	PointedEar	271	{
22	PointedEar	272	$replies++;
7	PointedEar	273	}
22	PointedEar	274	else
7	PointedEar	275	{
22	PointedEar	276	$origposts++;
7	PointedEar	277	}
22	PointedEar	278
		279	## Get the time zone
		280	my $datetime = $datetime_parser->parse_datetime($date);
		281	my $tz = $datetime->strftime('%z');
		282	$tz = "UTC" if $tz =~ m{^(?:GMT\|0000)$}o;
		283	$tz{$tz}++;
		284
		285	## DEBUG
		286	dmsg($tz) if DEBUG;
		287
		288	#### Now analyse the body text ####
		289	my $body = $msg->body();
		290
		291	my $insig = 0;
		292	my @body = $body->lines;
		293	for (@body)
7	PointedEar	294	{
22	PointedEar	295	$totbody += length($_); # bump total body size
		296	next if (m{^$>}o); # don't count blank lines in body
		297	if ( $insig == 1 )
		298	{
		299
		300	# bump total sig size
		301	$totsig += length($_);
		302	}
		303	## are we in a quote line?
		304	## Bill Unruh uses ] quotes, and another poster uses ::
		305	elsif ( m{^\s[>\]]}o or m{^\s::}o )
		306	{
		307	## bump count of quoted chrs
		308	$data{$poster}{'quoted'} += length($_);
		309	$totquoted += length($_);
		310	}
		311	elsif (/^-- $/)
		312	{
		313	$insig = 1;
		314	}
		315	else
		316	{
		317	## We must be processing an original line
		318	$data{$poster}{'orig'} += length($_); # bump count of original chrs
		319	$totorig += length($_);
		320	}
7	PointedEar	321	}
22	PointedEar	322
		323	# end for (@body)
		324	}
		325
		326	close($FILE);
		327	}
		328
		329	sub get_agent
		330	{
		331	my $msg = shift;
		332
		333	my $ua =
		334	$msg->study('User-Agent')
		335	or $msg->study('X-Newsreader')
		336	or $msg->study('X-Mailer');
		337	if ( not defined $ua )
		338	{
		339	my $org = $msg->study('Organization');
		340	if ( defined $org
		341	and $org =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/ )
7	PointedEar	342	{
22	PointedEar	343	$ua = $org;
7	PointedEar	344	}
22	PointedEar	345	elsif ( $msg->study('Message-ID') =~ /pine/i )
7	PointedEar	346	{
22	PointedEar	347	$ua = "Pine";
		348	}
7	PointedEar	349	}
5	PointedEar	350
22	PointedEar	351	## Hopefully found UA, else set to unknown
		352	if ( not defined $ua )
		353	{
		354	$ua = __ "unknown";
		355	}
5	PointedEar	356
22	PointedEar	357	$ua = clean($ua);
		358
		359	my $raw = $ua;
		360	my $agent = $raw;
		361
		362	## strip http
		363	if ( $raw =~ /.http./ )
7	PointedEar	364	{
22	PointedEar	365	$raw =~ s!posted via!!i;
		366	$raw =~ s!http://!!g;
		367	$raw =~ s!/!!g;
		368	$raw =~ s! !!g;
		369	}
5	PointedEar	370
22	PointedEar	371	## Fix Outlook from Mac
		372	if ( $raw =~ /^microsoft/i )
		373	{
		374	$raw =~ s/-/ /g;
		375	}
5	PointedEar	376
22	PointedEar	377	## Pick out the popular agents
		378	if ( $raw =~ /(outlook express)/i
		379	\|\| $raw =~ /(windows mail)/i
		380	\|\| $raw =~ /(microplanet gravity)/i
		381	\|\| $raw =~ /(news rover)/i
		382	\|\| $raw =~ /(forte agent)/i
		383	\|\| $raw =~ /(forte free agent)/i )
		384	{
		385	$agent = $1;
		386	}
		387	elsif (
		388	$raw =~ /^(
5	PointedEar	389	pan
		390	\|sylpheed
		391	\|slrn
		392	\|mozilla
		393	\|knode
		394	\|tin
		395	\|hamster
		396	\|xrn
		397	\|xnews
		398	\|aol
		399	\|gnus
		400	\|krn
		401	\|macsoup
		402	\|messenger
		403	\|openxp
		404	\|pine
		405	\|thoth
		406	\|turnpike
		407	\|winvn
		408	\|vsoup
		409	\|google
		410	\|supernews
		411	\|nn
		412	\|rn
		413	\|007
		414	\|webtv
		415	\|compuserve
7	PointedEar	416	)/ix
22	PointedEar	417	)
		418	{
		419	$agent = $1;
		420	}
		421	else
		422	{
		423	## Clean up unknown agents
		424	if ( $raw =~ m!^(.*?)/! )
7	PointedEar	425	{
		426	$agent = $1;
		427	}
22	PointedEar	428	elsif ( $raw =~ /^(\w)\d./ )
7	PointedEar	429	{
22	PointedEar	430	$agent = $1;
		431	}
		432	}
		433
		434	$distinct_agent{$agent}++;
		435	return $agent;
		436	}
		437	## get_agent
		438
		439	#########################################
		440	## Count the User-Agents used, collapsing
		441	## different versions into one per agent.
		442	#########################################
		443	sub count_agents
		444	{
		445	POSTER:
		446	foreach my $poster ( keys %data )
		447	{
		448	foreach my $agent_name ( keys %distinct_agent )
		449	{ # check against known ones
		450	if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7	PointedEar	451	{
22	PointedEar	452	$agents{$agent_name}++;
		453	next POSTER;
7	PointedEar	454	}
		455	}
22	PointedEar	456	$agents{ $data{$poster}{'agent'} }++;
		457	}
		458	} # count_agents
7	PointedEar	459
22	PointedEar	460	#############################################
		461	## Set orig/total percentages for all posters
		462	#############################################
		463	sub fix_percent
		464	{
		465	foreach my $poster ( keys %data )
		466	{
		467	my $percent = 100;
		468	if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
		469	{
		470	$percent =
		471	$data{$poster}{'orig'} * 100 /
		472	( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/
		473	}
		474	elsif ( $data{$poster}{'orig'} == 0 )
		475	{
		476	$percent = 0;
		477	}
		478	$data{$poster}{'percent'} = $percent;
5	PointedEar	479	}
22	PointedEar	480	}
		481	## fix_percent
5	PointedEar	482
22	PointedEar	483	##################################
		484	## Write data structures to a file
		485	##################################
		486	sub write_data
		487	{
		488	open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
		489	or die __x( "Can't create XDATA: {error}\n", error => $! );
		490	print $OUTF "Data collected from $newsgroup_name\n\n";
		491	print $OUTF
		492	"Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		493	foreach my $name ( keys %data )
7	PointedEar	494	{
22	PointedEar	495	print $OUTF
		496	"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7	PointedEar	497	}
22	PointedEar	498	print $OUTF
		499	"============================================================================\n";
		500	print $OUTF "Thread subjects\n";
		501	print $OUTF
		502	"----------------------------------------------------------------------------\n";
		503	foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
		504	{
		505	print $OUTF
		506	"$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
		507	}
		508	print $OUTF
		509	"============================================================================\n";
		510	print $OUTF "Cross-posts\n";
		511	print $OUTF
		512	"----------------------------------------------------------------------------\n";
		513	foreach my $name ( sort keys %crossposts )
		514	{
		515	print $OUTF "$name : $crossposts{$name}\n";
		516	}
		517	print $OUTF
		518	"============================================================================\n";
		519	print $OUTF "User agents\n";
		520	print $OUTF
		521	"----------------------------------------------------------------------------\n";
		522	foreach my $name ( sort keys %agents )
		523	{
		524	print $OUTF "$name : $agents{$name}\n";
		525	}
		526	print $OUTF
		527	"============================================================================\n";
		528	print $OUTF "Time zones\n";
		529	print $OUTF
		530	"----------------------------------------------------------------------------\n";
		531	foreach my $name ( sort keys %tz )
		532	{
		533	print $OUTF "$name : $tz{$name}\n";
		534	}
		535	close $OUTF;
		536	} # write_data
5	PointedEar	537
22	PointedEar	538	sub display_results
		539	{
		540	#################### DISPLAY RESULTS #####################
		541	print "=" x 76, "\n";
		542	printf "%s\n",
		543	centred(
		544	__x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
		545	76 );
		546	print "=" x 76, "\n";
		547	printf "%s\n",
		548	centred(
		549	__
		550	"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)",
		551	76
		552	);
		553	print "\n\n";
		554	printf __"Total posts considered: %s over %d days" . "\n",
		555	commify($totalposts),
		556	$numdays;
		557	my $time_locale = setlocale(LC_TIME);
		558	my $earliest_datetime = DateTime->from_epoch(
		559	epoch => $earliest,
		560	locale => $time_locale,
		561	time_zone => 'UTC',
		562	);
		563	my $latest_datetime = DateTime->from_epoch(
		564	epoch => $latest,
		565	locale => $time_locale,
		566	time_zone => 'UTC',
		567	);
		568	my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
		569	printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format);
		570	printf __"Latest article" . ": %s\n", $latest_datetime->strftime($datetime_format);
		571	printf __"Original articles: %s; replies" . ": %s\n",
		572	commify($origposts),
		573	commify($replies);
		574	printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n",
		575	commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576; #
		576	printf __
		577	"Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
		578	commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
		579	commify( int( $totsize / $totalposts ) );
		580	my $count = keys %data;
		581	printf __"Total headers: %s KiB; bodies: %s KiB\n",
		582	commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
		583	printf __
		584	"Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n",
		585	commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
		586	( $totorig * 100 ) / ( $totorig + $totquoted ),
		587	commify( int( $totsig / 1024 ) );
		588	printf __"Total number of posters: %s, average %s bytes per poster\n",
		589	commify($count), commify( int( $totsize / $count ) ); #/
		590	$count = keys %threads;
		591	printf __"Total number of threads: %s, average %s bytes per thread\n",
		592	commify($count), commify( int( $totsize / $count ) ); #/
		593	printf __"Total number of user agents: %d\n", scalar keys %agents;
		594	print "\n", "=" x 76, "\n";
5	PointedEar	595
22	PointedEar	596	########################################
		597	## Show posters by article count Sec 1;
		598	########################################
		599	unless ( $skipSec{1} )
7	PointedEar	600	{
22	PointedEar	601	if ( keys %data < $topposters )
		602	{
		603	$count = keys %data;
		604	}
		605	else
		606	{
		607	$count = $topposters;
		608	}
		609	printf "%s\n",
		610	centred(
		611	__x( "Top {count} posters by number of articles", count => $topposters ), 76 );
		612	print "=" x 76, "\n";
		613	my $i = 0;
		614	foreach
		615	my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
		616	{
		617	my $name = substr( $poster, 0, 65 );
		618	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
		619	$data{$poster}{count};
		620	last if ( ++$i == $count );
		621	}
		622	print "\n", "=" x 76, "\n";
7	PointedEar	623	}
22	PointedEar	624
		625	######################################
		626	## Show posters by size in KiB Sec 2;
		627	######################################
		628	unless ( $skipSec{2} )
7	PointedEar	629	{
22	PointedEar	630	if ( keys %data < $topposters )
		631	{
		632	$count = keys %data;
		633	}
		634	else
		635	{
		636	$count = $topposters;
		637	}
		638	printf "%s\n",
		639	centred(
		640	__x( "Top {count} posters by article size in KiB", count => $topposters ),
		641	76 );
		642	print "=" x 76, "\n";
		643	my $i = 0;
		644	foreach
		645	my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
		646	{
		647	my $name = substr( $poster, 0, 62 );
		648	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
		649	$data{$poster}{size} / 1024; #/
		650	last if ( ++$i == $count );
		651	}
		652	print "\n", "=" x 76, "\n";
7	PointedEar	653	}
5	PointedEar	654
22	PointedEar	655	#####################################
		656	## Show top posters for original text
		657	#####################################
		658	unless ( $skipSec{3} )
7	PointedEar	659	{
22	PointedEar	660	if ( keys %data < $topposters )
		661	{
		662	$count = keys %data;
		663	}
		664	else
		665	{
		666	$count = $topposters;
		667	}
		668	printf "%s\n",
		669	centred(
		670	__x(
		671	"Top {count} responders by original text (> 5 posts)",
		672	count => $topposters
		673	),
		674	76
		675	);
		676	print "=" x 76, "\n";
		677	my $i = 0;
		678	foreach my $poster (
		679	sort { $data{$b}{percent} <=> $data{$a}{percent} }
		680	keys %data
		681	)
		682	{
		683	next if $data{$poster}{quoted} == 0;
		684	next if $data{$poster}{count} < 5;
		685	my $name = substr( $poster, 0, 63 );
		686	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
		687	$data{$poster}{percent};
		688	last if ( ++$i == $count );
		689	}
		690	print "\n", "=" x 76, "\n";
7	PointedEar	691	}
5	PointedEar	692
22	PointedEar	693	########################################
		694	## Show bottom posters for original text
		695	########################################
		696	unless ( $skipSec{4} )
7	PointedEar	697	{
22	PointedEar	698	if ( keys %data < $topposters )
7	PointedEar	699	{
22	PointedEar	700	$count = keys %data;
		701	}
		702	else
		703	{
		704	$count = $topposters;
		705	}
		706	printf "%s\n",
		707	centred(
		708	__x(
		709	"Bottom {count} responders by original text (> 5 posts)",
		710	count => $topposters
		711	),
		712	76
		713	);
		714	print "=" x 76, "\n";
		715	my $i = 0;
		716	foreach my $poster (
		717	sort { $data{$a}{percent} <=> $data{$b}{percent} }
		718	keys %data
		719	)
		720	{
		721	next if $data{$poster}{quoted} == 0;
		722	next if $data{$poster}{count} < 5;
		723	my $name = substr( $poster, 0, 63 );
		724	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
		725	$data{$poster}{percent};
		726	last if ( ++$i == $count );
		727	}
		728	print "\n", "=" x 76, "\n";
		729	}
5	PointedEar	730
22	PointedEar	731	#####################################
		732	## Show threads by number of articles
		733	#####################################
		734	unless ( $skipSec{5} )
		735	{
		736	if ( keys %threads < $topthreads )
		737	{
		738	$count = keys %threads;
7	PointedEar	739	}
22	PointedEar	740	else
		741	{
		742	$count = $topthreads;
7	PointedEar	743	}
22	PointedEar	744	printf "%s\n",
		745	centred( __x( "Top {count} threads by no. of articles", count => $topthreads ),
		746	76 );
		747	print "=" x 76, "\n";
		748	my $i = 0;
		749	foreach my $thread (
		750	sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
		751	keys %threads
		752	)
7	PointedEar	753	{
22	PointedEar	754	my $name = substr( $thread, 0, 65 );
		755	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		756	$threads{$thread}{'count'};
		757	last if ( ++$i == $count );
7	PointedEar	758	}
22	PointedEar	759	print "\n", "=" x 76, "\n";
		760	}
		761
		762	##############################
		763	## Show threads by size in KiB
		764	##############################
		765	unless ( $skipSec{6} )
		766	{
		767	if ( keys %threads < $topthreads )
		768	{
		769	$count = keys %threads;
		770	}
7	PointedEar	771	else
		772	{
22	PointedEar	773	$count = $topthreads;
		774	}
		775	printf "%s\n",
		776	centred( __x( "Top {count} threads by size in KiB", count => $topthreads ),
		777	76 );
		778	print "=" x 76, "\n";
		779	my $i = 0;
		780	foreach my $thread (
		781	sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
		782	keys %threads
		783	)
		784	{
		785	my $name = substr( $thread, 0, 65 );
		786	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		787	$threads{$thread}{'size'} / 1024; #/
		788	last if ( ++$i == $count );
		789	}
		790	print "\n", "=" x 76, "\n";
		791	}
5	PointedEar	792
22	PointedEar	793	##################################
		794	## Show top 10 cross-posted groups
		795	##################################
		796	unless ( $skipSec{7} )
		797	{
		798	delete $crossposts{"$newsgroup_name"}; # don't include ours
		799	if ( keys %crossposts < $topcrossposts )
		800	{
		801	$count = keys %crossposts;
7	PointedEar	802	}
22	PointedEar	803	else
		804	{
		805	$count = $topcrossposts;
		806	}
		807	printf "%s\n",
		808	centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
		809	print "=" x 76, "\n";
		810	my $i = 0;
		811	foreach
		812	my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
		813	{
		814	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		815	$crossposts{$name};
		816	last if ( ++$i == $count );
		817	}
		818	print "\n", "=" x 76, "\n";
		819	}
5	PointedEar	820
22	PointedEar	821	#########################
		822	## Show agents and counts
		823	#########################
		824	unless ( $skipSec{8} )
		825	{
		826	if ( keys %agents < $topagents )
		827	{
		828	$count = keys %agents;
		829	}
		830	else
		831	{
		832	$count = $topagents;
		833	}
		834	printf "%s\n",
		835	centred( __x( "Top {count} user agents by poster", count => $topagents ),
		836	76 );
		837	print "=" x 76, "\n";
		838	my $i = 0;
		839	foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
		840	{
		841	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
		842	$agents{$agent};
		843	last if ( ++$i == $count );
		844	}
		845	print "\n", "=" x 76, "\n";
		846	}
5	PointedEar	847
22	PointedEar	848	#######################
		849	## Show distinct agents
		850	#######################
		851	unless ( $skipSec{9} )
7	PointedEar	852	{
22	PointedEar	853	if ( keys %distinct_agent < $topagents )
		854	{
		855	$count = keys %distinct_agent;
7	PointedEar	856	}
22	PointedEar	857	else
		858	{
		859	$count = $topagents;
		860	}
		861	printf "%s\n",
		862	centred(
		863	__x( "Top {count} user agents by number of posts", count => $topagents ),
		864	76 );
		865	print "=" x 76, "\n";
		866	my $i = 0;
		867	foreach my $agent (
		868	sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
		869	keys %distinct_agent
		870	)
		871	{
		872	printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
		873	$distinct_agent{$agent},
		874	( ( $distinct_agent{$agent} / $totalposts ) * 100 );
		875	last if ( ++$i == $count );
		876	}
		877	print "\n", "=" x 76, "\n";
7	PointedEar	878	}
5	PointedEar	879
22	PointedEar	880	############################
		881	## Show timezones and counts
		882	############################
		883	unless ( $skipSec{10} )
7	PointedEar	884	{
22	PointedEar	885	if ( keys %tz < $toptz )
7	PointedEar	886	{
22	PointedEar	887	$count = keys %tz;
7	PointedEar	888	}
22	PointedEar	889	else
7	PointedEar	890	{
22	PointedEar	891	$count = $toptz;
7	PointedEar	892	}
22	PointedEar	893	printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 );
		894	print "=" x 76, "\n";
		895	my $i = 0;
		896	foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
		897	{
		898	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
		899	last if ( ++$i == $count );
		900	}
		901	print "\n", "=" x 76, "\n";
7	PointedEar	902	}
5	PointedEar	903	}
		904
22	PointedEar	905	## helper subs
		906
13	PointedEar	907	###############################
		908	## Right pad a string with '.'s
		909	###############################
7	PointedEar	910	sub rpad
		911	{
13	PointedEar	912	## Get text to pad, length to pad, pad chr
7	PointedEar	913	my ( $text, $pad_len, $pad_chr ) = @_;
11	PointedEar	914
13	PointedEar	915	## DEBUG
22	PointedEar	916	printf( "\|%s\| = %d\n", $text, length($text) ) if DEBUG > 1;
11	PointedEar	917
7	PointedEar	918	if ( length($text) > $pad_len )
		919	{
		920	$text = substr( $text, 0, $pad_len );
		921	}
		922	my $padded = $text . $pad_chr x ( $pad_len - length($text) );
		923	return $padded;
5	PointedEar	924	}
		925
13	PointedEar	926	##################
		927	## Centre a string
		928	##################
7	PointedEar	929	sub centred
		930	{
		931	my ( $text, $width ) = @_; # text to centre, size of field to centre in
		932	my $pad_len = ( $width - length($text) ) / 2; #/
		933	my $centred = " " x $pad_len . $text;
		934	return $centred;
5	PointedEar	935	}
		936
13	PointedEar	937	###########################
		938	## Put commas into a number
		939	###########################
22	PointedEar	940	# Get some of locale's numeric formatting parameters
		941	my ($thousands_sep, $grouping) =
		942	@{localeconv()}{'thousands_sep', 'grouping'};
		943	# Apply defaults if values are missing
		944	$thousands_sep = ',' unless $thousands_sep;
		945	# grouping and mon_grouping are packed lists
		946	# of small integers (characters) telling the
		947	# grouping (thousand_seps and mon_thousand_seps
		948	# being the group dividers) of numbers and
		949	# monetary quantities. The integers' meanings:
		950	# 255 means no more grouping, 0 means repeat
		951	# the previous grouping, 1-254 means use that
		952	# as the current grouping. Grouping goes from
		953	# right to left (low to high digits). In the
		954	# below we cheat slightly by never using anything
		955	# else than the first grouping (whatever that is).
		956	my @grouping;
		957	if ($grouping) {
		958	@grouping = unpack("C*", $grouping);
		959	} else {
		960	@grouping = (3);
		961	}
		962
7	PointedEar	963	sub commify
		964	{
13	PointedEar	965	local $_ = shift;
22	PointedEar	966	#1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
		967	$_ = int; # Chop non-integer part
		968	1 while
		969	s/(\d)(\d{$grouping[0]}($\|$thousands_sep))/$1$thousands_sep$2/;
7	PointedEar	970	return $_;
5	PointedEar	971	}
		972
13	PointedEar	973	################################################################
		974	## Returns a string with leading and trailing whitespace removed
		975	################################################################
7	PointedEar	976	sub clean
		977	{
		978	my $dirty = shift;
		979	my $clean = $dirty;
14	PointedEar	980	$clean =~ s/^\s+\|\s+$//g;
5	PointedEar	981
7	PointedEar	982	return $clean;
5	PointedEar	983	}
		984
7	PointedEar	985	sub usage
		986	{
22	PointedEar	987	print __"usage: newsstat.pl NEWS.GROUP\n";
7	PointedEar	988	exit 1;
5	PointedEar	989	}
		990
22	PointedEar	991	sub dmsg
7	PointedEar	992	{
22	PointedEar	993	print STDERR @_, "\n";
		994	}
		995
		996	sub dmsg2
		997	{
		998	my ( $level, @msg ) = @_;
		999	print STDERR @msg, "\n" if $level >= DEBUG;
		1000	}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 33 - Rev 22