WebSVN - LCARS - Blame - Rev 24 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
8	PointedEar	1	#!/usr/bin/env perl
5	PointedEar	2	use strict;
6	PointedEar	3	use warnings;
23	PointedEar	4	require 5.004;
22	PointedEar	5
		6	#use diagnostics;
6	PointedEar	7	use utf8;
		8	use Encode;
		9
22	PointedEar	10	use constant DEBUG => 0;
		11
24	PointedEar	12	## newsstat.pl
		13	## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
		14	## Based on work by Garry Knight et al.
		15	##
		16	## This program is free software: you can redistribute it and/or modify
		17	## it under the terms of the GNU General Public License as published by
		18	## the Free Software Foundation, either version 3 of the License, or
		19	## (at your option) any later version.
		20	##
		21	## This program is distributed in the hope that it will be useful,
		22	## but WITHOUT ANY WARRANTY; without even the implied warranty of
		23	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		24	## GNU General Public License for more details.
		25	##
		26	## You should have received a copy of the GNU General Public License
		27	## along with this program. If not, see <http://www.gnu.org/licenses/>.
		28
13	PointedEar	29	## Print out all text to STDOUT UTF-8 encoded
		30	binmode STDOUT, ':encoding(UTF-8)';
22	PointedEar	31	binmode STDERR, ':encoding(UTF-8)';
5	PointedEar	32
23	PointedEar	33	## L10n
		34	use locale ':not_characters';
		35
		36	# setlocale( LC_MESSAGES, '' );
		37	require Number::Format;
		38
		39	## i18n
		40	## FIXME: Automatically include resolved '.' in @INC
22	PointedEar	41	# print join "\n", @INC;
5	PointedEar	42
22	PointedEar	43	use Locale::TextDomain ('de.pointedears.newsstat');
23	PointedEar	44	use POSIX ('locale_h');
22	PointedEar	45	use Locale::Messages qw (bind_textdomain_filter
23	PointedEar	46	bind_textdomain_codeset
		47	turn_utf_8_on);
		48
		49	bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
22	PointedEar	50	bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23	PointedEar	51
22	PointedEar	52	require Mail::Message;
		53	require DateTime;
		54	require DateTime::Format::Mail;
23	PointedEar	55
		56	# See comments in previous example
		57	my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
		58	@{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
		59	'decimal_point' };
		60
		61	# Apply defaults if values are missing
		62	$thousands_sep = $mon_thousands_sep unless $thousands_sep;
		63	$thousands_sep = ' ' unless $thousands_sep;
		64
		65	# grouping and mon_grouping are packed lists
		66	# of small integers (characters) telling the
		67	# grouping (thousand_seps and mon_thousand_seps
		68	# being the group dividers) of numbers and
		69	# monetary quantities. The integers' meanings:
		70	# 255 means no more grouping, 0 means repeat
		71	# the previous grouping, 1-254 means use that
		72	# as the current grouping. Grouping goes from
		73	# right to left (low to high digits). In the
		74	# below we cheat slightly by never using anything
		75	# else than the first grouping (whatever that is).
		76	my @grouping;
		77	if ($grouping)
		78	{
		79	@grouping = unpack( "C*", $grouping );
		80	}
		81	else
		82	{
		83	@grouping = (3);
		84	}
		85
		86	## FIXME: Why don't the defaults work already?
		87	my $formatter = new Number::Format(
		88	-decimal_point => $decimal_point,
		89	-thousands_sep => $thousands_sep,
		90
		91	# -grouping => $grouping[0]
		92	);
		93
5	PointedEar	94	###################### USER CONFIGURATIONS ############################
		95
13	PointedEar	96	## The name of the group to do stats for
5	PointedEar	97	my $newsgroup_name = $ARGV[0];
22	PointedEar	98	$newsgroup_name or usage();
5	PointedEar	99
13	PointedEar	100	## Check for removal flags
5	PointedEar	101	my $ix;
		102	my $j;
		103	my %skipSec;
		104	my @skiplist;
		105	my $args = @ARGV;
7	PointedEar	106	for ( $ix = 1 ; $ix < $args ; $ix++ )
		107	{
		108	$j = $ix + 1;
		109	if ( $ARGV[$ix] eq "-x" )
		110	{
		111	@skiplist = split( ",", $ARGV[$j] );
		112	}
		113	elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
		114	{
		115	@skiplist = split( ",", $1 );
		116	}
5	PointedEar	117	}
7	PointedEar	118	foreach (@skiplist)
		119	{
5	PointedEar	120	$skipSec{$_} = 1;
		121	}
		122
13	PointedEar	123	## Leafnode users will want /var/spool/news for this variable.
5	PointedEar	124	my $news = "/var/spool/news/";
		125
22	PointedEar	126	## Number of top or bottom posters to show
		127	my $topposters = 20;
5	PointedEar	128
13	PointedEar	129	## Number of threads we want to know about
5	PointedEar	130	my $topthreads = 20;
		131
13	PointedEar	132	## Number of cross-posted threads to show
5	PointedEar	133	my $topcrossposts = 10;
		134
22	PointedEar	135	## Number of agents we list
		136	my $topagents = 10;
		137
13	PointedEar	138	## Number of time zones to show
5	PointedEar	139	my $toptz = 10;
		140
		141	###################### DATA STRUCTURES ######################
		142	my $group = $newsgroup_name;
		143	$group =~ s!\.!/!g;
22	PointedEar	144	my %data; # name, count, agent, total, orig, quoted
		145	my $totsize = 0; # holds total sizes of all files
		146	my %crossposts; # group, count
		147	my %threads; # subject, count
		148	my $replies = 0; # total no. of replies
		149	my $origposts = 0; # total no. of original posts
		150	my %tz; # timezones by count
		151	my $earliest; # earliest article we have found
		152	my $latest; # latest article we have found
		153	my $totheader = 0; # total size of header material
		154	my $totbody = 0; # total size of body material
		155	my $totsig = 0; # total size of sig material
		156	my $totquoted = 0; # total size of quoted material
		157	my $totorig = 0; # total size of original material
		158	my $totalposts; # total no. of posts considered
5	PointedEar	159	my %distinct_agent;
13	PointedEar	160
		161	## Used to hold counts of User Agents used
		162	my %agents = (
		163	"Compuserver" => 0,
		164	"Foorum" => 0,
7	PointedEar	165	"Forte Agent" => 0,
		166	"Forte Free Agent" => 0,
13	PointedEar	167	"Gnus" => 0,
		168	"KNode" => 0,
		169	"MacSOUP" => 0,
		170	"MT-NewsWatcher" => 0,
7	PointedEar	171	"MicroPlanet Gravity" => 0,
		172	"Microsoft Outlook Express" => 0,
13	PointedEar	173	"Microsoft Windows Mail" => 0,
		174	"Mozilla" => 0,
		175	"News Rover" => 0,
		176	"NN" => 0,
		177	"Pan" => 0,
		178	"rn" => 0,
7	PointedEar	179	"slrn" => 0,
13	PointedEar	180	"Sylpheed" => 0,
7	PointedEar	181	"tin" => 0,
13	PointedEar	182	"VSoup" => 0,
7	PointedEar	183	"WebTV" => 0,
22	PointedEar	184	"Xnews" => 0,
13	PointedEar	185	);
5	PointedEar	186
22	PointedEar	187	my $datetime_parser = DateTime::Format::Mail->new();
		188	$datetime_parser->loose();
5	PointedEar	189
22	PointedEar	190	my $today = DateTime->today( time_zone => 'UTC' );
		191	my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
		192	my $start = int $prev_month->strftime('%s');
		193	my $numdays = int DateTime->last_day_of_month(
		194	year => $prev_month->year(),
		195	month => $prev_month->month(),
		196	time_zone => $prev_month->time_zone(),
		197	)->day();
		198	my $end = int $today->clone()->set_day(1)->strftime('%s');
		199
		200	dmsg( $start, " to ", $end ) if DEBUG;
		201
		202	chdir("$news$group")
		203	or die __x(
		204	"Can't cd to {newsgroup}: {error}\n",
		205	newsgroup => "$news$group",
		206	error => $!
		207	);
		208	opendir( DIR, "." )
		209	or die __x(
		210	"Can't open {newsgroup}: {error}\n",
		211	newsgroup => "$news$group",
		212	error => $!
		213	);
		214
		215	while ( defined( my $filename = readdir(DIR) ) )
7	PointedEar	216	{
22	PointedEar	217	next unless -f $filename; # only want real files
		218	next if ( $filename eq ".overview" ); # real articles only
		219
		220	get_article($filename); # read in the article
5	PointedEar	221	}
22	PointedEar	222	closedir(DIR); # finished with the directory
7	PointedEar	223
22	PointedEar	224	dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG;
		225
13	PointedEar	226	## Post-processing
22	PointedEar	227	count_agents(); # count agents, collapsing versions
		228	fix_percent();
5	PointedEar	229
22	PointedEar	230	write_data();
		231	display_results();
5	PointedEar	232
13	PointedEar	233	########################################
22	PointedEar	234	## Get current article's header and body
13	PointedEar	235	########################################
22	PointedEar	236	sub get_article
7	PointedEar	237	{
22	PointedEar	238	my $filename = shift;
5	PointedEar	239
22	PointedEar	240	open( my $FILE, '<', $filename )
		241	or
		242	die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
		243	my $msg = Mail::Message->read($FILE);
		244	my $timestamp = $msg->timestamp();
		245	my $date = $msg->study('Date');
5	PointedEar	246
22	PointedEar	247	## Disregard article if timestamp is not in range
		248	dmsg($timestamp) if DEBUG;
		249	if ( $timestamp < $start or $timestamp >= $end )
7	PointedEar	250	{
22	PointedEar	251	dmsg("Posting on $date ignored.") if DEBUG;
		252	return;
7	PointedEar	253	}
5	PointedEar	254
22	PointedEar	255	$totalposts++; # bump count of articles considered
5	PointedEar	256
22	PointedEar	257	## DEBUG
		258	dmsg($date) if DEBUG;
13	PointedEar	259
22	PointedEar	260	## get stats about the file itself
		261	my $filesize = -s $filename; # get total size of file
		262	$totsize += $filesize; # bump total sizes of all files
5	PointedEar	263
22	PointedEar	264	if ( ( not defined $earliest ) or $timestamp < $earliest )
7	PointedEar	265	{
22	PointedEar	266	$earliest = $timestamp;
7	PointedEar	267	}
22	PointedEar	268	elsif ( ( not defined $latest ) or $timestamp > $latest )
7	PointedEar	269	{
22	PointedEar	270	$latest = $timestamp;
7	PointedEar	271	}
13	PointedEar	272
22	PointedEar	273	#print "timestamp: $timestamp\n";
5	PointedEar	274
22	PointedEar	275	## count header size
		276	$totheader += $msg->head()->size();
5	PointedEar	277
22	PointedEar	278	## get the poster's name (MIME-decoded, in UTF-8)
		279	my $poster = $msg->study('From');
		280	if ( defined $poster )
7	PointedEar	281	{
22	PointedEar	282	## Convert old to new format
		283	$poster =~ s/^\s(.+?\@.+?)\s$(.+?)$\s*$/$2 <$1>/;
5	PointedEar	284
22	PointedEar	285	## Collapse whitespace
		286	$poster =~ s/\s+/ /g;
5	PointedEar	287
22	PointedEar	288	## Remove outer quotes; TODO: observe RFC 5322 strictly
		289	$poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5	PointedEar	290
22	PointedEar	291	## DEBUG
		292	dmsg($poster) if DEBUG;
7	PointedEar	293
22	PointedEar	294	## seen this one before?
		295	if ( !defined( $data{$poster} ) )
		296	{
		297	$data{$poster}{'agent'} = __ 'unknown'; # comes after For: field
		298	$data{$poster}{'orig'} = 0;
		299	$data{$poster}{'quoted'} = 0;
		300	}
		301	$data{$poster}{'count'}++; # bump count for this poster
		302	$data{$poster}{'size'} += $filesize; # total size of file
5	PointedEar	303
22	PointedEar	304	## The User-Agent and/or X-Newsreader fields
		305	## for User-Agent by poster
		306	my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
		307	if ( defined $ua )
7	PointedEar	308	{
22	PointedEar	309	$data{$poster}{'agent'} = $ua;
		310
		311	## DEBUG
		312	dmsg($ua) if DEBUG;
7	PointedEar	313	}
5	PointedEar	314
22	PointedEar	315	## The User Agent for User-Agent by number of posts
		316	get_agent($msg);
5	PointedEar	317
22	PointedEar	318	## Get all cross-posted newsgroups
		319	for ( split( /,/, $msg->study('Newsgroups') ) )
		320	{
		321	$crossposts{$_}++; # bump count for each
		322	}
5	PointedEar	323
22	PointedEar	324	## Get threads
		325	my $thread = $msg->study('Subject');
		326	$thread =~ s/^re:\s+//i; # Remove Re: or re: at start
		327	$thread =~ s/\s+/ /g; # collapse whitespace
		328	$threads{$thread}{'count'}++; # bump count of this subject
		329	$threads{$thread}{'size'} += $filesize; # bump bytes for this thread
5	PointedEar	330
22	PointedEar	331	## Is this an original post or a reply?
		332	if ( defined $msg->study('References') )
7	PointedEar	333	{
22	PointedEar	334	$replies++;
7	PointedEar	335	}
22	PointedEar	336	else
7	PointedEar	337	{
22	PointedEar	338	$origposts++;
7	PointedEar	339	}
22	PointedEar	340
		341	## Get the time zone
		342	my $datetime = $datetime_parser->parse_datetime($date);
		343	my $tz = $datetime->strftime('%z');
		344	$tz = "UTC" if $tz =~ m{^(?:GMT\|0000)$}o;
		345	$tz{$tz}++;
		346
		347	## DEBUG
		348	dmsg($tz) if DEBUG;
		349
		350	#### Now analyse the body text ####
		351	my $body = $msg->body();
		352
		353	my $insig = 0;
		354	my @body = $body->lines;
		355	for (@body)
7	PointedEar	356	{
22	PointedEar	357	$totbody += length($_); # bump total body size
		358	next if (m{^$>}o); # don't count blank lines in body
		359	if ( $insig == 1 )
		360	{
		361
		362	# bump total sig size
		363	$totsig += length($_);
		364	}
		365	## are we in a quote line?
		366	## Bill Unruh uses ] quotes, and another poster uses ::
		367	elsif ( m{^\s[>\]]}o or m{^\s::}o )
		368	{
		369	## bump count of quoted chrs
		370	$data{$poster}{'quoted'} += length($_);
		371	$totquoted += length($_);
		372	}
		373	elsif (/^-- $/)
		374	{
		375	$insig = 1;
		376	}
		377	else
		378	{
		379	## We must be processing an original line
		380	$data{$poster}{'orig'} += length($_); # bump count of original chrs
		381	$totorig += length($_);
		382	}
7	PointedEar	383	}
22	PointedEar	384
		385	# end for (@body)
		386	}
		387
		388	close($FILE);
		389	}
		390
		391	sub get_agent
		392	{
		393	my $msg = shift;
		394
		395	my $ua =
		396	$msg->study('User-Agent')
		397	or $msg->study('X-Newsreader')
		398	or $msg->study('X-Mailer');
		399	if ( not defined $ua )
		400	{
		401	my $org = $msg->study('Organization');
		402	if ( defined $org
		403	and $org =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/ )
7	PointedEar	404	{
22	PointedEar	405	$ua = $org;
7	PointedEar	406	}
22	PointedEar	407	elsif ( $msg->study('Message-ID') =~ /pine/i )
7	PointedEar	408	{
22	PointedEar	409	$ua = "Pine";
		410	}
7	PointedEar	411	}
5	PointedEar	412
22	PointedEar	413	## Hopefully found UA, else set to unknown
		414	if ( not defined $ua )
		415	{
		416	$ua = __ "unknown";
		417	}
5	PointedEar	418
22	PointedEar	419	$ua = clean($ua);
		420
		421	my $raw = $ua;
		422	my $agent = $raw;
		423
		424	## strip http
		425	if ( $raw =~ /.http./ )
7	PointedEar	426	{
22	PointedEar	427	$raw =~ s!posted via!!i;
		428	$raw =~ s!http://!!g;
		429	$raw =~ s!/!!g;
		430	$raw =~ s! !!g;
		431	}
5	PointedEar	432
22	PointedEar	433	## Fix Outlook from Mac
		434	if ( $raw =~ /^microsoft/i )
		435	{
		436	$raw =~ s/-/ /g;
		437	}
5	PointedEar	438
22	PointedEar	439	## Pick out the popular agents
		440	if ( $raw =~ /(outlook express)/i
		441	\|\| $raw =~ /(windows mail)/i
		442	\|\| $raw =~ /(microplanet gravity)/i
		443	\|\| $raw =~ /(news rover)/i
		444	\|\| $raw =~ /(forte agent)/i
		445	\|\| $raw =~ /(forte free agent)/i )
		446	{
		447	$agent = $1;
		448	}
		449	elsif (
		450	$raw =~ /^(
5	PointedEar	451	pan
		452	\|sylpheed
		453	\|slrn
		454	\|mozilla
		455	\|knode
		456	\|tin
		457	\|hamster
		458	\|xrn
		459	\|xnews
		460	\|aol
		461	\|gnus
		462	\|krn
		463	\|macsoup
		464	\|messenger
		465	\|openxp
		466	\|pine
		467	\|thoth
		468	\|turnpike
		469	\|winvn
		470	\|vsoup
		471	\|google
		472	\|supernews
		473	\|nn
		474	\|rn
		475	\|007
		476	\|webtv
		477	\|compuserve
7	PointedEar	478	)/ix
22	PointedEar	479	)
		480	{
		481	$agent = $1;
		482	}
		483	else
		484	{
		485	## Clean up unknown agents
		486	if ( $raw =~ m!^(.*?)/! )
7	PointedEar	487	{
		488	$agent = $1;
		489	}
22	PointedEar	490	elsif ( $raw =~ /^(\w)\d./ )
7	PointedEar	491	{
22	PointedEar	492	$agent = $1;
		493	}
		494	}
		495
		496	$distinct_agent{$agent}++;
		497	return $agent;
		498	}
		499	## get_agent
		500
		501	#########################################
		502	## Count the User-Agents used, collapsing
		503	## different versions into one per agent.
		504	#########################################
		505	sub count_agents
		506	{
		507	POSTER:
		508	foreach my $poster ( keys %data )
		509	{
		510	foreach my $agent_name ( keys %distinct_agent )
		511	{ # check against known ones
		512	if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7	PointedEar	513	{
22	PointedEar	514	$agents{$agent_name}++;
		515	next POSTER;
7	PointedEar	516	}
		517	}
22	PointedEar	518	$agents{ $data{$poster}{'agent'} }++;
		519	}
		520	} # count_agents
7	PointedEar	521
22	PointedEar	522	#############################################
		523	## Set orig/total percentages for all posters
		524	#############################################
		525	sub fix_percent
		526	{
		527	foreach my $poster ( keys %data )
		528	{
		529	my $percent = 100;
		530	if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
		531	{
		532	$percent =
		533	$data{$poster}{'orig'} * 100 /
		534	( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/
		535	}
		536	elsif ( $data{$poster}{'orig'} == 0 )
		537	{
		538	$percent = 0;
		539	}
		540	$data{$poster}{'percent'} = $percent;
5	PointedEar	541	}
22	PointedEar	542	}
		543	## fix_percent
5	PointedEar	544
22	PointedEar	545	##################################
		546	## Write data structures to a file
		547	##################################
		548	sub write_data
		549	{
		550	open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
		551	or die __x( "Can't create XDATA: {error}\n", error => $! );
		552	print $OUTF "Data collected from $newsgroup_name\n\n";
		553	print $OUTF
		554	"Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		555	foreach my $name ( keys %data )
7	PointedEar	556	{
22	PointedEar	557	print $OUTF
		558	"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7	PointedEar	559	}
22	PointedEar	560	print $OUTF
		561	"============================================================================\n";
		562	print $OUTF "Thread subjects\n";
		563	print $OUTF
		564	"----------------------------------------------------------------------------\n";
		565	foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
		566	{
		567	print $OUTF
		568	"$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
		569	}
		570	print $OUTF
		571	"============================================================================\n";
		572	print $OUTF "Cross-posts\n";
		573	print $OUTF
		574	"----------------------------------------------------------------------------\n";
		575	foreach my $name ( sort keys %crossposts )
		576	{
		577	print $OUTF "$name : $crossposts{$name}\n";
		578	}
		579	print $OUTF
		580	"============================================================================\n";
		581	print $OUTF "User agents\n";
		582	print $OUTF
		583	"----------------------------------------------------------------------------\n";
		584	foreach my $name ( sort keys %agents )
		585	{
		586	print $OUTF "$name : $agents{$name}\n";
		587	}
		588	print $OUTF
		589	"============================================================================\n";
		590	print $OUTF "Time zones\n";
		591	print $OUTF
		592	"----------------------------------------------------------------------------\n";
		593	foreach my $name ( sort keys %tz )
		594	{
		595	print $OUTF "$name : $tz{$name}\n";
		596	}
		597	close $OUTF;
		598	} # write_data
5	PointedEar	599
22	PointedEar	600	sub display_results
		601	{
		602	#################### DISPLAY RESULTS #####################
23	PointedEar	603	println( "=" x 76 );
22	PointedEar	604	printf "%s\n",
		605	centred(
		606	__x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
		607	76 );
23	PointedEar	608	println( "=" x 76 );
22	PointedEar	609	printf "%s\n",
		610	centred(
23	PointedEar	611	__(
		612	"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
		613	),
22	PointedEar	614	76
		615	);
		616	print "\n\n";
23	PointedEar	617	printf __"Total posts considered: %s over %d days\n",
		618	$formatter->format_number($totalposts),
		619	$formatter->format_number($numdays);
		620	my $time_locale = setlocale(LC_TIME);
22	PointedEar	621	my $earliest_datetime = DateTime->from_epoch(
23	PointedEar	622	epoch => $earliest,
		623	locale => $time_locale,
22	PointedEar	624	time_zone => 'UTC',
		625	);
		626	my $latest_datetime = DateTime->from_epoch(
23	PointedEar	627	epoch => $latest,
		628	locale => $time_locale,
22	PointedEar	629	time_zone => 'UTC',
		630	);
		631	my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23	PointedEar	632	printf __"Earliest article: %s\n",
		633	$earliest_datetime->strftime($datetime_format);
		634	printf __"Latest article: %s\n",
		635	$latest_datetime->strftime($datetime_format);
		636	printf __"Original articles: %s; replies: %s\n",
		637	$formatter->format_number($origposts),
		638	$formatter->format_number($replies);
		639	printf __"Total size of posts: %s bytes (%s)" . "\n",
		640	$formatter->format_number($totsize),
		641	$formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
		642	printf __"Average %s articles per day, %s per day, %s bytes per article\n",
		643	$formatter->format_number( int( $totalposts / $numdays ) ),
		644	$formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
		645	$formatter->format_number( int( $totsize / $totalposts ) );
		646
22	PointedEar	647	my $count = keys %data;
23	PointedEar	648	printf __"Total headers: %s; bodies: %s\n",
		649	$formatter->format_bytes(
		650	$totheader, ( 'precision' => 1, 'mode' => 'iec' )
		651	),
		652	$formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	653	printf __
23	PointedEar	654	"Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
		655	$formatter->format_bytes(
		656	$totquoted, ( 'precision' => 1, 'mode' => 'iec' )
		657	),
		658	$formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
		659	$formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
		660	$formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
		661	printf __"Total number of posters: %s, average %s per poster\n",
		662	$formatter->format_number($count),
		663	$formatter->format_bytes( $totsize / $count,
		664	( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	665	$count = keys %threads;
23	PointedEar	666	printf __"Total number of threads: %s, average %s per thread\n",
		667	$formatter->format_number($count),
		668	$formatter->format_bytes( $totsize / $count,
		669	( 'precision' => 1, 'mode' => 'iec' ) );
		670	printf __"Total number of user agents: %d\n",
		671	$formatter->format_number( scalar keys %agents );
		672	print "\n", "=" x 76, "\n" ;
22	PointedEar	673	########################################
		674	## Show posters by article count Sec 1;
		675	########################################
		676	unless ( $skipSec{1} )
7	PointedEar	677	{
22	PointedEar	678	if ( keys %data < $topposters )
		679	{
		680	$count = keys %data;
		681	}
		682	else
		683	{
		684	$count = $topposters;
		685	}
		686	printf "%s\n",
		687	centred(
23	PointedEar	688	__x( "Top {count} posters by number of articles", count => $topposters ),
		689	76
		690	);
22	PointedEar	691	print "=" x 76, "\n";
		692	my $i = 0;
		693	foreach
		694	my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
		695	{
		696	my $name = substr( $poster, 0, 65 );
		697	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
		698	$data{$poster}{count};
		699	last if ( ++$i == $count );
		700	}
		701	print "\n", "=" x 76, "\n";
7	PointedEar	702	}
22	PointedEar	703
		704	######################################
		705	## Show posters by size in KiB Sec 2;
		706	######################################
		707	unless ( $skipSec{2} )
7	PointedEar	708	{
22	PointedEar	709	if ( keys %data < $topposters )
		710	{
		711	$count = keys %data;
		712	}
		713	else
		714	{
		715	$count = $topposters;
		716	}
		717	printf "%s\n",
		718	centred(
		719	__x( "Top {count} posters by article size in KiB", count => $topposters ),
23	PointedEar	720	76
		721	);
22	PointedEar	722	print "=" x 76, "\n";
		723	my $i = 0;
		724	foreach
		725	my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
		726	{
		727	my $name = substr( $poster, 0, 62 );
		728	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
		729	$data{$poster}{size} / 1024; #/
		730	last if ( ++$i == $count );
		731	}
		732	print "\n", "=" x 76, "\n";
7	PointedEar	733	}
5	PointedEar	734
22	PointedEar	735	#####################################
		736	## Show top posters for original text
		737	#####################################
		738	unless ( $skipSec{3} )
7	PointedEar	739	{
22	PointedEar	740	if ( keys %data < $topposters )
		741	{
		742	$count = keys %data;
		743	}
		744	else
		745	{
		746	$count = $topposters;
		747	}
		748	printf "%s\n",
		749	centred(
		750	__x(
		751	"Top {count} responders by original text (> 5 posts)",
		752	count => $topposters
		753	),
		754	76
		755	);
		756	print "=" x 76, "\n";
		757	my $i = 0;
		758	foreach my $poster (
		759	sort { $data{$b}{percent} <=> $data{$a}{percent} }
		760	keys %data
		761	)
		762	{
		763	next if $data{$poster}{quoted} == 0;
		764	next if $data{$poster}{count} < 5;
		765	my $name = substr( $poster, 0, 63 );
		766	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
		767	$data{$poster}{percent};
		768	last if ( ++$i == $count );
		769	}
		770	print "\n", "=" x 76, "\n";
7	PointedEar	771	}
5	PointedEar	772
22	PointedEar	773	########################################
		774	## Show bottom posters for original text
		775	########################################
		776	unless ( $skipSec{4} )
7	PointedEar	777	{
22	PointedEar	778	if ( keys %data < $topposters )
7	PointedEar	779	{
22	PointedEar	780	$count = keys %data;
		781	}
		782	else
		783	{
		784	$count = $topposters;
		785	}
		786	printf "%s\n",
		787	centred(
		788	__x(
		789	"Bottom {count} responders by original text (> 5 posts)",
		790	count => $topposters
		791	),
		792	76
		793	);
		794	print "=" x 76, "\n";
		795	my $i = 0;
		796	foreach my $poster (
		797	sort { $data{$a}{percent} <=> $data{$b}{percent} }
		798	keys %data
		799	)
		800	{
		801	next if $data{$poster}{quoted} == 0;
		802	next if $data{$poster}{count} < 5;
		803	my $name = substr( $poster, 0, 63 );
		804	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
		805	$data{$poster}{percent};
		806	last if ( ++$i == $count );
		807	}
		808	print "\n", "=" x 76, "\n";
		809	}
5	PointedEar	810
22	PointedEar	811	#####################################
		812	## Show threads by number of articles
		813	#####################################
		814	unless ( $skipSec{5} )
		815	{
		816	if ( keys %threads < $topthreads )
		817	{
		818	$count = keys %threads;
7	PointedEar	819	}
22	PointedEar	820	else
		821	{
		822	$count = $topthreads;
7	PointedEar	823	}
22	PointedEar	824	printf "%s\n",
23	PointedEar	825	centred(
		826	__x( "Top {count} threads by no. of articles", count => $topthreads ),
22	PointedEar	827	76 );
		828	print "=" x 76, "\n";
		829	my $i = 0;
		830	foreach my $thread (
		831	sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
		832	keys %threads
		833	)
7	PointedEar	834	{
22	PointedEar	835	my $name = substr( $thread, 0, 65 );
		836	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		837	$threads{$thread}{'count'};
		838	last if ( ++$i == $count );
7	PointedEar	839	}
22	PointedEar	840	print "\n", "=" x 76, "\n";
		841	}
		842
		843	##############################
		844	## Show threads by size in KiB
		845	##############################
		846	unless ( $skipSec{6} )
		847	{
		848	if ( keys %threads < $topthreads )
		849	{
		850	$count = keys %threads;
		851	}
7	PointedEar	852	else
		853	{
22	PointedEar	854	$count = $topthreads;
		855	}
		856	printf "%s\n",
23	PointedEar	857	centred(
		858	__x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22	PointedEar	859	print "=" x 76, "\n";
		860	my $i = 0;
		861	foreach my $thread (
		862	sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
		863	keys %threads
		864	)
		865	{
		866	my $name = substr( $thread, 0, 65 );
		867	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		868	$threads{$thread}{'size'} / 1024; #/
		869	last if ( ++$i == $count );
		870	}
		871	print "\n", "=" x 76, "\n";
		872	}
5	PointedEar	873
22	PointedEar	874	##################################
		875	## Show top 10 cross-posted groups
		876	##################################
		877	unless ( $skipSec{7} )
		878	{
		879	delete $crossposts{"$newsgroup_name"}; # don't include ours
		880	if ( keys %crossposts < $topcrossposts )
		881	{
		882	$count = keys %crossposts;
7	PointedEar	883	}
22	PointedEar	884	else
		885	{
		886	$count = $topcrossposts;
		887	}
		888	printf "%s\n",
23	PointedEar	889	centred(
		890	__x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22	PointedEar	891	print "=" x 76, "\n";
		892	my $i = 0;
		893	foreach
		894	my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
		895	{
		896	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
		897	$crossposts{$name};
		898	last if ( ++$i == $count );
		899	}
		900	print "\n", "=" x 76, "\n";
		901	}
5	PointedEar	902
22	PointedEar	903	#########################
		904	## Show agents and counts
		905	#########################
		906	unless ( $skipSec{8} )
		907	{
		908	if ( keys %agents < $topagents )
		909	{
		910	$count = keys %agents;
		911	}
		912	else
		913	{
		914	$count = $topagents;
		915	}
		916	printf "%s\n",
		917	centred( __x( "Top {count} user agents by poster", count => $topagents ),
		918	76 );
		919	print "=" x 76, "\n";
		920	my $i = 0;
		921	foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
		922	{
		923	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
		924	$agents{$agent};
		925	last if ( ++$i == $count );
		926	}
		927	print "\n", "=" x 76, "\n";
		928	}
5	PointedEar	929
22	PointedEar	930	#######################
		931	## Show distinct agents
		932	#######################
		933	unless ( $skipSec{9} )
7	PointedEar	934	{
22	PointedEar	935	if ( keys %distinct_agent < $topagents )
		936	{
		937	$count = keys %distinct_agent;
7	PointedEar	938	}
22	PointedEar	939	else
		940	{
		941	$count = $topagents;
		942	}
		943	printf "%s\n",
		944	centred(
		945	__x( "Top {count} user agents by number of posts", count => $topagents ),
23	PointedEar	946	76
		947	);
22	PointedEar	948	print "=" x 76, "\n";
		949	my $i = 0;
		950	foreach my $agent (
		951	sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
		952	keys %distinct_agent
		953	)
		954	{
		955	printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
		956	$distinct_agent{$agent},
		957	( ( $distinct_agent{$agent} / $totalposts ) * 100 );
		958	last if ( ++$i == $count );
		959	}
		960	print "\n", "=" x 76, "\n";
7	PointedEar	961	}
5	PointedEar	962
22	PointedEar	963	############################
		964	## Show timezones and counts
		965	############################
		966	unless ( $skipSec{10} )
7	PointedEar	967	{
22	PointedEar	968	if ( keys %tz < $toptz )
7	PointedEar	969	{
22	PointedEar	970	$count = keys %tz;
7	PointedEar	971	}
22	PointedEar	972	else
7	PointedEar	973	{
22	PointedEar	974	$count = $toptz;
7	PointedEar	975	}
23	PointedEar	976	printf "%s\n",
		977	centred( __x( "Top {count} time zones", count => $toptz ), 76 );
22	PointedEar	978	print "=" x 76, "\n";
		979	my $i = 0;
		980	foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
		981	{
		982	printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
		983	last if ( ++$i == $count );
		984	}
		985	print "\n", "=" x 76, "\n";
7	PointedEar	986	}
5	PointedEar	987	}
		988
22	PointedEar	989	## helper subs
		990
13	PointedEar	991	###############################
		992	## Right pad a string with '.'s
		993	###############################
7	PointedEar	994	sub rpad
		995	{
13	PointedEar	996	## Get text to pad, length to pad, pad chr
7	PointedEar	997	my ( $text, $pad_len, $pad_chr ) = @_;
11	PointedEar	998
13	PointedEar	999	## DEBUG
22	PointedEar	1000	printf( "\|%s\| = %d\n", $text, length($text) ) if DEBUG > 1;
11	PointedEar	1001
7	PointedEar	1002	if ( length($text) > $pad_len )
		1003	{
		1004	$text = substr( $text, 0, $pad_len );
		1005	}
		1006	my $padded = $text . $pad_chr x ( $pad_len - length($text) );
		1007	return $padded;
5	PointedEar	1008	}
		1009
13	PointedEar	1010	##################
		1011	## Centre a string
		1012	##################
7	PointedEar	1013	sub centred
		1014	{
		1015	my ( $text, $width ) = @_; # text to centre, size of field to centre in
		1016	my $pad_len = ( $width - length($text) ) / 2; #/
		1017	my $centred = " " x $pad_len . $text;
		1018	return $centred;
5	PointedEar	1019	}
		1020
13	PointedEar	1021	###########################
		1022	## Put commas into a number
		1023	###########################
7	PointedEar	1024	sub commify
		1025	{
13	PointedEar	1026	local $_ = shift;
23	PointedEar	1027	my $number = $_;
		1028	$_ = int; # Chop non-integer part
22	PointedEar	1029	1 while
23	PointedEar	1030	s/([-+]?\d)(\d{$grouping[0]}($\|\Q$thousands_sep\E))/$1$thousands_sep$2/;
		1031	my $int_part = $_;
		1032	my $real_part = '';
		1033	if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
		1034	{
		1035	$real_part = $1;
		1036	}
		1037	return $int_part . $real_part;
5	PointedEar	1038	}
		1039
13	PointedEar	1040	################################################################
		1041	## Returns a string with leading and trailing whitespace removed
		1042	################################################################
7	PointedEar	1043	sub clean
		1044	{
		1045	my $dirty = shift;
		1046	my $clean = $dirty;
14	PointedEar	1047	$clean =~ s/^\s+\|\s+$//g;
5	PointedEar	1048
7	PointedEar	1049	return $clean;
5	PointedEar	1050	}
		1051
7	PointedEar	1052	sub usage
		1053	{
23	PointedEar	1054	println( __ "usage: newsstat.pl NEWS.GROUP" );
7	PointedEar	1055	exit 1;
5	PointedEar	1056	}
		1057
22	PointedEar	1058	sub dmsg
7	PointedEar	1059	{
22	PointedEar	1060	print STDERR @_, "\n";
		1061	}
		1062
		1063	sub dmsg2
		1064	{
		1065	my ( $level, @msg ) = @_;
		1066	print STDERR @msg, "\n" if $level >= DEBUG;
		1067	}
23	PointedEar	1068
		1069	sub println
		1070	{
		1071	print @_, "\n";
		1072	}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl - Rev 24