WebSVN - LCARS - Blame - Rev 48 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
8	PointedEar	1	#!/usr/bin/env perl
5	PointedEar	2	use strict;
6	PointedEar	3	use warnings;
23	PointedEar	4	require 5.004;
22	PointedEar	5
		6	#use diagnostics;
6	PointedEar	7	use utf8;
		8
30	PointedEar	9	## NOTE:
		10	## Enable and remove binmode when utf8::all has actually become lexically scoped
		11	# use utf8:all;
		12
22	PointedEar	13	use constant DEBUG => 0;
		14
24	PointedEar	15	## newsstat.pl
		16	## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
		17	## Based on work by Garry Knight et al.
31	PointedEar	18	##
24	PointedEar	19	## This program is free software: you can redistribute it and/or modify
		20	## it under the terms of the GNU General Public License as published by
		21	## the Free Software Foundation, either version 3 of the License, or
		22	## (at your option) any later version.
31	PointedEar	23	##
24	PointedEar	24	## This program is distributed in the hope that it will be useful,
		25	## but WITHOUT ANY WARRANTY; without even the implied warranty of
		26	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		27	## GNU General Public License for more details.
31	PointedEar	28	##
24	PointedEar	29	## You should have received a copy of the GNU General Public License
		30	## along with this program. If not, see <http://www.gnu.org/licenses/>.
		31
13	PointedEar	32	## Print out all text to STDOUT UTF-8 encoded
		33	binmode STDOUT, ':encoding(UTF-8)';
22	PointedEar	34	binmode STDERR, ':encoding(UTF-8)';
5	PointedEar	35
23	PointedEar	36	## L10n
		37	use locale ':not_characters';
		38
		39	# setlocale( LC_MESSAGES, '' );
		40	require Number::Format;
		41
		42	## i18n
		43	## FIXME: Automatically include resolved '.' in @INC
22	PointedEar	44	# print join "\n", @INC;
5	PointedEar	45
22	PointedEar	46	use Locale::TextDomain ('de.pointedears.newsstat');
23	PointedEar	47	use POSIX ('locale_h');
22	PointedEar	48	use Locale::Messages qw (bind_textdomain_filter
23	PointedEar	49	bind_textdomain_codeset
		50	turn_utf_8_on);
		51
		52	bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
22	PointedEar	53	bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23	PointedEar	54
22	PointedEar	55	require Mail::Message;
		56	require DateTime;
		57	require DateTime::Format::Mail;
23	PointedEar	58
		59	# See comments in previous example
		60	my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
		61	@{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
		62	'decimal_point' };
		63
		64	# Apply defaults if values are missing
		65	$thousands_sep = $mon_thousands_sep unless $thousands_sep;
		66	$thousands_sep = ' ' unless $thousands_sep;
		67
		68	# grouping and mon_grouping are packed lists
		69	# of small integers (characters) telling the
		70	# grouping (thousand_seps and mon_thousand_seps
		71	# being the group dividers) of numbers and
		72	# monetary quantities. The integers' meanings:
		73	# 255 means no more grouping, 0 means repeat
		74	# the previous grouping, 1-254 means use that
		75	# as the current grouping. Grouping goes from
		76	# right to left (low to high digits). In the
		77	# below we cheat slightly by never using anything
		78	# else than the first grouping (whatever that is).
		79	my @grouping;
		80	if ($grouping)
		81	{
		82	@grouping = unpack( "C*", $grouping );
		83	}
		84	else
		85	{
		86	@grouping = (3);
		87	}
		88
		89	## FIXME: Why don't the defaults work already?
		90	my $formatter = new Number::Format(
		91	-decimal_point => $decimal_point,
		92	-thousands_sep => $thousands_sep,
		93	# -grouping => $grouping[0]
		94	);
		95
5	PointedEar	96	###################### USER CONFIGURATIONS ############################
		97
13	PointedEar	98	## The name of the group to do stats for
5	PointedEar	99	my $newsgroup_name = $ARGV[0];
31	PointedEar	100	$newsgroup_name // usage();
5	PointedEar	101
13	PointedEar	102	## Check for removal flags
5	PointedEar	103	my $ix;
		104	my $j;
		105	my %skipSec;
		106	my @skiplist;
		107	my $args = @ARGV;
7	PointedEar	108	for ( $ix = 1 ; $ix < $args ; $ix++ )
		109	{
		110	$j = $ix + 1;
		111	if ( $ARGV[$ix] eq "-x" )
		112	{
		113	@skiplist = split( ",", $ARGV[$j] );
		114	}
		115	elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
		116	{
		117	@skiplist = split( ",", $1 );
		118	}
5	PointedEar	119	}
7	PointedEar	120	foreach (@skiplist)
		121	{
5	PointedEar	122	$skipSec{$_} = 1;
		123	}
		124
13	PointedEar	125	## Leafnode users will want /var/spool/news for this variable.
5	PointedEar	126	my $news = "/var/spool/news/";
		127
22	PointedEar	128	## Number of top or bottom posters to show
		129	my $topposters = 20;
5	PointedEar	130
13	PointedEar	131	## Number of threads we want to know about
5	PointedEar	132	my $topthreads = 20;
		133
13	PointedEar	134	## Number of cross-posted threads to show
5	PointedEar	135	my $topcrossposts = 10;
		136
22	PointedEar	137	## Number of agents we list
		138	my $topagents = 10;
		139
13	PointedEar	140	## Number of time zones to show
5	PointedEar	141	my $toptz = 10;
		142
		143	###################### DATA STRUCTURES ######################
		144	my $group = $newsgroup_name;
		145	$group =~ s!\.!/!g;
22	PointedEar	146	my %data; # name, count, agent, total, orig, quoted
		147	my $totsize = 0; # holds total sizes of all files
		148	my %crossposts; # group, count
		149	my %threads; # subject, count
		150	my $replies = 0; # total no. of replies
		151	my $origposts = 0; # total no. of original posts
		152	my %tz; # timezones by count
		153	my $earliest; # earliest article we have found
		154	my $latest; # latest article we have found
		155	my $totheader = 0; # total size of header material
		156	my $totbody = 0; # total size of body material
		157	my $totsig = 0; # total size of sig material
		158	my $totquoted = 0; # total size of quoted material
		159	my $totorig = 0; # total size of original material
		160	my $totalposts; # total no. of posts considered
5	PointedEar	161	my %distinct_agent;
13	PointedEar	162
		163	## Used to hold counts of User Agents used
		164	my %agents = (
		165	"Compuserver" => 0,
		166	"Foorum" => 0,
7	PointedEar	167	"Forte Agent" => 0,
		168	"Forte Free Agent" => 0,
13	PointedEar	169	"Gnus" => 0,
		170	"KNode" => 0,
		171	"MacSOUP" => 0,
		172	"MT-NewsWatcher" => 0,
7	PointedEar	173	"MicroPlanet Gravity" => 0,
		174	"Microsoft Outlook Express" => 0,
13	PointedEar	175	"Microsoft Windows Mail" => 0,
		176	"Mozilla" => 0,
		177	"News Rover" => 0,
		178	"NN" => 0,
		179	"Pan" => 0,
		180	"rn" => 0,
7	PointedEar	181	"slrn" => 0,
13	PointedEar	182	"Sylpheed" => 0,
7	PointedEar	183	"tin" => 0,
13	PointedEar	184	"VSoup" => 0,
7	PointedEar	185	"WebTV" => 0,
22	PointedEar	186	"Xnews" => 0,
13	PointedEar	187	);
5	PointedEar	188
22	PointedEar	189	my $datetime_parser = DateTime::Format::Mail->new();
		190	$datetime_parser->loose();
5	PointedEar	191
22	PointedEar	192	my $today = DateTime->today( time_zone => 'UTC' );
		193	my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
		194	my $start = int $prev_month->strftime('%s');
		195	my $numdays = int DateTime->last_day_of_month(
		196	year => $prev_month->year(),
		197	month => $prev_month->month(),
		198	time_zone => $prev_month->time_zone(),
		199	)->day();
		200	my $end = int $today->clone()->set_day(1)->strftime('%s');
		201
		202	dmsg( $start, " to ", $end ) if DEBUG;
		203
		204	chdir("$news$group")
		205	or die __x(
		206	"Can't cd to {newsgroup}: {error}\n",
		207	newsgroup => "$news$group",
		208	error => $!
		209	);
		210	opendir( DIR, "." )
		211	or die __x(
		212	"Can't open {newsgroup}: {error}\n",
		213	newsgroup => "$news$group",
		214	error => $!
		215	);
		216
		217	while ( defined( my $filename = readdir(DIR) ) )
7	PointedEar	218	{
22	PointedEar	219	next unless -f $filename; # only want real files
		220	next if ( $filename eq ".overview" ); # real articles only
		221
		222	get_article($filename); # read in the article
5	PointedEar	223	}
22	PointedEar	224	closedir(DIR); # finished with the directory
7	PointedEar	225
22	PointedEar	226	dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG;
		227
13	PointedEar	228	## Post-processing
22	PointedEar	229	count_agents(); # count agents, collapsing versions
		230	fix_percent();
5	PointedEar	231
22	PointedEar	232	write_data();
		233	display_results();
5	PointedEar	234
13	PointedEar	235	########################################
22	PointedEar	236	## Get current article's header and body
13	PointedEar	237	########################################
22	PointedEar	238	sub get_article
7	PointedEar	239	{
22	PointedEar	240	my $filename = shift;
5	PointedEar	241
22	PointedEar	242	open( my $FILE, '<', $filename )
		243	or
		244	die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
		245	my $msg = Mail::Message->read($FILE);
		246	my $timestamp = $msg->timestamp();
		247	my $date = $msg->study('Date');
5	PointedEar	248
22	PointedEar	249	## Disregard article if timestamp is not in range
		250	dmsg($timestamp) if DEBUG;
31	PointedEar	251	if ( $timestamp < $start \|\| $timestamp >= $end )
7	PointedEar	252	{
22	PointedEar	253	dmsg("Posting on $date ignored.") if DEBUG;
		254	return;
7	PointedEar	255	}
5	PointedEar	256
22	PointedEar	257	$totalposts++; # bump count of articles considered
5	PointedEar	258
22	PointedEar	259	## DEBUG
		260	dmsg($date) if DEBUG;
13	PointedEar	261
22	PointedEar	262	## get stats about the file itself
		263	my $filesize = -s $filename; # get total size of file
		264	$totsize += $filesize; # bump total sizes of all files
5	PointedEar	265
31	PointedEar	266	if ( ( not defined $earliest ) \|\| $timestamp < $earliest )
7	PointedEar	267	{
22	PointedEar	268	$earliest = $timestamp;
7	PointedEar	269	}
31	PointedEar	270	elsif ( ( not defined $latest ) \|\| $timestamp > $latest )
7	PointedEar	271	{
22	PointedEar	272	$latest = $timestamp;
7	PointedEar	273	}
13	PointedEar	274
22	PointedEar	275	#print "timestamp: $timestamp\n";
5	PointedEar	276
22	PointedEar	277	## count header size
		278	$totheader += $msg->head()->size();
5	PointedEar	279
22	PointedEar	280	## get the poster's name (MIME-decoded, in UTF-8)
		281	my $poster = $msg->study('From');
		282	if ( defined $poster )
7	PointedEar	283	{
22	PointedEar	284	## Convert old to new format
		285	$poster =~ s/^\s(.+?\@.+?)\s$(.+?)$\s*$/$2 <$1>/;
5	PointedEar	286
22	PointedEar	287	## Collapse whitespace
		288	$poster =~ s/\s+/ /g;
5	PointedEar	289
22	PointedEar	290	## Remove outer quotes; TODO: observe RFC 5322 strictly
		291	$poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5	PointedEar	292
22	PointedEar	293	## DEBUG
		294	dmsg($poster) if DEBUG;
7	PointedEar	295
22	PointedEar	296	## seen this one before?
		297	if ( !defined( $data{$poster} ) )
		298	{
		299	$data{$poster}{'agent'} = __ 'unknown'; # comes after For: field
		300	$data{$poster}{'orig'} = 0;
		301	$data{$poster}{'quoted'} = 0;
		302	}
		303	$data{$poster}{'count'}++; # bump count for this poster
		304	$data{$poster}{'size'} += $filesize; # total size of file
5	PointedEar	305
22	PointedEar	306	## The User-Agent and/or X-Newsreader fields
		307	## for User-Agent by poster
31	PointedEar	308	my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader');
22	PointedEar	309	if ( defined $ua )
7	PointedEar	310	{
22	PointedEar	311	$data{$poster}{'agent'} = $ua;
		312
		313	## DEBUG
		314	dmsg($ua) if DEBUG;
7	PointedEar	315	}
5	PointedEar	316
48	PointedEar	317	## The User Agent for User-Agent by number of articles
22	PointedEar	318	get_agent($msg);
5	PointedEar	319
22	PointedEar	320	## Get all cross-posted newsgroups
		321	for ( split( /,/, $msg->study('Newsgroups') ) )
		322	{
		323	$crossposts{$_}++; # bump count for each
		324	}
5	PointedEar	325
22	PointedEar	326	## Get threads
		327	my $thread = $msg->study('Subject');
48	PointedEar	328	$thread =~ s/^\sre:\s//i; # Remove Re: or re: at the start
33	PointedEar	329	$thread =~ s/\s$was:\s.$\s$//i; # Remove (was: ...) at the end
48	PointedEar	330	$thread =~ s/\s+/ /g; # collapse whitespace
		331	$threads{$thread}{'count'}++; # bump count of this subject
		332	$threads{$thread}{'size'} += $filesize; # bump bytes for this thread
5	PointedEar	333
22	PointedEar	334	## Is this an original post or a reply?
		335	if ( defined $msg->study('References') )
7	PointedEar	336	{
22	PointedEar	337	$replies++;
7	PointedEar	338	}
22	PointedEar	339	else
7	PointedEar	340	{
22	PointedEar	341	$origposts++;
7	PointedEar	342	}
22	PointedEar	343
		344	## Get the time zone
		345	my $datetime = $datetime_parser->parse_datetime($date);
		346	my $tz = $datetime->strftime('%z');
		347	$tz = "UTC" if $tz =~ m{^(?:GMT\|0000)$}o;
		348	$tz{$tz}++;
		349
		350	## DEBUG
		351	dmsg($tz) if DEBUG;
		352
		353	#### Now analyse the body text ####
		354	my $body = $msg->body();
		355
		356	my $insig = 0;
		357	my @body = $body->lines;
		358	for (@body)
7	PointedEar	359	{
22	PointedEar	360	$totbody += length($_); # bump total body size
		361	next if (m{^$>}o); # don't count blank lines in body
		362	if ( $insig == 1 )
		363	{
		364
		365	# bump total sig size
		366	$totsig += length($_);
		367	}
		368	## are we in a quote line?
		369	## Bill Unruh uses ] quotes, and another poster uses ::
31	PointedEar	370	elsif ( m{^\s[>\]]}o \|\| m{^\s::}o )
22	PointedEar	371	{
		372	## bump count of quoted chrs
		373	$data{$poster}{'quoted'} += length($_);
		374	$totquoted += length($_);
		375	}
		376	elsif (/^-- $/)
		377	{
		378	$insig = 1;
		379	}
		380	else
		381	{
		382	## We must be processing an original line
		383	$data{$poster}{'orig'} += length($_); # bump count of original chrs
		384	$totorig += length($_);
		385	}
7	PointedEar	386	}
22	PointedEar	387
		388	# end for (@body)
		389	}
		390
		391	close($FILE);
		392	}
		393
		394	sub get_agent
		395	{
		396	my $msg = shift;
		397
31	PointedEar	398	my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader')
		399	// $msg->study('X-Mailer');
		400
22	PointedEar	401	if ( not defined $ua )
		402	{
		403	my $org = $msg->study('Organization');
		404	if ( defined $org
		405	and $org =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/ )
7	PointedEar	406	{
22	PointedEar	407	$ua = $org;
7	PointedEar	408	}
22	PointedEar	409	elsif ( $msg->study('Message-ID') =~ /pine/i )
7	PointedEar	410	{
22	PointedEar	411	$ua = "Pine";
		412	}
7	PointedEar	413	}
5	PointedEar	414
22	PointedEar	415	## Hopefully found UA, else set to unknown
		416	if ( not defined $ua )
		417	{
		418	$ua = __ "unknown";
		419	}
5	PointedEar	420
22	PointedEar	421	$ua = clean($ua);
		422
		423	my $raw = $ua;
		424	my $agent = $raw;
		425
		426	## strip http
		427	if ( $raw =~ /.http./ )
7	PointedEar	428	{
22	PointedEar	429	$raw =~ s!posted via!!i;
		430	$raw =~ s!http://!!g;
		431	$raw =~ s!/!!g;
		432	$raw =~ s! !!g;
		433	}
5	PointedEar	434
22	PointedEar	435	## Fix Outlook from Mac
		436	if ( $raw =~ /^microsoft/i )
		437	{
		438	$raw =~ s/-/ /g;
		439	}
5	PointedEar	440
22	PointedEar	441	## Pick out the popular agents
		442	if ( $raw =~ /(outlook express)/i
		443	\|\| $raw =~ /(windows mail)/i
		444	\|\| $raw =~ /(microplanet gravity)/i
		445	\|\| $raw =~ /(news rover)/i
		446	\|\| $raw =~ /(forte agent)/i
		447	\|\| $raw =~ /(forte free agent)/i )
		448	{
		449	$agent = $1;
		450	}
		451	elsif (
		452	$raw =~ /^(
5	PointedEar	453	pan
		454	\|sylpheed
		455	\|slrn
		456	\|mozilla
		457	\|knode
		458	\|tin
		459	\|hamster
		460	\|xrn
		461	\|xnews
		462	\|aol
		463	\|gnus
		464	\|krn
		465	\|macsoup
		466	\|messenger
		467	\|openxp
		468	\|pine
		469	\|thoth
		470	\|turnpike
		471	\|winvn
		472	\|vsoup
		473	\|google
		474	\|supernews
		475	\|nn
		476	\|rn
		477	\|007
		478	\|webtv
		479	\|compuserve
7	PointedEar	480	)/ix
22	PointedEar	481	)
		482	{
		483	$agent = $1;
		484	}
		485	else
		486	{
		487	## Clean up unknown agents
		488	if ( $raw =~ m!^(.*?)/! )
7	PointedEar	489	{
		490	$agent = $1;
		491	}
22	PointedEar	492	elsif ( $raw =~ /^(\w)\d./ )
7	PointedEar	493	{
22	PointedEar	494	$agent = $1;
		495	}
		496	}
		497
		498	$distinct_agent{$agent}++;
		499	return $agent;
		500	}
		501	## get_agent
		502
		503	#########################################
		504	## Count the User-Agents used, collapsing
		505	## different versions into one per agent.
		506	#########################################
		507	sub count_agents
		508	{
		509	POSTER:
		510	foreach my $poster ( keys %data )
		511	{
		512	foreach my $agent_name ( keys %distinct_agent )
		513	{ # check against known ones
		514	if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7	PointedEar	515	{
22	PointedEar	516	$agents{$agent_name}++;
		517	next POSTER;
7	PointedEar	518	}
		519	}
22	PointedEar	520	$agents{ $data{$poster}{'agent'} }++;
		521	}
		522	} # count_agents
7	PointedEar	523
22	PointedEar	524	#############################################
		525	## Set orig/total percentages for all posters
		526	#############################################
		527	sub fix_percent
		528	{
		529	foreach my $poster ( keys %data )
		530	{
		531	my $percent = 100;
		532	if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
		533	{
		534	$percent =
		535	$data{$poster}{'orig'} * 100 /
		536	( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/
		537	}
		538	elsif ( $data{$poster}{'orig'} == 0 )
		539	{
		540	$percent = 0;
		541	}
		542	$data{$poster}{'percent'} = $percent;
5	PointedEar	543	}
22	PointedEar	544	}
		545	## fix_percent
5	PointedEar	546
22	PointedEar	547	##################################
		548	## Write data structures to a file
		549	##################################
		550	sub write_data
		551	{
		552	open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
		553	or die __x( "Can't create XDATA: {error}\n", error => $! );
		554	print $OUTF "Data collected from $newsgroup_name\n\n";
		555	print $OUTF
		556	"Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		557	foreach my $name ( keys %data )
7	PointedEar	558	{
22	PointedEar	559	print $OUTF
		560	"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7	PointedEar	561	}
22	PointedEar	562	print $OUTF
		563	"============================================================================\n";
		564	print $OUTF "Thread subjects\n";
		565	print $OUTF
		566	"----------------------------------------------------------------------------\n";
		567	foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
		568	{
		569	print $OUTF
		570	"$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
		571	}
		572	print $OUTF
		573	"============================================================================\n";
		574	print $OUTF "Cross-posts\n";
		575	print $OUTF
		576	"----------------------------------------------------------------------------\n";
		577	foreach my $name ( sort keys %crossposts )
		578	{
		579	print $OUTF "$name : $crossposts{$name}\n";
		580	}
		581	print $OUTF
		582	"============================================================================\n";
		583	print $OUTF "User agents\n";
		584	print $OUTF
		585	"----------------------------------------------------------------------------\n";
		586	foreach my $name ( sort keys %agents )
		587	{
		588	print $OUTF "$name : $agents{$name}\n";
		589	}
		590	print $OUTF
		591	"============================================================================\n";
		592	print $OUTF "Time zones\n";
		593	print $OUTF
		594	"----------------------------------------------------------------------------\n";
		595	foreach my $name ( sort keys %tz )
		596	{
		597	print $OUTF "$name : $tz{$name}\n";
		598	}
		599	close $OUTF;
		600	} # write_data
5	PointedEar	601
22	PointedEar	602	sub display_results
		603	{
		604	#################### DISPLAY RESULTS #####################
45	PointedEar	605	print "=" x 76, "\n";
22	PointedEar	606	printf "%s\n",
		607	centred(
48	PointedEar	608	__x( "Analysis of articles to {newsgroup}", newsgroup => $newsgroup_name ),
		609	76
		610	);
45	PointedEar	611	print "=" x 76, "\n";
22	PointedEar	612	printf "%s\n",
		613	centred(
23	PointedEar	614	__(
		615	"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
		616	),
22	PointedEar	617	76
		618	);
		619	print "\n\n";
48	PointedEar	620	printf __"Total articles considered: %s over %d days\n",
23	PointedEar	621	$formatter->format_number($totalposts),
		622	$formatter->format_number($numdays);
		623	my $time_locale = setlocale(LC_TIME);
22	PointedEar	624	my $earliest_datetime = DateTime->from_epoch(
23	PointedEar	625	epoch => $earliest,
		626	locale => $time_locale,
22	PointedEar	627	time_zone => 'UTC',
		628	);
		629	my $latest_datetime = DateTime->from_epoch(
23	PointedEar	630	epoch => $latest,
		631	locale => $time_locale,
22	PointedEar	632	time_zone => 'UTC',
		633	);
		634	my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23	PointedEar	635	printf __"Earliest article: %s\n",
		636	$earliest_datetime->strftime($datetime_format);
		637	printf __"Latest article: %s\n",
		638	$latest_datetime->strftime($datetime_format);
		639	printf __"Original articles: %s; replies: %s\n",
		640	$formatter->format_number($origposts),
		641	$formatter->format_number($replies);
48	PointedEar	642	printf __"Total size of articles: %s bytes (%s)" . "\n",
23	PointedEar	643	$formatter->format_number($totsize),
		644	$formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
		645	printf __"Average %s articles per day, %s per day, %s bytes per article\n",
		646	$formatter->format_number( int( $totalposts / $numdays ) ),
		647	$formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
		648	$formatter->format_number( int( $totsize / $totalposts ) );
		649
22	PointedEar	650	my $count = keys %data;
23	PointedEar	651	printf __"Total headers: %s; bodies: %s\n",
		652	$formatter->format_bytes(
		653	$totheader, ( 'precision' => 1, 'mode' => 'iec' )
		654	),
		655	$formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	656	printf __
23	PointedEar	657	"Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
		658	$formatter->format_bytes(
		659	$totquoted, ( 'precision' => 1, 'mode' => 'iec' )
		660	),
		661	$formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
		662	$formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
		663	$formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
		664	printf __"Total number of posters: %s, average %s per poster\n",
		665	$formatter->format_number($count),
		666	$formatter->format_bytes( $totsize / $count,
		667	( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	668	$count = keys %threads;
23	PointedEar	669	printf __"Total number of threads: %s, average %s per thread\n",
		670	$formatter->format_number($count),
		671	$formatter->format_bytes( $totsize / $count,
		672	( 'precision' => 1, 'mode' => 'iec' ) );
		673	printf __"Total number of user agents: %d\n",
		674	$formatter->format_number( scalar keys %agents );
31	PointedEar	675	print "\n", "=" x 76, "\n";
22	PointedEar	676	########################################
		677	## Show posters by article count Sec 1;
		678	########################################
		679	unless ( $skipSec{1} )
7	PointedEar	680	{
22	PointedEar	681	if ( keys %data < $topposters )
		682	{
		683	$count = keys %data;
		684	}
		685	else
		686	{
		687	$count = $topposters;
		688	}
		689	printf "%s\n",
		690	centred(
23	PointedEar	691	__x( "Top {count} posters by number of articles", count => $topposters ),
		692	76
		693	);
22	PointedEar	694	print "=" x 76, "\n";
		695	my $i = 0;
		696	foreach
		697	my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
		698	{
		699	my $name = substr( $poster, 0, 65 );
25	PointedEar	700	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	701	$data{$poster}{count};
		702	last if ( ++$i == $count );
		703	}
		704	print "\n", "=" x 76, "\n";
7	PointedEar	705	}
22	PointedEar	706
		707	######################################
		708	## Show posters by size in KiB Sec 2;
		709	######################################
		710	unless ( $skipSec{2} )
7	PointedEar	711	{
22	PointedEar	712	if ( keys %data < $topposters )
		713	{
		714	$count = keys %data;
		715	}
		716	else
		717	{
		718	$count = $topposters;
		719	}
		720	printf "%s\n",
		721	centred(
		722	__x( "Top {count} posters by article size in KiB", count => $topposters ),
23	PointedEar	723	76
		724	);
22	PointedEar	725	print "=" x 76, "\n";
		726	my $i = 0;
		727	foreach
		728	my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
		729	{
		730	my $name = substr( $poster, 0, 62 );
25	PointedEar	731	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	732	$data{$poster}{size} / 1024; #/
		733	last if ( ++$i == $count );
		734	}
		735	print "\n", "=" x 76, "\n";
7	PointedEar	736	}
5	PointedEar	737
22	PointedEar	738	#####################################
		739	## Show top posters for original text
		740	#####################################
31	PointedEar	741	my $topposters_real = 0;
		742
22	PointedEar	743	unless ( $skipSec{3} )
7	PointedEar	744	{
22	PointedEar	745	if ( keys %data < $topposters )
		746	{
		747	$count = keys %data;
		748	}
		749	else
		750	{
		751	$count = $topposters;
		752	}
31	PointedEar	753
22	PointedEar	754	printf "%s\n",
		755	centred(
		756	__x(
48	PointedEar	757	"Top {count} responders by original text (> 5 articles)",
22	PointedEar	758	count => $topposters
		759	),
		760	76
		761	);
		762	print "=" x 76, "\n";
		763	foreach my $poster (
		764	sort { $data{$b}{percent} <=> $data{$a}{percent} }
		765	keys %data
		766	)
		767	{
		768	next if $data{$poster}{quoted} == 0;
		769	next if $data{$poster}{count} < 5;
		770	my $name = substr( $poster, 0, 63 );
31	PointedEar	771	printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1,
		772	rpad( $poster, 63, "." ),
22	PointedEar	773	$data{$poster}{percent};
31	PointedEar	774	last if ( ++$topposters_real == $count );
22	PointedEar	775	}
		776	print "\n", "=" x 76, "\n";
7	PointedEar	777	}
5	PointedEar	778
22	PointedEar	779	########################################
		780	## Show bottom posters for original text
		781	########################################
31	PointedEar	782
		783	$skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4};
		784
22	PointedEar	785	unless ( $skipSec{4} )
7	PointedEar	786	{
22	PointedEar	787	if ( keys %data < $topposters )
7	PointedEar	788	{
22	PointedEar	789	$count = keys %data;
		790	}
		791	else
		792	{
		793	$count = $topposters;
		794	}
31	PointedEar	795
22	PointedEar	796	printf "%s\n",
		797	centred(
		798	__x(
48	PointedEar	799	"Bottom {count} responders by original text (> 5 articles)",
22	PointedEar	800	count => $topposters
		801	),
		802	76
		803	);
		804	print "=" x 76, "\n";
		805	my $i = 0;
		806	foreach my $poster (
		807	sort { $data{$a}{percent} <=> $data{$b}{percent} }
		808	keys %data
		809	)
		810	{
		811	next if $data{$poster}{quoted} == 0;
		812	next if $data{$poster}{count} < 5;
		813	my $name = substr( $poster, 0, 63 );
25	PointedEar	814	printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	815	$data{$poster}{percent};
		816	last if ( ++$i == $count );
		817	}
		818	print "\n", "=" x 76, "\n";
		819	}
5	PointedEar	820
22	PointedEar	821	#####################################
		822	## Show threads by number of articles
		823	#####################################
		824	unless ( $skipSec{5} )
		825	{
		826	if ( keys %threads < $topthreads )
		827	{
		828	$count = keys %threads;
7	PointedEar	829	}
22	PointedEar	830	else
		831	{
		832	$count = $topthreads;
7	PointedEar	833	}
22	PointedEar	834	printf "%s\n",
23	PointedEar	835	centred(
		836	__x( "Top {count} threads by no. of articles", count => $topthreads ),
22	PointedEar	837	76 );
		838	print "=" x 76, "\n";
		839	my $i = 0;
		840	foreach my $thread (
		841	sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
		842	keys %threads
		843	)
7	PointedEar	844	{
22	PointedEar	845	my $name = substr( $thread, 0, 65 );
25	PointedEar	846	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	847	$threads{$thread}{'count'};
		848	last if ( ++$i == $count );
7	PointedEar	849	}
22	PointedEar	850	print "\n", "=" x 76, "\n";
		851	}
		852
		853	##############################
		854	## Show threads by size in KiB
		855	##############################
		856	unless ( $skipSec{6} )
		857	{
		858	if ( keys %threads < $topthreads )
		859	{
		860	$count = keys %threads;
		861	}
7	PointedEar	862	else
		863	{
22	PointedEar	864	$count = $topthreads;
		865	}
		866	printf "%s\n",
23	PointedEar	867	centred(
		868	__x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22	PointedEar	869	print "=" x 76, "\n";
		870	my $i = 0;
		871	foreach my $thread (
		872	sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
		873	keys %threads
		874	)
		875	{
		876	my $name = substr( $thread, 0, 65 );
25	PointedEar	877	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	878	$threads{$thread}{'size'} / 1024; #/
		879	last if ( ++$i == $count );
		880	}
		881	print "\n", "=" x 76, "\n";
		882	}
5	PointedEar	883
22	PointedEar	884	##################################
		885	## Show top 10 cross-posted groups
		886	##################################
		887	unless ( $skipSec{7} )
		888	{
		889	delete $crossposts{"$newsgroup_name"}; # don't include ours
		890	if ( keys %crossposts < $topcrossposts )
		891	{
		892	$count = keys %crossposts;
7	PointedEar	893	}
22	PointedEar	894	else
		895	{
		896	$count = $topcrossposts;
		897	}
		898	printf "%s\n",
23	PointedEar	899	centred(
		900	__x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22	PointedEar	901	print "=" x 76, "\n";
		902	my $i = 0;
		903	foreach
		904	my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
		905	{
25	PointedEar	906	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	907	$crossposts{$name};
		908	last if ( ++$i == $count );
		909	}
		910	print "\n", "=" x 76, "\n";
		911	}
5	PointedEar	912
22	PointedEar	913	#########################
		914	## Show agents and counts
		915	#########################
		916	unless ( $skipSec{8} )
		917	{
		918	if ( keys %agents < $topagents )
		919	{
		920	$count = keys %agents;
		921	}
		922	else
		923	{
		924	$count = $topagents;
		925	}
		926	printf "%s\n",
		927	centred( __x( "Top {count} user agents by poster", count => $topagents ),
		928	76 );
		929	print "=" x 76, "\n";
		930	my $i = 0;
		931	foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
		932	{
25	PointedEar	933	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
22	PointedEar	934	$agents{$agent};
		935	last if ( ++$i == $count );
		936	}
		937	print "\n", "=" x 76, "\n";
		938	}
5	PointedEar	939
22	PointedEar	940	#######################
		941	## Show distinct agents
		942	#######################
		943	unless ( $skipSec{9} )
7	PointedEar	944	{
22	PointedEar	945	if ( keys %distinct_agent < $topagents )
		946	{
		947	$count = keys %distinct_agent;
7	PointedEar	948	}
22	PointedEar	949	else
		950	{
		951	$count = $topagents;
		952	}
		953	printf "%s\n",
		954	centred(
48	PointedEar	955	__x(
		956	"Top {count} user agents by number of articles",
		957	count => $topagents
		958	),
23	PointedEar	959	76
		960	);
22	PointedEar	961	print "=" x 76, "\n";
		962	my $i = 0;
		963	foreach my $agent (
		964	sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
		965	keys %distinct_agent
		966	)
		967	{
25	PointedEar	968	printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
22	PointedEar	969	$distinct_agent{$agent},
		970	( ( $distinct_agent{$agent} / $totalposts ) * 100 );
		971	last if ( ++$i == $count );
		972	}
		973	print "\n", "=" x 76, "\n";
7	PointedEar	974	}
5	PointedEar	975
22	PointedEar	976	############################
		977	## Show timezones and counts
		978	############################
		979	unless ( $skipSec{10} )
7	PointedEar	980	{
22	PointedEar	981	if ( keys %tz < $toptz )
7	PointedEar	982	{
22	PointedEar	983	$count = keys %tz;
7	PointedEar	984	}
22	PointedEar	985	else
7	PointedEar	986	{
22	PointedEar	987	$count = $toptz;
7	PointedEar	988	}
23	PointedEar	989	printf "%s\n",
48	PointedEar	990	centred(
		991	__x( "Top {count} time zones by number of articles", count => $toptz ),
		992	76 );
22	PointedEar	993	print "=" x 76, "\n";
		994	my $i = 0;
		995	foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
		996	{
25	PointedEar	997	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
22	PointedEar	998	last if ( ++$i == $count );
		999	}
		1000	print "\n", "=" x 76, "\n";
7	PointedEar	1001	}
5	PointedEar	1002	}
		1003
22	PointedEar	1004	## helper subs
		1005
13	PointedEar	1006	###############################
		1007	## Right pad a string with '.'s
		1008	###############################
7	PointedEar	1009	sub rpad
		1010	{
13	PointedEar	1011	## Get text to pad, length to pad, pad chr
7	PointedEar	1012	my ( $text, $pad_len, $pad_chr ) = @_;
11	PointedEar	1013
13	PointedEar	1014	## DEBUG
22	PointedEar	1015	printf( "\|%s\| = %d\n", $text, length($text) ) if DEBUG > 1;
11	PointedEar	1016
7	PointedEar	1017	if ( length($text) > $pad_len )
		1018	{
		1019	$text = substr( $text, 0, $pad_len );
		1020	}
		1021	my $padded = $text . $pad_chr x ( $pad_len - length($text) );
		1022	return $padded;
5	PointedEar	1023	}
		1024
13	PointedEar	1025	##################
		1026	## Centre a string
		1027	##################
7	PointedEar	1028	sub centred
		1029	{
		1030	my ( $text, $width ) = @_; # text to centre, size of field to centre in
		1031	my $pad_len = ( $width - length($text) ) / 2; #/
		1032	my $centred = " " x $pad_len . $text;
		1033	return $centred;
5	PointedEar	1034	}
		1035
13	PointedEar	1036	###########################
		1037	## Put commas into a number
		1038	###########################
7	PointedEar	1039	sub commify
		1040	{
13	PointedEar	1041	local $_ = shift;
23	PointedEar	1042	my $number = $_;
		1043	$_ = int; # Chop non-integer part
22	PointedEar	1044	1 while
23	PointedEar	1045	s/([-+]?\d)(\d{$grouping[0]}($\|\Q$thousands_sep\E))/$1$thousands_sep$2/;
		1046	my $int_part = $_;
		1047	my $real_part = '';
		1048	if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
		1049	{
		1050	$real_part = $1;
		1051	}
		1052	return $int_part . $real_part;
5	PointedEar	1053	}
		1054
13	PointedEar	1055	################################################################
		1056	## Returns a string with leading and trailing whitespace removed
		1057	################################################################
7	PointedEar	1058	sub clean
		1059	{
		1060	my $dirty = shift;
		1061	my $clean = $dirty;
14	PointedEar	1062	$clean =~ s/^\s+\|\s+$//g;
5	PointedEar	1063
7	PointedEar	1064	return $clean;
5	PointedEar	1065	}
		1066
7	PointedEar	1067	sub usage
		1068	{
45	PointedEar	1069	print __ "usage: newsstat.pl NEWS.GROUP", "\n";
7	PointedEar	1070	exit 1;
5	PointedEar	1071	}
		1072
22	PointedEar	1073	sub dmsg
7	PointedEar	1074	{
22	PointedEar	1075	print STDERR @_, "\n";
		1076	}
		1077
		1078	sub dmsg2
		1079	{
		1080	my ( $level, @msg ) = @_;
		1081	print STDERR @msg, "\n" if $level >= DEBUG;
48	PointedEar	1082	}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 34 - Rev 48