WebSVN - LCARS - Blame - Rev 49 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
8	PointedEar	1	#!/usr/bin/env perl
5	PointedEar	2	use strict;
6	PointedEar	3	use warnings;
23	PointedEar	4	require 5.004;
22	PointedEar	5
		6	#use diagnostics;
6	PointedEar	7	use utf8;
		8
30	PointedEar	9	## NOTE:
		10	## Enable and remove binmode when utf8::all has actually become lexically scoped
		11	# use utf8:all;
		12
22	PointedEar	13	use constant DEBUG => 0;
		14
24	PointedEar	15	## newsstat.pl
		16	## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
		17	## Based on work by Garry Knight et al.
31	PointedEar	18	##
24	PointedEar	19	## This program is free software: you can redistribute it and/or modify
		20	## it under the terms of the GNU General Public License as published by
		21	## the Free Software Foundation, either version 3 of the License, or
		22	## (at your option) any later version.
31	PointedEar	23	##
24	PointedEar	24	## This program is distributed in the hope that it will be useful,
		25	## but WITHOUT ANY WARRANTY; without even the implied warranty of
		26	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		27	## GNU General Public License for more details.
31	PointedEar	28	##
24	PointedEar	29	## You should have received a copy of the GNU General Public License
		30	## along with this program. If not, see <http://www.gnu.org/licenses/>.
		31
13	PointedEar	32	## Print out all text to STDOUT UTF-8 encoded
		33	binmode STDOUT, ':encoding(UTF-8)';
22	PointedEar	34	binmode STDERR, ':encoding(UTF-8)';
5	PointedEar	35
23	PointedEar	36	## L10n
		37	use locale ':not_characters';
		38
		39	# setlocale( LC_MESSAGES, '' );
		40	require Number::Format;
		41
		42	## i18n
		43	## FIXME: Automatically include resolved '.' in @INC
22	PointedEar	44	# print join "\n", @INC;
5	PointedEar	45
22	PointedEar	46	use Locale::TextDomain ('de.pointedears.newsstat');
23	PointedEar	47	use POSIX ('locale_h');
22	PointedEar	48	use Locale::Messages qw (bind_textdomain_filter
23	PointedEar	49	bind_textdomain_codeset
		50	turn_utf_8_on);
		51
		52	bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
22	PointedEar	53	bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23	PointedEar	54
22	PointedEar	55	require Mail::Message;
		56	require DateTime;
		57	require DateTime::Format::Mail;
23	PointedEar	58
		59	# See comments in previous example
		60	my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
		61	@{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
		62	'decimal_point' };
		63
		64	# Apply defaults if values are missing
		65	$thousands_sep = $mon_thousands_sep unless $thousands_sep;
		66	$thousands_sep = ' ' unless $thousands_sep;
		67
		68	# grouping and mon_grouping are packed lists
		69	# of small integers (characters) telling the
		70	# grouping (thousand_seps and mon_thousand_seps
		71	# being the group dividers) of numbers and
		72	# monetary quantities. The integers' meanings:
		73	# 255 means no more grouping, 0 means repeat
		74	# the previous grouping, 1-254 means use that
		75	# as the current grouping. Grouping goes from
		76	# right to left (low to high digits). In the
		77	# below we cheat slightly by never using anything
		78	# else than the first grouping (whatever that is).
		79	my @grouping;
		80	if ($grouping)
		81	{
		82	@grouping = unpack( "C*", $grouping );
		83	}
		84	else
		85	{
		86	@grouping = (3);
		87	}
		88
		89	## FIXME: Why don't the defaults work already?
		90	my $formatter = new Number::Format(
		91	-decimal_point => $decimal_point,
		92	-thousands_sep => $thousands_sep,
49	PointedEar	93	-kibi_suffix => ' KiB',
		94	-mebi_suffix => ' MiB',
		95	-gibi_suffix => ' GiB',
		96
23	PointedEar	97	# -grouping => $grouping[0]
		98	);
		99
5	PointedEar	100	###################### USER CONFIGURATIONS ############################
		101
13	PointedEar	102	## The name of the group to do stats for
5	PointedEar	103	my $newsgroup_name = $ARGV[0];
31	PointedEar	104	$newsgroup_name // usage();
5	PointedEar	105
13	PointedEar	106	## Check for removal flags
5	PointedEar	107	my $ix;
		108	my $j;
		109	my %skipSec;
		110	my @skiplist;
		111	my $args = @ARGV;
7	PointedEar	112	for ( $ix = 1 ; $ix < $args ; $ix++ )
		113	{
		114	$j = $ix + 1;
		115	if ( $ARGV[$ix] eq "-x" )
		116	{
		117	@skiplist = split( ",", $ARGV[$j] );
		118	}
		119	elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
		120	{
		121	@skiplist = split( ",", $1 );
		122	}
5	PointedEar	123	}
7	PointedEar	124	foreach (@skiplist)
		125	{
5	PointedEar	126	$skipSec{$_} = 1;
		127	}
		128
13	PointedEar	129	## Leafnode users will want /var/spool/news for this variable.
5	PointedEar	130	my $news = "/var/spool/news/";
		131
22	PointedEar	132	## Number of top or bottom posters to show
		133	my $topposters = 20;
5	PointedEar	134
13	PointedEar	135	## Number of threads we want to know about
5	PointedEar	136	my $topthreads = 20;
		137
13	PointedEar	138	## Number of cross-posted threads to show
5	PointedEar	139	my $topcrossposts = 10;
		140
22	PointedEar	141	## Number of agents we list
		142	my $topagents = 10;
		143
13	PointedEar	144	## Number of time zones to show
5	PointedEar	145	my $toptz = 10;
		146
		147	###################### DATA STRUCTURES ######################
		148	my $group = $newsgroup_name;
		149	$group =~ s!\.!/!g;
22	PointedEar	150	my %data; # name, count, agent, total, orig, quoted
		151	my $totsize = 0; # holds total sizes of all files
		152	my %crossposts; # group, count
		153	my %threads; # subject, count
		154	my $replies = 0; # total no. of replies
		155	my $origposts = 0; # total no. of original posts
		156	my %tz; # timezones by count
		157	my $earliest; # earliest article we have found
		158	my $latest; # latest article we have found
		159	my $totheader = 0; # total size of header material
		160	my $totbody = 0; # total size of body material
		161	my $totsig = 0; # total size of sig material
		162	my $totquoted = 0; # total size of quoted material
		163	my $totorig = 0; # total size of original material
		164	my $totalposts; # total no. of posts considered
5	PointedEar	165	my %distinct_agent;
13	PointedEar	166
		167	## Used to hold counts of User Agents used
		168	my %agents = (
		169	"Compuserver" => 0,
		170	"Foorum" => 0,
7	PointedEar	171	"Forte Agent" => 0,
		172	"Forte Free Agent" => 0,
13	PointedEar	173	"Gnus" => 0,
		174	"KNode" => 0,
		175	"MacSOUP" => 0,
		176	"MT-NewsWatcher" => 0,
7	PointedEar	177	"MicroPlanet Gravity" => 0,
		178	"Microsoft Outlook Express" => 0,
13	PointedEar	179	"Microsoft Windows Mail" => 0,
		180	"Mozilla" => 0,
		181	"News Rover" => 0,
		182	"NN" => 0,
		183	"Pan" => 0,
		184	"rn" => 0,
7	PointedEar	185	"slrn" => 0,
13	PointedEar	186	"Sylpheed" => 0,
7	PointedEar	187	"tin" => 0,
13	PointedEar	188	"VSoup" => 0,
7	PointedEar	189	"WebTV" => 0,
22	PointedEar	190	"Xnews" => 0,
13	PointedEar	191	);
5	PointedEar	192
22	PointedEar	193	my $datetime_parser = DateTime::Format::Mail->new();
		194	$datetime_parser->loose();
5	PointedEar	195
22	PointedEar	196	my $today = DateTime->today( time_zone => 'UTC' );
		197	my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
		198	my $start = int $prev_month->strftime('%s');
		199	my $numdays = int DateTime->last_day_of_month(
		200	year => $prev_month->year(),
		201	month => $prev_month->month(),
		202	time_zone => $prev_month->time_zone(),
		203	)->day();
		204	my $end = int $today->clone()->set_day(1)->strftime('%s');
		205
		206	dmsg( $start, " to ", $end ) if DEBUG;
		207
		208	chdir("$news$group")
		209	or die __x(
		210	"Can't cd to {newsgroup}: {error}\n",
		211	newsgroup => "$news$group",
		212	error => $!
		213	);
		214	opendir( DIR, "." )
		215	or die __x(
		216	"Can't open {newsgroup}: {error}\n",
		217	newsgroup => "$news$group",
		218	error => $!
		219	);
		220
		221	while ( defined( my $filename = readdir(DIR) ) )
7	PointedEar	222	{
22	PointedEar	223	next unless -f $filename; # only want real files
		224	next if ( $filename eq ".overview" ); # real articles only
		225
		226	get_article($filename); # read in the article
5	PointedEar	227	}
22	PointedEar	228	closedir(DIR); # finished with the directory
7	PointedEar	229
22	PointedEar	230	dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG;
		231
13	PointedEar	232	## Post-processing
22	PointedEar	233	count_agents(); # count agents, collapsing versions
		234	fix_percent();
5	PointedEar	235
22	PointedEar	236	write_data();
		237	display_results();
5	PointedEar	238
13	PointedEar	239	########################################
22	PointedEar	240	## Get current article's header and body
13	PointedEar	241	########################################
22	PointedEar	242	sub get_article
7	PointedEar	243	{
22	PointedEar	244	my $filename = shift;
5	PointedEar	245
22	PointedEar	246	open( my $FILE, '<', $filename )
		247	or
		248	die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
		249	my $msg = Mail::Message->read($FILE);
		250	my $timestamp = $msg->timestamp();
		251	my $date = $msg->study('Date');
5	PointedEar	252
22	PointedEar	253	## Disregard article if timestamp is not in range
		254	dmsg($timestamp) if DEBUG;
31	PointedEar	255	if ( $timestamp < $start \|\| $timestamp >= $end )
7	PointedEar	256	{
22	PointedEar	257	dmsg("Posting on $date ignored.") if DEBUG;
		258	return;
7	PointedEar	259	}
5	PointedEar	260
22	PointedEar	261	$totalposts++; # bump count of articles considered
5	PointedEar	262
22	PointedEar	263	## DEBUG
		264	dmsg($date) if DEBUG;
13	PointedEar	265
22	PointedEar	266	## get stats about the file itself
		267	my $filesize = -s $filename; # get total size of file
		268	$totsize += $filesize; # bump total sizes of all files
5	PointedEar	269
31	PointedEar	270	if ( ( not defined $earliest ) \|\| $timestamp < $earliest )
7	PointedEar	271	{
22	PointedEar	272	$earliest = $timestamp;
7	PointedEar	273	}
31	PointedEar	274	elsif ( ( not defined $latest ) \|\| $timestamp > $latest )
7	PointedEar	275	{
22	PointedEar	276	$latest = $timestamp;
7	PointedEar	277	}
13	PointedEar	278
22	PointedEar	279	#print "timestamp: $timestamp\n";
5	PointedEar	280
22	PointedEar	281	## count header size
		282	$totheader += $msg->head()->size();
5	PointedEar	283
22	PointedEar	284	## get the poster's name (MIME-decoded, in UTF-8)
		285	my $poster = $msg->study('From');
		286	if ( defined $poster )
7	PointedEar	287	{
22	PointedEar	288	## Convert old to new format
		289	$poster =~ s/^\s(.+?\@.+?)\s$(.+?)$\s*$/$2 <$1>/;
5	PointedEar	290
22	PointedEar	291	## Collapse whitespace
		292	$poster =~ s/\s+/ /g;
5	PointedEar	293
22	PointedEar	294	## Remove outer quotes; TODO: observe RFC 5322 strictly
		295	$poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5	PointedEar	296
22	PointedEar	297	## DEBUG
		298	dmsg($poster) if DEBUG;
7	PointedEar	299
22	PointedEar	300	## seen this one before?
		301	if ( !defined( $data{$poster} ) )
		302	{
		303	$data{$poster}{'agent'} = __ 'unknown'; # comes after For: field
		304	$data{$poster}{'orig'} = 0;
		305	$data{$poster}{'quoted'} = 0;
		306	}
		307	$data{$poster}{'count'}++; # bump count for this poster
		308	$data{$poster}{'size'} += $filesize; # total size of file
5	PointedEar	309
22	PointedEar	310	## The User-Agent and/or X-Newsreader fields
		311	## for User-Agent by poster
31	PointedEar	312	my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader');
22	PointedEar	313	if ( defined $ua )
7	PointedEar	314	{
22	PointedEar	315	$data{$poster}{'agent'} = $ua;
		316
		317	## DEBUG
		318	dmsg($ua) if DEBUG;
7	PointedEar	319	}
5	PointedEar	320
48	PointedEar	321	## The User Agent for User-Agent by number of articles
22	PointedEar	322	get_agent($msg);
5	PointedEar	323
22	PointedEar	324	## Get all cross-posted newsgroups
		325	for ( split( /,/, $msg->study('Newsgroups') ) )
		326	{
		327	$crossposts{$_}++; # bump count for each
		328	}
5	PointedEar	329
22	PointedEar	330	## Get threads
		331	my $thread = $msg->study('Subject');
48	PointedEar	332	$thread =~ s/^\sre:\s//i; # Remove Re: or re: at the start
33	PointedEar	333	$thread =~ s/\s$was:\s.$\s$//i; # Remove (was: ...) at the end
48	PointedEar	334	$thread =~ s/\s+/ /g; # collapse whitespace
		335	$threads{$thread}{'count'}++; # bump count of this subject
		336	$threads{$thread}{'size'} += $filesize; # bump bytes for this thread
5	PointedEar	337
22	PointedEar	338	## Is this an original post or a reply?
		339	if ( defined $msg->study('References') )
7	PointedEar	340	{
22	PointedEar	341	$replies++;
7	PointedEar	342	}
22	PointedEar	343	else
7	PointedEar	344	{
22	PointedEar	345	$origposts++;
7	PointedEar	346	}
22	PointedEar	347
		348	## Get the time zone
		349	my $datetime = $datetime_parser->parse_datetime($date);
		350	my $tz = $datetime->strftime('%z');
		351	$tz = "UTC" if $tz =~ m{^(?:GMT\|0000)$}o;
		352	$tz{$tz}++;
		353
		354	## DEBUG
		355	dmsg($tz) if DEBUG;
		356
		357	#### Now analyse the body text ####
		358	my $body = $msg->body();
		359
		360	my $insig = 0;
		361	my @body = $body->lines;
		362	for (@body)
7	PointedEar	363	{
22	PointedEar	364	$totbody += length($_); # bump total body size
		365	next if (m{^$>}o); # don't count blank lines in body
		366	if ( $insig == 1 )
		367	{
		368
		369	# bump total sig size
		370	$totsig += length($_);
		371	}
		372	## are we in a quote line?
		373	## Bill Unruh uses ] quotes, and another poster uses ::
31	PointedEar	374	elsif ( m{^\s[>\]]}o \|\| m{^\s::}o )
22	PointedEar	375	{
		376	## bump count of quoted chrs
		377	$data{$poster}{'quoted'} += length($_);
		378	$totquoted += length($_);
		379	}
		380	elsif (/^-- $/)
		381	{
		382	$insig = 1;
		383	}
		384	else
		385	{
		386	## We must be processing an original line
		387	$data{$poster}{'orig'} += length($_); # bump count of original chrs
		388	$totorig += length($_);
		389	}
7	PointedEar	390	}
22	PointedEar	391
		392	# end for (@body)
		393	}
		394
		395	close($FILE);
		396	}
		397
		398	sub get_agent
		399	{
		400	my $msg = shift;
		401
31	PointedEar	402	my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader')
		403	// $msg->study('X-Mailer');
		404
22	PointedEar	405	if ( not defined $ua )
		406	{
		407	my $org = $msg->study('Organization');
		408	if ( defined $org
		409	and $org =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/ )
7	PointedEar	410	{
22	PointedEar	411	$ua = $org;
7	PointedEar	412	}
22	PointedEar	413	elsif ( $msg->study('Message-ID') =~ /pine/i )
7	PointedEar	414	{
22	PointedEar	415	$ua = "Pine";
		416	}
7	PointedEar	417	}
5	PointedEar	418
22	PointedEar	419	## Hopefully found UA, else set to unknown
		420	if ( not defined $ua )
		421	{
		422	$ua = __ "unknown";
		423	}
5	PointedEar	424
22	PointedEar	425	$ua = clean($ua);
		426
		427	my $raw = $ua;
		428	my $agent = $raw;
		429
		430	## strip http
		431	if ( $raw =~ /.http./ )
7	PointedEar	432	{
22	PointedEar	433	$raw =~ s!posted via!!i;
		434	$raw =~ s!http://!!g;
		435	$raw =~ s!/!!g;
		436	$raw =~ s! !!g;
		437	}
5	PointedEar	438
22	PointedEar	439	## Fix Outlook from Mac
		440	if ( $raw =~ /^microsoft/i )
		441	{
		442	$raw =~ s/-/ /g;
		443	}
5	PointedEar	444
22	PointedEar	445	## Pick out the popular agents
		446	if ( $raw =~ /(outlook express)/i
		447	\|\| $raw =~ /(windows mail)/i
		448	\|\| $raw =~ /(microplanet gravity)/i
		449	\|\| $raw =~ /(news rover)/i
		450	\|\| $raw =~ /(forte agent)/i
		451	\|\| $raw =~ /(forte free agent)/i )
		452	{
		453	$agent = $1;
		454	}
		455	elsif (
		456	$raw =~ /^(
5	PointedEar	457	pan
		458	\|sylpheed
		459	\|slrn
		460	\|mozilla
		461	\|knode
		462	\|tin
		463	\|hamster
		464	\|xrn
		465	\|xnews
		466	\|aol
		467	\|gnus
		468	\|krn
		469	\|macsoup
		470	\|messenger
		471	\|openxp
		472	\|pine
		473	\|thoth
		474	\|turnpike
		475	\|winvn
		476	\|vsoup
		477	\|google
		478	\|supernews
		479	\|nn
		480	\|rn
		481	\|007
		482	\|webtv
		483	\|compuserve
7	PointedEar	484	)/ix
22	PointedEar	485	)
		486	{
		487	$agent = $1;
		488	}
		489	else
		490	{
		491	## Clean up unknown agents
		492	if ( $raw =~ m!^(.*?)/! )
7	PointedEar	493	{
		494	$agent = $1;
		495	}
22	PointedEar	496	elsif ( $raw =~ /^(\w)\d./ )
7	PointedEar	497	{
22	PointedEar	498	$agent = $1;
		499	}
		500	}
		501
		502	$distinct_agent{$agent}++;
		503	return $agent;
		504	}
		505	## get_agent
		506
		507	#########################################
		508	## Count the User-Agents used, collapsing
		509	## different versions into one per agent.
		510	#########################################
		511	sub count_agents
		512	{
		513	POSTER:
		514	foreach my $poster ( keys %data )
		515	{
		516	foreach my $agent_name ( keys %distinct_agent )
		517	{ # check against known ones
		518	if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7	PointedEar	519	{
22	PointedEar	520	$agents{$agent_name}++;
		521	next POSTER;
7	PointedEar	522	}
		523	}
22	PointedEar	524	$agents{ $data{$poster}{'agent'} }++;
		525	}
		526	} # count_agents
7	PointedEar	527
22	PointedEar	528	#############################################
		529	## Set orig/total percentages for all posters
		530	#############################################
		531	sub fix_percent
		532	{
		533	foreach my $poster ( keys %data )
		534	{
		535	my $percent = 100;
		536	if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
		537	{
		538	$percent =
		539	$data{$poster}{'orig'} * 100 /
		540	( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/
		541	}
		542	elsif ( $data{$poster}{'orig'} == 0 )
		543	{
		544	$percent = 0;
		545	}
		546	$data{$poster}{'percent'} = $percent;
5	PointedEar	547	}
22	PointedEar	548	}
		549	## fix_percent
5	PointedEar	550
22	PointedEar	551	##################################
		552	## Write data structures to a file
		553	##################################
		554	sub write_data
		555	{
		556	open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
		557	or die __x( "Can't create XDATA: {error}\n", error => $! );
		558	print $OUTF "Data collected from $newsgroup_name\n\n";
		559	print $OUTF
		560	"Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		561	foreach my $name ( keys %data )
7	PointedEar	562	{
22	PointedEar	563	print $OUTF
		564	"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7	PointedEar	565	}
22	PointedEar	566	print $OUTF
		567	"============================================================================\n";
		568	print $OUTF "Thread subjects\n";
		569	print $OUTF
		570	"----------------------------------------------------------------------------\n";
		571	foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
		572	{
		573	print $OUTF
		574	"$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
		575	}
		576	print $OUTF
		577	"============================================================================\n";
		578	print $OUTF "Cross-posts\n";
		579	print $OUTF
		580	"----------------------------------------------------------------------------\n";
		581	foreach my $name ( sort keys %crossposts )
		582	{
		583	print $OUTF "$name : $crossposts{$name}\n";
		584	}
		585	print $OUTF
		586	"============================================================================\n";
		587	print $OUTF "User agents\n";
		588	print $OUTF
		589	"----------------------------------------------------------------------------\n";
		590	foreach my $name ( sort keys %agents )
		591	{
		592	print $OUTF "$name : $agents{$name}\n";
		593	}
		594	print $OUTF
		595	"============================================================================\n";
		596	print $OUTF "Time zones\n";
		597	print $OUTF
		598	"----------------------------------------------------------------------------\n";
		599	foreach my $name ( sort keys %tz )
		600	{
		601	print $OUTF "$name : $tz{$name}\n";
		602	}
		603	close $OUTF;
		604	} # write_data
5	PointedEar	605
22	PointedEar	606	sub display_results
		607	{
		608	#################### DISPLAY RESULTS #####################
45	PointedEar	609	print "=" x 76, "\n";
22	PointedEar	610	printf "%s\n",
		611	centred(
48	PointedEar	612	__x( "Analysis of articles to {newsgroup}", newsgroup => $newsgroup_name ),
		613	76
		614	);
45	PointedEar	615	print "=" x 76, "\n";
22	PointedEar	616	printf "%s\n",
		617	centred(
23	PointedEar	618	__(
		619	"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
		620	),
22	PointedEar	621	76
		622	);
49	PointedEar	623	print "\n";
		624	printf __"Total articles considered: %s over %d days\n",
23	PointedEar	625	$formatter->format_number($totalposts),
		626	$formatter->format_number($numdays);
		627	my $time_locale = setlocale(LC_TIME);
22	PointedEar	628	my $earliest_datetime = DateTime->from_epoch(
23	PointedEar	629	epoch => $earliest,
		630	locale => $time_locale,
22	PointedEar	631	time_zone => 'UTC',
		632	);
		633	my $latest_datetime = DateTime->from_epoch(
23	PointedEar	634	epoch => $latest,
		635	locale => $time_locale,
22	PointedEar	636	time_zone => 'UTC',
		637	);
		638	my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
49	PointedEar	639	printf __"Earliest article: %s" . "\n",
23	PointedEar	640	$earliest_datetime->strftime($datetime_format);
49	PointedEar	641	printf __"Latest article: %s" . "\n",
23	PointedEar	642	$latest_datetime->strftime($datetime_format);
49	PointedEar	643	printf __"Original articles: %s; replies: %s" . "\n",
23	PointedEar	644	$formatter->format_number($origposts),
		645	$formatter->format_number($replies);
49	PointedEar	646	printf __"Total size of articles: %s bytes (%s)" . "\n",
23	PointedEar	647	$formatter->format_number($totsize),
		648	$formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
49	PointedEar	649	printf __"Average %s articles per day, %s per day, %s bytes per article.\n",
23	PointedEar	650	$formatter->format_number( int( $totalposts / $numdays ) ),
		651	$formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
		652	$formatter->format_number( int( $totsize / $totalposts ) );
		653
22	PointedEar	654	my $count = keys %data;
49	PointedEar	655	print "\n";
		656	printf __"Total headers: %s; bodies: %s\n",
23	PointedEar	657	$formatter->format_bytes(
		658	$totheader, ( 'precision' => 1, 'mode' => 'iec' )
		659	),
		660	$formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	661	printf __
23	PointedEar	662	"Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
		663	$formatter->format_bytes(
		664	$totquoted, ( 'precision' => 1, 'mode' => 'iec' )
		665	),
		666	$formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
49	PointedEar	667	$formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) )
		668	,
23	PointedEar	669	$formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
49	PointedEar	670	print "\n";
		671	printf __"Total number of posters: %s, average %s per poster\n",
23	PointedEar	672	$formatter->format_number($count),
		673	$formatter->format_bytes( $totsize / $count,
		674	( 'precision' => 1, 'mode' => 'iec' ) );
22	PointedEar	675	$count = keys %threads;
49	PointedEar	676	printf __"Total number of threads: %s, average %s per thread\n",
23	PointedEar	677	$formatter->format_number($count),
		678	$formatter->format_bytes( $totsize / $count,
		679	( 'precision' => 1, 'mode' => 'iec' ) );
		680	printf __"Total number of user agents: %d\n",
		681	$formatter->format_number( scalar keys %agents );
31	PointedEar	682	print "\n", "=" x 76, "\n";
22	PointedEar	683	########################################
		684	## Show posters by article count Sec 1;
		685	########################################
		686	unless ( $skipSec{1} )
7	PointedEar	687	{
22	PointedEar	688	if ( keys %data < $topposters )
		689	{
		690	$count = keys %data;
		691	}
		692	else
		693	{
		694	$count = $topposters;
		695	}
		696	printf "%s\n",
		697	centred(
23	PointedEar	698	__x( "Top {count} posters by number of articles", count => $topposters ),
		699	76
		700	);
22	PointedEar	701	print "=" x 76, "\n";
		702	my $i = 0;
		703	foreach
		704	my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
		705	{
		706	my $name = substr( $poster, 0, 65 );
25	PointedEar	707	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	708	$data{$poster}{count};
		709	last if ( ++$i == $count );
		710	}
		711	print "\n", "=" x 76, "\n";
7	PointedEar	712	}
22	PointedEar	713
		714	######################################
		715	## Show posters by size in KiB Sec 2;
		716	######################################
		717	unless ( $skipSec{2} )
7	PointedEar	718	{
22	PointedEar	719	if ( keys %data < $topposters )
		720	{
		721	$count = keys %data;
		722	}
		723	else
		724	{
		725	$count = $topposters;
		726	}
		727	printf "%s\n",
		728	centred(
		729	__x( "Top {count} posters by article size in KiB", count => $topposters ),
23	PointedEar	730	76
		731	);
22	PointedEar	732	print "=" x 76, "\n";
		733	my $i = 0;
		734	foreach
		735	my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
		736	{
		737	my $name = substr( $poster, 0, 62 );
25	PointedEar	738	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	739	$data{$poster}{size} / 1024; #/
		740	last if ( ++$i == $count );
		741	}
		742	print "\n", "=" x 76, "\n";
7	PointedEar	743	}
5	PointedEar	744
22	PointedEar	745	#####################################
		746	## Show top posters for original text
		747	#####################################
31	PointedEar	748	my $topposters_real = 0;
		749
22	PointedEar	750	unless ( $skipSec{3} )
7	PointedEar	751	{
22	PointedEar	752	if ( keys %data < $topposters )
		753	{
		754	$count = keys %data;
		755	}
		756	else
		757	{
		758	$count = $topposters;
		759	}
31	PointedEar	760
22	PointedEar	761	printf "%s\n",
		762	centred(
		763	__x(
48	PointedEar	764	"Top {count} responders by original text (> 5 articles)",
22	PointedEar	765	count => $topposters
		766	),
		767	76
		768	);
		769	print "=" x 76, "\n";
		770	foreach my $poster (
		771	sort { $data{$b}{percent} <=> $data{$a}{percent} }
		772	keys %data
		773	)
		774	{
		775	next if $data{$poster}{quoted} == 0;
		776	next if $data{$poster}{count} < 5;
		777	my $name = substr( $poster, 0, 63 );
31	PointedEar	778	printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1,
		779	rpad( $poster, 63, "." ),
22	PointedEar	780	$data{$poster}{percent};
31	PointedEar	781	last if ( ++$topposters_real == $count );
22	PointedEar	782	}
		783	print "\n", "=" x 76, "\n";
7	PointedEar	784	}
5	PointedEar	785
22	PointedEar	786	########################################
		787	## Show bottom posters for original text
		788	########################################
31	PointedEar	789
		790	$skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4};
		791
22	PointedEar	792	unless ( $skipSec{4} )
7	PointedEar	793	{
22	PointedEar	794	if ( keys %data < $topposters )
7	PointedEar	795	{
22	PointedEar	796	$count = keys %data;
		797	}
		798	else
		799	{
		800	$count = $topposters;
		801	}
31	PointedEar	802
22	PointedEar	803	printf "%s\n",
		804	centred(
		805	__x(
48	PointedEar	806	"Bottom {count} responders by original text (> 5 articles)",
22	PointedEar	807	count => $topposters
		808	),
		809	76
		810	);
		811	print "=" x 76, "\n";
		812	my $i = 0;
		813	foreach my $poster (
		814	sort { $data{$a}{percent} <=> $data{$b}{percent} }
		815	keys %data
		816	)
		817	{
		818	next if $data{$poster}{quoted} == 0;
		819	next if $data{$poster}{count} < 5;
		820	my $name = substr( $poster, 0, 63 );
25	PointedEar	821	printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22	PointedEar	822	$data{$poster}{percent};
		823	last if ( ++$i == $count );
		824	}
		825	print "\n", "=" x 76, "\n";
		826	}
5	PointedEar	827
22	PointedEar	828	#####################################
		829	## Show threads by number of articles
		830	#####################################
		831	unless ( $skipSec{5} )
		832	{
		833	if ( keys %threads < $topthreads )
		834	{
		835	$count = keys %threads;
7	PointedEar	836	}
22	PointedEar	837	else
		838	{
		839	$count = $topthreads;
7	PointedEar	840	}
22	PointedEar	841	printf "%s\n",
23	PointedEar	842	centred(
		843	__x( "Top {count} threads by no. of articles", count => $topthreads ),
22	PointedEar	844	76 );
		845	print "=" x 76, "\n";
		846	my $i = 0;
		847	foreach my $thread (
		848	sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
		849	keys %threads
		850	)
7	PointedEar	851	{
22	PointedEar	852	my $name = substr( $thread, 0, 65 );
25	PointedEar	853	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	854	$threads{$thread}{'count'};
		855	last if ( ++$i == $count );
7	PointedEar	856	}
22	PointedEar	857	print "\n", "=" x 76, "\n";
		858	}
		859
		860	##############################
		861	## Show threads by size in KiB
		862	##############################
		863	unless ( $skipSec{6} )
		864	{
		865	if ( keys %threads < $topthreads )
		866	{
		867	$count = keys %threads;
		868	}
7	PointedEar	869	else
		870	{
22	PointedEar	871	$count = $topthreads;
		872	}
		873	printf "%s\n",
23	PointedEar	874	centred(
		875	__x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22	PointedEar	876	print "=" x 76, "\n";
		877	my $i = 0;
		878	foreach my $thread (
		879	sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
		880	keys %threads
		881	)
		882	{
		883	my $name = substr( $thread, 0, 65 );
25	PointedEar	884	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	885	$threads{$thread}{'size'} / 1024; #/
		886	last if ( ++$i == $count );
		887	}
		888	print "\n", "=" x 76, "\n";
		889	}
5	PointedEar	890
22	PointedEar	891	##################################
		892	## Show top 10 cross-posted groups
		893	##################################
		894	unless ( $skipSec{7} )
		895	{
		896	delete $crossposts{"$newsgroup_name"}; # don't include ours
		897	if ( keys %crossposts < $topcrossposts )
		898	{
		899	$count = keys %crossposts;
7	PointedEar	900	}
22	PointedEar	901	else
		902	{
		903	$count = $topcrossposts;
		904	}
		905	printf "%s\n",
23	PointedEar	906	centred(
		907	__x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22	PointedEar	908	print "=" x 76, "\n";
		909	my $i = 0;
		910	foreach
		911	my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
		912	{
25	PointedEar	913	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22	PointedEar	914	$crossposts{$name};
		915	last if ( ++$i == $count );
		916	}
		917	print "\n", "=" x 76, "\n";
		918	}
5	PointedEar	919
22	PointedEar	920	#########################
		921	## Show agents and counts
		922	#########################
		923	unless ( $skipSec{8} )
		924	{
		925	if ( keys %agents < $topagents )
		926	{
		927	$count = keys %agents;
		928	}
		929	else
		930	{
		931	$count = $topagents;
		932	}
		933	printf "%s\n",
		934	centred( __x( "Top {count} user agents by poster", count => $topagents ),
		935	76 );
		936	print "=" x 76, "\n";
		937	my $i = 0;
		938	foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
		939	{
25	PointedEar	940	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
22	PointedEar	941	$agents{$agent};
		942	last if ( ++$i == $count );
		943	}
		944	print "\n", "=" x 76, "\n";
		945	}
5	PointedEar	946
22	PointedEar	947	#######################
		948	## Show distinct agents
		949	#######################
		950	unless ( $skipSec{9} )
7	PointedEar	951	{
22	PointedEar	952	if ( keys %distinct_agent < $topagents )
		953	{
		954	$count = keys %distinct_agent;
7	PointedEar	955	}
22	PointedEar	956	else
		957	{
		958	$count = $topagents;
		959	}
		960	printf "%s\n",
		961	centred(
48	PointedEar	962	__x(
		963	"Top {count} user agents by number of articles",
		964	count => $topagents
		965	),
23	PointedEar	966	76
		967	);
22	PointedEar	968	print "=" x 76, "\n";
		969	my $i = 0;
		970	foreach my $agent (
		971	sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
		972	keys %distinct_agent
		973	)
		974	{
25	PointedEar	975	printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
22	PointedEar	976	$distinct_agent{$agent},
		977	( ( $distinct_agent{$agent} / $totalposts ) * 100 );
		978	last if ( ++$i == $count );
		979	}
		980	print "\n", "=" x 76, "\n";
7	PointedEar	981	}
5	PointedEar	982
22	PointedEar	983	############################
		984	## Show timezones and counts
		985	############################
		986	unless ( $skipSec{10} )
7	PointedEar	987	{
22	PointedEar	988	if ( keys %tz < $toptz )
7	PointedEar	989	{
22	PointedEar	990	$count = keys %tz;
7	PointedEar	991	}
22	PointedEar	992	else
7	PointedEar	993	{
22	PointedEar	994	$count = $toptz;
7	PointedEar	995	}
23	PointedEar	996	printf "%s\n",
48	PointedEar	997	centred(
		998	__x( "Top {count} time zones by number of articles", count => $toptz ),
		999	76 );
22	PointedEar	1000	print "=" x 76, "\n";
		1001	my $i = 0;
		1002	foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
		1003	{
25	PointedEar	1004	printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
22	PointedEar	1005	last if ( ++$i == $count );
		1006	}
		1007	print "\n", "=" x 76, "\n";
7	PointedEar	1008	}
5	PointedEar	1009	}
		1010
22	PointedEar	1011	## helper subs
		1012
13	PointedEar	1013	###############################
		1014	## Right pad a string with '.'s
		1015	###############################
7	PointedEar	1016	sub rpad
		1017	{
13	PointedEar	1018	## Get text to pad, length to pad, pad chr
7	PointedEar	1019	my ( $text, $pad_len, $pad_chr ) = @_;
11	PointedEar	1020
13	PointedEar	1021	## DEBUG
22	PointedEar	1022	printf( "\|%s\| = %d\n", $text, length($text) ) if DEBUG > 1;
11	PointedEar	1023
7	PointedEar	1024	if ( length($text) > $pad_len )
		1025	{
		1026	$text = substr( $text, 0, $pad_len );
		1027	}
		1028	my $padded = $text . $pad_chr x ( $pad_len - length($text) );
		1029	return $padded;
5	PointedEar	1030	}
		1031
13	PointedEar	1032	##################
		1033	## Centre a string
		1034	##################
7	PointedEar	1035	sub centred
		1036	{
		1037	my ( $text, $width ) = @_; # text to centre, size of field to centre in
		1038	my $pad_len = ( $width - length($text) ) / 2; #/
		1039	my $centred = " " x $pad_len . $text;
		1040	return $centred;
5	PointedEar	1041	}
		1042
13	PointedEar	1043	###########################
		1044	## Put commas into a number
		1045	###########################
7	PointedEar	1046	sub commify
		1047	{
13	PointedEar	1048	local $_ = shift;
23	PointedEar	1049	my $number = $_;
		1050	$_ = int; # Chop non-integer part
22	PointedEar	1051	1 while
23	PointedEar	1052	s/([-+]?\d)(\d{$grouping[0]}($\|\Q$thousands_sep\E))/$1$thousands_sep$2/;
		1053	my $int_part = $_;
		1054	my $real_part = '';
		1055	if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
		1056	{
		1057	$real_part = $1;
		1058	}
		1059	return $int_part . $real_part;
5	PointedEar	1060	}
		1061
13	PointedEar	1062	################################################################
		1063	## Returns a string with leading and trailing whitespace removed
		1064	################################################################
7	PointedEar	1065	sub clean
		1066	{
		1067	my $dirty = shift;
		1068	my $clean = $dirty;
14	PointedEar	1069	$clean =~ s/^\s+\|\s+$//g;
5	PointedEar	1070
7	PointedEar	1071	return $clean;
5	PointedEar	1072	}
		1073
7	PointedEar	1074	sub usage
		1075	{
45	PointedEar	1076	print __ "usage: newsstat.pl NEWS.GROUP", "\n";
7	PointedEar	1077	exit 1;
5	PointedEar	1078	}
		1079
22	PointedEar	1080	sub dmsg
7	PointedEar	1081	{
22	PointedEar	1082	print STDERR @_, "\n";
		1083	}
		1084
		1085	sub dmsg2
		1086	{
		1087	my ( $level, @msg ) = @_;
		1088	print STDERR @msg, "\n" if $level >= DEBUG;
48	PointedEar	1089	}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 152 - Rev 49