WebSVN - LCARS - Blame - Rev 6 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
5	PointedEar	1	#!/usr/bin/perl -w
		2	use strict;
6	PointedEar	3	use warnings;
		4	use utf8;
		5	use Encode;
		6
5	PointedEar	7	#########################
6	PointedEar	8	# newsstat.pl version 0.4
5	PointedEar	9
6	PointedEar	10	############################################################################
		11	# Collect statistics about a newsgroup (specified by first argument) in
		12	# the local news spool. Check all articles in the last 30-day period.
		13	# Rank posters by number of posts and by volume of posts, report on top and
		14	# bottom 20 posters. Show their name, number of posts, size of posts,
		15	# percentage of quoted lines. Rank user-agents used, by poster rather than
		16	# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
		17	#
		18	# (Numbers and paths can be configured below. -- PE)
		19	############################################################################
5	PointedEar	20
6	PointedEar	21	############################################################################
		22	# RECENT CHANGES #
		23	# 2011-07-03 PE - Use Encode to decode/encode MIME encodings
		24	# - Use warnings, utf8 (just in case)
		25	# - Documentation update
		26	# N/A NN - Take newsgroup name as argument
		27	# 2004-06-19 NN - newsgroup name is $ARGV[0]
		28	# - Allow command line flags for subtracting
		29	# output if not pertinent for a group
		30	# 2002-11-09 NN - Put Garry's writedata() function back in.
		31	# - added "rn" to my list of UA's
		32	# - Started using %distinct_agent for both User agent
		33	# sections
		34	# - named it newsstat.pl version 0.3
		35	# 2002-11-06 NN - Fixed the earliest/latest file problem by using
		36	# mtime rather than ctime, and simplifying the logic
		37	# 2002-11-05 NN - moved user configurations to the top
		38	# - fixed the cross-posting section
		39	# - introduced the $newsgroup_name variable which
		40	# later becomes $news$group
		41	# - changed $name to $agent_name in countagents()
		42	#
		43	# Contributors
		44	# -------------
		45	# NN Nomen nominandum (name to be determined later)
		46	# PE Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
5	PointedEar	47
6	PointedEar	48	########### TODO #############
5	PointedEar	49	# Commas in bottom section of report
		50	# Show date the figures were compiled
		51	# No. of HTML articles (Content-Type: text/html)
		52	# No. of quoted sigs (/>\s*-- /)
		53	# Per cent of top-posted articles
		54	# Top 10 cross-posters
		55	# Top 20 news posting hosts (from Path)
		56	# Count of certain subject words: newbie, kde, burner, sendmail, etc.
		57	# Count all User Agents that each poster uses
		58	# What do we do about Bill Unruh's ] quote style?
		59	# Change the way dates/times are checked
		60	# include % share in posters by no. of arts
		61	# include % share in posters by size
		62	# Total, orig & quoted lines by user agent with per cent
6	PointedEar	63	# Take more arguments
5	PointedEar	64	#######################################################
		65
		66	###################### USER CONFIGURATIONS ############################
		67
		68	# The name of the group to do stats for
		69	my $newsgroup_name = $ARGV[0];
6	PointedEar	70	$newsgroup_name or &usage;
5	PointedEar	71
		72	# Check for removal flags
		73	my $ix;
		74	my $j;
		75	my %skipSec;
		76	my @skiplist;
		77	my $args = @ARGV;
		78	for ( $ix = 1 ; $ix < $args ; $ix++ ) {
		79	$j = $ix + 1;
		80	if ( $ARGV[$ix] eq "-x" ) {
		81	@skiplist = split(",",$ARGV[$j]);
		82	} elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
		83	@skiplist = split(",",$1);
		84	}
		85	}
		86	foreach(@skiplist) {
		87	$skipSec{$_} = 1;
		88	}
		89
		90	# Leafnode users will want /var/spool/news for this variable.
		91	my $news = "/var/spool/news/";
		92
		93	# How many days are we doing statistics for?
		94	my $numdays = 30;
		95
		96	# no. of agents we list
		97	my $topagents = 10;
		98
		99	# no. of threads we want to know about
		100	my $topthreads = 20;
		101
		102	# no. of top or bottom posters to show
		103	my $topposters = 20;
		104
		105	# no. of cross-posted threads to show
		106	my $topcrossposts = 10;
		107
		108	# no. of time zones to show
		109	my $toptz = 10;
		110
		111
		112
		113	###################### DATA STRUCTURES ######################
		114	my $group = $newsgroup_name;
		115	$group =~ s!\.!/!g;
		116	my %data; # name, count, agent, total, orig, quoted
		117	my %threads; # subject, count
		118	my %crossposts; # group, count
		119	my %tz; # timezones by count
		120	my %headers; # holds header of current article
		121	my %lcheader; # holds lowercase headers
		122	my @body; # holds body of current article
		123	my @sig; # holds sig text;
		124	my $totalposts; # total no. of posts considered
		125	my $filename; # name of current article file
		126	my $filesize; # size of current article file
		127	my $earliest; # earliest article we have found
		128	my $latest; # latest article we have found
		129	my $poster; # poster we are dealing with
		130	my $totsize = 0; # holds total sizes of all files
		131	my $totheader = 0; # total size of header material
		132	my $totbody = 0; # total size of body material
		133	my $totsig = 0; # total size of sig material
		134	my $totorig = 0; # total size of original material
		135	my $totquoted = 0; # total size of quoted material
		136	my $origposts = 0; # total no. of original posts
		137	my $replies = 0; # total no. of replies
		138	my $i; # general purpose
		139	my %distinct_agent;
		140	my %agents = # used to hold counts of User Agents used
		141	( "KNode" => 0,
		142	"Pan" => 0,
		143	"Mozilla" => 0,
		144	"Sylpheed" => 0,
		145	"Gnus" => 0,
		146	"Forte Agent" => 0,
		147	"Forte Free Agent" => 0,
		148	"MicroPlanet Gravity" => 0,
		149	"Microsoft Outlook Express" => 0,
		150	"Xnews" => 0,
		151	"slrn" => 0,
		152	"tin" => 0,
		153	"rn" => 0,
		154	"NN" => 0,
		155	"MacSOUP" => 0,
		156	"Foorum" => 0,
		157	"MT-NewsWatcher" => 0,
		158	"News Rover" => 0,
		159	"WebTV" => 0,
		160	"Compuserver" => 0,
		161	"VSoup" => 0);
		162
		163	######################## MAIN CODE ########################
		164	$! = 1;
		165
		166	chdir("$news$group") or die "Can't cd to $news$group: $!\n";
		167	opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
		168	while (defined($filename = readdir(DIR))) {
		169	%lcheader = ();
		170	next unless -f $filename; # only want real files
		171	next if ($filename eq ".overview"); # real articles only
		172	next if (-M $filename > $numdays); # only want articles <= a certain age
		173	$earliest = (stat $filename)[9] unless defined ($earliest);
		174	$latest = (stat $filename)[9] unless defined ($latest);
		175	&getarticle($filename); # read in the article
		176	&getdata; # grab the data from the article
		177	$totalposts++; # bump count of articles considered
		178	}
		179	closedir(DIR); # finished with the directory
		180	# post-processing
		181	&countagents; # count agents, collapsing versions
		182	&fixpercent; # check percentages orig/total for posters
		183
		184	&writedata;
		185
		186	#################### DISPLAY RESULTS #####################
		187	print "=" x 76, "\n";
		188	printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
		189	print "=" x 76, "\n";
6	PointedEar	190	printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
5	PointedEar	191	print "\n\n";
		192	printf "Total posts considered: %s over %d days\n",
		193	commify($totalposts), $numdays;
		194	printf "Earliest article: %s\n", scalar localtime($earliest);
		195	printf "Latest article: %s\n", scalar localtime($latest);
		196	printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
		197	printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
		198	commify(int($totsize / 1024)), $totsize / 1048576; #
		199	printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
		200	commify(int($totalposts / $numdays)),
		201	$totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
		202	my $count = keys %data;
		203	printf "Total headers: %s KB bodies: %s KB\n",
		204	commify(int($totheader / 1024)), commify(int($totbody / 1024));
		205	printf "Body text - quoted: %s KB, original: %s KB = %02.2f%%, sigs: %s KB\n",
		206	commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
		207	($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
		208	printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
		209	commify(int($totsize / $count)); #/
		210	$count = keys %threads;
		211	printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
		212	commify(int($totsize / $count)); #/
		213	printf "Total number of User-Agents: %d\n", scalar keys %agents;
		214	print "\n", "=" x 76, "\n";
		215
		216	###############################
		217	# show posters by article count Sec 1;
		218	###############################
		219	unless ( $skipSec{1} ) {
		220	if (keys %data < $topposters) {
		221	$count = keys %data;
		222	} else {
		223	$count = $topposters;
		224	}
		225	printf "%s\n", &centred("Top $count posters by number of articles", 76);
		226	print "=" x 76, "\n";
		227	$i = 0;
		228	foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
		229	my $name = substr($poster, 0, 65);
		230	printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
		231	last if (++$i == $count);
		232	}
		233	print "\n", "=" x 76, "\n";
		234	}
		235
		236	################################
		237	# show posters by size in Kbytes Sec 2;
		238	################################
		239	unless ( $skipSec{2} ) {
		240	if (keys %data < $topposters) {
		241	$count = keys %data;
		242	} else {
		243	$count = $topposters;
		244	}
		245	printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
		246	print "=" x 76, "\n";
		247	$i = 0;
		248	foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
		249	my $name = substr($poster, 0, 62);
		250	printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
		251	last if (++$i == $count);
		252	}
		253	print "\n", "=" x 76, "\n";
		254	}
		255
		256	####################################
		257	# show top posters for original text
		258	####################################
		259	unless ( $skipSec{3} ) {
		260	if (keys %data < $topposters) {
		261	$count = keys %data;
		262	} else {
		263	$count = $topposters;
		264	}
		265	printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
		266	print "=" x 76, "\n";
		267	$i = 0;
		268	foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
		269	next if $data{$poster}{quoted} == 0;
		270	next if $data{$poster}{count} < 5;
		271	my $name = substr($poster, 0, 63);
		272	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
		273	last if (++$i == $count);
		274	}
		275	print "\n", "=" x 76, "\n";
		276	}
		277
		278	#######################################
		279	# show bottom posters for original text
		280	#######################################
		281	unless ( $skipSec{4} ) {
		282	if (keys %data < $topposters) {
		283	$count = keys %data;
		284	} else {
		285	$count = $topposters;
		286	}
		287	printf "%s\n", &centred("Bottom $count responders by original text (> 5 posts)", 76);
		288	print "=" x 76, "\n";
		289	$i = 0;
		290	foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
		291	next if $data{$poster}{quoted} == 0;
		292	next if $data{$poster}{count} < 5;
		293	my $name = substr($poster, 0, 63);
		294	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
		295	last if (++$i == $count);
		296	}
		297	print "\n", "=" x 76, "\n";
		298	}
		299
		300	####################################
		301	# show threads by number of articles
		302	####################################
		303	unless ( $skipSec{5} ) {
		304	if (keys %threads < $topthreads) {
		305	$count = keys %threads;
		306	} else {
		307	$count = $topthreads;
		308	}
		309	printf "%s\n", &centred("Top $count threads by no. of articles", 76);
		310	print "=" x 76, "\n";
		311	$i = 0;
		312	foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
		313	my $name = substr($thread, 0, 65);
		314	printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
		315	last if (++$i == $count);
		316	}
		317	print "\n", "=" x 76, "\n";
		318	}
		319	################################
		320	# show threads by size in Kbytes
		321	################################
		322	unless ( $skipSec{6} ) {
		323	if (keys %threads < $topthreads) {
		324	$count = keys %threads;
		325	} else {
		326	$count = $topthreads;
		327	}
		328	printf "%s\n", &centred("Top $count threads by size in KB", 76);
		329	print "=" x 76, "\n";
		330	$i = 0;
		331	foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
		332	my $name = substr($thread, 0, 65);
		333	printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
		334	last if (++$i == $count);
		335	}
		336	print "\n", "=" x 76, "\n";
		337	}
		338
		339	#################################
		340	# show top 10 cross-posted groups
		341	#################################
		342	unless ( $skipSec{7} ) {
		343	delete $crossposts{"$newsgroup_name"}; # don't include ours
		344	if (keys %crossposts < $topcrossposts) {
		345	$count = keys %crossposts;
		346	} else {
		347	$count = $topcrossposts;
		348	}
		349	printf "%s\n", &centred("Top $count cross-posted groups", 76);
		350	print "=" x 76, "\n";
		351	$i = 0;
		352	foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
		353	printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
		354	last if (++$i == $count);
		355	}
		356	print "\n", "=" x 76, "\n";
		357	}
		358	#######################
		359	#show agents and counts
		360	#######################
		361	unless ( $skipSec{8} ) {
		362	if (keys %agents < $topagents) {
		363	$count = keys %agents;
		364	} else {
		365	$count = $topagents;
		366	}
		367	printf "%s\n", &centred("Top $count User Agents by poster", 76);
		368	print "=" x 76, "\n";
		369	$i = 0;
		370	foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
		371	printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
		372	last if (++$i == $count);
		373	}
		374	print "\n", "=" x 76, "\n";
		375	}
		376
		377	#######################
		378	#show distinct agents
		379	#######################
		380	unless ( $skipSec{9} ) {
		381	if (keys %distinct_agent < $topagents) {
		382	$count = keys %distinct_agent;
		383	} else {
		384	$count = $topagents;
		385	}
		386	printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
		387	print "=" x 76, "\n";
		388	$i = 0;
		389	foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
		390	printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
		391	last if (++$i == $count);
		392	}
		393	print "\n", "=" x 76, "\n";
		394	}
		395
		396	##########################
		397	#show timezones and counts
		398	##########################
		399	unless ( $skipSec{10} ) {
		400	if (keys %tz < $toptz) {
		401	$count = keys %tz;
		402	} else {
		403	$count = $toptz;
		404	}
		405	printf "%s\n", &centred("Top 10 time zones", 76);
		406	print "=" x 76, "\n";
		407	$i = 0;
		408	foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
		409	printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
		410	last if (++$i == $count);
		411	}
		412	print "\n", "=" x 76, "\n";
		413	}
		414
		415
		416	################################ SUBROUTINES ################################
		417
		418
		419	#######################################
		420	# get current article's header and body
		421	#######################################
		422	sub getarticle {
		423	%headers = (); # dump old headers
		424	my $filename = shift; # get the name of the file
		425	# get stats about the file itself
		426	$filesize = -s $filename; # get total size of file
		427	$totsize += $filesize; # bump total sizes of all files
		428
		429	my $mtime = (stat $filename)[9];
		430	if ( $mtime < $earliest ) {
		431	$earliest = $mtime;
		432	} elsif ( $mtime > $latest ) {
		433	$latest = $mtime;
		434	}
		435
		436	# now read the file
		437	open(FILE, $filename) or die "Can't open $filename: $!\n";
		438	while (<FILE>) {
		439	$totheader += length($_); # bump total header size
		440	last if (/^\s*$/); # end of header?
		441	if (/^([^:\s]):\s+(.)/) {
		442	my($key,$val) = ($1,$2);
6	PointedEar	443	$headers{$key} = decode('MIME-Header', $val);
5	PointedEar	444	$lcheader{clean(lc($key))} = clean($val);
		445	}
		446	}
		447	@body = <FILE>; # slurp up body
		448	close(FILE);
		449	} # getarticle
		450
		451	###################################
		452	# get data from the current article
		453	###################################
		454	sub getdata {
		455	#### First, analyse header fields ####
		456
		457	# Set up this poster if not defined, get counts, sizes
6	PointedEar	458	$poster = encode('UTF-8', $headers{From}); # get the poster's name
5	PointedEar	459	if (!defined($data{$poster})) { # seen this one before?
		460	$data{$poster}{agent} = 'Unknown'; # comes after For: field
		461	$data{$poster}{orig} = 0;
		462	$data{$poster}{quoted} = 0;
		463	}
		464	$data{$poster}{count}++; # bump count for this poster
		465	$data{$poster}{size} += $filesize; # total size of file
		466
		467	# The User-Agent and/or X-Newsreader fields
		468	# for User-Agent by poster
		469	if (defined $lcheader{"user-agent"}) {
		470	$data{$poster}{agent} = $lcheader{"user-agent"};
		471	}
		472	if (defined $lcheader{"x-newsreader"}) {
		473	$data{$poster}{agent} = $lcheader{"x-newsreader"};
		474	}
		475
		476	# The User Agent for User-Agent by number of posts
		477	my $UA = "unknown";
		478	foreach my $keys ( keys %lcheader )
		479	{
		480	if (defined $lcheader{'user-agent'})
		481	{
		482	$UA = $lcheader{'user-agent'};
		483	}
		484	elsif (defined $lcheader{"x-newsreader"})
		485	{
		486	$UA = $lcheader{"x-newsreader"};
		487	}
		488	elsif (defined $lcheader{'x-mailer'})
		489	{
		490	$UA = $lcheader{'x-mailer'};
		491	}
		492	elsif ((defined $lcheader{'organization'}) &&
		493	($lcheader{'organization'} =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/))
		494	{
		495	$UA = $lcheader{'organization'};
		496	}
		497	elsif ( $lcheader{'message-id'} =~ /pine/i )
		498	{
		499	$UA = "Pine";
		500	} ## Hopefully found UA, else set to unknown
		501	}
		502
		503
		504	$UA = clean($UA);
		505	$UA = get_agent($UA);
		506
		507
		508	sub get_agent {
		509	my $raw = shift;
		510	my $agent = $raw;
		511
		512	## strip http
		513	if ( $raw =~ /.http./ ) {
		514	$raw =~ s!posted via!!i;
		515	$raw =~ s!http://!!g;
		516	$raw =~ s!/!!g;
		517	$raw =~ s! !!g;
		518	}
		519
		520	## Fix Outlook from Mac
		521	if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
		522
		523	## Pick out the popular agents
		524	if ( $raw =~ /(outlook express)/i \|\|
		525	$raw =~ /(microplanet gravity)/i \|\|
		526	$raw =~ /(news rover)/i \|\|
		527	$raw =~ /(forte agent)/i \|\|
		528	$raw =~ /(forte free agent)/i
		529	)
		530	{
		531	$agent = $1;
		532	}
		533	elsif ( $raw =~ /^(
		534	pan
		535	\|sylpheed
		536	\|slrn
		537	\|mozilla
		538	\|knode
		539	\|tin
		540	\|hamster
		541	\|xrn
		542	\|xnews
		543	\|aol
		544	\|gnus
		545	\|krn
		546	\|macsoup
		547	\|messenger
		548	\|openxp
		549	\|pine
		550	\|thoth
		551	\|turnpike
		552	\|winvn
		553	\|vsoup
		554	\|google
		555	\|supernews
		556	\|nn
		557	\|rn
		558	\|007
		559	\|webtv
		560	\|compuserve
		561	)/ix )
		562	{
		563	$agent = $1;
		564	}
		565	else
		566	{
		567	## Clean up unknown agents
		568	if ( $raw =~ m!^(.*?)/! ) {
		569	$agent = $1;
		570	}
		571	elsif ( $raw =~ /^(\w)\d./ )
		572	{
		573	$agent = $1;
		574	}
		575	}
		576
		577	$distinct_agent{$agent}++;
		578	return $agent;
		579	}
		580
		581
		582	# Get all cross-posted newsgroups
		583	for (split /,/, $headers{"Newsgroups"}) {
		584	$crossposts{$_}++; # bump count for each
		585	}
		586
		587	# Get threads
6	PointedEar	588	my $thread = encode('UTF-8', $headers{"Subject"});
5	PointedEar	589	$thread =~ s/^re: //i; # Remove Re: or re: at start
		590	$thread =~ s/\s+/ /g; # collapse whitespace
		591	$threads{$thread}{count} += 1; # bump count of this subject
		592	$threads{$thread}{size} += $filesize; # bump bytes for this thread
		593
		594	# Is this an original post or a reply?
		595	if (defined $headers{"References"}) {
		596	$replies++;
		597	} else {
		598	$origposts++;
		599	}
		600
		601	# Get the time zone
		602	$_ = $headers{"Date"};
		603	my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
		604	if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
		605	$tz = "UTC";
		606	}
		607	$tz{$tz}++;
		608
		609	#### Now analyse the body text ####
		610	my $insig = 0;
		611	for (@body) {
		612	$totbody += length($_); # bump total body size
		613	next if (/^$>/); # don't count blank lines in body
		614	if ($insig == 1) {
		615	$totsig += length($_); # bump total sig size
		616	# Bill Unruh uses ] quotes, and another poster uses ::
		617	} elsif (/^\s[>\]]/ or /^\s::/) { # are we in a quote line?
		618	$data{$poster}{quoted} += length($_); # bump count of quoted chrs
		619	$totquoted += length($_);
		620	} elsif (/-- /) {
		621	$insig = 1;
		622	} else {
		623	# we must be processing an original line
		624	$data{$poster}{orig} += length($_); # bump count of original chrs
		625	$totorig += length($_);
		626	}
		627	} # end for (@body)
		628
		629	} # getdata
		630
		631	########################################
		632	# Count the User-Agents used, collapsing
		633	# different versions into one per agent.
		634	########################################
		635	sub countagents {
		636	POSTER:
		637	foreach $poster (keys %data) {
		638	foreach my $agent_name (keys %distinct_agent) { # check against known ones
		639	if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) {
		640	$agents{$agent_name}++;
		641	next POSTER;
		642	}
		643	}
		644	$agents{$data{$poster}{agent}}++;
		645	}
		646	} # countagents
		647
		648	############################################
		649	# set orig/total percentages for all posters
		650	############################################
		651	sub fixpercent {
		652	foreach $poster (keys %data) {
		653	my $percent = 100;
		654	if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) {
		655	$percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/
		656	} elsif ($data{$poster}{orig} == 0) {
		657	$percent = 0;
		658	}
		659	$data{$poster}{percent} = $percent;
		660	}
		661	}
		662
		663	##############################
		664	# right pad a string with '.'s
		665	##############################
		666	sub rpad {
		667	# get text to pad, length to pad, pad chr
		668	my ($text, $pad_len, $pad_chr) = @_;
		669	if (length($text) > $pad_len) {
		670	$text = substr($text, 0, $pad_len);
		671	}
		672	my $padded = $text . $pad_chr x ( $pad_len - length( $text ) );
		673	return $padded;
		674	}
		675
		676	#################
		677	# centre a string
		678	#################
		679	sub centred {
		680	my ($text, $width) = @_; # text to centre, size of field to centre in
		681	my $pad_len = ($width - length($text)) / 2; #/
		682	my $centred = " " x $pad_len . $text;
		683	return $centred;
		684	}
		685
		686	##########################
		687	# put commas into a number
		688	##########################
		689	sub commify {
		690	$_ = shift;
		691	1 while s/^(-?\d+)(\d{3})/$1,$2/;
		692	return $_;
		693	}
		694
		695	#########################
		696	# clean
		697	#########################
		698	sub clean {
		699	my $dirty = shift;
		700	my $clean = $dirty;
		701	$clean =~ s/^\s*//;
		702	$clean =~ s/\s*$//;
		703
		704	return $clean;
		705	}
		706
		707
		708	sub usage {
		709
		710	print "usage: newstat.pl newsgroupname\n";
		711	exit 1;
		712	}
		713
		714	###################################
		715	# Write data structures to a file #
		716	###################################
		717	sub writedata {
		718	open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
		719	print OUTF "Data collected from alt.os.linux.mandrake\n\n";
		720	print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		721	foreach my $name (keys %data) {
		722	print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
		723	}
		724	print OUTF "============================================================================\n";
		725	print OUTF "Thread subjects\n";
		726	print OUTF "----------------------------------------------------------------------------\n";
		727	foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {
		728	print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
		729	}
		730	print OUTF "============================================================================\n";
		731	print OUTF "Cross-posts\n";
		732	print OUTF "----------------------------------------------------------------------------\n";
		733	foreach my $name (sort keys %crossposts) {
		734	print OUTF "$name : $crossposts{$name}\n";
		735	}
		736	print OUTF
		737	print OUTF "============================================================================\n";
		738	print OUTF "User agents\n";
		739	print OUTF "----------------------------------------------------------------------------\n";
		740	foreach my $name (sort keys %agents) {
		741	print OUTF "$name : $agents{$name}\n";
		742	}
		743	print OUTF "============================================================================\n";
		744	print OUTF "Time zones\n";
		745	print OUTF "----------------------------------------------------------------------------\n";
		746	foreach my $name (sort keys %tz) {
		747	print OUTF "$name : $tz{$name}\n";
		748	}
		749	close OUTF;
		750	} # writedata

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 8 - Rev 6