WebSVN - LCARS - Blame - Rev 5 - /trunk/tools/network/news/newsstat/newsstat.pl

Rev	Author	Line No.	Line
5	PointedEar	1	#!/usr/bin/perl -w
		2	use strict;
		3	#########################
		4	# newsstat.pl version 0.3
		5
		6
		7
###################################################################
# Collect statistics about the newsgroup named on the command line.
# Check all articles of the last $numdays days. Rank posters by
# no. of posts and by volume of posts, report on the top and bottom
# $topposters posters. Show their name, no. of posts, size of posts,
# percentage of quoted lines. Rank user-agents used, by poster and
# by post. Rank the top threads, cross-posted groups and time zones.
###################################################################
		16
		17	##################################################################
		18	# RECENT CHANGES #
		19	# 2004/06/19 - newsgroup name is $ARGV[0]
		20	# - Allow command line flags for subtracting
		21	# output if not pertinent for a group
		22	# 2002/11/09 - Put Garry's writedata() function back in.
		23	# - added "rn" to my list of UA's
		24	# - Started using %distinct_agent for both User agent
		25	# sections
		26	# - named it newsstat.pl version 0.3
		27	# 2002/11/06 - Fixed the earliest/latest file problem by using
		28	# mtime rather than ctime, and simplifying the logic
		29	# 2002/11/05 - moved user configurations to the top
		30	# - fixed the cross-posting section
		31	# - introduced the $newsgroup_name variable which
		32	# later becomes $news$group
		33	# - changed $name to $agent_name in countagents()
		34
		35	########### NEXT #############
		36	# Commas in bottom section of report
		37	# Show date the figures were compiled
		38	# No. of HTML articles (Content-Type: text/html)
		39	# No. of quoted sigs (/>\s*-- /)
		40	# Per cent of top-posted articles
		41	# Top 10 cross-posters
		42	# Top 20 news posting hosts (from Path)
		43	# Count of certain subject words: newbie, kde, burner, sendmail, etc.
		44	# Count all User Agents that each poster uses
		45	# What do we do about Bill Unruh's ] quote style?
		46	# Change the way dates/times are checked
		47	# include % share in posters by no. of arts
		48	# include % share in posters by size
		49	# Total, orig & quoted lines by user agent with per cent
		50	# Take arguments, i.e. newsgroup name
		51	#######################################################
		52
		53	###################### USER CONFIGURATIONS ############################
		54
		55	# The name of the group to do stats for
		56	my $newsgroup_name = $ARGV[0];
		57	$newsgroup_name or &usage;
		58
		59	# Check for removal flags
		60	my $ix;
		61	my $j;
		62	my %skipSec;
		63	my @skiplist;
		64	my $args = @ARGV;
		65	for ( $ix = 1 ; $ix < $args ; $ix++ ) {
		66	$j = $ix + 1;
		67	if ( $ARGV[$ix] eq "-x" ) {
		68	@skiplist = split(",",$ARGV[$j]);
		69	} elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
		70	@skiplist = split(",",$1);
		71	}
		72	}
		73	foreach(@skiplist) {
		74	$skipSec{$_} = 1;
		75	}
		76
		77	# Leafnode users will want /var/spool/news for this variable.
		78	my $news = "/var/spool/news/";
		79
		80	# How many days are we doing statistics for?
		81	my $numdays = 30;
		82
		83	# no. of agents we list
		84	my $topagents = 10;
		85
		86	# no. of threads we want to know about
		87	my $topthreads = 20;
		88
		89	# no. of top or bottom posters to show
		90	my $topposters = 20;
		91
		92	# no. of cross-posted threads to show
		93	my $topcrossposts = 10;
		94
		95	# no. of time zones to show
		96	my $toptz = 10;
		97
		98
		99
		100	###################### DATA STRUCTURES ######################
		101	my $group = $newsgroup_name;
		102	$group =~ s!\.!/!g;
		103	my %data; # name, count, agent, total, orig, quoted
		104	my %threads; # subject, count
		105	my %crossposts; # group, count
		106	my %tz; # timezones by count
		107	my %headers; # holds header of current article
		108	my %lcheader; # holds lowercase headers
		109	my @body; # holds body of current article
		110	my @sig; # holds sig text;
		111	my $totalposts; # total no. of posts considered
		112	my $filename; # name of current article file
		113	my $filesize; # size of current article file
		114	my $earliest; # earliest article we have found
		115	my $latest; # latest article we have found
		116	my $poster; # poster we are dealing with
		117	my $totsize = 0; # holds total sizes of all files
		118	my $totheader = 0; # total size of header material
		119	my $totbody = 0; # total size of body material
		120	my $totsig = 0; # total size of sig material
		121	my $totorig = 0; # total size of original material
		122	my $totquoted = 0; # total size of quoted material
		123	my $origposts = 0; # total no. of original posts
		124	my $replies = 0; # total no. of replies
		125	my $i; # general purpose
		126	my %distinct_agent;
		127	my %agents = # used to hold counts of User Agents used
		128	( "KNode" => 0,
		129	"Pan" => 0,
		130	"Mozilla" => 0,
		131	"Sylpheed" => 0,
		132	"Gnus" => 0,
		133	"Forte Agent" => 0,
		134	"Forte Free Agent" => 0,
		135	"MicroPlanet Gravity" => 0,
		136	"Microsoft Outlook Express" => 0,
		137	"Xnews" => 0,
		138	"slrn" => 0,
		139	"tin" => 0,
		140	"rn" => 0,
		141	"NN" => 0,
		142	"MacSOUP" => 0,
		143	"Foorum" => 0,
		144	"MT-NewsWatcher" => 0,
		145	"News Rover" => 0,
		146	"WebTV" => 0,
		147	"Compuserver" => 0,
		148	"VSoup" => 0);
		149
		150	######################## MAIN CODE ########################
		151	$! = 1;
		152
		153	chdir("$news$group") or die "Can't cd to $news$group: $!\n";
		154	opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
		155	while (defined($filename = readdir(DIR))) {
		156	%lcheader = ();
		157	next unless -f $filename; # only want real files
		158	next if ($filename eq ".overview"); # real articles only
		159	next if (-M $filename > $numdays); # only want articles <= a certain age
		160	$earliest = (stat $filename)[9] unless defined ($earliest);
		161	$latest = (stat $filename)[9] unless defined ($latest);
		162	&getarticle($filename); # read in the article
		163	&getdata; # grab the data from the article
		164	$totalposts++; # bump count of articles considered
		165	}
		166	closedir(DIR); # finished with the directory
		167	# post-processing
		168	&countagents; # count agents, collapsing versions
		169	&fixpercent; # check percentages orig/total for posters
		170
		171	&writedata;
		172
		173	#################### DISPLAY RESULTS #####################
		174	print "=" x 76, "\n";
		175	printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
		176	print "=" x 76, "\n";
		177	printf "%s\n", &centred("(stats compiled with a script by Garry Knight)", 76);
		178	print "\n\n";
		179	printf "Total posts considered: %s over %d days\n",
		180	commify($totalposts), $numdays;
		181	printf "Earliest article: %s\n", scalar localtime($earliest);
		182	printf "Latest article: %s\n", scalar localtime($latest);
		183	printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
		184	printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
		185	commify(int($totsize / 1024)), $totsize / 1048576; #
		186	printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
		187	commify(int($totalposts / $numdays)),
		188	$totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
		189	my $count = keys %data;
		190	printf "Total headers: %s KB bodies: %s KB\n",
		191	commify(int($totheader / 1024)), commify(int($totbody / 1024));
		192	printf "Body text - quoted: %s KB, original: %s KB = %02.2f%%, sigs: %s KB\n",
		193	commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
		194	($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
		195	printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
		196	commify(int($totsize / $count)); #/
		197	$count = keys %threads;
		198	printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
		199	commify(int($totsize / $count)); #/
		200	printf "Total number of User-Agents: %d\n", scalar keys %agents;
		201	print "\n", "=" x 76, "\n";
		202
		203	###############################
		204	# show posters by article count Sec 1;
		205	###############################
		206	unless ( $skipSec{1} ) {
		207	if (keys %data < $topposters) {
		208	$count = keys %data;
		209	} else {
		210	$count = $topposters;
		211	}
		212	printf "%s\n", &centred("Top $count posters by number of articles", 76);
		213	print "=" x 76, "\n";
		214	$i = 0;
		215	foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
		216	my $name = substr($poster, 0, 65);
		217	printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
		218	last if (++$i == $count);
		219	}
		220	print "\n", "=" x 76, "\n";
		221	}
		222
		223	################################
		224	# show posters by size in Kbytes Sec 2;
		225	################################
		226	unless ( $skipSec{2} ) {
		227	if (keys %data < $topposters) {
		228	$count = keys %data;
		229	} else {
		230	$count = $topposters;
		231	}
		232	printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
		233	print "=" x 76, "\n";
		234	$i = 0;
		235	foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
		236	my $name = substr($poster, 0, 62);
		237	printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
		238	last if (++$i == $count);
		239	}
		240	print "\n", "=" x 76, "\n";
		241	}
		242
		243	####################################
		244	# show top posters for original text
		245	####################################
		246	unless ( $skipSec{3} ) {
		247	if (keys %data < $topposters) {
		248	$count = keys %data;
		249	} else {
		250	$count = $topposters;
		251	}
		252	printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
		253	print "=" x 76, "\n";
		254	$i = 0;
		255	foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
		256	next if $data{$poster}{quoted} == 0;
		257	next if $data{$poster}{count} < 5;
		258	my $name = substr($poster, 0, 63);
		259	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
		260	last if (++$i == $count);
		261	}
		262	print "\n", "=" x 76, "\n";
		263	}
		264
		265	#######################################
		266	# show bottom posters for original text
		267	#######################################
		268	unless ( $skipSec{4} ) {
		269	if (keys %data < $topposters) {
		270	$count = keys %data;
		271	} else {
		272	$count = $topposters;
		273	}
		274	printf "%s\n", &centred("Bottom $count responders by original text (> 5 posts)", 76);
		275	print "=" x 76, "\n";
		276	$i = 0;
		277	foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
		278	next if $data{$poster}{quoted} == 0;
		279	next if $data{$poster}{count} < 5;
		280	my $name = substr($poster, 0, 63);
		281	printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
		282	last if (++$i == $count);
		283	}
		284	print "\n", "=" x 76, "\n";
		285	}
		286
		287	####################################
		288	# show threads by number of articles
		289	####################################
		290	unless ( $skipSec{5} ) {
		291	if (keys %threads < $topthreads) {
		292	$count = keys %threads;
		293	} else {
		294	$count = $topthreads;
		295	}
		296	printf "%s\n", &centred("Top $count threads by no. of articles", 76);
		297	print "=" x 76, "\n";
		298	$i = 0;
		299	foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
		300	my $name = substr($thread, 0, 65);
		301	printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
		302	last if (++$i == $count);
		303	}
		304	print "\n", "=" x 76, "\n";
		305	}
		306	################################
		307	# show threads by size in Kbytes
		308	################################
		309	unless ( $skipSec{6} ) {
		310	if (keys %threads < $topthreads) {
		311	$count = keys %threads;
		312	} else {
		313	$count = $topthreads;
		314	}
		315	printf "%s\n", &centred("Top $count threads by size in KB", 76);
		316	print "=" x 76, "\n";
		317	$i = 0;
		318	foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
		319	my $name = substr($thread, 0, 65);
		320	printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
		321	last if (++$i == $count);
		322	}
		323	print "\n", "=" x 76, "\n";
		324	}
		325
		326	#################################
		327	# show top 10 cross-posted groups
		328	#################################
		329	unless ( $skipSec{7} ) {
		330	delete $crossposts{"$newsgroup_name"}; # don't include ours
		331	if (keys %crossposts < $topcrossposts) {
		332	$count = keys %crossposts;
		333	} else {
		334	$count = $topcrossposts;
		335	}
		336	printf "%s\n", &centred("Top $count cross-posted groups", 76);
		337	print "=" x 76, "\n";
		338	$i = 0;
		339	foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
		340	printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
		341	last if (++$i == $count);
		342	}
		343	print "\n", "=" x 76, "\n";
		344	}
		345	#######################
		346	#show agents and counts
		347	#######################
		348	unless ( $skipSec{8} ) {
		349	if (keys %agents < $topagents) {
		350	$count = keys %agents;
		351	} else {
		352	$count = $topagents;
		353	}
		354	printf "%s\n", &centred("Top $count User Agents by poster", 76);
		355	print "=" x 76, "\n";
		356	$i = 0;
		357	foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
		358	printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
		359	last if (++$i == $count);
		360	}
		361	print "\n", "=" x 76, "\n";
		362	}
		363
		364	#######################
		365	#show distinct agents
		366	#######################
		367	unless ( $skipSec{9} ) {
		368	if (keys %distinct_agent < $topagents) {
		369	$count = keys %distinct_agent;
		370	} else {
		371	$count = $topagents;
		372	}
		373	printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
		374	print "=" x 76, "\n";
		375	$i = 0;
		376	foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
		377	printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
		378	last if (++$i == $count);
		379	}
		380	print "\n", "=" x 76, "\n";
		381	}
		382
		383	##########################
		384	#show timezones and counts
		385	##########################
		386	unless ( $skipSec{10} ) {
		387	if (keys %tz < $toptz) {
		388	$count = keys %tz;
		389	} else {
		390	$count = $toptz;
		391	}
		392	printf "%s\n", &centred("Top 10 time zones", 76);
		393	print "=" x 76, "\n";
		394	$i = 0;
		395	foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
		396	printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
		397	last if (++$i == $count);
		398	}
		399	print "\n", "=" x 76, "\n";
		400	}
		401
		402
		403	################################ SUBROUTINES ################################
		404
		405
		406	#######################################
		407	# get current article's header and body
		408	#######################################
		409	sub getarticle {
		410	%headers = (); # dump old headers
		411	my $filename = shift; # get the name of the file
		412	# get stats about the file itself
		413	$filesize = -s $filename; # get total size of file
		414	$totsize += $filesize; # bump total sizes of all files
		415
		416	my $mtime = (stat $filename)[9];
		417	if ( $mtime < $earliest ) {
		418	$earliest = $mtime;
		419	} elsif ( $mtime > $latest ) {
		420	$latest = $mtime;
		421	}
		422
		423	# now read the file
		424	open(FILE, $filename) or die "Can't open $filename: $!\n";
		425	while (<FILE>) {
		426	$totheader += length($_); # bump total header size
		427	last if (/^\s*$/); # end of header?
		428	if (/^([^:\s]):\s+(.)/) {
		429	my($key,$val) = ($1,$2);
		430	$headers{$key} = $val;
		431	$lcheader{clean(lc($key))} = clean($val);
		432	}
		433	}
		434	@body = <FILE>; # slurp up body
		435	close(FILE);
		436	} # getarticle
		437
		438	###################################
		439	# get data from the current article
		440	###################################
		441	sub getdata {
		442	#### First, analyse header fields ####
		443
		444	# Set up this poster if not defined, get counts, sizes
		445	$poster = $headers{From}; # get the poster's name
		446	if (!defined($data{$poster})) { # seen this one before?
		447	$data{$poster}{agent} = 'Unknown'; # comes after For: field
		448	$data{$poster}{orig} = 0;
		449	$data{$poster}{quoted} = 0;
		450	}
		451	$data{$poster}{count}++; # bump count for this poster
		452	$data{$poster}{size} += $filesize; # total size of file
		453
		454	# The User-Agent and/or X-Newsreader fields
		455	# for User-Agent by poster
		456	if (defined $lcheader{"user-agent"}) {
		457	$data{$poster}{agent} = $lcheader{"user-agent"};
		458	}
		459	if (defined $lcheader{"x-newsreader"}) {
		460	$data{$poster}{agent} = $lcheader{"x-newsreader"};
		461	}
		462
		463	# The User Agent for User-Agent by number of posts
		464	my $UA = "unknown";
		465	foreach my $keys ( keys %lcheader )
		466	{
		467	if (defined $lcheader{'user-agent'})
		468	{
		469	$UA = $lcheader{'user-agent'};
		470	}
		471	elsif (defined $lcheader{"x-newsreader"})
		472	{
		473	$UA = $lcheader{"x-newsreader"};
		474	}
		475	elsif (defined $lcheader{'x-mailer'})
		476	{
		477	$UA = $lcheader{'x-mailer'};
		478	}
		479	elsif ((defined $lcheader{'organization'}) &&
		480	($lcheader{'organization'} =~ /groups\.google\|AOL\|Supernews\|WebTV\|compuserve/))
		481	{
		482	$UA = $lcheader{'organization'};
		483	}
		484	elsif ( $lcheader{'message-id'} =~ /pine/i )
		485	{
		486	$UA = "Pine";
		487	} ## Hopefully found UA, else set to unknown
		488	}
		489
		490
		491	$UA = clean($UA);
		492	$UA = get_agent($UA);
		493
		494
		495	sub get_agent {
		496	my $raw = shift;
		497	my $agent = $raw;
		498
		499	## strip http
		500	if ( $raw =~ /.http./ ) {
		501	$raw =~ s!posted via!!i;
		502	$raw =~ s!http://!!g;
		503	$raw =~ s!/!!g;
		504	$raw =~ s! !!g;
		505	}
		506
		507	## Fix Outlook from Mac
		508	if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
		509
		510	## Pick out the popular agents
		511	if ( $raw =~ /(outlook express)/i \|\|
		512	$raw =~ /(microplanet gravity)/i \|\|
		513	$raw =~ /(news rover)/i \|\|
		514	$raw =~ /(forte agent)/i \|\|
		515	$raw =~ /(forte free agent)/i
		516	)
		517	{
		518	$agent = $1;
		519	}
		520	elsif ( $raw =~ /^(
		521	pan
		522	\|sylpheed
		523	\|slrn
		524	\|mozilla
		525	\|knode
		526	\|tin
		527	\|hamster
		528	\|xrn
		529	\|xnews
		530	\|aol
		531	\|gnus
		532	\|krn
		533	\|macsoup
		534	\|messenger
		535	\|openxp
		536	\|pine
		537	\|thoth
		538	\|turnpike
		539	\|winvn
		540	\|vsoup
		541	\|google
		542	\|supernews
		543	\|nn
		544	\|rn
		545	\|007
		546	\|webtv
		547	\|compuserve
		548	)/ix )
		549	{
		550	$agent = $1;
		551	}
		552	else
		553	{
		554	## Clean up unknown agents
		555	if ( $raw =~ m!^(.*?)/! ) {
		556	$agent = $1;
		557	}
		558	elsif ( $raw =~ /^(\w)\d./ )
		559	{
		560	$agent = $1;
		561	}
		562	}
		563
		564	$distinct_agent{$agent}++;
		565	return $agent;
		566	}
		567
		568
		569	# Get all cross-posted newsgroups
		570	for (split /,/, $headers{"Newsgroups"}) {
		571	$crossposts{$_}++; # bump count for each
		572	}
		573
		574	# Get threads
		575	my $thread = $headers{"Subject"};
		576	$thread =~ s/^re: //i; # Remove Re: or re: at start
		577	$thread =~ s/\s+/ /g; # collapse whitespace
		578	$threads{$thread}{count} += 1; # bump count of this subject
		579	$threads{$thread}{size} += $filesize; # bump bytes for this thread
		580
		581	# Is this an original post or a reply?
		582	if (defined $headers{"References"}) {
		583	$replies++;
		584	} else {
		585	$origposts++;
		586	}
		587
		588	# Get the time zone
		589	$_ = $headers{"Date"};
		590	my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
		591	if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
		592	$tz = "UTC";
		593	}
		594	$tz{$tz}++;
		595
		596	#### Now analyse the body text ####
		597	my $insig = 0;
		598	for (@body) {
		599	$totbody += length($_); # bump total body size
		600	next if (/^$>/); # don't count blank lines in body
		601	if ($insig == 1) {
		602	$totsig += length($_); # bump total sig size
		603	# Bill Unruh uses ] quotes, and another poster uses ::
		604	} elsif (/^\s[>\]]/ or /^\s::/) { # are we in a quote line?
		605	$data{$poster}{quoted} += length($_); # bump count of quoted chrs
		606	$totquoted += length($_);
		607	} elsif (/-- /) {
		608	$insig = 1;
		609	} else {
		610	# we must be processing an original line
		611	$data{$poster}{orig} += length($_); # bump count of original chrs
		612	$totorig += length($_);
		613	}
		614	} # end for (@body)
		615
		616	} # getdata
		617
		618	########################################
		619	# Count the User-Agents used, collapsing
		620	# different versions into one per agent.
		621	########################################
		622	sub countagents {
		623	POSTER:
		624	foreach $poster (keys %data) {
		625	foreach my $agent_name (keys %distinct_agent) { # check against known ones
		626	if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) {
		627	$agents{$agent_name}++;
		628	next POSTER;
		629	}
		630	}
		631	$agents{$data{$poster}{agent}}++;
		632	}
		633	} # countagents
		634
		635	############################################
		636	# set orig/total percentages for all posters
		637	############################################
		638	sub fixpercent {
		639	foreach $poster (keys %data) {
		640	my $percent = 100;
		641	if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) {
		642	$percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/
		643	} elsif ($data{$poster}{orig} == 0) {
		644	$percent = 0;
		645	}
		646	$data{$poster}{percent} = $percent;
		647	}
		648	}
		649
		650	##############################
		651	# right pad a string with '.'s
		652	##############################
		653	sub rpad {
		654	# get text to pad, length to pad, pad chr
		655	my ($text, $pad_len, $pad_chr) = @_;
		656	if (length($text) > $pad_len) {
		657	$text = substr($text, 0, $pad_len);
		658	}
		659	my $padded = $text . $pad_chr x ( $pad_len - length( $text ) );
		660	return $padded;
		661	}
		662
		663	#################
		664	# centre a string
		665	#################
		666	sub centred {
		667	my ($text, $width) = @_; # text to centre, size of field to centre in
		668	my $pad_len = ($width - length($text)) / 2; #/
		669	my $centred = " " x $pad_len . $text;
		670	return $centred;
		671	}
		672
		673	##########################
		674	# put commas into a number
		675	##########################
		676	sub commify {
		677	$_ = shift;
		678	1 while s/^(-?\d+)(\d{3})/$1,$2/;
		679	return $_;
		680	}
		681
		682	#########################
		683	# clean
		684	#########################
		685	sub clean {
		686	my $dirty = shift;
		687	my $clean = $dirty;
		688	$clean =~ s/^\s*//;
		689	$clean =~ s/\s*$//;
		690
		691	return $clean;
		692	}
		693
		694
		695	sub usage {
		696
		697	print "usage: newstat.pl newsgroupname\n";
		698	exit 1;
		699	}
		700
		701	###################################
		702	# Write data structures to a file #
		703	###################################
		704	sub writedata {
		705	open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
		706	print OUTF "Data collected from alt.os.linux.mandrake\n\n";
		707	print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
		708	foreach my $name (keys %data) {
		709	print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
		710	}
		711	print OUTF "============================================================================\n";
		712	print OUTF "Thread subjects\n";
		713	print OUTF "----------------------------------------------------------------------------\n";
		714	foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {
		715	print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
		716	}
		717	print OUTF "============================================================================\n";
		718	print OUTF "Cross-posts\n";
		719	print OUTF "----------------------------------------------------------------------------\n";
		720	foreach my $name (sort keys %crossposts) {
		721	print OUTF "$name : $crossposts{$name}\n";
		722	}
		723	print OUTF
		724	print OUTF "============================================================================\n";
		725	print OUTF "User agents\n";
		726	print OUTF "----------------------------------------------------------------------------\n";
		727	foreach my $name (sort keys %agents) {
		728	print OUTF "$name : $agents{$name}\n";
		729	}
		730	print OUTF "============================================================================\n";
		731	print OUTF "Time zones\n";
		732	print OUTF "----------------------------------------------------------------------------\n";
		733	foreach my $name (sort keys %tz) {
		734	print OUTF "$name : $tz{$name}\n";
		735	}
		736	close OUTF;
		737	} # writedata

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 9 - Rev 5