Subversion Repositories LCARS

Compare Revisions

Last modification

Ignore whitespace Rev 5 → Rev 6

/trunk/tools/network/news/newsstat/newsstat.pl
1,38 → 1,51
#!/usr/bin/perl -w
use strict;
use warnings;
use utf8;
use Encode;
 
#########################
# newsstat.pl version 0.3
# newsstat.pl version 0.4
 
############################################################################
# Collect statistics about a newsgroup (specified by first argument) in
# the local news spool. Check all articles in the last 30-day period.
# Rank posters by number of posts and by volume of posts, report on top and
# bottom 20 posters. Show their name, number of posts, size of posts,
# percentage of quoted lines. Rank user-agents used, by poster rather than
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
#
# (Numbers and paths can be configured below. -- PE)
############################################################################
 
############################################################################
# RECENT CHANGES #
# 2011-07-03 PE - Use Encode to decode/encode MIME encodings
# - Use warnings, utf8 (just in case)
# - Documentation update
# N/A NN - Take newsgroup name as argument
# 2004-06-19 NN - newsgroup name is $ARGV[0]
# - Allow command line flags for subtracting
# output if not pertinent for a group
# 2002-11-09 NN - Put Garry's writedata() function back in.
# - added "rn" to my list of UAs
# - Started using %distinct_agent for both User agent
# sections
# - named it newsstat.pl version 0.3
# 2002-11-06 NN - Fixed the earliest/latest file problem by using
# mtime rather than ctime, and simplifying the logic
# 2002-11-05 NN - moved user configurations to the top
# - fixed the cross-posting section
# - introduced the $newsgroup_name variable which
# later becomes $news$group
# - changed $name to $agent_name in countagents()
#
# Contributors
# -------------
# NN Nomen nominandum (name to be determined later)
# PE Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
 
###################################################################
# Collect statistics about the alt.os.linux.mandrake newsgroup.
# Check all articles in the last 7-day period. Rank posters by
# no. of posts and by volume of posts, report on top and bottom
# 20 posters. Show their name, no. posts, size of posts, percentage
# quoted lines. Rank user-agents used, by poster rather than by
# post. Rank top 10 threads. Rank top 10 cross-posted groups.
###################################################################
 
##################################################################
# RECENT CHANGES #
# 2004/06/19 - newsgroup name is $ARGV[0]
# - Allow command line flags for subtracting
# output if not pertinent for a group
# 2002/11/09 - Put Garry's writedata() function back in.
# - added "rn" to my list of UAs
# - Started using %distinct_agent for both User agent
# sections
# - named it newsstat.pl version 0.3
# 2002/11/06 - Fixed the earliest/latest file problem by using
# mtime rather than ctime, and simplifying the logic
# 2002/11/05 - moved user configurations to the top
# - fixed the cross-posting section
# - introduced the $newsgroup_name variable which
# later becomes $news$group
# - changed $name to $agent_name in countagents()
 
########### NEXT #############
########### TODO #############
# Commas in bottom section of report
# Show date the figures were compiled
# No. of HTML articles (Content-Type: text/html)
47,7 → 60,7
# include % share in posters by no. of articles
# include % share in posters by size
# Total, original & quoted lines by user agent, with percentages
# Take arguments, i.e. newsgroup name
# Take more arguments
#######################################################
 
###################### USER CONFIGURATIONS ############################
54,7 → 67,7
 
# The name of the group to do stats for
my $newsgroup_name = $ARGV[0];
$newsgroup_name or &usage;
$newsgroup_name or &usage;
 
# Check for removal flags
my $ix;
174,7 → 187,7
print "=" x 76, "\n";
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
print "=" x 76, "\n";
printf "%s\n", &centred("(stats compiled with a script by Garry Knight)", 76);
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
print "\n\n";
printf "Total posts considered: %s over %d days\n",
commify($totalposts), $numdays;
427,7 → 440,7
last if (/^\s*$/); # end of header?
if (/^([^:\s]*):\s+(.*)/) {
my($key,$val) = ($1,$2);
$headers{$key} = $val;
$headers{$key} = decode('MIME-Header', $val);
$lcheader{clean(lc($key))} = clean($val);
}
}
442,7 → 455,7
#### First, analyse header fields ####
 
# Set up this poster if not defined, get counts, sizes
$poster = $headers{From}; # get the poster's name
$poster = encode('UTF-8', $headers{From}); # get the poster's name
if (!defined($data{$poster})) { # seen this one before?
$data{$poster}{agent} = 'Unknown'; # comes after For: field
$data{$poster}{orig} = 0;
572,7 → 585,7
}
 
# Get threads
my $thread = $headers{"Subject"};
my $thread = encode('UTF-8', $headers{"Subject"});
$thread =~ s/^re: //i; # Remove Re: or re: at start
$thread =~ s/\s+/ /g; # collapse whitespace
$threads{$thread}{count} += 1; # bump count of this subject