1,38 → 1,51 |
#!/usr/bin/perl -w |
use strict; |
use warnings; |
use utf8; |
use Encode; |
|
######################### |
# newsstat.pl version 0.3 |
# newsstat.pl version 0.4 |
|
############################################################################ |
# Collect statistics about a newsgroup (specified by first argument) in |
# the local news spool. Check all articles in the last 30-day period. |
# Rank posters by number of posts and by volume of posts, report on top and |
# bottom 20 posters. Show their name, number of posts, size of posts, |
# percentage of quoted lines. Rank user-agents used, by poster rather than |
# by post. Rank top 20 threads. Rank top 10 cross-posted groups. |
# |
# (Numbers and paths can be configured below. -- PE) |
############################################################################ |
|
|
################################################################### |
# Collect statistics about the alt.os.linux.mandrake newsgroup. |
# Check all articles in the last 7-day period. Rank posters by |
# no. of posts and by volume of posts, report on top and bottom |
# 20 posters. Show their name, no. posts, size of posts, percentage |
# quoted lines. Rank user-agents used, by poster rather than by |
# post. Rank top 10 threads. Rank top 10 cross-posted groups. |
################################################################### |
|
################################################################## |
############################################################################ |
# RECENT CHANGES # |
# 2004/06/19 - newsgroup name is $ARGV[0] |
# 2011-07-03 PE - Use Encode to decode/encode MIME encodings |
# - Use warnings, utf8 (just in case) |
# - Documentation update |
# N/A NN - Take newsgroup name as argument |
# 2004-06-19 NN - newsgroup name is $ARGV[0] |
# - Allow command line flags for subtracting |
# output if not pertinent for a group |
# 2002/11/09 - Put Garry's writedata() function back in. |
# 2002-11-09 NN - Put Garry's writedata() function back in. |
# - added "rn" to my list of UA's |
# - Started using %distinct_agent for both User agent |
# sections |
# - named it newsstat.pl version 0.3 |
# 2002/11/06 - Fixed the earliest/latest file problem by using |
# 2002-11-06 NN - Fixed the earliest/latest file problem by using |
# mtime rather than ctime, and simplifying the logic |
# 2002/11/05 - moved user configurations to the top |
# 2002-11-05 NN - moved user configurations to the top |
# - fixed the cross-posting section |
# - introduced the $newsgroup_name variable which |
# later becomes $news$group |
# - changed $name to $agent_name in countagents() |
# |
# Contributors |
# ------------- |
# NN Nomen nominandum (name to be determined later) |
# PE Thomas 'PointedEars' Lahn <startrek@PointedEars.de> |
|
########### NEXT ############# |
########### TODO ############# |
# Commas in bottom section of report |
# Show date the figures were compiled |
# No. of HTML articles (Content-Type: text/html) |
47,7 → 60,7 |
# include % share in posters by no. of arts |
# include % share in posters by size |
# Total, orig & quoted lines by user agent with per cent |
# Take arguments, i.e. newsgroup name |
# Take more arguments |
####################################################### |
|
###################### USER CONFIGURATIONS ############################ |
174,7 → 187,7 |
print "=" x 76, "\n"; |
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76); |
print "=" x 76, "\n"; |
printf "%s\n", &centred("(stats compiled with a script by Garry Knight)", 76); |
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76); |
print "\n\n"; |
printf "Total posts considered: %s over %d days\n", |
commify($totalposts), $numdays; |
427,7 → 440,7 |
last if (/^\s*$/); # end of header? |
if (/^([^:\s]*):\s+(.*)/) { |
my($key,$val) = ($1,$2); |
$headers{$key} = $val; |
$headers{$key} = decode('MIME-Header', $val); |
$lcheader{clean(lc($key))} = clean($val); |
} |
} |
442,7 → 455,7 |
#### First, analyse header fields #### |
|
# Set up this poster if not defined, get counts, sizes |
$poster = $headers{From}; # get the poster's name |
$poster = encode('UTF-8', $headers{From}); # get the poster's name |
if (!defined($data{$poster})) { # seen this one before? |
$data{$poster}{agent} = 'Unknown'; # comes after From: field |
$data{$poster}{orig} = 0; |
572,7 → 585,7 |
} |
|
# Get threads |
my $thread = $headers{"Subject"}; |
my $thread = encode('UTF-8', $headers{"Subject"}); |
$thread =~ s/^re: //i; # Remove Re: or re: at start |
$thread =~ s/\s+/ /g; # collapse whitespace |
$threads{$thread}{count} += 1; # bump count of this subject |