| 1,38 → 1,51 |
| #!/usr/bin/perl -w |
| use strict; |
| use warnings; |
| use utf8; |
| use Encode; |
| |
| ######################### |
| # newsstat.pl version 0.3 |
| # newsstat.pl version 0.4 |
| |
| ############################################################################ |
| # Collect statistics about a newsgroup (specified by first argument) in |
| # the local news spool. Check all articles in the last 30-day period. |
| # Rank posters by number of posts and by volume of posts, report on top and |
| # bottom 20 posters. Show their name, number of posts, size of posts, |
| # percentage of quoted lines. Rank user-agents used, by poster rather than |
| # by post. Rank top 20 threads. Rank top 10 cross-posted groups. |
| # |
| # (Numbers and paths can be configured below. -- PE) |
| ############################################################################ |
| |
| |
| ################################################################### |
| # Collect statistics about the alt.os.linux.mandrake newsgroup. |
| # Check all articles in the last 7-day period. Rank posters by |
| # no. of posts and by volume of posts, report on top and bottom |
| # 20 posters. Show their name, no. posts, size of posts, percentage |
| # quoted lines. Rank user-agents used, by poster rather than by |
| # post. Rank top 10 threads. Rank top 10 cross-posted groups. |
| ################################################################### |
| |
| ################################################################## |
| ############################################################################ |
| # RECENT CHANGES # |
| # 2004/06/19 - newsgroup name is $ARGV[0] |
| # 2011-07-03 PE - Use Encode to decode/encode MIME encodings |
| # - Use warnings, utf8 (just in case) |
| # - Documentation update |
| # N/A NN - Take newsgroup name as argument |
| # 2004-06-19 NN - newsgroup name is $ARGV[0] |
| # - Allow command line flags for subtracting |
| # output if not pertinent for a group |
| # 2002/11/09 - Put Garry's writedata() function back in. |
| # 2002-11-09 NN - Put Garry's writedata() function back in. |
| # - added "rn" to my list of UA's |
| # - Started using %distinct_agent for both User agent |
| # sections |
| # - named it newsstat.pl version 0.3 |
| # 2002/11/06 - Fixed the earliest/latest file problem by using |
| # 2002-11-06 NN - Fixed the earliest/latest file problem by using |
| # mtime rather than ctime, and simplifying the logic |
| # 2002/11/05 - moved user configurations to the top |
| # 2002-11-05 NN - moved user configurations to the top |
| # - fixed the cross-posting section |
| # - introduced the $newsgroup_name variable which |
| # later becomes $news$group |
| # - changed $name to $agent_name in countagents() |
| # |
| # Contributors |
| # ------------- |
| # NN Nomen nominandum (name to be determined later) |
| # PE Thomas 'PointedEars' Lahn <startrek@PointedEars.de> |
| |
| ########### NEXT ############# |
| ########### TODO ############# |
| # Commas in bottom section of report |
| # Show date the figures were compiled |
| # No. of HTML articles (Content-Type: text/html) |
| 47,7 → 60,7 |
| # include % share in posters by no. of arts |
| # include % share in posters by size |
| # Total, orig & quoted lines by user agent with per cent |
| # Take arguments, i.e. newsgroup name |
| # Take more arguments |
| ####################################################### |
| |
| ###################### USER CONFIGURATIONS ############################ |
| 174,7 → 187,7 |
| print "=" x 76, "\n"; |
| printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76); |
| print "=" x 76, "\n"; |
| printf "%s\n", &centred("(stats compiled with a script by Garry Knight)", 76); |
| printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76); |
| print "\n\n"; |
| printf "Total posts considered: %s over %d days\n", |
| commify($totalposts), $numdays; |
| 427,7 → 440,7 |
| last if (/^\s*$/); # end of header? |
| if (/^([^:\s]*):\s+(.*)/) { |
| my($key,$val) = ($1,$2); |
| $headers{$key} = $val; |
| $headers{$key} = decode('MIME-Header', $val); |
| $lcheader{clean(lc($key))} = clean($val); |
| } |
| } |
| 442,7 → 455,7 |
| #### First, analyse header fields #### |
| |
| # Set up this poster if not defined, get counts, sizes |
| $poster = $headers{From}; # get the poster's name |
| $poster = encode('UTF-8', $headers{From}); # get the poster's name |
| if (!defined($data{$poster})) { # seen this one before? |
| $data{$poster}{agent} = 'Unknown'; # comes after For: field |
| $data{$poster}{orig} = 0; |
| 572,7 → 585,7 |
| } |
| |
| # Get threads |
| my $thread = $headers{"Subject"}; |
| my $thread = encode('UTF-8', $headers{"Subject"}); |
| $thread =~ s/^re: //i; # Remove Re: or re: at start |
| $thread =~ s/\s+/ /g; # collapse whitespace |
| $threads{$thread}{count} += 1; # bump count of this subject |