WebSVN - LCARS - Diff - Rev 6 and 7 - /trunk/tools/network/news/newsstat/newsstat.pl

-#!/usr/bin/perl -w
+#!/usr/bin/perl
 use strict;
 use warnings;
 use utf8;
 use Encode;
 #########################
 # newsstat.pl version 0.4
 ############################################################################
 # Collect statistics about a newsgroup (specified by the first argument) in
 # the local news spool. Check all articles from the last 30 days.
 # Rank posters by number of posts and by volume of posts; report on the top
 # and bottom 20 posters. Show their name, number of posts, size of posts,
 # and percentage of quoted lines. Rank user agents used, by poster rather
 # than by post. Rank the top 20 threads and the top 10 cross-posted groups.
 #
 # (Numbers and paths can be configured below.  -- PE)
 ############################################################################
 ############################################################################
 #                       RECENT CHANGES                                     #
 # 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
 #                 - Use warnings, utf8 (just in case)
 #                 - Documentation update
 # N/A         NN  - Take newsgroup name as argument
 # 2004-06-19  NN  - newsgroup name is $ARGV[0]
 #                 - Allow command-line flags for suppressing
 #                   output sections not pertinent to a group
 # 2002-11-05  NN  - moved user configurations to the top
 #                 - fixed the cross-posting section
 #                 - introduced the $newsgroup_name variable which
 #                   later becomes $news$group
 #                 - changed $name to $agent_name in countagents()
 #
 # Contributors
 # -------------
 # NN  Nomen nominandum (name to be determined later)
 # PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
 my $ix;
 my $j;
 my %skipSec;
 my @skiplist;
 my $args = @ARGV;
-for ( $ix = 1 ; $ix < $args ; $ix++ ) {
+for ( $ix = 1 ; $ix < $args ; $ix++ )
+{
-       $j = $ix + 1;
+  $j = $ix + 1;
-       if ( $ARGV[$ix] eq "-x" ) {
+  if ( $ARGV[$ix] eq "-x" )
+  {
-               @skiplist = split(",",$ARGV[$j]);
+    @skiplist = split( ",", $ARGV[$j] );
+  }
-       } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
+  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
+  {
-               @skiplist = split(",",$1);
+    @skiplist = split( ",", $1 );
-       }
+  }
+}
-foreach(@skiplist) {
+foreach (@skiplist)
+{
   $skipSec{$_} = 1;
+}
 # Leafnode users will want /var/spool/news for this variable.
 my $news = "/var/spool/news/";
 my $topcrossposts = 10;
 # no. of time zones to show
 my $toptz = 10;
 ###################### DATA STRUCTURES ######################
 my $group = $newsgroup_name;
 $group =~ s!\.!/!g;
-my %data;                               # name, count, agent, total, orig, quoted
+my %data;          # name, count, agent, total, orig, quoted
-my %threads;                            # subject, count
+my %threads;       # subject, count
-my %crossposts;                         # group, count
+my %crossposts;    # group, count
-my %tz;                                 # timezones by count
+my %tz;            # timezones by count
-my %headers;                            # holds header of current article
+my %headers;       # holds header of current article
-my %lcheader;                           # holds lowercase headers
+my %lcheader;      # holds lowercase headers
-my @body;                               # holds body of current article
+my @body;          # holds body of current article
-my @sig;                                # holds sig text;
+my @sig;           # holds sig text;
-my $totalposts;                         # total no. of posts considered
+my $totalposts;    # total no. of posts considered
-my $filename;                           # name of current article file
+my $filename;      # name of current article file
-my $filesize;                           # size of current article file
+my $filesize;      # size of current article file
-my $earliest;                           # earliest article we have found
+my $earliest;      # earliest article we have found
-my $latest;                             # latest article we have found
+my $latest;        # latest article we have found
-my $poster;                             # poster we are dealing with
+my $poster;        # poster we are dealing with
-my $totsize = 0;                        # holds total sizes of all files
+my $totsize   = 0; # holds total sizes of all files
-my $totheader = 0;                      # total size of header material
+my $totheader = 0; # total size of header material
-my $totbody = 0;                        # total size of body material
+my $totbody   = 0; # total size of body material
-my $totsig = 0;                         # total size of sig material
+my $totsig    = 0; # total size of sig material
-my $totorig = 0;                        # total size of original material
+my $totorig   = 0; # total size of original material
-my $totquoted = 0;                      # total size of quoted material
+my $totquoted = 0; # total size of quoted material
-my $origposts = 0;                      # total no. of original posts
+my $origposts = 0; # total no. of original posts
-my $replies = 0;                        # total no. of replies
+my $replies   = 0; # total no. of replies
-my $i;                                  # general purpose
+my $i;             # general purpose
 my %distinct_agent;
-my %agents =                            # used to hold counts of User Agents used
+my %agents =       # used to hold counts of User Agents used
+  (
-            (  "KNode"                         => 0,
+  "KNode"                     => 0,
-               "Pan"                           => 0,
+  "Pan"                       => 0,
-               "Mozilla"                       => 0,
+  "Mozilla"                   => 0,
-               "Sylpheed"                      => 0,
+  "Sylpheed"                  => 0,
-               "Gnus"                          => 0,
+  "Gnus"                      => 0,
-               "Forte Agent"                   => 0,
+  "Forte Agent"               => 0,
-               "Forte Free Agent"              => 0,
+  "Forte Free Agent"          => 0,
-               "MicroPlanet Gravity"           => 0,
+  "MicroPlanet Gravity"       => 0,
-               "Microsoft Outlook Express"     => 0,
+  "Microsoft Outlook Express" => 0,
-               "Xnews"                         => 0,
+  "Xnews"                     => 0,
-               "slrn"                          => 0,
+  "slrn"                      => 0,
-               "tin"                           => 0,
+  "tin"                       => 0,
-               "rn"                            => 0,
+  "rn"                        => 0,
-               "NN"                            => 0,
+  "NN"                        => 0,
-               "MacSOUP"                       => 0,
+  "MacSOUP"                   => 0,
-               "Foorum"                        => 0,
+  "Foorum"                    => 0,
-               "MT-NewsWatcher"                => 0,
+  "MT-NewsWatcher"            => 0,
-               "News Rover"                    => 0,
+  "News Rover"                => 0,
-               "WebTV"                         => 0,
+  "WebTV"                     => 0,
-               "Compuserver"                   => 0,
+  "Compuserver"               => 0,
-               "VSoup"                         => 0);
+  "VSoup"                     => 0
+  );
 ######################## MAIN CODE ########################
 $! = 1;
 chdir("$news$group") or die "Can't cd to $news$group: $!\n";
-opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
+opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
-while (defined($filename = readdir(DIR))) {
+while ( defined( $filename = readdir(DIR) ) )
+{
- %lcheader = ();
+  %lcheader = ();
- next unless -f $filename;             # only want real files
+  next unless -f $filename;    # only want real files
- next if ($filename eq ".overview");   # real articles only
+  next if ( $filename eq ".overview" );    # real articles only
- next if (-M $filename > $numdays);    # only want articles <= a certain age
+  next if ( -M $filename > $numdays );     # only want articles <= a certain age
- $earliest = (stat $filename)[9] unless defined ($earliest);
+  $earliest = ( stat $filename )[9] unless defined($earliest);
- $latest   = (stat $filename)[9] unless defined ($latest);
+  $latest   = ( stat $filename )[9] unless defined($latest);
- &getarticle($filename);               # read in the article
+  &getarticle($filename);                  # read in the article
- &getdata;                             # grab the data from the article
+  &getdata;                                # grab the data from the article
- $totalposts++;                        # bump count of articles considered
+  $totalposts++;                           # bump count of articles considered
+}
-closedir(DIR);                          # finished with the directory
+closedir(DIR);                             # finished with the directory
 # post-processing
-&countagents;                           # count agents, collapsing versions
+&countagents;    # count agents, collapsing versions
-&fixpercent;                            # check percentages orig/total for posters
+&fixpercent;     # check percentages orig/total for posters
 &writedata;
 #################### DISPLAY RESULTS #####################
 print "=" x 76, "\n";
-printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
+printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
 print "=" x 76, "\n";
+printf "%s\n",
-printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
+  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
 print "\n\n";
-printf "Total posts considered: %s over %d days\n",
+printf "Total posts considered: %s over %d days\n", commify($totalposts),
-       commify($totalposts), $numdays;
+  $numdays;
-printf "Earliest article: %s\n", scalar localtime($earliest);
+printf "Earliest article: %s\n",               scalar localtime($earliest);
-printf "Latest article:   %s\n", scalar localtime($latest);
+printf "Latest article:   %s\n",               scalar localtime($latest);
-printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
+printf "Original articles: %s, replies: %s\n", commify($origposts),
+  commify($replies);
 printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
-       commify(int($totsize / 1024)), $totsize / 1048576; #
+  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
 printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
-       commify(int($totalposts / $numdays)),
+  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
-       $totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
+  commify( int( $totsize / $totalposts ) );
 my $count = keys %data;
 printf "Total headers: %s KB  bodies: %s KB\n",
-       commify(int($totheader / 1024)), commify(int($totbody / 1024));
+  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
 printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
-       commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
+  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
-       ($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
+  ( $totorig * 100 ) / ( $totorig + $totquoted ),
+  commify( int( $totsig / 1024 ) );
-printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
+printf "Total number of posters: %s, average %s bytes per poster\n",
-       commify(int($totsize / $count)); #/
+  commify($count), commify( int( $totsize / $count ) );     #/
 $count = keys %threads;
-printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
+printf "Total number of threads: %s, average %s bytes per thread\n",
-       commify(int($totsize / $count)); #/
+  commify($count), commify( int( $totsize / $count ) );     #/
 printf "Total number of User-Agents: %d\n", scalar keys %agents;
 print "\n", "=" x 76, "\n";
 ###############################
 # show posters by article count  Sec 1;
 ###############################
-unless ( $skipSec{1} ) {
+unless ( $skipSec{1} )
+{
-    if (keys %data < $topposters) {
+  if ( keys %data < $topposters )
+  {
-      $count = keys %data;
+    $count = keys %data;
+  }
-    } else {
+  else
+  {
-      $count = $topposters;
+    $count = $topposters;
-    }
+  }
-    printf "%s\n", &centred("Top $count posters by number of articles", 76);
+  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
-    print "=" x 76, "\n";
+  print "=" x 76, "\n";
-    $i = 0;
+  $i = 0;
-    foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
+  foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
+  {
-    my $name = substr($poster, 0, 65);
+    my $name = substr( $poster, 0, 65 );
-    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
+      $data{$poster}{count};
-    last if (++$i == $count);
+    last if ( ++$i == $count );
- }
+  }
- print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n";
+}
 ################################
 # show posters by size in Kbytes Sec 2;
 ################################
-unless ( $skipSec{2} ) {
+unless ( $skipSec{2} )
+{
-  if (keys %data < $topposters) {
+  if ( keys %data < $topposters )
+  {
     $count = keys %data;
+  }
-  } else {
+  else
+  {
     $count = $topposters;
+  }
-  printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
+  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
   print "=" x 76, "\n";
   $i = 0;
-  foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
+  foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
+  {
-    my $name = substr($poster, 0, 62);
+    my $name = substr( $poster, 0, 62 );
-    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
+      $data{$poster}{size} / 1024;    #/
-    last if (++$i == $count);
+    last if ( ++$i == $count );
+  }
   print "\n", "=" x 76, "\n";
+}
 ####################################
 # show top posters for original text
 ####################################
-unless ( $skipSec{3} ) {
+unless ( $skipSec{3} )
+{
-   if (keys %data < $topposters) {
+  if ( keys %data < $topposters )
+  {
-     $count = keys %data;
+    $count = keys %data;
+  }
-   } else {
+  else
+  {
-     $count = $topposters;
+    $count = $topposters;
-   }
+  }
+  printf "%s\n",
-   printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
+    &centred( "Top $count responders by original text (> 5 posts)", 76 );
-   print "=" x 76, "\n";
+  print "=" x 76, "\n";
-   $i = 0;
+  $i = 0;
-   foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
+  foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
+    keys %data )
+  {
-     next if $data{$poster}{quoted} == 0;
+    next if $data{$poster}{quoted} == 0;
-     next if $data{$poster}{count} < 5;
+    next if $data{$poster}{count} < 5;
-     my $name = substr($poster, 0, 63);
+    my $name = substr( $poster, 0, 63 );
-     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
+    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
+      $data{$poster}{percent};
-     last if (++$i == $count);
+    last if ( ++$i == $count );
-   }
+  }
-   print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n";
+}
 #######################################
 # show bottom posters for original text
 #######################################
-unless ( $skipSec{4} ) {
+unless ( $skipSec{4} )
+{
-  if (keys %data < $topposters) {
+  if ( keys %data < $topposters )
+  {
     $count = keys %data;
+  }
-  } else {
+  else
+  {
     $count = $topposters;
+  }
+  printf "%s\n",
-  printf "%s\n", &centred("Bottom $count responders by original text  (> 5 posts)", 76);
+    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
   print "=" x 76, "\n";
   $i = 0;
-  foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
+  foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
+    keys %data )
+  {
     next if $data{$poster}{quoted} == 0;
     next if $data{$poster}{count} < 5;
-    my $name = substr($poster, 0, 63);
+    my $name = substr( $poster, 0, 63 );
-    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
+    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
+      $data{$poster}{percent};
-    last if (++$i == $count);
+    last if ( ++$i == $count );
+  }
   print "\n", "=" x 76, "\n";
+}
 ####################################
 # show threads by number of articles
 ####################################
-unless ( $skipSec{5} ) {
+unless ( $skipSec{5} )
+{
-  if (keys %threads < $topthreads) {
+  if ( keys %threads < $topthreads )
+  {
     $count = keys %threads;
+  }
-  } else {
+  else
+  {
     $count = $topthreads;
+  }
-  printf "%s\n", &centred("Top $count threads by no. of articles", 76);
+  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
   print "=" x 76, "\n";
   $i = 0;
-  foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
+  foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} }
+    keys %threads )
+  {
-    my $name = substr($thread, 0, 65);
+    my $name = substr( $thread, 0, 65 );
-    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
+      $threads{$thread}{count};
-    last if (++$i == $count);
+    last if ( ++$i == $count );
+  }
   print "\n", "=" x 76, "\n";
+}
 ################################
 # show threads by size in Kbytes
 ################################
-unless ( $skipSec{6} ) {
+unless ( $skipSec{6} )
+{
-  if (keys %threads < $topthreads) {
+  if ( keys %threads < $topthreads )
+  {
     $count = keys %threads;
+  }
-  } else {
+  else
+  {
     $count = $topthreads;
+  }
-  printf "%s\n", &centred("Top $count threads by size in KB", 76);
+  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
   print "=" x 76, "\n";
   $i = 0;
-  foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
+  foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} }
+    keys %threads )
+  {
-    my $name = substr($thread, 0, 65);
+    my $name = substr( $thread, 0, 65 );
-    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
+      $threads{$thread}{size} / 1024;    #/
-    last if (++$i == $count);
+    last if ( ++$i == $count );
+  }
   print "\n", "=" x 76, "\n";
+}
 #################################
 # show top 10 cross-posted groups
 #################################
-unless ( $skipSec{7} ) {
+unless ( $skipSec{7} )
+{
-delete $crossposts{"$newsgroup_name"};  # don't include ours
+  delete $crossposts{"$newsgroup_name"};    # don't include ours
-if (keys %crossposts < $topcrossposts) {
+  if ( keys %crossposts < $topcrossposts )
+  {
- $count = keys %crossposts;
+    $count = keys %crossposts;
+  }
-} else {
+  else
+  {
- $count = $topcrossposts;
+    $count = $topcrossposts;
-}
+  }
-printf "%s\n", &centred("Top $count cross-posted groups", 76);
+  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
-print "=" x 76, "\n";
+  print "=" x 76, "\n";
-$i = 0;
+  $i = 0;
+  foreach
-foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
+    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
+  {
- printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
+      $crossposts{$name};
- last if (++$i == $count);
+    last if ( ++$i == $count );
-}
+  }
-print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n";
+}
 #######################
 #show agents and counts
 #######################
-unless ( $skipSec{8} ) {
+unless ( $skipSec{8} )
+{
-if (keys %agents < $topagents) {
+  if ( keys %agents < $topagents )
+  {
- $count = keys %agents;
+    $count = keys %agents;
+  }
-} else {
+  else
+  {
- $count = $topagents;
+    $count = $topagents;
-}
+  }
-printf "%s\n", &centred("Top $count User Agents by poster", 76);
+  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
-print "=" x 76, "\n";
+  print "=" x 76, "\n";
-$i = 0;
+  $i = 0;
-foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
+  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
+  {
- printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
+      $agents{$agent};
- last if (++$i == $count);
+    last if ( ++$i == $count );
-}
+  }
-print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n";
+}
 #######################
 #show distinct agents
 #######################
-unless ( $skipSec{9} ) {
+unless ( $skipSec{9} )
+{
-if (keys %distinct_agent < $topagents) {
+  if ( keys %distinct_agent < $topagents )
+  {
- $count = keys %distinct_agent;
+    $count = keys %distinct_agent;
+  }
-} else {
+  else
+  {
- $count = $topagents;
+    $count = $topagents;
-}
+  }
-printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
+  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
-print "=" x 76, "\n";
+  print "=" x 76, "\n";
-$i = 0;
+  $i = 0;
-foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
+  foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
+    keys %distinct_agent )
+  {
-printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
+    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
+      $distinct_agent{$agent},
+      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
- last if (++$i == $count);
+    last if ( ++$i == $count );
-}
+  }
-print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n";
+}
 ##########################
 #show timezones and counts
 ##########################
-unless ( $skipSec{10} ) {
+unless ( $skipSec{10} )
+{
-if (keys %tz < $toptz) {
+  if ( keys %tz < $toptz )
+  {
- $count = keys %tz;
+    $count = keys %tz;
+  }
-} else {
+  else
+  {
- $count = $toptz;
+    $count = $toptz;
-}
+  }
-printf "%s\n", &centred("Top 10 time zones", 76);
+  printf "%s\n", &centred( "Top 10 time zones", 76 );
-print "=" x 76, "\n";
+  print "=" x 76, "\n";
-$i = 0;
+  $i = 0;
-foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
+  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
+  {
- printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
+    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
- last if (++$i == $count);
+    last if ( ++$i == $count );
-}
+  }
-print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n";
+}
 ################################ SUBROUTINES ################################
 #######################################
 # get current article's header and body
 #######################################
-sub getarticle {
+sub getarticle
+{
- %headers = ();                        # dump old headers
+  %headers = ();    # dump old headers
- my $filename = shift;                 # get the name of the file
+  my $filename = shift;    # get the name of the file
-# get stats about the file itself
+  # get stats about the file itself
- $filesize = -s $filename;             # get total size of file
+  $filesize = -s $filename;    # get total size of file
- $totsize += $filesize;                # bump total sizes of all files
+  $totsize += $filesize;       # bump total sizes of all files
- my $mtime = (stat $filename)[9];
+  my $mtime = ( stat $filename )[9];
- if ( $mtime < $earliest ) {
+  if ( $mtime < $earliest )
+  {
     $earliest = $mtime;
+  }
- } elsif ( $mtime > $latest ) {
+  elsif ( $mtime > $latest )
+  {
     $latest = $mtime;
- }
+  }
-# now read the file
+  # now read the file
- open(FILE, $filename) or die "Can't open $filename: $!\n";
+  open( FILE, $filename ) or die "Can't open $filename: $!\n";
- while (<FILE>) {
+  while (<FILE>)
+  {
-   $totheader += length($_);           # bump total header size
+    $totheader += length($_);    # bump total header size
-   last if (/^\s*$/);                  # end of header?
+    last if (/^\s*$/);           # end of header?
-   if (/^([^:\s]*):\s+(.*)/) {
+    if (/^([^:\s]*):\s+(.*)/)
+    {
-     my($key,$val) = ($1,$2);
+      my ( $key, $val ) = ( $1, $2 );
-     $headers{$key} = decode('MIME-Header', $val);
+      $headers{$key} = decode( 'MIME-Header', $val );
-     $lcheader{clean(lc($key))} = clean($val);
+      $lcheader{ clean( lc($key) ) } = clean($val);
-  }
+    }
- }
+  }
- @body = <FILE>;                       # slurp up body
+  @body = <FILE>;                # slurp up body
- close(FILE);
+  close(FILE);
-} # getarticle
+}    # getarticle
 ###################################
 # get data from the current article
 ###################################
-sub getdata {
+sub getdata
+{
 #### First, analyse header fields ####
-# Set up this poster if not defined, get counts, sizes
+  # Set up this poster if not defined, get counts, sizes
- $poster = encode('UTF-8', $headers{From});             # get the poster's name
+  $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
+  if ( !defined( $data{$poster} ) )
- if (!defined($data{$poster})) {       # seen this one before?
+  {                                               # seen this one before?
-   $data{$poster}{agent} = 'Unknown';  # comes after For: field
+    $data{$poster}{agent}  = 'Unknown';           # comes after For: field
-   $data{$poster}{orig} = 0;
+    $data{$poster}{orig}   = 0;
-   $data{$poster}{quoted} = 0;
+    $data{$poster}{quoted} = 0;
- }
+  }
- $data{$poster}{count}++;      # bump count for this poster
+  $data{$poster}{count}++;                        # bump count for this poster
- $data{$poster}{size} += $filesize;    # total size of file
+  $data{$poster}{size} += $filesize;              # total size of file
-# The User-Agent and/or X-Newsreader fields
+  # The User-Agent and/or X-Newsreader fields
-# for User-Agent by poster
+  # for User-Agent by poster
- if (defined $lcheader{"user-agent"}) {
+  if ( defined $lcheader{"user-agent"} )
+  {
-   $data{$poster}{agent} = $lcheader{"user-agent"};
+    $data{$poster}{agent} = $lcheader{"user-agent"};
- }
+  }
- if (defined $lcheader{"x-newsreader"}) {
+  if ( defined $lcheader{"x-newsreader"} )
+  {
-   $data{$poster}{agent} = $lcheader{"x-newsreader"};
+    $data{$poster}{agent} = $lcheader{"x-newsreader"};
- }
+  }
-# The User Agent for User-Agent by number of posts
+  # The User Agent for User-Agent by number of posts
- my $UA = "unknown";
+  my $UA = "unknown";
- foreach my $keys ( keys %lcheader )
+  foreach my $keys ( keys %lcheader )
- {
+  {
-   if (defined $lcheader{'user-agent'})
+    if ( defined $lcheader{'user-agent'} )
-   {
+    {
-           $UA = $lcheader{'user-agent'};
+      $UA = $lcheader{'user-agent'};
-   }
+    }
-    elsif (defined $lcheader{"x-newsreader"})
+    elsif ( defined $lcheader{"x-newsreader"} )
-   {
+    {
-           $UA = $lcheader{"x-newsreader"};
+      $UA = $lcheader{"x-newsreader"};
-   }
+    }
-   elsif (defined $lcheader{'x-mailer'})
+    elsif ( defined $lcheader{'x-mailer'} )
-   {
+    {
-           $UA = $lcheader{'x-mailer'};
+      $UA = $lcheader{'x-mailer'};
-   }
+    }
+    elsif (
-   elsif ((defined $lcheader{'organization'}) &&
+      ( defined $lcheader{'organization'} )
+      && ( $lcheader{'organization'} =~
-         ($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/))
+        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
+      )
-   {
+    {
-           $UA = $lcheader{'organization'};
+      $UA = $lcheader{'organization'};
-   }
+    }
-   elsif ( $lcheader{'message-id'} =~ /pine/i )
+    elsif ( $lcheader{'message-id'} =~ /pine/i )
-   {
+    {
-           $UA = "Pine";
+      $UA = "Pine";
-   } ## Hopefully found UA, else set to unknown
+    }    ## Hopefully found UA, else set to unknown
- }
+  }
-$UA = clean($UA);
+  $UA = clean($UA);
-$UA = get_agent($UA);
+  $UA = get_agent($UA);
-sub get_agent {
+  sub get_agent
+  {
- my $raw = shift;
+    my $raw   = shift;
- my $agent = $raw;
+    my $agent = $raw;
- ## strip http
+    ## strip http
- if ( $raw =~ /.*http.*/ ) {
+    if ( $raw =~ /.*http.*/ )
+    {
-   $raw =~ s!posted via!!i;
+      $raw =~ s!posted via!!i;
-   $raw =~ s!http://!!g;
+      $raw =~ s!http://!!g;
-   $raw =~ s!/!!g;
+      $raw =~ s!/!!g;
-   $raw =~ s! !!g;
+      $raw =~ s! !!g;
- }
+    }
- ## Fix Outlook from Mac
+    ## Fix Outlook from Mac
- if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
+    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
- ## Pick out the popular agents
+    ## Pick out the popular agents
- if ( $raw =~ /(outlook express)/i     ||
+    if ( $raw =~ /(outlook express)/i
-      $raw =~ /(microplanet gravity)/i ||
+      || $raw =~ /(microplanet gravity)/i
-      $raw =~ /(news rover)/i          ||
+      || $raw =~ /(news rover)/i
-      $raw =~ /(forte agent)/i         ||
+      || $raw =~ /(forte agent)/i
-      $raw =~ /(forte free agent)/i
+      || $raw =~ /(forte free agent)/i )
-    )
+    {
- {
-       $agent = $1;
+      $agent = $1;
- }
+    }
+    elsif (
- elsif ( $raw =~ /^(
+      $raw =~ /^(
         pan
        |sylpheed
        |slrn
        |mozilla
        |knode
        |nn
        |rn
        |007
        |webtv
        |compuserve
-       )/ix )
+       )/ix
+      )
- {
+    {
-       $agent = $1;
+      $agent = $1;
- }
+    }
- else
+    else
- {
+    {
- ## Clean up unknown agents
+      ## Clean up unknown agents
-       if ( $raw =~ m!^(.*?)/! ) {
+      if ( $raw =~ m!^(.*?)/! )
+      {
-             $agent = $1;
+        $agent = $1;
-       }
+      }
-       elsif ( $raw =~ /^(\w*)\d.*/ )
+      elsif ( $raw =~ /^(\w*)\d.*/ )
-       {
+      {
-            $agent = $1;
+        $agent = $1;
-       }
+      }
-  }
+    }
-$distinct_agent{$agent}++;
+    $distinct_agent{$agent}++;
-return $agent;
+    return $agent;
-}
+  }
+  # Get all cross-posted newsgroups
+  for ( split /,/, $headers{"Newsgroups"} )
+  {
+    $crossposts{$_}++;    # bump count for each
+  }
-# Get all cross-posted newsgroups
- for (split /,/, $headers{"Newsgroups"}) {
-   $crossposts{$_}++;          # bump count for each
- }
-# Get threads
+  # Get threads
- my $thread = encode('UTF-8', $headers{"Subject"});
+  my $thread = encode( 'UTF-8', $headers{"Subject"} );
- $thread =~ s/^re: //i;                # Remove Re: or re: at start
+  $thread =~ s/^re: //i;    # Remove Re: or re: at start
- $thread =~ s/\s+/ /g;                 # collapse whitespace
+  $thread =~ s/\s+/ /g;     # collapse whitespace
- $threads{$thread}{count} += 1;        # bump count of this subject
+  $threads{$thread}{count} += 1;            # bump count of this subject
- $threads{$thread}{size} += $filesize; # bump bytes for this thread
+  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
-# Is this an original post or a reply?
+  # Is this an original post or a reply?
- if (defined $headers{"References"}) {
+  if ( defined $headers{"References"} )
+  {
-   $replies++;
+    $replies++;
+  }
- } else {
+  else
+  {
-   $origposts++;
+    $origposts++;
- }
+  }
-# Get the time zone
+  # Get the time zone
- $_ = $headers{"Date"};
+  $_ = $headers{"Date"};
- my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
+  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
- if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
+  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
+  {
-   $tz = "UTC";
+    $tz = "UTC";
- }
+  }
- $tz{$tz}++;
+  $tz{$tz}++;
 #### Now analyse the body text ####
- my $insig = 0;
+  my $insig = 0;
- for (@body) {
+  for (@body)
+  {
-   $totbody += length($_);             # bump total body size
+    $totbody += length($_);    # bump total body size
-   next if (/^$>/);                    # don't count blank lines in body
+    next if (/^$>/);           # don't count blank lines in body
-   if ($insig == 1) {
+    if ( $insig == 1 )
-     $totsig += length($_);            # bump total sig size
-# Bill Unruh uses ] quotes, and another poster uses ::
-   } elsif (/^\s*[>\]]/ or /^\s*::/) {         # are we in a quote line?
-     $data{$poster}{quoted} += length($_);     # bump count of quoted chrs
-     $totquoted += length($_);
-   } elsif (/-- /) {
-     $insig = 1;
-   } else {
+    {
-# we must be processing an original line
-     $data{$poster}{orig} += length($_); # bump count of original chrs
+      $totsig += length($_);    # bump total sig size
-     $totorig += length($_);
-   }
- } # end for (@body)
+      # Bill Unruh uses ] quotes, and another poster uses ::
+    }
+    elsif ( /^\s*[>\]]/ or /^\s*::/ )
+    {                           # are we in a quote line?
+      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
+      $totquoted             += length($_);
+    }
+    elsif (/-- /)
+    {
+      $insig = 1;
+    }
+    else
+    {
+      # we must be processing an original line
+      $data{$poster}{orig} += length($_);      # bump count of original chrs
+      $totorig             += length($_);
+    }
+  }    # end for (@body)
-} # getdata
+}    # getdata
 ########################################
 # Count the User-Agents used, collapsing
 # different versions into one per agent.
 ########################################
# Tally, once per poster (not per post), which user agent each poster uses.
# Reads %data (per-poster records with an {agent} field) and the known agent
# names in %distinct_agent; writes the counts into %agents.  A poster whose
# agent string contains a known name is counted under that name, otherwise
# under the poster's own agent string.
sub countagents
{
  # Try the longest known names first (ties broken alphabetically) so the
  # result no longer depends on hash ordering: "rn" is a substring of
  # "slrn", and whichever name happened to come first used to win.
  my @known_agents =
    sort { length($b) <=> length($a) or $a cmp $b } keys %distinct_agent;

POSTER:
  foreach my $name ( keys %data )
  {
    # A missing agent is counted under the empty string, as before, but
    # without "uninitialized value" warnings.
    my $agent = $data{$name}{agent};
    $agent = '' unless defined $agent;

    foreach my $known (@known_agents)
    {
      # An empty name would match every agent string (and, as a regex,
      # an empty pattern silently reuses the last successful match).
      next if $known eq '';

      # Literal, case-sensitive substring test, same as /\Q$known\E/.
      if ( index( $agent, $known ) >= 0 )
      {
        $agents{$known}++;
        next POSTER;
      }
    }

    # No known agent matched: count the agent string as is.
    $agents{$agent}++;
  }
  return;
}    # countagents
 ############################################
 # set orig/total percentages for all posters
 ############################################
# Store in $data{poster}{percent} the share of original (non-quoted)
# characters among all original plus quoted characters, for every poster
# in %data.  The result is 0 when the poster has no original text (which
# includes the case of no counted text at all) and 100 when nothing was
# quoted.
sub fixpercent
{
  foreach my $name ( keys %data )
  {
    # Posters who never quoted (or never wrote an original line) have no
    # such entry; treat it as 0 instead of warning about undef.
    my $orig   = $data{$name}{orig}   || 0;
    my $quoted = $data{$name}{quoted} || 0;
    my $total  = $orig + $quoted;

    # Guard the division: an empty total means 0 percent.
    $data{$name}{percent} = $total ? $orig * 100 / $total : 0;
  }
  return;
}
 ##############################
 # right pad a string with '.'s
 ##############################
# Right-pad a string to a fixed width.
#   $text    - text to pad; cut to $pad_len characters if longer
#   $pad_len - width of the result
#   $pad_chr - character to pad with (optional, defaults to '.')
# Returns the padded (or truncated) string.
sub rpad
{
  my ( $text, $pad_len, $pad_chr ) = @_;

  # Missing arguments used to produce "uninitialized value" warnings.
  $text    = ''  unless defined $text;
  $pad_chr = '.' unless defined $pad_chr;

  if ( length($text) > $pad_len )
  {
    $text = substr( $text, 0, $pad_len );
  }

  # After the truncation above the repeat count cannot be negative for a
  # non-negative $pad_len.
  return $text . $pad_chr x ( $pad_len - length($text) );
}
 #################
 # centre a string
 #################
# Centre a string in a field by prefixing it with spaces.
#   $text  - text to centre
#   $width - width of the field to centre in
# Returns the text with left padding only (no trailing spaces are added).
# Text wider than the field is returned unchanged.
sub centred
{
  my ( $text, $width ) = @_;

  # Half of the free space, rounded down.
  my $pad_len = int( ( $width - length($text) ) / 2 );

  # A negative repeat count yields the same empty padding but warns
  # ("Negative repeat count does nothing") on newer perls.
  $pad_len = 0 if $pad_len < 0;

  return ' ' x $pad_len . $text;
}
 ##########################
 # put commas into a number
 ##########################
# Insert thousands separators (commas) into a number.
#   commify(1234567) returns "1,234,567"; a leading minus sign is kept.
# Works on a private copy, so the caller's $_ (for instance the current
# element of an enclosing for loop) is no longer overwritten.
sub commify
{
  my $number = shift;

  # Each pass splits off the last three digits of the leading digit run;
  # repeat until fewer than four digits remain in front of the first comma.
  1 while $number =~ s/^(-?\d+)(\d{3})/$1,$2/;

  return $number;
}
 #########################
 # clean
 #########################
# Strip leading and trailing whitespace from a string.
# Returns the trimmed copy; the argument itself is not modified.
# An undefined argument yields the empty string, without warnings.
sub clean
{
  my $text = shift;
  return '' unless defined $text;

  $text =~ s/^\s+//;
  $text =~ s/\s+$//;
  return $text;
}
# Print a one-line usage message and terminate with exit status 1.
# The message goes to STDERR because the non-zero status signals an error;
# the script name in it is spelled like the script itself (newsstat.pl).
sub usage
{
  print STDERR "usage: newsstat.pl newsgroupname\n";
  exit 1;
}
 ###################################
 # Write data structures to a file #
 ###################################
# Dump the collected data structures to a text file (debugging aid).
#   $path  - output file (optional, defaults to /tmp/XDATA)
#   $group - newsgroup name for the heading (optional, defaults to the
#            formerly hard-coded alt.os.linux.mandrake)
# Writes, in this order: the per-poster records from %data, then the
# thread subjects from %threads, then the counts in %crossposts, %agents
# and %tz.  Dies if the file cannot be created or completely written.
#
# NOTE(review): the default is a fixed name in a world-writable directory;
# pass an explicit $path where that matters.
sub writedata
{
  my ( $path, $group ) = @_;
  $path  = '/tmp/XDATA'            unless defined $path;
  $group = 'alt.os.linux.mandrake' unless defined $group;

  my $double_rule =
"============================================================================\n";
  my $single_rule =
"----------------------------------------------------------------------------\n";

  # Three-argument open with a lexical handle: the file name can never be
  # mistaken for a mode, and the handle cannot clash with other OUTF users.
  open my $out, '>', $path or die "Can't create $path: $!\n";

  print {$out} "Data collected from $group\n\n";
  print {$out}
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
  foreach my $name ( keys %data )
  {
    # Fields a poster never got (e.g. no quoted text) print as empty
    # instead of raising "uninitialized value" warnings.
    my @fields = map { defined $_ ? $_ : '' }
      @{ $data{$name} }{qw(agent count size orig quoted percent)};
    print {$out} join( ' : ', $name, @fields ) . "\n";
  }

  print {$out} $double_rule . "Thread subjects\n" . $single_rule;
  foreach my $thread ( sort { lc($a) cmp lc($b) } keys %threads )
  {
    print {$out}
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
  }

  # The remaining sections share one layout: heading between two rules,
  # then "name : count" lines sorted by name.  The "User agents" section
  # used to be introduced by "print OUTF print OUTF ...", which wrote the
  # rule followed by a stray "1" (the inner print's return value).
  my @count_sections = (
    [ 'Cross-posts', \%crossposts ],
    [ 'User agents', \%agents ],
    [ 'Time zones',  \%tz ],
  );
  foreach my $section (@count_sections)
  {
    my ( $title, $count_of ) = @{$section};
    print {$out} $double_rule . "$title\n" . $single_rule;
    foreach my $name ( sort keys %{$count_of} )
    {
      print {$out} "$name : $count_of->{$name}\n";
    }
  }

  # Buffered write errors (e.g. a full disk) only surface at close.
  close $out or die "Can't close $path: $!\n";
  return;
}    # writedata

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 8 - Rev 6 → 7