WebSVN - LCARS - Diff - Rev 8 and 9 - /trunk/tools/network/news/newsstat/newsstat.pl

 #!/usr/bin/env perl
 use strict;
 use warnings;
 use utf8;
+use encoding 'utf-8';
 use Encode;
 ###########################
 # newsstat.pl version 0.4.1
 # (Numbers and paths can be configured below.  -- PE)
 ############################################################################
 ############################################################################
 #                       RECENT CHANGES                                     #
+# 2011-10-03  PE  - Use more compatible shebang
+#                 - Fixed some Perl::Critic-ized code
+#                 - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
+#                 - Formatted source code
 # 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
 #                 - Use warnings, utf8 (just in case)
 #                 - Documentation update
 # N/A         NN  - Take newsgroup name as argument
 # 2004-06-19  NN  - newsgroup name is $ARGV[0]
     $count = $topposters;
+  }
   printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
   print "=" x 76, "\n";
   $i = 0;
+  foreach
-  foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
+    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
+  {
     my $name = substr( $poster, 0, 65 );
     printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
       $data{$poster}{count};
     last if ( ++$i == $count );
+  }
   printf "%s\n",
     &centred( "Top $count responders by original text (> 5 posts)", 76 );
   print "=" x 76, "\n";
   $i = 0;
+  foreach my $poster (
-  foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
+    sort { $data{$b}{percent} <=> $data{$a}{percent} }
-    keys %data )
+    keys %data
+    )
+  {
     next if $data{$poster}{quoted} == 0;
     next if $data{$poster}{count} < 5;
     my $name = substr( $poster, 0, 63 );
     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
+  }
   printf "%s\n",
     &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
   print "=" x 76, "\n";
   $i = 0;
+  foreach my $poster (
-  foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
+    sort { $data{$a}{percent} <=> $data{$b}{percent} }
-    keys %data )
+    keys %data
+    )
+  {
     next if $data{$poster}{quoted} == 0;
     next if $data{$poster}{count} < 5;
     my $name = substr( $poster, 0, 63 );
     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
     $count = $topthreads;
+  }
   printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
   print "=" x 76, "\n";
   $i = 0;
+  foreach my $thread (
-  foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} }
+    sort { $threads{$b}{count} <=> $threads{$a}{count} }
-    keys %threads )
+    keys %threads
+    )
+  {
     my $name = substr( $thread, 0, 65 );
     printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
       $threads{$thread}{count};
     last if ( ++$i == $count );
     $count = $topthreads;
+  }
   printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
   print "=" x 76, "\n";
   $i = 0;
+  foreach my $thread (
-  foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} }
+    sort { $threads{$b}{size} <=> $threads{$a}{size} }
-    keys %threads )
+    keys %threads
+    )
+  {
     my $name = substr( $thread, 0, 65 );
     printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
       $threads{$thread}{size} / 1024;    #/
     last if ( ++$i == $count );
     $count = $topagents;
+  }
   printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
   print "=" x 76, "\n";
   $i = 0;
+  foreach my $agent (
-  foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
+    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
-    keys %distinct_agent )
+    keys %distinct_agent
+    )
+  {
     printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
       $distinct_agent{$agent},
       ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
     last if ( ++$i == $count );
+  {
     $latest = $mtime;
+  }
   # now read the file
-  open(my $FILE, $filename) or die "Can't open $filename: $!\n";
+  open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
   while (<$FILE>)
+  {
     $totheader += length($_);    # bump total header size
     last if (/^\s*$/);           # end of header?
     if (/^([^:\s]*):\s+(.*)/)
       my ( $key, $val ) = ( $1, $2 );
       $headers{$key} = decode( 'MIME-Header', $val );
       $lcheader{ clean( lc($key) ) } = clean($val);
+    }
+  }
-  @body = <$FILE>;                # slurp up body
+  @body = <$FILE>;               # slurp up body
   close($FILE);
 }    # getarticle
 ###################################
 # get data from the current article
 sub getdata
+{
 #### First, analyse header fields ####
   # Set up this poster if not defined, get counts, sizes
-  $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
+  my $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
   if ( !defined( $data{$poster} ) )
-  {                                               # seen this one before?
+  {                                                  # seen this one before?
-    $data{$poster}{agent}  = 'Unknown';           # comes after For: field
+    $data{$poster}{agent}  = 'Unknown';              # comes after For: field
     $data{$poster}{orig}   = 0;
     $data{$poster}{quoted} = 0;
+  }
-  $data{$poster}{count}++;                        # bump count for this poster
+  $data{$poster}{count}++;    # bump count for this poster
-  $data{$poster}{size} += $filesize;              # total size of file
+  $data{$poster}{size} += $filesize;    # total size of file
   # The User-Agent and/or X-Newsreader fields
   # for User-Agent by poster
   if ( defined $lcheader{"user-agent"} )
+  {
+  }
   # Get the time zone
   $_ = $headers{"Date"};
   my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
-  if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
+  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
+  {
     $tz = "UTC";
+  }
   $tz{$tz}++;
       # Bill Unruh uses ] quotes, and another poster uses ::
+    }
     elsif ( /^\s*[>\]]/ or /^\s*::/ )
     {                           # are we in a quote line?
       $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
-      $totquoted             += length($_);
+      $totquoted += length($_);
+    }
     elsif (/-- /)
+    {
       $insig = 1;
+    }
     else
+    {
       # we must be processing an original line
       $data{$poster}{orig} += length($_);      # bump count of original chrs
-      $totorig             += length($_);
+      $totorig += length($_);
+    }
   }    # end for (@body)
 }    # getdata
   foreach my $poster ( keys %data )
+  {
     my $percent = 100;
     if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
+    {
+      $percent =
-      $percent = $data{$poster}{orig} * 100 /
+        $data{$poster}{orig} * 100 /
         ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
+    }
     elsif ( $data{$poster}{orig} == 0 )
+    {
       $percent = 0;
   print $OUTF "Thread subjects\n";
   print $OUTF
 "----------------------------------------------------------------------------\n";
   foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
+  {
+    print $OUTF
-    print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
+      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
+  }
   print $OUTF
 "============================================================================\n";
   print $OUTF "Cross-posts\n";
   print $OUTF

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl @ 10 - Rev 8 → 9