| 2,7 → 2,6 |
| use strict; |
| use warnings; |
| use utf8; |
| use encoding 'utf-8'; |
| use Encode; |
| |
| ########################### |
| 21,10 → 20,6 |
| |
| ############################################################################ |
| # RECENT CHANGES # |
| # 2011-10-03 PE - Use more compatible shebang |
| # - Fixed some Perl::Critic-ized code |
| # - Fixed wrong indent for non-ASCII names (TODO: proper dot count) |
| # - Formatted source code |
| # 2011-07-03 PE - Use Encode to decode/encode MIME encodings |
| # - Use warnings, utf8 (just in case) |
| # - Documentation update |
| 244,8 → 239,7 |
| printf "%s\n", &centred( "Top $count posters by number of articles", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach |
| my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| { |
| my $name = substr( $poster, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| 298,10 → 292,8 |
| &centred( "Top $count responders by original text (> 5 posts)", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $poster ( |
| sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| keys %data |
| ) |
| foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| keys %data ) |
| { |
| next if $data{$poster}{quoted} == 0; |
| next if $data{$poster}{count} < 5; |
| 330,10 → 322,8 |
| &centred( "Bottom $count responders by original text (> 5 posts)", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $poster ( |
| sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| keys %data |
| ) |
| foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| keys %data ) |
| { |
| next if $data{$poster}{quoted} == 0; |
| next if $data{$poster}{count} < 5; |
| 361,10 → 351,8 |
| printf "%s\n", &centred( "Top $count threads by no. of articles", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $thread ( |
| sort { $threads{$b}{count} <=> $threads{$a}{count} } |
| keys %threads |
| ) |
| foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} } |
| keys %threads ) |
| { |
| my $name = substr( $thread, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 389,10 → 377,8 |
| printf "%s\n", &centred( "Top $count threads by size in KB", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $thread ( |
| sort { $threads{$b}{size} <=> $threads{$a}{size} } |
| keys %threads |
| ) |
| foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} } |
| keys %threads ) |
| { |
| my $name = substr( $thread, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 469,10 → 455,8 |
| printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $agent ( |
| sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| keys %distinct_agent |
| ) |
| foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| keys %distinct_agent ) |
| { |
| printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
| $distinct_agent{$agent}, |
| 531,7 → 515,7 |
| } |
| |
| # now read the file |
| open( my $FILE, $filename ) or die "Can't open $filename: $!\n"; |
| open(my $FILE, $filename) or die "Can't open $filename: $!\n"; |
| while (<$FILE>) |
| { |
| $totheader += length($_); # bump total header size |
| 543,7 → 527,7 |
| $lcheader{ clean( lc($key) ) } = clean($val); |
| } |
| } |
| @body = <$FILE>; # slurp up body |
| @body = <$FILE>; # slurp up body |
| close($FILE); |
| } # getarticle |
| |
| 555,15 → 539,15 |
| #### First, analyse header fields #### |
| |
| # Set up this poster if not defined, get counts, sizes |
| my $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
| $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
| if ( !defined( $data{$poster} ) ) |
| { # seen this one before? |
| $data{$poster}{agent} = 'Unknown'; # comes after For: field |
| { # seen this one before? |
| $data{$poster}{agent} = 'Unknown'; # comes after For: field |
| $data{$poster}{orig} = 0; |
| $data{$poster}{quoted} = 0; |
| } |
| $data{$poster}{count}++; # bump count for this poster |
| $data{$poster}{size} += $filesize; # total size of file |
| $data{$poster}{count}++; # bump count for this poster |
| $data{$poster}{size} += $filesize; # total size of file |
| |
| # The User-Agent and/or X-Newsreader fields |
| # for User-Agent by poster |
| 712,7 → 696,7 |
| # Get the time zone |
| $_ = $headers{"Date"}; |
| my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
| if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
| if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
| { |
| $tz = "UTC"; |
| } |
| 733,7 → 717,7 |
| elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
| { # are we in a quote line? |
| $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
| $totquoted += length($_); |
| $totquoted += length($_); |
| } |
| elsif (/-- /) |
| { |
| 744,7 → 728,7 |
| |
| # we must be processing an original line |
| $data{$poster}{orig} += length($_); # bump count of original chrs |
| $totorig += length($_); |
| $totorig += length($_); |
| } |
| } # end for (@body) |
| |
| 781,8 → 765,7 |
| my $percent = 100; |
| if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
| { |
| $percent = |
| $data{$poster}{orig} * 100 / |
| $percent = $data{$poster}{orig} * 100 / |
| ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
| } |
| elsif ( $data{$poster}{orig} == 0 ) |
| 871,8 → 854,7 |
| "----------------------------------------------------------------------------\n"; |
| foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| { |
| print $OUTF |
| "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
| print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |