2,6 → 2,7 |
use strict; |
use warnings; |
use utf8; |
use encoding 'utf-8'; |
use Encode; |
|
########################### |
20,6 → 21,10 |
|
############################################################################ |
# RECENT CHANGES # |
# 2011-10-03 PE - Use more compatible shebang |
# - Fixed some Perl::Critic-ized code |
# - Fixed wrong indent for non-ASCII names (TODO: proper dot count) |
# - Formatted source code |
# 2011-07-03 PE - Use Encode to decode/encode MIME encodings |
# - Use warnings, utf8 (just in case) |
# - Documentation update |
239,7 → 244,8 |
printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
print "=" x 76, "\n"; |
$i = 0; |
foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
foreach |
my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
{ |
my $name = substr( $poster, 0, 65 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
292,8 → 298,10 |
&centred( "Top $count responders by original text (> 5 posts)", 76 );
print "=" x 76, "\n"; |
$i = 0; |
foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
keys %data ) |
foreach my $poster ( |
sort { $data{$b}{percent} <=> $data{$a}{percent} } |
keys %data |
) |
{ |
next if $data{$poster}{quoted} == 0; |
next if $data{$poster}{count} < 5; |
322,8 → 330,10 |
&centred( "Bottom $count responders by original text (> 5 posts)", 76 );
print "=" x 76, "\n"; |
$i = 0; |
foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
keys %data ) |
foreach my $poster ( |
sort { $data{$a}{percent} <=> $data{$b}{percent} } |
keys %data |
) |
{ |
next if $data{$poster}{quoted} == 0; |
next if $data{$poster}{count} < 5; |
351,8 → 361,10 |
printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
print "=" x 76, "\n"; |
$i = 0; |
foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} } |
keys %threads ) |
foreach my $thread ( |
sort { $threads{$b}{count} <=> $threads{$a}{count} } |
keys %threads |
) |
{ |
my $name = substr( $thread, 0, 65 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
377,8 → 389,10 |
printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
print "=" x 76, "\n"; |
$i = 0; |
foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} } |
keys %threads ) |
foreach my $thread ( |
sort { $threads{$b}{size} <=> $threads{$a}{size} } |
keys %threads |
) |
{ |
my $name = substr( $thread, 0, 65 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
455,8 → 469,10 |
printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
print "=" x 76, "\n"; |
$i = 0; |
foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
keys %distinct_agent ) |
foreach my $agent ( |
sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
keys %distinct_agent |
) |
{ |
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
$distinct_agent{$agent}, |
515,7 → 531,7 |
} |
|
# now read the file |
open(my $FILE, $filename) or die "Can't open $filename: $!\n"; |
open( my $FILE, $filename ) or die "Can't open $filename: $!\n"; |
while (<$FILE>) |
{ |
$totheader += length($_); # bump total header size |
527,7 → 543,7 |
$lcheader{ clean( lc($key) ) } = clean($val); |
} |
} |
@body = <$FILE>; # slurp up body |
@body = <$FILE>; # slurp up body |
close($FILE); |
} # getarticle |
|
539,15 → 555,15 |
#### First, analyse header fields #### |
|
# Set up this poster if not defined, get counts, sizes |
$poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
my $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
if ( !defined( $data{$poster} ) ) |
{ # seen this one before? |
$data{$poster}{agent} = 'Unknown'; # comes after For: field |
{ # seen this one before? |
$data{$poster}{agent} = 'Unknown'; # comes after For: field |
$data{$poster}{orig} = 0; |
$data{$poster}{quoted} = 0; |
} |
$data{$poster}{count}++; # bump count for this poster |
$data{$poster}{size} += $filesize; # total size of file |
$data{$poster}{count}++; # bump count for this poster |
$data{$poster}{size} += $filesize; # total size of file |
|
# The User-Agent and/or X-Newsreader fields |
# for User-Agent by poster |
696,7 → 712,7 |
# Get the time zone |
$_ = $headers{"Date"}; |
my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
{ |
$tz = "UTC"; |
} |
717,7 → 733,7 |
elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
{ # are we in a quote line? |
$data{$poster}{quoted} += length($_); # bump count of quoted chrs |
$totquoted += length($_); |
$totquoted += length($_); |
} |
elsif (/-- /) |
{ |
728,7 → 744,7 |
|
# we must be processing an original line |
$data{$poster}{orig} += length($_); # bump count of original chrs |
$totorig += length($_); |
$totorig += length($_); |
} |
} # end for (@body) |
|
765,7 → 781,8 |
my $percent = 100; |
if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
{ |
$percent = $data{$poster}{orig} * 100 / |
$percent = |
$data{$poster}{orig} * 100 / |
( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
} |
elsif ( $data{$poster}{orig} == 0 ) |
854,7 → 871,8 |
"----------------------------------------------------------------------------\n"; |
foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
{ |
print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
print $OUTF |
"$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
} |
print $OUTF |
"============================================================================\n"; |