Subversion Repositories LCARS

Compare Revisions

Last modification

Ignore whitespace Rev 8 → Rev 9

/trunk/tools/network/news/newsstat/newsstat.pl
2,6 → 2,7
use strict;
use warnings;
use utf8;
use encoding 'utf-8';
use Encode;
 
###########################
20,6 → 21,10
 
############################################################################
# RECENT CHANGES #
# 2011-10-03 PE - Use more compatible shebang
# - Fixed some Perl::Critic-ized code
# - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
# - Formatted source code
# 2011-07-03 PE - Use Encode to decode/encode MIME encodings
# - Use warnings, utf8 (just in case)
# - Documentation update
239,7 → 244,8
printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
foreach
my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
{
my $name = substr( $poster, 0, 65 );
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
292,8 → 298,10
&centred( "Top $count responders by original text (> 5 posts)", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
keys %data )
foreach my $poster (
sort { $data{$b}{percent} <=> $data{$a}{percent} }
keys %data
)
{
next if $data{$poster}{quoted} == 0;
next if $data{$poster}{count} < 5;
322,8 → 330,10
&centred( "Bottom $count responders by original text (> 5 posts)", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
keys %data )
foreach my $poster (
sort { $data{$a}{percent} <=> $data{$b}{percent} }
keys %data
)
{
next if $data{$poster}{quoted} == 0;
next if $data{$poster}{count} < 5;
351,8 → 361,10
printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} }
keys %threads )
foreach my $thread (
sort { $threads{$b}{count} <=> $threads{$a}{count} }
keys %threads
)
{
my $name = substr( $thread, 0, 65 );
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
377,8 → 389,10
printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} }
keys %threads )
foreach my $thread (
sort { $threads{$b}{size} <=> $threads{$a}{size} }
keys %threads
)
{
my $name = substr( $thread, 0, 65 );
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
455,8 → 469,10
printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
keys %distinct_agent )
foreach my $agent (
sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
keys %distinct_agent
)
{
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
$distinct_agent{$agent},
515,7 → 531,7
}
 
# now read the file
open(my $FILE, $filename) or die "Can't open $filename: $!\n";
open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
while (<$FILE>)
{
$totheader += length($_); # bump total header size
527,7 → 543,7
$lcheader{ clean( lc($key) ) } = clean($val);
}
}
@body = <$FILE>; # slurp up body
@body = <$FILE>; # slurp up body
close($FILE);
} # getarticle
 
539,15 → 555,15
#### First, analyse header fields ####
 
# Set up this poster if not defined, get counts, sizes
$poster = encode( 'UTF-8', $headers{From} ); # get the poster's name
my $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name
if ( !defined( $data{$poster} ) )
{ # seen this one before?
$data{$poster}{agent} = 'Unknown'; # comes after For: field
{ # seen this one before?
$data{$poster}{agent} = 'Unknown'; # comes after For: field
$data{$poster}{orig} = 0;
$data{$poster}{quoted} = 0;
}
$data{$poster}{count}++; # bump count for this poster
$data{$poster}{size} += $filesize; # total size of file
$data{$poster}{count}++; # bump count for this poster
$data{$poster}{size} += $filesize; # total size of file
 
# The User-Agent and/or X-Newsreader fields
# for User-Agent by poster
696,7 → 712,7
# Get the time zone
$_ = $headers{"Date"};
my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
{
$tz = "UTC";
}
717,7 → 733,7
elsif ( /^\s*[>\]]/ or /^\s*::/ )
{ # are we in a quote line?
$data{$poster}{quoted} += length($_); # bump count of quoted chrs
$totquoted += length($_);
$totquoted += length($_);
}
elsif (/-- /)
{
728,7 → 744,7
 
# we must be processing an original line
$data{$poster}{orig} += length($_); # bump count of original chrs
$totorig += length($_);
$totorig += length($_);
}
} # end for (@body)
 
765,7 → 781,8
my $percent = 100;
if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
{
$percent = $data{$poster}{orig} * 100 /
$percent =
$data{$poster}{orig} * 100 /
( $data{$poster}{quoted} + $data{$poster}{orig} ); #/
}
elsif ( $data{$poster}{orig} == 0 )
854,7 → 871,8
"----------------------------------------------------------------------------\n";
foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
{
print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
print $OUTF
"$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
}
print $OUTF
"============================================================================\n";