Subversion Repositories LCARS

Compare Revisions

Last modification

Ignore whitespace Rev 9 → Rev 8

/trunk/tools/network/news/newsstat/newsstat.pl
2,7 → 2,6
use strict;
use warnings;
use utf8;
use encoding 'utf-8';
use Encode;
 
###########################
21,10 → 20,6
 
############################################################################
# RECENT CHANGES #
# 2011-10-03 PE - Use more compatible shebang
# - Fixed some Perl::Critic-ized code
# - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
# - Formatted source code
# 2011-07-03 PE - Use Encode to decode/encode MIME encodings
# - Use warnings, utf8 (just in case)
# - Documentation update
244,8 → 239,7
printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
print "=" x 76, "\n";
$i = 0;
foreach
my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
{
my $name = substr( $poster, 0, 65 );
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
298,10 → 292,8
&centred( "Top $count responders by original text (> 5 posts)", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $poster (
sort { $data{$b}{percent} <=> $data{$a}{percent} }
keys %data
)
foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
keys %data )
{
next if $data{$poster}{quoted} == 0;
next if $data{$poster}{count} < 5;
330,10 → 322,8
&centred( "Bottom $count responders by original text (> 5 posts)", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $poster (
sort { $data{$a}{percent} <=> $data{$b}{percent} }
keys %data
)
foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
keys %data )
{
next if $data{$poster}{quoted} == 0;
next if $data{$poster}{count} < 5;
361,10 → 351,8
printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $thread (
sort { $threads{$b}{count} <=> $threads{$a}{count} }
keys %threads
)
foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} }
keys %threads )
{
my $name = substr( $thread, 0, 65 );
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
389,10 → 377,8
printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $thread (
sort { $threads{$b}{size} <=> $threads{$a}{size} }
keys %threads
)
foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} }
keys %threads )
{
my $name = substr( $thread, 0, 65 );
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
469,10 → 455,8
printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
print "=" x 76, "\n";
$i = 0;
foreach my $agent (
sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
keys %distinct_agent
)
foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
keys %distinct_agent )
{
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
$distinct_agent{$agent},
531,7 → 515,7
}
 
# now read the file
open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
open(my $FILE, $filename) or die "Can't open $filename: $!\n";
while (<$FILE>)
{
$totheader += length($_); # bump total header size
543,7 → 527,7
$lcheader{ clean( lc($key) ) } = clean($val);
}
}
@body = <$FILE>; # slurp up body
@body = <$FILE>; # slurp up body
close($FILE);
} # getarticle
 
555,15 → 539,15
#### First, analyse header fields ####
 
# Set up this poster if not defined, get counts, sizes
my $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name
$poster = encode( 'UTF-8', $headers{From} ); # get the poster's name
if ( !defined( $data{$poster} ) )
{ # seen this one before?
$data{$poster}{agent} = 'Unknown'; # comes after For: field
{ # seen this one before?
$data{$poster}{agent} = 'Unknown'; # comes after For: field
$data{$poster}{orig} = 0;
$data{$poster}{quoted} = 0;
}
$data{$poster}{count}++; # bump count for this poster
$data{$poster}{size} += $filesize; # total size of file
$data{$poster}{count}++; # bump count for this poster
$data{$poster}{size} += $filesize; # total size of file
 
# The User-Agent and/or X-Newsreader fields
# for User-Agent by poster
712,7 → 696,7
# Get the time zone
$_ = $headers{"Date"};
my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
{
$tz = "UTC";
}
733,7 → 717,7
elsif ( /^\s*[>\]]/ or /^\s*::/ )
{ # are we in a quote line?
$data{$poster}{quoted} += length($_); # bump count of quoted chrs
$totquoted += length($_);
$totquoted += length($_);
}
elsif (/-- /)
{
744,7 → 728,7
 
# we must be processing an original line
$data{$poster}{orig} += length($_); # bump count of original chrs
$totorig += length($_);
$totorig += length($_);
}
} # end for (@body)
 
781,8 → 765,7
my $percent = 100;
if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
{
$percent =
$data{$poster}{orig} * 100 /
$percent = $data{$poster}{orig} * 100 /
( $data{$poster}{quoted} + $data{$poster}{orig} ); #/
}
elsif ( $data{$poster}{orig} == 0 )
871,8 → 854,7
"----------------------------------------------------------------------------\n";
foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
{
print $OUTF
"$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
}
print $OUTF
"============================================================================\n";