1,4 → 1,4 |
#!/usr/bin/perl |
#!/usr/bin/perl -w |
use strict; |
use warnings; |
use utf8; |
75,20 → 75,15 |
my %skipSec; |
my @skiplist; |
my $args = @ARGV; |
for ( $ix = 1 ; $ix < $args ; $ix++ ) |
{ |
for ( $ix = 1 ; $ix < $args ; $ix++ ) { |
$j = $ix + 1; |
if ( $ARGV[$ix] eq "-x" ) |
{ |
if ( $ARGV[$ix] eq "-x" ) { |
@skiplist = split( ",", $ARGV[$j] ); |
} |
elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
{ |
} elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) { |
@skiplist = split( ",", $1 ); |
} |
} |
foreach (@skiplist) |
{ |
foreach(@skiplist) { |
$skipSec{$_} = 1; |
} |
|
113,6 → 108,8 |
# no. of time zones to show |
my $toptz = 10; |
|
|
|
###################### DATA STRUCTURES ###################### |
my $group = $newsgroup_name; |
$group =~ s!\.!/!g; |
141,8 → 138,7 |
my $i; # general purpose |
my %distinct_agent; |
my %agents = # used to hold counts of User Agents used |
( |
"KNode" => 0, |
( "KNode" => 0, |
"Pan" => 0, |
"Mozilla" => 0, |
"Sylpheed" => 0, |
162,8 → 158,7 |
"News Rover" => 0, |
"WebTV" => 0, |
"Compuserver" => 0, |
"VSoup" => 0 |
); |
"VSoup" => 0); |
|
######################## MAIN CODE ######################## |
$! = 1; |
170,8 → 165,7 |
|
chdir("$news$group") or die "Can't cd to $news$group: $!\n"; |
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n"; |
while ( defined( $filename = readdir(DIR) ) ) |
{ |
while (defined($filename = readdir(DIR))) { |
%lcheader = (); |
next unless -f $filename; # only want real files |
next if ( $filename eq ".overview" ); # real articles only |
183,7 → 177,6 |
$totalposts++; # bump count of articles considered |
} |
closedir(DIR); # finished with the directory |
|
# post-processing |
&countagents; # count agents, collapsing versions |
&fixpercent; # check percentages orig/total for posters |
194,32 → 187,29 |
print "=" x 76, "\n"; |
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 ); |
print "=" x 76, "\n"; |
printf "%s\n", |
&centred( "(stats compiled with a script by Garry Knight et al.)", 76 ); |
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76); |
print "\n\n"; |
printf "Total posts considered: %s over %d days\n", commify($totalposts), |
$numdays; |
printf "Total posts considered: %s over %d days\n", |
commify($totalposts), $numdays; |
printf "Earliest article: %s\n", scalar localtime($earliest); |
printf "Latest article: %s\n", scalar localtime($latest); |
printf "Original articles: %s, replies: %s\n", commify($origposts), |
commify($replies); |
printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies); |
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize), |
commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n", |
commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
commify( int( $totsize / $totalposts ) ); |
commify(int($totalposts / $numdays)), |
$totsize / $numdays / 1048576, commify(int($totsize / $totalposts)); |
my $count = keys %data; |
printf "Total headers: %s KB bodies: %s KB\n", |
commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
printf "Body text - quoted: %s KB, original: %s KB = %02.2f%%, sigs: %s KB\n", |
commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
( $totorig * 100 ) / ( $totorig + $totquoted ), |
commify( int( $totsig / 1024 ) ); |
printf "Total number of posters: %s, average %s bytes per poster\n", |
commify($count), commify( int( $totsize / $count ) ); #/ |
($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024)); |
printf "Total number of posters: %s, average %s bytes per poster\n", commify($count), |
commify(int($totsize / $count)); #/ |
$count = keys %threads; |
printf "Total number of threads: %s, average %s bytes per thread\n", |
commify($count), commify( int( $totsize / $count ) ); #/ |
printf "Total number of threads: %s, average %s bytes per thread\n", commify($count), |
commify(int($totsize / $count)); #/ |
printf "Total number of User-Agents: %d\n", scalar keys %agents; |
print "\n", "=" x 76, "\n"; |
|
226,24 → 216,18 |
############################### |
# show posters by article count Sec 1; |
############################### |
unless ( $skipSec{1} ) |
{ |
if ( keys %data < $topposters ) |
{ |
unless ( $skipSec{1} ) { |
if (keys %data < $topposters) { |
$count = keys %data; |
} |
else |
{ |
} else { |
$count = $topposters; |
} |
printf "%s\n", &centred( "Top $count posters by number of articles", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
{ |
foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) { |
my $name = substr( $poster, 0, 65 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
$data{$poster}{count}; |
printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count}; |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
252,24 → 236,18 |
################################ |
# show posters by size in Kbytes Sec 2; |
################################ |
unless ( $skipSec{2} ) |
{ |
if ( keys %data < $topposters ) |
{ |
unless ( $skipSec{2} ) { |
if (keys %data < $topposters) { |
$count = keys %data; |
} |
else |
{ |
} else { |
$count = $topposters; |
} |
printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
{ |
foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) { |
my $name = substr( $poster, 0, 62 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
$data{$poster}{size} / 1024; #/ |
printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/ |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
278,28 → 256,20 |
#################################### |
# show top posters for original text |
#################################### |
unless ( $skipSec{3} ) |
{ |
if ( keys %data < $topposters ) |
{ |
unless ( $skipSec{3} ) { |
if (keys %data < $topposters) { |
$count = keys %data; |
} |
else |
{ |
} else { |
$count = $topposters; |
} |
printf "%s\n", |
&centred( "Top $count responders by original text (> 5 posts)", 76 ); |
printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76); |
print "=" x 76, "\n"; |
$i = 0; |
foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
keys %data ) |
{ |
foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) { |
next if $data{$poster}{quoted} == 0; |
next if $data{$poster}{count} < 5; |
my $name = substr( $poster, 0, 63 ); |
printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
$data{$poster}{percent}; |
printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent}; |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
308,28 → 278,20 |
####################################### |
# show bottom posters for original text |
####################################### |
unless ( $skipSec{4} ) |
{ |
if ( keys %data < $topposters ) |
{ |
unless ( $skipSec{4} ) { |
if (keys %data < $topposters) { |
$count = keys %data; |
} |
else |
{ |
} else { |
$count = $topposters; |
} |
printf "%s\n", |
&centred( "Bottom $count responders by original text (> 5 posts)", 76 ); |
printf "%s\n", &centred("Bottom $count responders by original text (> 5 posts)", 76); |
print "=" x 76, "\n"; |
$i = 0; |
foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
keys %data ) |
{ |
foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) { |
next if $data{$poster}{quoted} == 0; |
next if $data{$poster}{count} < 5; |
my $name = substr( $poster, 0, 63 ); |
printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
$data{$poster}{percent}; |
printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent}; |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
338,25 → 300,18 |
#################################### |
# show threads by number of articles |
#################################### |
unless ( $skipSec{5} ) |
{ |
if ( keys %threads < $topthreads ) |
{ |
unless ( $skipSec{5} ) { |
if (keys %threads < $topthreads) { |
$count = keys %threads; |
} |
else |
{ |
} else { |
$count = $topthreads; |
} |
printf "%s\n", &centred( "Top $count threads by no. of articles", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} } |
keys %threads ) |
{ |
foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) { |
my $name = substr( $thread, 0, 65 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
$threads{$thread}{count}; |
printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count}; |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
364,25 → 319,18 |
################################ |
# show threads by size in Kbytes |
################################ |
unless ( $skipSec{6} ) |
{ |
if ( keys %threads < $topthreads ) |
{ |
unless ( $skipSec{6} ) { |
if (keys %threads < $topthreads) { |
$count = keys %threads; |
} |
else |
{ |
} else { |
$count = $topthreads; |
} |
printf "%s\n", &centred( "Top $count threads by size in KB", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} } |
keys %threads ) |
{ |
foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) { |
my $name = substr( $thread, 0, 65 ); |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
$threads{$thread}{size} / 1024; #/ |
printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/ |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
391,25 → 339,18 |
################################# |
# show top 10 cross-posted groups |
################################# |
unless ( $skipSec{7} ) |
{ |
unless ( $skipSec{7} ) { |
delete $crossposts{"$newsgroup_name"}; # don't include ours |
if ( keys %crossposts < $topcrossposts ) |
{ |
if (keys %crossposts < $topcrossposts) { |
$count = keys %crossposts; |
} |
else |
{ |
} else { |
$count = $topcrossposts; |
} |
printf "%s\n", &centred( "Top $count cross-posted groups", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach |
my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
{ |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
$crossposts{$name}; |
foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) { |
printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name}; |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
417,23 → 358,17 |
####################### |
#show agents and counts |
####################### |
unless ( $skipSec{8} ) |
{ |
if ( keys %agents < $topagents ) |
{ |
unless ( $skipSec{8} ) { |
if (keys %agents < $topagents) { |
$count = keys %agents; |
} |
else |
{ |
} else { |
$count = $topagents; |
} |
printf "%s\n", &centred( "Top $count User Agents by poster", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
{ |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
$agents{$agent}; |
foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) { |
printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent}; |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
442,25 → 377,17 |
####################### |
#show distinct agents |
####################### |
unless ( $skipSec{9} ) |
{ |
if ( keys %distinct_agent < $topagents ) |
{ |
unless ( $skipSec{9} ) { |
if (keys %distinct_agent < $topagents) { |
$count = keys %distinct_agent; |
} |
else |
{ |
} else { |
$count = $topagents; |
} |
printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
keys %distinct_agent ) |
{ |
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
$distinct_agent{$agent}, |
( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) { |
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100); |
last if ( ++$i == $count ); |
} |
print "\n", "=" x 76, "\n"; |
469,21 → 396,16 |
########################## |
#show timezones and counts |
########################## |
unless ( $skipSec{10} ) |
{ |
if ( keys %tz < $toptz ) |
{ |
unless ( $skipSec{10} ) { |
if (keys %tz < $toptz) { |
$count = keys %tz; |
} |
else |
{ |
} else { |
$count = $toptz; |
} |
printf "%s\n", &centred( "Top 10 time zones", 76 ); |
print "=" x 76, "\n"; |
$i = 0; |
foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
{ |
foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) { |
printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
last if ( ++$i == $count ); |
} |
490,38 → 412,33 |
print "\n", "=" x 76, "\n"; |
} |
|
|
################################ SUBROUTINES ################################ |
|
|
####################################### |
# get current article's header and body |
####################################### |
sub getarticle |
{ |
sub getarticle { |
%headers = (); # dump old headers |
my $filename = shift; # get the name of the file |
|
# get stats about the file itself |
$filesize = -s $filename; # get total size of file |
$totsize += $filesize; # bump total sizes of all files |
|
my $mtime = ( stat $filename )[9]; |
if ( $mtime < $earliest ) |
{ |
if ( $mtime < $earliest ) { |
$earliest = $mtime; |
} |
elsif ( $mtime > $latest ) |
{ |
} elsif ( $mtime > $latest ) { |
$latest = $mtime; |
} |
|
# now read the file |
open( FILE, $filename ) or die "Can't open $filename: $!\n"; |
while (<FILE>) |
{ |
while (<FILE>) { |
$totheader += length($_); # bump total header size |
last if (/^\s*$/); # end of header? |
if (/^([^:\s]*):\s+(.*)/) |
{ |
if (/^([^:\s]*):\s+(.*)/) { |
my ( $key, $val ) = ( $1, $2 ); |
$headers{$key} = decode( 'MIME-Header', $val ); |
$lcheader{ clean( lc($key) ) } = clean($val); |
534,14 → 451,12 |
################################### |
# get data from the current article |
################################### |
sub getdata |
{ |
sub getdata { |
#### First, analyse header fields #### |
|
# Set up this poster if not defined, get counts, sizes |
$poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
if ( !defined( $data{$poster} ) ) |
{ # seen this one before? |
if (!defined($data{$poster})) { # seen this one before? |
$data{$poster}{agent} = 'Unknown'; # comes after For: field |
$data{$poster}{orig} = 0; |
$data{$poster}{quoted} = 0; |
551,12 → 466,10 |
|
# The User-Agent and/or X-Newsreader fields |
# for User-Agent by poster |
if ( defined $lcheader{"user-agent"} ) |
{ |
if (defined $lcheader{"user-agent"}) { |
$data{$poster}{agent} = $lcheader{"user-agent"}; |
} |
if ( defined $lcheader{"x-newsreader"} ) |
{ |
if (defined $lcheader{"x-newsreader"}) { |
$data{$poster}{agent} = $lcheader{"x-newsreader"}; |
} |
|
576,11 → 489,8 |
{ |
$UA = $lcheader{'x-mailer'}; |
} |
elsif ( |
( defined $lcheader{'organization'} ) |
&& ( $lcheader{'organization'} =~ |
/groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
) |
elsif ((defined $lcheader{'organization'}) && |
($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/)) |
{ |
$UA = $lcheader{'organization'}; |
} |
590,17 → 500,17 |
} ## Hopefully found UA, else set to unknown |
} |
|
|
$UA = clean($UA); |
$UA = get_agent($UA); |
|
sub get_agent |
{ |
|
sub get_agent { |
my $raw = shift; |
my $agent = $raw; |
|
## strip http |
if ( $raw =~ /.*http.*/ ) |
{ |
if ( $raw =~ /.*http.*/ ) { |
$raw =~ s!posted via!!i; |
$raw =~ s!http://!!g; |
$raw =~ s!/!!g; |
611,16 → 521,16 |
if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; } |
|
## Pick out the popular agents |
if ( $raw =~ /(outlook express)/i |
|| $raw =~ /(microplanet gravity)/i |
|| $raw =~ /(news rover)/i |
|| $raw =~ /(forte agent)/i |
|| $raw =~ /(forte free agent)/i ) |
if ( $raw =~ /(outlook express)/i || |
$raw =~ /(microplanet gravity)/i || |
$raw =~ /(news rover)/i || |
$raw =~ /(forte agent)/i || |
$raw =~ /(forte free agent)/i |
) |
{ |
$agent = $1; |
} |
elsif ( |
$raw =~ /^( |
elsif ( $raw =~ /^( |
pan |
|sylpheed |
|slrn |
648,8 → 558,7 |
|007 |
|webtv |
|compuserve |
)/ix |
) |
)/ix ) |
{ |
$agent = $1; |
} |
656,8 → 565,7 |
else |
{ |
## Clean up unknown agents |
if ( $raw =~ m!^(.*?)/! ) |
{ |
if ( $raw =~ m!^(.*?)/! ) { |
$agent = $1; |
} |
elsif ( $raw =~ /^(\w*)\d.*/ ) |
670,9 → 578,9 |
return $agent; |
} |
|
|
# Get all cross-posted newsgroups |
for ( split /,/, $headers{"Newsgroups"} ) |
{ |
for (split /,/, $headers{"Newsgroups"}) { |
$crossposts{$_}++; # bump count for each |
} |
|
684,12 → 592,9 |
$threads{$thread}{size} += $filesize; # bump bytes for this thread |
|
# Is this an original post or a reply? |
if ( defined $headers{"References"} ) |
{ |
if (defined $headers{"References"}) { |
$replies++; |
} |
else |
{ |
} else { |
$origposts++; |
} |
|
696,8 → 601,7 |
# Get the time zone |
$_ = $headers{"Date"}; |
my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
{ |
if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) { |
$tz = "UTC"; |
} |
$tz{$tz}++; |
704,28 → 608,18 |
|
#### Now analyse the body text #### |
my $insig = 0; |
for (@body) |
{ |
for (@body) { |
$totbody += length($_); # bump total body size |
next if (/^$>/); # don't count blank lines in body |
if ( $insig == 1 ) |
{ |
if ($insig == 1) { |
$totsig += length($_); # bump total sig size |
|
# Bill Unruh uses ] quotes, and another poster uses :: |
} |
elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
{ # are we in a quote line? |
} elsif (/^\s*[>\]]/ or /^\s*::/) { # are we in a quote line? |
$data{$poster}{quoted} += length($_); # bump count of quoted chrs |
$totquoted += length($_); |
} |
elsif (/-- /) |
{ |
} elsif (/-- /) { |
$insig = 1; |
} |
else |
{ |
|
} else { |
# we must be processing an original line |
$data{$poster}{orig} += length($_); # bump count of original chrs |
$totorig += length($_); |
738,15 → 632,11 |
# Count the User-Agents used, collapsing |
# different versions into one per agent. |
######################################## |
sub countagents |
{ |
sub countagents { |
POSTER: |
foreach $poster ( keys %data ) |
{ |
foreach my $agent_name ( keys %distinct_agent ) |
{ # check against known ones |
if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
{ |
foreach $poster (keys %data) { |
foreach my $agent_name (keys %distinct_agent) { # check against known ones |
if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) { |
$agents{$agent_name}++; |
next POSTER; |
} |
758,18 → 648,12 |
############################################ |
# set orig/total percentages for all posters |
############################################ |
sub fixpercent |
{ |
foreach $poster ( keys %data ) |
{ |
sub fixpercent { |
foreach $poster (keys %data) { |
my $percent = 100; |
if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
{ |
$percent = $data{$poster}{orig} * 100 / |
( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
} |
elsif ( $data{$poster}{orig} == 0 ) |
{ |
if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) { |
$percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/ |
} elsif ($data{$poster}{orig} == 0) { |
$percent = 0; |
} |
$data{$poster}{percent} = $percent; |
779,13 → 663,10 |
############################## |
# right pad a string with '.'s |
############################## |
sub rpad |
{ |
|
sub rpad { |
# get text to pad, length to pad, pad chr |
my ( $text, $pad_len, $pad_chr ) = @_; |
if ( length($text) > $pad_len ) |
{ |
if (length($text) > $pad_len) { |
$text = substr( $text, 0, $pad_len ); |
} |
my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
795,8 → 676,7 |
################# |
# centre a string |
################# |
sub centred |
{ |
sub centred { |
my ( $text, $width ) = @_; # text to centre, size of field to centre in |
my $pad_len = ( $width - length($text) ) / 2; #/ |
my $centred = " " x $pad_len . $text; |
806,8 → 686,7 |
########################## |
# put commas into a number |
########################## |
sub commify |
{ |
sub commify { |
$_ = shift; |
1 while s/^(-?\d+)(\d{3})/$1,$2/; |
return $_; |
816,8 → 695,7 |
######################### |
# clean |
######################### |
sub clean |
{ |
sub clean { |
my $dirty = shift; |
my $clean = $dirty; |
$clean =~ s/^\s*//; |
826,9 → 704,9 |
return $clean; |
} |
|
sub usage |
{ |
|
sub usage { |
|
print "usage: newstat.pl newsgroupname\n"; |
exit 1; |
} |
836,51 → 714,36 |
################################### |
# Write data structures to a file # |
################################### |
sub writedata |
{ |
sub writedata { |
open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
print OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
print OUTF |
"Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
foreach my $name ( keys %data ) |
{ |
print OUTF |
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
foreach my $name (keys %data) { |
print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
} |
print OUTF |
"============================================================================\n"; |
print OUTF "============================================================================\n"; |
print OUTF "Thread subjects\n"; |
print OUTF |
"----------------------------------------------------------------------------\n"; |
foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
{ |
print OUTF "----------------------------------------------------------------------------\n"; |
foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) { |
print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
} |
print OUTF |
"============================================================================\n"; |
print OUTF "============================================================================\n"; |
print OUTF "Cross-posts\n"; |
print OUTF |
"----------------------------------------------------------------------------\n"; |
foreach my $name ( sort keys %crossposts ) |
{ |
print OUTF "----------------------------------------------------------------------------\n"; |
foreach my $name (sort keys %crossposts) { |
print OUTF "$name : $crossposts{$name}\n"; |
} |
print OUTF print OUTF |
"============================================================================\n"; |
print OUTF |
print OUTF "============================================================================\n"; |
print OUTF "User agents\n"; |
print OUTF |
"----------------------------------------------------------------------------\n"; |
foreach my $name ( sort keys %agents ) |
{ |
print OUTF "----------------------------------------------------------------------------\n"; |
foreach my $name (sort keys %agents) { |
print OUTF "$name : $agents{$name}\n"; |
} |
print OUTF |
"============================================================================\n"; |
print OUTF "============================================================================\n"; |
print OUTF "Time zones\n"; |
print OUTF |
"----------------------------------------------------------------------------\n"; |
foreach my $name ( sort keys %tz ) |
{ |
print OUTF "----------------------------------------------------------------------------\n"; |
foreach my $name (sort keys %tz) { |
print OUTF "$name : $tz{$name}\n"; |
} |
close OUTF; |