| 1,33 → 1,39 |
| #!/usr/bin/env perl |
| use strict; |
| use warnings; |
| use diagnostics; |
| |
| #use diagnostics; |
| use utf8; |
| use Encode; |
| |
| use constant DEBUG => 0; |
| |
| ## Print out all text to STDOUT UTF-8 encoded |
| binmode STDOUT, ':encoding(UTF-8)'; |
| binmode STDERR, ':encoding(UTF-8)'; |
| |
| ############################## |
| ## newsstat.pl version 0.4.3.1 |
| # FIXME: Automatically include resolved '.' in @INC |
| # print join "\n", @INC; |
| |
| ########################################################################### |
| ## Collect statistics about a newsgroup (specified by first argument) |
| ## in the local news spool. Check all articles in the last 30-day period. |
| ## Rank posters by number of posts and by volume of posts, report on top |
| ## and bottom 20 posters. Show their name, number of posts, size of posts, |
| ## percentage of quoted lines. Rank user-agents used, by poster rather |
| ## than by post. Rank top 20 threads. Rank top 10 cross-posted groups. |
| ## |
| ## Numbers and paths can be configured below. See ChangeLog and TODO |
| ## for more. -- PE |
| ########################################################################### |
| |
| use locale ':not_characters'; |
| use Locale::TextDomain ('de.pointedears.newsstat'); |
| use POSIX ('locale_h'); |
| use Locale::Messages qw (bind_textdomain_filter |
| bind_textdomain_codeset |
| turn_utf_8_on); |
| #setlocale( LC_MESSAGES, '' ); |
| bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
| bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
| |
| require Mail::Message; |
| require DateTime; |
| require DateTime::Format::Mail; |
| |
| ###################### USER CONFIGURATIONS ############################ |
| |
| ## The name of the group to do stats for |
| my $newsgroup_name = $ARGV[0]; |
| $newsgroup_name or &usage; |
| $newsgroup_name or usage(); |
| |
| ## Check for removal flags |
| my $ix; |
| 55,21 → 61,18 |
| ## Leafnode users will want /var/spool/news for this variable. |
| my $news = "/var/spool/news/"; |
| |
| ## How many days are we doing statistics for? |
| my $numdays = 30; |
| ## Number of top or bottom posters to show |
| my $topposters = 20; |
| |
| ## Number of agents we list |
| my $topagents = 10; |
| |
| ## Number of threads we want to know about |
| my $topthreads = 20; |
| |
| ## Number of top or bottom posters to show |
| my $topposters = 20; |
| |
| ## Number of cross-posted threads to show |
| my $topcrossposts = 10; |
| |
| ## Number of agents we list |
| my $topagents = 10; |
| |
| ## Number of time zones to show |
| my $toptz = 10; |
| |
| 76,29 → 79,21 |
| ###################### DATA STRUCTURES ###################### |
| my $group = $newsgroup_name; |
| $group =~ s!\.!/!g; |
| my %data; # name, count, agent, total, orig, quoted |
| my %threads; # subject, count |
| my %crossposts; # group, count |
| my %tz; # timezones by count |
| my %headers; # holds header of current article |
| my %lcheader; # holds lowercase headers |
| my @body; # holds body of current article |
| my @sig; # holds sig text; |
| my $totalposts; # total no. of posts considered |
| my $filename; # name of current article file |
| my $filesize; # size of current article file |
| my $earliest; # earliest article we have found |
| my $latest; # latest article we have found |
| my $poster; # poster we are dealing with |
| my $totsize = 0; # holds total sizes of all files |
| my $totheader = 0; # total size of header material |
| my $totbody = 0; # total size of body material |
| my $totsig = 0; # total size of sig material |
| my $totorig = 0; # total size of original material |
| my $totquoted = 0; # total size of quoted material |
| my $origposts = 0; # total no. of original posts |
| my $replies = 0; # total no. of replies |
| my $i; # general purpose |
| my %data; # name, count, agent, total, orig, quoted |
| my $totsize = 0; # holds total sizes of all files |
| my %crossposts; # group, count |
| my %threads; # subject, count |
| my $replies = 0; # total no. of replies |
| my $origposts = 0; # total no. of original posts |
| my %tz; # timezones by count |
| my $earliest; # earliest article we have found |
| my $latest; # latest article we have found |
| my $totheader = 0; # total size of header material |
| my $totbody = 0; # total size of body material |
| my $totsig = 0; # total size of sig material |
| my $totquoted = 0; # total size of quoted material |
| my $totorig = 0; # total size of original material |
| my $totalposts; # total no. of posts considered |
| my %distinct_agent; |
| |
| ## Used to hold counts of User Agents used |
| 124,491 → 119,273 |
| "tin" => 0, |
| "VSoup" => 0, |
| "WebTV" => 0, |
| "Xnews" => 0 |
| "Xnews" => 0, |
| ); |
| |
| ######################## MAIN CODE ######################## |
| $! = 1; |
| my $datetime_parser = DateTime::Format::Mail->new(); |
| $datetime_parser->loose(); |
| |
| chdir("$news$group") or die "Can't cd to $news$group: $!\n"; |
| opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n"; |
| while ( defined( $filename = readdir(DIR) ) ) |
| my $today = DateTime->today( time_zone => 'UTC' ); |
| my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
| my $start = int $prev_month->strftime('%s'); |
| my $numdays = int DateTime->last_day_of_month( |
| year => $prev_month->year(), |
| month => $prev_month->month(), |
| time_zone => $prev_month->time_zone(), |
| )->day(); |
| my $end = int $today->clone()->set_day(1)->strftime('%s'); |
| |
| dmsg( $start, " to ", $end ) if DEBUG; |
| |
| chdir("$news$group") |
| or die __x( |
| "Can't cd to {newsgroup}: {error}\n", |
| newsgroup => "$news$group", |
| error => $! |
| ); |
| opendir( DIR, "." ) |
| or die __x( |
| "Can't open {newsgroup}: {error}\n", |
| newsgroup => "$news$group", |
| error => $! |
| ); |
| |
| while ( defined( my $filename = readdir(DIR) ) ) |
| { |
| %lcheader = (); |
| next unless -f $filename; # only want real files |
| next if ( $filename eq ".overview" ); # real articles only |
| next if ( -M $filename > $numdays ); # only want articles <= a certain age |
| $earliest = ( stat $filename )[9] unless defined($earliest); |
| $latest = ( stat $filename )[9] unless defined($latest); |
| &get_article($filename); # read in the article |
| &get_data; # grab the data from the article |
| $totalposts++; # bump count of articles considered |
| next unless -f $filename; # only want real files |
| next if ( $filename eq ".overview" ); # real articles only |
| |
| get_article($filename); # read in the article |
| } |
| closedir(DIR); # finished with the directory |
| closedir(DIR); # finished with the directory |
| |
| dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
| |
| ## Post-processing |
| &count_agents; # count agents, collapsing versions |
| &fix_percent; # check percentages orig/total for posters |
| count_agents(); # count agents, collapsing versions |
| fix_percent(); |
| |
| &write_data; |
| write_data(); |
| display_results(); |
| |
| #################### DISPLAY RESULTS ##################### |
| print "=" x 76, "\n"; |
| printf "%s\n", ¢red( "Analysis of posts to $newsgroup_name", 76 ); |
| print "=" x 76, "\n"; |
| printf "%s\n", |
| ¢red( "(stats compiled with a script by Garry Knight et al.)", 76 ); |
| print "\n\n"; |
| printf "Total posts considered: %s over %d days\n", commify($totalposts), |
| $numdays; |
| printf "Earliest article: %s\n", scalar localtime($earliest); |
| printf "Latest article: %s\n", scalar localtime($latest); |
| printf "Original articles: %s, replies: %s\n", commify($origposts), |
| commify($replies); |
| printf "Total size of posts: %s bytes (%s KiB) (%.2f MiB)\n", commify($totsize), |
| commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
| printf "Average %s articles per day, %.2f MiB per day, %s bytes per article\n", |
| commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
| commify( int( $totsize / $totalposts ) ); |
| my $count = keys %data; |
| printf "Total headers: %s KiB bodies: %s KiB\n", |
| commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
| printf "Body text - quoted: %s KiB, original: %s KiB = %02.2f%%, sigs: %s KiB\n", |
| commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
| ( $totorig * 100 ) / ( $totorig + $totquoted ), |
| commify( int( $totsig / 1024 ) ); |
| printf "Total number of posters: %s, average %s bytes per poster\n", |
| commify($count), commify( int( $totsize / $count ) ); #/ |
| $count = keys %threads; |
| printf "Total number of threads: %s, average %s bytes per thread\n", |
| commify($count), commify( int( $totsize / $count ) ); #/ |
| printf "Total number of user agents: %d\n", scalar keys %agents; |
| print "\n", "=" x 76, "\n"; |
| |
| ######################################## |
| ## Show posters by article count Sec 1; |
| ## Get current article's header and body |
| ######################################## |
| unless ( $skipSec{1} ) |
| sub get_article |
| { |
| if ( keys %data < $topposters ) |
| { |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach |
| my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| { |
| my $name = substr( $poster, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{count}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| my $filename = shift; |
| |
| ###################################### |
| ## Show posters by size in KiB Sec 2; |
| ###################################### |
| unless ( $skipSec{2} ) |
| { |
| if ( keys %data < $topposters ) |
| { |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", ¢red( "Top $count posters by article size in KiB", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
| { |
| my $name = substr( $poster, 0, 62 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{size} / 1024; #/ |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| open( my $FILE, '<', $filename ) |
| or |
| die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
| my $msg = Mail::Message->read($FILE); |
| my $timestamp = $msg->timestamp(); |
| my $date = $msg->study('Date'); |
| |
| ##################################### |
| ## Show top posters for original text |
| ##################################### |
| unless ( $skipSec{3} ) |
| { |
| if ( keys %data < $topposters ) |
| ## Disregard article if timestamp is not in range |
| dmsg($timestamp) if DEBUG; |
| if ( $timestamp < $start or $timestamp >= $end ) |
| { |
| $count = keys %data; |
| dmsg("Posting on $date ignored.") if DEBUG; |
| return; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", |
| ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $poster ( |
| sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| keys %data |
| ) |
| { |
| next if $data{$poster}{quoted} == 0; |
| next if $data{$poster}{count} < 5; |
| my $name = substr( $poster, 0, 63 ); |
| printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{percent}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ######################################## |
| ## Show bottom posters for original text |
| ######################################## |
| unless ( $skipSec{4} ) |
| { |
| if ( keys %data < $topposters ) |
| { |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", |
| ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $poster ( |
| sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| keys %data |
| ) |
| { |
| next if $data{$poster}{quoted} == 0; |
| next if $data{$poster}{count} < 5; |
| my $name = substr( $poster, 0, 63 ); |
| printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{percent}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| $totalposts++; # bump count of articles considered |
| |
| ##################################### |
| ## Show threads by number of articles |
| ##################################### |
| unless ( $skipSec{5} ) |
| { |
| if ( keys %threads < $topthreads ) |
| { |
| $count = keys %threads; |
| } |
| else |
| { |
| $count = $topthreads; |
| } |
| printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $thread ( |
| sort { $threads{$b}{count} <=> $threads{$a}{count} } |
| keys %threads |
| ) |
| { |
| my $name = substr( $thread, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| $threads{$thread}{count}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| ## DEBUG |
| dmsg($date) if DEBUG; |
| |
| ############################## |
| ## Show threads by size in KiB |
| ############################## |
| unless ( $skipSec{6} ) |
| { |
| if ( keys %threads < $topthreads ) |
| { |
| $count = keys %threads; |
| } |
| else |
| { |
| $count = $topthreads; |
| } |
| printf "%s\n", ¢red( "Top $count threads by size in KiB", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $thread ( |
| sort { $threads{$b}{size} <=> $threads{$a}{size} } |
| keys %threads |
| ) |
| { |
| my $name = substr( $thread, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| $threads{$thread}{size} / 1024; #/ |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| ## get stats about the file itself |
| my $filesize = -s $filename; # get total size of file |
| $totsize += $filesize; # bump total sizes of all files |
| |
| ################################## |
| ## Show top 10 cross-posted groups |
| ################################## |
| unless ( $skipSec{7} ) |
| { |
| delete $crossposts{"$newsgroup_name"}; # don't include ours |
| if ( keys %crossposts < $topcrossposts ) |
| if ( ( not defined $earliest ) or $timestamp < $earliest ) |
| { |
| $count = keys %crossposts; |
| $earliest = $timestamp; |
| } |
| else |
| elsif ( ( not defined $latest ) or $timestamp > $latest ) |
| { |
| $count = $topcrossposts; |
| $latest = $timestamp; |
| } |
| printf "%s\n", ¢red( "Top $count cross-posted groups", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach |
| my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
| { |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| $crossposts{$name}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ######################### |
| ## Show agents and counts |
| ######################### |
| unless ( $skipSec{8} ) |
| { |
| if ( keys %agents < $topagents ) |
| { |
| $count = keys %agents; |
| } |
| else |
| { |
| $count = $topagents; |
| } |
| printf "%s\n", ¢red( "Top $count User Agents by poster", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
| { |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
| $agents{$agent}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| #print "timestamp: $timestamp\n"; |
| |
| ####################### |
| ## Show distinct agents |
| ####################### |
| unless ( $skipSec{9} ) |
| { |
| if ( keys %distinct_agent < $topagents ) |
| { |
| $count = keys %distinct_agent; |
| } |
| else |
| { |
| $count = $topagents; |
| } |
| printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $agent ( |
| sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| keys %distinct_agent |
| ) |
| { |
| printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
| $distinct_agent{$agent}, |
| ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| ## count header size |
| $totheader += $msg->head()->size(); |
| |
| ############################ |
| ## Show timezones and counts |
| ############################ |
| unless ( $skipSec{10} ) |
| { |
| if ( keys %tz < $toptz ) |
| ## get the poster's name (MIME-decoded, in UTF-8) |
| my $poster = $msg->study('From'); |
| if ( defined $poster ) |
| { |
| $count = keys %tz; |
| } |
| else |
| { |
| $count = $toptz; |
| } |
| printf "%s\n", ¢red( "Top 10 time zones", 76 ); |
| print "=" x 76, "\n"; |
| $i = 0; |
| foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
| { |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| ## Convert old to new format |
| $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
| |
| ################################ SUBROUTINES ################################ |
| ## Collapse whitespace |
| $poster =~ s/\s+/ /g; |
| |
| ######################################## |
| ## Get current article's header and body |
| ######################################## |
| sub get_article |
| { |
| %headers = (); # dump old headers |
| my $filename = shift; # get the name of the file |
| ## Remove outer quotes; TODO: observe RFC 5322 strictly |
| $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
| |
| ## get stats about the file itself |
| $filesize = -s $filename; # get total size of file |
| $totsize += $filesize; # bump total sizes of all files |
| ## DEBUG |
| dmsg($poster) if DEBUG; |
| |
| my $mtime = ( stat $filename )[9]; |
| if ( $mtime < $earliest ) |
| { |
| $earliest = $mtime; |
| } |
| elsif ( $mtime > $latest ) |
| { |
| $latest = $mtime; |
| } |
| ## seen this one before? |
| if ( !defined( $data{$poster} ) ) |
| { |
| $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
| $data{$poster}{'orig'} = 0; |
| $data{$poster}{'quoted'} = 0; |
| } |
| $data{$poster}{'count'}++; # bump count for this poster |
| $data{$poster}{'size'} += $filesize; # total size of file |
| |
| ## now read the file |
| open( my $FILE, '<', $filename ) or die "Can't open $filename: $!\n"; |
| while (<$FILE>) |
| { |
| $totheader += length($_); # bump total header size |
| last if (/^\s*$/); # end of header? |
| if (/^([^:\s]*):\s*(.*)/) |
| ## The User-Agent and/or X-Newsreader fields |
| ## for User-Agent by poster |
| my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
| if ( defined $ua ) |
| { |
| my ( $key, $val ) = ( $1, $2 ); |
| $headers{$key} = decode( 'MIME-Header', $val ); |
| $lcheader{ clean( lc($key) ) } = clean($val); |
| $data{$poster}{'agent'} = $ua; |
| |
| ## DEBUG |
| dmsg($ua) if DEBUG; |
| } |
| } |
| @body = <$FILE>; # slurp up body |
| close($FILE); |
| } # get_article |
| |
| #################################### |
| ## Get data from the current article |
| #################################### |
| sub get_data |
| { |
| #### First, analyse header fields #### |
| ## The User Agent for User-Agent by number of posts |
| get_agent($msg); |
| |
| ## Set up this poster if not defined, get counts, sizes |
| my $poster = $headers{From}; # get the poster's name |
| |
| # Convert old to new format |
| $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
| |
| # Collapse whitespace |
| $poster =~ s/\s+/ /; |
| |
| # Remove outer quotes |
| $poster =~ s/^["'](.+?)["']\s+(.*)/$1 $2/; |
| |
| if ( !defined( $data{$poster} ) ) |
| { # seen this one before? |
| $data{$poster}{agent} = 'Unknown'; # comes after For: field |
| $data{$poster}{orig} = 0; |
| $data{$poster}{quoted} = 0; |
| } |
| $data{$poster}{count}++; # bump count for this poster |
| $data{$poster}{size} += $filesize; # total size of file |
| ## Get all cross-posted newsgroups |
| for ( split( /,/, $msg->study('Newsgroups') ) ) |
| { |
| $crossposts{$_}++; # bump count for each |
| } |
| |
| ## The User-Agent and/or X-Newsreader fields |
| ## for User-Agent by poster |
| if ( defined $lcheader{"user-agent"} ) |
| { |
| $data{$poster}{agent} = $lcheader{"user-agent"}; |
| } |
| if ( defined $lcheader{"x-newsreader"} ) |
| { |
| $data{$poster}{agent} = $lcheader{"x-newsreader"}; |
| } |
| ## Get threads |
| my $thread = $msg->study('Subject'); |
| $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
| $thread =~ s/\s+/ /g; # collapse whitespace |
| $threads{$thread}{'count'}++; # bump count of this subject |
| $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
| |
| ## The User Agent for User-Agent by number of posts |
| my $UA = "unknown"; |
| foreach my $keys ( keys %lcheader ) |
| { |
| if ( defined $lcheader{'user-agent'} ) |
| ## Is this an original post or a reply? |
| if ( defined $msg->study('References') ) |
| { |
| $UA = $lcheader{'user-agent'}; |
| $replies++; |
| } |
| elsif ( defined $lcheader{"x-newsreader"} ) |
| else |
| { |
| $UA = $lcheader{"x-newsreader"}; |
| $origposts++; |
| } |
| elsif ( defined $lcheader{'x-mailer'} ) |
| |
| ## Get the time zone |
| my $datetime = $datetime_parser->parse_datetime($date); |
| my $tz = $datetime->strftime('%z'); |
| $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
| $tz{$tz}++; |
| |
| ## DEBUG |
| dmsg($tz) if DEBUG; |
| |
| #### Now analyse the body text #### |
| my $body = $msg->body(); |
| |
| my $insig = 0; |
| my @body = $body->lines; |
| for (@body) |
| { |
| $UA = $lcheader{'x-mailer'}; |
| $totbody += length($_); # bump total body size |
| next if (m{^$>}o); # don't count blank lines in body |
| if ( $insig == 1 ) |
| { |
| |
| # bump total sig size |
| $totsig += length($_); |
| } |
| ## are we in a quote line? |
| ## Bill Unruh uses ] quotes, and another poster uses :: |
| elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
| { |
| ## bump count of quoted chrs |
| $data{$poster}{'quoted'} += length($_); |
| $totquoted += length($_); |
| } |
| elsif (/^-- $/) |
| { |
| $insig = 1; |
| } |
| else |
| { |
| ## We must be processing an original line |
| $data{$poster}{'orig'} += length($_); # bump count of original chrs |
| $totorig += length($_); |
| } |
| } |
| elsif ( |
| ( defined $lcheader{'organization'} ) |
| && ( $lcheader{'organization'} =~ |
| /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
| ) |
| |
| # end for (@body) |
| } |
| |
| close($FILE); |
| } |
| |
| sub get_agent |
| { |
| my $msg = shift; |
| |
| my $ua = |
| $msg->study('User-Agent') |
| or $msg->study('X-Newsreader') |
| or $msg->study('X-Mailer'); |
| if ( not defined $ua ) |
| { |
| my $org = $msg->study('Organization'); |
| if ( defined $org |
| and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
| { |
| $UA = $lcheader{'organization'}; |
| $ua = $org; |
| } |
| elsif ( $lcheader{'message-id'} =~ /pine/i ) |
| elsif ( $msg->study('Message-ID') =~ /pine/i ) |
| { |
| $UA = "Pine"; |
| } ## Hopefully found UA, else set to unknown |
| $ua = "Pine"; |
| } |
| } |
| |
| $UA = clean($UA); |
| $UA = get_agent($UA); |
| ## Hopefully found UA, else set to unknown |
| if ( not defined $ua ) |
| { |
| $ua = __ "unknown"; |
| } |
| |
| sub get_agent |
| $ua = clean($ua); |
| |
| my $raw = $ua; |
| my $agent = $raw; |
| |
| ## strip http |
| if ( $raw =~ /.*http.*/ ) |
| { |
| my $raw = shift; |
| my $agent = $raw; |
| $raw =~ s!posted via!!i; |
| $raw =~ s!http://!!g; |
| $raw =~ s!/!!g; |
| $raw =~ s! !!g; |
| } |
| |
| ## strip http |
| if ( $raw =~ /.*http.*/ ) |
| { |
| $raw =~ s!posted via!!i; |
| $raw =~ s!http://!!g; |
| $raw =~ s!/!!g; |
| $raw =~ s! !!g; |
| } |
| ## Fix Outlook from Mac |
| if ( $raw =~ /^microsoft/i ) |
| { |
| $raw =~ s/-/ /g; |
| } |
| |
| ## Fix Outlook from Mac |
| if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; } |
| |
| ## Pick out the popular agents |
| if ( |
| $raw =~ /(outlook express)/i |
| || $raw =~ /(windows mail)/i |
| || $raw =~ /(microplanet gravity)/i |
| || $raw =~ /(news rover)/i |
| || $raw =~ /(forte agent)/i |
| || $raw =~ /(forte free agent)/i |
| ) |
| { |
| $agent = $1; |
| } |
| elsif ( |
| $raw =~ /^( |
| ## Pick out the popular agents |
| if ( $raw =~ /(outlook express)/i |
| || $raw =~ /(windows mail)/i |
| || $raw =~ /(microplanet gravity)/i |
| || $raw =~ /(news rover)/i |
| || $raw =~ /(forte agent)/i |
| || $raw =~ /(forte free agent)/i ) |
| { |
| $agent = $1; |
| } |
| elsif ( |
| $raw =~ /^( |
| pan |
| |sylpheed |
| |slrn |
| 637,134 → 414,496 |
| |webtv |
| |compuserve |
| )/ix |
| ) |
| ) |
| { |
| $agent = $1; |
| } |
| else |
| { |
| ## Clean up unknown agents |
| if ( $raw =~ m!^(.*?)/! ) |
| { |
| $agent = $1; |
| } |
| else |
| elsif ( $raw =~ /^(\w*)\d.*/ ) |
| { |
| ## Clean up unknown agents |
| if ( $raw =~ m!^(.*?)/! ) |
| $agent = $1; |
| } |
| } |
| |
| $distinct_agent{$agent}++; |
| return $agent; |
| } |
| ## get_agent |
| |
| ######################################### |
| ## Count the User-Agents used, collapsing |
| ## different versions into one per agent. |
| ######################################### |
| sub count_agents |
| { |
| POSTER: |
| foreach my $poster ( keys %data ) |
| { |
| foreach my $agent_name ( keys %distinct_agent ) |
| { # check against known ones |
| if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
| { |
| $agent = $1; |
| $agents{$agent_name}++; |
| next POSTER; |
| } |
| elsif ( $raw =~ /^(\w*)\d.*/ ) |
| { |
| $agent = $1; |
| } |
| } |
| $agents{ $data{$poster}{'agent'} }++; |
| } |
| } # count_agents |
| |
| $distinct_agent{$agent}++; |
| return $agent; |
| ############################################# |
| ## Set orig/total percentages for all posters |
| ############################################# |
| sub fix_percent |
| { |
| foreach my $poster ( keys %data ) |
| { |
| my $percent = 100; |
| if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
| { |
| $percent = |
| $data{$poster}{'orig'} * 100 / |
| ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
| } |
| elsif ( $data{$poster}{'orig'} == 0 ) |
| { |
| $percent = 0; |
| } |
| $data{$poster}{'percent'} = $percent; |
| } |
| } |
| ## fix_percent |
| |
| ## Get all cross-posted newsgroups |
| for ( split /,/, $headers{"Newsgroups"} ) |
| ################################## |
| ## Write data structures to a file |
| ################################## |
| sub write_data |
| { |
| open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
| or die __x( "Can't create XDATA: {error}\n", error => $! ); |
| print $OUTF "Data collected from $newsgroup_name\n\n"; |
| print $OUTF |
| "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
| foreach my $name ( keys %data ) |
| { |
| $crossposts{$_}++; # bump count for each |
| print $OUTF |
| "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "Thread subjects\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| { |
| print $OUTF |
| "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "Cross-posts\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $name ( sort keys %crossposts ) |
| { |
| print $OUTF "$name : $crossposts{$name}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "User agents\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $name ( sort keys %agents ) |
| { |
| print $OUTF "$name : $agents{$name}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "Time zones\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $name ( sort keys %tz ) |
| { |
| print $OUTF "$name : $tz{$name}\n"; |
| } |
| close $OUTF; |
| } # write_data |
| |
| ## Get threads |
| my $thread = $headers{"Subject"}; |
| $thread =~ s/^re: //i; # Remove Re: or re: at start |
| $thread =~ s/\s+/ /g; # collapse whitespace |
| $threads{$thread}{count} += 1; # bump count of this subject |
| $threads{$thread}{size} += $filesize; # bump bytes for this thread |
| sub display_results |
| { |
| #################### DISPLAY RESULTS ##################### |
| print "=" x 76, "\n"; |
| printf "%s\n", |
| centred( |
| __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
| 76 ); |
| print "=" x 76, "\n"; |
| printf "%s\n", |
| centred( |
| __ |
| "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)", |
| 76 |
| ); |
| print "\n\n"; |
| printf __"Total posts considered: %s over %d days" . "\n", |
| commify($totalposts), |
| $numdays; |
| my $time_locale = setlocale(LC_TIME); |
| my $earliest_datetime = DateTime->from_epoch( |
| epoch => $earliest, |
| locale => $time_locale, |
| time_zone => 'UTC', |
| ); |
| my $latest_datetime = DateTime->from_epoch( |
| epoch => $latest, |
| locale => $time_locale, |
| time_zone => 'UTC', |
| ); |
| my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
| printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format); |
| printf __"Latest article" . ": %s\n", $latest_datetime->strftime($datetime_format); |
| printf __"Original articles: %s; replies" . ": %s\n", |
| commify($origposts), |
| commify($replies); |
| printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n", |
| commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
| printf __ |
| "Average %s articles per day, %.2f MiB per day, %s bytes per article\n", |
| commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
| commify( int( $totsize / $totalposts ) ); |
| my $count = keys %data; |
| printf __"Total headers: %s KiB; bodies: %s KiB\n", |
| commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
| printf __ |
| "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n", |
| commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
| ( $totorig * 100 ) / ( $totorig + $totquoted ), |
| commify( int( $totsig / 1024 ) ); |
| printf __"Total number of posters: %s, average %s bytes per poster\n", |
| commify($count), commify( int( $totsize / $count ) ); #/ |
| $count = keys %threads; |
| printf __"Total number of threads: %s, average %s bytes per thread\n", |
| commify($count), commify( int( $totsize / $count ) ); #/ |
| printf __"Total number of user agents: %d\n", scalar keys %agents; |
| print "\n", "=" x 76, "\n"; |
| |
| ## Is this an original post or a reply? |
| if ( defined $headers{"References"} ) |
| ######################################## |
| ## Show posters by article count Sec 1; |
| ######################################## |
| unless ( $skipSec{1} ) |
| { |
| $replies++; |
| if ( keys %data < $topposters ) |
| { |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", |
| centred( |
| __x( "Top {count} posters by number of articles", count => $topposters ), 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach |
| my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| { |
| my $name = substr( $poster, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{count}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| else |
| |
| ###################################### |
| ## Show posters by size in KiB Sec 2; |
| ###################################### |
| unless ( $skipSec{2} ) |
| { |
| $origposts++; |
| if ( keys %data < $topposters ) |
| { |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", |
| centred( |
| __x( "Top {count} posters by article size in KiB", count => $topposters ), |
| 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach |
| my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
| { |
| my $name = substr( $poster, 0, 62 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{size} / 1024; #/ |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ## Get the time zone |
| $_ = $headers{"Date"}; |
| my ($tz) = /\d\d:\d\d(?::\d\d)?\s+(.*)/; |
| if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
| ##################################### |
| ## Show top posters for original text |
| ##################################### |
| unless ( $skipSec{3} ) |
| { |
| $tz = "UTC"; |
| if ( keys %data < $topposters ) |
| { |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", |
| centred( |
| __x( |
| "Top {count} responders by original text (> 5 posts)", |
| count => $topposters |
| ), |
| 76 |
| ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $poster ( |
| sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| keys %data |
| ) |
| { |
| next if $data{$poster}{quoted} == 0; |
| next if $data{$poster}{count} < 5; |
| my $name = substr( $poster, 0, 63 ); |
| printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{percent}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| $tz{$tz}++; |
| |
| #### Now analyse the body text #### |
| my $insig = 0; |
| for (@body) |
| ######################################## |
| ## Show bottom posters for original text |
| ######################################## |
| unless ( $skipSec{4} ) |
| { |
| $totbody += length($_); # bump total body size |
| next if (/^$>/); # don't count blank lines in body |
| if ( $insig == 1 ) |
| if ( keys %data < $topposters ) |
| { |
| $totsig += length($_); # bump total sig size |
| $count = keys %data; |
| } |
| else |
| { |
| $count = $topposters; |
| } |
| printf "%s\n", |
| centred( |
| __x( |
| "Bottom {count} responders by original text (> 5 posts)", |
| count => $topposters |
| ), |
| 76 |
| ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $poster ( |
| sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| keys %data |
| ) |
| { |
| next if $data{$poster}{quoted} == 0; |
| next if $data{$poster}{count} < 5; |
| my $name = substr( $poster, 0, 63 ); |
| printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| $data{$poster}{percent}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ## Bill Unruh uses ] quotes, and another poster uses :: |
| ##################################### |
| ## Show threads by number of articles |
| ##################################### |
| unless ( $skipSec{5} ) |
| { |
| if ( keys %threads < $topthreads ) |
| { |
| $count = keys %threads; |
| } |
| elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
| { # are we in a quote line? |
| $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
| $totquoted += length($_); |
| else |
| { |
| $count = $topthreads; |
| } |
| elsif (/-- /) |
| printf "%s\n", |
| centred( __x( "Top {count} threads by no. of articles", count => $topthreads ), |
| 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $thread ( |
| sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
| keys %threads |
| ) |
| { |
| $insig = 1; |
| my $name = substr( $thread, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| $threads{$thread}{'count'}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ############################## |
| ## Show threads by size in KiB |
| ############################## |
| unless ( $skipSec{6} ) |
| { |
| if ( keys %threads < $topthreads ) |
| { |
| $count = keys %threads; |
| } |
| else |
| { |
| $count = $topthreads; |
| } |
| printf "%s\n", |
| centred( __x( "Top {count} threads by size in KiB", count => $topthreads ), |
| 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $thread ( |
| sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
| keys %threads |
| ) |
| { |
| my $name = substr( $thread, 0, 65 ); |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| $threads{$thread}{'size'} / 1024; #/ |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ## We must be processing an original line |
| $data{$poster}{orig} += length($_); # bump count of original chrs |
| $totorig += length($_); |
| ################################## |
| ## Show top 10 cross-posted groups |
| ################################## |
| unless ( $skipSec{7} ) |
| { |
| delete $crossposts{"$newsgroup_name"}; # don't include ours |
| if ( keys %crossposts < $topcrossposts ) |
| { |
| $count = keys %crossposts; |
| } |
| } # end for (@body) |
| else |
| { |
| $count = $topcrossposts; |
| } |
| printf "%s\n", |
| centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach |
| my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
| { |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| $crossposts{$name}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| } # get_data |
| ######################### |
| ## Show agents and counts |
| ######################### |
| unless ( $skipSec{8} ) |
| { |
| if ( keys %agents < $topagents ) |
| { |
| $count = keys %agents; |
| } |
| else |
| { |
| $count = $topagents; |
| } |
| printf "%s\n", |
| centred( __x( "Top {count} user agents by poster", count => $topagents ), |
| 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
| { |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
| $agents{$agent}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| |
| ######################################### |
| ## Count the User-Agents used, collapsing |
| ## different versions into one per agent. |
| ######################################### |
| sub count_agents |
| { |
| POSTER: |
| foreach my $poster ( keys %data ) |
| ####################### |
| ## Show distinct agents |
| ####################### |
| unless ( $skipSec{9} ) |
| { |
| foreach my $agent_name ( keys %distinct_agent ) |
| { # check against known ones |
| if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
| { |
| $agents{$agent_name}++; |
| next POSTER; |
| } |
| if ( keys %distinct_agent < $topagents ) |
| { |
| $count = keys %distinct_agent; |
| } |
| $agents{ $data{$poster}{agent} }++; |
| else |
| { |
| $count = $topagents; |
| } |
| printf "%s\n", |
| centred( |
| __x( "Top {count} user agents by number of posts", count => $topagents ), |
| 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $agent ( |
| sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| keys %distinct_agent |
| ) |
| { |
| printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
| $distinct_agent{$agent}, |
| ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| } # count_agents |
| |
| ############################################# |
| ## Set orig/total percentages for all posters |
| ############################################# |
| sub fix_percent |
| { |
| foreach my $poster ( keys %data ) |
| ############################ |
| ## Show timezones and counts |
| ############################ |
| unless ( $skipSec{10} ) |
| { |
| my $percent = 100; |
| if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
| if ( keys %tz < $toptz ) |
| { |
| $percent = |
| $data{$poster}{orig} * 100 / |
| ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
| $count = keys %tz; |
| } |
| elsif ( $data{$poster}{orig} == 0 ) |
| else |
| { |
| $percent = 0; |
| $count = $toptz; |
| } |
| $data{$poster}{percent} = $percent; |
| printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 ); |
| print "=" x 76, "\n"; |
| my $i = 0; |
| foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
| { |
| printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
| last if ( ++$i == $count ); |
| } |
| print "\n", "=" x 76, "\n"; |
| } |
| } |
| |
| ## helper subs |
| |
| ############################### |
| ## Right pad a string with '.'s |
| ############################### |
| 774,7 → 913,7 |
| my ( $text, $pad_len, $pad_chr ) = @_; |
| |
| ## DEBUG |
| #printf "|%s| = %d\n", $text, length($text); |
| printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
| |
| if ( length($text) > $pad_len ) |
| { |
| 798,10 → 937,36 |
| ########################### |
| ## Put commas into a number |
| ########################### |
| # Get some of locale's numeric formatting parameters |
| my ($thousands_sep, $grouping) = |
| @{localeconv()}{'thousands_sep', 'grouping'}; |
| # Apply defaults if values are missing |
| $thousands_sep = ',' unless $thousands_sep; |
| # grouping and mon_grouping are packed lists |
| # of small integers (characters) telling the |
| # grouping (thousand_seps and mon_thousand_seps |
| # being the group dividers) of numbers and |
| # monetary quantities. The integers' meanings: |
| # 255 means no more grouping, 0 means repeat |
| # the previous grouping, 1-254 means use that |
| # as the current grouping. Grouping goes from |
| # right to left (low to high digits). In the |
| # below we cheat slightly by never using anything |
| # else than the first grouping (whatever that is). |
| my @grouping; |
| if ($grouping) { |
| @grouping = unpack("C*", $grouping); |
| } else { |
| @grouping = (3); |
| } |
| |
| sub commify |
| { |
| local $_ = shift; |
| 1 while s/^([-+]?\d+)(\d{3})/$1,$2/; |
| #1 while s/^([-+]?\d+)(\d{3})/$1,$2/; |
| $_ = int; # Chop non-integer part |
| 1 while |
| s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/; |
| return $_; |
| } |
| |
| 819,61 → 984,17 |
| |
| sub usage |
| { |
| print "usage: newstat.pl newsgroupname\n"; |
| print __"usage: newsstat.pl NEWS.GROUP\n"; |
| exit 1; |
| } |
| |
| ################################## |
| ## Write data structures to a file |
| ################################## |
| sub write_data |
| sub dmsg |
| { |
| open my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" |
| or die "Can't create XDATA: $!\n"; |
| print $OUTF "Data collected from $newsgroup_name\n\n"; |
| print $OUTF |
| "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
| foreach my $name ( keys %data ) |
| { |
| print $OUTF |
| "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "Thread subjects\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| { |
| print $OUTF |
| "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "Cross-posts\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $name ( sort keys %crossposts ) |
| { |
| print $OUTF "$name : $crossposts{$name}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "User agents\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $name ( sort keys %agents ) |
| { |
| print $OUTF "$name : $agents{$name}\n"; |
| } |
| print $OUTF |
| "============================================================================\n"; |
| print $OUTF "Time zones\n"; |
| print $OUTF |
| "----------------------------------------------------------------------------\n"; |
| foreach my $name ( sort keys %tz ) |
| { |
| print $OUTF "$name : $tz{$name}\n"; |
| } |
| close $OUTF; |
| } # write_data |
| print STDERR @_, "\n"; |
| } |
| |
| sub dmsg2 |
| { |
| my ( $level, @msg ) = @_; |
| print STDERR @msg, "\n" if $level >= DEBUG; |
| } |