Rev 31 |
    Rev 45 |
    Go to most recent revision |
    View as "text/plain" |
    Blame |
    Compare with Previous |
    Last modification |
    View Log
    | RSS feed
  
  
    1
  
  
#!/usr/bin/env perl
use strict
;
use warnings
;
require 5.004;
#use diagnostics;
use utf8
;
## NOTE:
## Enable and remove binmode when utf8::all has actually become lexically scoped
# use utf8:all;
use constant DEBUG 
=> 0;
## newsstat.pl
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
## Based on work by Garry Knight et al.
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
## Print out all text to STDOUT UTF-8 encoded
binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';
## L10n
use locale 
':not_characters';
# setlocale( LC_MESSAGES, '' );
require Number
::Format;
## i18n
## FIXME: Automatically include resolved '.' in @INC
# print join "\n", @INC;
use Locale
::TextDomain ('de.pointedears.newsstat');
use POSIX              
('locale_h');
use Locale
::Messages qw (bind_textdomain_filter
  bind_textdomain_codeset
  turn_utf_8_on
);
bind_textdomain_filter 
'de.pointedears.newsstat',  \&turn_utf_8_on;
bind_textdomain_codeset 
'de.pointedears.newsstat', 'utf-8';
require Mail
::Message;
require DateTime
;
require DateTime
::Format::Mail;
# See comments in previous example
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
  @{ localeconv
() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
  'decimal_point' };
# Apply defaults if values are missing
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
$thousands_sep = ' ' unless $thousands_sep;
# grouping and mon_grouping are packed lists
# of small integers (characters) telling the
# grouping (thousand_seps and mon_thousand_seps
# being the group dividers) of numbers and
# monetary quantities.  The integers' meanings:
# 255 means no more grouping, 0 means repeat
# the previous grouping, 1-254 means use that
# as the current grouping.  Grouping goes from
# right to left (low to high digits).  In the
# below we cheat slightly by never using anything
# else than the first grouping (whatever that is).
my @grouping;
if ($grouping)
{
  @grouping = unpack( "C*", $grouping );
}
else
{
  @grouping = (3);
}
## FIXME: Why don't the defaults work already?
my $formatter = new Number
::Format(
  -decimal_point 
=> $decimal_point,
  -thousands_sep 
=> $thousands_sep,
  # -grouping      => $grouping[0]
);
###################### USER CONFIGURATIONS ############################
## The name of the group to do stats for
my $newsgroup_name = $ARGV[0];
$newsgroup_name // usage
();
## Check for removal flags
my $ix;
my $j;
my %skipSec;
my @skiplist;
my $args = @ARGV;
for ( $ix = 1 ; $ix < $args ; $ix++ )
{
  $j = $ix + 1;
  if ( $ARGV[$ix] eq "-x" )
  {
    @skiplist = split( ",", $ARGV[$j] );
  }
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
  {
    @skiplist = split( ",", $1 );
  }
}
foreach (@skiplist)
{
  $skipSec{$_} = 1;
}
## Leafnode users will want /var/spool/news for this variable.
my $news = "/var/spool/news/";
## Number of top or bottom posters to show
my $topposters = 20;
## Number of threads we want to know about
my $topthreads = 20;
## Number of cross-posted threads to show
my $topcrossposts = 10;
## Number of agents we list
my $topagents = 10;
## Number of time zones to show
my $toptz = 10;
###################### DATA STRUCTURES ######################
my $group = $newsgroup_name;
$group =~ s!\
.!/!g
;
my %data;    # name, count, agent, total, orig, quoted
my $totsize = 0;    # holds total sizes of all files
my %crossposts;     # group, count
my %threads;        # subject, count
my $replies   = 0;  # total no. of replies
my $origposts = 0;  # total no. of original posts
my %tz;             # timezones by count
my $earliest;       # earliest article we have found
my $latest;         # latest article we have found
my $totheader = 0;  # total size of header material
my $totbody   = 0;  # total size of body material
my $totsig    = 0;  # total size of sig material
my $totquoted = 0;  # total size of quoted material
my $totorig   = 0;  # total size of original material
my $totalposts;     # total no. of posts considered
my %distinct_agent;
## Used to hold counts of User Agents used
my %agents = (
  "Compuserver"               => 0,
  "Foorum"                    => 0,
  "Forte Agent"               => 0,
  "Forte Free Agent"          => 0,
  "Gnus"                      => 0,
  "KNode"                     => 0,
  "MacSOUP"                   => 0,
  "MT-NewsWatcher"            => 0,
  "MicroPlanet Gravity"       => 0,
  "Microsoft Outlook Express" => 0,
  "Microsoft Windows Mail"    => 0,
  "Mozilla"                   => 0,
  "News Rover"                => 0,
  "NN"                        => 0,
  "Pan"                       => 0,
  "rn"                        => 0,
  "slrn"                      => 0,
  "Sylpheed"                  => 0,
  "tin"                       => 0,
  "VSoup"                     => 0,
  "WebTV"                     => 0,
  "Xnews"                     => 0,
);
my $datetime_parser = DateTime
::Format::Mail->new();
$datetime_parser->loose();
my $today = DateTime
->today( time_zone 
=> 'UTC' );
my $prev_month = $today->clone()->subtract( months 
=> 1 )->set_day(1);
my $start      = int $prev_month->strftime('%s');
my $numdays    = int DateTime
->last_day_of_month(
  year      
=> $prev_month->year(),
  month     
=> $prev_month->month(),
  time_zone 
=> $prev_month->time_zone(),
)->day();
my $end = int $today->clone()->set_day(1)->strftime('%s');
dmsg
( $start, " to ", $end ) if DEBUG
;
chdir("$news$group")
  or die __x
(
  "Can't cd to {newsgroup}: {error}\n",
  newsgroup 
=> "$news$group",
  error     
=> $!
  );
opendir( DIR
, "." )
  or die __x
(
  "Can't open {newsgroup}: {error}\n",
  newsgroup 
=> "$news$group",
  error     
=> $!
  );
while ( defined( my $filename = readdir(DIR
) ) )
{
  next unless -f 
$filename;    # only want real files
  next if ( $filename eq ".overview" );    # real articles only
  get_article
($filename);                  # read in the article
}
closedir(DIR
);                             # finished with the directory
dmsg
("\nearliest: $earliest\nlatest:   $latest") if DEBUG
;
## Post-processing
count_agents
();                            # count agents, collapsing versions
fix_percent
();
write_data
();
display_results
();
########################################
## Get current article's header and body
########################################
sub get_article
{
  my $filename = shift;
  open( my $FILE, '<', $filename )
    or
    die __x
( "Can't open {file}: {error}\n", file 
=> $filename, error 
=> $! );
  my $msg       = Mail
::Message->read($FILE);
  my $timestamp = $msg->timestamp();
  my $date      = $msg->study('Date');
  ## Disregard article if timestamp is not in range
  dmsg
($timestamp) if DEBUG
;
  if ( $timestamp < $start || $timestamp >= $end )
  {
    dmsg
("Posting on $date ignored.") if DEBUG
;
    return;
  }
  $totalposts++;    # bump count of articles considered
  ## DEBUG
  dmsg
($date) if DEBUG
;
  ## get stats about the file itself
  my $filesize = -s $filename;    # get total size of file
  $totsize += $filesize;          # bump total sizes of all files
  if ( ( not defined $earliest ) || $timestamp < $earliest )
  {
    $earliest = $timestamp;
  }
  elsif ( ( not defined $latest ) || $timestamp > $latest )
  {
    $latest = $timestamp;
  }
  #print "timestamp: $timestamp\n";
  ## count header size
  $totheader += $msg->head()->size();
  ## get the poster's name (MIME-decoded, in UTF-8)
  my $poster = $msg->study('From');
  if ( defined $poster )
  {
    ## Convert old to new format
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
    ## Collapse whitespace
    $poster =~ s/\s+/ /g;
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
    ## DEBUG
    dmsg
($poster) if DEBUG
;
    ## seen this one before?
    if ( !defined( $data{$poster} ) )
    {
      $data{$poster}{'agent'}  = __ 
'unknown';    # comes after For: field
      $data{$poster}{'orig'}   = 0;
      $data{$poster}{'quoted'} = 0;
    }
    $data{$poster}{'count'}++;                    # bump count for this poster
    $data{$poster}{'size'} += $filesize;          # total size of file
    ## The User-Agent and/or X-Newsreader fields
    ## for User-Agent by poster
    my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader');
    if ( defined $ua )
    {
      $data{$poster}{'agent'} = $ua;
      ## DEBUG
      dmsg
($ua) if DEBUG
;
    }
    ## The User Agent for User-Agent by number of posts
    get_agent
($msg);
    ## Get all cross-posted newsgroups
    for ( split( /,/, $msg->study('Newsgroups') ) )
    {
      $crossposts{$_}++;    # bump count for each
    }
    ## Get threads
    my $thread = $msg->study('Subject');
    $thread =~ s/^\s*re:\s*//i;    # Remove Re: or re: at the start
    $thread =~ s/\s*\(was:\s*.*\)\s*$//i;    # Remove (was: ...) at the end
    $thread =~ s/\s+/ /g;       # collapse whitespace
    $threads{$thread}{'count'}++;    # bump count of this subject
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
    ## Is this an original post or a reply?
    if ( defined $msg->study('References') )
    {
      $replies++;
    }
    else
    {
      $origposts++;
    }
    ## Get the time zone
    my $datetime = $datetime_parser->parse_datetime($date);
    my $tz       = $datetime->strftime('%z');
    $tz = "UTC" if $tz =~ m{^(?:GMT
|0000
)$
}o
;
    $tz{$tz}++;
    ## DEBUG
    dmsg
($tz) if DEBUG
;
#### Now analyse the body text ####
    my $body = $msg->body();
    my $insig = 0;
    my @body  = $body->lines;
    for (@body)
    {
      $totbody += length($_);    # bump total body size
      next if (m{^$>}o
);         # don't count blank lines in body
      if ( $insig == 1 )
      {
        # bump total sig size
        $totsig += length($_);
      }
      ## are we in a quote line?
      ## Bill Unruh uses ] quotes, and another poster uses ::
      elsif ( m{^\
s*[>\
]]}o 
|| m{^\
s*::}o 
)
      {
        ## bump count of quoted chrs
        $data{$poster}{'quoted'} += length($_);
        $totquoted += length($_);
      }
      elsif (/^-- $/)
      {
        $insig = 1;
      }
      else
      {
        ## We must be processing an original line
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
        $totorig += length($_);
      }
    }
    # end for (@body)
  }
  close($FILE);
}
sub get_agent
{
  my $msg = shift;
  my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader')
    // $msg->study('X-Mailer');
  if ( not defined $ua )
  {
    my $org = $msg->study('Organization');
    if ( defined $org
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
    {
      $ua = $org;
    }
    elsif ( $msg->study('Message-ID') =~ /pine/i )
    {
      $ua = "Pine";
    }
  }
  ## Hopefully found UA, else set to unknown
  if ( not defined $ua )
  {
    $ua = __ 
"unknown";
  }
  $ua = clean
($ua);
  my $raw   = $ua;
  my $agent = $raw;
  ## strip http
  if ( $raw =~ /.*http.*/ )
  {
    $raw =~ s!posted via
!!i
;
    $raw =~ s!http
://!!g
;
    $raw =~ s!/!!g
;
    $raw =~ s! !!g
;
  }
  ## Fix Outlook from Mac
  if ( $raw =~ /^microsoft/i )
  {
    $raw =~ s/-/ /g;
  }
  ## Pick out the popular agents
  if ( $raw =~ /(outlook express)/i
    || $raw =~ /(windows mail)/i
    || $raw =~ /(microplanet gravity)/i
    || $raw =~ /(news rover)/i
    || $raw =~ /(forte agent)/i
    || $raw =~ /(forte free agent)/i )
  {
    $agent = $1;
  }
  elsif (
    $raw =~ /^(
        pan
       
|sylpheed
       
|slrn
       
|mozilla
       
|knode
       
|tin
       
|hamster
       
|xrn
       
|xnews
       
|aol
       
|gnus
       
|krn
       
|macsoup
       
|messenger
       
|openxp
       
|pine
       
|thoth
       
|turnpike
       
|winvn
       
|vsoup
       
|google
       
|supernews
       
|nn
       
|rn
       
|007
       
|webtv
       
|compuserve
       
)/ix
    
)
  {
    $agent = $1;
  }
  else
  {
    ## Clean up unknown agents
    if ( $raw =~ m!^(.*?)/! )
    {
      $agent = $1;
    }
    elsif ( $raw =~ /^(\w*)\d.*/ )
    {
      $agent = $1;
    }
  }
  $distinct_agent{$agent}++;
  return $agent;
}
## get_agent
#########################################
## Count the User-Agents used, collapsing
## different versions into one per agent.
#########################################
sub count_agents
{
POSTER
:
  foreach my $poster ( keys %data )
  {
    foreach my $agent_name ( keys %distinct_agent )
    {    # check against known ones
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
      {
        $agents{$agent_name}++;
        next POSTER
;
      }
    }
    $agents{ $data{$poster}{'agent'} }++;
  }
}    # count_agents
#############################################
## Set orig/total percentages for all posters
#############################################
sub fix_percent
{
  foreach my $poster ( keys %data )
  {
    my $percent = 100;
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
    {
      $percent =
        $data{$poster}{'orig'} * 100 /
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
    }
    elsif ( $data{$poster}{'orig'} == 0 )
    {
      $percent = 0;
    }
    $data{$poster}{'percent'} = $percent;
  }
}
## fix_percent
##################################
## Write data structures to a file
##################################
sub write_data
{
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
    or die __x
( "Can't create XDATA: {error}\n", error 
=> $! );
  print $OUTF "Data collected from $newsgroup_name\n\n";
  print $OUTF
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
  foreach my $name ( keys %data )
  {
    print $OUTF
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
  }
  print $OUTF
"============================================================================\n";
  print $OUTF "Thread subjects\n";
  print $OUTF
"----------------------------------------------------------------------------\n";
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
  {
    print $OUTF
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
  }
  print $OUTF
"============================================================================\n";
  print $OUTF "Cross-posts\n";
  print $OUTF
"----------------------------------------------------------------------------\n";
  foreach my $name ( sort keys %crossposts )
  {
    print $OUTF "$name : $crossposts{$name}\n";
  }
  print $OUTF
"============================================================================\n";
  print $OUTF "User agents\n";
  print $OUTF
"----------------------------------------------------------------------------\n";
  foreach my $name ( sort keys %agents )
  {
    print $OUTF "$name : $agents{$name}\n";
  }
  print $OUTF
"============================================================================\n";
  print $OUTF "Time zones\n";
  print $OUTF
"----------------------------------------------------------------------------\n";
  foreach my $name ( sort keys %tz )
  {
    print $OUTF "$name : $tz{$name}\n";
  }
  close $OUTF;
}    # write_data
sub display_results
{
  #################### DISPLAY RESULTS #####################
  println
( "=" x 
76 );
  printf "%s\n",
    centred
(
    __x
( "Analysis of posts to {newsgroup}", newsgroup 
=> $newsgroup_name ),
    76 );
  println
( "=" x 
76 );
  printf "%s\n",
    centred
(
    __
(
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
    ),
    76
    );
  print "\n\n";
  printf __
"Total posts considered: %s over %d days\n",
    $formatter->format_number($totalposts),
    $formatter->format_number($numdays);
  my $time_locale       = setlocale
(LC_TIME
);
  my $earliest_datetime = DateTime
->from_epoch(
    epoch     
=> $earliest,
    locale    
=> $time_locale,
    time_zone 
=> 'UTC',
  );
  my $latest_datetime = DateTime
->from_epoch(
    epoch     
=> $latest,
    locale    
=> $time_locale,
    time_zone 
=> 'UTC',
  );
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
  printf __
"Earliest article: %s\n",
    $earliest_datetime->strftime($datetime_format);
  printf __
"Latest article:   %s\n",
    $latest_datetime->strftime($datetime_format);
  printf __
"Original articles: %s; replies: %s\n",
    $formatter->format_number($origposts),
    $formatter->format_number($replies);
  printf __
"Total size of posts: %s bytes (%s)" . "\n",
    $formatter->format_number($totsize),
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
  printf __
"Average %s articles per day, %s per day, %s bytes per article\n",
    $formatter->format_number( int( $totalposts / $numdays ) ),
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
    $formatter->format_number( int( $totsize / $totalposts ) );
  my $count = keys %data;
  printf __
"Total headers: %s; bodies: %s\n",
    $formatter->format_bytes(
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
    ),
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
  printf __
    
"Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
    $formatter->format_bytes(
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
    ),
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
  printf __
"Total number of posters: %s, average %s per poster\n",
    $formatter->format_number($count),
    $formatter->format_bytes( $totsize / $count,
    ( 'precision' => 1, 'mode' => 'iec' ) );
  $count = keys %threads;
  printf __
"Total number of threads: %s, average %s per thread\n",
    $formatter->format_number($count),
    $formatter->format_bytes( $totsize / $count,
    ( 'precision' => 1, 'mode' => 'iec' ) );
  printf __
"Total number of user agents: %d\n",
    $formatter->format_number( scalar keys %agents );
  print "\n", "=" x 
76, "\n";
  ########################################
  ## Show posters by article count  Sec 1;
  ########################################
  unless ( $skipSec{1} )
  {
    if ( keys %data < $topposters )
    {
      $count = keys %data;
    }
    else
    {
      $count = $topposters;
    }
    printf "%s\n",
      centred
(
      __x
( "Top {count} posters by number of articles", count 
=> $topposters ),
      76
      );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach
      my $poster ( sort { $data{$b}{count
} <=> $data{$a}{count
} } keys %data )
    {
      my $name = substr( $poster, 0, 65 );
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $poster, 63, "." ),
        $data{$poster}{count
};
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  ######################################
  ## Show posters by size in KiB  Sec 2;
  ######################################
  unless ( $skipSec{2} )
  {
    if ( keys %data < $topposters )
    {
      $count = keys %data;
    }
    else
    {
      $count = $topposters;
    }
    printf "%s\n",
      centred
(
      __x
( "Top {count} posters by article size in KiB", count 
=> $topposters ),
      76
      );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach
      my $poster ( sort { $data{$b}{size
} <=> $data{$a}{size
} } keys %data )
    {
      my $name = substr( $poster, 0, 62 );
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $poster, 63, "." ),
        $data{$poster}{size
} / 1024;    #/
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  #####################################
  ## Show top posters for original text
  #####################################
  my $topposters_real = 0;
  unless ( $skipSec{3} )
  {
    if ( keys %data < $topposters )
    {
      $count = keys %data;
    }
    else
    {
      $count = $topposters;
    }
    printf "%s\n",
      centred
(
      __x
(
        "Top {count} responders by original text (> 5 posts)",
        count 
=> $topposters
      ),
      76
      );
    print "=" x 
76, "\n";
    foreach my $poster (
      sort { $data{$b}{percent
} <=> $data{$a}{percent
} }
      keys %data
      )
    {
      next if $data{$poster}{quoted
} == 0;
      next if $data{$poster}{count
} < 5;
      my $name = substr( $poster, 0, 63 );
      printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1,
        rpad
( $poster, 63, "." ),
        $data{$poster}{percent
};
      last if ( ++$topposters_real == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  ########################################
  ## Show bottom posters for original text
  ########################################
  $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4};
  unless ( $skipSec{4} )
  {
    if ( keys %data < $topposters )
    {
      $count = keys %data;
    }
    else
    {
      $count = $topposters;
    }
    printf "%s\n",
      centred
(
      __x
(
        "Bottom {count} responders by original text  (> 5 posts)",
        count 
=> $topposters
      ),
      76
      );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach my $poster (
      sort { $data{$a}{percent
} <=> $data{$b}{percent
} }
      keys %data
      )
    {
      next if $data{$poster}{quoted
} == 0;
      next if $data{$poster}{count
} < 5;
      my $name = substr( $poster, 0, 63 );
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad
( $poster, 63, "." ),
        $data{$poster}{percent
};
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  #####################################
  ## Show threads by number of articles
  #####################################
  unless ( $skipSec{5} )
  {
    if ( keys %threads < $topthreads )
    {
      $count = keys %threads;
    }
    else
    {
      $count = $topthreads;
    }
    printf "%s\n",
      centred
(
      __x
( "Top {count} threads by no. of articles", count 
=> $topthreads ),
      76 );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach my $thread (
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
      keys %threads
      )
    {
      my $name = substr( $thread, 0, 65 );
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $name, 63, "." ),
        $threads{$thread}{'count'};
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  ##############################
  ## Show threads by size in KiB
  ##############################
  unless ( $skipSec{6} )
  {
    if ( keys %threads < $topthreads )
    {
      $count = keys %threads;
    }
    else
    {
      $count = $topthreads;
    }
    printf "%s\n",
      centred
(
      __x
( "Top {count} threads by size in KiB", count 
=> $topthreads ), 76 );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach my $thread (
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
      keys %threads
      )
    {
      my $name = substr( $thread, 0, 65 );
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $name, 63, "." ),
        $threads{$thread}{'size'} / 1024;    #/
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  ##################################
  ## Show top 10 cross-posted groups
  ##################################
  unless ( $skipSec{7} )
  {
    delete $crossposts{"$newsgroup_name"};    # don't include ours
    if ( keys %crossposts < $topcrossposts )
    {
      $count = keys %crossposts;
    }
    else
    {
      $count = $topcrossposts;
    }
    printf "%s\n",
      centred
(
      __x
( "Top {count} cross-posted groups", count 
=> $topcrossposts ), 76 );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
    {
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $name, 63, "." ),
        $crossposts{$name};
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  #########################
  ## Show agents and counts
  #########################
  unless ( $skipSec{8} )
  {
    if ( keys %agents < $topagents )
    {
      $count = keys %agents;
    }
    else
    {
      $count = $topagents;
    }
    printf "%s\n",
      centred
( __x
( "Top {count} user agents by poster", count 
=> $topagents ),
      76 );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
    {
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $agent, 63, "." ),
        $agents{$agent};
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  #######################
  ## Show distinct agents
  #######################
  unless ( $skipSec{9} )
  {
    if ( keys %distinct_agent < $topagents )
    {
      $count = keys %distinct_agent;
    }
    else
    {
      $count = $topagents;
    }
    printf "%s\n",
      centred
(
      __x
( "Top {count} user agents by number of posts", count 
=> $topagents ),
      76
      );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach my $agent (
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
      keys %distinct_agent
      )
    {
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad
( $agent, 58, "." ),
        $distinct_agent{$agent},
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
  ############################
  ## Show timezones and counts
  ############################
  unless ( $skipSec{10} )
  {
    if ( keys %tz < $toptz )
    {
      $count = keys %tz;
    }
    else
    {
      $count = $toptz;
    }
    printf "%s\n",
      centred
( __x
( "Top {count} time zones", count 
=> $toptz ), 76 );
    print "=" x 
76, "\n";
    my $i = 0;
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
    {
      printf "%2d. %-63s : %6d\n", $i + 1, rpad
( $zone, 63, "." ), $tz{$zone};
      last if ( ++$i == $count );
    }
    print "\n", "=" x 
76, "\n";
  }
}
## helper subs
###############################
## Right pad a string with '.'s
###############################
sub rpad
{
  ## Get text to pad, length to pad, pad chr
  my ( $text, $pad_len, $pad_chr ) = @_;
  ## DEBUG
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG 
> 1;
  if ( length($text) > $pad_len )
  {
    $text = substr( $text, 0, $pad_len );
  }
  my $padded = $text . $pad_chr x 
( $pad_len - length($text) );
  return $padded;
}
##################
## Centre a string
##################
sub centred
{
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
  my $pad_len = ( $width - length($text) ) / 2;    #/
  my $centred = " " x 
$pad_len . $text;
  return $centred;
}
###########################
## Put commas into a number
###########################
sub commify
{
  local $_ = shift;
  my $number = $_;
  $_ = int;                                        # Chop non-integer part
  1 while
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
  my $int_part  = $_;
  my $real_part = '';
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
  {
    $real_part = $1;
  }
  return $int_part . $real_part;
}
################################################################
## Returns a string with leading and trailing whitespace removed
################################################################
sub clean
{
  my $dirty = shift;
  my $clean = $dirty;
  $clean =~ s/^\s+|\s+$//g;
  return $clean;
}
sub usage
{
  println
( __ 
"usage: newsstat.pl NEWS.GROUP" );
  exit 1;
}
sub dmsg
{
  print STDERR @_, "\n";
}
sub dmsg2
{
  my ( $level, @msg ) = @_;
  print STDERR @msg, "\n" if $level >= DEBUG
;
}
sub println
{
  print @_, "\n";
}