Subversion Repositories LCARS

Rev

Rev 48 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8 PointedEar 1
#!/usr/bin/env perl
5 PointedEar 2
use strict;
6 PointedEar 3
use warnings;
23 PointedEar 4
require 5.004;
22 PointedEar 5
 
6
#use diagnostics;
6 PointedEar 7
use utf8;
8
 
30 PointedEar 9
## NOTE:
10
## Enable and remove binmode when utf8::all has actually become lexically scoped
11
# use utf8:all;
12
 
22 PointedEar 13
use constant DEBUG => 0;
14
 
24 PointedEar 15
## newsstat.pl
16
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
17
## Based on work by Garry Knight et al.
31 PointedEar 18
##
24 PointedEar 19
## This program is free software: you can redistribute it and/or modify
20
## it under the terms of the GNU General Public License as published by
21
## the Free Software Foundation, either version 3 of the License, or
22
## (at your option) any later version.
31 PointedEar 23
##
24 PointedEar 24
## This program is distributed in the hope that it will be useful,
25
## but WITHOUT ANY WARRANTY; without even the implied warranty of
26
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27
## GNU General Public License for more details.
31 PointedEar 28
##
24 PointedEar 29
## You should have received a copy of the GNU General Public License
30
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
31
 
13 PointedEar 32
## Print out all text to STDOUT UTF-8 encoded
33
binmode STDOUT, ':encoding(UTF-8)';
22 PointedEar 34
binmode STDERR, ':encoding(UTF-8)';
5 PointedEar 35
 
23 PointedEar 36
## L10n
37
use locale ':not_characters';
38
 
39
# setlocale( LC_MESSAGES, '' );
40
require Number::Format;
41
 
42
## i18n
43
## FIXME: Automatically include resolved '.' in @INC
22 PointedEar 44
# print join "\n", @INC;
5 PointedEar 45
 
22 PointedEar 46
use Locale::TextDomain ('de.pointedears.newsstat');
23 PointedEar 47
use POSIX              ('locale_h');
22 PointedEar 48
use Locale::Messages qw (bind_textdomain_filter
23 PointedEar 49
  bind_textdomain_codeset
50
  turn_utf_8_on);
51
 
52
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
22 PointedEar 53
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23 PointedEar 54
 
22 PointedEar 55
require Mail::Message;
56
require DateTime;
57
require DateTime::Format::Mail;
23 PointedEar 58
 
59
# See comments in previous example
60
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
61
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
62
  'decimal_point' };
63
 
64
# Apply defaults if values are missing
65
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
66
$thousands_sep = ' ' unless $thousands_sep;
67
 
68
# grouping and mon_grouping are packed lists
69
# of small integers (characters) telling the
70
# grouping (thousand_seps and mon_thousand_seps
71
# being the group dividers) of numbers and
72
# monetary quantities.  The integers' meanings:
73
# 255 means no more grouping, 0 means repeat
74
# the previous grouping, 1-254 means use that
75
# as the current grouping.  Grouping goes from
76
# right to left (low to high digits).  In the
77
# below we cheat slightly by never using anything
78
# else than the first grouping (whatever that is).
79
my @grouping;
80
if ($grouping)
81
{
82
  @grouping = unpack( "C*", $grouping );
83
}
84
else
85
{
86
  @grouping = (3);
87
}
88
 
89
## FIXME: Why don't the defaults work already?
90
my $formatter = new Number::Format(
91
  -decimal_point => $decimal_point,
92
  -thousands_sep => $thousands_sep,
49 PointedEar 93
  -kibi_suffix   => ' KiB',
94
  -mebi_suffix   => ' MiB',
95
  -gibi_suffix   => ' GiB',
96
 
23 PointedEar 97
  # -grouping      => $grouping[0]
98
);
99
 
5 PointedEar 100
###################### USER CONFIGURATIONS ############################
101
 
13 PointedEar 102
## The name of the group to do stats for
5 PointedEar 103
my $newsgroup_name = $ARGV[0];
31 PointedEar 104
$newsgroup_name // usage();
5 PointedEar 105
 
13 PointedEar 106
## Check for removal flags
5 PointedEar 107
my $ix;
108
my $j;
109
my %skipSec;
110
my @skiplist;
111
my $args = @ARGV;
7 PointedEar 112
for ( $ix = 1 ; $ix < $args ; $ix++ )
113
{
114
  $j = $ix + 1;
115
  if ( $ARGV[$ix] eq "-x" )
116
  {
117
    @skiplist = split( ",", $ARGV[$j] );
118
  }
119
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
120
  {
121
    @skiplist = split( ",", $1 );
122
  }
5 PointedEar 123
}
7 PointedEar 124
foreach (@skiplist)
125
{
5 PointedEar 126
  $skipSec{$_} = 1;
127
}
128
 
13 PointedEar 129
## Leafnode users will want /var/spool/news for this variable.
5 PointedEar 130
my $news = "/var/spool/news/";
131
 
22 PointedEar 132
## Number of top or bottom posters to show
133
my $topposters = 20;
5 PointedEar 134
 
13 PointedEar 135
## Number of threads we want to know about
5 PointedEar 136
my $topthreads = 20;
137
 
13 PointedEar 138
## Number of cross-posted threads to show
5 PointedEar 139
my $topcrossposts = 10;
140
 
22 PointedEar 141
## Number of agents we list
142
my $topagents = 10;
143
 
13 PointedEar 144
## Number of time zones to show
5 PointedEar 145
my $toptz = 10;
146
 
147
###################### DATA STRUCTURES ######################
148
my $group = $newsgroup_name;
149
$group =~ s!\.!/!g;
22 PointedEar 150
my %data;    # name, count, agent, total, orig, quoted
151
my $totsize = 0;    # holds total sizes of all files
152
my %crossposts;     # group, count
153
my %threads;        # subject, count
154
my $replies   = 0;  # total no. of replies
155
my $origposts = 0;  # total no. of original posts
156
my %tz;             # timezones by count
157
my $earliest;       # earliest article we have found
158
my $latest;         # latest article we have found
159
my $totheader = 0;  # total size of header material
160
my $totbody   = 0;  # total size of body material
161
my $totsig    = 0;  # total size of sig material
162
my $totquoted = 0;  # total size of quoted material
163
my $totorig   = 0;  # total size of original material
164
my $totalposts;     # total no. of posts considered
5 PointedEar 165
my %distinct_agent;
13 PointedEar 166
 
167
## Used to hold counts of User Agents used
168
my %agents = (
169
  "Compuserver"               => 0,
170
  "Foorum"                    => 0,
7 PointedEar 171
  "Forte Agent"               => 0,
172
  "Forte Free Agent"          => 0,
13 PointedEar 173
  "Gnus"                      => 0,
174
  "KNode"                     => 0,
175
  "MacSOUP"                   => 0,
176
  "MT-NewsWatcher"            => 0,
7 PointedEar 177
  "MicroPlanet Gravity"       => 0,
178
  "Microsoft Outlook Express" => 0,
13 PointedEar 179
  "Microsoft Windows Mail"    => 0,
180
  "Mozilla"                   => 0,
181
  "News Rover"                => 0,
182
  "NN"                        => 0,
183
  "Pan"                       => 0,
184
  "rn"                        => 0,
7 PointedEar 185
  "slrn"                      => 0,
13 PointedEar 186
  "Sylpheed"                  => 0,
7 PointedEar 187
  "tin"                       => 0,
13 PointedEar 188
  "VSoup"                     => 0,
7 PointedEar 189
  "WebTV"                     => 0,
22 PointedEar 190
  "Xnews"                     => 0,
13 PointedEar 191
);
5 PointedEar 192
 
22 PointedEar 193
my $datetime_parser = DateTime::Format::Mail->new();
194
$datetime_parser->loose();
5 PointedEar 195
 
22 PointedEar 196
my $today = DateTime->today( time_zone => 'UTC' );
197
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
198
my $start      = int $prev_month->strftime('%s');
199
my $numdays    = int DateTime->last_day_of_month(
200
  year      => $prev_month->year(),
201
  month     => $prev_month->month(),
202
  time_zone => $prev_month->time_zone(),
203
)->day();
204
my $end = int $today->clone()->set_day(1)->strftime('%s');
205
 
206
dmsg( $start, " to ", $end ) if DEBUG;
207
 
208
chdir("$news$group")
209
  or die __x(
210
  "Can't cd to {newsgroup}: {error}\n",
211
  newsgroup => "$news$group",
212
  error     => $!
213
  );
214
opendir( DIR, "." )
215
  or die __x(
216
  "Can't open {newsgroup}: {error}\n",
217
  newsgroup => "$news$group",
218
  error     => $!
219
  );
220
 
221
while ( defined( my $filename = readdir(DIR) ) )
7 PointedEar 222
{
22 PointedEar 223
  next unless -f $filename;    # only want real files
224
  next if ( $filename eq ".overview" );    # real articles only
225
 
226
  get_article($filename);                  # read in the article
5 PointedEar 227
}
22 PointedEar 228
closedir(DIR);                             # finished with the directory
7 PointedEar 229
 
22 PointedEar 230
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
231
 
13 PointedEar 232
## Post-processing
22 PointedEar 233
count_agents();                            # count agents, collapsing versions
234
fix_percent();
5 PointedEar 235
 
22 PointedEar 236
write_data();
237
display_results();
5 PointedEar 238
 
13 PointedEar 239
########################################
22 PointedEar 240
## Get current article's header and body
13 PointedEar 241
########################################
22 PointedEar 242
sub get_article
7 PointedEar 243
{
22 PointedEar 244
  my $filename = shift;
5 PointedEar 245
 
22 PointedEar 246
  open( my $FILE, '<', $filename )
247
    or
248
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
249
  my $msg       = Mail::Message->read($FILE);
250
  my $timestamp = $msg->timestamp();
251
  my $date      = $msg->study('Date');
5 PointedEar 252
 
22 PointedEar 253
  ## Disregard article if timestamp is not in range
254
  dmsg($timestamp) if DEBUG;
31 PointedEar 255
  if ( $timestamp < $start || $timestamp >= $end )
7 PointedEar 256
  {
22 PointedEar 257
    dmsg("Posting on $date ignored.") if DEBUG;
258
    return;
7 PointedEar 259
  }
5 PointedEar 260
 
22 PointedEar 261
  $totalposts++;    # bump count of articles considered
5 PointedEar 262
 
22 PointedEar 263
  ## DEBUG
264
  dmsg($date) if DEBUG;
13 PointedEar 265
 
22 PointedEar 266
  ## get stats about the file itself
267
  my $filesize = -s $filename;    # get total size of file
268
  $totsize += $filesize;          # bump total sizes of all files
5 PointedEar 269
 
31 PointedEar 270
  if ( ( not defined $earliest ) || $timestamp < $earliest )
7 PointedEar 271
  {
22 PointedEar 272
    $earliest = $timestamp;
7 PointedEar 273
  }
31 PointedEar 274
  elsif ( ( not defined $latest ) || $timestamp > $latest )
7 PointedEar 275
  {
22 PointedEar 276
    $latest = $timestamp;
7 PointedEar 277
  }
13 PointedEar 278
 
22 PointedEar 279
  #print "timestamp: $timestamp\n";
5 PointedEar 280
 
22 PointedEar 281
  ## count header size
282
  $totheader += $msg->head()->size();
5 PointedEar 283
 
22 PointedEar 284
  ## get the poster's name (MIME-decoded, in UTF-8)
285
  my $poster = $msg->study('From');
286
  if ( defined $poster )
7 PointedEar 287
  {
22 PointedEar 288
    ## Convert old to new format
289
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
5 PointedEar 290
 
22 PointedEar 291
    ## Collapse whitespace
292
    $poster =~ s/\s+/ /g;
5 PointedEar 293
 
22 PointedEar 294
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
295
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5 PointedEar 296
 
22 PointedEar 297
    ## DEBUG
298
    dmsg($poster) if DEBUG;
7 PointedEar 299
 
22 PointedEar 300
    ## seen this one before?
301
    if ( !defined( $data{$poster} ) )
302
    {
303
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
304
      $data{$poster}{'orig'}   = 0;
305
      $data{$poster}{'quoted'} = 0;
306
    }
307
    $data{$poster}{'count'}++;                    # bump count for this poster
308
    $data{$poster}{'size'} += $filesize;          # total size of file
5 PointedEar 309
 
22 PointedEar 310
    ## The User-Agent and/or X-Newsreader fields
311
    ## for User-Agent by poster
31 PointedEar 312
    my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader');
22 PointedEar 313
    if ( defined $ua )
7 PointedEar 314
    {
22 PointedEar 315
      $data{$poster}{'agent'} = $ua;
316
 
317
      ## DEBUG
318
      dmsg($ua) if DEBUG;
7 PointedEar 319
    }
5 PointedEar 320
 
48 PointedEar 321
    ## The User Agent for User-Agent by number of articles
22 PointedEar 322
    get_agent($msg);
5 PointedEar 323
 
22 PointedEar 324
    ## Get all cross-posted newsgroups
325
    for ( split( /,/, $msg->study('Newsgroups') ) )
326
    {
327
      $crossposts{$_}++;    # bump count for each
328
    }
5 PointedEar 329
 
22 PointedEar 330
    ## Get threads
331
    my $thread = $msg->study('Subject');
48 PointedEar 332
    $thread =~ s/^\s*re:\s*//i;              # Remove Re: or re: at the start
33 PointedEar 333
    $thread =~ s/\s*\(was:\s*.*\)\s*$//i;    # Remove (was: ...) at the end
48 PointedEar 334
    $thread =~ s/\s+/ /g;                    # collapse whitespace
335
    $threads{$thread}{'count'}++;            # bump count of this subject
336
    $threads{$thread}{'size'} += $filesize;  # bump bytes for this thread
5 PointedEar 337
 
22 PointedEar 338
    ## Is this an original post or a reply?
339
    if ( defined $msg->study('References') )
7 PointedEar 340
    {
22 PointedEar 341
      $replies++;
7 PointedEar 342
    }
22 PointedEar 343
    else
7 PointedEar 344
    {
22 PointedEar 345
      $origposts++;
7 PointedEar 346
    }
22 PointedEar 347
 
348
    ## Get the time zone
349
    my $datetime = $datetime_parser->parse_datetime($date);
350
    my $tz       = $datetime->strftime('%z');
351
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
352
    $tz{$tz}++;
353
 
354
    ## DEBUG
355
    dmsg($tz) if DEBUG;
356
 
357
#### Now analyse the body text ####
358
    my $body = $msg->body();
359
 
360
    my $insig = 0;
361
    my @body  = $body->lines;
362
    for (@body)
7 PointedEar 363
    {
22 PointedEar 364
      $totbody += length($_);    # bump total body size
365
      next if (m{^$>}o);         # don't count blank lines in body
366
      if ( $insig == 1 )
367
      {
368
 
369
        # bump total sig size
370
        $totsig += length($_);
371
      }
372
      ## are we in a quote line?
373
      ## Bill Unruh uses ] quotes, and another poster uses ::
31 PointedEar 374
      elsif ( m{^\s*[>\]]}o || m{^\s*::}o )
22 PointedEar 375
      {
376
        ## bump count of quoted chrs
377
        $data{$poster}{'quoted'} += length($_);
378
        $totquoted += length($_);
379
      }
380
      elsif (/^-- $/)
381
      {
382
        $insig = 1;
383
      }
384
      else
385
      {
386
        ## We must be processing an original line
387
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
388
        $totorig += length($_);
389
      }
7 PointedEar 390
    }
22 PointedEar 391
 
392
    # end for (@body)
393
  }
394
 
395
  close($FILE);
396
}
397
 
398
sub get_agent
399
{
400
  my $msg = shift;
401
 
31 PointedEar 402
  my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader')
403
    // $msg->study('X-Mailer');
404
 
22 PointedEar 405
  if ( not defined $ua )
406
  {
407
    my $org = $msg->study('Organization');
408
    if ( defined $org
409
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
7 PointedEar 410
    {
22 PointedEar 411
      $ua = $org;
7 PointedEar 412
    }
22 PointedEar 413
    elsif ( $msg->study('Message-ID') =~ /pine/i )
7 PointedEar 414
    {
22 PointedEar 415
      $ua = "Pine";
416
    }
7 PointedEar 417
  }
5 PointedEar 418
 
22 PointedEar 419
  ## Hopefully found UA, else set to unknown
420
  if ( not defined $ua )
421
  {
422
    $ua = __ "unknown";
423
  }
5 PointedEar 424
 
22 PointedEar 425
  $ua = clean($ua);
426
 
427
  my $raw   = $ua;
428
  my $agent = $raw;
429
 
430
  ## strip http
431
  if ( $raw =~ /.*http.*/ )
7 PointedEar 432
  {
22 PointedEar 433
    $raw =~ s!posted via!!i;
434
    $raw =~ s!http://!!g;
435
    $raw =~ s!/!!g;
436
    $raw =~ s! !!g;
437
  }
5 PointedEar 438
 
22 PointedEar 439
  ## Fix Outlook from Mac
440
  if ( $raw =~ /^microsoft/i )
441
  {
442
    $raw =~ s/-/ /g;
443
  }
5 PointedEar 444
 
22 PointedEar 445
  ## Pick out the popular agents
446
  if ( $raw =~ /(outlook express)/i
447
    || $raw =~ /(windows mail)/i
448
    || $raw =~ /(microplanet gravity)/i
449
    || $raw =~ /(news rover)/i
450
    || $raw =~ /(forte agent)/i
451
    || $raw =~ /(forte free agent)/i )
452
  {
453
    $agent = $1;
454
  }
455
  elsif (
456
    $raw =~ /^(
5 PointedEar 457
        pan
458
       |sylpheed
459
       |slrn
460
       |mozilla
461
       |knode
462
       |tin
463
       |hamster
464
       |xrn
465
       |xnews
466
       |aol
467
       |gnus
468
       |krn
469
       |macsoup
470
       |messenger
471
       |openxp
472
       |pine
473
       |thoth
474
       |turnpike
475
       |winvn
476
       |vsoup
477
       |google
478
       |supernews
479
       |nn
480
       |rn
481
       |007
482
       |webtv
483
       |compuserve
7 PointedEar 484
       )/ix
22 PointedEar 485
    )
486
  {
487
    $agent = $1;
488
  }
489
  else
490
  {
491
    ## Clean up unknown agents
492
    if ( $raw =~ m!^(.*?)/! )
7 PointedEar 493
    {
494
      $agent = $1;
495
    }
22 PointedEar 496
    elsif ( $raw =~ /^(\w*)\d.*/ )
7 PointedEar 497
    {
22 PointedEar 498
      $agent = $1;
499
    }
500
  }
501
 
502
  $distinct_agent{$agent}++;
503
  return $agent;
504
}
505
## get_agent
506
 
507
#########################################
508
## Count the User-Agents used, collapsing
509
## different versions into one per agent.
510
#########################################
511
sub count_agents
512
{
513
POSTER:
514
  foreach my $poster ( keys %data )
515
  {
516
    foreach my $agent_name ( keys %distinct_agent )
517
    {    # check against known ones
518
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7 PointedEar 519
      {
22 PointedEar 520
        $agents{$agent_name}++;
521
        next POSTER;
7 PointedEar 522
      }
523
    }
22 PointedEar 524
    $agents{ $data{$poster}{'agent'} }++;
525
  }
526
}    # count_agents
7 PointedEar 527
 
22 PointedEar 528
#############################################
529
## Set orig/total percentages for all posters
530
#############################################
531
sub fix_percent
532
{
533
  foreach my $poster ( keys %data )
534
  {
535
    my $percent = 100;
536
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
537
    {
538
      $percent =
539
        $data{$poster}{'orig'} * 100 /
540
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
541
    }
542
    elsif ( $data{$poster}{'orig'} == 0 )
543
    {
544
      $percent = 0;
545
    }
546
    $data{$poster}{'percent'} = $percent;
5 PointedEar 547
  }
22 PointedEar 548
}
549
## fix_percent
5 PointedEar 550
 
22 PointedEar 551
##################################
552
## Write data structures to a file
553
##################################
554
sub write_data
555
{
556
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
557
    or die __x( "Can't create XDATA: {error}\n", error => $! );
558
  print $OUTF "Data collected from $newsgroup_name\n\n";
559
  print $OUTF
560
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
561
  foreach my $name ( keys %data )
7 PointedEar 562
  {
22 PointedEar 563
    print $OUTF
564
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7 PointedEar 565
  }
22 PointedEar 566
  print $OUTF
567
"============================================================================\n";
568
  print $OUTF "Thread subjects\n";
569
  print $OUTF
570
"----------------------------------------------------------------------------\n";
571
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
572
  {
573
    print $OUTF
574
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
575
  }
576
  print $OUTF
577
"============================================================================\n";
578
  print $OUTF "Cross-posts\n";
579
  print $OUTF
580
"----------------------------------------------------------------------------\n";
581
  foreach my $name ( sort keys %crossposts )
582
  {
583
    print $OUTF "$name : $crossposts{$name}\n";
584
  }
585
  print $OUTF
586
"============================================================================\n";
587
  print $OUTF "User agents\n";
588
  print $OUTF
589
"----------------------------------------------------------------------------\n";
590
  foreach my $name ( sort keys %agents )
591
  {
592
    print $OUTF "$name : $agents{$name}\n";
593
  }
594
  print $OUTF
595
"============================================================================\n";
596
  print $OUTF "Time zones\n";
597
  print $OUTF
598
"----------------------------------------------------------------------------\n";
599
  foreach my $name ( sort keys %tz )
600
  {
601
    print $OUTF "$name : $tz{$name}\n";
602
  }
603
  close $OUTF;
604
}    # write_data
5 PointedEar 605
 
22 PointedEar 606
sub display_results
607
{
608
  #################### DISPLAY RESULTS #####################
45 PointedEar 609
  print "=" x 76, "\n";
22 PointedEar 610
  printf "%s\n",
611
    centred(
48 PointedEar 612
    __x( "Analysis of articles to {newsgroup}", newsgroup => $newsgroup_name ),
613
    76
614
    );
45 PointedEar 615
  print "=" x 76, "\n";
22 PointedEar 616
  printf "%s\n",
617
    centred(
23 PointedEar 618
    __(
619
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
620
    ),
22 PointedEar 621
    76
622
    );
49 PointedEar 623
  print "\n";
624
  printf __"Total articles considered:   %s over %d days\n",
23 PointedEar 625
    $formatter->format_number($totalposts),
626
    $formatter->format_number($numdays);
627
  my $time_locale       = setlocale(LC_TIME);
22 PointedEar 628
  my $earliest_datetime = DateTime->from_epoch(
23 PointedEar 629
    epoch     => $earliest,
630
    locale    => $time_locale,
22 PointedEar 631
    time_zone => 'UTC',
632
  );
633
  my $latest_datetime = DateTime->from_epoch(
23 PointedEar 634
    epoch     => $latest,
635
    locale    => $time_locale,
22 PointedEar 636
    time_zone => 'UTC',
637
  );
638
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
49 PointedEar 639
  printf __"Earliest article:            %s" . "\n",
23 PointedEar 640
    $earliest_datetime->strftime($datetime_format);
49 PointedEar 641
  printf __"Latest article:              %s" . "\n",
23 PointedEar 642
    $latest_datetime->strftime($datetime_format);
49 PointedEar 643
  printf __"Original articles:           %s; replies: %s" . "\n",
23 PointedEar 644
    $formatter->format_number($origposts),
645
    $formatter->format_number($replies);
49 PointedEar 646
  printf __"Total size of articles:      %s bytes (%s)" . "\n",
23 PointedEar 647
    $formatter->format_number($totsize),
648
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
49 PointedEar 649
  printf __"Average %s articles per day, %s per day, %s bytes per article.\n",
23 PointedEar 650
    $formatter->format_number( int( $totalposts / $numdays ) ),
651
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
652
    $formatter->format_number( int( $totsize / $totalposts ) );
653
 
22 PointedEar 654
  my $count = keys %data;
49 PointedEar 655
  print "\n";
656
  printf __"Total headers:      %s; bodies: %s\n",
23 PointedEar 657
    $formatter->format_bytes(
658
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
659
    ),
660
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 661
  printf __
23 PointedEar 662
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
663
    $formatter->format_bytes(
664
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
665
    ),
666
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
49 PointedEar 667
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) )
668
    ,
23 PointedEar 669
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
49 PointedEar 670
  print "\n";
671
  printf __"Total number of posters:     %s, average %s per poster\n",
23 PointedEar 672
    $formatter->format_number($count),
673
    $formatter->format_bytes( $totsize / $count,
674
    ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 675
  $count = keys %threads;
49 PointedEar 676
  printf __"Total number of threads:     %s, average %s per thread\n",
23 PointedEar 677
    $formatter->format_number($count),
678
    $formatter->format_bytes( $totsize / $count,
679
    ( 'precision' => 1, 'mode' => 'iec' ) );
680
  printf __"Total number of user agents: %d\n",
681
    $formatter->format_number( scalar keys %agents );
31 PointedEar 682
  print "\n", "=" x 76, "\n";
22 PointedEar 683
  ########################################
684
  ## Show posters by article count  Sec 1;
685
  ########################################
686
  unless ( $skipSec{1} )
7 PointedEar 687
  {
22 PointedEar 688
    if ( keys %data < $topposters )
689
    {
690
      $count = keys %data;
691
    }
692
    else
693
    {
694
      $count = $topposters;
695
    }
696
    printf "%s\n",
697
      centred(
23 PointedEar 698
      __x( "Top {count} posters by number of articles", count => $topposters ),
699
      76
700
      );
22 PointedEar 701
    print "=" x 76, "\n";
702
    my $i = 0;
703
    foreach
704
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
705
    {
706
      my $name = substr( $poster, 0, 65 );
25 PointedEar 707
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 708
        $data{$poster}{count};
709
      last if ( ++$i == $count );
710
    }
711
    print "\n", "=" x 76, "\n";
7 PointedEar 712
  }
22 PointedEar 713
 
714
  ######################################
715
  ## Show posters by size in KiB  Sec 2;
716
  ######################################
717
  unless ( $skipSec{2} )
7 PointedEar 718
  {
22 PointedEar 719
    if ( keys %data < $topposters )
720
    {
721
      $count = keys %data;
722
    }
723
    else
724
    {
725
      $count = $topposters;
726
    }
727
    printf "%s\n",
728
      centred(
729
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
23 PointedEar 730
      76
731
      );
22 PointedEar 732
    print "=" x 76, "\n";
733
    my $i = 0;
734
    foreach
735
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
736
    {
737
      my $name = substr( $poster, 0, 62 );
25 PointedEar 738
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 739
        $data{$poster}{size} / 1024;    #/
740
      last if ( ++$i == $count );
741
    }
742
    print "\n", "=" x 76, "\n";
7 PointedEar 743
  }
5 PointedEar 744
 
22 PointedEar 745
  #####################################
746
  ## Show top posters for original text
747
  #####################################
31 PointedEar 748
  my $topposters_real = 0;
749
 
22 PointedEar 750
  unless ( $skipSec{3} )
7 PointedEar 751
  {
22 PointedEar 752
    if ( keys %data < $topposters )
753
    {
754
      $count = keys %data;
755
    }
756
    else
757
    {
758
      $count = $topposters;
759
    }
31 PointedEar 760
 
22 PointedEar 761
    printf "%s\n",
762
      centred(
763
      __x(
48 PointedEar 764
        "Top {count} responders by original text (> 5 articles)",
22 PointedEar 765
        count => $topposters
766
      ),
767
      76
768
      );
769
    print "=" x 76, "\n";
770
    foreach my $poster (
771
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
772
      keys %data
773
      )
774
    {
775
      next if $data{$poster}{quoted} == 0;
776
      next if $data{$poster}{count} < 5;
777
      my $name = substr( $poster, 0, 63 );
31 PointedEar 778
      printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1,
779
        rpad( $poster, 63, "." ),
22 PointedEar 780
        $data{$poster}{percent};
31 PointedEar 781
      last if ( ++$topposters_real == $count );
22 PointedEar 782
    }
783
    print "\n", "=" x 76, "\n";
7 PointedEar 784
  }
5 PointedEar 785
 
22 PointedEar 786
  ########################################
787
  ## Show bottom posters for original text
788
  ########################################
31 PointedEar 789
 
790
  $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4};
791
 
22 PointedEar 792
  unless ( $skipSec{4} )
7 PointedEar 793
  {
22 PointedEar 794
    if ( keys %data < $topposters )
7 PointedEar 795
    {
22 PointedEar 796
      $count = keys %data;
797
    }
798
    else
799
    {
800
      $count = $topposters;
801
    }
31 PointedEar 802
 
22 PointedEar 803
    printf "%s\n",
804
      centred(
805
      __x(
48 PointedEar 806
        "Bottom {count} responders by original text  (> 5 articles)",
22 PointedEar 807
        count => $topposters
808
      ),
809
      76
810
      );
811
    print "=" x 76, "\n";
812
    my $i = 0;
813
    foreach my $poster (
814
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
815
      keys %data
816
      )
817
    {
818
      next if $data{$poster}{quoted} == 0;
819
      next if $data{$poster}{count} < 5;
820
      my $name = substr( $poster, 0, 63 );
25 PointedEar 821
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 822
        $data{$poster}{percent};
823
      last if ( ++$i == $count );
824
    }
825
    print "\n", "=" x 76, "\n";
826
  }
5 PointedEar 827
 
22 PointedEar 828
  #####################################
829
  ## Show threads by number of articles
830
  #####################################
831
  unless ( $skipSec{5} )
832
  {
833
    if ( keys %threads < $topthreads )
834
    {
835
      $count = keys %threads;
7 PointedEar 836
    }
22 PointedEar 837
    else
838
    {
839
      $count = $topthreads;
7 PointedEar 840
    }
22 PointedEar 841
    printf "%s\n",
23 PointedEar 842
      centred(
843
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
22 PointedEar 844
      76 );
845
    print "=" x 76, "\n";
846
    my $i = 0;
847
    foreach my $thread (
848
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
849
      keys %threads
850
      )
7 PointedEar 851
    {
22 PointedEar 852
      my $name = substr( $thread, 0, 65 );
25 PointedEar 853
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 854
        $threads{$thread}{'count'};
855
      last if ( ++$i == $count );
7 PointedEar 856
    }
22 PointedEar 857
    print "\n", "=" x 76, "\n";
858
  }
859
 
860
  ##############################
861
  ## Show threads by size in KiB
862
  ##############################
863
  unless ( $skipSec{6} )
864
  {
865
    if ( keys %threads < $topthreads )
866
    {
867
      $count = keys %threads;
868
    }
7 PointedEar 869
    else
870
    {
22 PointedEar 871
      $count = $topthreads;
872
    }
873
    printf "%s\n",
23 PointedEar 874
      centred(
875
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22 PointedEar 876
    print "=" x 76, "\n";
877
    my $i = 0;
878
    foreach my $thread (
879
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
880
      keys %threads
881
      )
882
    {
883
      my $name = substr( $thread, 0, 65 );
25 PointedEar 884
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 885
        $threads{$thread}{'size'} / 1024;    #/
886
      last if ( ++$i == $count );
887
    }
888
    print "\n", "=" x 76, "\n";
889
  }
5 PointedEar 890
 
22 PointedEar 891
  ##################################
892
  ## Show top 10 cross-posted groups
893
  ##################################
894
  unless ( $skipSec{7} )
895
  {
896
    delete $crossposts{"$newsgroup_name"};    # don't include ours
897
    if ( keys %crossposts < $topcrossposts )
898
    {
899
      $count = keys %crossposts;
7 PointedEar 900
    }
22 PointedEar 901
    else
902
    {
903
      $count = $topcrossposts;
904
    }
905
    printf "%s\n",
23 PointedEar 906
      centred(
907
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22 PointedEar 908
    print "=" x 76, "\n";
909
    my $i = 0;
910
    foreach
911
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
912
    {
25 PointedEar 913
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 914
        $crossposts{$name};
915
      last if ( ++$i == $count );
916
    }
917
    print "\n", "=" x 76, "\n";
918
  }
5 PointedEar 919
 
22 PointedEar 920
  #########################
921
  ## Show agents and counts
922
  #########################
923
  unless ( $skipSec{8} )
924
  {
925
    if ( keys %agents < $topagents )
926
    {
927
      $count = keys %agents;
928
    }
929
    else
930
    {
931
      $count = $topagents;
932
    }
933
    printf "%s\n",
934
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
935
      76 );
936
    print "=" x 76, "\n";
937
    my $i = 0;
938
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
939
    {
25 PointedEar 940
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
22 PointedEar 941
        $agents{$agent};
942
      last if ( ++$i == $count );
943
    }
944
    print "\n", "=" x 76, "\n";
945
  }
5 PointedEar 946
 
22 PointedEar 947
  #######################
948
  ## Show distinct agents
949
  #######################
950
  unless ( $skipSec{9} )
7 PointedEar 951
  {
22 PointedEar 952
    if ( keys %distinct_agent < $topagents )
953
    {
954
      $count = keys %distinct_agent;
7 PointedEar 955
    }
22 PointedEar 956
    else
957
    {
958
      $count = $topagents;
959
    }
960
    printf "%s\n",
961
      centred(
48 PointedEar 962
      __x(
963
        "Top {count} user agents by number of articles",
964
        count => $topagents
965
      ),
23 PointedEar 966
      76
967
      );
22 PointedEar 968
    print "=" x 76, "\n";
969
    my $i = 0;
970
    foreach my $agent (
971
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
972
      keys %distinct_agent
973
      )
974
    {
25 PointedEar 975
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
22 PointedEar 976
        $distinct_agent{$agent},
977
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
978
      last if ( ++$i == $count );
979
    }
980
    print "\n", "=" x 76, "\n";
7 PointedEar 981
  }
5 PointedEar 982
 
22 PointedEar 983
  ############################
984
  ## Show timezones and counts
985
  ############################
986
  unless ( $skipSec{10} )
7 PointedEar 987
  {
22 PointedEar 988
    if ( keys %tz < $toptz )
7 PointedEar 989
    {
22 PointedEar 990
      $count = keys %tz;
7 PointedEar 991
    }
22 PointedEar 992
    else
7 PointedEar 993
    {
22 PointedEar 994
      $count = $toptz;
7 PointedEar 995
    }
23 PointedEar 996
    printf "%s\n",
48 PointedEar 997
      centred(
998
      __x( "Top {count} time zones by number of articles", count => $toptz ),
999
      76 );
22 PointedEar 1000
    print "=" x 76, "\n";
1001
    my $i = 0;
1002
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
1003
    {
25 PointedEar 1004
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
22 PointedEar 1005
      last if ( ++$i == $count );
1006
    }
1007
    print "\n", "=" x 76, "\n";
7 PointedEar 1008
  }
5 PointedEar 1009
}
1010
 
22 PointedEar 1011
## helper subs
1012
 
13 PointedEar 1013
###############################
1014
## Right pad a string with '.'s
1015
###############################
7 PointedEar 1016
sub rpad
1017
{
13 PointedEar 1018
  ## Get text to pad, length to pad, pad chr
7 PointedEar 1019
  my ( $text, $pad_len, $pad_chr ) = @_;
11 PointedEar 1020
 
13 PointedEar 1021
  ## DEBUG
22 PointedEar 1022
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
11 PointedEar 1023
 
7 PointedEar 1024
  if ( length($text) > $pad_len )
1025
  {
1026
    $text = substr( $text, 0, $pad_len );
1027
  }
1028
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1029
  return $padded;
5 PointedEar 1030
}
1031
 
13 PointedEar 1032
##################
1033
## Centre a string
1034
##################
7 PointedEar 1035
sub centred
1036
{
1037
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1038
  my $pad_len = ( $width - length($text) ) / 2;    #/
1039
  my $centred = " " x $pad_len . $text;
1040
  return $centred;
5 PointedEar 1041
}
1042
 
13 PointedEar 1043
###########################
1044
## Put commas into a number
1045
###########################
7 PointedEar 1046
sub commify
1047
{
13 PointedEar 1048
  local $_ = shift;
23 PointedEar 1049
  my $number = $_;
1050
  $_ = int;                                        # Chop non-integer part
22 PointedEar 1051
  1 while
23 PointedEar 1052
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1053
  my $int_part  = $_;
1054
  my $real_part = '';
1055
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1056
  {
1057
    $real_part = $1;
1058
  }
1059
  return $int_part . $real_part;
5 PointedEar 1060
}
1061
 
13 PointedEar 1062
################################################################
1063
## Returns a string with leading and trailing whitespace removed
1064
################################################################
7 PointedEar 1065
sub clean
1066
{
1067
  my $dirty = shift;
1068
  my $clean = $dirty;
14 PointedEar 1069
  $clean =~ s/^\s+|\s+$//g;
5 PointedEar 1070
 
7 PointedEar 1071
  return $clean;
5 PointedEar 1072
}
1073
 
7 PointedEar 1074
sub usage
1075
{
45 PointedEar 1076
  print __ "usage: newsstat.pl NEWS.GROUP", "\n";
7 PointedEar 1077
  exit 1;
5 PointedEar 1078
}
1079
 
22 PointedEar 1080
sub dmsg
7 PointedEar 1081
{
22 PointedEar 1082
  print STDERR @_, "\n";
1083
}
1084
 
1085
sub dmsg2
1086
{
1087
  my ( $level, @msg ) = @_;
1088
  print STDERR @msg, "\n" if $level >= DEBUG;
48 PointedEar 1089
}