Subversion Repositories LCARS

Rev

Rev 31 | Rev 45 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8 PointedEar 1
#!/usr/bin/env perl
5 PointedEar 2
use strict;
6 PointedEar 3
use warnings;
23 PointedEar 4
require 5.004;
22 PointedEar 5
 
6
#use diagnostics;
6 PointedEar 7
use utf8;
8
 
30 PointedEar 9
## NOTE:
10
## Enable and remove binmode when utf8::all has actually become lexically scoped
11
# use utf8:all;
12
 
22 PointedEar 13
use constant DEBUG => 0;
14
 
24 PointedEar 15
## newsstat.pl
16
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
17
## Based on work by Garry Knight et al.
31 PointedEar 18
##
24 PointedEar 19
## This program is free software: you can redistribute it and/or modify
20
## it under the terms of the GNU General Public License as published by
21
## the Free Software Foundation, either version 3 of the License, or
22
## (at your option) any later version.
31 PointedEar 23
##
24 PointedEar 24
## This program is distributed in the hope that it will be useful,
25
## but WITHOUT ANY WARRANTY; without even the implied warranty of
26
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27
## GNU General Public License for more details.
31 PointedEar 28
##
24 PointedEar 29
## You should have received a copy of the GNU General Public License
30
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
31
 
13 PointedEar 32
## Print out all text to STDOUT UTF-8 encoded
33
binmode STDOUT, ':encoding(UTF-8)';
22 PointedEar 34
binmode STDERR, ':encoding(UTF-8)';
5 PointedEar 35
 
23 PointedEar 36
## L10n
37
use locale ':not_characters';
38
 
39
# setlocale( LC_MESSAGES, '' );
40
require Number::Format;
41
 
42
## i18n
43
## FIXME: Automatically include resolved '.' in @INC
22 PointedEar 44
# print join "\n", @INC;
5 PointedEar 45
 
22 PointedEar 46
use Locale::TextDomain ('de.pointedears.newsstat');
23 PointedEar 47
use POSIX              ('locale_h');
22 PointedEar 48
use Locale::Messages qw (bind_textdomain_filter
23 PointedEar 49
  bind_textdomain_codeset
50
  turn_utf_8_on);
51
 
52
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
22 PointedEar 53
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23 PointedEar 54
 
22 PointedEar 55
require Mail::Message;
56
require DateTime;
57
require DateTime::Format::Mail;
23 PointedEar 58
 
59
# See comments in previous example
60
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
61
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
62
  'decimal_point' };
63
 
64
# Apply defaults if values are missing
65
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
66
$thousands_sep = ' ' unless $thousands_sep;
67
 
68
# grouping and mon_grouping are packed lists
69
# of small integers (characters) telling the
70
# grouping (thousand_seps and mon_thousand_seps
71
# being the group dividers) of numbers and
72
# monetary quantities.  The integers' meanings:
73
# 255 means no more grouping, 0 means repeat
74
# the previous grouping, 1-254 means use that
75
# as the current grouping.  Grouping goes from
76
# right to left (low to high digits).  In the
77
# below we cheat slightly by never using anything
78
# else than the first grouping (whatever that is).
79
my @grouping;
80
if ($grouping)
81
{
82
  @grouping = unpack( "C*", $grouping );
83
}
84
else
85
{
86
  @grouping = (3);
87
}
88
 
89
## FIXME: Why don't the defaults work already?
90
my $formatter = new Number::Format(
91
  -decimal_point => $decimal_point,
92
  -thousands_sep => $thousands_sep,
93
 
94
  # -grouping      => $grouping[0]
95
);
96
 
5 PointedEar 97
###################### USER CONFIGURATIONS ############################
98
 
13 PointedEar 99
## The name of the group to do stats for
5 PointedEar 100
my $newsgroup_name = $ARGV[0];
31 PointedEar 101
$newsgroup_name // usage();
5 PointedEar 102
 
13 PointedEar 103
## Check for removal flags
5 PointedEar 104
my $ix;
105
my $j;
106
my %skipSec;
107
my @skiplist;
108
my $args = @ARGV;
7 PointedEar 109
for ( $ix = 1 ; $ix < $args ; $ix++ )
110
{
111
  $j = $ix + 1;
112
  if ( $ARGV[$ix] eq "-x" )
113
  {
114
    @skiplist = split( ",", $ARGV[$j] );
115
  }
116
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
117
  {
118
    @skiplist = split( ",", $1 );
119
  }
5 PointedEar 120
}
7 PointedEar 121
foreach (@skiplist)
122
{
5 PointedEar 123
  $skipSec{$_} = 1;
124
}
125
 
13 PointedEar 126
## Leafnode users will want /var/spool/news for this variable.
5 PointedEar 127
my $news = "/var/spool/news/";
128
 
22 PointedEar 129
## Number of top or bottom posters to show
130
my $topposters = 20;
5 PointedEar 131
 
13 PointedEar 132
## Number of threads we want to know about
5 PointedEar 133
my $topthreads = 20;
134
 
13 PointedEar 135
## Number of cross-posted threads to show
5 PointedEar 136
my $topcrossposts = 10;
137
 
22 PointedEar 138
## Number of agents we list
139
my $topagents = 10;
140
 
13 PointedEar 141
## Number of time zones to show
5 PointedEar 142
my $toptz = 10;
143
 
144
###################### DATA STRUCTURES ######################
145
my $group = $newsgroup_name;
146
$group =~ s!\.!/!g;
22 PointedEar 147
my %data;    # name, count, agent, total, orig, quoted
148
my $totsize = 0;    # holds total sizes of all files
149
my %crossposts;     # group, count
150
my %threads;        # subject, count
151
my $replies   = 0;  # total no. of replies
152
my $origposts = 0;  # total no. of original posts
153
my %tz;             # timezones by count
154
my $earliest;       # earliest article we have found
155
my $latest;         # latest article we have found
156
my $totheader = 0;  # total size of header material
157
my $totbody   = 0;  # total size of body material
158
my $totsig    = 0;  # total size of sig material
159
my $totquoted = 0;  # total size of quoted material
160
my $totorig   = 0;  # total size of original material
161
my $totalposts;     # total no. of posts considered
5 PointedEar 162
my %distinct_agent;
13 PointedEar 163
 
164
## Used to hold counts of User Agents used
165
my %agents = (
166
  "Compuserver"               => 0,
167
  "Foorum"                    => 0,
7 PointedEar 168
  "Forte Agent"               => 0,
169
  "Forte Free Agent"          => 0,
13 PointedEar 170
  "Gnus"                      => 0,
171
  "KNode"                     => 0,
172
  "MacSOUP"                   => 0,
173
  "MT-NewsWatcher"            => 0,
7 PointedEar 174
  "MicroPlanet Gravity"       => 0,
175
  "Microsoft Outlook Express" => 0,
13 PointedEar 176
  "Microsoft Windows Mail"    => 0,
177
  "Mozilla"                   => 0,
178
  "News Rover"                => 0,
179
  "NN"                        => 0,
180
  "Pan"                       => 0,
181
  "rn"                        => 0,
7 PointedEar 182
  "slrn"                      => 0,
13 PointedEar 183
  "Sylpheed"                  => 0,
7 PointedEar 184
  "tin"                       => 0,
13 PointedEar 185
  "VSoup"                     => 0,
7 PointedEar 186
  "WebTV"                     => 0,
22 PointedEar 187
  "Xnews"                     => 0,
13 PointedEar 188
);
5 PointedEar 189
 
22 PointedEar 190
my $datetime_parser = DateTime::Format::Mail->new();
191
$datetime_parser->loose();
5 PointedEar 192
 
22 PointedEar 193
my $today = DateTime->today( time_zone => 'UTC' );
194
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
195
my $start      = int $prev_month->strftime('%s');
196
my $numdays    = int DateTime->last_day_of_month(
197
  year      => $prev_month->year(),
198
  month     => $prev_month->month(),
199
  time_zone => $prev_month->time_zone(),
200
)->day();
201
my $end = int $today->clone()->set_day(1)->strftime('%s');
202
 
203
dmsg( $start, " to ", $end ) if DEBUG;
204
 
205
chdir("$news$group")
206
  or die __x(
207
  "Can't cd to {newsgroup}: {error}\n",
208
  newsgroup => "$news$group",
209
  error     => $!
210
  );
211
opendir( DIR, "." )
212
  or die __x(
213
  "Can't open {newsgroup}: {error}\n",
214
  newsgroup => "$news$group",
215
  error     => $!
216
  );
217
 
218
while ( defined( my $filename = readdir(DIR) ) )
7 PointedEar 219
{
22 PointedEar 220
  next unless -f $filename;    # only want real files
221
  next if ( $filename eq ".overview" );    # real articles only
222
 
223
  get_article($filename);                  # read in the article
5 PointedEar 224
}
22 PointedEar 225
closedir(DIR);                             # finished with the directory
7 PointedEar 226
 
22 PointedEar 227
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
228
 
13 PointedEar 229
## Post-processing
22 PointedEar 230
count_agents();                            # count agents, collapsing versions
231
fix_percent();
5 PointedEar 232
 
22 PointedEar 233
write_data();
234
display_results();
5 PointedEar 235
 
13 PointedEar 236
########################################
22 PointedEar 237
## Get current article's header and body
13 PointedEar 238
########################################
22 PointedEar 239
sub get_article
7 PointedEar 240
{
22 PointedEar 241
  my $filename = shift;
5 PointedEar 242
 
22 PointedEar 243
  open( my $FILE, '<', $filename )
244
    or
245
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
246
  my $msg       = Mail::Message->read($FILE);
247
  my $timestamp = $msg->timestamp();
248
  my $date      = $msg->study('Date');
5 PointedEar 249
 
22 PointedEar 250
  ## Disregard article if timestamp is not in range
251
  dmsg($timestamp) if DEBUG;
31 PointedEar 252
  if ( $timestamp < $start || $timestamp >= $end )
7 PointedEar 253
  {
22 PointedEar 254
    dmsg("Posting on $date ignored.") if DEBUG;
255
    return;
7 PointedEar 256
  }
5 PointedEar 257
 
22 PointedEar 258
  $totalposts++;    # bump count of articles considered
5 PointedEar 259
 
22 PointedEar 260
  ## DEBUG
261
  dmsg($date) if DEBUG;
13 PointedEar 262
 
22 PointedEar 263
  ## get stats about the file itself
264
  my $filesize = -s $filename;    # get total size of file
265
  $totsize += $filesize;          # bump total sizes of all files
5 PointedEar 266
 
31 PointedEar 267
  if ( ( not defined $earliest ) || $timestamp < $earliest )
7 PointedEar 268
  {
22 PointedEar 269
    $earliest = $timestamp;
7 PointedEar 270
  }
31 PointedEar 271
  elsif ( ( not defined $latest ) || $timestamp > $latest )
7 PointedEar 272
  {
22 PointedEar 273
    $latest = $timestamp;
7 PointedEar 274
  }
13 PointedEar 275
 
22 PointedEar 276
  #print "timestamp: $timestamp\n";
5 PointedEar 277
 
22 PointedEar 278
  ## count header size
279
  $totheader += $msg->head()->size();
5 PointedEar 280
 
22 PointedEar 281
  ## get the poster's name (MIME-decoded, in UTF-8)
282
  my $poster = $msg->study('From');
283
  if ( defined $poster )
7 PointedEar 284
  {
22 PointedEar 285
    ## Convert old to new format
286
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
5 PointedEar 287
 
22 PointedEar 288
    ## Collapse whitespace
289
    $poster =~ s/\s+/ /g;
5 PointedEar 290
 
22 PointedEar 291
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
292
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5 PointedEar 293
 
22 PointedEar 294
    ## DEBUG
295
    dmsg($poster) if DEBUG;
7 PointedEar 296
 
22 PointedEar 297
    ## seen this one before?
298
    if ( !defined( $data{$poster} ) )
299
    {
300
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
301
      $data{$poster}{'orig'}   = 0;
302
      $data{$poster}{'quoted'} = 0;
303
    }
304
    $data{$poster}{'count'}++;                    # bump count for this poster
305
    $data{$poster}{'size'} += $filesize;          # total size of file
5 PointedEar 306
 
22 PointedEar 307
    ## The User-Agent and/or X-Newsreader fields
308
    ## for User-Agent by poster
31 PointedEar 309
    my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader');
22 PointedEar 310
    if ( defined $ua )
7 PointedEar 311
    {
22 PointedEar 312
      $data{$poster}{'agent'} = $ua;
313
 
314
      ## DEBUG
315
      dmsg($ua) if DEBUG;
7 PointedEar 316
    }
5 PointedEar 317
 
22 PointedEar 318
    ## The User Agent for User-Agent by number of posts
319
    get_agent($msg);
5 PointedEar 320
 
22 PointedEar 321
    ## Get all cross-posted newsgroups
322
    for ( split( /,/, $msg->study('Newsgroups') ) )
323
    {
324
      $crossposts{$_}++;    # bump count for each
325
    }
5 PointedEar 326
 
22 PointedEar 327
    ## Get threads
328
    my $thread = $msg->study('Subject');
33 PointedEar 329
    $thread =~ s/^\s*re:\s*//i;    # Remove Re: or re: at the start
330
    $thread =~ s/\s*\(was:\s*.*\)\s*$//i;    # Remove (was: ...) at the end
22 PointedEar 331
    $thread =~ s/\s+/ /g;       # collapse whitespace
332
    $threads{$thread}{'count'}++;    # bump count of this subject
333
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
5 PointedEar 334
 
22 PointedEar 335
    ## Is this an original post or a reply?
336
    if ( defined $msg->study('References') )
7 PointedEar 337
    {
22 PointedEar 338
      $replies++;
7 PointedEar 339
    }
22 PointedEar 340
    else
7 PointedEar 341
    {
22 PointedEar 342
      $origposts++;
7 PointedEar 343
    }
22 PointedEar 344
 
345
    ## Get the time zone
346
    my $datetime = $datetime_parser->parse_datetime($date);
347
    my $tz       = $datetime->strftime('%z');
348
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
349
    $tz{$tz}++;
350
 
351
    ## DEBUG
352
    dmsg($tz) if DEBUG;
353
 
354
#### Now analyse the body text ####
355
    my $body = $msg->body();
356
 
357
    my $insig = 0;
358
    my @body  = $body->lines;
359
    for (@body)
7 PointedEar 360
    {
22 PointedEar 361
      $totbody += length($_);    # bump total body size
362
      next if (m{^$>}o);         # don't count blank lines in body
363
      if ( $insig == 1 )
364
      {
365
 
366
        # bump total sig size
367
        $totsig += length($_);
368
      }
369
      ## are we in a quote line?
370
      ## Bill Unruh uses ] quotes, and another poster uses ::
31 PointedEar 371
      elsif ( m{^\s*[>\]]}o || m{^\s*::}o )
22 PointedEar 372
      {
373
        ## bump count of quoted chrs
374
        $data{$poster}{'quoted'} += length($_);
375
        $totquoted += length($_);
376
      }
377
      elsif (/^-- $/)
378
      {
379
        $insig = 1;
380
      }
381
      else
382
      {
383
        ## We must be processing an original line
384
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
385
        $totorig += length($_);
386
      }
7 PointedEar 387
    }
22 PointedEar 388
 
389
    # end for (@body)
390
  }
391
 
392
  close($FILE);
393
}
394
 
395
sub get_agent
396
{
397
  my $msg = shift;
398
 
31 PointedEar 399
  my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader')
400
    // $msg->study('X-Mailer');
401
 
22 PointedEar 402
  if ( not defined $ua )
403
  {
404
    my $org = $msg->study('Organization');
405
    if ( defined $org
406
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
7 PointedEar 407
    {
22 PointedEar 408
      $ua = $org;
7 PointedEar 409
    }
22 PointedEar 410
    elsif ( $msg->study('Message-ID') =~ /pine/i )
7 PointedEar 411
    {
22 PointedEar 412
      $ua = "Pine";
413
    }
7 PointedEar 414
  }
5 PointedEar 415
 
22 PointedEar 416
  ## Hopefully found UA, else set to unknown
417
  if ( not defined $ua )
418
  {
419
    $ua = __ "unknown";
420
  }
5 PointedEar 421
 
22 PointedEar 422
  $ua = clean($ua);
423
 
424
  my $raw   = $ua;
425
  my $agent = $raw;
426
 
427
  ## strip http
428
  if ( $raw =~ /.*http.*/ )
7 PointedEar 429
  {
22 PointedEar 430
    $raw =~ s!posted via!!i;
431
    $raw =~ s!http://!!g;
432
    $raw =~ s!/!!g;
433
    $raw =~ s! !!g;
434
  }
5 PointedEar 435
 
22 PointedEar 436
  ## Fix Outlook from Mac
437
  if ( $raw =~ /^microsoft/i )
438
  {
439
    $raw =~ s/-/ /g;
440
  }
5 PointedEar 441
 
22 PointedEar 442
  ## Pick out the popular agents
443
  if ( $raw =~ /(outlook express)/i
444
    || $raw =~ /(windows mail)/i
445
    || $raw =~ /(microplanet gravity)/i
446
    || $raw =~ /(news rover)/i
447
    || $raw =~ /(forte agent)/i
448
    || $raw =~ /(forte free agent)/i )
449
  {
450
    $agent = $1;
451
  }
452
  elsif (
453
    $raw =~ /^(
5 PointedEar 454
        pan
455
       |sylpheed
456
       |slrn
457
       |mozilla
458
       |knode
459
       |tin
460
       |hamster
461
       |xrn
462
       |xnews
463
       |aol
464
       |gnus
465
       |krn
466
       |macsoup
467
       |messenger
468
       |openxp
469
       |pine
470
       |thoth
471
       |turnpike
472
       |winvn
473
       |vsoup
474
       |google
475
       |supernews
476
       |nn
477
       |rn
478
       |007
479
       |webtv
480
       |compuserve
7 PointedEar 481
       )/ix
22 PointedEar 482
    )
483
  {
484
    $agent = $1;
485
  }
486
  else
487
  {
488
    ## Clean up unknown agents
489
    if ( $raw =~ m!^(.*?)/! )
7 PointedEar 490
    {
491
      $agent = $1;
492
    }
22 PointedEar 493
    elsif ( $raw =~ /^(\w*)\d.*/ )
7 PointedEar 494
    {
22 PointedEar 495
      $agent = $1;
496
    }
497
  }
498
 
499
  $distinct_agent{$agent}++;
500
  return $agent;
501
}
502
## get_agent
503
 
504
#########################################
505
## Count the User-Agents used, collapsing
506
## different versions into one per agent.
507
#########################################
508
sub count_agents
509
{
510
POSTER:
511
  foreach my $poster ( keys %data )
512
  {
513
    foreach my $agent_name ( keys %distinct_agent )
514
    {    # check against known ones
515
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7 PointedEar 516
      {
22 PointedEar 517
        $agents{$agent_name}++;
518
        next POSTER;
7 PointedEar 519
      }
520
    }
22 PointedEar 521
    $agents{ $data{$poster}{'agent'} }++;
522
  }
523
}    # count_agents
7 PointedEar 524
 
22 PointedEar 525
#############################################
526
## Set orig/total percentages for all posters
527
#############################################
528
sub fix_percent
529
{
530
  foreach my $poster ( keys %data )
531
  {
532
    my $percent = 100;
533
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
534
    {
535
      $percent =
536
        $data{$poster}{'orig'} * 100 /
537
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
538
    }
539
    elsif ( $data{$poster}{'orig'} == 0 )
540
    {
541
      $percent = 0;
542
    }
543
    $data{$poster}{'percent'} = $percent;
5 PointedEar 544
  }
22 PointedEar 545
}
546
## fix_percent
5 PointedEar 547
 
22 PointedEar 548
##################################
549
## Write data structures to a file
550
##################################
551
sub write_data
552
{
553
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
554
    or die __x( "Can't create XDATA: {error}\n", error => $! );
555
  print $OUTF "Data collected from $newsgroup_name\n\n";
556
  print $OUTF
557
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
558
  foreach my $name ( keys %data )
7 PointedEar 559
  {
22 PointedEar 560
    print $OUTF
561
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7 PointedEar 562
  }
22 PointedEar 563
  print $OUTF
564
"============================================================================\n";
565
  print $OUTF "Thread subjects\n";
566
  print $OUTF
567
"----------------------------------------------------------------------------\n";
568
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
569
  {
570
    print $OUTF
571
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
572
  }
573
  print $OUTF
574
"============================================================================\n";
575
  print $OUTF "Cross-posts\n";
576
  print $OUTF
577
"----------------------------------------------------------------------------\n";
578
  foreach my $name ( sort keys %crossposts )
579
  {
580
    print $OUTF "$name : $crossposts{$name}\n";
581
  }
582
  print $OUTF
583
"============================================================================\n";
584
  print $OUTF "User agents\n";
585
  print $OUTF
586
"----------------------------------------------------------------------------\n";
587
  foreach my $name ( sort keys %agents )
588
  {
589
    print $OUTF "$name : $agents{$name}\n";
590
  }
591
  print $OUTF
592
"============================================================================\n";
593
  print $OUTF "Time zones\n";
594
  print $OUTF
595
"----------------------------------------------------------------------------\n";
596
  foreach my $name ( sort keys %tz )
597
  {
598
    print $OUTF "$name : $tz{$name}\n";
599
  }
600
  close $OUTF;
601
}    # write_data
5 PointedEar 602
 
22 PointedEar 603
sub display_results
604
{
605
  #################### DISPLAY RESULTS #####################
23 PointedEar 606
  println( "=" x 76 );
22 PointedEar 607
  printf "%s\n",
608
    centred(
609
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
610
    76 );
23 PointedEar 611
  println( "=" x 76 );
22 PointedEar 612
  printf "%s\n",
613
    centred(
23 PointedEar 614
    __(
615
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
616
    ),
22 PointedEar 617
    76
618
    );
619
  print "\n\n";
23 PointedEar 620
  printf __"Total posts considered: %s over %d days\n",
621
    $formatter->format_number($totalposts),
622
    $formatter->format_number($numdays);
623
  my $time_locale       = setlocale(LC_TIME);
22 PointedEar 624
  my $earliest_datetime = DateTime->from_epoch(
23 PointedEar 625
    epoch     => $earliest,
626
    locale    => $time_locale,
22 PointedEar 627
    time_zone => 'UTC',
628
  );
629
  my $latest_datetime = DateTime->from_epoch(
23 PointedEar 630
    epoch     => $latest,
631
    locale    => $time_locale,
22 PointedEar 632
    time_zone => 'UTC',
633
  );
634
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23 PointedEar 635
  printf __"Earliest article: %s\n",
636
    $earliest_datetime->strftime($datetime_format);
637
  printf __"Latest article:   %s\n",
638
    $latest_datetime->strftime($datetime_format);
639
  printf __"Original articles: %s; replies: %s\n",
640
    $formatter->format_number($origposts),
641
    $formatter->format_number($replies);
642
  printf __"Total size of posts: %s bytes (%s)" . "\n",
643
    $formatter->format_number($totsize),
644
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
645
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
646
    $formatter->format_number( int( $totalposts / $numdays ) ),
647
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
648
    $formatter->format_number( int( $totsize / $totalposts ) );
649
 
22 PointedEar 650
  my $count = keys %data;
23 PointedEar 651
  printf __"Total headers: %s; bodies: %s\n",
652
    $formatter->format_bytes(
653
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
654
    ),
655
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 656
  printf __
23 PointedEar 657
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
658
    $formatter->format_bytes(
659
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
660
    ),
661
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
662
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
663
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
664
  printf __"Total number of posters: %s, average %s per poster\n",
665
    $formatter->format_number($count),
666
    $formatter->format_bytes( $totsize / $count,
667
    ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 668
  $count = keys %threads;
23 PointedEar 669
  printf __"Total number of threads: %s, average %s per thread\n",
670
    $formatter->format_number($count),
671
    $formatter->format_bytes( $totsize / $count,
672
    ( 'precision' => 1, 'mode' => 'iec' ) );
673
  printf __"Total number of user agents: %d\n",
674
    $formatter->format_number( scalar keys %agents );
31 PointedEar 675
  print "\n", "=" x 76, "\n";
22 PointedEar 676
  ########################################
677
  ## Show posters by article count  Sec 1;
678
  ########################################
679
  unless ( $skipSec{1} )
7 PointedEar 680
  {
22 PointedEar 681
    if ( keys %data < $topposters )
682
    {
683
      $count = keys %data;
684
    }
685
    else
686
    {
687
      $count = $topposters;
688
    }
689
    printf "%s\n",
690
      centred(
23 PointedEar 691
      __x( "Top {count} posters by number of articles", count => $topposters ),
692
      76
693
      );
22 PointedEar 694
    print "=" x 76, "\n";
695
    my $i = 0;
696
    foreach
697
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
698
    {
699
      my $name = substr( $poster, 0, 65 );
25 PointedEar 700
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 701
        $data{$poster}{count};
702
      last if ( ++$i == $count );
703
    }
704
    print "\n", "=" x 76, "\n";
7 PointedEar 705
  }
22 PointedEar 706
 
707
  ######################################
708
  ## Show posters by size in KiB  Sec 2;
709
  ######################################
710
  unless ( $skipSec{2} )
7 PointedEar 711
  {
22 PointedEar 712
    if ( keys %data < $topposters )
713
    {
714
      $count = keys %data;
715
    }
716
    else
717
    {
718
      $count = $topposters;
719
    }
720
    printf "%s\n",
721
      centred(
722
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
23 PointedEar 723
      76
724
      );
22 PointedEar 725
    print "=" x 76, "\n";
726
    my $i = 0;
727
    foreach
728
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
729
    {
730
      my $name = substr( $poster, 0, 62 );
25 PointedEar 731
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 732
        $data{$poster}{size} / 1024;    #/
733
      last if ( ++$i == $count );
734
    }
735
    print "\n", "=" x 76, "\n";
7 PointedEar 736
  }
5 PointedEar 737
 
22 PointedEar 738
  #####################################
739
  ## Show top posters for original text
740
  #####################################
31 PointedEar 741
  my $topposters_real = 0;
742
 
22 PointedEar 743
  unless ( $skipSec{3} )
7 PointedEar 744
  {
22 PointedEar 745
    if ( keys %data < $topposters )
746
    {
747
      $count = keys %data;
748
    }
749
    else
750
    {
751
      $count = $topposters;
752
    }
31 PointedEar 753
 
22 PointedEar 754
    printf "%s\n",
755
      centred(
756
      __x(
757
        "Top {count} responders by original text (> 5 posts)",
758
        count => $topposters
759
      ),
760
      76
761
      );
762
    print "=" x 76, "\n";
763
    foreach my $poster (
764
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
765
      keys %data
766
      )
767
    {
768
      next if $data{$poster}{quoted} == 0;
769
      next if $data{$poster}{count} < 5;
770
      my $name = substr( $poster, 0, 63 );
31 PointedEar 771
      printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1,
772
        rpad( $poster, 63, "." ),
22 PointedEar 773
        $data{$poster}{percent};
31 PointedEar 774
      last if ( ++$topposters_real == $count );
22 PointedEar 775
    }
776
    print "\n", "=" x 76, "\n";
7 PointedEar 777
  }
5 PointedEar 778
 
22 PointedEar 779
  ########################################
780
  ## Show bottom posters for original text
781
  ########################################
31 PointedEar 782
 
783
  $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4};
784
 
22 PointedEar 785
  unless ( $skipSec{4} )
7 PointedEar 786
  {
22 PointedEar 787
    if ( keys %data < $topposters )
7 PointedEar 788
    {
22 PointedEar 789
      $count = keys %data;
790
    }
791
    else
792
    {
793
      $count = $topposters;
794
    }
31 PointedEar 795
 
22 PointedEar 796
    printf "%s\n",
797
      centred(
798
      __x(
799
        "Bottom {count} responders by original text  (> 5 posts)",
800
        count => $topposters
801
      ),
802
      76
803
      );
804
    print "=" x 76, "\n";
805
    my $i = 0;
806
    foreach my $poster (
807
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
808
      keys %data
809
      )
810
    {
811
      next if $data{$poster}{quoted} == 0;
812
      next if $data{$poster}{count} < 5;
813
      my $name = substr( $poster, 0, 63 );
25 PointedEar 814
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 815
        $data{$poster}{percent};
816
      last if ( ++$i == $count );
817
    }
818
    print "\n", "=" x 76, "\n";
819
  }
5 PointedEar 820
 
22 PointedEar 821
  #####################################
822
  ## Show threads by number of articles
823
  #####################################
824
  unless ( $skipSec{5} )
825
  {
826
    if ( keys %threads < $topthreads )
827
    {
828
      $count = keys %threads;
7 PointedEar 829
    }
22 PointedEar 830
    else
831
    {
832
      $count = $topthreads;
7 PointedEar 833
    }
22 PointedEar 834
    printf "%s\n",
23 PointedEar 835
      centred(
836
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
22 PointedEar 837
      76 );
838
    print "=" x 76, "\n";
839
    my $i = 0;
840
    foreach my $thread (
841
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
842
      keys %threads
843
      )
7 PointedEar 844
    {
22 PointedEar 845
      my $name = substr( $thread, 0, 65 );
25 PointedEar 846
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 847
        $threads{$thread}{'count'};
848
      last if ( ++$i == $count );
7 PointedEar 849
    }
22 PointedEar 850
    print "\n", "=" x 76, "\n";
851
  }
852
 
853
  ##############################
854
  ## Show threads by size in KiB
855
  ##############################
856
  unless ( $skipSec{6} )
857
  {
858
    if ( keys %threads < $topthreads )
859
    {
860
      $count = keys %threads;
861
    }
7 PointedEar 862
    else
863
    {
22 PointedEar 864
      $count = $topthreads;
865
    }
866
    printf "%s\n",
23 PointedEar 867
      centred(
868
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22 PointedEar 869
    print "=" x 76, "\n";
870
    my $i = 0;
871
    foreach my $thread (
872
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
873
      keys %threads
874
      )
875
    {
876
      my $name = substr( $thread, 0, 65 );
25 PointedEar 877
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 878
        $threads{$thread}{'size'} / 1024;    #/
879
      last if ( ++$i == $count );
880
    }
881
    print "\n", "=" x 76, "\n";
882
  }
5 PointedEar 883
 
22 PointedEar 884
  ##################################
885
  ## Show top 10 cross-posted groups
886
  ##################################
887
  unless ( $skipSec{7} )
888
  {
889
    delete $crossposts{"$newsgroup_name"};    # don't include ours
890
    if ( keys %crossposts < $topcrossposts )
891
    {
892
      $count = keys %crossposts;
7 PointedEar 893
    }
22 PointedEar 894
    else
895
    {
896
      $count = $topcrossposts;
897
    }
898
    printf "%s\n",
23 PointedEar 899
      centred(
900
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22 PointedEar 901
    print "=" x 76, "\n";
902
    my $i = 0;
903
    foreach
904
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
905
    {
25 PointedEar 906
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 907
        $crossposts{$name};
908
      last if ( ++$i == $count );
909
    }
910
    print "\n", "=" x 76, "\n";
911
  }
5 PointedEar 912
 
22 PointedEar 913
  #########################
914
  ## Show agents and counts
915
  #########################
916
  unless ( $skipSec{8} )
917
  {
918
    if ( keys %agents < $topagents )
919
    {
920
      $count = keys %agents;
921
    }
922
    else
923
    {
924
      $count = $topagents;
925
    }
926
    printf "%s\n",
927
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
928
      76 );
929
    print "=" x 76, "\n";
930
    my $i = 0;
931
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
932
    {
25 PointedEar 933
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
22 PointedEar 934
        $agents{$agent};
935
      last if ( ++$i == $count );
936
    }
937
    print "\n", "=" x 76, "\n";
938
  }
5 PointedEar 939
 
22 PointedEar 940
  #######################
941
  ## Show distinct agents
942
  #######################
943
  unless ( $skipSec{9} )
7 PointedEar 944
  {
22 PointedEar 945
    if ( keys %distinct_agent < $topagents )
946
    {
947
      $count = keys %distinct_agent;
7 PointedEar 948
    }
22 PointedEar 949
    else
950
    {
951
      $count = $topagents;
952
    }
953
    printf "%s\n",
954
      centred(
955
      __x( "Top {count} user agents by number of posts", count => $topagents ),
23 PointedEar 956
      76
957
      );
22 PointedEar 958
    print "=" x 76, "\n";
959
    my $i = 0;
960
    foreach my $agent (
961
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
962
      keys %distinct_agent
963
      )
964
    {
25 PointedEar 965
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
22 PointedEar 966
        $distinct_agent{$agent},
967
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
968
      last if ( ++$i == $count );
969
    }
970
    print "\n", "=" x 76, "\n";
7 PointedEar 971
  }
5 PointedEar 972
 
22 PointedEar 973
  ############################
974
  ## Show timezones and counts
975
  ############################
976
  unless ( $skipSec{10} )
7 PointedEar 977
  {
22 PointedEar 978
    if ( keys %tz < $toptz )
7 PointedEar 979
    {
22 PointedEar 980
      $count = keys %tz;
7 PointedEar 981
    }
22 PointedEar 982
    else
7 PointedEar 983
    {
22 PointedEar 984
      $count = $toptz;
7 PointedEar 985
    }
23 PointedEar 986
    printf "%s\n",
987
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
22 PointedEar 988
    print "=" x 76, "\n";
989
    my $i = 0;
990
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
991
    {
25 PointedEar 992
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
22 PointedEar 993
      last if ( ++$i == $count );
994
    }
995
    print "\n", "=" x 76, "\n";
7 PointedEar 996
  }
5 PointedEar 997
}
998
 
22 PointedEar 999
## helper subs
1000
 
13 PointedEar 1001
###############################
1002
## Right pad a string with '.'s
1003
###############################
7 PointedEar 1004
sub rpad
1005
{
13 PointedEar 1006
  ## Get text to pad, length to pad, pad chr
7 PointedEar 1007
  my ( $text, $pad_len, $pad_chr ) = @_;
11 PointedEar 1008
 
13 PointedEar 1009
  ## DEBUG
22 PointedEar 1010
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
11 PointedEar 1011
 
7 PointedEar 1012
  if ( length($text) > $pad_len )
1013
  {
1014
    $text = substr( $text, 0, $pad_len );
1015
  }
1016
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1017
  return $padded;
5 PointedEar 1018
}
1019
 
13 PointedEar 1020
##################
1021
## Centre a string
1022
##################
7 PointedEar 1023
sub centred
1024
{
1025
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1026
  my $pad_len = ( $width - length($text) ) / 2;    #/
1027
  my $centred = " " x $pad_len . $text;
1028
  return $centred;
5 PointedEar 1029
}
1030
 
13 PointedEar 1031
###########################
1032
## Put commas into a number
1033
###########################
7 PointedEar 1034
sub commify
1035
{
13 PointedEar 1036
  local $_ = shift;
23 PointedEar 1037
  my $number = $_;
1038
  $_ = int;                                        # Chop non-integer part
22 PointedEar 1039
  1 while
23 PointedEar 1040
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1041
  my $int_part  = $_;
1042
  my $real_part = '';
1043
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1044
  {
1045
    $real_part = $1;
1046
  }
1047
  return $int_part . $real_part;
5 PointedEar 1048
}
1049
 
13 PointedEar 1050
################################################################
1051
## Returns a string with leading and trailing whitespace removed
1052
################################################################
7 PointedEar 1053
sub clean
1054
{
1055
  my $dirty = shift;
1056
  my $clean = $dirty;
14 PointedEar 1057
  $clean =~ s/^\s+|\s+$//g;
5 PointedEar 1058
 
7 PointedEar 1059
  return $clean;
5 PointedEar 1060
}
1061
 
7 PointedEar 1062
sub usage
1063
{
23 PointedEar 1064
  println( __ "usage: newsstat.pl NEWS.GROUP" );
7 PointedEar 1065
  exit 1;
5 PointedEar 1066
}
1067
 
22 PointedEar 1068
sub dmsg
7 PointedEar 1069
{
22 PointedEar 1070
  print STDERR @_, "\n";
1071
}
1072
 
1073
sub dmsg2
1074
{
1075
  my ( $level, @msg ) = @_;
1076
  print STDERR @msg, "\n" if $level >= DEBUG;
1077
}
23 PointedEar 1078
 
1079
sub println
1080
{
1081
  print @_, "\n";
1082
}