Subversion Repositories LCARS

Rev

Rev 24 | Rev 30 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8 PointedEar 1
#!/usr/bin/env perl
5 PointedEar 2
use strict;
6 PointedEar 3
use warnings;
23 PointedEar 4
require 5.004;
22 PointedEar 5
 
6
#use diagnostics;
6 PointedEar 7
use utf8;
8
use Encode;
9
 
22 PointedEar 10
use constant DEBUG => 0;
11
 
24 PointedEar 12
## newsstat.pl
13
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
14
## Based on work by Garry Knight et al.
15
## 
16
## This program is free software: you can redistribute it and/or modify
17
## it under the terms of the GNU General Public License as published by
18
## the Free Software Foundation, either version 3 of the License, or
19
## (at your option) any later version.
20
## 
21
## This program is distributed in the hope that it will be useful,
22
## but WITHOUT ANY WARRANTY; without even the implied warranty of
23
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
## GNU General Public License for more details.
25
## 
26
## You should have received a copy of the GNU General Public License
27
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
28
 
13 PointedEar 29
## Print out all text to STDOUT UTF-8 encoded
30
binmode STDOUT, ':encoding(UTF-8)';
22 PointedEar 31
binmode STDERR, ':encoding(UTF-8)';
5 PointedEar 32
 
23 PointedEar 33
## L10n
34
use locale ':not_characters';
35
 
36
# setlocale( LC_MESSAGES, '' );
37
require Number::Format;
38
 
39
## i18n
40
## FIXME: Automatically include resolved '.' in @INC
22 PointedEar 41
# print join "\n", @INC;
5 PointedEar 42
 
22 PointedEar 43
use Locale::TextDomain ('de.pointedears.newsstat');
23 PointedEar 44
use POSIX              ('locale_h');
22 PointedEar 45
use Locale::Messages qw (bind_textdomain_filter
23 PointedEar 46
  bind_textdomain_codeset
47
  turn_utf_8_on);
48
 
49
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
22 PointedEar 50
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23 PointedEar 51
 
22 PointedEar 52
require Mail::Message;
53
require DateTime;
54
require DateTime::Format::Mail;
23 PointedEar 55
 
56
# See comments in previous example
57
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
58
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
59
  'decimal_point' };
60
 
61
# Apply defaults if values are missing
62
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
63
$thousands_sep = ' ' unless $thousands_sep;
64
 
65
# grouping and mon_grouping are packed lists
66
# of small integers (characters) telling the
67
# grouping (thousand_seps and mon_thousand_seps
68
# being the group dividers) of numbers and
69
# monetary quantities.  The integers' meanings:
70
# 255 means no more grouping, 0 means repeat
71
# the previous grouping, 1-254 means use that
72
# as the current grouping.  Grouping goes from
73
# right to left (low to high digits).  In the
74
# below we cheat slightly by never using anything
75
# else than the first grouping (whatever that is).
76
my @grouping;
77
if ($grouping)
78
{
79
  @grouping = unpack( "C*", $grouping );
80
}
81
else
82
{
83
  @grouping = (3);
84
}
85
 
86
## FIXME: Why don't the defaults work already?
87
my $formatter = new Number::Format(
88
  -decimal_point => $decimal_point,
89
  -thousands_sep => $thousands_sep,
90
 
91
  # -grouping      => $grouping[0]
92
);
93
 
5 PointedEar 94
###################### USER CONFIGURATIONS ############################
95
 
13 PointedEar 96
## The name of the group to do stats for
5 PointedEar 97
my $newsgroup_name = $ARGV[0];
22 PointedEar 98
$newsgroup_name or usage();
5 PointedEar 99
 
13 PointedEar 100
## Check for removal flags
5 PointedEar 101
my $ix;
102
my $j;
103
my %skipSec;
104
my @skiplist;
105
my $args = @ARGV;
7 PointedEar 106
for ( $ix = 1 ; $ix < $args ; $ix++ )
107
{
108
  $j = $ix + 1;
109
  if ( $ARGV[$ix] eq "-x" )
110
  {
111
    @skiplist = split( ",", $ARGV[$j] );
112
  }
113
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
114
  {
115
    @skiplist = split( ",", $1 );
116
  }
5 PointedEar 117
}
7 PointedEar 118
foreach (@skiplist)
119
{
5 PointedEar 120
  $skipSec{$_} = 1;
121
}
122
 
13 PointedEar 123
## Leafnode users will want /var/spool/news for this variable.
5 PointedEar 124
my $news = "/var/spool/news/";
125
 
22 PointedEar 126
## Number of top or bottom posters to show
127
my $topposters = 20;
5 PointedEar 128
 
13 PointedEar 129
## Number of threads we want to know about
5 PointedEar 130
my $topthreads = 20;
131
 
13 PointedEar 132
## Number of cross-posted threads to show
5 PointedEar 133
my $topcrossposts = 10;
134
 
22 PointedEar 135
## Number of agents we list
136
my $topagents = 10;
137
 
13 PointedEar 138
## Number of time zones to show
5 PointedEar 139
my $toptz = 10;
140
 
141
###################### DATA STRUCTURES ######################
142
my $group = $newsgroup_name;
143
$group =~ s!\.!/!g;
22 PointedEar 144
my %data;    # name, count, agent, total, orig, quoted
145
my $totsize = 0;    # holds total sizes of all files
146
my %crossposts;     # group, count
147
my %threads;        # subject, count
148
my $replies   = 0;  # total no. of replies
149
my $origposts = 0;  # total no. of original posts
150
my %tz;             # timezones by count
151
my $earliest;       # earliest article we have found
152
my $latest;         # latest article we have found
153
my $totheader = 0;  # total size of header material
154
my $totbody   = 0;  # total size of body material
155
my $totsig    = 0;  # total size of sig material
156
my $totquoted = 0;  # total size of quoted material
157
my $totorig   = 0;  # total size of original material
158
my $totalposts;     # total no. of posts considered
5 PointedEar 159
my %distinct_agent;
13 PointedEar 160
 
161
## Used to hold counts of User Agents used
162
my %agents = (
163
  "Compuserver"               => 0,
164
  "Foorum"                    => 0,
7 PointedEar 165
  "Forte Agent"               => 0,
166
  "Forte Free Agent"          => 0,
13 PointedEar 167
  "Gnus"                      => 0,
168
  "KNode"                     => 0,
169
  "MacSOUP"                   => 0,
170
  "MT-NewsWatcher"            => 0,
7 PointedEar 171
  "MicroPlanet Gravity"       => 0,
172
  "Microsoft Outlook Express" => 0,
13 PointedEar 173
  "Microsoft Windows Mail"    => 0,
174
  "Mozilla"                   => 0,
175
  "News Rover"                => 0,
176
  "NN"                        => 0,
177
  "Pan"                       => 0,
178
  "rn"                        => 0,
7 PointedEar 179
  "slrn"                      => 0,
13 PointedEar 180
  "Sylpheed"                  => 0,
7 PointedEar 181
  "tin"                       => 0,
13 PointedEar 182
  "VSoup"                     => 0,
7 PointedEar 183
  "WebTV"                     => 0,
22 PointedEar 184
  "Xnews"                     => 0,
13 PointedEar 185
);
5 PointedEar 186
 
22 PointedEar 187
my $datetime_parser = DateTime::Format::Mail->new();
188
$datetime_parser->loose();
5 PointedEar 189
 
22 PointedEar 190
my $today = DateTime->today( time_zone => 'UTC' );
191
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
192
my $start      = int $prev_month->strftime('%s');
193
my $numdays    = int DateTime->last_day_of_month(
194
  year      => $prev_month->year(),
195
  month     => $prev_month->month(),
196
  time_zone => $prev_month->time_zone(),
197
)->day();
198
my $end = int $today->clone()->set_day(1)->strftime('%s');
199
 
200
dmsg( $start, " to ", $end ) if DEBUG;
201
 
202
chdir("$news$group")
203
  or die __x(
204
  "Can't cd to {newsgroup}: {error}\n",
205
  newsgroup => "$news$group",
206
  error     => $!
207
  );
208
opendir( DIR, "." )
209
  or die __x(
210
  "Can't open {newsgroup}: {error}\n",
211
  newsgroup => "$news$group",
212
  error     => $!
213
  );
214
 
215
while ( defined( my $filename = readdir(DIR) ) )
7 PointedEar 216
{
22 PointedEar 217
  next unless -f $filename;    # only want real files
218
  next if ( $filename eq ".overview" );    # real articles only
219
 
220
  get_article($filename);                  # read in the article
5 PointedEar 221
}
22 PointedEar 222
closedir(DIR);                             # finished with the directory
7 PointedEar 223
 
22 PointedEar 224
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
225
 
13 PointedEar 226
## Post-processing
22 PointedEar 227
count_agents();                            # count agents, collapsing versions
228
fix_percent();
5 PointedEar 229
 
22 PointedEar 230
write_data();
231
display_results();
5 PointedEar 232
 
13 PointedEar 233
########################################
22 PointedEar 234
## Get current article's header and body
13 PointedEar 235
########################################
22 PointedEar 236
sub get_article
7 PointedEar 237
{
22 PointedEar 238
  my $filename = shift;
5 PointedEar 239
 
22 PointedEar 240
  open( my $FILE, '<', $filename )
241
    or
242
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
243
  my $msg       = Mail::Message->read($FILE);
244
  my $timestamp = $msg->timestamp();
245
  my $date      = $msg->study('Date');
5 PointedEar 246
 
22 PointedEar 247
  ## Disregard article if timestamp is not in range
248
  dmsg($timestamp) if DEBUG;
249
  if ( $timestamp < $start or $timestamp >= $end )
7 PointedEar 250
  {
22 PointedEar 251
    dmsg("Posting on $date ignored.") if DEBUG;
252
    return;
7 PointedEar 253
  }
5 PointedEar 254
 
22 PointedEar 255
  $totalposts++;    # bump count of articles considered
5 PointedEar 256
 
22 PointedEar 257
  ## DEBUG
258
  dmsg($date) if DEBUG;
13 PointedEar 259
 
22 PointedEar 260
  ## get stats about the file itself
261
  my $filesize = -s $filename;    # get total size of file
262
  $totsize += $filesize;          # bump total sizes of all files
5 PointedEar 263
 
22 PointedEar 264
  if ( ( not defined $earliest ) or $timestamp < $earliest )
7 PointedEar 265
  {
22 PointedEar 266
    $earliest = $timestamp;
7 PointedEar 267
  }
22 PointedEar 268
  elsif ( ( not defined $latest ) or $timestamp > $latest )
7 PointedEar 269
  {
22 PointedEar 270
    $latest = $timestamp;
7 PointedEar 271
  }
13 PointedEar 272
 
22 PointedEar 273
  #print "timestamp: $timestamp\n";
5 PointedEar 274
 
22 PointedEar 275
  ## count header size
276
  $totheader += $msg->head()->size();
5 PointedEar 277
 
22 PointedEar 278
  ## get the poster's name (MIME-decoded, in UTF-8)
279
  my $poster = $msg->study('From');
280
  if ( defined $poster )
7 PointedEar 281
  {
22 PointedEar 282
    ## Convert old to new format
283
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
5 PointedEar 284
 
22 PointedEar 285
    ## Collapse whitespace
286
    $poster =~ s/\s+/ /g;
5 PointedEar 287
 
22 PointedEar 288
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
289
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5 PointedEar 290
 
22 PointedEar 291
    ## DEBUG
292
    dmsg($poster) if DEBUG;
7 PointedEar 293
 
22 PointedEar 294
    ## seen this one before?
295
    if ( !defined( $data{$poster} ) )
296
    {
297
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
298
      $data{$poster}{'orig'}   = 0;
299
      $data{$poster}{'quoted'} = 0;
300
    }
301
    $data{$poster}{'count'}++;                    # bump count for this poster
302
    $data{$poster}{'size'} += $filesize;          # total size of file
5 PointedEar 303
 
22 PointedEar 304
    ## The User-Agent and/or X-Newsreader fields
305
    ## for User-Agent by poster
306
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
307
    if ( defined $ua )
7 PointedEar 308
    {
22 PointedEar 309
      $data{$poster}{'agent'} = $ua;
310
 
311
      ## DEBUG
312
      dmsg($ua) if DEBUG;
7 PointedEar 313
    }
5 PointedEar 314
 
22 PointedEar 315
    ## The User Agent for User-Agent by number of posts
316
    get_agent($msg);
5 PointedEar 317
 
22 PointedEar 318
    ## Get all cross-posted newsgroups
319
    for ( split( /,/, $msg->study('Newsgroups') ) )
320
    {
321
      $crossposts{$_}++;    # bump count for each
322
    }
5 PointedEar 323
 
22 PointedEar 324
    ## Get threads
325
    my $thread = $msg->study('Subject');
326
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
327
    $thread =~ s/\s+/ /g;       # collapse whitespace
328
    $threads{$thread}{'count'}++;    # bump count of this subject
329
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
5 PointedEar 330
 
22 PointedEar 331
    ## Is this an original post or a reply?
332
    if ( defined $msg->study('References') )
7 PointedEar 333
    {
22 PointedEar 334
      $replies++;
7 PointedEar 335
    }
22 PointedEar 336
    else
7 PointedEar 337
    {
22 PointedEar 338
      $origposts++;
7 PointedEar 339
    }
22 PointedEar 340
 
341
    ## Get the time zone
342
    my $datetime = $datetime_parser->parse_datetime($date);
343
    my $tz       = $datetime->strftime('%z');
344
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
345
    $tz{$tz}++;
346
 
347
    ## DEBUG
348
    dmsg($tz) if DEBUG;
349
 
350
#### Now analyse the body text ####
351
    my $body = $msg->body();
352
 
353
    my $insig = 0;
354
    my @body  = $body->lines;
355
    for (@body)
7 PointedEar 356
    {
22 PointedEar 357
      $totbody += length($_);    # bump total body size
358
      next if (m{^$>}o);         # don't count blank lines in body
359
      if ( $insig == 1 )
360
      {
361
 
362
        # bump total sig size
363
        $totsig += length($_);
364
      }
365
      ## are we in a quote line?
366
      ## Bill Unruh uses ] quotes, and another poster uses ::
367
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
368
      {
369
        ## bump count of quoted chrs
370
        $data{$poster}{'quoted'} += length($_);
371
        $totquoted += length($_);
372
      }
373
      elsif (/^-- $/)
374
      {
375
        $insig = 1;
376
      }
377
      else
378
      {
379
        ## We must be processing an original line
380
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
381
        $totorig += length($_);
382
      }
7 PointedEar 383
    }
22 PointedEar 384
 
385
    # end for (@body)
386
  }
387
 
388
  close($FILE);
389
}
390
 
391
sub get_agent
392
{
393
  my $msg = shift;
394
 
395
  my $ua =
396
       $msg->study('User-Agent')
397
    or $msg->study('X-Newsreader')
398
    or $msg->study('X-Mailer');
399
  if ( not defined $ua )
400
  {
401
    my $org = $msg->study('Organization');
402
    if ( defined $org
403
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
7 PointedEar 404
    {
22 PointedEar 405
      $ua = $org;
7 PointedEar 406
    }
22 PointedEar 407
    elsif ( $msg->study('Message-ID') =~ /pine/i )
7 PointedEar 408
    {
22 PointedEar 409
      $ua = "Pine";
410
    }
7 PointedEar 411
  }
5 PointedEar 412
 
22 PointedEar 413
  ## Hopefully found UA, else set to unknown
414
  if ( not defined $ua )
415
  {
416
    $ua = __ "unknown";
417
  }
5 PointedEar 418
 
22 PointedEar 419
  $ua = clean($ua);
420
 
421
  my $raw   = $ua;
422
  my $agent = $raw;
423
 
424
  ## strip http
425
  if ( $raw =~ /.*http.*/ )
7 PointedEar 426
  {
22 PointedEar 427
    $raw =~ s!posted via!!i;
428
    $raw =~ s!http://!!g;
429
    $raw =~ s!/!!g;
430
    $raw =~ s! !!g;
431
  }
5 PointedEar 432
 
22 PointedEar 433
  ## Fix Outlook from Mac
434
  if ( $raw =~ /^microsoft/i )
435
  {
436
    $raw =~ s/-/ /g;
437
  }
5 PointedEar 438
 
22 PointedEar 439
  ## Pick out the popular agents
440
  if ( $raw =~ /(outlook express)/i
441
    || $raw =~ /(windows mail)/i
442
    || $raw =~ /(microplanet gravity)/i
443
    || $raw =~ /(news rover)/i
444
    || $raw =~ /(forte agent)/i
445
    || $raw =~ /(forte free agent)/i )
446
  {
447
    $agent = $1;
448
  }
449
  elsif (
450
    $raw =~ /^(
5 PointedEar 451
        pan
452
       |sylpheed
453
       |slrn
454
       |mozilla
455
       |knode
456
       |tin
457
       |hamster
458
       |xrn
459
       |xnews
460
       |aol
461
       |gnus
462
       |krn
463
       |macsoup
464
       |messenger
465
       |openxp
466
       |pine
467
       |thoth
468
       |turnpike
469
       |winvn
470
       |vsoup
471
       |google
472
       |supernews
473
       |nn
474
       |rn
475
       |007
476
       |webtv
477
       |compuserve
7 PointedEar 478
       )/ix
22 PointedEar 479
    )
480
  {
481
    $agent = $1;
482
  }
483
  else
484
  {
485
    ## Clean up unknown agents
486
    if ( $raw =~ m!^(.*?)/! )
7 PointedEar 487
    {
488
      $agent = $1;
489
    }
22 PointedEar 490
    elsif ( $raw =~ /^(\w*)\d.*/ )
7 PointedEar 491
    {
22 PointedEar 492
      $agent = $1;
493
    }
494
  }
495
 
496
  $distinct_agent{$agent}++;
497
  return $agent;
498
}
499
## get_agent
500
 
501
#########################################
502
## Count the User-Agents used, collapsing
503
## different versions into one per agent.
504
#########################################
505
sub count_agents
506
{
507
POSTER:
508
  foreach my $poster ( keys %data )
509
  {
510
    foreach my $agent_name ( keys %distinct_agent )
511
    {    # check against known ones
512
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7 PointedEar 513
      {
22 PointedEar 514
        $agents{$agent_name}++;
515
        next POSTER;
7 PointedEar 516
      }
517
    }
22 PointedEar 518
    $agents{ $data{$poster}{'agent'} }++;
519
  }
520
}    # count_agents
7 PointedEar 521
 
22 PointedEar 522
#############################################
523
## Set orig/total percentages for all posters
524
#############################################
525
sub fix_percent
526
{
527
  foreach my $poster ( keys %data )
528
  {
529
    my $percent = 100;
530
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
531
    {
532
      $percent =
533
        $data{$poster}{'orig'} * 100 /
534
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
535
    }
536
    elsif ( $data{$poster}{'orig'} == 0 )
537
    {
538
      $percent = 0;
539
    }
540
    $data{$poster}{'percent'} = $percent;
5 PointedEar 541
  }
22 PointedEar 542
}
543
## fix_percent
5 PointedEar 544
 
22 PointedEar 545
##################################
546
## Write data structures to a file
547
##################################
548
sub write_data
549
{
550
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
551
    or die __x( "Can't create XDATA: {error}\n", error => $! );
552
  print $OUTF "Data collected from $newsgroup_name\n\n";
553
  print $OUTF
554
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
555
  foreach my $name ( keys %data )
7 PointedEar 556
  {
22 PointedEar 557
    print $OUTF
558
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7 PointedEar 559
  }
22 PointedEar 560
  print $OUTF
561
"============================================================================\n";
562
  print $OUTF "Thread subjects\n";
563
  print $OUTF
564
"----------------------------------------------------------------------------\n";
565
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
566
  {
567
    print $OUTF
568
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
569
  }
570
  print $OUTF
571
"============================================================================\n";
572
  print $OUTF "Cross-posts\n";
573
  print $OUTF
574
"----------------------------------------------------------------------------\n";
575
  foreach my $name ( sort keys %crossposts )
576
  {
577
    print $OUTF "$name : $crossposts{$name}\n";
578
  }
579
  print $OUTF
580
"============================================================================\n";
581
  print $OUTF "User agents\n";
582
  print $OUTF
583
"----------------------------------------------------------------------------\n";
584
  foreach my $name ( sort keys %agents )
585
  {
586
    print $OUTF "$name : $agents{$name}\n";
587
  }
588
  print $OUTF
589
"============================================================================\n";
590
  print $OUTF "Time zones\n";
591
  print $OUTF
592
"----------------------------------------------------------------------------\n";
593
  foreach my $name ( sort keys %tz )
594
  {
595
    print $OUTF "$name : $tz{$name}\n";
596
  }
597
  close $OUTF;
598
}    # write_data
5 PointedEar 599
 
22 PointedEar 600
sub display_results
601
{
602
  #################### DISPLAY RESULTS #####################
23 PointedEar 603
  println( "=" x 76 );
22 PointedEar 604
  printf "%s\n",
605
    centred(
606
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
607
    76 );
23 PointedEar 608
  println( "=" x 76 );
22 PointedEar 609
  printf "%s\n",
610
    centred(
23 PointedEar 611
    __(
612
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
613
    ),
22 PointedEar 614
    76
615
    );
616
  print "\n\n";
23 PointedEar 617
  printf __"Total posts considered: %s over %d days\n",
618
    $formatter->format_number($totalposts),
619
    $formatter->format_number($numdays);
620
  my $time_locale       = setlocale(LC_TIME);
22 PointedEar 621
  my $earliest_datetime = DateTime->from_epoch(
23 PointedEar 622
    epoch     => $earliest,
623
    locale    => $time_locale,
22 PointedEar 624
    time_zone => 'UTC',
625
  );
626
  my $latest_datetime = DateTime->from_epoch(
23 PointedEar 627
    epoch     => $latest,
628
    locale    => $time_locale,
22 PointedEar 629
    time_zone => 'UTC',
630
  );
631
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23 PointedEar 632
  printf __"Earliest article: %s\n",
633
    $earliest_datetime->strftime($datetime_format);
634
  printf __"Latest article:   %s\n",
635
    $latest_datetime->strftime($datetime_format);
636
  printf __"Original articles: %s; replies: %s\n",
637
    $formatter->format_number($origposts),
638
    $formatter->format_number($replies);
639
  printf __"Total size of posts: %s bytes (%s)" . "\n",
640
    $formatter->format_number($totsize),
641
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
642
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
643
    $formatter->format_number( int( $totalposts / $numdays ) ),
644
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
645
    $formatter->format_number( int( $totsize / $totalposts ) );
646
 
22 PointedEar 647
  my $count = keys %data;
23 PointedEar 648
  printf __"Total headers: %s; bodies: %s\n",
649
    $formatter->format_bytes(
650
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
651
    ),
652
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 653
  printf __
23 PointedEar 654
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
655
    $formatter->format_bytes(
656
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
657
    ),
658
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
659
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
660
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
661
  printf __"Total number of posters: %s, average %s per poster\n",
662
    $formatter->format_number($count),
663
    $formatter->format_bytes( $totsize / $count,
664
    ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 665
  $count = keys %threads;
23 PointedEar 666
  printf __"Total number of threads: %s, average %s per thread\n",
667
    $formatter->format_number($count),
668
    $formatter->format_bytes( $totsize / $count,
669
    ( 'precision' => 1, 'mode' => 'iec' ) );
670
  printf __"Total number of user agents: %d\n",
671
    $formatter->format_number( scalar keys %agents );
672
  print "\n", "=" x 76, "\n"  ;
22 PointedEar 673
  ########################################
674
  ## Show posters by article count  Sec 1;
675
  ########################################
676
  unless ( $skipSec{1} )
7 PointedEar 677
  {
22 PointedEar 678
    if ( keys %data < $topposters )
679
    {
680
      $count = keys %data;
681
    }
682
    else
683
    {
684
      $count = $topposters;
685
    }
686
    printf "%s\n",
687
      centred(
23 PointedEar 688
      __x( "Top {count} posters by number of articles", count => $topposters ),
689
      76
690
      );
22 PointedEar 691
    print "=" x 76, "\n";
692
    my $i = 0;
693
    foreach
694
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
695
    {
696
      my $name = substr( $poster, 0, 65 );
25 PointedEar 697
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 698
        $data{$poster}{count};
699
      last if ( ++$i == $count );
700
    }
701
    print "\n", "=" x 76, "\n";
7 PointedEar 702
  }
22 PointedEar 703
 
704
  ######################################
705
  ## Show posters by size in KiB  Sec 2;
706
  ######################################
707
  unless ( $skipSec{2} )
7 PointedEar 708
  {
22 PointedEar 709
    if ( keys %data < $topposters )
710
    {
711
      $count = keys %data;
712
    }
713
    else
714
    {
715
      $count = $topposters;
716
    }
717
    printf "%s\n",
718
      centred(
719
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
23 PointedEar 720
      76
721
      );
22 PointedEar 722
    print "=" x 76, "\n";
723
    my $i = 0;
724
    foreach
725
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
726
    {
727
      my $name = substr( $poster, 0, 62 );
25 PointedEar 728
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 729
        $data{$poster}{size} / 1024;    #/
730
      last if ( ++$i == $count );
731
    }
732
    print "\n", "=" x 76, "\n";
7 PointedEar 733
  }
5 PointedEar 734
 
22 PointedEar 735
  #####################################
736
  ## Show top posters for original text
737
  #####################################
738
  unless ( $skipSec{3} )
7 PointedEar 739
  {
22 PointedEar 740
    if ( keys %data < $topposters )
741
    {
742
      $count = keys %data;
743
    }
744
    else
745
    {
746
      $count = $topposters;
747
    }
748
    printf "%s\n",
749
      centred(
750
      __x(
751
        "Top {count} responders by original text (> 5 posts)",
752
        count => $topposters
753
      ),
754
      76
755
      );
756
    print "=" x 76, "\n";
757
    my $i = 0;
758
    foreach my $poster (
759
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
760
      keys %data
761
      )
762
    {
763
      next if $data{$poster}{quoted} == 0;
764
      next if $data{$poster}{count} < 5;
765
      my $name = substr( $poster, 0, 63 );
25 PointedEar 766
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 767
        $data{$poster}{percent};
768
      last if ( ++$i == $count );
769
    }
770
    print "\n", "=" x 76, "\n";
7 PointedEar 771
  }
5 PointedEar 772
 
22 PointedEar 773
  ########################################
774
  ## Show bottom posters for original text
775
  ########################################
776
  unless ( $skipSec{4} )
7 PointedEar 777
  {
22 PointedEar 778
    if ( keys %data < $topposters )
7 PointedEar 779
    {
22 PointedEar 780
      $count = keys %data;
781
    }
782
    else
783
    {
784
      $count = $topposters;
785
    }
786
    printf "%s\n",
787
      centred(
788
      __x(
789
        "Bottom {count} responders by original text  (> 5 posts)",
790
        count => $topposters
791
      ),
792
      76
793
      );
794
    print "=" x 76, "\n";
795
    my $i = 0;
796
    foreach my $poster (
797
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
798
      keys %data
799
      )
800
    {
801
      next if $data{$poster}{quoted} == 0;
802
      next if $data{$poster}{count} < 5;
803
      my $name = substr( $poster, 0, 63 );
25 PointedEar 804
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
22 PointedEar 805
        $data{$poster}{percent};
806
      last if ( ++$i == $count );
807
    }
808
    print "\n", "=" x 76, "\n";
809
  }
5 PointedEar 810
 
22 PointedEar 811
  #####################################
812
  ## Show threads by number of articles
813
  #####################################
814
  unless ( $skipSec{5} )
815
  {
816
    if ( keys %threads < $topthreads )
817
    {
818
      $count = keys %threads;
7 PointedEar 819
    }
22 PointedEar 820
    else
821
    {
822
      $count = $topthreads;
7 PointedEar 823
    }
22 PointedEar 824
    printf "%s\n",
23 PointedEar 825
      centred(
826
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
22 PointedEar 827
      76 );
828
    print "=" x 76, "\n";
829
    my $i = 0;
830
    foreach my $thread (
831
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
832
      keys %threads
833
      )
7 PointedEar 834
    {
22 PointedEar 835
      my $name = substr( $thread, 0, 65 );
25 PointedEar 836
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 837
        $threads{$thread}{'count'};
838
      last if ( ++$i == $count );
7 PointedEar 839
    }
22 PointedEar 840
    print "\n", "=" x 76, "\n";
841
  }
842
 
843
  ##############################
844
  ## Show threads by size in KiB
845
  ##############################
846
  unless ( $skipSec{6} )
847
  {
848
    if ( keys %threads < $topthreads )
849
    {
850
      $count = keys %threads;
851
    }
7 PointedEar 852
    else
853
    {
22 PointedEar 854
      $count = $topthreads;
855
    }
856
    printf "%s\n",
23 PointedEar 857
      centred(
858
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22 PointedEar 859
    print "=" x 76, "\n";
860
    my $i = 0;
861
    foreach my $thread (
862
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
863
      keys %threads
864
      )
865
    {
866
      my $name = substr( $thread, 0, 65 );
25 PointedEar 867
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 868
        $threads{$thread}{'size'} / 1024;    #/
869
      last if ( ++$i == $count );
870
    }
871
    print "\n", "=" x 76, "\n";
872
  }
5 PointedEar 873
 
22 PointedEar 874
  ##################################
875
  ## Show top 10 cross-posted groups
876
  ##################################
877
  unless ( $skipSec{7} )
878
  {
879
    delete $crossposts{"$newsgroup_name"};    # don't include ours
880
    if ( keys %crossposts < $topcrossposts )
881
    {
882
      $count = keys %crossposts;
7 PointedEar 883
    }
22 PointedEar 884
    else
885
    {
886
      $count = $topcrossposts;
887
    }
888
    printf "%s\n",
23 PointedEar 889
      centred(
890
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22 PointedEar 891
    print "=" x 76, "\n";
892
    my $i = 0;
893
    foreach
894
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
895
    {
25 PointedEar 896
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
22 PointedEar 897
        $crossposts{$name};
898
      last if ( ++$i == $count );
899
    }
900
    print "\n", "=" x 76, "\n";
901
  }
5 PointedEar 902
 
22 PointedEar 903
  #########################
904
  ## Show agents and counts
905
  #########################
906
  unless ( $skipSec{8} )
907
  {
908
    if ( keys %agents < $topagents )
909
    {
910
      $count = keys %agents;
911
    }
912
    else
913
    {
914
      $count = $topagents;
915
    }
916
    printf "%s\n",
917
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
918
      76 );
919
    print "=" x 76, "\n";
920
    my $i = 0;
921
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
922
    {
25 PointedEar 923
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
22 PointedEar 924
        $agents{$agent};
925
      last if ( ++$i == $count );
926
    }
927
    print "\n", "=" x 76, "\n";
928
  }
5 PointedEar 929
 
22 PointedEar 930
  #######################
931
  ## Show distinct agents
932
  #######################
933
  unless ( $skipSec{9} )
7 PointedEar 934
  {
22 PointedEar 935
    if ( keys %distinct_agent < $topagents )
936
    {
937
      $count = keys %distinct_agent;
7 PointedEar 938
    }
22 PointedEar 939
    else
940
    {
941
      $count = $topagents;
942
    }
943
    printf "%s\n",
944
      centred(
945
      __x( "Top {count} user agents by number of posts", count => $topagents ),
23 PointedEar 946
      76
947
      );
22 PointedEar 948
    print "=" x 76, "\n";
949
    my $i = 0;
950
    foreach my $agent (
951
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
952
      keys %distinct_agent
953
      )
954
    {
25 PointedEar 955
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
22 PointedEar 956
        $distinct_agent{$agent},
957
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
958
      last if ( ++$i == $count );
959
    }
960
    print "\n", "=" x 76, "\n";
7 PointedEar 961
  }
5 PointedEar 962
 
22 PointedEar 963
  ############################
964
  ## Show timezones and counts
965
  ############################
966
  unless ( $skipSec{10} )
7 PointedEar 967
  {
22 PointedEar 968
    if ( keys %tz < $toptz )
7 PointedEar 969
    {
22 PointedEar 970
      $count = keys %tz;
7 PointedEar 971
    }
22 PointedEar 972
    else
7 PointedEar 973
    {
22 PointedEar 974
      $count = $toptz;
7 PointedEar 975
    }
23 PointedEar 976
    printf "%s\n",
977
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
22 PointedEar 978
    print "=" x 76, "\n";
979
    my $i = 0;
980
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
981
    {
25 PointedEar 982
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
22 PointedEar 983
      last if ( ++$i == $count );
984
    }
985
    print "\n", "=" x 76, "\n";
7 PointedEar 986
  }
5 PointedEar 987
}
988
 
22 PointedEar 989
## helper subs
990
 
13 PointedEar 991
###############################
992
## Right pad a string with '.'s
993
###############################
7 PointedEar 994
sub rpad
995
{
13 PointedEar 996
  ## Get text to pad, length to pad, pad chr
7 PointedEar 997
  my ( $text, $pad_len, $pad_chr ) = @_;
11 PointedEar 998
 
13 PointedEar 999
  ## DEBUG
22 PointedEar 1000
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
11 PointedEar 1001
 
7 PointedEar 1002
  if ( length($text) > $pad_len )
1003
  {
1004
    $text = substr( $text, 0, $pad_len );
1005
  }
1006
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1007
  return $padded;
5 PointedEar 1008
}
1009
 
13 PointedEar 1010
##################
1011
## Centre a string
1012
##################
7 PointedEar 1013
sub centred
1014
{
1015
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1016
  my $pad_len = ( $width - length($text) ) / 2;    #/
1017
  my $centred = " " x $pad_len . $text;
1018
  return $centred;
5 PointedEar 1019
}
1020
 
13 PointedEar 1021
###########################
1022
## Put commas into a number
1023
###########################
7 PointedEar 1024
sub commify
1025
{
13 PointedEar 1026
  local $_ = shift;
23 PointedEar 1027
  my $number = $_;
1028
  $_ = int;                                        # Chop non-integer part
22 PointedEar 1029
  1 while
23 PointedEar 1030
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1031
  my $int_part  = $_;
1032
  my $real_part = '';
1033
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1034
  {
1035
    $real_part = $1;
1036
  }
1037
  return $int_part . $real_part;
5 PointedEar 1038
}
1039
 
13 PointedEar 1040
################################################################
1041
## Returns a string with leading and trailing whitespace removed
1042
################################################################
7 PointedEar 1043
sub clean
1044
{
1045
  my $dirty = shift;
1046
  my $clean = $dirty;
14 PointedEar 1047
  $clean =~ s/^\s+|\s+$//g;
5 PointedEar 1048
 
7 PointedEar 1049
  return $clean;
5 PointedEar 1050
}
1051
 
7 PointedEar 1052
sub usage
1053
{
23 PointedEar 1054
  println( __ "usage: newsstat.pl NEWS.GROUP" );
7 PointedEar 1055
  exit 1;
5 PointedEar 1056
}
1057
 
22 PointedEar 1058
sub dmsg
7 PointedEar 1059
{
22 PointedEar 1060
  print STDERR @_, "\n";
1061
}
1062
 
1063
sub dmsg2
1064
{
1065
  my ( $level, @msg ) = @_;
1066
  print STDERR @msg, "\n" if $level >= DEBUG;
1067
}
23 PointedEar 1068
 
1069
sub println
1070
{
1071
  print @_, "\n";
1072
}