Subversion Repositories LCARS

Rev

Rev 22 | Rev 24 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8 PointedEar 1
#!/usr/bin/env perl
5 PointedEar 2
use strict;
6 PointedEar 3
use warnings;
23 PointedEar 4
require 5.004;
22 PointedEar 5
 
6
#use diagnostics;
6 PointedEar 7
use utf8;
8
use Encode;
9
 
22 PointedEar 10
use constant DEBUG => 0;
11
 
13 PointedEar 12
## Print out all text to STDOUT UTF-8 encoded
13
binmode STDOUT, ':encoding(UTF-8)';
22 PointedEar 14
binmode STDERR, ':encoding(UTF-8)';
5 PointedEar 15
 
23 PointedEar 16
## L10n
17
use locale ':not_characters';
18
 
19
# setlocale( LC_MESSAGES, '' );
20
require Number::Format;
21
 
22
## i18n
23
## FIXME: Automatically include resolved '.' in @INC
22 PointedEar 24
# print join "\n", @INC;
5 PointedEar 25
 
22 PointedEar 26
use Locale::TextDomain ('de.pointedears.newsstat');
23 PointedEar 27
use POSIX              ('locale_h');
22 PointedEar 28
use Locale::Messages qw (bind_textdomain_filter
23 PointedEar 29
  bind_textdomain_codeset
30
  turn_utf_8_on);
31
 
32
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
22 PointedEar 33
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
23 PointedEar 34
 
22 PointedEar 35
require Mail::Message;
36
require DateTime;
37
require DateTime::Format::Mail;
23 PointedEar 38
 
39
# See comments in previous example
40
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
41
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
42
  'decimal_point' };
43
 
44
# Apply defaults if values are missing
45
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
46
$thousands_sep = ' ' unless $thousands_sep;
47
 
48
# grouping and mon_grouping are packed lists
49
# of small integers (characters) telling the
50
# grouping (thousand_seps and mon_thousand_seps
51
# being the group dividers) of numbers and
52
# monetary quantities.  The integers' meanings:
53
# 255 means no more grouping, 0 means repeat
54
# the previous grouping, 1-254 means use that
55
# as the current grouping.  Grouping goes from
56
# right to left (low to high digits).  In the
57
# below we cheat slightly by never using anything
58
# else than the first grouping (whatever that is).
59
my @grouping;
60
if ($grouping)
61
{
62
  @grouping = unpack( "C*", $grouping );
63
}
64
else
65
{
66
  @grouping = (3);
67
}
68
 
69
## FIXME: Why don't the defaults work already?
70
my $formatter = new Number::Format(
71
  -decimal_point => $decimal_point,
72
  -thousands_sep => $thousands_sep,
73
 
74
  # -grouping      => $grouping[0]
75
);
76
 
5 PointedEar 77
###################### USER CONFIGURATIONS ############################
78
 
13 PointedEar 79
## The name of the group to do stats for
5 PointedEar 80
my $newsgroup_name = $ARGV[0];
22 PointedEar 81
$newsgroup_name or usage();
5 PointedEar 82
 
13 PointedEar 83
## Check for removal flags
5 PointedEar 84
my $ix;
85
my $j;
86
my %skipSec;
87
my @skiplist;
88
my $args = @ARGV;
7 PointedEar 89
for ( $ix = 1 ; $ix < $args ; $ix++ )
90
{
91
  $j = $ix + 1;
92
  if ( $ARGV[$ix] eq "-x" )
93
  {
94
    @skiplist = split( ",", $ARGV[$j] );
95
  }
96
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
97
  {
98
    @skiplist = split( ",", $1 );
99
  }
5 PointedEar 100
}
7 PointedEar 101
foreach (@skiplist)
102
{
5 PointedEar 103
  $skipSec{$_} = 1;
104
}
105
 
13 PointedEar 106
## Leafnode users will want /var/spool/news for this variable.
5 PointedEar 107
my $news = "/var/spool/news/";
108
 
22 PointedEar 109
## Number of top or bottom posters to show
110
my $topposters = 20;
5 PointedEar 111
 
13 PointedEar 112
## Number of threads we want to know about
5 PointedEar 113
my $topthreads = 20;
114
 
13 PointedEar 115
## Number of cross-posted threads to show
5 PointedEar 116
my $topcrossposts = 10;
117
 
22 PointedEar 118
## Number of agents we list
119
my $topagents = 10;
120
 
13 PointedEar 121
## Number of time zones to show
5 PointedEar 122
my $toptz = 10;
123
 
124
###################### DATA STRUCTURES ######################
125
my $group = $newsgroup_name;
126
$group =~ s!\.!/!g;
22 PointedEar 127
my %data;    # name, count, agent, total, orig, quoted
128
my $totsize = 0;    # holds total sizes of all files
129
my %crossposts;     # group, count
130
my %threads;        # subject, count
131
my $replies   = 0;  # total no. of replies
132
my $origposts = 0;  # total no. of original posts
133
my %tz;             # timezones by count
134
my $earliest;       # earliest article we have found
135
my $latest;         # latest article we have found
136
my $totheader = 0;  # total size of header material
137
my $totbody   = 0;  # total size of body material
138
my $totsig    = 0;  # total size of sig material
139
my $totquoted = 0;  # total size of quoted material
140
my $totorig   = 0;  # total size of original material
141
my $totalposts;     # total no. of posts considered
5 PointedEar 142
my %distinct_agent;
13 PointedEar 143
 
144
## Used to hold counts of User Agents used
145
my %agents = (
146
  "Compuserver"               => 0,
147
  "Foorum"                    => 0,
7 PointedEar 148
  "Forte Agent"               => 0,
149
  "Forte Free Agent"          => 0,
13 PointedEar 150
  "Gnus"                      => 0,
151
  "KNode"                     => 0,
152
  "MacSOUP"                   => 0,
153
  "MT-NewsWatcher"            => 0,
7 PointedEar 154
  "MicroPlanet Gravity"       => 0,
155
  "Microsoft Outlook Express" => 0,
13 PointedEar 156
  "Microsoft Windows Mail"    => 0,
157
  "Mozilla"                   => 0,
158
  "News Rover"                => 0,
159
  "NN"                        => 0,
160
  "Pan"                       => 0,
161
  "rn"                        => 0,
7 PointedEar 162
  "slrn"                      => 0,
13 PointedEar 163
  "Sylpheed"                  => 0,
7 PointedEar 164
  "tin"                       => 0,
13 PointedEar 165
  "VSoup"                     => 0,
7 PointedEar 166
  "WebTV"                     => 0,
22 PointedEar 167
  "Xnews"                     => 0,
13 PointedEar 168
);
5 PointedEar 169
 
22 PointedEar 170
my $datetime_parser = DateTime::Format::Mail->new();
171
$datetime_parser->loose();
5 PointedEar 172
 
22 PointedEar 173
my $today = DateTime->today( time_zone => 'UTC' );
174
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
175
my $start      = int $prev_month->strftime('%s');
176
my $numdays    = int DateTime->last_day_of_month(
177
  year      => $prev_month->year(),
178
  month     => $prev_month->month(),
179
  time_zone => $prev_month->time_zone(),
180
)->day();
181
my $end = int $today->clone()->set_day(1)->strftime('%s');
182
 
183
dmsg( $start, " to ", $end ) if DEBUG;
184
 
185
chdir("$news$group")
186
  or die __x(
187
  "Can't cd to {newsgroup}: {error}\n",
188
  newsgroup => "$news$group",
189
  error     => $!
190
  );
191
opendir( DIR, "." )
192
  or die __x(
193
  "Can't open {newsgroup}: {error}\n",
194
  newsgroup => "$news$group",
195
  error     => $!
196
  );
197
 
198
while ( defined( my $filename = readdir(DIR) ) )
7 PointedEar 199
{
22 PointedEar 200
  next unless -f $filename;    # only want real files
201
  next if ( $filename eq ".overview" );    # real articles only
202
 
203
  get_article($filename);                  # read in the article
5 PointedEar 204
}
22 PointedEar 205
closedir(DIR);                             # finished with the directory
7 PointedEar 206
 
22 PointedEar 207
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
208
 
13 PointedEar 209
## Post-processing
22 PointedEar 210
count_agents();                            # count agents, collapsing versions
211
fix_percent();
5 PointedEar 212
 
22 PointedEar 213
write_data();
214
display_results();
5 PointedEar 215
 
13 PointedEar 216
########################################
22 PointedEar 217
## Get current article's header and body
13 PointedEar 218
########################################
22 PointedEar 219
sub get_article
7 PointedEar 220
{
22 PointedEar 221
  my $filename = shift;
5 PointedEar 222
 
22 PointedEar 223
  open( my $FILE, '<', $filename )
224
    or
225
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
226
  my $msg       = Mail::Message->read($FILE);
227
  my $timestamp = $msg->timestamp();
228
  my $date      = $msg->study('Date');
5 PointedEar 229
 
22 PointedEar 230
  ## Disregard article if timestamp is not in range
231
  dmsg($timestamp) if DEBUG;
232
  if ( $timestamp < $start or $timestamp >= $end )
7 PointedEar 233
  {
22 PointedEar 234
    dmsg("Posting on $date ignored.") if DEBUG;
235
    return;
7 PointedEar 236
  }
5 PointedEar 237
 
22 PointedEar 238
  $totalposts++;    # bump count of articles considered
5 PointedEar 239
 
22 PointedEar 240
  ## DEBUG
241
  dmsg($date) if DEBUG;
13 PointedEar 242
 
22 PointedEar 243
  ## get stats about the file itself
244
  my $filesize = -s $filename;    # get total size of file
245
  $totsize += $filesize;          # bump total sizes of all files
5 PointedEar 246
 
22 PointedEar 247
  if ( ( not defined $earliest ) or $timestamp < $earliest )
7 PointedEar 248
  {
22 PointedEar 249
    $earliest = $timestamp;
7 PointedEar 250
  }
22 PointedEar 251
  elsif ( ( not defined $latest ) or $timestamp > $latest )
7 PointedEar 252
  {
22 PointedEar 253
    $latest = $timestamp;
7 PointedEar 254
  }
13 PointedEar 255
 
22 PointedEar 256
  #print "timestamp: $timestamp\n";
5 PointedEar 257
 
22 PointedEar 258
  ## count header size
259
  $totheader += $msg->head()->size();
5 PointedEar 260
 
22 PointedEar 261
  ## get the poster's name (MIME-decoded, in UTF-8)
262
  my $poster = $msg->study('From');
263
  if ( defined $poster )
7 PointedEar 264
  {
22 PointedEar 265
    ## Convert old to new format
266
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
5 PointedEar 267
 
22 PointedEar 268
    ## Collapse whitespace
269
    $poster =~ s/\s+/ /g;
5 PointedEar 270
 
22 PointedEar 271
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
272
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5 PointedEar 273
 
22 PointedEar 274
    ## DEBUG
275
    dmsg($poster) if DEBUG;
7 PointedEar 276
 
22 PointedEar 277
    ## seen this one before?
278
    if ( !defined( $data{$poster} ) )
279
    {
280
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
281
      $data{$poster}{'orig'}   = 0;
282
      $data{$poster}{'quoted'} = 0;
283
    }
284
    $data{$poster}{'count'}++;                    # bump count for this poster
285
    $data{$poster}{'size'} += $filesize;          # total size of file
5 PointedEar 286
 
22 PointedEar 287
    ## The User-Agent and/or X-Newsreader fields
288
    ## for User-Agent by poster
289
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
290
    if ( defined $ua )
7 PointedEar 291
    {
22 PointedEar 292
      $data{$poster}{'agent'} = $ua;
293
 
294
      ## DEBUG
295
      dmsg($ua) if DEBUG;
7 PointedEar 296
    }
5 PointedEar 297
 
22 PointedEar 298
    ## The User Agent for User-Agent by number of posts
299
    get_agent($msg);
5 PointedEar 300
 
22 PointedEar 301
    ## Get all cross-posted newsgroups
302
    for ( split( /,/, $msg->study('Newsgroups') ) )
303
    {
304
      $crossposts{$_}++;    # bump count for each
305
    }
5 PointedEar 306
 
22 PointedEar 307
    ## Get threads
308
    my $thread = $msg->study('Subject');
309
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
310
    $thread =~ s/\s+/ /g;       # collapse whitespace
311
    $threads{$thread}{'count'}++;    # bump count of this subject
312
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
5 PointedEar 313
 
22 PointedEar 314
    ## Is this an original post or a reply?
315
    if ( defined $msg->study('References') )
7 PointedEar 316
    {
22 PointedEar 317
      $replies++;
7 PointedEar 318
    }
22 PointedEar 319
    else
7 PointedEar 320
    {
22 PointedEar 321
      $origposts++;
7 PointedEar 322
    }
22 PointedEar 323
 
324
    ## Get the time zone
325
    my $datetime = $datetime_parser->parse_datetime($date);
326
    my $tz       = $datetime->strftime('%z');
327
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
328
    $tz{$tz}++;
329
 
330
    ## DEBUG
331
    dmsg($tz) if DEBUG;
332
 
333
#### Now analyse the body text ####
334
    my $body = $msg->body();
335
 
336
    my $insig = 0;
337
    my @body  = $body->lines;
338
    for (@body)
7 PointedEar 339
    {
22 PointedEar 340
      $totbody += length($_);    # bump total body size
341
      next if (m{^$>}o);         # don't count blank lines in body
342
      if ( $insig == 1 )
343
      {
344
 
345
        # bump total sig size
346
        $totsig += length($_);
347
      }
348
      ## are we in a quote line?
349
      ## Bill Unruh uses ] quotes, and another poster uses ::
350
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
351
      {
352
        ## bump count of quoted chrs
353
        $data{$poster}{'quoted'} += length($_);
354
        $totquoted += length($_);
355
      }
356
      elsif (/^-- $/)
357
      {
358
        $insig = 1;
359
      }
360
      else
361
      {
362
        ## We must be processing an original line
363
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
364
        $totorig += length($_);
365
      }
7 PointedEar 366
    }
22 PointedEar 367
 
368
    # end for (@body)
369
  }
370
 
371
  close($FILE);
372
}
373
 
374
sub get_agent
375
{
376
  my $msg = shift;
377
 
378
  my $ua =
379
       $msg->study('User-Agent')
380
    or $msg->study('X-Newsreader')
381
    or $msg->study('X-Mailer');
382
  if ( not defined $ua )
383
  {
384
    my $org = $msg->study('Organization');
385
    if ( defined $org
386
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
7 PointedEar 387
    {
22 PointedEar 388
      $ua = $org;
7 PointedEar 389
    }
22 PointedEar 390
    elsif ( $msg->study('Message-ID') =~ /pine/i )
7 PointedEar 391
    {
22 PointedEar 392
      $ua = "Pine";
393
    }
7 PointedEar 394
  }
5 PointedEar 395
 
22 PointedEar 396
  ## Hopefully found UA, else set to unknown
397
  if ( not defined $ua )
398
  {
399
    $ua = __ "unknown";
400
  }
5 PointedEar 401
 
22 PointedEar 402
  $ua = clean($ua);
403
 
404
  my $raw   = $ua;
405
  my $agent = $raw;
406
 
407
  ## strip http
408
  if ( $raw =~ /.*http.*/ )
7 PointedEar 409
  {
22 PointedEar 410
    $raw =~ s!posted via!!i;
411
    $raw =~ s!http://!!g;
412
    $raw =~ s!/!!g;
413
    $raw =~ s! !!g;
414
  }
5 PointedEar 415
 
22 PointedEar 416
  ## Fix Outlook from Mac
417
  if ( $raw =~ /^microsoft/i )
418
  {
419
    $raw =~ s/-/ /g;
420
  }
5 PointedEar 421
 
22 PointedEar 422
  ## Pick out the popular agents
423
  if ( $raw =~ /(outlook express)/i
424
    || $raw =~ /(windows mail)/i
425
    || $raw =~ /(microplanet gravity)/i
426
    || $raw =~ /(news rover)/i
427
    || $raw =~ /(forte agent)/i
428
    || $raw =~ /(forte free agent)/i )
429
  {
430
    $agent = $1;
431
  }
432
  elsif (
433
    $raw =~ /^(
5 PointedEar 434
        pan
435
       |sylpheed
436
       |slrn
437
       |mozilla
438
       |knode
439
       |tin
440
       |hamster
441
       |xrn
442
       |xnews
443
       |aol
444
       |gnus
445
       |krn
446
       |macsoup
447
       |messenger
448
       |openxp
449
       |pine
450
       |thoth
451
       |turnpike
452
       |winvn
453
       |vsoup
454
       |google
455
       |supernews
456
       |nn
457
       |rn
458
       |007
459
       |webtv
460
       |compuserve
7 PointedEar 461
       )/ix
22 PointedEar 462
    )
463
  {
464
    $agent = $1;
465
  }
466
  else
467
  {
468
    ## Clean up unknown agents
469
    if ( $raw =~ m!^(.*?)/! )
7 PointedEar 470
    {
471
      $agent = $1;
472
    }
22 PointedEar 473
    elsif ( $raw =~ /^(\w*)\d.*/ )
7 PointedEar 474
    {
22 PointedEar 475
      $agent = $1;
476
    }
477
  }
478
 
479
  $distinct_agent{$agent}++;
480
  return $agent;
481
}
482
## get_agent
483
 
484
#########################################
485
## Count the User-Agents used, collapsing
486
## different versions into one per agent.
487
#########################################
488
sub count_agents
489
{
490
POSTER:
491
  foreach my $poster ( keys %data )
492
  {
493
    foreach my $agent_name ( keys %distinct_agent )
494
    {    # check against known ones
495
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7 PointedEar 496
      {
22 PointedEar 497
        $agents{$agent_name}++;
498
        next POSTER;
7 PointedEar 499
      }
500
    }
22 PointedEar 501
    $agents{ $data{$poster}{'agent'} }++;
502
  }
503
}    # count_agents
7 PointedEar 504
 
22 PointedEar 505
#############################################
506
## Set orig/total percentages for all posters
507
#############################################
508
sub fix_percent
509
{
510
  foreach my $poster ( keys %data )
511
  {
512
    my $percent = 100;
513
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
514
    {
515
      $percent =
516
        $data{$poster}{'orig'} * 100 /
517
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
518
    }
519
    elsif ( $data{$poster}{'orig'} == 0 )
520
    {
521
      $percent = 0;
522
    }
523
    $data{$poster}{'percent'} = $percent;
5 PointedEar 524
  }
22 PointedEar 525
}
526
## fix_percent
5 PointedEar 527
 
22 PointedEar 528
##################################
529
## Write data structures to a file
530
##################################
531
sub write_data
532
{
533
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
534
    or die __x( "Can't create XDATA: {error}\n", error => $! );
535
  print $OUTF "Data collected from $newsgroup_name\n\n";
536
  print $OUTF
537
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
538
  foreach my $name ( keys %data )
7 PointedEar 539
  {
22 PointedEar 540
    print $OUTF
541
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7 PointedEar 542
  }
22 PointedEar 543
  print $OUTF
544
"============================================================================\n";
545
  print $OUTF "Thread subjects\n";
546
  print $OUTF
547
"----------------------------------------------------------------------------\n";
548
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
549
  {
550
    print $OUTF
551
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
552
  }
553
  print $OUTF
554
"============================================================================\n";
555
  print $OUTF "Cross-posts\n";
556
  print $OUTF
557
"----------------------------------------------------------------------------\n";
558
  foreach my $name ( sort keys %crossposts )
559
  {
560
    print $OUTF "$name : $crossposts{$name}\n";
561
  }
562
  print $OUTF
563
"============================================================================\n";
564
  print $OUTF "User agents\n";
565
  print $OUTF
566
"----------------------------------------------------------------------------\n";
567
  foreach my $name ( sort keys %agents )
568
  {
569
    print $OUTF "$name : $agents{$name}\n";
570
  }
571
  print $OUTF
572
"============================================================================\n";
573
  print $OUTF "Time zones\n";
574
  print $OUTF
575
"----------------------------------------------------------------------------\n";
576
  foreach my $name ( sort keys %tz )
577
  {
578
    print $OUTF "$name : $tz{$name}\n";
579
  }
580
  close $OUTF;
581
}    # write_data
5 PointedEar 582
 
22 PointedEar 583
sub display_results
584
{
585
  #################### DISPLAY RESULTS #####################
23 PointedEar 586
  println( "=" x 76 );
22 PointedEar 587
  printf "%s\n",
588
    centred(
589
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
590
    76 );
23 PointedEar 591
  println( "=" x 76 );
22 PointedEar 592
  printf "%s\n",
593
    centred(
23 PointedEar 594
    __(
595
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
596
    ),
22 PointedEar 597
    76
598
    );
599
  print "\n\n";
23 PointedEar 600
  printf __"Total posts considered: %s over %d days\n",
601
    $formatter->format_number($totalposts),
602
    $formatter->format_number($numdays);
603
  my $time_locale       = setlocale(LC_TIME);
22 PointedEar 604
  my $earliest_datetime = DateTime->from_epoch(
23 PointedEar 605
    epoch     => $earliest,
606
    locale    => $time_locale,
22 PointedEar 607
    time_zone => 'UTC',
608
  );
609
  my $latest_datetime = DateTime->from_epoch(
23 PointedEar 610
    epoch     => $latest,
611
    locale    => $time_locale,
22 PointedEar 612
    time_zone => 'UTC',
613
  );
614
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
23 PointedEar 615
  printf __"Earliest article: %s\n",
616
    $earliest_datetime->strftime($datetime_format);
617
  printf __"Latest article:   %s\n",
618
    $latest_datetime->strftime($datetime_format);
619
  printf __"Original articles: %s; replies: %s\n",
620
    $formatter->format_number($origposts),
621
    $formatter->format_number($replies);
622
  printf __"Total size of posts: %s bytes (%s)" . "\n",
623
    $formatter->format_number($totsize),
624
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
625
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
626
    $formatter->format_number( int( $totalposts / $numdays ) ),
627
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
628
    $formatter->format_number( int( $totsize / $totalposts ) );
629
 
22 PointedEar 630
  my $count = keys %data;
23 PointedEar 631
  printf __"Total headers: %s; bodies: %s\n",
632
    $formatter->format_bytes(
633
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
634
    ),
635
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 636
  printf __
23 PointedEar 637
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
638
    $formatter->format_bytes(
639
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
640
    ),
641
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
642
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
643
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
644
  printf __"Total number of posters: %s, average %s per poster\n",
645
    $formatter->format_number($count),
646
    $formatter->format_bytes( $totsize / $count,
647
    ( 'precision' => 1, 'mode' => 'iec' ) );
22 PointedEar 648
  $count = keys %threads;
23 PointedEar 649
  printf __"Total number of threads: %s, average %s per thread\n",
650
    $formatter->format_number($count),
651
    $formatter->format_bytes( $totsize / $count,
652
    ( 'precision' => 1, 'mode' => 'iec' ) );
653
  printf __"Total number of user agents: %d\n",
654
    $formatter->format_number( scalar keys %agents );
655
  print "\n", "=" x 76, "\n"  ;
22 PointedEar 656
  ########################################
657
  ## Show posters by article count  Sec 1;
658
  ########################################
659
  unless ( $skipSec{1} )
7 PointedEar 660
  {
22 PointedEar 661
    if ( keys %data < $topposters )
662
    {
663
      $count = keys %data;
664
    }
665
    else
666
    {
667
      $count = $topposters;
668
    }
669
    printf "%s\n",
670
      centred(
23 PointedEar 671
      __x( "Top {count} posters by number of articles", count => $topposters ),
672
      76
673
      );
22 PointedEar 674
    print "=" x 76, "\n";
675
    my $i = 0;
676
    foreach
677
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
678
    {
679
      my $name = substr( $poster, 0, 65 );
680
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
681
        $data{$poster}{count};
682
      last if ( ++$i == $count );
683
    }
684
    print "\n", "=" x 76, "\n";
7 PointedEar 685
  }
22 PointedEar 686
 
687
  ######################################
688
  ## Show posters by size in KiB  Sec 2;
689
  ######################################
690
  unless ( $skipSec{2} )
7 PointedEar 691
  {
22 PointedEar 692
    if ( keys %data < $topposters )
693
    {
694
      $count = keys %data;
695
    }
696
    else
697
    {
698
      $count = $topposters;
699
    }
700
    printf "%s\n",
701
      centred(
702
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
23 PointedEar 703
      76
704
      );
22 PointedEar 705
    print "=" x 76, "\n";
706
    my $i = 0;
707
    foreach
708
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
709
    {
710
      my $name = substr( $poster, 0, 62 );
711
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
712
        $data{$poster}{size} / 1024;    #/
713
      last if ( ++$i == $count );
714
    }
715
    print "\n", "=" x 76, "\n";
7 PointedEar 716
  }
5 PointedEar 717
 
22 PointedEar 718
  #####################################
719
  ## Show top posters for original text
720
  #####################################
721
  unless ( $skipSec{3} )
7 PointedEar 722
  {
22 PointedEar 723
    if ( keys %data < $topposters )
724
    {
725
      $count = keys %data;
726
    }
727
    else
728
    {
729
      $count = $topposters;
730
    }
731
    printf "%s\n",
732
      centred(
733
      __x(
734
        "Top {count} responders by original text (> 5 posts)",
735
        count => $topposters
736
      ),
737
      76
738
      );
739
    print "=" x 76, "\n";
740
    my $i = 0;
741
    foreach my $poster (
742
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
743
      keys %data
744
      )
745
    {
746
      next if $data{$poster}{quoted} == 0;
747
      next if $data{$poster}{count} < 5;
748
      my $name = substr( $poster, 0, 63 );
749
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
750
        $data{$poster}{percent};
751
      last if ( ++$i == $count );
752
    }
753
    print "\n", "=" x 76, "\n";
7 PointedEar 754
  }
5 PointedEar 755
 
22 PointedEar 756
  ########################################
757
  ## Show bottom posters for original text
758
  ########################################
759
  unless ( $skipSec{4} )
7 PointedEar 760
  {
22 PointedEar 761
    if ( keys %data < $topposters )
7 PointedEar 762
    {
22 PointedEar 763
      $count = keys %data;
764
    }
765
    else
766
    {
767
      $count = $topposters;
768
    }
769
    printf "%s\n",
770
      centred(
771
      __x(
772
        "Bottom {count} responders by original text  (> 5 posts)",
773
        count => $topposters
774
      ),
775
      76
776
      );
777
    print "=" x 76, "\n";
778
    my $i = 0;
779
    foreach my $poster (
780
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
781
      keys %data
782
      )
783
    {
784
      next if $data{$poster}{quoted} == 0;
785
      next if $data{$poster}{count} < 5;
786
      my $name = substr( $poster, 0, 63 );
787
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
788
        $data{$poster}{percent};
789
      last if ( ++$i == $count );
790
    }
791
    print "\n", "=" x 76, "\n";
792
  }
5 PointedEar 793
 
22 PointedEar 794
  #####################################
795
  ## Show threads by number of articles
796
  #####################################
797
  unless ( $skipSec{5} )
798
  {
799
    if ( keys %threads < $topthreads )
800
    {
801
      $count = keys %threads;
7 PointedEar 802
    }
22 PointedEar 803
    else
804
    {
805
      $count = $topthreads;
7 PointedEar 806
    }
22 PointedEar 807
    printf "%s\n",
23 PointedEar 808
      centred(
809
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
22 PointedEar 810
      76 );
811
    print "=" x 76, "\n";
812
    my $i = 0;
813
    foreach my $thread (
814
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
815
      keys %threads
816
      )
7 PointedEar 817
    {
22 PointedEar 818
      my $name = substr( $thread, 0, 65 );
819
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
820
        $threads{$thread}{'count'};
821
      last if ( ++$i == $count );
7 PointedEar 822
    }
22 PointedEar 823
    print "\n", "=" x 76, "\n";
824
  }
825
 
826
  ##############################
827
  ## Show threads by size in KiB
828
  ##############################
829
  unless ( $skipSec{6} )
830
  {
831
    if ( keys %threads < $topthreads )
832
    {
833
      $count = keys %threads;
834
    }
7 PointedEar 835
    else
836
    {
22 PointedEar 837
      $count = $topthreads;
838
    }
839
    printf "%s\n",
23 PointedEar 840
      centred(
841
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
22 PointedEar 842
    print "=" x 76, "\n";
843
    my $i = 0;
844
    foreach my $thread (
845
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
846
      keys %threads
847
      )
848
    {
849
      my $name = substr( $thread, 0, 65 );
850
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
851
        $threads{$thread}{'size'} / 1024;    #/
852
      last if ( ++$i == $count );
853
    }
854
    print "\n", "=" x 76, "\n";
855
  }
5 PointedEar 856
 
22 PointedEar 857
  ##################################
858
  ## Show top 10 cross-posted groups
859
  ##################################
860
  unless ( $skipSec{7} )
861
  {
862
    delete $crossposts{"$newsgroup_name"};    # don't include ours
863
    if ( keys %crossposts < $topcrossposts )
864
    {
865
      $count = keys %crossposts;
7 PointedEar 866
    }
22 PointedEar 867
    else
868
    {
869
      $count = $topcrossposts;
870
    }
871
    printf "%s\n",
23 PointedEar 872
      centred(
873
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
22 PointedEar 874
    print "=" x 76, "\n";
875
    my $i = 0;
876
    foreach
877
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
878
    {
879
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
880
        $crossposts{$name};
881
      last if ( ++$i == $count );
882
    }
883
    print "\n", "=" x 76, "\n";
884
  }
5 PointedEar 885
 
22 PointedEar 886
  #########################
887
  ## Show agents and counts
888
  #########################
889
  unless ( $skipSec{8} )
890
  {
891
    if ( keys %agents < $topagents )
892
    {
893
      $count = keys %agents;
894
    }
895
    else
896
    {
897
      $count = $topagents;
898
    }
899
    printf "%s\n",
900
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
901
      76 );
902
    print "=" x 76, "\n";
903
    my $i = 0;
904
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
905
    {
906
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
907
        $agents{$agent};
908
      last if ( ++$i == $count );
909
    }
910
    print "\n", "=" x 76, "\n";
911
  }
5 PointedEar 912
 
22 PointedEar 913
  #######################
914
  ## Show distinct agents
915
  #######################
916
  unless ( $skipSec{9} )
7 PointedEar 917
  {
22 PointedEar 918
    if ( keys %distinct_agent < $topagents )
919
    {
920
      $count = keys %distinct_agent;
7 PointedEar 921
    }
22 PointedEar 922
    else
923
    {
924
      $count = $topagents;
925
    }
926
    printf "%s\n",
927
      centred(
928
      __x( "Top {count} user agents by number of posts", count => $topagents ),
23 PointedEar 929
      76
930
      );
22 PointedEar 931
    print "=" x 76, "\n";
932
    my $i = 0;
933
    foreach my $agent (
934
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
935
      keys %distinct_agent
936
      )
937
    {
938
      printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
939
        $distinct_agent{$agent},
940
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
941
      last if ( ++$i == $count );
942
    }
943
    print "\n", "=" x 76, "\n";
7 PointedEar 944
  }
5 PointedEar 945
 
22 PointedEar 946
  ############################
947
  ## Show timezones and counts
948
  ############################
949
  unless ( $skipSec{10} )
7 PointedEar 950
  {
22 PointedEar 951
    if ( keys %tz < $toptz )
7 PointedEar 952
    {
22 PointedEar 953
      $count = keys %tz;
7 PointedEar 954
    }
22 PointedEar 955
    else
7 PointedEar 956
    {
22 PointedEar 957
      $count = $toptz;
7 PointedEar 958
    }
23 PointedEar 959
    printf "%s\n",
960
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
22 PointedEar 961
    print "=" x 76, "\n";
962
    my $i = 0;
963
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
964
    {
965
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
966
      last if ( ++$i == $count );
967
    }
968
    print "\n", "=" x 76, "\n";
7 PointedEar 969
  }
5 PointedEar 970
}
971
 
22 PointedEar 972
## helper subs
973
 
13 PointedEar 974
###############################
975
## Right pad a string with '.'s
976
###############################
7 PointedEar 977
sub rpad
978
{
13 PointedEar 979
  ## Get text to pad, length to pad, pad chr
7 PointedEar 980
  my ( $text, $pad_len, $pad_chr ) = @_;
11 PointedEar 981
 
13 PointedEar 982
  ## DEBUG
22 PointedEar 983
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
11 PointedEar 984
 
7 PointedEar 985
  if ( length($text) > $pad_len )
986
  {
987
    $text = substr( $text, 0, $pad_len );
988
  }
989
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
990
  return $padded;
5 PointedEar 991
}
992
 
13 PointedEar 993
##################
994
## Centre a string
995
##################
7 PointedEar 996
sub centred
997
{
998
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
999
  my $pad_len = ( $width - length($text) ) / 2;    #/
1000
  my $centred = " " x $pad_len . $text;
1001
  return $centred;
5 PointedEar 1002
}
1003
 
13 PointedEar 1004
###########################
1005
## Put commas into a number
1006
###########################
7 PointedEar 1007
sub commify
1008
{
13 PointedEar 1009
  local $_ = shift;
23 PointedEar 1010
  my $number = $_;
1011
  $_ = int;                                        # Chop non-integer part
22 PointedEar 1012
  1 while
23 PointedEar 1013
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1014
  my $int_part  = $_;
1015
  my $real_part = '';
1016
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1017
  {
1018
    $real_part = $1;
1019
  }
1020
  return $int_part . $real_part;
5 PointedEar 1021
}
1022
 
13 PointedEar 1023
################################################################
1024
## Returns a string with leading and trailing whitespace removed
1025
################################################################
7 PointedEar 1026
sub clean
1027
{
1028
  my $dirty = shift;
1029
  my $clean = $dirty;
14 PointedEar 1030
  $clean =~ s/^\s+|\s+$//g;
5 PointedEar 1031
 
7 PointedEar 1032
  return $clean;
5 PointedEar 1033
}
1034
 
7 PointedEar 1035
sub usage
1036
{
23 PointedEar 1037
  println( __ "usage: newsstat.pl NEWS.GROUP" );
7 PointedEar 1038
  exit 1;
5 PointedEar 1039
}
1040
 
22 PointedEar 1041
sub dmsg
7 PointedEar 1042
{
22 PointedEar 1043
  print STDERR @_, "\n";
1044
}
1045
 
1046
sub dmsg2
1047
{
1048
  my ( $level, @msg ) = @_;
1049
  print STDERR @msg, "\n" if $level >= DEBUG;
1050
}
23 PointedEar 1051
 
1052
sub println
1053
{
1054
  print @_, "\n";
1055
}