Subversion Repositories LCARS

Rev

Rev 14 | Rev 23 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8 PointedEar 1
#!/usr/bin/env perl
5 PointedEar 2
use strict;
6 PointedEar 3
use warnings;
22 PointedEar 4
 
5
#use diagnostics;
6 PointedEar 6
use utf8;
7
use Encode;
8
 
22 PointedEar 9
use constant DEBUG => 0;
10
 
13 PointedEar 11
## Print out all text to STDOUT UTF-8 encoded
12
binmode STDOUT, ':encoding(UTF-8)';
22 PointedEar 13
binmode STDERR, ':encoding(UTF-8)';
5 PointedEar 14
 
22 PointedEar 15
# FIXME: Automatically include resolved '.' in @INC
16
# print join "\n", @INC;
5 PointedEar 17
 
22 PointedEar 18
use locale ':not_characters';
19
use Locale::TextDomain ('de.pointedears.newsstat');
20
use POSIX ('locale_h');
21
use Locale::Messages qw (bind_textdomain_filter
22
                         bind_textdomain_codeset
23
                         turn_utf_8_on);
24
#setlocale( LC_MESSAGES, '' );
25
bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
26
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
27
 
28
require Mail::Message;
29
require DateTime;
30
require DateTime::Format::Mail;
31
 
5 PointedEar 32
###################### USER CONFIGURATIONS ############################
33
 
13 PointedEar 34
## The name of the group to do stats for
5 PointedEar 35
my $newsgroup_name = $ARGV[0];
22 PointedEar 36
$newsgroup_name or usage();
5 PointedEar 37
 
13 PointedEar 38
## Check for removal flags
5 PointedEar 39
my $ix;
40
my $j;
41
my %skipSec;
42
my @skiplist;
43
my $args = @ARGV;
7 PointedEar 44
for ( $ix = 1 ; $ix < $args ; $ix++ )
45
{
46
  $j = $ix + 1;
47
  if ( $ARGV[$ix] eq "-x" )
48
  {
49
    @skiplist = split( ",", $ARGV[$j] );
50
  }
51
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
52
  {
53
    @skiplist = split( ",", $1 );
54
  }
5 PointedEar 55
}
7 PointedEar 56
foreach (@skiplist)
57
{
5 PointedEar 58
  $skipSec{$_} = 1;
59
}
60
 
13 PointedEar 61
## Leafnode users will want /var/spool/news for this variable.
5 PointedEar 62
my $news = "/var/spool/news/";
63
 
22 PointedEar 64
## Number of top or bottom posters to show
65
my $topposters = 20;
5 PointedEar 66
 
13 PointedEar 67
## Number of threads we want to know about
5 PointedEar 68
my $topthreads = 20;
69
 
13 PointedEar 70
## Number of cross-posted threads to show
5 PointedEar 71
my $topcrossposts = 10;
72
 
22 PointedEar 73
## Number of agents we list
74
my $topagents = 10;
75
 
13 PointedEar 76
## Number of time zones to show
5 PointedEar 77
my $toptz = 10;
78
 
79
###################### DATA STRUCTURES ######################
80
my $group = $newsgroup_name;
81
$group =~ s!\.!/!g;
22 PointedEar 82
my %data;    # name, count, agent, total, orig, quoted
83
my $totsize = 0;    # holds total sizes of all files
84
my %crossposts;     # group, count
85
my %threads;        # subject, count
86
my $replies   = 0;  # total no. of replies
87
my $origposts = 0;  # total no. of original posts
88
my %tz;             # timezones by count
89
my $earliest;       # earliest article we have found
90
my $latest;         # latest article we have found
91
my $totheader = 0;  # total size of header material
92
my $totbody   = 0;  # total size of body material
93
my $totsig    = 0;  # total size of sig material
94
my $totquoted = 0;  # total size of quoted material
95
my $totorig   = 0;  # total size of original material
96
my $totalposts;     # total no. of posts considered
5 PointedEar 97
my %distinct_agent;
13 PointedEar 98
 
99
## Used to hold counts of User Agents used
100
my %agents = (
101
  "Compuserver"               => 0,
102
  "Foorum"                    => 0,
7 PointedEar 103
  "Forte Agent"               => 0,
104
  "Forte Free Agent"          => 0,
13 PointedEar 105
  "Gnus"                      => 0,
106
  "KNode"                     => 0,
107
  "MacSOUP"                   => 0,
108
  "MT-NewsWatcher"            => 0,
7 PointedEar 109
  "MicroPlanet Gravity"       => 0,
110
  "Microsoft Outlook Express" => 0,
13 PointedEar 111
  "Microsoft Windows Mail"    => 0,
112
  "Mozilla"                   => 0,
113
  "News Rover"                => 0,
114
  "NN"                        => 0,
115
  "Pan"                       => 0,
116
  "rn"                        => 0,
7 PointedEar 117
  "slrn"                      => 0,
13 PointedEar 118
  "Sylpheed"                  => 0,
7 PointedEar 119
  "tin"                       => 0,
13 PointedEar 120
  "VSoup"                     => 0,
7 PointedEar 121
  "WebTV"                     => 0,
22 PointedEar 122
  "Xnews"                     => 0,
13 PointedEar 123
);
5 PointedEar 124
 
22 PointedEar 125
my $datetime_parser = DateTime::Format::Mail->new();
126
$datetime_parser->loose();
5 PointedEar 127
 
22 PointedEar 128
my $today = DateTime->today( time_zone => 'UTC' );
129
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
130
my $start      = int $prev_month->strftime('%s');
131
my $numdays    = int DateTime->last_day_of_month(
132
  year      => $prev_month->year(),
133
  month     => $prev_month->month(),
134
  time_zone => $prev_month->time_zone(),
135
)->day();
136
my $end = int $today->clone()->set_day(1)->strftime('%s');
137
 
138
dmsg( $start, " to ", $end ) if DEBUG;
139
 
140
chdir("$news$group")
141
  or die __x(
142
  "Can't cd to {newsgroup}: {error}\n",
143
  newsgroup => "$news$group",
144
  error     => $!
145
  );
146
opendir( DIR, "." )
147
  or die __x(
148
  "Can't open {newsgroup}: {error}\n",
149
  newsgroup => "$news$group",
150
  error     => $!
151
  );
152
 
153
while ( defined( my $filename = readdir(DIR) ) )
7 PointedEar 154
{
22 PointedEar 155
  next unless -f $filename;    # only want real files
156
  next if ( $filename eq ".overview" );    # real articles only
157
 
158
  get_article($filename);                  # read in the article
5 PointedEar 159
}
22 PointedEar 160
closedir(DIR);                             # finished with the directory
7 PointedEar 161
 
22 PointedEar 162
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
163
 
13 PointedEar 164
## Post-processing
22 PointedEar 165
count_agents();                            # count agents, collapsing versions
166
fix_percent();
5 PointedEar 167
 
22 PointedEar 168
write_data();
169
display_results();
5 PointedEar 170
 
13 PointedEar 171
########################################
22 PointedEar 172
## Get current article's header and body
13 PointedEar 173
########################################
22 PointedEar 174
sub get_article
7 PointedEar 175
{
22 PointedEar 176
  my $filename = shift;
5 PointedEar 177
 
22 PointedEar 178
  open( my $FILE, '<', $filename )
179
    or
180
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
181
  my $msg       = Mail::Message->read($FILE);
182
  my $timestamp = $msg->timestamp();
183
  my $date      = $msg->study('Date');
5 PointedEar 184
 
22 PointedEar 185
  ## Disregard article if timestamp is not in range
186
  dmsg($timestamp) if DEBUG;
187
  if ( $timestamp < $start or $timestamp >= $end )
7 PointedEar 188
  {
22 PointedEar 189
    dmsg("Posting on $date ignored.") if DEBUG;
190
    return;
7 PointedEar 191
  }
5 PointedEar 192
 
22 PointedEar 193
  $totalposts++;    # bump count of articles considered
5 PointedEar 194
 
22 PointedEar 195
  ## DEBUG
196
  dmsg($date) if DEBUG;
13 PointedEar 197
 
22 PointedEar 198
  ## get stats about the file itself
199
  my $filesize = -s $filename;    # get total size of file
200
  $totsize += $filesize;          # bump total sizes of all files
5 PointedEar 201
 
22 PointedEar 202
  if ( ( not defined $earliest ) or $timestamp < $earliest )
7 PointedEar 203
  {
22 PointedEar 204
    $earliest = $timestamp;
7 PointedEar 205
  }
22 PointedEar 206
  elsif ( ( not defined $latest ) or $timestamp > $latest )
7 PointedEar 207
  {
22 PointedEar 208
    $latest = $timestamp;
7 PointedEar 209
  }
13 PointedEar 210
 
22 PointedEar 211
  #print "timestamp: $timestamp\n";
5 PointedEar 212
 
22 PointedEar 213
  ## count header size
214
  $totheader += $msg->head()->size();
5 PointedEar 215
 
22 PointedEar 216
  ## get the poster's name (MIME-decoded, in UTF-8)
217
  my $poster = $msg->study('From');
218
  if ( defined $poster )
7 PointedEar 219
  {
22 PointedEar 220
    ## Convert old to new format
221
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
5 PointedEar 222
 
22 PointedEar 223
    ## Collapse whitespace
224
    $poster =~ s/\s+/ /g;
5 PointedEar 225
 
22 PointedEar 226
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
227
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
5 PointedEar 228
 
22 PointedEar 229
    ## DEBUG
230
    dmsg($poster) if DEBUG;
7 PointedEar 231
 
22 PointedEar 232
    ## seen this one before?
233
    if ( !defined( $data{$poster} ) )
234
    {
235
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
236
      $data{$poster}{'orig'}   = 0;
237
      $data{$poster}{'quoted'} = 0;
238
    }
239
    $data{$poster}{'count'}++;                    # bump count for this poster
240
    $data{$poster}{'size'} += $filesize;          # total size of file
5 PointedEar 241
 
22 PointedEar 242
    ## The User-Agent and/or X-Newsreader fields
243
    ## for User-Agent by poster
244
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
245
    if ( defined $ua )
7 PointedEar 246
    {
22 PointedEar 247
      $data{$poster}{'agent'} = $ua;
248
 
249
      ## DEBUG
250
      dmsg($ua) if DEBUG;
7 PointedEar 251
    }
5 PointedEar 252
 
22 PointedEar 253
    ## The User Agent for User-Agent by number of posts
254
    get_agent($msg);
5 PointedEar 255
 
22 PointedEar 256
    ## Get all cross-posted newsgroups
257
    for ( split( /,/, $msg->study('Newsgroups') ) )
258
    {
259
      $crossposts{$_}++;    # bump count for each
260
    }
5 PointedEar 261
 
22 PointedEar 262
    ## Get threads
263
    my $thread = $msg->study('Subject');
264
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
265
    $thread =~ s/\s+/ /g;       # collapse whitespace
266
    $threads{$thread}{'count'}++;    # bump count of this subject
267
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
5 PointedEar 268
 
22 PointedEar 269
    ## Is this an original post or a reply?
270
    if ( defined $msg->study('References') )
7 PointedEar 271
    {
22 PointedEar 272
      $replies++;
7 PointedEar 273
    }
22 PointedEar 274
    else
7 PointedEar 275
    {
22 PointedEar 276
      $origposts++;
7 PointedEar 277
    }
22 PointedEar 278
 
279
    ## Get the time zone
280
    my $datetime = $datetime_parser->parse_datetime($date);
281
    my $tz       = $datetime->strftime('%z');
282
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
283
    $tz{$tz}++;
284
 
285
    ## DEBUG
286
    dmsg($tz) if DEBUG;
287
 
288
#### Now analyse the body text ####
289
    my $body = $msg->body();
290
 
291
    my $insig = 0;
292
    my @body  = $body->lines;
293
    for (@body)
7 PointedEar 294
    {
22 PointedEar 295
      $totbody += length($_);    # bump total body size
296
      next if (m{^$>}o);         # don't count blank lines in body
297
      if ( $insig == 1 )
298
      {
299
 
300
        # bump total sig size
301
        $totsig += length($_);
302
      }
303
      ## are we in a quote line?
304
      ## Bill Unruh uses ] quotes, and another poster uses ::
305
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
306
      {
307
        ## bump count of quoted chrs
308
        $data{$poster}{'quoted'} += length($_);
309
        $totquoted += length($_);
310
      }
311
      elsif (/^-- $/)
312
      {
313
        $insig = 1;
314
      }
315
      else
316
      {
317
        ## We must be processing an original line
318
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
319
        $totorig += length($_);
320
      }
7 PointedEar 321
    }
22 PointedEar 322
 
323
    # end for (@body)
324
  }
325
 
326
  close($FILE);
327
}
328
 
329
sub get_agent
330
{
331
  my $msg = shift;
332
 
333
  my $ua =
334
       $msg->study('User-Agent')
335
    or $msg->study('X-Newsreader')
336
    or $msg->study('X-Mailer');
337
  if ( not defined $ua )
338
  {
339
    my $org = $msg->study('Organization');
340
    if ( defined $org
341
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
7 PointedEar 342
    {
22 PointedEar 343
      $ua = $org;
7 PointedEar 344
    }
22 PointedEar 345
    elsif ( $msg->study('Message-ID') =~ /pine/i )
7 PointedEar 346
    {
22 PointedEar 347
      $ua = "Pine";
348
    }
7 PointedEar 349
  }
5 PointedEar 350
 
22 PointedEar 351
  ## Hopefully found UA, else set to unknown
352
  if ( not defined $ua )
353
  {
354
    $ua = __ "unknown";
355
  }
5 PointedEar 356
 
22 PointedEar 357
  $ua = clean($ua);
358
 
359
  my $raw   = $ua;
360
  my $agent = $raw;
361
 
362
  ## strip http
363
  if ( $raw =~ /.*http.*/ )
7 PointedEar 364
  {
22 PointedEar 365
    $raw =~ s!posted via!!i;
366
    $raw =~ s!http://!!g;
367
    $raw =~ s!/!!g;
368
    $raw =~ s! !!g;
369
  }
5 PointedEar 370
 
22 PointedEar 371
  ## Fix Outlook from Mac
372
  if ( $raw =~ /^microsoft/i )
373
  {
374
    $raw =~ s/-/ /g;
375
  }
5 PointedEar 376
 
22 PointedEar 377
  ## Pick out the popular agents
378
  if ( $raw =~ /(outlook express)/i
379
    || $raw =~ /(windows mail)/i
380
    || $raw =~ /(microplanet gravity)/i
381
    || $raw =~ /(news rover)/i
382
    || $raw =~ /(forte agent)/i
383
    || $raw =~ /(forte free agent)/i )
384
  {
385
    $agent = $1;
386
  }
387
  elsif (
388
    $raw =~ /^(
5 PointedEar 389
        pan
390
       |sylpheed
391
       |slrn
392
       |mozilla
393
       |knode
394
       |tin
395
       |hamster
396
       |xrn
397
       |xnews
398
       |aol
399
       |gnus
400
       |krn
401
       |macsoup
402
       |messenger
403
       |openxp
404
       |pine
405
       |thoth
406
       |turnpike
407
       |winvn
408
       |vsoup
409
       |google
410
       |supernews
411
       |nn
412
       |rn
413
       |007
414
       |webtv
415
       |compuserve
7 PointedEar 416
       )/ix
22 PointedEar 417
    )
418
  {
419
    $agent = $1;
420
  }
421
  else
422
  {
423
    ## Clean up unknown agents
424
    if ( $raw =~ m!^(.*?)/! )
7 PointedEar 425
    {
426
      $agent = $1;
427
    }
22 PointedEar 428
    elsif ( $raw =~ /^(\w*)\d.*/ )
7 PointedEar 429
    {
22 PointedEar 430
      $agent = $1;
431
    }
432
  }
433
 
434
  $distinct_agent{$agent}++;
435
  return $agent;
436
}
437
## get_agent
438
 
439
#########################################
440
## Count the User-Agents used, collapsing
441
## different versions into one per agent.
442
#########################################
443
sub count_agents
444
{
445
POSTER:
446
  foreach my $poster ( keys %data )
447
  {
448
    foreach my $agent_name ( keys %distinct_agent )
449
    {    # check against known ones
450
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
7 PointedEar 451
      {
22 PointedEar 452
        $agents{$agent_name}++;
453
        next POSTER;
7 PointedEar 454
      }
455
    }
22 PointedEar 456
    $agents{ $data{$poster}{'agent'} }++;
457
  }
458
}    # count_agents
7 PointedEar 459
 
22 PointedEar 460
#############################################
461
## Set orig/total percentages for all posters
462
#############################################
463
sub fix_percent
464
{
465
  foreach my $poster ( keys %data )
466
  {
467
    my $percent = 100;
468
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
469
    {
470
      $percent =
471
        $data{$poster}{'orig'} * 100 /
472
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
473
    }
474
    elsif ( $data{$poster}{'orig'} == 0 )
475
    {
476
      $percent = 0;
477
    }
478
    $data{$poster}{'percent'} = $percent;
5 PointedEar 479
  }
22 PointedEar 480
}
481
## fix_percent
5 PointedEar 482
 
22 PointedEar 483
##################################
484
## Write data structures to a file
485
##################################
486
sub write_data
487
{
488
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
489
    or die __x( "Can't create XDATA: {error}\n", error => $! );
490
  print $OUTF "Data collected from $newsgroup_name\n\n";
491
  print $OUTF
492
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
493
  foreach my $name ( keys %data )
7 PointedEar 494
  {
22 PointedEar 495
    print $OUTF
496
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
7 PointedEar 497
  }
22 PointedEar 498
  print $OUTF
499
"============================================================================\n";
500
  print $OUTF "Thread subjects\n";
501
  print $OUTF
502
"----------------------------------------------------------------------------\n";
503
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
504
  {
505
    print $OUTF
506
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
507
  }
508
  print $OUTF
509
"============================================================================\n";
510
  print $OUTF "Cross-posts\n";
511
  print $OUTF
512
"----------------------------------------------------------------------------\n";
513
  foreach my $name ( sort keys %crossposts )
514
  {
515
    print $OUTF "$name : $crossposts{$name}\n";
516
  }
517
  print $OUTF
518
"============================================================================\n";
519
  print $OUTF "User agents\n";
520
  print $OUTF
521
"----------------------------------------------------------------------------\n";
522
  foreach my $name ( sort keys %agents )
523
  {
524
    print $OUTF "$name : $agents{$name}\n";
525
  }
526
  print $OUTF
527
"============================================================================\n";
528
  print $OUTF "Time zones\n";
529
  print $OUTF
530
"----------------------------------------------------------------------------\n";
531
  foreach my $name ( sort keys %tz )
532
  {
533
    print $OUTF "$name : $tz{$name}\n";
534
  }
535
  close $OUTF;
536
}    # write_data
5 PointedEar 537
 
22 PointedEar 538
sub display_results
539
{
540
  #################### DISPLAY RESULTS #####################
541
  print "=" x 76, "\n";
542
  printf "%s\n",
543
    centred(
544
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
545
    76 );
546
  print "=" x 76, "\n";
547
  printf "%s\n",
548
    centred(
549
    __
550
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)",
551
    76
552
    );
553
  print "\n\n";
554
  printf __"Total posts considered: %s over %d days" . "\n",
555
    commify($totalposts),
556
    $numdays;
557
  my $time_locale = setlocale(LC_TIME);
558
  my $earliest_datetime = DateTime->from_epoch(
559
    epoch => $earliest,
560
    locale => $time_locale,
561
    time_zone => 'UTC',
562
  );
563
  my $latest_datetime = DateTime->from_epoch(
564
    epoch => $latest,
565
    locale => $time_locale,
566
    time_zone => 'UTC',
567
  );
568
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
569
  printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format);
570
  printf __"Latest article" . ":   %s\n", $latest_datetime->strftime($datetime_format);
571
  printf __"Original articles: %s; replies" . ": %s\n",
572
    commify($origposts),
573
    commify($replies);
574
  printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n",
575
    commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576;  #
576
  printf __
577
    "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
578
    commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
579
    commify( int( $totsize / $totalposts ) );
580
  my $count = keys %data;
581
  printf __"Total headers: %s KiB; bodies: %s KiB\n",
582
    commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
583
  printf __
584
    "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n",
585
    commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
586
    ( $totorig * 100 ) / ( $totorig + $totquoted ),
587
    commify( int( $totsig / 1024 ) );
588
  printf __"Total number of posters: %s, average %s bytes per poster\n",
589
    commify($count), commify( int( $totsize / $count ) );    #/
590
  $count = keys %threads;
591
  printf __"Total number of threads: %s, average %s bytes per thread\n",
592
    commify($count), commify( int( $totsize / $count ) );    #/
593
  printf __"Total number of user agents: %d\n", scalar keys %agents;
594
  print "\n", "=" x 76, "\n";
5 PointedEar 595
 
22 PointedEar 596
  ########################################
597
  ## Show posters by article count  Sec 1;
598
  ########################################
599
  unless ( $skipSec{1} )
7 PointedEar 600
  {
22 PointedEar 601
    if ( keys %data < $topposters )
602
    {
603
      $count = keys %data;
604
    }
605
    else
606
    {
607
      $count = $topposters;
608
    }
609
    printf "%s\n",
610
      centred(
611
      __x( "Top {count} posters by number of articles", count => $topposters ), 76 );
612
    print "=" x 76, "\n";
613
    my $i = 0;
614
    foreach
615
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
616
    {
617
      my $name = substr( $poster, 0, 65 );
618
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
619
        $data{$poster}{count};
620
      last if ( ++$i == $count );
621
    }
622
    print "\n", "=" x 76, "\n";
7 PointedEar 623
  }
22 PointedEar 624
 
625
  ######################################
626
  ## Show posters by size in KiB  Sec 2;
627
  ######################################
628
  unless ( $skipSec{2} )
7 PointedEar 629
  {
22 PointedEar 630
    if ( keys %data < $topposters )
631
    {
632
      $count = keys %data;
633
    }
634
    else
635
    {
636
      $count = $topposters;
637
    }
638
    printf "%s\n",
639
      centred(
640
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
641
      76 );
642
    print "=" x 76, "\n";
643
    my $i = 0;
644
    foreach
645
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
646
    {
647
      my $name = substr( $poster, 0, 62 );
648
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
649
        $data{$poster}{size} / 1024;    #/
650
      last if ( ++$i == $count );
651
    }
652
    print "\n", "=" x 76, "\n";
7 PointedEar 653
  }
5 PointedEar 654
 
22 PointedEar 655
  #####################################
656
  ## Show top posters for original text
657
  #####################################
658
  unless ( $skipSec{3} )
7 PointedEar 659
  {
22 PointedEar 660
    if ( keys %data < $topposters )
661
    {
662
      $count = keys %data;
663
    }
664
    else
665
    {
666
      $count = $topposters;
667
    }
668
    printf "%s\n",
669
      centred(
670
      __x(
671
        "Top {count} responders by original text (> 5 posts)",
672
        count => $topposters
673
      ),
674
      76
675
      );
676
    print "=" x 76, "\n";
677
    my $i = 0;
678
    foreach my $poster (
679
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
680
      keys %data
681
      )
682
    {
683
      next if $data{$poster}{quoted} == 0;
684
      next if $data{$poster}{count} < 5;
685
      my $name = substr( $poster, 0, 63 );
686
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
687
        $data{$poster}{percent};
688
      last if ( ++$i == $count );
689
    }
690
    print "\n", "=" x 76, "\n";
7 PointedEar 691
  }
5 PointedEar 692
 
22 PointedEar 693
  ########################################
694
  ## Show bottom posters for original text
695
  ########################################
696
  unless ( $skipSec{4} )
7 PointedEar 697
  {
22 PointedEar 698
    if ( keys %data < $topposters )
7 PointedEar 699
    {
22 PointedEar 700
      $count = keys %data;
701
    }
702
    else
703
    {
704
      $count = $topposters;
705
    }
706
    printf "%s\n",
707
      centred(
708
      __x(
709
        "Bottom {count} responders by original text  (> 5 posts)",
710
        count => $topposters
711
      ),
712
      76
713
      );
714
    print "=" x 76, "\n";
715
    my $i = 0;
716
    foreach my $poster (
717
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
718
      keys %data
719
      )
720
    {
721
      next if $data{$poster}{quoted} == 0;
722
      next if $data{$poster}{count} < 5;
723
      my $name = substr( $poster, 0, 63 );
724
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
725
        $data{$poster}{percent};
726
      last if ( ++$i == $count );
727
    }
728
    print "\n", "=" x 76, "\n";
729
  }
5 PointedEar 730
 
22 PointedEar 731
  #####################################
732
  ## Show threads by number of articles
733
  #####################################
734
  unless ( $skipSec{5} )
735
  {
736
    if ( keys %threads < $topthreads )
737
    {
738
      $count = keys %threads;
7 PointedEar 739
    }
22 PointedEar 740
    else
741
    {
742
      $count = $topthreads;
7 PointedEar 743
    }
22 PointedEar 744
    printf "%s\n",
745
      centred( __x( "Top {count} threads by no. of articles", count => $topthreads ),
746
      76 );
747
    print "=" x 76, "\n";
748
    my $i = 0;
749
    foreach my $thread (
750
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
751
      keys %threads
752
      )
7 PointedEar 753
    {
22 PointedEar 754
      my $name = substr( $thread, 0, 65 );
755
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
756
        $threads{$thread}{'count'};
757
      last if ( ++$i == $count );
7 PointedEar 758
    }
22 PointedEar 759
    print "\n", "=" x 76, "\n";
760
  }
761
 
762
  ##############################
763
  ## Show threads by size in KiB
764
  ##############################
765
  unless ( $skipSec{6} )
766
  {
767
    if ( keys %threads < $topthreads )
768
    {
769
      $count = keys %threads;
770
    }
7 PointedEar 771
    else
772
    {
22 PointedEar 773
      $count = $topthreads;
774
    }
775
    printf "%s\n",
776
      centred( __x( "Top {count} threads by size in KiB", count => $topthreads ),
777
      76 );
778
    print "=" x 76, "\n";
779
    my $i = 0;
780
    foreach my $thread (
781
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
782
      keys %threads
783
      )
784
    {
785
      my $name = substr( $thread, 0, 65 );
786
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
787
        $threads{$thread}{'size'} / 1024;    #/
788
      last if ( ++$i == $count );
789
    }
790
    print "\n", "=" x 76, "\n";
791
  }
5 PointedEar 792
 
22 PointedEar 793
  ##################################
794
  ## Show top 10 cross-posted groups
795
  ##################################
796
  unless ( $skipSec{7} )
797
  {
798
    delete $crossposts{"$newsgroup_name"};    # don't include ours
799
    if ( keys %crossposts < $topcrossposts )
800
    {
801
      $count = keys %crossposts;
7 PointedEar 802
    }
22 PointedEar 803
    else
804
    {
805
      $count = $topcrossposts;
806
    }
807
    printf "%s\n",
808
      centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
809
    print "=" x 76, "\n";
810
    my $i = 0;
811
    foreach
812
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
813
    {
814
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
815
        $crossposts{$name};
816
      last if ( ++$i == $count );
817
    }
818
    print "\n", "=" x 76, "\n";
819
  }
5 PointedEar 820
 
22 PointedEar 821
  #########################
822
  ## Show agents and counts
823
  #########################
824
  unless ( $skipSec{8} )
825
  {
826
    if ( keys %agents < $topagents )
827
    {
828
      $count = keys %agents;
829
    }
830
    else
831
    {
832
      $count = $topagents;
833
    }
834
    printf "%s\n",
835
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
836
      76 );
837
    print "=" x 76, "\n";
838
    my $i = 0;
839
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
840
    {
841
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
842
        $agents{$agent};
843
      last if ( ++$i == $count );
844
    }
845
    print "\n", "=" x 76, "\n";
846
  }
5 PointedEar 847
 
22 PointedEar 848
  #######################
849
  ## Show distinct agents
850
  #######################
851
  unless ( $skipSec{9} )
7 PointedEar 852
  {
22 PointedEar 853
    if ( keys %distinct_agent < $topagents )
854
    {
855
      $count = keys %distinct_agent;
7 PointedEar 856
    }
22 PointedEar 857
    else
858
    {
859
      $count = $topagents;
860
    }
861
    printf "%s\n",
862
      centred(
863
      __x( "Top {count} user agents by number of posts", count => $topagents ),
864
      76 );
865
    print "=" x 76, "\n";
866
    my $i = 0;
867
    foreach my $agent (
868
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
869
      keys %distinct_agent
870
      )
871
    {
872
      printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
873
        $distinct_agent{$agent},
874
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
875
      last if ( ++$i == $count );
876
    }
877
    print "\n", "=" x 76, "\n";
7 PointedEar 878
  }
5 PointedEar 879
 
22 PointedEar 880
  ############################
881
  ## Show timezones and counts
882
  ############################
883
  unless ( $skipSec{10} )
7 PointedEar 884
  {
22 PointedEar 885
    if ( keys %tz < $toptz )
7 PointedEar 886
    {
22 PointedEar 887
      $count = keys %tz;
7 PointedEar 888
    }
22 PointedEar 889
    else
7 PointedEar 890
    {
22 PointedEar 891
      $count = $toptz;
7 PointedEar 892
    }
22 PointedEar 893
    printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 );
894
    print "=" x 76, "\n";
895
    my $i = 0;
896
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
897
    {
898
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
899
      last if ( ++$i == $count );
900
    }
901
    print "\n", "=" x 76, "\n";
7 PointedEar 902
  }
5 PointedEar 903
}
904
 
22 PointedEar 905
## helper subs
906
 
13 PointedEar 907
###############################
908
## Right pad a string with '.'s
909
###############################
7 PointedEar 910
sub rpad
911
{
13 PointedEar 912
  ## Get text to pad, length to pad, pad chr
7 PointedEar 913
  my ( $text, $pad_len, $pad_chr ) = @_;
11 PointedEar 914
 
13 PointedEar 915
  ## DEBUG
22 PointedEar 916
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
11 PointedEar 917
 
7 PointedEar 918
  if ( length($text) > $pad_len )
919
  {
920
    $text = substr( $text, 0, $pad_len );
921
  }
922
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
923
  return $padded;
5 PointedEar 924
}
925
 
13 PointedEar 926
##################
927
## Centre a string
928
##################
7 PointedEar 929
sub centred
930
{
931
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
932
  my $pad_len = ( $width - length($text) ) / 2;    #/
933
  my $centred = " " x $pad_len . $text;
934
  return $centred;
5 PointedEar 935
}
936
 
13 PointedEar 937
###########################
938
## Put commas into a number
939
###########################
22 PointedEar 940
# Get some of locale's numeric formatting parameters
941
my ($thousands_sep, $grouping) =
942
        @{localeconv()}{'thousands_sep', 'grouping'};
943
# Apply defaults if values are missing
944
$thousands_sep = ',' unless $thousands_sep;
945
# grouping and mon_grouping are packed lists
946
# of small integers (characters) telling the
947
# grouping (thousand_seps and mon_thousand_seps
948
# being the group dividers) of numbers and
949
# monetary quantities.  The integers' meanings:
950
# 255 means no more grouping, 0 means repeat
951
# the previous grouping, 1-254 means use that
952
# as the current grouping.  Grouping goes from
953
# right to left (low to high digits).  In the
954
# below we cheat slightly by never using anything
955
# else than the first grouping (whatever that is).
956
my @grouping;
957
if ($grouping) {
958
    @grouping = unpack("C*", $grouping);
959
} else {
960
    @grouping = (3);
961
}
962
 
7 PointedEar 963
sub commify
964
{
13 PointedEar 965
  local $_ = shift;
22 PointedEar 966
  #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
967
  $_ = int;    # Chop non-integer part
968
  1 while
969
  s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/;
7 PointedEar 970
  return $_;
5 PointedEar 971
}
972
 
13 PointedEar 973
################################################################
974
## Returns a string with leading and trailing whitespace removed
975
################################################################
7 PointedEar 976
sub clean
977
{
978
  my $dirty = shift;
979
  my $clean = $dirty;
14 PointedEar 980
  $clean =~ s/^\s+|\s+$//g;
5 PointedEar 981
 
7 PointedEar 982
  return $clean;
5 PointedEar 983
}
984
 
7 PointedEar 985
sub usage
986
{
22 PointedEar 987
  print __"usage: newsstat.pl NEWS.GROUP\n";
7 PointedEar 988
  exit 1;
5 PointedEar 989
}
990
 
22 PointedEar 991
sub dmsg
7 PointedEar 992
{
22 PointedEar 993
  print STDERR @_, "\n";
994
}
995
 
996
sub dmsg2
997
{
998
  my ( $level, @msg ) = @_;
999
  print STDERR @msg, "\n" if $level >= DEBUG;
1000
}