Subversion Repositories LCARS

Rev

Rev 22 | Rev 24 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 22 Rev 23
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
-
 
4
require 5.004;
4
5
5
#use diagnostics;
6
#use diagnostics;
6
use utf8;
7
use utf8;
7
use Encode;
8
use Encode;
8
9
9
use constant DEBUG => 0;
10
use constant DEBUG => 0;
10
11
11
## Print out all text to STDOUT UTF-8 encoded
12
## Print out all text to STDOUT UTF-8 encoded
12
binmode STDOUT, ':encoding(UTF-8)';
13
binmode STDOUT, ':encoding(UTF-8)';
13
binmode STDERR, ':encoding(UTF-8)';
14
binmode STDERR, ':encoding(UTF-8)';
14
15
-
 
16
## L10n
-
 
17
use locale ':not_characters';
-
 
18
-
 
19
# setlocale( LC_MESSAGES, '' );
-
 
20
require Number::Format;
-
 
21
-
 
22
## i18n
15
# FIXME: Automatically include resolved '.' in @INC
23
## FIXME: Automatically include resolved '.' in @INC
16
# print join "\n", @INC;
24
# print join "\n", @INC;
17
25
18
use locale ':not_characters';
-
 
19
use Locale::TextDomain ('de.pointedears.newsstat');
26
use Locale::TextDomain ('de.pointedears.newsstat');
20
use POSIX ('locale_h');
27
use POSIX              ('locale_h');
21
use Locale::Messages qw (bind_textdomain_filter
28
use Locale::Messages qw (bind_textdomain_filter
22
                         bind_textdomain_codeset
29
  bind_textdomain_codeset
23
                         turn_utf_8_on);
30
  turn_utf_8_on);
24
#setlocale( LC_MESSAGES, '' );
-
 
-
 
31
25
bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
32
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
26
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
33
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
27
 
34
28
require Mail::Message;
35
require Mail::Message;
29
require DateTime;
36
require DateTime;
30
require DateTime::Format::Mail;
37
require DateTime::Format::Mail;
-
 
38
-
 
39
# See comments in previous example
-
 
40
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
-
 
41
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
-
 
42
  'decimal_point' };
-
 
43
-
 
44
# Apply defaults if values are missing
-
 
45
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
-
 
46
$thousands_sep = ' ' unless $thousands_sep;
-
 
47
-
 
48
# grouping and mon_grouping are packed lists
-
 
49
# of small integers (characters) telling the
-
 
50
# grouping (thousand_seps and mon_thousand_seps
-
 
51
# being the group dividers) of numbers and
-
 
52
# monetary quantities.  The integers' meanings:
-
 
53
# 255 means no more grouping, 0 means repeat
-
 
54
# the previous grouping, 1-254 means use that
-
 
55
# as the current grouping.  Grouping goes from
-
 
56
# right to left (low to high digits).  In the
-
 
57
# below we cheat slightly by never using anything
-
 
58
# else than the first grouping (whatever that is).
-
 
59
my @grouping;
-
 
60
if ($grouping)
-
 
61
{
-
 
62
  @grouping = unpack( "C*", $grouping );
-
 
63
}
-
 
64
else
-
 
65
{
-
 
66
  @grouping = (3);
-
 
67
}
-
 
68
-
 
69
## FIXME: Why don't the defaults work already?
-
 
70
my $formatter = new Number::Format(
-
 
71
  -decimal_point => $decimal_point,
-
 
72
  -thousands_sep => $thousands_sep,
-
 
73
-
 
74
  # -grouping      => $grouping[0]
-
 
75
);
31
 
76
32
###################### USER CONFIGURATIONS ############################
77
###################### USER CONFIGURATIONS ############################
33
78
34
## The name of the group to do stats for
79
## The name of the group to do stats for
35
my $newsgroup_name = $ARGV[0];
80
my $newsgroup_name = $ARGV[0];
36
$newsgroup_name or usage();
81
$newsgroup_name or usage();
37
82
38
## Check for removal flags
83
## Check for removal flags
39
my $ix;
84
my $ix;
40
my $j;
85
my $j;
41
my %skipSec;
86
my %skipSec;
42
my @skiplist;
87
my @skiplist;
43
my $args = @ARGV;
88
my $args = @ARGV;
44
for ( $ix = 1 ; $ix < $args ; $ix++ )
89
for ( $ix = 1 ; $ix < $args ; $ix++ )
45
{
90
{
46
  $j = $ix + 1;
91
  $j = $ix + 1;
47
  if ( $ARGV[$ix] eq "-x" )
92
  if ( $ARGV[$ix] eq "-x" )
48
  {
93
  {
49
    @skiplist = split( ",", $ARGV[$j] );
94
    @skiplist = split( ",", $ARGV[$j] );
50
  }
95
  }
51
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
96
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
52
  {
97
  {
53
    @skiplist = split( ",", $1 );
98
    @skiplist = split( ",", $1 );
54
  }
99
  }
55
}
100
}
56
foreach (@skiplist)
101
foreach (@skiplist)
57
{
102
{
58
  $skipSec{$_} = 1;
103
  $skipSec{$_} = 1;
59
}
104
}
60
105
61
## Leafnode users will want /var/spool/news for this variable.
106
## Leafnode users will want /var/spool/news for this variable.
62
my $news = "/var/spool/news/";
107
my $news = "/var/spool/news/";
63
108
64
## Number of top or bottom posters to show
109
## Number of top or bottom posters to show
65
my $topposters = 20;
110
my $topposters = 20;
66
111
67
## Number of threads we want to know about
112
## Number of threads we want to know about
68
my $topthreads = 20;
113
my $topthreads = 20;
69
114
70
## Number of cross-posted threads to show
115
## Number of cross-posted threads to show
71
my $topcrossposts = 10;
116
my $topcrossposts = 10;
72
117
73
## Number of agents we list
118
## Number of agents we list
74
my $topagents = 10;
119
my $topagents = 10;
75
120
76
## Number of time zones to show
121
## Number of time zones to show
77
my $toptz = 10;
122
my $toptz = 10;
78
123
79
###################### DATA STRUCTURES ######################
124
###################### DATA STRUCTURES ######################
80
my $group = $newsgroup_name;
125
my $group = $newsgroup_name;
81
$group =~ s!\.!/!g;
126
$group =~ s!\.!/!g;
82
my %data;    # name, count, agent, total, orig, quoted
127
my %data;    # name, count, agent, total, orig, quoted
83
my $totsize = 0;    # holds total sizes of all files
128
my $totsize = 0;    # holds total sizes of all files
84
my %crossposts;     # group, count
129
my %crossposts;     # group, count
85
my %threads;        # subject, count
130
my %threads;        # subject, count
86
my $replies   = 0;  # total no. of replies
131
my $replies   = 0;  # total no. of replies
87
my $origposts = 0;  # total no. of original posts
132
my $origposts = 0;  # total no. of original posts
88
my %tz;             # timezones by count
133
my %tz;             # timezones by count
89
my $earliest;       # earliest article we have found
134
my $earliest;       # earliest article we have found
90
my $latest;         # latest article we have found
135
my $latest;         # latest article we have found
91
my $totheader = 0;  # total size of header material
136
my $totheader = 0;  # total size of header material
92
my $totbody   = 0;  # total size of body material
137
my $totbody   = 0;  # total size of body material
93
my $totsig    = 0;  # total size of sig material
138
my $totsig    = 0;  # total size of sig material
94
my $totquoted = 0;  # total size of quoted material
139
my $totquoted = 0;  # total size of quoted material
95
my $totorig   = 0;  # total size of original material
140
my $totorig   = 0;  # total size of original material
96
my $totalposts;     # total no. of posts considered
141
my $totalposts;     # total no. of posts considered
97
my %distinct_agent;
142
my %distinct_agent;
98
143
99
## Used to hold counts of User Agents used
144
## Used to hold counts of User Agents used
100
my %agents = (
145
my %agents = (
101
  "Compuserver"               => 0,
146
  "Compuserver"               => 0,
102
  "Foorum"                    => 0,
147
  "Foorum"                    => 0,
103
  "Forte Agent"               => 0,
148
  "Forte Agent"               => 0,
104
  "Forte Free Agent"          => 0,
149
  "Forte Free Agent"          => 0,
105
  "Gnus"                      => 0,
150
  "Gnus"                      => 0,
106
  "KNode"                     => 0,
151
  "KNode"                     => 0,
107
  "MacSOUP"                   => 0,
152
  "MacSOUP"                   => 0,
108
  "MT-NewsWatcher"            => 0,
153
  "MT-NewsWatcher"            => 0,
109
  "MicroPlanet Gravity"       => 0,
154
  "MicroPlanet Gravity"       => 0,
110
  "Microsoft Outlook Express" => 0,
155
  "Microsoft Outlook Express" => 0,
111
  "Microsoft Windows Mail"    => 0,
156
  "Microsoft Windows Mail"    => 0,
112
  "Mozilla"                   => 0,
157
  "Mozilla"                   => 0,
113
  "News Rover"                => 0,
158
  "News Rover"                => 0,
114
  "NN"                        => 0,
159
  "NN"                        => 0,
115
  "Pan"                       => 0,
160
  "Pan"                       => 0,
116
  "rn"                        => 0,
161
  "rn"                        => 0,
117
  "slrn"                      => 0,
162
  "slrn"                      => 0,
118
  "Sylpheed"                  => 0,
163
  "Sylpheed"                  => 0,
119
  "tin"                       => 0,
164
  "tin"                       => 0,
120
  "VSoup"                     => 0,
165
  "VSoup"                     => 0,
121
  "WebTV"                     => 0,
166
  "WebTV"                     => 0,
122
  "Xnews"                     => 0,
167
  "Xnews"                     => 0,
123
);
168
);
124
169
125
my $datetime_parser = DateTime::Format::Mail->new();
170
my $datetime_parser = DateTime::Format::Mail->new();
126
$datetime_parser->loose();
171
$datetime_parser->loose();
127
172
128
my $today = DateTime->today( time_zone => 'UTC' );
173
my $today = DateTime->today( time_zone => 'UTC' );
129
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
174
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
130
my $start      = int $prev_month->strftime('%s');
175
my $start      = int $prev_month->strftime('%s');
131
my $numdays    = int DateTime->last_day_of_month(
176
my $numdays    = int DateTime->last_day_of_month(
132
  year      => $prev_month->year(),
177
  year      => $prev_month->year(),
133
  month     => $prev_month->month(),
178
  month     => $prev_month->month(),
134
  time_zone => $prev_month->time_zone(),
179
  time_zone => $prev_month->time_zone(),
135
)->day();
180
)->day();
136
my $end = int $today->clone()->set_day(1)->strftime('%s');
181
my $end = int $today->clone()->set_day(1)->strftime('%s');
137
182
138
dmsg( $start, " to ", $end ) if DEBUG;
183
dmsg( $start, " to ", $end ) if DEBUG;
139
184
140
chdir("$news$group")
185
chdir("$news$group")
141
  or die __x(
186
  or die __x(
142
  "Can't cd to {newsgroup}: {error}\n",
187
  "Can't cd to {newsgroup}: {error}\n",
143
  newsgroup => "$news$group",
188
  newsgroup => "$news$group",
144
  error     => $!
189
  error     => $!
145
  );
190
  );
146
opendir( DIR, "." )
191
opendir( DIR, "." )
147
  or die __x(
192
  or die __x(
148
  "Can't open {newsgroup}: {error}\n",
193
  "Can't open {newsgroup}: {error}\n",
149
  newsgroup => "$news$group",
194
  newsgroup => "$news$group",
150
  error     => $!
195
  error     => $!
151
  );
196
  );
152
197
153
while ( defined( my $filename = readdir(DIR) ) )
198
while ( defined( my $filename = readdir(DIR) ) )
154
{
199
{
155
  next unless -f $filename;    # only want real files
200
  next unless -f $filename;    # only want real files
156
  next if ( $filename eq ".overview" );    # real articles only
201
  next if ( $filename eq ".overview" );    # real articles only
157
202
158
  get_article($filename);                  # read in the article
203
  get_article($filename);                  # read in the article
159
}
204
}
160
closedir(DIR);                             # finished with the directory
205
closedir(DIR);                             # finished with the directory
161
206
162
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
207
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
163
208
164
## Post-processing
209
## Post-processing
165
count_agents();                            # count agents, collapsing versions
210
count_agents();                            # count agents, collapsing versions
166
fix_percent();
211
fix_percent();
167
212
168
write_data();
213
write_data();
169
display_results();
214
display_results();
170
215
171
########################################
216
########################################
172
## Get current article's header and body
217
## Get current article's header and body
173
########################################
218
########################################
174
sub get_article
219
sub get_article
175
{
220
{
176
  my $filename = shift;
221
  my $filename = shift;
177
222
178
  open( my $FILE, '<', $filename )
223
  open( my $FILE, '<', $filename )
179
    or
224
    or
180
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
225
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
181
  my $msg       = Mail::Message->read($FILE);
226
  my $msg       = Mail::Message->read($FILE);
182
  my $timestamp = $msg->timestamp();
227
  my $timestamp = $msg->timestamp();
183
  my $date      = $msg->study('Date');
228
  my $date      = $msg->study('Date');
184
229
185
  ## Disregard article if timestamp is not in range
230
  ## Disregard article if timestamp is not in range
186
  dmsg($timestamp) if DEBUG;
231
  dmsg($timestamp) if DEBUG;
187
  if ( $timestamp < $start or $timestamp >= $end )
232
  if ( $timestamp < $start or $timestamp >= $end )
188
  {
233
  {
189
    dmsg("Posting on $date ignored.") if DEBUG;
234
    dmsg("Posting on $date ignored.") if DEBUG;
190
    return;
235
    return;
191
  }
236
  }
192
237
193
  $totalposts++;    # bump count of articles considered
238
  $totalposts++;    # bump count of articles considered
194
239
195
  ## DEBUG
240
  ## DEBUG
196
  dmsg($date) if DEBUG;
241
  dmsg($date) if DEBUG;
197
242
198
  ## get stats about the file itself
243
  ## get stats about the file itself
199
  my $filesize = -s $filename;    # get total size of file
244
  my $filesize = -s $filename;    # get total size of file
200
  $totsize += $filesize;          # bump total sizes of all files
245
  $totsize += $filesize;          # bump total sizes of all files
201
246
202
  if ( ( not defined $earliest ) or $timestamp < $earliest )
247
  if ( ( not defined $earliest ) or $timestamp < $earliest )
203
  {
248
  {
204
    $earliest = $timestamp;
249
    $earliest = $timestamp;
205
  }
250
  }
206
  elsif ( ( not defined $latest ) or $timestamp > $latest )
251
  elsif ( ( not defined $latest ) or $timestamp > $latest )
207
  {
252
  {
208
    $latest = $timestamp;
253
    $latest = $timestamp;
209
  }
254
  }
210
255
211
  #print "timestamp: $timestamp\n";
256
  #print "timestamp: $timestamp\n";
212
257
213
  ## count header size
258
  ## count header size
214
  $totheader += $msg->head()->size();
259
  $totheader += $msg->head()->size();
215
260
216
  ## get the poster's name (MIME-decoded, in UTF-8)
261
  ## get the poster's name (MIME-decoded, in UTF-8)
217
  my $poster = $msg->study('From');
262
  my $poster = $msg->study('From');
218
  if ( defined $poster )
263
  if ( defined $poster )
219
  {
264
  {
220
    ## Convert old to new format
265
    ## Convert old to new format
221
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
266
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
222
267
223
    ## Collapse whitespace
268
    ## Collapse whitespace
224
    $poster =~ s/\s+/ /g;
269
    $poster =~ s/\s+/ /g;
225
270
226
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
271
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
227
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
272
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
228
273
229
    ## DEBUG
274
    ## DEBUG
230
    dmsg($poster) if DEBUG;
275
    dmsg($poster) if DEBUG;
231
276
232
    ## seen this one before?
277
    ## seen this one before?
233
    if ( !defined( $data{$poster} ) )
278
    if ( !defined( $data{$poster} ) )
234
    {
279
    {
235
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
280
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
236
      $data{$poster}{'orig'}   = 0;
281
      $data{$poster}{'orig'}   = 0;
237
      $data{$poster}{'quoted'} = 0;
282
      $data{$poster}{'quoted'} = 0;
238
    }
283
    }
239
    $data{$poster}{'count'}++;                    # bump count for this poster
284
    $data{$poster}{'count'}++;                    # bump count for this poster
240
    $data{$poster}{'size'} += $filesize;          # total size of file
285
    $data{$poster}{'size'} += $filesize;          # total size of file
241
286
242
    ## The User-Agent and/or X-Newsreader fields
287
    ## The User-Agent and/or X-Newsreader fields
243
    ## for User-Agent by poster
288
    ## for User-Agent by poster
244
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
289
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
245
    if ( defined $ua )
290
    if ( defined $ua )
246
    {
291
    {
247
      $data{$poster}{'agent'} = $ua;
292
      $data{$poster}{'agent'} = $ua;
248
293
249
      ## DEBUG
294
      ## DEBUG
250
      dmsg($ua) if DEBUG;
295
      dmsg($ua) if DEBUG;
251
    }
296
    }
252
297
253
    ## The User Agent for User-Agent by number of posts
298
    ## The User Agent for User-Agent by number of posts
254
    get_agent($msg);
299
    get_agent($msg);
255
300
256
    ## Get all cross-posted newsgroups
301
    ## Get all cross-posted newsgroups
257
    for ( split( /,/, $msg->study('Newsgroups') ) )
302
    for ( split( /,/, $msg->study('Newsgroups') ) )
258
    {
303
    {
259
      $crossposts{$_}++;    # bump count for each
304
      $crossposts{$_}++;    # bump count for each
260
    }
305
    }
261
306
262
    ## Get threads
307
    ## Get threads
263
    my $thread = $msg->study('Subject');
308
    my $thread = $msg->study('Subject');
264
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
309
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
265
    $thread =~ s/\s+/ /g;       # collapse whitespace
310
    $thread =~ s/\s+/ /g;       # collapse whitespace
266
    $threads{$thread}{'count'}++;    # bump count of this subject
311
    $threads{$thread}{'count'}++;    # bump count of this subject
267
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
312
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
268
313
269
    ## Is this an original post or a reply?
314
    ## Is this an original post or a reply?
270
    if ( defined $msg->study('References') )
315
    if ( defined $msg->study('References') )
271
    {
316
    {
272
      $replies++;
317
      $replies++;
273
    }
318
    }
274
    else
319
    else
275
    {
320
    {
276
      $origposts++;
321
      $origposts++;
277
    }
322
    }
278
323
279
    ## Get the time zone
324
    ## Get the time zone
280
    my $datetime = $datetime_parser->parse_datetime($date);
325
    my $datetime = $datetime_parser->parse_datetime($date);
281
    my $tz       = $datetime->strftime('%z');
326
    my $tz       = $datetime->strftime('%z');
282
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
327
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
283
    $tz{$tz}++;
328
    $tz{$tz}++;
284
329
285
    ## DEBUG
330
    ## DEBUG
286
    dmsg($tz) if DEBUG;
331
    dmsg($tz) if DEBUG;
287
332
288
#### Now analyse the body text ####
333
#### Now analyse the body text ####
289
    my $body = $msg->body();
334
    my $body = $msg->body();
290
335
291
    my $insig = 0;
336
    my $insig = 0;
292
    my @body  = $body->lines;
337
    my @body  = $body->lines;
293
    for (@body)
338
    for (@body)
294
    {
339
    {
295
      $totbody += length($_);    # bump total body size
340
      $totbody += length($_);    # bump total body size
296
      next if (m{^$>}o);         # don't count blank lines in body
341
      next if (m{^$>}o);         # don't count blank lines in body
297
      if ( $insig == 1 )
342
      if ( $insig == 1 )
298
      {
343
      {
299
344
300
        # bump total sig size
345
        # bump total sig size
301
        $totsig += length($_);
346
        $totsig += length($_);
302
      }
347
      }
303
      ## are we in a quote line?
348
      ## are we in a quote line?
304
      ## Bill Unruh uses ] quotes, and another poster uses ::
349
      ## Bill Unruh uses ] quotes, and another poster uses ::
305
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
350
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
306
      {
351
      {
307
        ## bump count of quoted chrs
352
        ## bump count of quoted chrs
308
        $data{$poster}{'quoted'} += length($_);
353
        $data{$poster}{'quoted'} += length($_);
309
        $totquoted += length($_);
354
        $totquoted += length($_);
310
      }
355
      }
311
      elsif (/^-- $/)
356
      elsif (/^-- $/)
312
      {
357
      {
313
        $insig = 1;
358
        $insig = 1;
314
      }
359
      }
315
      else
360
      else
316
      {
361
      {
317
        ## We must be processing an original line
362
        ## We must be processing an original line
318
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
363
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
319
        $totorig += length($_);
364
        $totorig += length($_);
320
      }
365
      }
321
    }
366
    }
322
367
323
    # end for (@body)
368
    # end for (@body)
324
  }
369
  }
325
370
326
  close($FILE);
371
  close($FILE);
327
}
372
}
328
373
329
sub get_agent
374
sub get_agent
330
{
375
{
331
  my $msg = shift;
376
  my $msg = shift;
332
377
333
  my $ua =
378
  my $ua =
334
       $msg->study('User-Agent')
379
       $msg->study('User-Agent')
335
    or $msg->study('X-Newsreader')
380
    or $msg->study('X-Newsreader')
336
    or $msg->study('X-Mailer');
381
    or $msg->study('X-Mailer');
337
  if ( not defined $ua )
382
  if ( not defined $ua )
338
  {
383
  {
339
    my $org = $msg->study('Organization');
384
    my $org = $msg->study('Organization');
340
    if ( defined $org
385
    if ( defined $org
341
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
386
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
342
    {
387
    {
343
      $ua = $org;
388
      $ua = $org;
344
    }
389
    }
345
    elsif ( $msg->study('Message-ID') =~ /pine/i )
390
    elsif ( $msg->study('Message-ID') =~ /pine/i )
346
    {
391
    {
347
      $ua = "Pine";
392
      $ua = "Pine";
348
    }
393
    }
349
  }
394
  }
350
395
351
  ## Hopefully found UA, else set to unknown
396
  ## Hopefully found UA, else set to unknown
352
  if ( not defined $ua )
397
  if ( not defined $ua )
353
  {
398
  {
354
    $ua = __ "unknown";
399
    $ua = __ "unknown";
355
  }
400
  }
356
401
357
  $ua = clean($ua);
402
  $ua = clean($ua);
358
403
359
  my $raw   = $ua;
404
  my $raw   = $ua;
360
  my $agent = $raw;
405
  my $agent = $raw;
361
406
362
  ## strip http
407
  ## strip http
363
  if ( $raw =~ /.*http.*/ )
408
  if ( $raw =~ /.*http.*/ )
364
  {
409
  {
365
    $raw =~ s!posted via!!i;
410
    $raw =~ s!posted via!!i;
366
    $raw =~ s!http://!!g;
411
    $raw =~ s!http://!!g;
367
    $raw =~ s!/!!g;
412
    $raw =~ s!/!!g;
368
    $raw =~ s! !!g;
413
    $raw =~ s! !!g;
369
  }
414
  }
370
415
371
  ## Fix Outlook from Mac
416
  ## Fix Outlook from Mac
372
  if ( $raw =~ /^microsoft/i )
417
  if ( $raw =~ /^microsoft/i )
373
  {
418
  {
374
    $raw =~ s/-/ /g;
419
    $raw =~ s/-/ /g;
375
  }
420
  }
376
421
377
  ## Pick out the popular agents
422
  ## Pick out the popular agents
378
  if ( $raw =~ /(outlook express)/i
423
  if ( $raw =~ /(outlook express)/i
379
    || $raw =~ /(windows mail)/i
424
    || $raw =~ /(windows mail)/i
380
    || $raw =~ /(microplanet gravity)/i
425
    || $raw =~ /(microplanet gravity)/i
381
    || $raw =~ /(news rover)/i
426
    || $raw =~ /(news rover)/i
382
    || $raw =~ /(forte agent)/i
427
    || $raw =~ /(forte agent)/i
383
    || $raw =~ /(forte free agent)/i )
428
    || $raw =~ /(forte free agent)/i )
384
  {
429
  {
385
    $agent = $1;
430
    $agent = $1;
386
  }
431
  }
387
  elsif (
432
  elsif (
388
    $raw =~ /^(
433
    $raw =~ /^(
389
        pan
434
        pan
390
       |sylpheed
435
       |sylpheed
391
       |slrn
436
       |slrn
392
       |mozilla
437
       |mozilla
393
       |knode
438
       |knode
394
       |tin
439
       |tin
395
       |hamster
440
       |hamster
396
       |xrn
441
       |xrn
397
       |xnews
442
       |xnews
398
       |aol
443
       |aol
399
       |gnus
444
       |gnus
400
       |krn
445
       |krn
401
       |macsoup
446
       |macsoup
402
       |messenger
447
       |messenger
403
       |openxp
448
       |openxp
404
       |pine
449
       |pine
405
       |thoth
450
       |thoth
406
       |turnpike
451
       |turnpike
407
       |winvn
452
       |winvn
408
       |vsoup
453
       |vsoup
409
       |google
454
       |google
410
       |supernews
455
       |supernews
411
       |nn
456
       |nn
412
       |rn
457
       |rn
413
       |007
458
       |007
414
       |webtv
459
       |webtv
415
       |compuserve
460
       |compuserve
416
       )/ix
461
       )/ix
417
    )
462
    )
418
  {
463
  {
419
    $agent = $1;
464
    $agent = $1;
420
  }
465
  }
421
  else
466
  else
422
  {
467
  {
423
    ## Clean up unknown agents
468
    ## Clean up unknown agents
424
    if ( $raw =~ m!^(.*?)/! )
469
    if ( $raw =~ m!^(.*?)/! )
425
    {
470
    {
426
      $agent = $1;
471
      $agent = $1;
427
    }
472
    }
428
    elsif ( $raw =~ /^(\w*)\d.*/ )
473
    elsif ( $raw =~ /^(\w*)\d.*/ )
429
    {
474
    {
430
      $agent = $1;
475
      $agent = $1;
431
    }
476
    }
432
  }
477
  }
433
478
434
  $distinct_agent{$agent}++;
479
  $distinct_agent{$agent}++;
435
  return $agent;
480
  return $agent;
436
}
481
}
437
## get_agent
482
## get_agent
438
483
439
#########################################
484
#########################################
440
## Count the User-Agents used, collapsing
485
## Count the User-Agents used, collapsing
441
## different versions into one per agent.
486
## different versions into one per agent.
442
#########################################
487
#########################################
443
sub count_agents
488
sub count_agents
444
{
489
{
445
POSTER:
490
POSTER:
446
  foreach my $poster ( keys %data )
491
  foreach my $poster ( keys %data )
447
  {
492
  {
448
    foreach my $agent_name ( keys %distinct_agent )
493
    foreach my $agent_name ( keys %distinct_agent )
449
    {    # check against known ones
494
    {    # check against known ones
450
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
495
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
451
      {
496
      {
452
        $agents{$agent_name}++;
497
        $agents{$agent_name}++;
453
        next POSTER;
498
        next POSTER;
454
      }
499
      }
455
    }
500
    }
456
    $agents{ $data{$poster}{'agent'} }++;
501
    $agents{ $data{$poster}{'agent'} }++;
457
  }
502
  }
458
}    # count_agents
503
}    # count_agents
459
504
460
#############################################
505
#############################################
461
## Set orig/total percentages for all posters
506
## Set orig/total percentages for all posters
462
#############################################
507
#############################################
463
sub fix_percent
508
sub fix_percent
464
{
509
{
465
  foreach my $poster ( keys %data )
510
  foreach my $poster ( keys %data )
466
  {
511
  {
467
    my $percent = 100;
512
    my $percent = 100;
468
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
513
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
469
    {
514
    {
470
      $percent =
515
      $percent =
471
        $data{$poster}{'orig'} * 100 /
516
        $data{$poster}{'orig'} * 100 /
472
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
517
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
473
    }
518
    }
474
    elsif ( $data{$poster}{'orig'} == 0 )
519
    elsif ( $data{$poster}{'orig'} == 0 )
475
    {
520
    {
476
      $percent = 0;
521
      $percent = 0;
477
    }
522
    }
478
    $data{$poster}{'percent'} = $percent;
523
    $data{$poster}{'percent'} = $percent;
479
  }
524
  }
480
}
525
}
481
## fix_percent
526
## fix_percent
482
527
483
##################################
528
##################################
484
## Write data structures to a file
529
## Write data structures to a file
485
##################################
530
##################################
486
sub write_data
531
sub write_data
487
{
532
{
488
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
533
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
489
    or die __x( "Can't create XDATA: {error}\n", error => $! );
534
    or die __x( "Can't create XDATA: {error}\n", error => $! );
490
  print $OUTF "Data collected from $newsgroup_name\n\n";
535
  print $OUTF "Data collected from $newsgroup_name\n\n";
491
  print $OUTF
536
  print $OUTF
492
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
537
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
493
  foreach my $name ( keys %data )
538
  foreach my $name ( keys %data )
494
  {
539
  {
495
    print $OUTF
540
    print $OUTF
496
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
541
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
497
  }
542
  }
498
  print $OUTF
543
  print $OUTF
499
"============================================================================\n";
544
"============================================================================\n";
500
  print $OUTF "Thread subjects\n";
545
  print $OUTF "Thread subjects\n";
501
  print $OUTF
546
  print $OUTF
502
"----------------------------------------------------------------------------\n";
547
"----------------------------------------------------------------------------\n";
503
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
548
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
504
  {
549
  {
505
    print $OUTF
550
    print $OUTF
506
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
551
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
507
  }
552
  }
508
  print $OUTF
553
  print $OUTF
509
"============================================================================\n";
554
"============================================================================\n";
510
  print $OUTF "Cross-posts\n";
555
  print $OUTF "Cross-posts\n";
511
  print $OUTF
556
  print $OUTF
512
"----------------------------------------------------------------------------\n";
557
"----------------------------------------------------------------------------\n";
513
  foreach my $name ( sort keys %crossposts )
558
  foreach my $name ( sort keys %crossposts )
514
  {
559
  {
515
    print $OUTF "$name : $crossposts{$name}\n";
560
    print $OUTF "$name : $crossposts{$name}\n";
516
  }
561
  }
517
  print $OUTF
562
  print $OUTF
518
"============================================================================\n";
563
"============================================================================\n";
519
  print $OUTF "User agents\n";
564
  print $OUTF "User agents\n";
520
  print $OUTF
565
  print $OUTF
521
"----------------------------------------------------------------------------\n";
566
"----------------------------------------------------------------------------\n";
522
  foreach my $name ( sort keys %agents )
567
  foreach my $name ( sort keys %agents )
523
  {
568
  {
524
    print $OUTF "$name : $agents{$name}\n";
569
    print $OUTF "$name : $agents{$name}\n";
525
  }
570
  }
526
  print $OUTF
571
  print $OUTF
527
"============================================================================\n";
572
"============================================================================\n";
528
  print $OUTF "Time zones\n";
573
  print $OUTF "Time zones\n";
529
  print $OUTF
574
  print $OUTF
530
"----------------------------------------------------------------------------\n";
575
"----------------------------------------------------------------------------\n";
531
  foreach my $name ( sort keys %tz )
576
  foreach my $name ( sort keys %tz )
532
  {
577
  {
533
    print $OUTF "$name : $tz{$name}\n";
578
    print $OUTF "$name : $tz{$name}\n";
534
  }
579
  }
535
  close $OUTF;
580
  close $OUTF;
536
}    # write_data
581
}    # write_data
537
582
538
sub display_results
583
sub display_results
539
{
584
{
540
  #################### DISPLAY RESULTS #####################
585
  #################### DISPLAY RESULTS #####################
541
  print "=" x 76, "\n";
586
  println( "=" x 76 );
542
  printf "%s\n",
587
  printf "%s\n",
543
    centred(
588
    centred(
544
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
589
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
545
    76 );
590
    76 );
546
  print "=" x 76, "\n";
591
  println( "=" x 76 );
547
  printf "%s\n",
592
  printf "%s\n",
548
    centred(
593
    centred(
549
    __
594
    __(
550
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)",
595
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
-
 
596
    ),
551
    76
597
    76
552
    );
598
    );
553
  print "\n\n";
599
  print "\n\n";
554
  printf __"Total posts considered: %s over %d days" . "\n",
600
  printf __"Total posts considered: %s over %d days\n",
555
    commify($totalposts),
601
    $formatter->format_number($totalposts),
556
    $numdays;
602
    $formatter->format_number($numdays);
557
  my $time_locale = setlocale(LC_TIME);
603
  my $time_locale       = setlocale(LC_TIME);
558
  my $earliest_datetime = DateTime->from_epoch(
604
  my $earliest_datetime = DateTime->from_epoch(
559
    epoch => $earliest,
605
    epoch     => $earliest,
560
    locale => $time_locale,
606
    locale    => $time_locale,
561
    time_zone => 'UTC',
607
    time_zone => 'UTC',
562
  );
608
  );
563
  my $latest_datetime = DateTime->from_epoch(
609
  my $latest_datetime = DateTime->from_epoch(
564
    epoch => $latest,
610
    epoch     => $latest,
565
    locale => $time_locale,
611
    locale    => $time_locale,
566
    time_zone => 'UTC',
612
    time_zone => 'UTC',
567
  );
613
  );
568
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
614
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
-
 
615
  printf __"Earliest article: %s\n",
569
  printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format);
616
    $earliest_datetime->strftime($datetime_format);
-
 
617
  printf __"Latest article:   %s\n",
570
  printf __"Latest article" . ":   %s\n", $latest_datetime->strftime($datetime_format);
618
    $latest_datetime->strftime($datetime_format);
571
  printf __"Original articles: %s; replies" . ": %s\n",
619
  printf __"Original articles: %s; replies: %s\n",
572
    commify($origposts),
620
    $formatter->format_number($origposts),
573
    commify($replies);
621
    $formatter->format_number($replies);
574
  printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n",
622
  printf __"Total size of posts: %s bytes (%s)" . "\n",
-
 
623
    $formatter->format_number($totsize),
575
    commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576;  #
624
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
576
  printf __
-
 
577
    "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
625
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
578
    commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
626
    $formatter->format_number( int( $totalposts / $numdays ) ),
-
 
627
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
579
    commify( int( $totsize / $totalposts ) );
628
    $formatter->format_number( int( $totsize / $totalposts ) );
-
 
629
580
  my $count = keys %data;
630
  my $count = keys %data;
581
  printf __"Total headers: %s KiB; bodies: %s KiB\n",
631
  printf __"Total headers: %s; bodies: %s\n",
-
 
632
    $formatter->format_bytes(
-
 
633
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
-
 
634
    ),
582
    commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
635
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
583
  printf __
636
  printf __
584
    "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n",
637
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
-
 
638
    $formatter->format_bytes(
-
 
639
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
-
 
640
    ),
585
    commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
641
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
586
    ( $totorig * 100 ) / ( $totorig + $totquoted ),
642
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
587
    commify( int( $totsig / 1024 ) );
643
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
588
  printf __"Total number of posters: %s, average %s bytes per poster\n",
644
  printf __"Total number of posters: %s, average %s per poster\n",
-
 
645
    $formatter->format_number($count),
589
    commify($count), commify( int( $totsize / $count ) );    #/
646
    $formatter->format_bytes( $totsize / $count,
-
 
647
    ( 'precision' => 1, 'mode' => 'iec' ) );
590
  $count = keys %threads;
648
  $count = keys %threads;
591
  printf __"Total number of threads: %s, average %s bytes per thread\n",
649
  printf __"Total number of threads: %s, average %s per thread\n",
-
 
650
    $formatter->format_number($count),
592
    commify($count), commify( int( $totsize / $count ) );    #/
651
    $formatter->format_bytes( $totsize / $count,
-
 
652
    ( 'precision' => 1, 'mode' => 'iec' ) );
593
  printf __"Total number of user agents: %d\n", scalar keys %agents;
653
  printf __"Total number of user agents: %d\n",
-
 
654
    $formatter->format_number( scalar keys %agents );
594
  print "\n", "=" x 76, "\n";
655
  print "\n", "=" x 76, "\n"  ;
595
-
 
596
  ########################################
656
  ########################################
597
  ## Show posters by article count  Sec 1;
657
  ## Show posters by article count  Sec 1;
598
  ########################################
658
  ########################################
599
  unless ( $skipSec{1} )
659
  unless ( $skipSec{1} )
600
  {
660
  {
601
    if ( keys %data < $topposters )
661
    if ( keys %data < $topposters )
602
    {
662
    {
603
      $count = keys %data;
663
      $count = keys %data;
604
    }
664
    }
605
    else
665
    else
606
    {
666
    {
607
      $count = $topposters;
667
      $count = $topposters;
608
    }
668
    }
609
    printf "%s\n",
669
    printf "%s\n",
610
      centred(
670
      centred(
611
      __x( "Top {count} posters by number of articles", count => $topposters ), 76 );
671
      __x( "Top {count} posters by number of articles", count => $topposters ),
-
 
672
      76
-
 
673
      );
612
    print "=" x 76, "\n";
674
    print "=" x 76, "\n";
613
    my $i = 0;
675
    my $i = 0;
614
    foreach
676
    foreach
615
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
677
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
616
    {
678
    {
617
      my $name = substr( $poster, 0, 65 );
679
      my $name = substr( $poster, 0, 65 );
618
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
680
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
619
        $data{$poster}{count};
681
        $data{$poster}{count};
620
      last if ( ++$i == $count );
682
      last if ( ++$i == $count );
621
    }
683
    }
622
    print "\n", "=" x 76, "\n";
684
    print "\n", "=" x 76, "\n";
623
  }
685
  }
624
686
625
  ######################################
687
  ######################################
626
  ## Show posters by size in KiB  Sec 2;
688
  ## Show posters by size in KiB  Sec 2;
627
  ######################################
689
  ######################################
628
  unless ( $skipSec{2} )
690
  unless ( $skipSec{2} )
629
  {
691
  {
630
    if ( keys %data < $topposters )
692
    if ( keys %data < $topposters )
631
    {
693
    {
632
      $count = keys %data;
694
      $count = keys %data;
633
    }
695
    }
634
    else
696
    else
635
    {
697
    {
636
      $count = $topposters;
698
      $count = $topposters;
637
    }
699
    }
638
    printf "%s\n",
700
    printf "%s\n",
639
      centred(
701
      centred(
640
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
702
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
-
 
703
      76
641
      76 );
704
      );
642
    print "=" x 76, "\n";
705
    print "=" x 76, "\n";
643
    my $i = 0;
706
    my $i = 0;
644
    foreach
707
    foreach
645
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
708
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
646
    {
709
    {
647
      my $name = substr( $poster, 0, 62 );
710
      my $name = substr( $poster, 0, 62 );
648
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
711
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
649
        $data{$poster}{size} / 1024;    #/
712
        $data{$poster}{size} / 1024;    #/
650
      last if ( ++$i == $count );
713
      last if ( ++$i == $count );
651
    }
714
    }
652
    print "\n", "=" x 76, "\n";
715
    print "\n", "=" x 76, "\n";
653
  }
716
  }
654
717
655
  #####################################
718
  #####################################
656
  ## Show top posters for original text
719
  ## Show top posters for original text
657
  #####################################
720
  #####################################
658
  unless ( $skipSec{3} )
721
  unless ( $skipSec{3} )
659
  {
722
  {
660
    if ( keys %data < $topposters )
723
    if ( keys %data < $topposters )
661
    {
724
    {
662
      $count = keys %data;
725
      $count = keys %data;
663
    }
726
    }
664
    else
727
    else
665
    {
728
    {
666
      $count = $topposters;
729
      $count = $topposters;
667
    }
730
    }
668
    printf "%s\n",
731
    printf "%s\n",
669
      centred(
732
      centred(
670
      __x(
733
      __x(
671
        "Top {count} responders by original text (> 5 posts)",
734
        "Top {count} responders by original text (> 5 posts)",
672
        count => $topposters
735
        count => $topposters
673
      ),
736
      ),
674
      76
737
      76
675
      );
738
      );
676
    print "=" x 76, "\n";
739
    print "=" x 76, "\n";
677
    my $i = 0;
740
    my $i = 0;
678
    foreach my $poster (
741
    foreach my $poster (
679
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
742
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
680
      keys %data
743
      keys %data
681
      )
744
      )
682
    {
745
    {
683
      next if $data{$poster}{quoted} == 0;
746
      next if $data{$poster}{quoted} == 0;
684
      next if $data{$poster}{count} < 5;
747
      next if $data{$poster}{count} < 5;
685
      my $name = substr( $poster, 0, 63 );
748
      my $name = substr( $poster, 0, 63 );
686
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
749
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
687
        $data{$poster}{percent};
750
        $data{$poster}{percent};
688
      last if ( ++$i == $count );
751
      last if ( ++$i == $count );
689
    }
752
    }
690
    print "\n", "=" x 76, "\n";
753
    print "\n", "=" x 76, "\n";
691
  }
754
  }
692
755
693
  ########################################
756
  ########################################
694
  ## Show bottom posters for original text
757
  ## Show bottom posters for original text
695
  ########################################
758
  ########################################
696
  unless ( $skipSec{4} )
759
  unless ( $skipSec{4} )
697
  {
760
  {
698
    if ( keys %data < $topposters )
761
    if ( keys %data < $topposters )
699
    {
762
    {
700
      $count = keys %data;
763
      $count = keys %data;
701
    }
764
    }
702
    else
765
    else
703
    {
766
    {
704
      $count = $topposters;
767
      $count = $topposters;
705
    }
768
    }
706
    printf "%s\n",
769
    printf "%s\n",
707
      centred(
770
      centred(
708
      __x(
771
      __x(
709
        "Bottom {count} responders by original text  (> 5 posts)",
772
        "Bottom {count} responders by original text  (> 5 posts)",
710
        count => $topposters
773
        count => $topposters
711
      ),
774
      ),
712
      76
775
      76
713
      );
776
      );
714
    print "=" x 76, "\n";
777
    print "=" x 76, "\n";
715
    my $i = 0;
778
    my $i = 0;
716
    foreach my $poster (
779
    foreach my $poster (
717
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
780
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
718
      keys %data
781
      keys %data
719
      )
782
      )
720
    {
783
    {
721
      next if $data{$poster}{quoted} == 0;
784
      next if $data{$poster}{quoted} == 0;
722
      next if $data{$poster}{count} < 5;
785
      next if $data{$poster}{count} < 5;
723
      my $name = substr( $poster, 0, 63 );
786
      my $name = substr( $poster, 0, 63 );
724
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
787
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
725
        $data{$poster}{percent};
788
        $data{$poster}{percent};
726
      last if ( ++$i == $count );
789
      last if ( ++$i == $count );
727
    }
790
    }
728
    print "\n", "=" x 76, "\n";
791
    print "\n", "=" x 76, "\n";
729
  }
792
  }
730
793
731
  #####################################
794
  #####################################
732
  ## Show threads by number of articles
795
  ## Show threads by number of articles
733
  #####################################
796
  #####################################
734
  unless ( $skipSec{5} )
797
  unless ( $skipSec{5} )
735
  {
798
  {
736
    if ( keys %threads < $topthreads )
799
    if ( keys %threads < $topthreads )
737
    {
800
    {
738
      $count = keys %threads;
801
      $count = keys %threads;
739
    }
802
    }
740
    else
803
    else
741
    {
804
    {
742
      $count = $topthreads;
805
      $count = $topthreads;
743
    }
806
    }
744
    printf "%s\n",
807
    printf "%s\n",
-
 
808
      centred(
745
      centred( __x( "Top {count} threads by no. of articles", count => $topthreads ),
809
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
746
      76 );
810
      76 );
747
    print "=" x 76, "\n";
811
    print "=" x 76, "\n";
748
    my $i = 0;
812
    my $i = 0;
749
    foreach my $thread (
813
    foreach my $thread (
750
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
814
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
751
      keys %threads
815
      keys %threads
752
      )
816
      )
753
    {
817
    {
754
      my $name = substr( $thread, 0, 65 );
818
      my $name = substr( $thread, 0, 65 );
755
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
819
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
756
        $threads{$thread}{'count'};
820
        $threads{$thread}{'count'};
757
      last if ( ++$i == $count );
821
      last if ( ++$i == $count );
758
    }
822
    }
759
    print "\n", "=" x 76, "\n";
823
    print "\n", "=" x 76, "\n";
760
  }
824
  }
761
825
762
  ##############################
826
  ##############################
763
  ## Show threads by size in KiB
827
  ## Show threads by size in KiB
764
  ##############################
828
  ##############################
765
  unless ( $skipSec{6} )
829
  unless ( $skipSec{6} )
766
  {
830
  {
767
    if ( keys %threads < $topthreads )
831
    if ( keys %threads < $topthreads )
768
    {
832
    {
769
      $count = keys %threads;
833
      $count = keys %threads;
770
    }
834
    }
771
    else
835
    else
772
    {
836
    {
773
      $count = $topthreads;
837
      $count = $topthreads;
774
    }
838
    }
775
    printf "%s\n",
839
    printf "%s\n",
-
 
840
      centred(
776
      centred( __x( "Top {count} threads by size in KiB", count => $topthreads ),
841
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
777
      76 );
-
 
778
    print "=" x 76, "\n";
842
    print "=" x 76, "\n";
779
    my $i = 0;
843
    my $i = 0;
780
    foreach my $thread (
844
    foreach my $thread (
781
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
845
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
782
      keys %threads
846
      keys %threads
783
      )
847
      )
784
    {
848
    {
785
      my $name = substr( $thread, 0, 65 );
849
      my $name = substr( $thread, 0, 65 );
786
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
850
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
787
        $threads{$thread}{'size'} / 1024;    #/
851
        $threads{$thread}{'size'} / 1024;    #/
788
      last if ( ++$i == $count );
852
      last if ( ++$i == $count );
789
    }
853
    }
790
    print "\n", "=" x 76, "\n";
854
    print "\n", "=" x 76, "\n";
791
  }
855
  }
792
856
793
  ##################################
857
  ##################################
794
  ## Show top 10 cross-posted groups
858
  ## Show top 10 cross-posted groups
795
  ##################################
859
  ##################################
796
  unless ( $skipSec{7} )
860
  unless ( $skipSec{7} )
797
  {
861
  {
798
    delete $crossposts{"$newsgroup_name"};    # don't include ours
862
    delete $crossposts{"$newsgroup_name"};    # don't include ours
799
    if ( keys %crossposts < $topcrossposts )
863
    if ( keys %crossposts < $topcrossposts )
800
    {
864
    {
801
      $count = keys %crossposts;
865
      $count = keys %crossposts;
802
    }
866
    }
803
    else
867
    else
804
    {
868
    {
805
      $count = $topcrossposts;
869
      $count = $topcrossposts;
806
    }
870
    }
807
    printf "%s\n",
871
    printf "%s\n",
-
 
872
      centred(
808
      centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
873
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
809
    print "=" x 76, "\n";
874
    print "=" x 76, "\n";
810
    my $i = 0;
875
    my $i = 0;
811
    foreach
876
    foreach
812
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
877
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
813
    {
878
    {
814
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
879
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
815
        $crossposts{$name};
880
        $crossposts{$name};
816
      last if ( ++$i == $count );
881
      last if ( ++$i == $count );
817
    }
882
    }
818
    print "\n", "=" x 76, "\n";
883
    print "\n", "=" x 76, "\n";
819
  }
884
  }
820
885
821
  #########################
886
  #########################
822
  ## Show agents and counts
887
  ## Show agents and counts
823
  #########################
888
  #########################
824
  unless ( $skipSec{8} )
889
  unless ( $skipSec{8} )
825
  {
890
  {
826
    if ( keys %agents < $topagents )
891
    if ( keys %agents < $topagents )
827
    {
892
    {
828
      $count = keys %agents;
893
      $count = keys %agents;
829
    }
894
    }
830
    else
895
    else
831
    {
896
    {
832
      $count = $topagents;
897
      $count = $topagents;
833
    }
898
    }
834
    printf "%s\n",
899
    printf "%s\n",
835
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
900
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
836
      76 );
901
      76 );
837
    print "=" x 76, "\n";
902
    print "=" x 76, "\n";
838
    my $i = 0;
903
    my $i = 0;
839
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
904
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
840
    {
905
    {
841
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
906
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
842
        $agents{$agent};
907
        $agents{$agent};
843
      last if ( ++$i == $count );
908
      last if ( ++$i == $count );
844
    }
909
    }
845
    print "\n", "=" x 76, "\n";
910
    print "\n", "=" x 76, "\n";
846
  }
911
  }
847
912
848
  #######################
913
  #######################
849
  ## Show distinct agents
914
  ## Show distinct agents
850
  #######################
915
  #######################
851
  unless ( $skipSec{9} )
916
  unless ( $skipSec{9} )
852
  {
917
  {
853
    if ( keys %distinct_agent < $topagents )
918
    if ( keys %distinct_agent < $topagents )
854
    {
919
    {
855
      $count = keys %distinct_agent;
920
      $count = keys %distinct_agent;
856
    }
921
    }
857
    else
922
    else
858
    {
923
    {
859
      $count = $topagents;
924
      $count = $topagents;
860
    }
925
    }
861
    printf "%s\n",
926
    printf "%s\n",
862
      centred(
927
      centred(
863
      __x( "Top {count} user agents by number of posts", count => $topagents ),
928
      __x( "Top {count} user agents by number of posts", count => $topagents ),
-
 
929
      76
864
      76 );
930
      );
865
    print "=" x 76, "\n";
931
    print "=" x 76, "\n";
866
    my $i = 0;
932
    my $i = 0;
867
    foreach my $agent (
933
    foreach my $agent (
868
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
934
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
869
      keys %distinct_agent
935
      keys %distinct_agent
870
      )
936
      )
871
    {
937
    {
872
      printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
938
      printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
873
        $distinct_agent{$agent},
939
        $distinct_agent{$agent},
874
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
940
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
875
      last if ( ++$i == $count );
941
      last if ( ++$i == $count );
876
    }
942
    }
877
    print "\n", "=" x 76, "\n";
943
    print "\n", "=" x 76, "\n";
878
  }
944
  }
879
945
880
  ############################
946
  ############################
881
  ## Show timezones and counts
947
  ## Show timezones and counts
882
  ############################
948
  ############################
883
  unless ( $skipSec{10} )
949
  unless ( $skipSec{10} )
884
  {
950
  {
885
    if ( keys %tz < $toptz )
951
    if ( keys %tz < $toptz )
886
    {
952
    {
887
      $count = keys %tz;
953
      $count = keys %tz;
888
    }
954
    }
889
    else
955
    else
890
    {
956
    {
891
      $count = $toptz;
957
      $count = $toptz;
892
    }
958
    }
-
 
959
    printf "%s\n",
893
    printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 );
960
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
894
    print "=" x 76, "\n";
961
    print "=" x 76, "\n";
895
    my $i = 0;
962
    my $i = 0;
896
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
963
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
897
    {
964
    {
898
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
965
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
899
      last if ( ++$i == $count );
966
      last if ( ++$i == $count );
900
    }
967
    }
901
    print "\n", "=" x 76, "\n";
968
    print "\n", "=" x 76, "\n";
902
  }
969
  }
903
}
970
}
904
971
905
## helper subs
972
## helper subs
906
973
907
###############################
974
###############################
908
## Right pad a string with '.'s
975
## Right pad a string with '.'s
909
###############################
976
###############################
910
sub rpad
977
sub rpad
911
{
978
{
912
  ## Get text to pad, length to pad, pad chr
979
  ## Get text to pad, length to pad, pad chr
913
  my ( $text, $pad_len, $pad_chr ) = @_;
980
  my ( $text, $pad_len, $pad_chr ) = @_;
914
981
915
  ## DEBUG
982
  ## DEBUG
916
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
983
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
917
984
918
  if ( length($text) > $pad_len )
985
  if ( length($text) > $pad_len )
919
  {
986
  {
920
    $text = substr( $text, 0, $pad_len );
987
    $text = substr( $text, 0, $pad_len );
921
  }
988
  }
922
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
989
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
923
  return $padded;
990
  return $padded;
924
}
991
}
925
992
926
##################
993
##################
927
## Centre a string
994
## Centre a string
928
##################
995
##################
929
sub centred
996
sub centred
930
{
997
{
931
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
998
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
932
  my $pad_len = ( $width - length($text) ) / 2;    #/
999
  my $pad_len = ( $width - length($text) ) / 2;    #/
933
  my $centred = " " x $pad_len . $text;
1000
  my $centred = " " x $pad_len . $text;
934
  return $centred;
1001
  return $centred;
935
}
1002
}
936
1003
937
###########################
1004
###########################
938
## Put commas into a number
1005
## Put commas into a number
939
###########################
1006
###########################
940
# Get some of locale's numeric formatting parameters
-
 
941
my ($thousands_sep, $grouping) =
-
 
942
        @{localeconv()}{'thousands_sep', 'grouping'};
-
 
943
# Apply defaults if values are missing
-
 
944
$thousands_sep = ',' unless $thousands_sep;
-
 
945
# grouping and mon_grouping are packed lists
-
 
946
# of small integers (characters) telling the
-
 
947
# grouping (thousand_seps and mon_thousand_seps
-
 
948
# being the group dividers) of numbers and
-
 
949
# monetary quantities.  The integers' meanings:
-
 
950
# 255 means no more grouping, 0 means repeat
-
 
951
# the previous grouping, 1-254 means use that
-
 
952
# as the current grouping.  Grouping goes from
-
 
953
# right to left (low to high digits).  In the
-
 
954
# below we cheat slightly by never using anything
-
 
955
# else than the first grouping (whatever that is).
-
 
956
my @grouping;
-
 
957
if ($grouping) {
-
 
958
    @grouping = unpack("C*", $grouping);
-
 
959
} else {
-
 
960
    @grouping = (3);
-
 
961
}
-
 
962
-
 
963
sub commify
1007
sub commify
964
{
1008
{
965
  local $_ = shift;
1009
  local $_ = shift;
966
  #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
1010
  my $number = $_;
967
  $_ = int;    # Chop non-integer part
1011
  $_ = int;                                        # Chop non-integer part
968
  1 while
1012
  1 while
969
  s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/;
1013
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
-
 
1014
  my $int_part  = $_;
-
 
1015
  my $real_part = '';
-
 
1016
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
-
 
1017
  {
970
  return $_;
1018
    $real_part = $1;
-
 
1019
  }
-
 
1020
  return $int_part . $real_part;
971
}
1021
}
972
1022
973
################################################################
1023
################################################################
974
## Returns a string with leading and trailing whitespace removed
1024
## Returns a string with leading and trailing whitespace removed
975
################################################################
1025
################################################################
976
sub clean
1026
sub clean
977
{
1027
{
978
  my $dirty = shift;
1028
  my $dirty = shift;
979
  my $clean = $dirty;
1029
  my $clean = $dirty;
980
  $clean =~ s/^\s+|\s+$//g;
1030
  $clean =~ s/^\s+|\s+$//g;
981
1031
982
  return $clean;
1032
  return $clean;
983
}
1033
}
984
1034
985
sub usage
1035
sub usage
986
{
1036
{
987
  print __"usage: newsstat.pl NEWS.GROUP\n";
1037
  println( __ "usage: newsstat.pl NEWS.GROUP" );
988
  exit 1;
1038
  exit 1;
989
}
1039
}
990
1040
991
sub dmsg
1041
sub dmsg
992
{
1042
{
993
  print STDERR @_, "\n";
1043
  print STDERR @_, "\n";
994
}
1044
}
995
1045
996
sub dmsg2
1046
sub dmsg2
997
{
1047
{
998
  my ( $level, @msg ) = @_;
1048
  my ( $level, @msg ) = @_;
999
  print STDERR @msg, "\n" if $level >= DEBUG;
1049
  print STDERR @msg, "\n" if $level >= DEBUG;
1000
}
1050
}
-
 
1051
-
 
1052
sub println
-
 
1053
{
-
 
1054
  print @_, "\n";
-
 
1055
}
1001
 
1056