Subversion Repositories LCARS

Rev

Rev 25 | Rev 31 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 25 Rev 30
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
require 5.004;
4
require 5.004;
5
5
6
#use diagnostics;
6
#use diagnostics;
7
use utf8;
7
use utf8;
-
 
8
-
 
9
## NOTE:
-
 
10
## Enable and remove binmode when utf8::all has actually become lexically scoped
8
use Encode;
11
# use utf8:all;
9
12
10
use constant DEBUG => 0;
13
use constant DEBUG => 0;
11
14
12
## newsstat.pl
15
## newsstat.pl
13
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
16
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
14
## Based on work by Garry Knight et al.
17
## Based on work by Garry Knight et al.
15
## 
18
## 
16
## This program is free software: you can redistribute it and/or modify
19
## This program is free software: you can redistribute it and/or modify
17
## it under the terms of the GNU General Public License as published by
20
## it under the terms of the GNU General Public License as published by
18
## the Free Software Foundation, either version 3 of the License, or
21
## the Free Software Foundation, either version 3 of the License, or
19
## (at your option) any later version.
22
## (at your option) any later version.
20
## 
23
## 
21
## This program is distributed in the hope that it will be useful,
24
## This program is distributed in the hope that it will be useful,
22
## but WITHOUT ANY WARRANTY; without even the implied warranty of
25
## but WITHOUT ANY WARRANTY; without even the implied warranty of
23
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
## GNU General Public License for more details.
27
## GNU General Public License for more details.
25
## 
28
## 
26
## You should have received a copy of the GNU General Public License
29
## You should have received a copy of the GNU General Public License
27
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
30
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
28
31
29
## Print out all text to STDOUT UTF-8 encoded
32
## Print out all text to STDOUT UTF-8 encoded
30
binmode STDOUT, ':encoding(UTF-8)';
33
binmode STDOUT, ':encoding(UTF-8)';
31
binmode STDERR, ':encoding(UTF-8)';
34
binmode STDERR, ':encoding(UTF-8)';
32
35
33
## L10n
36
## L10n
34
use locale ':not_characters';
37
use locale ':not_characters';
35
38
36
# setlocale( LC_MESSAGES, '' );
39
# setlocale( LC_MESSAGES, '' );
37
require Number::Format;
40
require Number::Format;
38
41
39
## i18n
42
## i18n
40
## FIXME: Automatically include resolved '.' in @INC
43
## FIXME: Automatically include resolved '.' in @INC
41
# print join "\n", @INC;
44
# print join "\n", @INC;
42
45
43
use Locale::TextDomain ('de.pointedears.newsstat');
46
use Locale::TextDomain ('de.pointedears.newsstat');
44
use POSIX              ('locale_h');
47
use POSIX              ('locale_h');
45
use Locale::Messages qw (bind_textdomain_filter
48
use Locale::Messages qw (bind_textdomain_filter
46
  bind_textdomain_codeset
49
  bind_textdomain_codeset
47
  turn_utf_8_on);
50
  turn_utf_8_on);
48
51
49
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
52
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
50
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
53
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
51
54
52
require Mail::Message;
55
require Mail::Message;
53
require DateTime;
56
require DateTime;
54
require DateTime::Format::Mail;
57
require DateTime::Format::Mail;
55
58
56
# See comments in previous example
59
# See comments in previous example
57
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
60
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
58
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
61
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
59
  'decimal_point' };
62
  'decimal_point' };
60
63
61
# Apply defaults if values are missing
64
# Apply defaults if values are missing
62
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
65
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
63
$thousands_sep = ' ' unless $thousands_sep;
66
$thousands_sep = ' ' unless $thousands_sep;
64
67
65
# grouping and mon_grouping are packed lists
68
# grouping and mon_grouping are packed lists
66
# of small integers (characters) telling the
69
# of small integers (characters) telling the
67
# grouping (thousand_seps and mon_thousand_seps
70
# grouping (thousand_seps and mon_thousand_seps
68
# being the group dividers) of numbers and
71
# being the group dividers) of numbers and
69
# monetary quantities.  The integers' meanings:
72
# monetary quantities.  The integers' meanings:
70
# 255 means no more grouping, 0 means repeat
73
# 255 means no more grouping, 0 means repeat
71
# the previous grouping, 1-254 means use that
74
# the previous grouping, 1-254 means use that
72
# as the current grouping.  Grouping goes from
75
# as the current grouping.  Grouping goes from
73
# right to left (low to high digits).  In the
76
# right to left (low to high digits).  In the
74
# below we cheat slightly by never using anything
77
# below we cheat slightly by never using anything
75
# else than the first grouping (whatever that is).
78
# else than the first grouping (whatever that is).
76
my @grouping;
79
my @grouping;
77
if ($grouping)
80
if ($grouping)
78
{
81
{
79
  @grouping = unpack( "C*", $grouping );
82
  @grouping = unpack( "C*", $grouping );
80
}
83
}
81
else
84
else
82
{
85
{
83
  @grouping = (3);
86
  @grouping = (3);
84
}
87
}
85
88
86
## FIXME: Why don't the defaults work already?
89
## FIXME: Why don't the defaults work already?
87
my $formatter = new Number::Format(
90
my $formatter = new Number::Format(
88
  -decimal_point => $decimal_point,
91
  -decimal_point => $decimal_point,
89
  -thousands_sep => $thousands_sep,
92
  -thousands_sep => $thousands_sep,
90
93
91
  # -grouping      => $grouping[0]
94
  # -grouping      => $grouping[0]
92
);
95
);
93
96
94
###################### USER CONFIGURATIONS ############################
97
###################### USER CONFIGURATIONS ############################
95
98
96
## The name of the group to do stats for
99
## The name of the group to do stats for
97
my $newsgroup_name = $ARGV[0];
100
my $newsgroup_name = $ARGV[0];
98
$newsgroup_name or usage();
101
$newsgroup_name or usage();
99
102
100
## Check for removal flags
103
## Check for removal flags
101
my $ix;
104
my $ix;
102
my $j;
105
my $j;
103
my %skipSec;
106
my %skipSec;
104
my @skiplist;
107
my @skiplist;
105
my $args = @ARGV;
108
my $args = @ARGV;
106
for ( $ix = 1 ; $ix < $args ; $ix++ )
109
for ( $ix = 1 ; $ix < $args ; $ix++ )
107
{
110
{
108
  $j = $ix + 1;
111
  $j = $ix + 1;
109
  if ( $ARGV[$ix] eq "-x" )
112
  if ( $ARGV[$ix] eq "-x" )
110
  {
113
  {
111
    @skiplist = split( ",", $ARGV[$j] );
114
    @skiplist = split( ",", $ARGV[$j] );
112
  }
115
  }
113
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
116
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
114
  {
117
  {
115
    @skiplist = split( ",", $1 );
118
    @skiplist = split( ",", $1 );
116
  }
119
  }
117
}
120
}
118
foreach (@skiplist)
121
foreach (@skiplist)
119
{
122
{
120
  $skipSec{$_} = 1;
123
  $skipSec{$_} = 1;
121
}
124
}
122
125
123
## Leafnode users will want /var/spool/news for this variable.
126
## Leafnode users will want /var/spool/news for this variable.
124
my $news = "/var/spool/news/";
127
my $news = "/var/spool/news/";
125
128
126
## Number of top or bottom posters to show
129
## Number of top or bottom posters to show
127
my $topposters = 20;
130
my $topposters = 20;
128
131
129
## Number of threads we want to know about
132
## Number of threads we want to know about
130
my $topthreads = 20;
133
my $topthreads = 20;
131
134
132
## Number of cross-posted threads to show
135
## Number of cross-posted threads to show
133
my $topcrossposts = 10;
136
my $topcrossposts = 10;
134
137
135
## Number of agents we list
138
## Number of agents we list
136
my $topagents = 10;
139
my $topagents = 10;
137
140
138
## Number of time zones to show
141
## Number of time zones to show
139
my $toptz = 10;
142
my $toptz = 10;
140
143
141
###################### DATA STRUCTURES ######################
144
###################### DATA STRUCTURES ######################
142
my $group = $newsgroup_name;
145
my $group = $newsgroup_name;
143
$group =~ s!\.!/!g;
146
$group =~ s!\.!/!g;
144
my %data;    # name, count, agent, total, orig, quoted
147
my %data;    # name, count, agent, total, orig, quoted
145
my $totsize = 0;    # holds total sizes of all files
148
my $totsize = 0;    # holds total sizes of all files
146
my %crossposts;     # group, count
149
my %crossposts;     # group, count
147
my %threads;        # subject, count
150
my %threads;        # subject, count
148
my $replies   = 0;  # total no. of replies
151
my $replies   = 0;  # total no. of replies
149
my $origposts = 0;  # total no. of original posts
152
my $origposts = 0;  # total no. of original posts
150
my %tz;             # timezones by count
153
my %tz;             # timezones by count
151
my $earliest;       # earliest article we have found
154
my $earliest;       # earliest article we have found
152
my $latest;         # latest article we have found
155
my $latest;         # latest article we have found
153
my $totheader = 0;  # total size of header material
156
my $totheader = 0;  # total size of header material
154
my $totbody   = 0;  # total size of body material
157
my $totbody   = 0;  # total size of body material
155
my $totsig    = 0;  # total size of sig material
158
my $totsig    = 0;  # total size of sig material
156
my $totquoted = 0;  # total size of quoted material
159
my $totquoted = 0;  # total size of quoted material
157
my $totorig   = 0;  # total size of original material
160
my $totorig   = 0;  # total size of original material
158
my $totalposts;     # total no. of posts considered
161
my $totalposts;     # total no. of posts considered
159
my %distinct_agent;
162
my %distinct_agent;
160
163
161
## Used to hold counts of User Agents used
164
## Used to hold counts of User Agents used
162
my %agents = (
165
my %agents = (
163
  "Compuserver"               => 0,
166
  "Compuserver"               => 0,
164
  "Foorum"                    => 0,
167
  "Foorum"                    => 0,
165
  "Forte Agent"               => 0,
168
  "Forte Agent"               => 0,
166
  "Forte Free Agent"          => 0,
169
  "Forte Free Agent"          => 0,
167
  "Gnus"                      => 0,
170
  "Gnus"                      => 0,
168
  "KNode"                     => 0,
171
  "KNode"                     => 0,
169
  "MacSOUP"                   => 0,
172
  "MacSOUP"                   => 0,
170
  "MT-NewsWatcher"            => 0,
173
  "MT-NewsWatcher"            => 0,
171
  "MicroPlanet Gravity"       => 0,
174
  "MicroPlanet Gravity"       => 0,
172
  "Microsoft Outlook Express" => 0,
175
  "Microsoft Outlook Express" => 0,
173
  "Microsoft Windows Mail"    => 0,
176
  "Microsoft Windows Mail"    => 0,
174
  "Mozilla"                   => 0,
177
  "Mozilla"                   => 0,
175
  "News Rover"                => 0,
178
  "News Rover"                => 0,
176
  "NN"                        => 0,
179
  "NN"                        => 0,
177
  "Pan"                       => 0,
180
  "Pan"                       => 0,
178
  "rn"                        => 0,
181
  "rn"                        => 0,
179
  "slrn"                      => 0,
182
  "slrn"                      => 0,
180
  "Sylpheed"                  => 0,
183
  "Sylpheed"                  => 0,
181
  "tin"                       => 0,
184
  "tin"                       => 0,
182
  "VSoup"                     => 0,
185
  "VSoup"                     => 0,
183
  "WebTV"                     => 0,
186
  "WebTV"                     => 0,
184
  "Xnews"                     => 0,
187
  "Xnews"                     => 0,
185
);
188
);
186
189
187
my $datetime_parser = DateTime::Format::Mail->new();
190
my $datetime_parser = DateTime::Format::Mail->new();
188
$datetime_parser->loose();
191
$datetime_parser->loose();
189
192
190
my $today = DateTime->today( time_zone => 'UTC' );
193
my $today = DateTime->today( time_zone => 'UTC' );
191
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
194
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
192
my $start      = int $prev_month->strftime('%s');
195
my $start      = int $prev_month->strftime('%s');
193
my $numdays    = int DateTime->last_day_of_month(
196
my $numdays    = int DateTime->last_day_of_month(
194
  year      => $prev_month->year(),
197
  year      => $prev_month->year(),
195
  month     => $prev_month->month(),
198
  month     => $prev_month->month(),
196
  time_zone => $prev_month->time_zone(),
199
  time_zone => $prev_month->time_zone(),
197
)->day();
200
)->day();
198
my $end = int $today->clone()->set_day(1)->strftime('%s');
201
my $end = int $today->clone()->set_day(1)->strftime('%s');
199
202
200
dmsg( $start, " to ", $end ) if DEBUG;
203
dmsg( $start, " to ", $end ) if DEBUG;
201
204
202
chdir("$news$group")
205
chdir("$news$group")
203
  or die __x(
206
  or die __x(
204
  "Can't cd to {newsgroup}: {error}\n",
207
  "Can't cd to {newsgroup}: {error}\n",
205
  newsgroup => "$news$group",
208
  newsgroup => "$news$group",
206
  error     => $!
209
  error     => $!
207
  );
210
  );
208
opendir( DIR, "." )
211
opendir( DIR, "." )
209
  or die __x(
212
  or die __x(
210
  "Can't open {newsgroup}: {error}\n",
213
  "Can't open {newsgroup}: {error}\n",
211
  newsgroup => "$news$group",
214
  newsgroup => "$news$group",
212
  error     => $!
215
  error     => $!
213
  );
216
  );
214
217
215
while ( defined( my $filename = readdir(DIR) ) )
218
while ( defined( my $filename = readdir(DIR) ) )
216
{
219
{
217
  next unless -f $filename;    # only want real files
220
  next unless -f $filename;    # only want real files
218
  next if ( $filename eq ".overview" );    # real articles only
221
  next if ( $filename eq ".overview" );    # real articles only
219
222
220
  get_article($filename);                  # read in the article
223
  get_article($filename);                  # read in the article
221
}
224
}
222
closedir(DIR);                             # finished with the directory
225
closedir(DIR);                             # finished with the directory
223
226
224
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
227
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
225
228
226
## Post-processing
229
## Post-processing
227
count_agents();                            # count agents, collapsing versions
230
count_agents();                            # count agents, collapsing versions
228
fix_percent();
231
fix_percent();
229
232
230
write_data();
233
write_data();
231
display_results();
234
display_results();
232
235
233
########################################
236
########################################
234
## Get current article's header and body
237
## Get current article's header and body
235
########################################
238
########################################
236
sub get_article
239
sub get_article
237
{
240
{
238
  my $filename = shift;
241
  my $filename = shift;
239
242
240
  open( my $FILE, '<', $filename )
243
  open( my $FILE, '<', $filename )
241
    or
244
    or
242
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
245
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
243
  my $msg       = Mail::Message->read($FILE);
246
  my $msg       = Mail::Message->read($FILE);
244
  my $timestamp = $msg->timestamp();
247
  my $timestamp = $msg->timestamp();
245
  my $date      = $msg->study('Date');
248
  my $date      = $msg->study('Date');
246
249
247
  ## Disregard article if timestamp is not in range
250
  ## Disregard article if timestamp is not in range
248
  dmsg($timestamp) if DEBUG;
251
  dmsg($timestamp) if DEBUG;
249
  if ( $timestamp < $start or $timestamp >= $end )
252
  if ( $timestamp < $start or $timestamp >= $end )
250
  {
253
  {
251
    dmsg("Posting on $date ignored.") if DEBUG;
254
    dmsg("Posting on $date ignored.") if DEBUG;
252
    return;
255
    return;
253
  }
256
  }
254
257
255
  $totalposts++;    # bump count of articles considered
258
  $totalposts++;    # bump count of articles considered
256
259
257
  ## DEBUG
260
  ## DEBUG
258
  dmsg($date) if DEBUG;
261
  dmsg($date) if DEBUG;
259
262
260
  ## get stats about the file itself
263
  ## get stats about the file itself
261
  my $filesize = -s $filename;    # get total size of file
264
  my $filesize = -s $filename;    # get total size of file
262
  $totsize += $filesize;          # bump total sizes of all files
265
  $totsize += $filesize;          # bump total sizes of all files
263
266
264
  if ( ( not defined $earliest ) or $timestamp < $earliest )
267
  if ( ( not defined $earliest ) or $timestamp < $earliest )
265
  {
268
  {
266
    $earliest = $timestamp;
269
    $earliest = $timestamp;
267
  }
270
  }
268
  elsif ( ( not defined $latest ) or $timestamp > $latest )
271
  elsif ( ( not defined $latest ) or $timestamp > $latest )
269
  {
272
  {
270
    $latest = $timestamp;
273
    $latest = $timestamp;
271
  }
274
  }
272
275
273
  #print "timestamp: $timestamp\n";
276
  #print "timestamp: $timestamp\n";
274
277
275
  ## count header size
278
  ## count header size
276
  $totheader += $msg->head()->size();
279
  $totheader += $msg->head()->size();
277
280
278
  ## get the poster's name (MIME-decoded, in UTF-8)
281
  ## get the poster's name (MIME-decoded, in UTF-8)
279
  my $poster = $msg->study('From');
282
  my $poster = $msg->study('From');
280
  if ( defined $poster )
283
  if ( defined $poster )
281
  {
284
  {
282
    ## Convert old to new format
285
    ## Convert old to new format
283
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
286
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
284
287
285
    ## Collapse whitespace
288
    ## Collapse whitespace
286
    $poster =~ s/\s+/ /g;
289
    $poster =~ s/\s+/ /g;
287
290
288
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
291
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
289
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
292
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
290
293
291
    ## DEBUG
294
    ## DEBUG
292
    dmsg($poster) if DEBUG;
295
    dmsg($poster) if DEBUG;
293
296
294
    ## seen this one before?
297
    ## seen this one before?
295
    if ( !defined( $data{$poster} ) )
298
    if ( !defined( $data{$poster} ) )
296
    {
299
    {
297
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
300
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
298
      $data{$poster}{'orig'}   = 0;
301
      $data{$poster}{'orig'}   = 0;
299
      $data{$poster}{'quoted'} = 0;
302
      $data{$poster}{'quoted'} = 0;
300
    }
303
    }
301
    $data{$poster}{'count'}++;                    # bump count for this poster
304
    $data{$poster}{'count'}++;                    # bump count for this poster
302
    $data{$poster}{'size'} += $filesize;          # total size of file
305
    $data{$poster}{'size'} += $filesize;          # total size of file
303
306
304
    ## The User-Agent and/or X-Newsreader fields
307
    ## The User-Agent and/or X-Newsreader fields
305
    ## for User-Agent by poster
308
    ## for User-Agent by poster
306
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
309
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
307
    if ( defined $ua )
310
    if ( defined $ua )
308
    {
311
    {
309
      $data{$poster}{'agent'} = $ua;
312
      $data{$poster}{'agent'} = $ua;
310
313
311
      ## DEBUG
314
      ## DEBUG
312
      dmsg($ua) if DEBUG;
315
      dmsg($ua) if DEBUG;
313
    }
316
    }
314
317
315
    ## The User Agent for User-Agent by number of posts
318
    ## The User Agent for User-Agent by number of posts
316
    get_agent($msg);
319
    get_agent($msg);
317
320
318
    ## Get all cross-posted newsgroups
321
    ## Get all cross-posted newsgroups
319
    for ( split( /,/, $msg->study('Newsgroups') ) )
322
    for ( split( /,/, $msg->study('Newsgroups') ) )
320
    {
323
    {
321
      $crossposts{$_}++;    # bump count for each
324
      $crossposts{$_}++;    # bump count for each
322
    }
325
    }
323
326
324
    ## Get threads
327
    ## Get threads
325
    my $thread = $msg->study('Subject');
328
    my $thread = $msg->study('Subject');
326
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
329
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
327
    $thread =~ s/\s+/ /g;       # collapse whitespace
330
    $thread =~ s/\s+/ /g;       # collapse whitespace
328
    $threads{$thread}{'count'}++;    # bump count of this subject
331
    $threads{$thread}{'count'}++;    # bump count of this subject
329
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
332
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
330
333
331
    ## Is this an original post or a reply?
334
    ## Is this an original post or a reply?
332
    if ( defined $msg->study('References') )
335
    if ( defined $msg->study('References') )
333
    {
336
    {
334
      $replies++;
337
      $replies++;
335
    }
338
    }
336
    else
339
    else
337
    {
340
    {
338
      $origposts++;
341
      $origposts++;
339
    }
342
    }
340
343
341
    ## Get the time zone
344
    ## Get the time zone
342
    my $datetime = $datetime_parser->parse_datetime($date);
345
    my $datetime = $datetime_parser->parse_datetime($date);
343
    my $tz       = $datetime->strftime('%z');
346
    my $tz       = $datetime->strftime('%z');
344
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
347
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
345
    $tz{$tz}++;
348
    $tz{$tz}++;
346
349
347
    ## DEBUG
350
    ## DEBUG
348
    dmsg($tz) if DEBUG;
351
    dmsg($tz) if DEBUG;
349
352
350
#### Now analyse the body text ####
353
#### Now analyse the body text ####
351
    my $body = $msg->body();
354
    my $body = $msg->body();
352
355
353
    my $insig = 0;
356
    my $insig = 0;
354
    my @body  = $body->lines;
357
    my @body  = $body->lines;
355
    for (@body)
358
    for (@body)
356
    {
359
    {
357
      $totbody += length($_);    # bump total body size
360
      $totbody += length($_);    # bump total body size
358
      next if (m{^$>}o);         # don't count blank lines in body
361
      next if (m{^$>}o);         # don't count blank lines in body
359
      if ( $insig == 1 )
362
      if ( $insig == 1 )
360
      {
363
      {
361
364
362
        # bump total sig size
365
        # bump total sig size
363
        $totsig += length($_);
366
        $totsig += length($_);
364
      }
367
      }
365
      ## are we in a quote line?
368
      ## are we in a quote line?
366
      ## Bill Unruh uses ] quotes, and another poster uses ::
369
      ## Bill Unruh uses ] quotes, and another poster uses ::
367
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
370
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
368
      {
371
      {
369
        ## bump count of quoted chrs
372
        ## bump count of quoted chrs
370
        $data{$poster}{'quoted'} += length($_);
373
        $data{$poster}{'quoted'} += length($_);
371
        $totquoted += length($_);
374
        $totquoted += length($_);
372
      }
375
      }
373
      elsif (/^-- $/)
376
      elsif (/^-- $/)
374
      {
377
      {
375
        $insig = 1;
378
        $insig = 1;
376
      }
379
      }
377
      else
380
      else
378
      {
381
      {
379
        ## We must be processing an original line
382
        ## We must be processing an original line
380
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
383
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
381
        $totorig += length($_);
384
        $totorig += length($_);
382
      }
385
      }
383
    }
386
    }
384
387
385
    # end for (@body)
388
    # end for (@body)
386
  }
389
  }
387
390
388
  close($FILE);
391
  close($FILE);
389
}
392
}
390
393
391
sub get_agent
394
sub get_agent
392
{
395
{
393
  my $msg = shift;
396
  my $msg = shift;
394
397
395
  my $ua =
398
  my $ua =
396
       $msg->study('User-Agent')
399
       $msg->study('User-Agent')
397
    or $msg->study('X-Newsreader')
400
    or $msg->study('X-Newsreader')
398
    or $msg->study('X-Mailer');
401
    or $msg->study('X-Mailer');
399
  if ( not defined $ua )
402
  if ( not defined $ua )
400
  {
403
  {
401
    my $org = $msg->study('Organization');
404
    my $org = $msg->study('Organization');
402
    if ( defined $org
405
    if ( defined $org
403
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
406
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
404
    {
407
    {
405
      $ua = $org;
408
      $ua = $org;
406
    }
409
    }
407
    elsif ( $msg->study('Message-ID') =~ /pine/i )
410
    elsif ( $msg->study('Message-ID') =~ /pine/i )
408
    {
411
    {
409
      $ua = "Pine";
412
      $ua = "Pine";
410
    }
413
    }
411
  }
414
  }
412
415
413
  ## Hopefully found UA, else set to unknown
416
  ## Hopefully found UA, else set to unknown
414
  if ( not defined $ua )
417
  if ( not defined $ua )
415
  {
418
  {
416
    $ua = __ "unknown";
419
    $ua = __ "unknown";
417
  }
420
  }
418
421
419
  $ua = clean($ua);
422
  $ua = clean($ua);
420
423
421
  my $raw   = $ua;
424
  my $raw   = $ua;
422
  my $agent = $raw;
425
  my $agent = $raw;
423
426
424
  ## strip http
427
  ## strip http
425
  if ( $raw =~ /.*http.*/ )
428
  if ( $raw =~ /.*http.*/ )
426
  {
429
  {
427
    $raw =~ s!posted via!!i;
430
    $raw =~ s!posted via!!i;
428
    $raw =~ s!http://!!g;
431
    $raw =~ s!http://!!g;
429
    $raw =~ s!/!!g;
432
    $raw =~ s!/!!g;
430
    $raw =~ s! !!g;
433
    $raw =~ s! !!g;
431
  }
434
  }
432
435
433
  ## Fix Outlook from Mac
436
  ## Fix Outlook from Mac
434
  if ( $raw =~ /^microsoft/i )
437
  if ( $raw =~ /^microsoft/i )
435
  {
438
  {
436
    $raw =~ s/-/ /g;
439
    $raw =~ s/-/ /g;
437
  }
440
  }
438
441
439
  ## Pick out the popular agents
442
  ## Pick out the popular agents
440
  if ( $raw =~ /(outlook express)/i
443
  if ( $raw =~ /(outlook express)/i
441
    || $raw =~ /(windows mail)/i
444
    || $raw =~ /(windows mail)/i
442
    || $raw =~ /(microplanet gravity)/i
445
    || $raw =~ /(microplanet gravity)/i
443
    || $raw =~ /(news rover)/i
446
    || $raw =~ /(news rover)/i
444
    || $raw =~ /(forte agent)/i
447
    || $raw =~ /(forte agent)/i
445
    || $raw =~ /(forte free agent)/i )
448
    || $raw =~ /(forte free agent)/i )
446
  {
449
  {
447
    $agent = $1;
450
    $agent = $1;
448
  }
451
  }
449
  elsif (
452
  elsif (
450
    $raw =~ /^(
453
    $raw =~ /^(
451
        pan
454
        pan
452
       |sylpheed
455
       |sylpheed
453
       |slrn
456
       |slrn
454
       |mozilla
457
       |mozilla
455
       |knode
458
       |knode
456
       |tin
459
       |tin
457
       |hamster
460
       |hamster
458
       |xrn
461
       |xrn
459
       |xnews
462
       |xnews
460
       |aol
463
       |aol
461
       |gnus
464
       |gnus
462
       |krn
465
       |krn
463
       |macsoup
466
       |macsoup
464
       |messenger
467
       |messenger
465
       |openxp
468
       |openxp
466
       |pine
469
       |pine
467
       |thoth
470
       |thoth
468
       |turnpike
471
       |turnpike
469
       |winvn
472
       |winvn
470
       |vsoup
473
       |vsoup
471
       |google
474
       |google
472
       |supernews
475
       |supernews
473
       |nn
476
       |nn
474
       |rn
477
       |rn
475
       |007
478
       |007
476
       |webtv
479
       |webtv
477
       |compuserve
480
       |compuserve
478
       )/ix
481
       )/ix
479
    )
482
    )
480
  {
483
  {
481
    $agent = $1;
484
    $agent = $1;
482
  }
485
  }
483
  else
486
  else
484
  {
487
  {
485
    ## Clean up unknown agents
488
    ## Clean up unknown agents
486
    if ( $raw =~ m!^(.*?)/! )
489
    if ( $raw =~ m!^(.*?)/! )
487
    {
490
    {
488
      $agent = $1;
491
      $agent = $1;
489
    }
492
    }
490
    elsif ( $raw =~ /^(\w*)\d.*/ )
493
    elsif ( $raw =~ /^(\w*)\d.*/ )
491
    {
494
    {
492
      $agent = $1;
495
      $agent = $1;
493
    }
496
    }
494
  }
497
  }
495
498
496
  $distinct_agent{$agent}++;
499
  $distinct_agent{$agent}++;
497
  return $agent;
500
  return $agent;
498
}
501
}
499
## get_agent
502
## get_agent
500
503
501
#########################################
504
#########################################
502
## Count the User-Agents used, collapsing
505
## Count the User-Agents used, collapsing
503
## different versions into one per agent.
506
## different versions into one per agent.
504
#########################################
507
#########################################
505
sub count_agents
508
sub count_agents
506
{
509
{
507
POSTER:
510
POSTER:
508
  foreach my $poster ( keys %data )
511
  foreach my $poster ( keys %data )
509
  {
512
  {
510
    foreach my $agent_name ( keys %distinct_agent )
513
    foreach my $agent_name ( keys %distinct_agent )
511
    {    # check against known ones
514
    {    # check against known ones
512
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
515
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
513
      {
516
      {
514
        $agents{$agent_name}++;
517
        $agents{$agent_name}++;
515
        next POSTER;
518
        next POSTER;
516
      }
519
      }
517
    }
520
    }
518
    $agents{ $data{$poster}{'agent'} }++;
521
    $agents{ $data{$poster}{'agent'} }++;
519
  }
522
  }
520
}    # count_agents
523
}    # count_agents
521
524
522
#############################################
525
#############################################
523
## Set orig/total percentages for all posters
526
## Set orig/total percentages for all posters
524
#############################################
527
#############################################
525
sub fix_percent
528
sub fix_percent
526
{
529
{
527
  foreach my $poster ( keys %data )
530
  foreach my $poster ( keys %data )
528
  {
531
  {
529
    my $percent = 100;
532
    my $percent = 100;
530
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
533
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
531
    {
534
    {
532
      $percent =
535
      $percent =
533
        $data{$poster}{'orig'} * 100 /
536
        $data{$poster}{'orig'} * 100 /
534
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
537
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
535
    }
538
    }
536
    elsif ( $data{$poster}{'orig'} == 0 )
539
    elsif ( $data{$poster}{'orig'} == 0 )
537
    {
540
    {
538
      $percent = 0;
541
      $percent = 0;
539
    }
542
    }
540
    $data{$poster}{'percent'} = $percent;
543
    $data{$poster}{'percent'} = $percent;
541
  }
544
  }
542
}
545
}
543
## fix_percent
546
## fix_percent
544
547
545
##################################
548
##################################
546
## Write data structures to a file
549
## Write data structures to a file
547
##################################
550
##################################
548
sub write_data
551
sub write_data
549
{
552
{
550
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
553
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
551
    or die __x( "Can't create XDATA: {error}\n", error => $! );
554
    or die __x( "Can't create XDATA: {error}\n", error => $! );
552
  print $OUTF "Data collected from $newsgroup_name\n\n";
555
  print $OUTF "Data collected from $newsgroup_name\n\n";
553
  print $OUTF
556
  print $OUTF
554
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
557
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
555
  foreach my $name ( keys %data )
558
  foreach my $name ( keys %data )
556
  {
559
  {
557
    print $OUTF
560
    print $OUTF
558
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
561
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
559
  }
562
  }
560
  print $OUTF
563
  print $OUTF
561
"============================================================================\n";
564
"============================================================================\n";
562
  print $OUTF "Thread subjects\n";
565
  print $OUTF "Thread subjects\n";
563
  print $OUTF
566
  print $OUTF
564
"----------------------------------------------------------------------------\n";
567
"----------------------------------------------------------------------------\n";
565
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
568
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
566
  {
569
  {
567
    print $OUTF
570
    print $OUTF
568
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
571
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
569
  }
572
  }
570
  print $OUTF
573
  print $OUTF
571
"============================================================================\n";
574
"============================================================================\n";
572
  print $OUTF "Cross-posts\n";
575
  print $OUTF "Cross-posts\n";
573
  print $OUTF
576
  print $OUTF
574
"----------------------------------------------------------------------------\n";
577
"----------------------------------------------------------------------------\n";
575
  foreach my $name ( sort keys %crossposts )
578
  foreach my $name ( sort keys %crossposts )
576
  {
579
  {
577
    print $OUTF "$name : $crossposts{$name}\n";
580
    print $OUTF "$name : $crossposts{$name}\n";
578
  }
581
  }
579
  print $OUTF
582
  print $OUTF
580
"============================================================================\n";
583
"============================================================================\n";
581
  print $OUTF "User agents\n";
584
  print $OUTF "User agents\n";
582
  print $OUTF
585
  print $OUTF
583
"----------------------------------------------------------------------------\n";
586
"----------------------------------------------------------------------------\n";
584
  foreach my $name ( sort keys %agents )
587
  foreach my $name ( sort keys %agents )
585
  {
588
  {
586
    print $OUTF "$name : $agents{$name}\n";
589
    print $OUTF "$name : $agents{$name}\n";
587
  }
590
  }
588
  print $OUTF
591
  print $OUTF
589
"============================================================================\n";
592
"============================================================================\n";
590
  print $OUTF "Time zones\n";
593
  print $OUTF "Time zones\n";
591
  print $OUTF
594
  print $OUTF
592
"----------------------------------------------------------------------------\n";
595
"----------------------------------------------------------------------------\n";
593
  foreach my $name ( sort keys %tz )
596
  foreach my $name ( sort keys %tz )
594
  {
597
  {
595
    print $OUTF "$name : $tz{$name}\n";
598
    print $OUTF "$name : $tz{$name}\n";
596
  }
599
  }
597
  close $OUTF;
600
  close $OUTF;
598
}    # write_data
601
}    # write_data
599
602
600
sub display_results
603
sub display_results
601
{
604
{
602
  #################### DISPLAY RESULTS #####################
605
  #################### DISPLAY RESULTS #####################
603
  println( "=" x 76 );
606
  println( "=" x 76 );
604
  printf "%s\n",
607
  printf "%s\n",
605
    centred(
608
    centred(
606
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
609
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
607
    76 );
610
    76 );
608
  println( "=" x 76 );
611
  println( "=" x 76 );
609
  printf "%s\n",
612
  printf "%s\n",
610
    centred(
613
    centred(
611
    __(
614
    __(
612
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
615
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
613
    ),
616
    ),
614
    76
617
    76
615
    );
618
    );
616
  print "\n\n";
619
  print "\n\n";
617
  printf __"Total posts considered: %s over %d days\n",
620
  printf __"Total posts considered: %s over %d days\n",
618
    $formatter->format_number($totalposts),
621
    $formatter->format_number($totalposts),
619
    $formatter->format_number($numdays);
622
    $formatter->format_number($numdays);
620
  my $time_locale       = setlocale(LC_TIME);
623
  my $time_locale       = setlocale(LC_TIME);
621
  my $earliest_datetime = DateTime->from_epoch(
624
  my $earliest_datetime = DateTime->from_epoch(
622
    epoch     => $earliest,
625
    epoch     => $earliest,
623
    locale    => $time_locale,
626
    locale    => $time_locale,
624
    time_zone => 'UTC',
627
    time_zone => 'UTC',
625
  );
628
  );
626
  my $latest_datetime = DateTime->from_epoch(
629
  my $latest_datetime = DateTime->from_epoch(
627
    epoch     => $latest,
630
    epoch     => $latest,
628
    locale    => $time_locale,
631
    locale    => $time_locale,
629
    time_zone => 'UTC',
632
    time_zone => 'UTC',
630
  );
633
  );
631
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
634
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
632
  printf __"Earliest article: %s\n",
635
  printf __"Earliest article: %s\n",
633
    $earliest_datetime->strftime($datetime_format);
636
    $earliest_datetime->strftime($datetime_format);
634
  printf __"Latest article:   %s\n",
637
  printf __"Latest article:   %s\n",
635
    $latest_datetime->strftime($datetime_format);
638
    $latest_datetime->strftime($datetime_format);
636
  printf __"Original articles: %s; replies: %s\n",
639
  printf __"Original articles: %s; replies: %s\n",
637
    $formatter->format_number($origposts),
640
    $formatter->format_number($origposts),
638
    $formatter->format_number($replies);
641
    $formatter->format_number($replies);
639
  printf __"Total size of posts: %s bytes (%s)" . "\n",
642
  printf __"Total size of posts: %s bytes (%s)" . "\n",
640
    $formatter->format_number($totsize),
643
    $formatter->format_number($totsize),
641
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
644
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
642
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
645
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
643
    $formatter->format_number( int( $totalposts / $numdays ) ),
646
    $formatter->format_number( int( $totalposts / $numdays ) ),
644
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
647
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
645
    $formatter->format_number( int( $totsize / $totalposts ) );
648
    $formatter->format_number( int( $totsize / $totalposts ) );
646
649
647
  my $count = keys %data;
650
  my $count = keys %data;
648
  printf __"Total headers: %s; bodies: %s\n",
651
  printf __"Total headers: %s; bodies: %s\n",
649
    $formatter->format_bytes(
652
    $formatter->format_bytes(
650
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
653
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
651
    ),
654
    ),
652
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
655
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
653
  printf __
656
  printf __
654
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
657
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
655
    $formatter->format_bytes(
658
    $formatter->format_bytes(
656
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
659
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
657
    ),
660
    ),
658
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
661
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
659
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
662
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
660
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
663
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
661
  printf __"Total number of posters: %s, average %s per poster\n",
664
  printf __"Total number of posters: %s, average %s per poster\n",
662
    $formatter->format_number($count),
665
    $formatter->format_number($count),
663
    $formatter->format_bytes( $totsize / $count,
666
    $formatter->format_bytes( $totsize / $count,
664
    ( 'precision' => 1, 'mode' => 'iec' ) );
667
    ( 'precision' => 1, 'mode' => 'iec' ) );
665
  $count = keys %threads;
668
  $count = keys %threads;
666
  printf __"Total number of threads: %s, average %s per thread\n",
669
  printf __"Total number of threads: %s, average %s per thread\n",
667
    $formatter->format_number($count),
670
    $formatter->format_number($count),
668
    $formatter->format_bytes( $totsize / $count,
671
    $formatter->format_bytes( $totsize / $count,
669
    ( 'precision' => 1, 'mode' => 'iec' ) );
672
    ( 'precision' => 1, 'mode' => 'iec' ) );
670
  printf __"Total number of user agents: %d\n",
673
  printf __"Total number of user agents: %d\n",
671
    $formatter->format_number( scalar keys %agents );
674
    $formatter->format_number( scalar keys %agents );
672
  print "\n", "=" x 76, "\n"  ;
675
  print "\n", "=" x 76, "\n"  ;
673
  ########################################
676
  ########################################
674
  ## Show posters by article count  Sec 1;
677
  ## Show posters by article count  Sec 1;
675
  ########################################
678
  ########################################
676
  unless ( $skipSec{1} )
679
  unless ( $skipSec{1} )
677
  {
680
  {
678
    if ( keys %data < $topposters )
681
    if ( keys %data < $topposters )
679
    {
682
    {
680
      $count = keys %data;
683
      $count = keys %data;
681
    }
684
    }
682
    else
685
    else
683
    {
686
    {
684
      $count = $topposters;
687
      $count = $topposters;
685
    }
688
    }
686
    printf "%s\n",
689
    printf "%s\n",
687
      centred(
690
      centred(
688
      __x( "Top {count} posters by number of articles", count => $topposters ),
691
      __x( "Top {count} posters by number of articles", count => $topposters ),
689
      76
692
      76
690
      );
693
      );
691
    print "=" x 76, "\n";
694
    print "=" x 76, "\n";
692
    my $i = 0;
695
    my $i = 0;
693
    foreach
696
    foreach
694
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
697
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
695
    {
698
    {
696
      my $name = substr( $poster, 0, 65 );
699
      my $name = substr( $poster, 0, 65 );
697
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
700
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
698
        $data{$poster}{count};
701
        $data{$poster}{count};
699
      last if ( ++$i == $count );
702
      last if ( ++$i == $count );
700
    }
703
    }
701
    print "\n", "=" x 76, "\n";
704
    print "\n", "=" x 76, "\n";
702
  }
705
  }
703
706
704
  ######################################
707
  ######################################
705
  ## Show posters by size in KiB  Sec 2;
708
  ## Show posters by size in KiB  Sec 2;
706
  ######################################
709
  ######################################
707
  unless ( $skipSec{2} )
710
  unless ( $skipSec{2} )
708
  {
711
  {
709
    if ( keys %data < $topposters )
712
    if ( keys %data < $topposters )
710
    {
713
    {
711
      $count = keys %data;
714
      $count = keys %data;
712
    }
715
    }
713
    else
716
    else
714
    {
717
    {
715
      $count = $topposters;
718
      $count = $topposters;
716
    }
719
    }
717
    printf "%s\n",
720
    printf "%s\n",
718
      centred(
721
      centred(
719
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
722
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
720
      76
723
      76
721
      );
724
      );
722
    print "=" x 76, "\n";
725
    print "=" x 76, "\n";
723
    my $i = 0;
726
    my $i = 0;
724
    foreach
727
    foreach
725
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
728
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
726
    {
729
    {
727
      my $name = substr( $poster, 0, 62 );
730
      my $name = substr( $poster, 0, 62 );
728
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
731
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
729
        $data{$poster}{size} / 1024;    #/
732
        $data{$poster}{size} / 1024;    #/
730
      last if ( ++$i == $count );
733
      last if ( ++$i == $count );
731
    }
734
    }
732
    print "\n", "=" x 76, "\n";
735
    print "\n", "=" x 76, "\n";
733
  }
736
  }
734
737
735
  #####################################
738
  #####################################
736
  ## Show top posters for original text
739
  ## Show top posters for original text
737
  #####################################
740
  #####################################
738
  unless ( $skipSec{3} )
741
  unless ( $skipSec{3} )
739
  {
742
  {
740
    if ( keys %data < $topposters )
743
    if ( keys %data < $topposters )
741
    {
744
    {
742
      $count = keys %data;
745
      $count = keys %data;
743
    }
746
    }
744
    else
747
    else
745
    {
748
    {
746
      $count = $topposters;
749
      $count = $topposters;
747
    }
750
    }
748
    printf "%s\n",
751
    printf "%s\n",
749
      centred(
752
      centred(
750
      __x(
753
      __x(
751
        "Top {count} responders by original text (> 5 posts)",
754
        "Top {count} responders by original text (> 5 posts)",
752
        count => $topposters
755
        count => $topposters
753
      ),
756
      ),
754
      76
757
      76
755
      );
758
      );
756
    print "=" x 76, "\n";
759
    print "=" x 76, "\n";
757
    my $i = 0;
760
    my $i = 0;
758
    foreach my $poster (
761
    foreach my $poster (
759
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
762
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
760
      keys %data
763
      keys %data
761
      )
764
      )
762
    {
765
    {
763
      next if $data{$poster}{quoted} == 0;
766
      next if $data{$poster}{quoted} == 0;
764
      next if $data{$poster}{count} < 5;
767
      next if $data{$poster}{count} < 5;
765
      my $name = substr( $poster, 0, 63 );
768
      my $name = substr( $poster, 0, 63 );
766
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
769
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
767
        $data{$poster}{percent};
770
        $data{$poster}{percent};
768
      last if ( ++$i == $count );
771
      last if ( ++$i == $count );
769
    }
772
    }
770
    print "\n", "=" x 76, "\n";
773
    print "\n", "=" x 76, "\n";
771
  }
774
  }
772
775
773
  ########################################
776
  ########################################
774
  ## Show bottom posters for original text
777
  ## Show bottom posters for original text
775
  ########################################
778
  ########################################
776
  unless ( $skipSec{4} )
779
  unless ( $skipSec{4} )
777
  {
780
  {
778
    if ( keys %data < $topposters )
781
    if ( keys %data < $topposters )
779
    {
782
    {
780
      $count = keys %data;
783
      $count = keys %data;
781
    }
784
    }
782
    else
785
    else
783
    {
786
    {
784
      $count = $topposters;
787
      $count = $topposters;
785
    }
788
    }
786
    printf "%s\n",
789
    printf "%s\n",
787
      centred(
790
      centred(
788
      __x(
791
      __x(
789
        "Bottom {count} responders by original text  (> 5 posts)",
792
        "Bottom {count} responders by original text  (> 5 posts)",
790
        count => $topposters
793
        count => $topposters
791
      ),
794
      ),
792
      76
795
      76
793
      );
796
      );
794
    print "=" x 76, "\n";
797
    print "=" x 76, "\n";
795
    my $i = 0;
798
    my $i = 0;
796
    foreach my $poster (
799
    foreach my $poster (
797
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
800
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
798
      keys %data
801
      keys %data
799
      )
802
      )
800
    {
803
    {
801
      next if $data{$poster}{quoted} == 0;
804
      next if $data{$poster}{quoted} == 0;
802
      next if $data{$poster}{count} < 5;
805
      next if $data{$poster}{count} < 5;
803
      my $name = substr( $poster, 0, 63 );
806
      my $name = substr( $poster, 0, 63 );
804
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
807
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
805
        $data{$poster}{percent};
808
        $data{$poster}{percent};
806
      last if ( ++$i == $count );
809
      last if ( ++$i == $count );
807
    }
810
    }
808
    print "\n", "=" x 76, "\n";
811
    print "\n", "=" x 76, "\n";
809
  }
812
  }
810
813
811
  #####################################
814
  #####################################
812
  ## Show threads by number of articles
815
  ## Show threads by number of articles
813
  #####################################
816
  #####################################
814
  unless ( $skipSec{5} )
817
  unless ( $skipSec{5} )
815
  {
818
  {
816
    if ( keys %threads < $topthreads )
819
    if ( keys %threads < $topthreads )
817
    {
820
    {
818
      $count = keys %threads;
821
      $count = keys %threads;
819
    }
822
    }
820
    else
823
    else
821
    {
824
    {
822
      $count = $topthreads;
825
      $count = $topthreads;
823
    }
826
    }
824
    printf "%s\n",
827
    printf "%s\n",
825
      centred(
828
      centred(
826
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
829
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
827
      76 );
830
      76 );
828
    print "=" x 76, "\n";
831
    print "=" x 76, "\n";
829
    my $i = 0;
832
    my $i = 0;
830
    foreach my $thread (
833
    foreach my $thread (
831
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
834
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
832
      keys %threads
835
      keys %threads
833
      )
836
      )
834
    {
837
    {
835
      my $name = substr( $thread, 0, 65 );
838
      my $name = substr( $thread, 0, 65 );
836
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
839
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
837
        $threads{$thread}{'count'};
840
        $threads{$thread}{'count'};
838
      last if ( ++$i == $count );
841
      last if ( ++$i == $count );
839
    }
842
    }
840
    print "\n", "=" x 76, "\n";
843
    print "\n", "=" x 76, "\n";
841
  }
844
  }
842
845
843
  ##############################
846
  ##############################
844
  ## Show threads by size in KiB
847
  ## Show threads by size in KiB
845
  ##############################
848
  ##############################
846
  unless ( $skipSec{6} )
849
  unless ( $skipSec{6} )
847
  {
850
  {
848
    if ( keys %threads < $topthreads )
851
    if ( keys %threads < $topthreads )
849
    {
852
    {
850
      $count = keys %threads;
853
      $count = keys %threads;
851
    }
854
    }
852
    else
855
    else
853
    {
856
    {
854
      $count = $topthreads;
857
      $count = $topthreads;
855
    }
858
    }
856
    printf "%s\n",
859
    printf "%s\n",
857
      centred(
860
      centred(
858
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
861
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
859
    print "=" x 76, "\n";
862
    print "=" x 76, "\n";
860
    my $i = 0;
863
    my $i = 0;
861
    foreach my $thread (
864
    foreach my $thread (
862
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
865
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
863
      keys %threads
866
      keys %threads
864
      )
867
      )
865
    {
868
    {
866
      my $name = substr( $thread, 0, 65 );
869
      my $name = substr( $thread, 0, 65 );
867
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
870
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
868
        $threads{$thread}{'size'} / 1024;    #/
871
        $threads{$thread}{'size'} / 1024;    #/
869
      last if ( ++$i == $count );
872
      last if ( ++$i == $count );
870
    }
873
    }
871
    print "\n", "=" x 76, "\n";
874
    print "\n", "=" x 76, "\n";
872
  }
875
  }
873
876
874
  ##################################
877
  ##################################
875
  ## Show top 10 cross-posted groups
878
  ## Show top 10 cross-posted groups
876
  ##################################
879
  ##################################
877
  unless ( $skipSec{7} )
880
  unless ( $skipSec{7} )
878
  {
881
  {
879
    delete $crossposts{"$newsgroup_name"};    # don't include ours
882
    delete $crossposts{"$newsgroup_name"};    # don't include ours
880
    if ( keys %crossposts < $topcrossposts )
883
    if ( keys %crossposts < $topcrossposts )
881
    {
884
    {
882
      $count = keys %crossposts;
885
      $count = keys %crossposts;
883
    }
886
    }
884
    else
887
    else
885
    {
888
    {
886
      $count = $topcrossposts;
889
      $count = $topcrossposts;
887
    }
890
    }
888
    printf "%s\n",
891
    printf "%s\n",
889
      centred(
892
      centred(
890
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
893
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
891
    print "=" x 76, "\n";
894
    print "=" x 76, "\n";
892
    my $i = 0;
895
    my $i = 0;
893
    foreach
896
    foreach
894
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
897
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
895
    {
898
    {
896
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
899
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
897
        $crossposts{$name};
900
        $crossposts{$name};
898
      last if ( ++$i == $count );
901
      last if ( ++$i == $count );
899
    }
902
    }
900
    print "\n", "=" x 76, "\n";
903
    print "\n", "=" x 76, "\n";
901
  }
904
  }
902
905
903
  #########################
906
  #########################
904
  ## Show agents and counts
907
  ## Show agents and counts
905
  #########################
908
  #########################
906
  unless ( $skipSec{8} )
909
  unless ( $skipSec{8} )
907
  {
910
  {
908
    if ( keys %agents < $topagents )
911
    if ( keys %agents < $topagents )
909
    {
912
    {
910
      $count = keys %agents;
913
      $count = keys %agents;
911
    }
914
    }
912
    else
915
    else
913
    {
916
    {
914
      $count = $topagents;
917
      $count = $topagents;
915
    }
918
    }
916
    printf "%s\n",
919
    printf "%s\n",
917
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
920
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
918
      76 );
921
      76 );
919
    print "=" x 76, "\n";
922
    print "=" x 76, "\n";
920
    my $i = 0;
923
    my $i = 0;
921
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
924
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
922
    {
925
    {
923
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
926
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
924
        $agents{$agent};
927
        $agents{$agent};
925
      last if ( ++$i == $count );
928
      last if ( ++$i == $count );
926
    }
929
    }
927
    print "\n", "=" x 76, "\n";
930
    print "\n", "=" x 76, "\n";
928
  }
931
  }
929
932
930
  #######################
933
  #######################
931
  ## Show distinct agents
934
  ## Show distinct agents
932
  #######################
935
  #######################
933
  unless ( $skipSec{9} )
936
  unless ( $skipSec{9} )
934
  {
937
  {
935
    if ( keys %distinct_agent < $topagents )
938
    if ( keys %distinct_agent < $topagents )
936
    {
939
    {
937
      $count = keys %distinct_agent;
940
      $count = keys %distinct_agent;
938
    }
941
    }
939
    else
942
    else
940
    {
943
    {
941
      $count = $topagents;
944
      $count = $topagents;
942
    }
945
    }
943
    printf "%s\n",
946
    printf "%s\n",
944
      centred(
947
      centred(
945
      __x( "Top {count} user agents by number of posts", count => $topagents ),
948
      __x( "Top {count} user agents by number of posts", count => $topagents ),
946
      76
949
      76
947
      );
950
      );
948
    print "=" x 76, "\n";
951
    print "=" x 76, "\n";
949
    my $i = 0;
952
    my $i = 0;
950
    foreach my $agent (
953
    foreach my $agent (
951
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
954
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
952
      keys %distinct_agent
955
      keys %distinct_agent
953
      )
956
      )
954
    {
957
    {
955
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
958
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
956
        $distinct_agent{$agent},
959
        $distinct_agent{$agent},
957
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
960
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
958
      last if ( ++$i == $count );
961
      last if ( ++$i == $count );
959
    }
962
    }
960
    print "\n", "=" x 76, "\n";
963
    print "\n", "=" x 76, "\n";
961
  }
964
  }
962
965
963
  ############################
966
  ############################
964
  ## Show timezones and counts
967
  ## Show timezones and counts
965
  ############################
968
  ############################
966
  unless ( $skipSec{10} )
969
  unless ( $skipSec{10} )
967
  {
970
  {
968
    if ( keys %tz < $toptz )
971
    if ( keys %tz < $toptz )
969
    {
972
    {
970
      $count = keys %tz;
973
      $count = keys %tz;
971
    }
974
    }
972
    else
975
    else
973
    {
976
    {
974
      $count = $toptz;
977
      $count = $toptz;
975
    }
978
    }
976
    printf "%s\n",
979
    printf "%s\n",
977
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
980
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
978
    print "=" x 76, "\n";
981
    print "=" x 76, "\n";
979
    my $i = 0;
982
    my $i = 0;
980
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
983
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
981
    {
984
    {
982
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
985
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
983
      last if ( ++$i == $count );
986
      last if ( ++$i == $count );
984
    }
987
    }
985
    print "\n", "=" x 76, "\n";
988
    print "\n", "=" x 76, "\n";
986
  }
989
  }
987
}
990
}
988
991
989
## helper subs
992
## helper subs
990
993
991
###############################
994
###############################
992
## Right pad a string with '.'s
995
## Right pad a string with '.'s
993
###############################
996
###############################
994
sub rpad
997
sub rpad
995
{
998
{
996
  ## Get text to pad, length to pad, pad chr
999
  ## Get text to pad, length to pad, pad chr
997
  my ( $text, $pad_len, $pad_chr ) = @_;
1000
  my ( $text, $pad_len, $pad_chr ) = @_;
998
1001
999
  ## DEBUG
1002
  ## DEBUG
1000
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
1003
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
1001
1004
1002
  if ( length($text) > $pad_len )
1005
  if ( length($text) > $pad_len )
1003
  {
1006
  {
1004
    $text = substr( $text, 0, $pad_len );
1007
    $text = substr( $text, 0, $pad_len );
1005
  }
1008
  }
1006
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1009
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1007
  return $padded;
1010
  return $padded;
1008
}
1011
}
1009
1012
1010
##################
1013
##################
1011
## Centre a string
1014
## Centre a string
1012
##################
1015
##################
1013
sub centred
1016
sub centred
1014
{
1017
{
1015
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1018
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1016
  my $pad_len = ( $width - length($text) ) / 2;    #/
1019
  my $pad_len = ( $width - length($text) ) / 2;    #/
1017
  my $centred = " " x $pad_len . $text;
1020
  my $centred = " " x $pad_len . $text;
1018
  return $centred;
1021
  return $centred;
1019
}
1022
}
1020
1023
1021
###########################
1024
###########################
1022
## Put commas into a number
1025
## Put commas into a number
1023
###########################
1026
###########################
1024
sub commify
1027
sub commify
1025
{
1028
{
1026
  local $_ = shift;
1029
  local $_ = shift;
1027
  my $number = $_;
1030
  my $number = $_;
1028
  $_ = int;                                        # Chop non-integer part
1031
  $_ = int;                                        # Chop non-integer part
1029
  1 while
1032
  1 while
1030
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1033
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1031
  my $int_part  = $_;
1034
  my $int_part  = $_;
1032
  my $real_part = '';
1035
  my $real_part = '';
1033
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1036
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1034
  {
1037
  {
1035
    $real_part = $1;
1038
    $real_part = $1;
1036
  }
1039
  }
1037
  return $int_part . $real_part;
1040
  return $int_part . $real_part;
1038
}
1041
}
1039
1042
1040
################################################################
1043
################################################################
1041
## Returns a string with leading and trailing whitespace removed
1044
## Returns a string with leading and trailing whitespace removed
1042
################################################################
1045
################################################################
1043
sub clean
1046
sub clean
1044
{
1047
{
1045
  my $dirty = shift;
1048
  my $dirty = shift;
1046
  my $clean = $dirty;
1049
  my $clean = $dirty;
1047
  $clean =~ s/^\s+|\s+$//g;
1050
  $clean =~ s/^\s+|\s+$//g;
1048
1051
1049
  return $clean;
1052
  return $clean;
1050
}
1053
}
1051
1054
1052
sub usage
1055
sub usage
1053
{
1056
{
1054
  println( __ "usage: newsstat.pl NEWS.GROUP" );
1057
  println( __ "usage: newsstat.pl NEWS.GROUP" );
1055
  exit 1;
1058
  exit 1;
1056
}
1059
}
1057
1060
1058
sub dmsg
1061
sub dmsg
1059
{
1062
{
1060
  print STDERR @_, "\n";
1063
  print STDERR @_, "\n";
1061
}
1064
}
1062
1065
1063
sub dmsg2
1066
sub dmsg2
1064
{
1067
{
1065
  my ( $level, @msg ) = @_;
1068
  my ( $level, @msg ) = @_;
1066
  print STDERR @msg, "\n" if $level >= DEBUG;
1069
  print STDERR @msg, "\n" if $level >= DEBUG;
1067
}
1070
}
1068
1071
1069
sub println
1072
sub println
1070
{
1073
{
1071
  print @_, "\n";
1074
  print @_, "\n";
1072
}
1075
}
1073
 
1076