Subversion Repositories LCARS

Rev

Rev 24 | Rev 30 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 24 Rev 25
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
require 5.004;
4
require 5.004;
5
5
6
#use diagnostics;
6
#use diagnostics;
7
use utf8;
7
use utf8;
8
use Encode;
8
use Encode;
9
9
10
use constant DEBUG => 0;
10
use constant DEBUG => 0;
11
11
12
## newsstat.pl
12
## newsstat.pl
13
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
13
## Copyright (C) 2011, 2012  Thomas Lahn <startrek@PointedEars.de>
14
## Based on work by Garry Knight et al.
14
## Based on work by Garry Knight et al.
15
## 
15
## 
16
## This program is free software: you can redistribute it and/or modify
16
## This program is free software: you can redistribute it and/or modify
17
## it under the terms of the GNU General Public License as published by
17
## it under the terms of the GNU General Public License as published by
18
## the Free Software Foundation, either version 3 of the License, or
18
## the Free Software Foundation, either version 3 of the License, or
19
## (at your option) any later version.
19
## (at your option) any later version.
20
## 
20
## 
21
## This program is distributed in the hope that it will be useful,
21
## This program is distributed in the hope that it will be useful,
22
## but WITHOUT ANY WARRANTY; without even the implied warranty of
22
## but WITHOUT ANY WARRANTY; without even the implied warranty of
23
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
## GNU General Public License for more details.
24
## GNU General Public License for more details.
25
## 
25
## 
26
## You should have received a copy of the GNU General Public License
26
## You should have received a copy of the GNU General Public License
27
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
27
## along with this program.  If not, see <http://www.gnu.org/licenses/>.
28
28
29
## Print out all text to STDOUT UTF-8 encoded
29
## Print out all text to STDOUT UTF-8 encoded
30
binmode STDOUT, ':encoding(UTF-8)';
30
binmode STDOUT, ':encoding(UTF-8)';
31
binmode STDERR, ':encoding(UTF-8)';
31
binmode STDERR, ':encoding(UTF-8)';
32
32
33
## L10n
33
## L10n
34
use locale ':not_characters';
34
use locale ':not_characters';
35
35
36
# setlocale( LC_MESSAGES, '' );
36
# setlocale( LC_MESSAGES, '' );
37
require Number::Format;
37
require Number::Format;
38
38
39
## i18n
39
## i18n
40
## FIXME: Automatically include resolved '.' in @INC
40
## FIXME: Automatically include resolved '.' in @INC
41
# print join "\n", @INC;
41
# print join "\n", @INC;
42
42
43
use Locale::TextDomain ('de.pointedears.newsstat');
43
use Locale::TextDomain ('de.pointedears.newsstat');
44
use POSIX              ('locale_h');
44
use POSIX              ('locale_h');
45
use Locale::Messages qw (bind_textdomain_filter
45
use Locale::Messages qw (bind_textdomain_filter
46
  bind_textdomain_codeset
46
  bind_textdomain_codeset
47
  turn_utf_8_on);
47
  turn_utf_8_on);
48
48
49
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
49
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
50
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
50
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
51
51
52
require Mail::Message;
52
require Mail::Message;
53
require DateTime;
53
require DateTime;
54
require DateTime::Format::Mail;
54
require DateTime::Format::Mail;
55
55
56
# See comments in previous example
56
# See comments in previous example
57
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
57
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
58
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
58
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
59
  'decimal_point' };
59
  'decimal_point' };
60
60
61
# Apply defaults if values are missing
61
# Apply defaults if values are missing
62
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
62
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
63
$thousands_sep = ' ' unless $thousands_sep;
63
$thousands_sep = ' ' unless $thousands_sep;
64
64
65
# grouping and mon_grouping are packed lists
65
# grouping and mon_grouping are packed lists
66
# of small integers (characters) telling the
66
# of small integers (characters) telling the
67
# grouping (thousand_seps and mon_thousand_seps
67
# grouping (thousand_seps and mon_thousand_seps
68
# being the group dividers) of numbers and
68
# being the group dividers) of numbers and
69
# monetary quantities.  The integers' meanings:
69
# monetary quantities.  The integers' meanings:
70
# 255 means no more grouping, 0 means repeat
70
# 255 means no more grouping, 0 means repeat
71
# the previous grouping, 1-254 means use that
71
# the previous grouping, 1-254 means use that
72
# as the current grouping.  Grouping goes from
72
# as the current grouping.  Grouping goes from
73
# right to left (low to high digits).  In the
73
# right to left (low to high digits).  In the
74
# below we cheat slightly by never using anything
74
# below we cheat slightly by never using anything
75
# else than the first grouping (whatever that is).
75
# else than the first grouping (whatever that is).
76
my @grouping;
76
my @grouping;
77
if ($grouping)
77
if ($grouping)
78
{
78
{
79
  @grouping = unpack( "C*", $grouping );
79
  @grouping = unpack( "C*", $grouping );
80
}
80
}
81
else
81
else
82
{
82
{
83
  @grouping = (3);
83
  @grouping = (3);
84
}
84
}
85
85
86
## FIXME: Why don't the defaults work already?
86
## FIXME: Why don't the defaults work already?
87
my $formatter = new Number::Format(
87
my $formatter = new Number::Format(
88
  -decimal_point => $decimal_point,
88
  -decimal_point => $decimal_point,
89
  -thousands_sep => $thousands_sep,
89
  -thousands_sep => $thousands_sep,
90
90
91
  # -grouping      => $grouping[0]
91
  # -grouping      => $grouping[0]
92
);
92
);
93
93
94
###################### USER CONFIGURATIONS ############################
94
###################### USER CONFIGURATIONS ############################
95
95
96
## The name of the group to do stats for
96
## The name of the group to do stats for
97
my $newsgroup_name = $ARGV[0];
97
my $newsgroup_name = $ARGV[0];
98
$newsgroup_name or usage();
98
$newsgroup_name or usage();
99
99
100
## Check for removal flags
100
## Check for removal flags
101
my $ix;
101
my $ix;
102
my $j;
102
my $j;
103
my %skipSec;
103
my %skipSec;
104
my @skiplist;
104
my @skiplist;
105
my $args = @ARGV;
105
my $args = @ARGV;
106
for ( $ix = 1 ; $ix < $args ; $ix++ )
106
for ( $ix = 1 ; $ix < $args ; $ix++ )
107
{
107
{
108
  $j = $ix + 1;
108
  $j = $ix + 1;
109
  if ( $ARGV[$ix] eq "-x" )
109
  if ( $ARGV[$ix] eq "-x" )
110
  {
110
  {
111
    @skiplist = split( ",", $ARGV[$j] );
111
    @skiplist = split( ",", $ARGV[$j] );
112
  }
112
  }
113
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
113
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
114
  {
114
  {
115
    @skiplist = split( ",", $1 );
115
    @skiplist = split( ",", $1 );
116
  }
116
  }
117
}
117
}
118
foreach (@skiplist)
118
foreach (@skiplist)
119
{
119
{
120
  $skipSec{$_} = 1;
120
  $skipSec{$_} = 1;
121
}
121
}
122
122
123
## Leafnode users will want /var/spool/news for this variable.
123
## Leafnode users will want /var/spool/news for this variable.
124
my $news = "/var/spool/news/";
124
my $news = "/var/spool/news/";
125
125
126
## Number of top or bottom posters to show
126
## Number of top or bottom posters to show
127
my $topposters = 20;
127
my $topposters = 20;
128
128
129
## Number of threads we want to know about
129
## Number of threads we want to know about
130
my $topthreads = 20;
130
my $topthreads = 20;
131
131
132
## Number of cross-posted threads to show
132
## Number of cross-posted threads to show
133
my $topcrossposts = 10;
133
my $topcrossposts = 10;
134
134
135
## Number of agents we list
135
## Number of agents we list
136
my $topagents = 10;
136
my $topagents = 10;
137
137
138
## Number of time zones to show
138
## Number of time zones to show
139
my $toptz = 10;
139
my $toptz = 10;
140
140
141
###################### DATA STRUCTURES ######################
141
###################### DATA STRUCTURES ######################
142
my $group = $newsgroup_name;
142
my $group = $newsgroup_name;
143
$group =~ s!\.!/!g;
143
$group =~ s!\.!/!g;
144
my %data;    # name, count, agent, total, orig, quoted
144
my %data;    # name, count, agent, total, orig, quoted
145
my $totsize = 0;    # holds total sizes of all files
145
my $totsize = 0;    # holds total sizes of all files
146
my %crossposts;     # group, count
146
my %crossposts;     # group, count
147
my %threads;        # subject, count
147
my %threads;        # subject, count
148
my $replies   = 0;  # total no. of replies
148
my $replies   = 0;  # total no. of replies
149
my $origposts = 0;  # total no. of original posts
149
my $origposts = 0;  # total no. of original posts
150
my %tz;             # timezones by count
150
my %tz;             # timezones by count
151
my $earliest;       # earliest article we have found
151
my $earliest;       # earliest article we have found
152
my $latest;         # latest article we have found
152
my $latest;         # latest article we have found
153
my $totheader = 0;  # total size of header material
153
my $totheader = 0;  # total size of header material
154
my $totbody   = 0;  # total size of body material
154
my $totbody   = 0;  # total size of body material
155
my $totsig    = 0;  # total size of sig material
155
my $totsig    = 0;  # total size of sig material
156
my $totquoted = 0;  # total size of quoted material
156
my $totquoted = 0;  # total size of quoted material
157
my $totorig   = 0;  # total size of original material
157
my $totorig   = 0;  # total size of original material
158
my $totalposts;     # total no. of posts considered
158
my $totalposts;     # total no. of posts considered
159
my %distinct_agent;
159
my %distinct_agent;
160
160
161
## Used to hold counts of User Agents used
161
## Used to hold counts of User Agents used
162
my %agents = (
162
my %agents = (
163
  "Compuserver"               => 0,
163
  "Compuserver"               => 0,
164
  "Foorum"                    => 0,
164
  "Foorum"                    => 0,
165
  "Forte Agent"               => 0,
165
  "Forte Agent"               => 0,
166
  "Forte Free Agent"          => 0,
166
  "Forte Free Agent"          => 0,
167
  "Gnus"                      => 0,
167
  "Gnus"                      => 0,
168
  "KNode"                     => 0,
168
  "KNode"                     => 0,
169
  "MacSOUP"                   => 0,
169
  "MacSOUP"                   => 0,
170
  "MT-NewsWatcher"            => 0,
170
  "MT-NewsWatcher"            => 0,
171
  "MicroPlanet Gravity"       => 0,
171
  "MicroPlanet Gravity"       => 0,
172
  "Microsoft Outlook Express" => 0,
172
  "Microsoft Outlook Express" => 0,
173
  "Microsoft Windows Mail"    => 0,
173
  "Microsoft Windows Mail"    => 0,
174
  "Mozilla"                   => 0,
174
  "Mozilla"                   => 0,
175
  "News Rover"                => 0,
175
  "News Rover"                => 0,
176
  "NN"                        => 0,
176
  "NN"                        => 0,
177
  "Pan"                       => 0,
177
  "Pan"                       => 0,
178
  "rn"                        => 0,
178
  "rn"                        => 0,
179
  "slrn"                      => 0,
179
  "slrn"                      => 0,
180
  "Sylpheed"                  => 0,
180
  "Sylpheed"                  => 0,
181
  "tin"                       => 0,
181
  "tin"                       => 0,
182
  "VSoup"                     => 0,
182
  "VSoup"                     => 0,
183
  "WebTV"                     => 0,
183
  "WebTV"                     => 0,
184
  "Xnews"                     => 0,
184
  "Xnews"                     => 0,
185
);
185
);
186
186
187
my $datetime_parser = DateTime::Format::Mail->new();
187
my $datetime_parser = DateTime::Format::Mail->new();
188
$datetime_parser->loose();
188
$datetime_parser->loose();
189
189
190
my $today = DateTime->today( time_zone => 'UTC' );
190
my $today = DateTime->today( time_zone => 'UTC' );
191
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
191
my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1);
192
my $start      = int $prev_month->strftime('%s');
192
my $start      = int $prev_month->strftime('%s');
193
my $numdays    = int DateTime->last_day_of_month(
193
my $numdays    = int DateTime->last_day_of_month(
194
  year      => $prev_month->year(),
194
  year      => $prev_month->year(),
195
  month     => $prev_month->month(),
195
  month     => $prev_month->month(),
196
  time_zone => $prev_month->time_zone(),
196
  time_zone => $prev_month->time_zone(),
197
)->day();
197
)->day();
198
my $end = int $today->clone()->set_day(1)->strftime('%s');
198
my $end = int $today->clone()->set_day(1)->strftime('%s');
199
199
200
dmsg( $start, " to ", $end ) if DEBUG;
200
dmsg( $start, " to ", $end ) if DEBUG;
201
201
202
chdir("$news$group")
202
chdir("$news$group")
203
  or die __x(
203
  or die __x(
204
  "Can't cd to {newsgroup}: {error}\n",
204
  "Can't cd to {newsgroup}: {error}\n",
205
  newsgroup => "$news$group",
205
  newsgroup => "$news$group",
206
  error     => $!
206
  error     => $!
207
  );
207
  );
208
opendir( DIR, "." )
208
opendir( DIR, "." )
209
  or die __x(
209
  or die __x(
210
  "Can't open {newsgroup}: {error}\n",
210
  "Can't open {newsgroup}: {error}\n",
211
  newsgroup => "$news$group",
211
  newsgroup => "$news$group",
212
  error     => $!
212
  error     => $!
213
  );
213
  );
214
214
215
while ( defined( my $filename = readdir(DIR) ) )
215
while ( defined( my $filename = readdir(DIR) ) )
216
{
216
{
217
  next unless -f $filename;    # only want real files
217
  next unless -f $filename;    # only want real files
218
  next if ( $filename eq ".overview" );    # real articles only
218
  next if ( $filename eq ".overview" );    # real articles only
219
219
220
  get_article($filename);                  # read in the article
220
  get_article($filename);                  # read in the article
221
}
221
}
222
closedir(DIR);                             # finished with the directory
222
closedir(DIR);                             # finished with the directory
223
223
224
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
224
dmsg("\nearliest: $earliest\nlatest:   $latest") if DEBUG;
225
225
226
## Post-processing
226
## Post-processing
227
count_agents();                            # count agents, collapsing versions
227
count_agents();                            # count agents, collapsing versions
228
fix_percent();
228
fix_percent();
229
229
230
write_data();
230
write_data();
231
display_results();
231
display_results();
232
232
233
########################################
233
########################################
234
## Get current article's header and body
234
## Get current article's header and body
235
########################################
235
########################################
236
sub get_article
236
sub get_article
237
{
237
{
238
  my $filename = shift;
238
  my $filename = shift;
239
239
240
  open( my $FILE, '<', $filename )
240
  open( my $FILE, '<', $filename )
241
    or
241
    or
242
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
242
    die __x( "Can't open {file}: {error}\n", file => $filename, error => $! );
243
  my $msg       = Mail::Message->read($FILE);
243
  my $msg       = Mail::Message->read($FILE);
244
  my $timestamp = $msg->timestamp();
244
  my $timestamp = $msg->timestamp();
245
  my $date      = $msg->study('Date');
245
  my $date      = $msg->study('Date');
246
246
247
  ## Disregard article if timestamp is not in range
247
  ## Disregard article if timestamp is not in range
248
  dmsg($timestamp) if DEBUG;
248
  dmsg($timestamp) if DEBUG;
249
  if ( $timestamp < $start or $timestamp >= $end )
249
  if ( $timestamp < $start or $timestamp >= $end )
250
  {
250
  {
251
    dmsg("Posting on $date ignored.") if DEBUG;
251
    dmsg("Posting on $date ignored.") if DEBUG;
252
    return;
252
    return;
253
  }
253
  }
254
254
255
  $totalposts++;    # bump count of articles considered
255
  $totalposts++;    # bump count of articles considered
256
256
257
  ## DEBUG
257
  ## DEBUG
258
  dmsg($date) if DEBUG;
258
  dmsg($date) if DEBUG;
259
259
260
  ## get stats about the file itself
260
  ## get stats about the file itself
261
  my $filesize = -s $filename;    # get total size of file
261
  my $filesize = -s $filename;    # get total size of file
262
  $totsize += $filesize;          # bump total sizes of all files
262
  $totsize += $filesize;          # bump total sizes of all files
263
263
264
  if ( ( not defined $earliest ) or $timestamp < $earliest )
264
  if ( ( not defined $earliest ) or $timestamp < $earliest )
265
  {
265
  {
266
    $earliest = $timestamp;
266
    $earliest = $timestamp;
267
  }
267
  }
268
  elsif ( ( not defined $latest ) or $timestamp > $latest )
268
  elsif ( ( not defined $latest ) or $timestamp > $latest )
269
  {
269
  {
270
    $latest = $timestamp;
270
    $latest = $timestamp;
271
  }
271
  }
272
272
273
  #print "timestamp: $timestamp\n";
273
  #print "timestamp: $timestamp\n";
274
274
275
  ## count header size
275
  ## count header size
276
  $totheader += $msg->head()->size();
276
  $totheader += $msg->head()->size();
277
277
278
  ## get the poster's name (MIME-decoded, in UTF-8)
278
  ## get the poster's name (MIME-decoded, in UTF-8)
279
  my $poster = $msg->study('From');
279
  my $poster = $msg->study('From');
280
  if ( defined $poster )
280
  if ( defined $poster )
281
  {
281
  {
282
    ## Convert old to new format
282
    ## Convert old to new format
283
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
283
    $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
284
284
285
    ## Collapse whitespace
285
    ## Collapse whitespace
286
    $poster =~ s/\s+/ /g;
286
    $poster =~ s/\s+/ /g;
287
287
288
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
288
    ## Remove outer quotes; TODO: observe RFC 5322 strictly
289
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
289
    $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x;
290
290
291
    ## DEBUG
291
    ## DEBUG
292
    dmsg($poster) if DEBUG;
292
    dmsg($poster) if DEBUG;
293
293
294
    ## seen this one before?
294
    ## seen this one before?
295
    if ( !defined( $data{$poster} ) )
295
    if ( !defined( $data{$poster} ) )
296
    {
296
    {
297
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
297
      $data{$poster}{'agent'}  = __ 'unknown';    # comes after For: field
298
      $data{$poster}{'orig'}   = 0;
298
      $data{$poster}{'orig'}   = 0;
299
      $data{$poster}{'quoted'} = 0;
299
      $data{$poster}{'quoted'} = 0;
300
    }
300
    }
301
    $data{$poster}{'count'}++;                    # bump count for this poster
301
    $data{$poster}{'count'}++;                    # bump count for this poster
302
    $data{$poster}{'size'} += $filesize;          # total size of file
302
    $data{$poster}{'size'} += $filesize;          # total size of file
303
303
304
    ## The User-Agent and/or X-Newsreader fields
304
    ## The User-Agent and/or X-Newsreader fields
305
    ## for User-Agent by poster
305
    ## for User-Agent by poster
306
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
306
    my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader');
307
    if ( defined $ua )
307
    if ( defined $ua )
308
    {
308
    {
309
      $data{$poster}{'agent'} = $ua;
309
      $data{$poster}{'agent'} = $ua;
310
310
311
      ## DEBUG
311
      ## DEBUG
312
      dmsg($ua) if DEBUG;
312
      dmsg($ua) if DEBUG;
313
    }
313
    }
314
314
315
    ## The User Agent for User-Agent by number of posts
315
    ## The User Agent for User-Agent by number of posts
316
    get_agent($msg);
316
    get_agent($msg);
317
317
318
    ## Get all cross-posted newsgroups
318
    ## Get all cross-posted newsgroups
319
    for ( split( /,/, $msg->study('Newsgroups') ) )
319
    for ( split( /,/, $msg->study('Newsgroups') ) )
320
    {
320
    {
321
      $crossposts{$_}++;    # bump count for each
321
      $crossposts{$_}++;    # bump count for each
322
    }
322
    }
323
323
324
    ## Get threads
324
    ## Get threads
325
    my $thread = $msg->study('Subject');
325
    my $thread = $msg->study('Subject');
326
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
326
    $thread =~ s/^re:\s+//i;    # Remove Re: or re: at start
327
    $thread =~ s/\s+/ /g;       # collapse whitespace
327
    $thread =~ s/\s+/ /g;       # collapse whitespace
328
    $threads{$thread}{'count'}++;    # bump count of this subject
328
    $threads{$thread}{'count'}++;    # bump count of this subject
329
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
329
    $threads{$thread}{'size'} += $filesize;    # bump bytes for this thread
330
330
331
    ## Is this an original post or a reply?
331
    ## Is this an original post or a reply?
332
    if ( defined $msg->study('References') )
332
    if ( defined $msg->study('References') )
333
    {
333
    {
334
      $replies++;
334
      $replies++;
335
    }
335
    }
336
    else
336
    else
337
    {
337
    {
338
      $origposts++;
338
      $origposts++;
339
    }
339
    }
340
340
341
    ## Get the time zone
341
    ## Get the time zone
342
    my $datetime = $datetime_parser->parse_datetime($date);
342
    my $datetime = $datetime_parser->parse_datetime($date);
343
    my $tz       = $datetime->strftime('%z');
343
    my $tz       = $datetime->strftime('%z');
344
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
344
    $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o;
345
    $tz{$tz}++;
345
    $tz{$tz}++;
346
346
347
    ## DEBUG
347
    ## DEBUG
348
    dmsg($tz) if DEBUG;
348
    dmsg($tz) if DEBUG;
349
349
350
#### Now analyse the body text ####
350
#### Now analyse the body text ####
351
    my $body = $msg->body();
351
    my $body = $msg->body();
352
352
353
    my $insig = 0;
353
    my $insig = 0;
354
    my @body  = $body->lines;
354
    my @body  = $body->lines;
355
    for (@body)
355
    for (@body)
356
    {
356
    {
357
      $totbody += length($_);    # bump total body size
357
      $totbody += length($_);    # bump total body size
358
      next if (m{^$>}o);         # don't count blank lines in body
358
      next if (m{^$>}o);         # don't count blank lines in body
359
      if ( $insig == 1 )
359
      if ( $insig == 1 )
360
      {
360
      {
361
361
362
        # bump total sig size
362
        # bump total sig size
363
        $totsig += length($_);
363
        $totsig += length($_);
364
      }
364
      }
365
      ## are we in a quote line?
365
      ## are we in a quote line?
366
      ## Bill Unruh uses ] quotes, and another poster uses ::
366
      ## Bill Unruh uses ] quotes, and another poster uses ::
367
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
367
      elsif ( m{^\s*[>\]]}o or m{^\s*::}o )
368
      {
368
      {
369
        ## bump count of quoted chrs
369
        ## bump count of quoted chrs
370
        $data{$poster}{'quoted'} += length($_);
370
        $data{$poster}{'quoted'} += length($_);
371
        $totquoted += length($_);
371
        $totquoted += length($_);
372
      }
372
      }
373
      elsif (/^-- $/)
373
      elsif (/^-- $/)
374
      {
374
      {
375
        $insig = 1;
375
        $insig = 1;
376
      }
376
      }
377
      else
377
      else
378
      {
378
      {
379
        ## We must be processing an original line
379
        ## We must be processing an original line
380
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
380
        $data{$poster}{'orig'} += length($_);    # bump count of original chrs
381
        $totorig += length($_);
381
        $totorig += length($_);
382
      }
382
      }
383
    }
383
    }
384
384
385
    # end for (@body)
385
    # end for (@body)
386
  }
386
  }
387
387
388
  close($FILE);
388
  close($FILE);
389
}
389
}
390
390
391
sub get_agent
391
sub get_agent
392
{
392
{
393
  my $msg = shift;
393
  my $msg = shift;
394
394
395
  my $ua =
395
  my $ua =
396
       $msg->study('User-Agent')
396
       $msg->study('User-Agent')
397
    or $msg->study('X-Newsreader')
397
    or $msg->study('X-Newsreader')
398
    or $msg->study('X-Mailer');
398
    or $msg->study('X-Mailer');
399
  if ( not defined $ua )
399
  if ( not defined $ua )
400
  {
400
  {
401
    my $org = $msg->study('Organization');
401
    my $org = $msg->study('Organization');
402
    if ( defined $org
402
    if ( defined $org
403
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
403
      and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ )
404
    {
404
    {
405
      $ua = $org;
405
      $ua = $org;
406
    }
406
    }
407
    elsif ( $msg->study('Message-ID') =~ /pine/i )
407
    elsif ( $msg->study('Message-ID') =~ /pine/i )
408
    {
408
    {
409
      $ua = "Pine";
409
      $ua = "Pine";
410
    }
410
    }
411
  }
411
  }
412
412
413
  ## Hopefully found UA, else set to unknown
413
  ## Hopefully found UA, else set to unknown
414
  if ( not defined $ua )
414
  if ( not defined $ua )
415
  {
415
  {
416
    $ua = __ "unknown";
416
    $ua = __ "unknown";
417
  }
417
  }
418
418
419
  $ua = clean($ua);
419
  $ua = clean($ua);
420
420
421
  my $raw   = $ua;
421
  my $raw   = $ua;
422
  my $agent = $raw;
422
  my $agent = $raw;
423
423
424
  ## strip http
424
  ## strip http
425
  if ( $raw =~ /.*http.*/ )
425
  if ( $raw =~ /.*http.*/ )
426
  {
426
  {
427
    $raw =~ s!posted via!!i;
427
    $raw =~ s!posted via!!i;
428
    $raw =~ s!http://!!g;
428
    $raw =~ s!http://!!g;
429
    $raw =~ s!/!!g;
429
    $raw =~ s!/!!g;
430
    $raw =~ s! !!g;
430
    $raw =~ s! !!g;
431
  }
431
  }
432
432
433
  ## Fix Outlook from Mac
433
  ## Fix Outlook from Mac
434
  if ( $raw =~ /^microsoft/i )
434
  if ( $raw =~ /^microsoft/i )
435
  {
435
  {
436
    $raw =~ s/-/ /g;
436
    $raw =~ s/-/ /g;
437
  }
437
  }
438
438
439
  ## Pick out the popular agents
439
  ## Pick out the popular agents
440
  if ( $raw =~ /(outlook express)/i
440
  if ( $raw =~ /(outlook express)/i
441
    || $raw =~ /(windows mail)/i
441
    || $raw =~ /(windows mail)/i
442
    || $raw =~ /(microplanet gravity)/i
442
    || $raw =~ /(microplanet gravity)/i
443
    || $raw =~ /(news rover)/i
443
    || $raw =~ /(news rover)/i
444
    || $raw =~ /(forte agent)/i
444
    || $raw =~ /(forte agent)/i
445
    || $raw =~ /(forte free agent)/i )
445
    || $raw =~ /(forte free agent)/i )
446
  {
446
  {
447
    $agent = $1;
447
    $agent = $1;
448
  }
448
  }
449
  elsif (
449
  elsif (
450
    $raw =~ /^(
450
    $raw =~ /^(
451
        pan
451
        pan
452
       |sylpheed
452
       |sylpheed
453
       |slrn
453
       |slrn
454
       |mozilla
454
       |mozilla
455
       |knode
455
       |knode
456
       |tin
456
       |tin
457
       |hamster
457
       |hamster
458
       |xrn
458
       |xrn
459
       |xnews
459
       |xnews
460
       |aol
460
       |aol
461
       |gnus
461
       |gnus
462
       |krn
462
       |krn
463
       |macsoup
463
       |macsoup
464
       |messenger
464
       |messenger
465
       |openxp
465
       |openxp
466
       |pine
466
       |pine
467
       |thoth
467
       |thoth
468
       |turnpike
468
       |turnpike
469
       |winvn
469
       |winvn
470
       |vsoup
470
       |vsoup
471
       |google
471
       |google
472
       |supernews
472
       |supernews
473
       |nn
473
       |nn
474
       |rn
474
       |rn
475
       |007
475
       |007
476
       |webtv
476
       |webtv
477
       |compuserve
477
       |compuserve
478
       )/ix
478
       )/ix
479
    )
479
    )
480
  {
480
  {
481
    $agent = $1;
481
    $agent = $1;
482
  }
482
  }
483
  else
483
  else
484
  {
484
  {
485
    ## Clean up unknown agents
485
    ## Clean up unknown agents
486
    if ( $raw =~ m!^(.*?)/! )
486
    if ( $raw =~ m!^(.*?)/! )
487
    {
487
    {
488
      $agent = $1;
488
      $agent = $1;
489
    }
489
    }
490
    elsif ( $raw =~ /^(\w*)\d.*/ )
490
    elsif ( $raw =~ /^(\w*)\d.*/ )
491
    {
491
    {
492
      $agent = $1;
492
      $agent = $1;
493
    }
493
    }
494
  }
494
  }
495
495
496
  $distinct_agent{$agent}++;
496
  $distinct_agent{$agent}++;
497
  return $agent;
497
  return $agent;
498
}
498
}
499
## get_agent
499
## get_agent
500
500
501
#########################################
501
#########################################
502
## Count the User-Agents used, collapsing
502
## Count the User-Agents used, collapsing
503
## different versions into one per agent.
503
## different versions into one per agent.
504
#########################################
504
#########################################
505
sub count_agents
505
sub count_agents
506
{
506
{
507
POSTER:
507
POSTER:
508
  foreach my $poster ( keys %data )
508
  foreach my $poster ( keys %data )
509
  {
509
  {
510
    foreach my $agent_name ( keys %distinct_agent )
510
    foreach my $agent_name ( keys %distinct_agent )
511
    {    # check against known ones
511
    {    # check against known ones
512
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
512
      if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ )
513
      {
513
      {
514
        $agents{$agent_name}++;
514
        $agents{$agent_name}++;
515
        next POSTER;
515
        next POSTER;
516
      }
516
      }
517
    }
517
    }
518
    $agents{ $data{$poster}{'agent'} }++;
518
    $agents{ $data{$poster}{'agent'} }++;
519
  }
519
  }
520
}    # count_agents
520
}    # count_agents
521
521
522
#############################################
522
#############################################
523
## Set orig/total percentages for all posters
523
## Set orig/total percentages for all posters
524
#############################################
524
#############################################
525
sub fix_percent
525
sub fix_percent
526
{
526
{
527
  foreach my $poster ( keys %data )
527
  foreach my $poster ( keys %data )
528
  {
528
  {
529
    my $percent = 100;
529
    my $percent = 100;
530
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
530
    if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) )
531
    {
531
    {
532
      $percent =
532
      $percent =
533
        $data{$poster}{'orig'} * 100 /
533
        $data{$poster}{'orig'} * 100 /
534
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
534
        ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} );    #/
535
    }
535
    }
536
    elsif ( $data{$poster}{'orig'} == 0 )
536
    elsif ( $data{$poster}{'orig'} == 0 )
537
    {
537
    {
538
      $percent = 0;
538
      $percent = 0;
539
    }
539
    }
540
    $data{$poster}{'percent'} = $percent;
540
    $data{$poster}{'percent'} = $percent;
541
  }
541
  }
542
}
542
}
543
## fix_percent
543
## fix_percent
544
544
545
##################################
545
##################################
546
## Write data structures to a file
546
## Write data structures to a file
547
##################################
547
##################################
548
sub write_data
548
sub write_data
549
{
549
{
550
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
550
  open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" )
551
    or die __x( "Can't create XDATA: {error}\n", error => $! );
551
    or die __x( "Can't create XDATA: {error}\n", error => $! );
552
  print $OUTF "Data collected from $newsgroup_name\n\n";
552
  print $OUTF "Data collected from $newsgroup_name\n\n";
553
  print $OUTF
553
  print $OUTF
554
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
554
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
555
  foreach my $name ( keys %data )
555
  foreach my $name ( keys %data )
556
  {
556
  {
557
    print $OUTF
557
    print $OUTF
558
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
558
"$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n";
559
  }
559
  }
560
  print $OUTF
560
  print $OUTF
561
"============================================================================\n";
561
"============================================================================\n";
562
  print $OUTF "Thread subjects\n";
562
  print $OUTF "Thread subjects\n";
563
  print $OUTF
563
  print $OUTF
564
"----------------------------------------------------------------------------\n";
564
"----------------------------------------------------------------------------\n";
565
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
565
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
566
  {
566
  {
567
    print $OUTF
567
    print $OUTF
568
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
568
      "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n";
569
  }
569
  }
570
  print $OUTF
570
  print $OUTF
571
"============================================================================\n";
571
"============================================================================\n";
572
  print $OUTF "Cross-posts\n";
572
  print $OUTF "Cross-posts\n";
573
  print $OUTF
573
  print $OUTF
574
"----------------------------------------------------------------------------\n";
574
"----------------------------------------------------------------------------\n";
575
  foreach my $name ( sort keys %crossposts )
575
  foreach my $name ( sort keys %crossposts )
576
  {
576
  {
577
    print $OUTF "$name : $crossposts{$name}\n";
577
    print $OUTF "$name : $crossposts{$name}\n";
578
  }
578
  }
579
  print $OUTF
579
  print $OUTF
580
"============================================================================\n";
580
"============================================================================\n";
581
  print $OUTF "User agents\n";
581
  print $OUTF "User agents\n";
582
  print $OUTF
582
  print $OUTF
583
"----------------------------------------------------------------------------\n";
583
"----------------------------------------------------------------------------\n";
584
  foreach my $name ( sort keys %agents )
584
  foreach my $name ( sort keys %agents )
585
  {
585
  {
586
    print $OUTF "$name : $agents{$name}\n";
586
    print $OUTF "$name : $agents{$name}\n";
587
  }
587
  }
588
  print $OUTF
588
  print $OUTF
589
"============================================================================\n";
589
"============================================================================\n";
590
  print $OUTF "Time zones\n";
590
  print $OUTF "Time zones\n";
591
  print $OUTF
591
  print $OUTF
592
"----------------------------------------------------------------------------\n";
592
"----------------------------------------------------------------------------\n";
593
  foreach my $name ( sort keys %tz )
593
  foreach my $name ( sort keys %tz )
594
  {
594
  {
595
    print $OUTF "$name : $tz{$name}\n";
595
    print $OUTF "$name : $tz{$name}\n";
596
  }
596
  }
597
  close $OUTF;
597
  close $OUTF;
598
}    # write_data
598
}    # write_data
599
599
600
sub display_results
600
sub display_results
601
{
601
{
602
  #################### DISPLAY RESULTS #####################
602
  #################### DISPLAY RESULTS #####################
603
  println( "=" x 76 );
603
  println( "=" x 76 );
604
  printf "%s\n",
604
  printf "%s\n",
605
    centred(
605
    centred(
606
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
606
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
607
    76 );
607
    76 );
608
  println( "=" x 76 );
608
  println( "=" x 76 );
609
  printf "%s\n",
609
  printf "%s\n",
610
    centred(
610
    centred(
611
    __(
611
    __(
612
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
612
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
613
    ),
613
    ),
614
    76
614
    76
615
    );
615
    );
616
  print "\n\n";
616
  print "\n\n";
617
  printf __"Total posts considered: %s over %d days\n",
617
  printf __"Total posts considered: %s over %d days\n",
618
    $formatter->format_number($totalposts),
618
    $formatter->format_number($totalposts),
619
    $formatter->format_number($numdays);
619
    $formatter->format_number($numdays);
620
  my $time_locale       = setlocale(LC_TIME);
620
  my $time_locale       = setlocale(LC_TIME);
621
  my $earliest_datetime = DateTime->from_epoch(
621
  my $earliest_datetime = DateTime->from_epoch(
622
    epoch     => $earliest,
622
    epoch     => $earliest,
623
    locale    => $time_locale,
623
    locale    => $time_locale,
624
    time_zone => 'UTC',
624
    time_zone => 'UTC',
625
  );
625
  );
626
  my $latest_datetime = DateTime->from_epoch(
626
  my $latest_datetime = DateTime->from_epoch(
627
    epoch     => $latest,
627
    epoch     => $latest,
628
    locale    => $time_locale,
628
    locale    => $time_locale,
629
    time_zone => 'UTC',
629
    time_zone => 'UTC',
630
  );
630
  );
631
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
631
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
632
  printf __"Earliest article: %s\n",
632
  printf __"Earliest article: %s\n",
633
    $earliest_datetime->strftime($datetime_format);
633
    $earliest_datetime->strftime($datetime_format);
634
  printf __"Latest article:   %s\n",
634
  printf __"Latest article:   %s\n",
635
    $latest_datetime->strftime($datetime_format);
635
    $latest_datetime->strftime($datetime_format);
636
  printf __"Original articles: %s; replies: %s\n",
636
  printf __"Original articles: %s; replies: %s\n",
637
    $formatter->format_number($origposts),
637
    $formatter->format_number($origposts),
638
    $formatter->format_number($replies);
638
    $formatter->format_number($replies);
639
  printf __"Total size of posts: %s bytes (%s)" . "\n",
639
  printf __"Total size of posts: %s bytes (%s)" . "\n",
640
    $formatter->format_number($totsize),
640
    $formatter->format_number($totsize),
641
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
641
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
642
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
642
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
643
    $formatter->format_number( int( $totalposts / $numdays ) ),
643
    $formatter->format_number( int( $totalposts / $numdays ) ),
644
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
644
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
645
    $formatter->format_number( int( $totsize / $totalposts ) );
645
    $formatter->format_number( int( $totsize / $totalposts ) );
646
646
647
  my $count = keys %data;
647
  my $count = keys %data;
648
  printf __"Total headers: %s; bodies: %s\n",
648
  printf __"Total headers: %s; bodies: %s\n",
649
    $formatter->format_bytes(
649
    $formatter->format_bytes(
650
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
650
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
651
    ),
651
    ),
652
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
652
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
653
  printf __
653
  printf __
654
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
654
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
655
    $formatter->format_bytes(
655
    $formatter->format_bytes(
656
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
656
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
657
    ),
657
    ),
658
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
658
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
659
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
659
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
660
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
660
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
661
  printf __"Total number of posters: %s, average %s per poster\n",
661
  printf __"Total number of posters: %s, average %s per poster\n",
662
    $formatter->format_number($count),
662
    $formatter->format_number($count),
663
    $formatter->format_bytes( $totsize / $count,
663
    $formatter->format_bytes( $totsize / $count,
664
    ( 'precision' => 1, 'mode' => 'iec' ) );
664
    ( 'precision' => 1, 'mode' => 'iec' ) );
665
  $count = keys %threads;
665
  $count = keys %threads;
666
  printf __"Total number of threads: %s, average %s per thread\n",
666
  printf __"Total number of threads: %s, average %s per thread\n",
667
    $formatter->format_number($count),
667
    $formatter->format_number($count),
668
    $formatter->format_bytes( $totsize / $count,
668
    $formatter->format_bytes( $totsize / $count,
669
    ( 'precision' => 1, 'mode' => 'iec' ) );
669
    ( 'precision' => 1, 'mode' => 'iec' ) );
670
  printf __"Total number of user agents: %d\n",
670
  printf __"Total number of user agents: %d\n",
671
    $formatter->format_number( scalar keys %agents );
671
    $formatter->format_number( scalar keys %agents );
672
  print "\n", "=" x 76, "\n"  ;
672
  print "\n", "=" x 76, "\n"  ;
673
  ########################################
673
  ########################################
674
  ## Show posters by article count  Sec 1;
674
  ## Show posters by article count  Sec 1;
675
  ########################################
675
  ########################################
676
  unless ( $skipSec{1} )
676
  unless ( $skipSec{1} )
677
  {
677
  {
678
    if ( keys %data < $topposters )
678
    if ( keys %data < $topposters )
679
    {
679
    {
680
      $count = keys %data;
680
      $count = keys %data;
681
    }
681
    }
682
    else
682
    else
683
    {
683
    {
684
      $count = $topposters;
684
      $count = $topposters;
685
    }
685
    }
686
    printf "%s\n",
686
    printf "%s\n",
687
      centred(
687
      centred(
688
      __x( "Top {count} posters by number of articles", count => $topposters ),
688
      __x( "Top {count} posters by number of articles", count => $topposters ),
689
      76
689
      76
690
      );
690
      );
691
    print "=" x 76, "\n";
691
    print "=" x 76, "\n";
692
    my $i = 0;
692
    my $i = 0;
693
    foreach
693
    foreach
694
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
694
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
695
    {
695
    {
696
      my $name = substr( $poster, 0, 65 );
696
      my $name = substr( $poster, 0, 65 );
697
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
697
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
698
        $data{$poster}{count};
698
        $data{$poster}{count};
699
      last if ( ++$i == $count );
699
      last if ( ++$i == $count );
700
    }
700
    }
701
    print "\n", "=" x 76, "\n";
701
    print "\n", "=" x 76, "\n";
702
  }
702
  }
703
703
704
  ######################################
704
  ######################################
705
  ## Show posters by size in KiB  Sec 2;
705
  ## Show posters by size in KiB  Sec 2;
706
  ######################################
706
  ######################################
707
  unless ( $skipSec{2} )
707
  unless ( $skipSec{2} )
708
  {
708
  {
709
    if ( keys %data < $topposters )
709
    if ( keys %data < $topposters )
710
    {
710
    {
711
      $count = keys %data;
711
      $count = keys %data;
712
    }
712
    }
713
    else
713
    else
714
    {
714
    {
715
      $count = $topposters;
715
      $count = $topposters;
716
    }
716
    }
717
    printf "%s\n",
717
    printf "%s\n",
718
      centred(
718
      centred(
719
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
719
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
720
      76
720
      76
721
      );
721
      );
722
    print "=" x 76, "\n";
722
    print "=" x 76, "\n";
723
    my $i = 0;
723
    my $i = 0;
724
    foreach
724
    foreach
725
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
725
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
726
    {
726
    {
727
      my $name = substr( $poster, 0, 62 );
727
      my $name = substr( $poster, 0, 62 );
728
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
728
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
729
        $data{$poster}{size} / 1024;    #/
729
        $data{$poster}{size} / 1024;    #/
730
      last if ( ++$i == $count );
730
      last if ( ++$i == $count );
731
    }
731
    }
732
    print "\n", "=" x 76, "\n";
732
    print "\n", "=" x 76, "\n";
733
  }
733
  }
734
734
735
  #####################################
735
  #####################################
736
  ## Show top posters for original text
736
  ## Show top posters for original text
737
  #####################################
737
  #####################################
738
  unless ( $skipSec{3} )
738
  unless ( $skipSec{3} )
739
  {
739
  {
740
    if ( keys %data < $topposters )
740
    if ( keys %data < $topposters )
741
    {
741
    {
742
      $count = keys %data;
742
      $count = keys %data;
743
    }
743
    }
744
    else
744
    else
745
    {
745
    {
746
      $count = $topposters;
746
      $count = $topposters;
747
    }
747
    }
748
    printf "%s\n",
748
    printf "%s\n",
749
      centred(
749
      centred(
750
      __x(
750
      __x(
751
        "Top {count} responders by original text (> 5 posts)",
751
        "Top {count} responders by original text (> 5 posts)",
752
        count => $topposters
752
        count => $topposters
753
      ),
753
      ),
754
      76
754
      76
755
      );
755
      );
756
    print "=" x 76, "\n";
756
    print "=" x 76, "\n";
757
    my $i = 0;
757
    my $i = 0;
758
    foreach my $poster (
758
    foreach my $poster (
759
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
759
      sort { $data{$b}{percent} <=> $data{$a}{percent} }
760
      keys %data
760
      keys %data
761
      )
761
      )
762
    {
762
    {
763
      next if $data{$poster}{quoted} == 0;
763
      next if $data{$poster}{quoted} == 0;
764
      next if $data{$poster}{count} < 5;
764
      next if $data{$poster}{count} < 5;
765
      my $name = substr( $poster, 0, 63 );
765
      my $name = substr( $poster, 0, 63 );
766
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
766
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
767
        $data{$poster}{percent};
767
        $data{$poster}{percent};
768
      last if ( ++$i == $count );
768
      last if ( ++$i == $count );
769
    }
769
    }
770
    print "\n", "=" x 76, "\n";
770
    print "\n", "=" x 76, "\n";
771
  }
771
  }
772
772
773
  ########################################
773
  ########################################
774
  ## Show bottom posters for original text
774
  ## Show bottom posters for original text
775
  ########################################
775
  ########################################
776
  unless ( $skipSec{4} )
776
  unless ( $skipSec{4} )
777
  {
777
  {
778
    if ( keys %data < $topposters )
778
    if ( keys %data < $topposters )
779
    {
779
    {
780
      $count = keys %data;
780
      $count = keys %data;
781
    }
781
    }
782
    else
782
    else
783
    {
783
    {
784
      $count = $topposters;
784
      $count = $topposters;
785
    }
785
    }
786
    printf "%s\n",
786
    printf "%s\n",
787
      centred(
787
      centred(
788
      __x(
788
      __x(
789
        "Bottom {count} responders by original text  (> 5 posts)",
789
        "Bottom {count} responders by original text  (> 5 posts)",
790
        count => $topposters
790
        count => $topposters
791
      ),
791
      ),
792
      76
792
      76
793
      );
793
      );
794
    print "=" x 76, "\n";
794
    print "=" x 76, "\n";
795
    my $i = 0;
795
    my $i = 0;
796
    foreach my $poster (
796
    foreach my $poster (
797
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
797
      sort { $data{$a}{percent} <=> $data{$b}{percent} }
798
      keys %data
798
      keys %data
799
      )
799
      )
800
    {
800
    {
801
      next if $data{$poster}{quoted} == 0;
801
      next if $data{$poster}{quoted} == 0;
802
      next if $data{$poster}{count} < 5;
802
      next if $data{$poster}{count} < 5;
803
      my $name = substr( $poster, 0, 63 );
803
      my $name = substr( $poster, 0, 63 );
804
      printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
804
      printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
805
        $data{$poster}{percent};
805
        $data{$poster}{percent};
806
      last if ( ++$i == $count );
806
      last if ( ++$i == $count );
807
    }
807
    }
808
    print "\n", "=" x 76, "\n";
808
    print "\n", "=" x 76, "\n";
809
  }
809
  }
810
810
811
  #####################################
811
  #####################################
812
  ## Show threads by number of articles
812
  ## Show threads by number of articles
813
  #####################################
813
  #####################################
814
  unless ( $skipSec{5} )
814
  unless ( $skipSec{5} )
815
  {
815
  {
816
    if ( keys %threads < $topthreads )
816
    if ( keys %threads < $topthreads )
817
    {
817
    {
818
      $count = keys %threads;
818
      $count = keys %threads;
819
    }
819
    }
820
    else
820
    else
821
    {
821
    {
822
      $count = $topthreads;
822
      $count = $topthreads;
823
    }
823
    }
824
    printf "%s\n",
824
    printf "%s\n",
825
      centred(
825
      centred(
826
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
826
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
827
      76 );
827
      76 );
828
    print "=" x 76, "\n";
828
    print "=" x 76, "\n";
829
    my $i = 0;
829
    my $i = 0;
830
    foreach my $thread (
830
    foreach my $thread (
831
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
831
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
832
      keys %threads
832
      keys %threads
833
      )
833
      )
834
    {
834
    {
835
      my $name = substr( $thread, 0, 65 );
835
      my $name = substr( $thread, 0, 65 );
836
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
836
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
837
        $threads{$thread}{'count'};
837
        $threads{$thread}{'count'};
838
      last if ( ++$i == $count );
838
      last if ( ++$i == $count );
839
    }
839
    }
840
    print "\n", "=" x 76, "\n";
840
    print "\n", "=" x 76, "\n";
841
  }
841
  }
842
842
843
  ##############################
843
  ##############################
844
  ## Show threads by size in KiB
844
  ## Show threads by size in KiB
845
  ##############################
845
  ##############################
846
  unless ( $skipSec{6} )
846
  unless ( $skipSec{6} )
847
  {
847
  {
848
    if ( keys %threads < $topthreads )
848
    if ( keys %threads < $topthreads )
849
    {
849
    {
850
      $count = keys %threads;
850
      $count = keys %threads;
851
    }
851
    }
852
    else
852
    else
853
    {
853
    {
854
      $count = $topthreads;
854
      $count = $topthreads;
855
    }
855
    }
856
    printf "%s\n",
856
    printf "%s\n",
857
      centred(
857
      centred(
858
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
858
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
859
    print "=" x 76, "\n";
859
    print "=" x 76, "\n";
860
    my $i = 0;
860
    my $i = 0;
861
    foreach my $thread (
861
    foreach my $thread (
862
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
862
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
863
      keys %threads
863
      keys %threads
864
      )
864
      )
865
    {
865
    {
866
      my $name = substr( $thread, 0, 65 );
866
      my $name = substr( $thread, 0, 65 );
867
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
867
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
868
        $threads{$thread}{'size'} / 1024;    #/
868
        $threads{$thread}{'size'} / 1024;    #/
869
      last if ( ++$i == $count );
869
      last if ( ++$i == $count );
870
    }
870
    }
871
    print "\n", "=" x 76, "\n";
871
    print "\n", "=" x 76, "\n";
872
  }
872
  }
873
873
874
  ##################################
874
  ##################################
875
  ## Show top 10 cross-posted groups
875
  ## Show top 10 cross-posted groups
876
  ##################################
876
  ##################################
877
  unless ( $skipSec{7} )
877
  unless ( $skipSec{7} )
878
  {
878
  {
879
    delete $crossposts{"$newsgroup_name"};    # don't include ours
879
    delete $crossposts{"$newsgroup_name"};    # don't include ours
880
    if ( keys %crossposts < $topcrossposts )
880
    if ( keys %crossposts < $topcrossposts )
881
    {
881
    {
882
      $count = keys %crossposts;
882
      $count = keys %crossposts;
883
    }
883
    }
884
    else
884
    else
885
    {
885
    {
886
      $count = $topcrossposts;
886
      $count = $topcrossposts;
887
    }
887
    }
888
    printf "%s\n",
888
    printf "%s\n",
889
      centred(
889
      centred(
890
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
890
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
891
    print "=" x 76, "\n";
891
    print "=" x 76, "\n";
892
    my $i = 0;
892
    my $i = 0;
893
    foreach
893
    foreach
894
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
894
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
895
    {
895
    {
896
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
896
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
897
        $crossposts{$name};
897
        $crossposts{$name};
898
      last if ( ++$i == $count );
898
      last if ( ++$i == $count );
899
    }
899
    }
900
    print "\n", "=" x 76, "\n";
900
    print "\n", "=" x 76, "\n";
901
  }
901
  }
902
902
903
  #########################
903
  #########################
904
  ## Show agents and counts
904
  ## Show agents and counts
905
  #########################
905
  #########################
906
  unless ( $skipSec{8} )
906
  unless ( $skipSec{8} )
907
  {
907
  {
908
    if ( keys %agents < $topagents )
908
    if ( keys %agents < $topagents )
909
    {
909
    {
910
      $count = keys %agents;
910
      $count = keys %agents;
911
    }
911
    }
912
    else
912
    else
913
    {
913
    {
914
      $count = $topagents;
914
      $count = $topagents;
915
    }
915
    }
916
    printf "%s\n",
916
    printf "%s\n",
917
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
917
      centred( __x( "Top {count} user agents by poster", count => $topagents ),
918
      76 );
918
      76 );
919
    print "=" x 76, "\n";
919
    print "=" x 76, "\n";
920
    my $i = 0;
920
    my $i = 0;
921
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
921
    foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
922
    {
922
    {
923
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
923
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
924
        $agents{$agent};
924
        $agents{$agent};
925
      last if ( ++$i == $count );
925
      last if ( ++$i == $count );
926
    }
926
    }
927
    print "\n", "=" x 76, "\n";
927
    print "\n", "=" x 76, "\n";
928
  }
928
  }
929
929
930
  #######################
930
  #######################
931
  ## Show distinct agents
931
  ## Show distinct agents
932
  #######################
932
  #######################
933
  unless ( $skipSec{9} )
933
  unless ( $skipSec{9} )
934
  {
934
  {
935
    if ( keys %distinct_agent < $topagents )
935
    if ( keys %distinct_agent < $topagents )
936
    {
936
    {
937
      $count = keys %distinct_agent;
937
      $count = keys %distinct_agent;
938
    }
938
    }
939
    else
939
    else
940
    {
940
    {
941
      $count = $topagents;
941
      $count = $topagents;
942
    }
942
    }
943
    printf "%s\n",
943
    printf "%s\n",
944
      centred(
944
      centred(
945
      __x( "Top {count} user agents by number of posts", count => $topagents ),
945
      __x( "Top {count} user agents by number of posts", count => $topagents ),
946
      76
946
      76
947
      );
947
      );
948
    print "=" x 76, "\n";
948
    print "=" x 76, "\n";
949
    my $i = 0;
949
    my $i = 0;
950
    foreach my $agent (
950
    foreach my $agent (
951
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
951
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
952
      keys %distinct_agent
952
      keys %distinct_agent
953
      )
953
      )
954
    {
954
    {
955
      printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
955
      printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
956
        $distinct_agent{$agent},
956
        $distinct_agent{$agent},
957
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
957
        ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
958
      last if ( ++$i == $count );
958
      last if ( ++$i == $count );
959
    }
959
    }
960
    print "\n", "=" x 76, "\n";
960
    print "\n", "=" x 76, "\n";
961
  }
961
  }
962
962
963
  ############################
963
  ############################
964
  ## Show timezones and counts
964
  ## Show timezones and counts
965
  ############################
965
  ############################
966
  unless ( $skipSec{10} )
966
  unless ( $skipSec{10} )
967
  {
967
  {
968
    if ( keys %tz < $toptz )
968
    if ( keys %tz < $toptz )
969
    {
969
    {
970
      $count = keys %tz;
970
      $count = keys %tz;
971
    }
971
    }
972
    else
972
    else
973
    {
973
    {
974
      $count = $toptz;
974
      $count = $toptz;
975
    }
975
    }
976
    printf "%s\n",
976
    printf "%s\n",
977
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
977
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
978
    print "=" x 76, "\n";
978
    print "=" x 76, "\n";
979
    my $i = 0;
979
    my $i = 0;
980
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
980
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
981
    {
981
    {
982
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
982
      printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
983
      last if ( ++$i == $count );
983
      last if ( ++$i == $count );
984
    }
984
    }
985
    print "\n", "=" x 76, "\n";
985
    print "\n", "=" x 76, "\n";
986
  }
986
  }
987
}
987
}
988
988
989
## helper subs
989
## helper subs
990
990
991
###############################
991
###############################
992
## Right pad a string with '.'s
992
## Right pad a string with '.'s
993
###############################
993
###############################
994
sub rpad
994
sub rpad
995
{
995
{
996
  ## Get text to pad, length to pad, pad chr
996
  ## Get text to pad, length to pad, pad chr
997
  my ( $text, $pad_len, $pad_chr ) = @_;
997
  my ( $text, $pad_len, $pad_chr ) = @_;
998
998
999
  ## DEBUG
999
  ## DEBUG
1000
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
1000
  printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1;
1001
1001
1002
  if ( length($text) > $pad_len )
1002
  if ( length($text) > $pad_len )
1003
  {
1003
  {
1004
    $text = substr( $text, 0, $pad_len );
1004
    $text = substr( $text, 0, $pad_len );
1005
  }
1005
  }
1006
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1006
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
1007
  return $padded;
1007
  return $padded;
1008
}
1008
}
1009
1009
1010
##################
1010
##################
1011
## Centre a string
1011
## Centre a string
1012
##################
1012
##################
1013
sub centred
1013
sub centred
1014
{
1014
{
1015
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1015
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
1016
  my $pad_len = ( $width - length($text) ) / 2;    #/
1016
  my $pad_len = ( $width - length($text) ) / 2;    #/
1017
  my $centred = " " x $pad_len . $text;
1017
  my $centred = " " x $pad_len . $text;
1018
  return $centred;
1018
  return $centred;
1019
}
1019
}
1020
1020
1021
###########################
1021
###########################
1022
## Put commas into a number
1022
## Put commas into a number
1023
###########################
1023
###########################
1024
sub commify
1024
sub commify
1025
{
1025
{
1026
  local $_ = shift;
1026
  local $_ = shift;
1027
  my $number = $_;
1027
  my $number = $_;
1028
  $_ = int;                                        # Chop non-integer part
1028
  $_ = int;                                        # Chop non-integer part
1029
  1 while
1029
  1 while
1030
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1030
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
1031
  my $int_part  = $_;
1031
  my $int_part  = $_;
1032
  my $real_part = '';
1032
  my $real_part = '';
1033
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1033
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
1034
  {
1034
  {
1035
    $real_part = $1;
1035
    $real_part = $1;
1036
  }
1036
  }
1037
  return $int_part . $real_part;
1037
  return $int_part . $real_part;
1038
}
1038
}
1039
1039
1040
################################################################
1040
################################################################
1041
## Returns a string with leading and trailing whitespace removed
1041
## Returns a string with leading and trailing whitespace removed
1042
################################################################
1042
################################################################
1043
sub clean
1043
sub clean
1044
{
1044
{
1045
  my $dirty = shift;
1045
  my $dirty = shift;
1046
  my $clean = $dirty;
1046
  my $clean = $dirty;
1047
  $clean =~ s/^\s+|\s+$//g;
1047
  $clean =~ s/^\s+|\s+$//g;
1048
1048
1049
  return $clean;
1049
  return $clean;
1050
}
1050
}
1051
1051
1052
sub usage
1052
sub usage
1053
{
1053
{
1054
  println( __ "usage: newsstat.pl NEWS.GROUP" );
1054
  println( __ "usage: newsstat.pl NEWS.GROUP" );
1055
  exit 1;
1055
  exit 1;
1056
}
1056
}
1057
1057
1058
sub dmsg
1058
sub dmsg
1059
{
1059
{
1060
  print STDERR @_, "\n";
1060
  print STDERR @_, "\n";
1061
}
1061
}
1062
1062
1063
sub dmsg2
1063
sub dmsg2
1064
{
1064
{
1065
  my ( $level, @msg ) = @_;
1065
  my ( $level, @msg ) = @_;
1066
  print STDERR @msg, "\n" if $level >= DEBUG;
1066
  print STDERR @msg, "\n" if $level >= DEBUG;
1067
}
1067
}
1068
1068
1069
sub println
1069
sub println
1070
{
1070
{
1071
  print @_, "\n";
1071
  print @_, "\n";
1072
}
1072
}
1073
 
1073