Subversion Repositories LCARS

Rev

Rev 13 | Rev 22 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 13 Rev 14
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
use diagnostics;
4
use diagnostics;
5
use utf8;
5
use utf8;
6
use Encode;
6
use Encode;
7
7
8
## Print out all text to STDOUT UTF-8 encoded
8
## Print out all text to STDOUT UTF-8 encoded
9
binmode STDOUT, ':encoding(UTF-8)';
9
binmode STDOUT, ':encoding(UTF-8)';
10
10
11
############################
11
##############################
12
## newsstat.pl version 0.4.3
12
## newsstat.pl version 0.4.3.1
13
13
14
###########################################################################
14
###########################################################################
15
## Collect statistics about a newsgroup (specified by first argument)
15
## Collect statistics about a newsgroup (specified by first argument)
16
## in the local news spool. Check all articles in the last 30-day period.
16
## in the local news spool. Check all articles in the last 30-day period.
17
## Rank posters by number of posts and by volume of posts, report on top
17
## Rank posters by number of posts and by volume of posts, report on top
18
## and bottom 20 posters. Show their name, number of posts, size of posts,
18
## and bottom 20 posters. Show their name, number of posts, size of posts,
19
## percentage of quoted lines. Rank user-agents used, by poster rather
19
## percentage of quoted lines. Rank user-agents used, by poster rather
20
## than by post. Rank top 20 threads. Rank top 10 cross-posted groups.
20
## than by post. Rank top 20 threads. Rank top 10 cross-posted groups.
21
##
21
##
22
## Numbers and paths can be configured below.  See ChangeLog and TODO
22
## Numbers and paths can be configured below.  See ChangeLog and TODO
23
## for more.  -- PE
23
## for more.  -- PE
24
###########################################################################
24
###########################################################################
25
25
26
###################### USER CONFIGURATIONS ############################
26
###################### USER CONFIGURATIONS ############################
27
27
28
## The name of the group to do stats for
28
## The name of the group to do stats for
29
my $newsgroup_name = $ARGV[0];
29
my $newsgroup_name = $ARGV[0];
30
$newsgroup_name or &usage;
30
$newsgroup_name or &usage;
31
31
32
## Check for removal flags
32
## Check for removal flags
33
my $ix;
33
my $ix;
34
my $j;
34
my $j;
35
my %skipSec;
35
my %skipSec;
36
my @skiplist;
36
my @skiplist;
37
my $args = @ARGV;
37
my $args = @ARGV;
38
for ( $ix = 1 ; $ix < $args ; $ix++ )
38
for ( $ix = 1 ; $ix < $args ; $ix++ )
39
{
39
{
40
  $j = $ix + 1;
40
  $j = $ix + 1;
41
  if ( $ARGV[$ix] eq "-x" )
41
  if ( $ARGV[$ix] eq "-x" )
42
  {
42
  {
43
    @skiplist = split( ",", $ARGV[$j] );
43
    @skiplist = split( ",", $ARGV[$j] );
44
  }
44
  }
45
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
45
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
46
  {
46
  {
47
    @skiplist = split( ",", $1 );
47
    @skiplist = split( ",", $1 );
48
  }
48
  }
49
}
49
}
50
foreach (@skiplist)
50
foreach (@skiplist)
51
{
51
{
52
  $skipSec{$_} = 1;
52
  $skipSec{$_} = 1;
53
}
53
}
54
54
55
## Leafnode users will want /var/spool/news for this variable.
55
## Leafnode users will want /var/spool/news for this variable.
56
my $news = "/var/spool/news/";
56
my $news = "/var/spool/news/";
57
57
58
## How many days are we doing statistics for?
58
## How many days are we doing statistics for?
59
my $numdays = 30;
59
my $numdays = 30;
60
60
61
## Number of agents we list
61
## Number of agents we list
62
my $topagents = 10;
62
my $topagents = 10;
63
63
64
## Number of threads we want to know about
64
## Number of threads we want to know about
65
my $topthreads = 20;
65
my $topthreads = 20;
66
66
67
## Number of top or bottom posters to show
67
## Number of top or bottom posters to show
68
my $topposters = 20;
68
my $topposters = 20;
69
69
70
## Number of cross-posted threads to show
70
## Number of cross-posted threads to show
71
my $topcrossposts = 10;
71
my $topcrossposts = 10;
72
72
73
## Number of time zones to show
73
## Number of time zones to show
74
my $toptz = 10;
74
my $toptz = 10;
75
75
76
###################### DATA STRUCTURES ######################
76
###################### DATA STRUCTURES ######################
77
my $group = $newsgroup_name;
77
my $group = $newsgroup_name;
78
$group =~ s!\.!/!g;
78
$group =~ s!\.!/!g;
79
my %data;          # name, count, agent, total, orig, quoted
79
my %data;          # name, count, agent, total, orig, quoted
80
my %threads;       # subject, count
80
my %threads;       # subject, count
81
my %crossposts;    # group, count
81
my %crossposts;    # group, count
82
my %tz;            # timezones by count
82
my %tz;            # timezones by count
83
my %headers;       # holds header of current article
83
my %headers;       # holds header of current article
84
my %lcheader;      # holds lowercase headers
84
my %lcheader;      # holds lowercase headers
85
my @body;          # holds body of current article
85
my @body;          # holds body of current article
86
my @sig;           # holds sig text;
86
my @sig;           # holds sig text;
87
my $totalposts;    # total no. of posts considered
87
my $totalposts;    # total no. of posts considered
88
my $filename;      # name of current article file
88
my $filename;      # name of current article file
89
my $filesize;      # size of current article file
89
my $filesize;      # size of current article file
90
my $earliest;      # earliest article we have found
90
my $earliest;      # earliest article we have found
91
my $latest;        # latest article we have found
91
my $latest;        # latest article we have found
92
my $poster;        # poster we are dealing with
92
my $poster;        # poster we are dealing with
93
my $totsize   = 0; # holds total sizes of all files
93
my $totsize   = 0; # holds total sizes of all files
94
my $totheader = 0; # total size of header material
94
my $totheader = 0; # total size of header material
95
my $totbody   = 0; # total size of body material
95
my $totbody   = 0; # total size of body material
96
my $totsig    = 0; # total size of sig material
96
my $totsig    = 0; # total size of sig material
97
my $totorig   = 0; # total size of original material
97
my $totorig   = 0; # total size of original material
98
my $totquoted = 0; # total size of quoted material
98
my $totquoted = 0; # total size of quoted material
99
my $origposts = 0; # total no. of original posts
99
my $origposts = 0; # total no. of original posts
100
my $replies   = 0; # total no. of replies
100
my $replies   = 0; # total no. of replies
101
my $i;             # general purpose
101
my $i;             # general purpose
102
my %distinct_agent;
102
my %distinct_agent;
103
103
104
## Used to hold counts of User Agents used
104
## Used to hold counts of User Agents used
105
my %agents = (
105
my %agents = (
106
  "Compuserver"               => 0,
106
  "Compuserver"               => 0,
107
  "Foorum"                    => 0,
107
  "Foorum"                    => 0,
108
  "Forte Agent"               => 0,
108
  "Forte Agent"               => 0,
109
  "Forte Free Agent"          => 0,
109
  "Forte Free Agent"          => 0,
110
  "Gnus"                      => 0,
110
  "Gnus"                      => 0,
111
  "KNode"                     => 0,
111
  "KNode"                     => 0,
112
  "MacSOUP"                   => 0,
112
  "MacSOUP"                   => 0,
113
  "MT-NewsWatcher"            => 0,
113
  "MT-NewsWatcher"            => 0,
114
  "MicroPlanet Gravity"       => 0,
114
  "MicroPlanet Gravity"       => 0,
115
  "Microsoft Outlook Express" => 0,
115
  "Microsoft Outlook Express" => 0,
116
  "Microsoft Windows Mail"    => 0,
116
  "Microsoft Windows Mail"    => 0,
117
  "Mozilla"                   => 0,
117
  "Mozilla"                   => 0,
118
  "News Rover"                => 0,
118
  "News Rover"                => 0,
119
  "NN"                        => 0,
119
  "NN"                        => 0,
120
  "Pan"                       => 0,
120
  "Pan"                       => 0,
121
  "rn"                        => 0,
121
  "rn"                        => 0,
122
  "slrn"                      => 0,
122
  "slrn"                      => 0,
123
  "Sylpheed"                  => 0,
123
  "Sylpheed"                  => 0,
124
  "tin"                       => 0,
124
  "tin"                       => 0,
125
  "VSoup"                     => 0,
125
  "VSoup"                     => 0,
126
  "WebTV"                     => 0,
126
  "WebTV"                     => 0,
127
  "Xnews"                     => 0
127
  "Xnews"                     => 0
128
);
128
);
129
129
130
######################## MAIN CODE ########################
130
######################## MAIN CODE ########################
131
$! = 1;
131
$! = 1;
132
132
133
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
133
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
134
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
134
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
135
while ( defined( $filename = readdir(DIR) ) )
135
while ( defined( $filename = readdir(DIR) ) )
136
{
136
{
137
  %lcheader = ();
137
  %lcheader = ();
138
  next unless -f $filename;               # only want real files
138
  next unless -f $filename;               # only want real files
139
  next if ( $filename eq ".overview" );   # real articles only
139
  next if ( $filename eq ".overview" );   # real articles only
140
  next if ( -M $filename > $numdays );    # only want articles <= a certain age
140
  next if ( -M $filename > $numdays );    # only want articles <= a certain age
141
  $earliest = ( stat $filename )[9] unless defined($earliest);
141
  $earliest = ( stat $filename )[9] unless defined($earliest);
142
  $latest   = ( stat $filename )[9] unless defined($latest);
142
  $latest   = ( stat $filename )[9] unless defined($latest);
143
  &get_article($filename);                 # read in the article
143
  &get_article($filename);                 # read in the article
144
  &get_data;                               # grab the data from the article
144
  &get_data;                               # grab the data from the article
145
  $totalposts++;                          # bump count of articles considered
145
  $totalposts++;                          # bump count of articles considered
146
}
146
}
147
closedir(DIR);                            # finished with the directory
147
closedir(DIR);                            # finished with the directory
148
148
149
## Post-processing
149
## Post-processing
150
&count_agents;    # count agents, collapsing versions
150
&count_agents;    # count agents, collapsing versions
151
&fix_percent;     # check percentages orig/total for posters
151
&fix_percent;     # check percentages orig/total for posters
152
152
153
&write_data;
153
&write_data;
154
154
155
#################### DISPLAY RESULTS #####################
155
#################### DISPLAY RESULTS #####################
156
print "=" x 76, "\n";
156
print "=" x 76, "\n";
157
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
157
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
158
print "=" x 76, "\n";
158
print "=" x 76, "\n";
159
printf "%s\n",
159
printf "%s\n",
160
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
160
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
161
print "\n\n";
161
print "\n\n";
162
printf "Total posts considered: %s over %d days\n", commify($totalposts),
162
printf "Total posts considered: %s over %d days\n", commify($totalposts),
163
  $numdays;
163
  $numdays;
164
printf "Earliest article: %s\n",               scalar localtime($earliest);
164
printf "Earliest article: %s\n",               scalar localtime($earliest);
165
printf "Latest article:   %s\n",               scalar localtime($latest);
165
printf "Latest article:   %s\n",               scalar localtime($latest);
166
printf "Original articles: %s, replies: %s\n", commify($origposts),
166
printf "Original articles: %s, replies: %s\n", commify($origposts),
167
  commify($replies);
167
  commify($replies);
168
printf "Total size of posts: %s bytes (%s KiB) (%.2f MiB)\n", commify($totsize),
168
printf "Total size of posts: %s bytes (%s KiB) (%.2f MiB)\n", commify($totsize),
169
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
169
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
170
printf "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
170
printf "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
171
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
171
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
172
  commify( int( $totsize / $totalposts ) );
172
  commify( int( $totsize / $totalposts ) );
173
my $count = keys %data;
173
my $count = keys %data;
174
printf "Total headers: %s KiB  bodies: %s KiB\n",
174
printf "Total headers: %s KiB  bodies: %s KiB\n",
175
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
175
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
176
printf "Body text - quoted: %s KiB,  original: %s KiB = %02.2f%%, sigs: %s KiB\n",
176
printf "Body text - quoted: %s KiB,  original: %s KiB = %02.2f%%, sigs: %s KiB\n",
177
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
177
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
178
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
178
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
179
  commify( int( $totsig / 1024 ) );
179
  commify( int( $totsig / 1024 ) );
180
printf "Total number of posters: %s, average %s bytes per poster\n",
180
printf "Total number of posters: %s, average %s bytes per poster\n",
181
  commify($count), commify( int( $totsize / $count ) );     #/
181
  commify($count), commify( int( $totsize / $count ) );     #/
182
$count = keys %threads;
182
$count = keys %threads;
183
printf "Total number of threads: %s, average %s bytes per thread\n",
183
printf "Total number of threads: %s, average %s bytes per thread\n",
184
  commify($count), commify( int( $totsize / $count ) );     #/
184
  commify($count), commify( int( $totsize / $count ) );     #/
185
printf "Total number of user agents: %d\n", scalar keys %agents;
185
printf "Total number of user agents: %d\n", scalar keys %agents;
186
print "\n", "=" x 76, "\n";
186
print "\n", "=" x 76, "\n";
187
187
188
########################################
188
########################################
189
## Show posters by article count  Sec 1;
189
## Show posters by article count  Sec 1;
190
########################################
190
########################################
191
unless ( $skipSec{1} )
191
unless ( $skipSec{1} )
192
{
192
{
193
  if ( keys %data < $topposters )
193
  if ( keys %data < $topposters )
194
  {
194
  {
195
    $count = keys %data;
195
    $count = keys %data;
196
  }
196
  }
197
  else
197
  else
198
  {
198
  {
199
    $count = $topposters;
199
    $count = $topposters;
200
  }
200
  }
201
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
201
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
202
  print "=" x 76, "\n";
202
  print "=" x 76, "\n";
203
  $i = 0;
203
  $i = 0;
204
  foreach
204
  foreach
205
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
205
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
206
  {
206
  {
207
    my $name = substr( $poster, 0, 65 );
207
    my $name = substr( $poster, 0, 65 );
208
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
208
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
209
      $data{$poster}{count};
209
      $data{$poster}{count};
210
    last if ( ++$i == $count );
210
    last if ( ++$i == $count );
211
  }
211
  }
212
  print "\n", "=" x 76, "\n";
212
  print "\n", "=" x 76, "\n";
213
}
213
}
214
214
215
######################################
215
######################################
216
## Show posters by size in KiB  Sec 2;
216
## Show posters by size in KiB  Sec 2;
217
######################################
217
######################################
218
unless ( $skipSec{2} )
218
unless ( $skipSec{2} )
219
{
219
{
220
  if ( keys %data < $topposters )
220
  if ( keys %data < $topposters )
221
  {
221
  {
222
    $count = keys %data;
222
    $count = keys %data;
223
  }
223
  }
224
  else
224
  else
225
  {
225
  {
226
    $count = $topposters;
226
    $count = $topposters;
227
  }
227
  }
228
  printf "%s\n", &centred( "Top $count posters by article size in KiB", 76 );
228
  printf "%s\n", &centred( "Top $count posters by article size in KiB", 76 );
229
  print "=" x 76, "\n";
229
  print "=" x 76, "\n";
230
  $i = 0;
230
  $i = 0;
231
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
231
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
232
  {
232
  {
233
    my $name = substr( $poster, 0, 62 );
233
    my $name = substr( $poster, 0, 62 );
234
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
234
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
235
      $data{$poster}{size} / 1024;    #/
235
      $data{$poster}{size} / 1024;    #/
236
    last if ( ++$i == $count );
236
    last if ( ++$i == $count );
237
  }
237
  }
238
  print "\n", "=" x 76, "\n";
238
  print "\n", "=" x 76, "\n";
239
}
239
}
240
240
241
#####################################
241
#####################################
242
## Show top posters for original text
242
## Show top posters for original text
243
#####################################
243
#####################################
244
unless ( $skipSec{3} )
244
unless ( $skipSec{3} )
245
{
245
{
246
  if ( keys %data < $topposters )
246
  if ( keys %data < $topposters )
247
  {
247
  {
248
    $count = keys %data;
248
    $count = keys %data;
249
  }
249
  }
250
  else
250
  else
251
  {
251
  {
252
    $count = $topposters;
252
    $count = $topposters;
253
  }
253
  }
254
  printf "%s\n",
254
  printf "%s\n",
255
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
255
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
256
  print "=" x 76, "\n";
256
  print "=" x 76, "\n";
257
  $i = 0;
257
  $i = 0;
258
  foreach my $poster (
258
  foreach my $poster (
259
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
259
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
260
    keys %data
260
    keys %data
261
    )
261
    )
262
  {
262
  {
263
    next if $data{$poster}{quoted} == 0;
263
    next if $data{$poster}{quoted} == 0;
264
    next if $data{$poster}{count} < 5;
264
    next if $data{$poster}{count} < 5;
265
    my $name = substr( $poster, 0, 63 );
265
    my $name = substr( $poster, 0, 63 );
266
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
266
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
267
      $data{$poster}{percent};
267
      $data{$poster}{percent};
268
    last if ( ++$i == $count );
268
    last if ( ++$i == $count );
269
  }
269
  }
270
  print "\n", "=" x 76, "\n";
270
  print "\n", "=" x 76, "\n";
271
}
271
}
272
272
273
########################################
273
########################################
274
## Show bottom posters for original text
274
## Show bottom posters for original text
275
########################################
275
########################################
276
unless ( $skipSec{4} )
276
unless ( $skipSec{4} )
277
{
277
{
278
  if ( keys %data < $topposters )
278
  if ( keys %data < $topposters )
279
  {
279
  {
280
    $count = keys %data;
280
    $count = keys %data;
281
  }
281
  }
282
  else
282
  else
283
  {
283
  {
284
    $count = $topposters;
284
    $count = $topposters;
285
  }
285
  }
286
  printf "%s\n",
286
  printf "%s\n",
287
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
287
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
288
  print "=" x 76, "\n";
288
  print "=" x 76, "\n";
289
  $i = 0;
289
  $i = 0;
290
  foreach my $poster (
290
  foreach my $poster (
291
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
291
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
292
    keys %data
292
    keys %data
293
    )
293
    )
294
  {
294
  {
295
    next if $data{$poster}{quoted} == 0;
295
    next if $data{$poster}{quoted} == 0;
296
    next if $data{$poster}{count} < 5;
296
    next if $data{$poster}{count} < 5;
297
    my $name = substr( $poster, 0, 63 );
297
    my $name = substr( $poster, 0, 63 );
298
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
298
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
299
      $data{$poster}{percent};
299
      $data{$poster}{percent};
300
    last if ( ++$i == $count );
300
    last if ( ++$i == $count );
301
  }
301
  }
302
  print "\n", "=" x 76, "\n";
302
  print "\n", "=" x 76, "\n";
303
}
303
}
304
304
305
#####################################
305
#####################################
306
## Show threads by number of articles
306
## Show threads by number of articles
307
#####################################
307
#####################################
308
unless ( $skipSec{5} )
308
unless ( $skipSec{5} )
309
{
309
{
310
  if ( keys %threads < $topthreads )
310
  if ( keys %threads < $topthreads )
311
  {
311
  {
312
    $count = keys %threads;
312
    $count = keys %threads;
313
  }
313
  }
314
  else
314
  else
315
  {
315
  {
316
    $count = $topthreads;
316
    $count = $topthreads;
317
  }
317
  }
318
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
318
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
319
  print "=" x 76, "\n";
319
  print "=" x 76, "\n";
320
  $i = 0;
320
  $i = 0;
321
  foreach my $thread (
321
  foreach my $thread (
322
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
322
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
323
    keys %threads
323
    keys %threads
324
    )
324
    )
325
  {
325
  {
326
    my $name = substr( $thread, 0, 65 );
326
    my $name = substr( $thread, 0, 65 );
327
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
327
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
328
      $threads{$thread}{count};
328
      $threads{$thread}{count};
329
    last if ( ++$i == $count );
329
    last if ( ++$i == $count );
330
  }
330
  }
331
  print "\n", "=" x 76, "\n";
331
  print "\n", "=" x 76, "\n";
332
}
332
}
333
333
334
##############################
334
##############################
335
## Show threads by size in KiB
335
## Show threads by size in KiB
336
##############################
336
##############################
337
unless ( $skipSec{6} )
337
unless ( $skipSec{6} )
338
{
338
{
339
  if ( keys %threads < $topthreads )
339
  if ( keys %threads < $topthreads )
340
  {
340
  {
341
    $count = keys %threads;
341
    $count = keys %threads;
342
  }
342
  }
343
  else
343
  else
344
  {
344
  {
345
    $count = $topthreads;
345
    $count = $topthreads;
346
  }
346
  }
347
  printf "%s\n", &centred( "Top $count threads by size in KiB", 76 );
347
  printf "%s\n", &centred( "Top $count threads by size in KiB", 76 );
348
  print "=" x 76, "\n";
348
  print "=" x 76, "\n";
349
  $i = 0;
349
  $i = 0;
350
  foreach my $thread (
350
  foreach my $thread (
351
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
351
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
352
    keys %threads
352
    keys %threads
353
    )
353
    )
354
  {
354
  {
355
    my $name = substr( $thread, 0, 65 );
355
    my $name = substr( $thread, 0, 65 );
356
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
356
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
357
      $threads{$thread}{size} / 1024;    #/
357
      $threads{$thread}{size} / 1024;    #/
358
    last if ( ++$i == $count );
358
    last if ( ++$i == $count );
359
  }
359
  }
360
  print "\n", "=" x 76, "\n";
360
  print "\n", "=" x 76, "\n";
361
}
361
}
362
362
363
##################################
363
##################################
364
## Show top 10 cross-posted groups
364
## Show top 10 cross-posted groups
365
##################################
365
##################################
366
unless ( $skipSec{7} )
366
unless ( $skipSec{7} )
367
{
367
{
368
  delete $crossposts{"$newsgroup_name"};    # don't include ours
368
  delete $crossposts{"$newsgroup_name"};    # don't include ours
369
  if ( keys %crossposts < $topcrossposts )
369
  if ( keys %crossposts < $topcrossposts )
370
  {
370
  {
371
    $count = keys %crossposts;
371
    $count = keys %crossposts;
372
  }
372
  }
373
  else
373
  else
374
  {
374
  {
375
    $count = $topcrossposts;
375
    $count = $topcrossposts;
376
  }
376
  }
377
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
377
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
378
  print "=" x 76, "\n";
378
  print "=" x 76, "\n";
379
  $i = 0;
379
  $i = 0;
380
  foreach
380
  foreach
381
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
381
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
382
  {
382
  {
383
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
383
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
384
      $crossposts{$name};
384
      $crossposts{$name};
385
    last if ( ++$i == $count );
385
    last if ( ++$i == $count );
386
  }
386
  }
387
  print "\n", "=" x 76, "\n";
387
  print "\n", "=" x 76, "\n";
388
}
388
}
389
389
390
#########################
390
#########################
391
## Show agents and counts
391
## Show agents and counts
392
#########################
392
#########################
393
unless ( $skipSec{8} )
393
unless ( $skipSec{8} )
394
{
394
{
395
  if ( keys %agents < $topagents )
395
  if ( keys %agents < $topagents )
396
  {
396
  {
397
    $count = keys %agents;
397
    $count = keys %agents;
398
  }
398
  }
399
  else
399
  else
400
  {
400
  {
401
    $count = $topagents;
401
    $count = $topagents;
402
  }
402
  }
403
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
403
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
404
  print "=" x 76, "\n";
404
  print "=" x 76, "\n";
405
  $i = 0;
405
  $i = 0;
406
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
406
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
407
  {
407
  {
408
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
408
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
409
      $agents{$agent};
409
      $agents{$agent};
410
    last if ( ++$i == $count );
410
    last if ( ++$i == $count );
411
  }
411
  }
412
  print "\n", "=" x 76, "\n";
412
  print "\n", "=" x 76, "\n";
413
}
413
}
414
414
415
#######################
415
#######################
416
## Show distinct agents
416
## Show distinct agents
417
#######################
417
#######################
418
unless ( $skipSec{9} )
418
unless ( $skipSec{9} )
419
{
419
{
420
  if ( keys %distinct_agent < $topagents )
420
  if ( keys %distinct_agent < $topagents )
421
  {
421
  {
422
    $count = keys %distinct_agent;
422
    $count = keys %distinct_agent;
423
  }
423
  }
424
  else
424
  else
425
  {
425
  {
426
    $count = $topagents;
426
    $count = $topagents;
427
  }
427
  }
428
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
428
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
429
  print "=" x 76, "\n";
429
  print "=" x 76, "\n";
430
  $i = 0;
430
  $i = 0;
431
  foreach my $agent (
431
  foreach my $agent (
432
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
432
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
433
    keys %distinct_agent
433
    keys %distinct_agent
434
    )
434
    )
435
  {
435
  {
436
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
436
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
437
      $distinct_agent{$agent},
437
      $distinct_agent{$agent},
438
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
438
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
439
    last if ( ++$i == $count );
439
    last if ( ++$i == $count );
440
  }
440
  }
441
  print "\n", "=" x 76, "\n";
441
  print "\n", "=" x 76, "\n";
442
}
442
}
443
443
444
############################
444
############################
445
## Show timezones and counts
445
## Show timezones and counts
446
############################
446
############################
447
unless ( $skipSec{10} )
447
unless ( $skipSec{10} )
448
{
448
{
449
  if ( keys %tz < $toptz )
449
  if ( keys %tz < $toptz )
450
  {
450
  {
451
    $count = keys %tz;
451
    $count = keys %tz;
452
  }
452
  }
453
  else
453
  else
454
  {
454
  {
455
    $count = $toptz;
455
    $count = $toptz;
456
  }
456
  }
457
  printf "%s\n", &centred( "Top 10 time zones", 76 );
457
  printf "%s\n", &centred( "Top 10 time zones", 76 );
458
  print "=" x 76, "\n";
458
  print "=" x 76, "\n";
459
  $i = 0;
459
  $i = 0;
460
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
460
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
461
  {
461
  {
462
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
462
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
463
    last if ( ++$i == $count );
463
    last if ( ++$i == $count );
464
  }
464
  }
465
  print "\n", "=" x 76, "\n";
465
  print "\n", "=" x 76, "\n";
466
}
466
}
467
467
468
################################ SUBROUTINES ################################
468
################################ SUBROUTINES ################################
469
469
470
########################################
470
########################################
471
## Get current article's header and body
471
## Get current article's header and body
472
########################################
472
########################################
473
sub get_article
473
sub get_article
474
{
474
{
475
  %headers = ();    # dump old headers
475
  %headers = ();    # dump old headers
476
  my $filename = shift;    # get the name of the file
476
  my $filename = shift;    # get the name of the file
477
477
478
  ## get stats about the file itself
478
  ## get stats about the file itself
479
  $filesize = -s $filename;    # get total size of file
479
  $filesize = -s $filename;    # get total size of file
480
  $totsize += $filesize;       # bump total sizes of all files
480
  $totsize += $filesize;       # bump total sizes of all files
481
481
482
  my $mtime = ( stat $filename )[9];
482
  my $mtime = ( stat $filename )[9];
483
  if ( $mtime < $earliest )
483
  if ( $mtime < $earliest )
484
  {
484
  {
485
    $earliest = $mtime;
485
    $earliest = $mtime;
486
  }
486
  }
487
  elsif ( $mtime > $latest )
487
  elsif ( $mtime > $latest )
488
  {
488
  {
489
    $latest = $mtime;
489
    $latest = $mtime;
490
  }
490
  }
491
491
492
  ## now read the file
492
  ## now read the file
493
  open( my $FILE, '<', $filename ) or die "Can't open $filename: $!\n";
493
  open( my $FILE, '<', $filename ) or die "Can't open $filename: $!\n";
494
  while (<$FILE>)
494
  while (<$FILE>)
495
  {
495
  {
496
    $totheader += length($_);    # bump total header size
496
    $totheader += length($_);    # bump total header size
497
    last if (/^\s*$/);           # end of header?
497
    last if (/^\s*$/);           # end of header?
498
    if (/^([^:\s]*):\s*(.*)/)
498
    if (/^([^:\s]*):\s*(.*)/)
499
    {
499
    {
500
      my ( $key, $val ) = ( $1, $2 );
500
      my ( $key, $val ) = ( $1, $2 );
501
      $headers{$key} = decode( 'MIME-Header', $val );
501
      $headers{$key} = decode( 'MIME-Header', $val );
502
      $lcheader{ clean( lc($key) ) } = clean($val);
502
      $lcheader{ clean( lc($key) ) } = clean($val);
503
    }
503
    }
504
  }
504
  }
505
  @body = <$FILE>;               # slurp up body
505
  @body = <$FILE>;               # slurp up body
506
  close($FILE);
506
  close($FILE);
507
}    # get_article
507
}    # get_article
508
508
509
####################################
509
####################################
510
## Get data from the current article
510
## Get data from the current article
511
####################################
511
####################################
512
sub get_data
512
sub get_data
513
{
513
{
514
#### First, analyse header fields ####
514
#### First, analyse header fields ####
515
515
516
  ## Set up this poster if not defined, get counts, sizes
516
  ## Set up this poster if not defined, get counts, sizes
517
  my $poster = $headers{From};    # get the poster's name
517
  my $poster = $headers{From};    # get the poster's name
518
 
518
 
519
  # Convert old to new format
519
  # Convert old to new format
520
  $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
520
  $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
521
 
521
 
522
  # Collapse whitespace
522
  # Collapse whitespace
523
  $poster =~ s/\s+/ /;
523
  $poster =~ s/\s+/ /;
524
 
524
 
525
  # Remove outer quotes
525
  # Remove outer quotes
526
  $poster =~ s/^["'](.+?)["']\s+(.*)/$1 $2/;
526
  $poster =~ s/^["'](.+?)["']\s+(.*)/$1 $2/;
527
 
527
 
528
  if ( !defined( $data{$poster} ) )
528
  if ( !defined( $data{$poster} ) )
529
  {                               # seen this one before?
529
  {                               # seen this one before?
530
    $data{$poster}{agent}  = 'Unknown';    # comes after For: field
530
    $data{$poster}{agent}  = 'Unknown';    # comes after For: field
531
    $data{$poster}{orig}   = 0;
531
    $data{$poster}{orig}   = 0;
532
    $data{$poster}{quoted} = 0;
532
    $data{$poster}{quoted} = 0;
533
  }
533
  }
534
  $data{$poster}{count}++;                 # bump count for this poster
534
  $data{$poster}{count}++;                 # bump count for this poster
535
  $data{$poster}{size} += $filesize;       # total size of file
535
  $data{$poster}{size} += $filesize;       # total size of file
536
536
537
  ## The User-Agent and/or X-Newsreader fields
537
  ## The User-Agent and/or X-Newsreader fields
538
  ## for User-Agent by poster
538
  ## for User-Agent by poster
539
  if ( defined $lcheader{"user-agent"} )
539
  if ( defined $lcheader{"user-agent"} )
540
  {
540
  {
541
    $data{$poster}{agent} = $lcheader{"user-agent"};
541
    $data{$poster}{agent} = $lcheader{"user-agent"};
542
  }
542
  }
543
  if ( defined $lcheader{"x-newsreader"} )
543
  if ( defined $lcheader{"x-newsreader"} )
544
  {
544
  {
545
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
545
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
546
  }
546
  }
547
547
548
  ## The User Agent for User-Agent by number of posts
548
  ## The User Agent for User-Agent by number of posts
549
  my $UA = "unknown";
549
  my $UA = "unknown";
550
  foreach my $keys ( keys %lcheader )
550
  foreach my $keys ( keys %lcheader )
551
  {
551
  {
552
    if ( defined $lcheader{'user-agent'} )
552
    if ( defined $lcheader{'user-agent'} )
553
    {
553
    {
554
      $UA = $lcheader{'user-agent'};
554
      $UA = $lcheader{'user-agent'};
555
    }
555
    }
556
    elsif ( defined $lcheader{"x-newsreader"} )
556
    elsif ( defined $lcheader{"x-newsreader"} )
557
    {
557
    {
558
      $UA = $lcheader{"x-newsreader"};
558
      $UA = $lcheader{"x-newsreader"};
559
    }
559
    }
560
    elsif ( defined $lcheader{'x-mailer'} )
560
    elsif ( defined $lcheader{'x-mailer'} )
561
    {
561
    {
562
      $UA = $lcheader{'x-mailer'};
562
      $UA = $lcheader{'x-mailer'};
563
    }
563
    }
564
    elsif (
564
    elsif (
565
      ( defined $lcheader{'organization'} )
565
      ( defined $lcheader{'organization'} )
566
      && ( $lcheader{'organization'} =~
566
      && ( $lcheader{'organization'} =~
567
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
567
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
568
      )
568
      )
569
    {
569
    {
570
      $UA = $lcheader{'organization'};
570
      $UA = $lcheader{'organization'};
571
    }
571
    }
572
    elsif ( $lcheader{'message-id'} =~ /pine/i )
572
    elsif ( $lcheader{'message-id'} =~ /pine/i )
573
    {
573
    {
574
      $UA = "Pine";
574
      $UA = "Pine";
575
    }    ## Hopefully found UA, else set to unknown
575
    }    ## Hopefully found UA, else set to unknown
576
  }
576
  }
577
577
578
  $UA = clean($UA);
578
  $UA = clean($UA);
579
  $UA = get_agent($UA);
579
  $UA = get_agent($UA);
580
580
581
  sub get_agent
581
  sub get_agent
582
  {
582
  {
583
    my $raw   = shift;
583
    my $raw   = shift;
584
    my $agent = $raw;
584
    my $agent = $raw;
585
585
586
    ## strip http
586
    ## strip http
587
    if ( $raw =~ /.*http.*/ )
587
    if ( $raw =~ /.*http.*/ )
588
    {
588
    {
589
      $raw =~ s!posted via!!i;
589
      $raw =~ s!posted via!!i;
590
      $raw =~ s!http://!!g;
590
      $raw =~ s!http://!!g;
591
      $raw =~ s!/!!g;
591
      $raw =~ s!/!!g;
592
      $raw =~ s! !!g;
592
      $raw =~ s! !!g;
593
    }
593
    }
594
594
595
    ## Fix Outlook from Mac
595
    ## Fix Outlook from Mac
596
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
596
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
597
597
598
    ## Pick out the popular agents
598
    ## Pick out the popular agents
599
    if (
599
    if (
600
           $raw =~ /(outlook express)/i
600
           $raw =~ /(outlook express)/i
601
        || $raw =~ /(windows mail)/i
601
        || $raw =~ /(windows mail)/i
602
        || $raw =~ /(microplanet gravity)/i
602
        || $raw =~ /(microplanet gravity)/i
603
        || $raw =~ /(news rover)/i
603
        || $raw =~ /(news rover)/i
604
        || $raw =~ /(forte agent)/i
604
        || $raw =~ /(forte agent)/i
605
        || $raw =~ /(forte free agent)/i
605
        || $raw =~ /(forte free agent)/i
606
      )
606
      )
607
    {
607
    {
608
      $agent = $1;
608
      $agent = $1;
609
    }
609
    }
610
    elsif (
610
    elsif (
611
      $raw =~ /^(
611
      $raw =~ /^(
612
        pan
612
        pan
613
       |sylpheed
613
       |sylpheed
614
       |slrn
614
       |slrn
615
       |mozilla
615
       |mozilla
616
       |knode
616
       |knode
617
       |tin
617
       |tin
618
       |hamster
618
       |hamster
619
       |xrn
619
       |xrn
620
       |xnews
620
       |xnews
621
       |aol
621
       |aol
622
       |gnus
622
       |gnus
623
       |krn
623
       |krn
624
       |macsoup
624
       |macsoup
625
       |messenger
625
       |messenger
626
       |openxp
626
       |openxp
627
       |pine
627
       |pine
628
       |thoth
628
       |thoth
629
       |turnpike
629
       |turnpike
630
       |winvn
630
       |winvn
631
       |vsoup
631
       |vsoup
632
       |google
632
       |google
633
       |supernews
633
       |supernews
634
       |nn
634
       |nn
635
       |rn
635
       |rn
636
       |007
636
       |007
637
       |webtv
637
       |webtv
638
       |compuserve
638
       |compuserve
639
       )/ix
639
       )/ix
640
      )
640
      )
641
    {
641
    {
642
      $agent = $1;
642
      $agent = $1;
643
    }
643
    }
644
    else
644
    else
645
    {
645
    {
646
      ## Clean up unknown agents
646
      ## Clean up unknown agents
647
      if ( $raw =~ m!^(.*?)/! )
647
      if ( $raw =~ m!^(.*?)/! )
648
      {
648
      {
649
        $agent = $1;
649
        $agent = $1;
650
      }
650
      }
651
      elsif ( $raw =~ /^(\w*)\d.*/ )
651
      elsif ( $raw =~ /^(\w*)\d.*/ )
652
      {
652
      {
653
        $agent = $1;
653
        $agent = $1;
654
      }
654
      }
655
    }
655
    }
656
656
657
    $distinct_agent{$agent}++;
657
    $distinct_agent{$agent}++;
658
    return $agent;
658
    return $agent;
659
  }
659
  }
660
660
661
  ## Get all cross-posted newsgroups
661
  ## Get all cross-posted newsgroups
662
  for ( split /,/, $headers{"Newsgroups"} )
662
  for ( split /,/, $headers{"Newsgroups"} )
663
  {
663
  {
664
    $crossposts{$_}++;    # bump count for each
664
    $crossposts{$_}++;    # bump count for each
665
  }
665
  }
666
666
667
  ## Get threads
667
  ## Get threads
668
  my $thread = $headers{"Subject"};
668
  my $thread = $headers{"Subject"};
669
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
669
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
670
  $thread =~ s/\s+/ /g;     # collapse whitespace
670
  $thread =~ s/\s+/ /g;     # collapse whitespace
671
  $threads{$thread}{count} += 1;            # bump count of this subject
671
  $threads{$thread}{count} += 1;            # bump count of this subject
672
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
672
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
673
673
674
  ## Is this an original post or a reply?
674
  ## Is this an original post or a reply?
675
  if ( defined $headers{"References"} )
675
  if ( defined $headers{"References"} )
676
  {
676
  {
677
    $replies++;
677
    $replies++;
678
  }
678
  }
679
  else
679
  else
680
  {
680
  {
681
    $origposts++;
681
    $origposts++;
682
  }
682
  }
683
683
684
  ## Get the time zone
684
  ## Get the time zone
685
  $_ = $headers{"Date"};
685
  $_ = $headers{"Date"};
686
  my ($tz) = /\d\d:\d\d(?::\d\d)?\s+(.*)/;
686
  my ($tz) = /\d\d:\d\d(?::\d\d)?\s+(.*)/;
687
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
687
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
688
  {
688
  {
689
    $tz = "UTC";
689
    $tz = "UTC";
690
  }
690
  }
691
  $tz{$tz}++;
691
  $tz{$tz}++;
692
692
693
#### Now analyse the body text ####
693
#### Now analyse the body text ####
694
  my $insig = 0;
694
  my $insig = 0;
695
  for (@body)
695
  for (@body)
696
  {
696
  {
697
    $totbody += length($_);    # bump total body size
697
    $totbody += length($_);    # bump total body size
698
    next if (/^$>/);           # don't count blank lines in body
698
    next if (/^$>/);           # don't count blank lines in body
699
    if ( $insig == 1 )
699
    if ( $insig == 1 )
700
    {
700
    {
701
      $totsig += length($_);    # bump total sig size
701
      $totsig += length($_);    # bump total sig size
702
702
703
      ## Bill Unruh uses ] quotes, and another poster uses ::
703
      ## Bill Unruh uses ] quotes, and another poster uses ::
704
    }
704
    }
705
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
705
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
706
    {                           # are we in a quote line?
706
    {                           # are we in a quote line?
707
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
707
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
708
      $totquoted += length($_);
708
      $totquoted += length($_);
709
    }
709
    }
710
    elsif (/-- /)
710
    elsif (/-- /)
711
    {
711
    {
712
      $insig = 1;
712
      $insig = 1;
713
    }
713
    }
714
    else
714
    else
715
    {
715
    {
716
716
717
      ## We must be processing an original line
717
      ## We must be processing an original line
718
      $data{$poster}{orig} += length($_);      # bump count of original chrs
718
      $data{$poster}{orig} += length($_);      # bump count of original chrs
719
      $totorig += length($_);
719
      $totorig += length($_);
720
    }
720
    }
721
  }    # end for (@body)
721
  }    # end for (@body)
722
722
723
}    # get_data
723
}    # get_data
724
724
725
#########################################
725
#########################################
726
## Count the User-Agents used, collapsing
726
## Count the User-Agents used, collapsing
727
## different versions into one per agent.
727
## different versions into one per agent.
728
#########################################
728
#########################################
729
sub count_agents
729
sub count_agents
730
{
730
{
731
POSTER:
731
POSTER:
732
  foreach my $poster ( keys %data )
732
  foreach my $poster ( keys %data )
733
  {
733
  {
734
    foreach my $agent_name ( keys %distinct_agent )
734
    foreach my $agent_name ( keys %distinct_agent )
735
    {    # check against known ones
735
    {    # check against known ones
736
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
736
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
737
      {
737
      {
738
        $agents{$agent_name}++;
738
        $agents{$agent_name}++;
739
        next POSTER;
739
        next POSTER;
740
      }
740
      }
741
    }
741
    }
742
    $agents{ $data{$poster}{agent} }++;
742
    $agents{ $data{$poster}{agent} }++;
743
  }
743
  }
744
}    # count_agents
744
}    # count_agents
745
745
746
#############################################
746
#############################################
747
## Set orig/total percentages for all posters
747
## Set orig/total percentages for all posters
748
#############################################
748
#############################################
749
sub fix_percent
749
sub fix_percent
750
{
750
{
751
  foreach my $poster ( keys %data )
751
  foreach my $poster ( keys %data )
752
  {
752
  {
753
    my $percent = 100;
753
    my $percent = 100;
754
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
754
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
755
    {
755
    {
756
      $percent =
756
      $percent =
757
        $data{$poster}{orig} * 100 /
757
        $data{$poster}{orig} * 100 /
758
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
758
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
759
    }
759
    }
760
    elsif ( $data{$poster}{orig} == 0 )
760
    elsif ( $data{$poster}{orig} == 0 )
761
    {
761
    {
762
      $percent = 0;
762
      $percent = 0;
763
    }
763
    }
764
    $data{$poster}{percent} = $percent;
764
    $data{$poster}{percent} = $percent;
765
  }
765
  }
766
}
766
}
767
767
768
###############################
768
###############################
769
## Right pad a string with '.'s
769
## Right pad a string with '.'s
770
###############################
770
###############################
771
sub rpad
771
sub rpad
772
{
772
{
773
  ## Get text to pad, length to pad, pad chr
773
  ## Get text to pad, length to pad, pad chr
774
  my ( $text, $pad_len, $pad_chr ) = @_;
774
  my ( $text, $pad_len, $pad_chr ) = @_;
775
775
776
  ## DEBUG
776
  ## DEBUG
777
#printf "|%s| = %d\n", $text, length($text);
777
#printf "|%s| = %d\n", $text, length($text);
778
778
779
  if ( length($text) > $pad_len )
779
  if ( length($text) > $pad_len )
780
  {
780
  {
781
    $text = substr( $text, 0, $pad_len );
781
    $text = substr( $text, 0, $pad_len );
782
  }
782
  }
783
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
783
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
784
  return $padded;
784
  return $padded;
785
}
785
}
786
786
787
##################
787
##################
788
## Centre a string
788
## Centre a string
789
##################
789
##################
790
sub centred
790
sub centred
791
{
791
{
792
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
792
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
793
  my $pad_len = ( $width - length($text) ) / 2;    #/
793
  my $pad_len = ( $width - length($text) ) / 2;    #/
794
  my $centred = " " x $pad_len . $text;
794
  my $centred = " " x $pad_len . $text;
795
  return $centred;
795
  return $centred;
796
}
796
}
797
797
798
###########################
798
###########################
799
## Put commas into a number
799
## Put commas into a number
800
###########################
800
###########################
801
sub commify
801
sub commify
802
{
802
{
803
  local $_ = shift;
803
  local $_ = shift;
804
  1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
804
  1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
805
  return $_;
805
  return $_;
806
}
806
}
807
807
808
################################################################
808
################################################################
809
## Returns a string with leading and trailing whitespace removed
809
## Returns a string with leading and trailing whitespace removed
810
################################################################
810
################################################################
811
sub clean
811
sub clean
812
{
812
{
813
  my $dirty = shift;
813
  my $dirty = shift;
814
  my $clean = $dirty;
814
  my $clean = $dirty;
815
  $clean =~ s/^\s*|\s*$//g;
815
  $clean =~ s/^\s+|\s+$//g;
816
816
817
  return $clean;
817
  return $clean;
818
}
818
}
819
819
820
sub usage
820
sub usage
821
{
821
{
822
  print "usage: newstat.pl newsgroupname\n";
822
  print "usage: newstat.pl newsgroupname\n";
823
  exit 1;
823
  exit 1;
824
}
824
}
825
825
826
##################################
826
##################################
827
## Write data structures to a file
827
## Write data structures to a file
828
##################################
828
##################################
829
sub write_data
829
sub write_data
830
{
830
{
831
  open my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA"
831
  open my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA"
832
    or die "Can't create XDATA: $!\n";
832
    or die "Can't create XDATA: $!\n";
833
  print $OUTF "Data collected from $newsgroup_name\n\n";
833
  print $OUTF "Data collected from $newsgroup_name\n\n";
834
  print $OUTF
834
  print $OUTF
835
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
835
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
836
  foreach my $name ( keys %data )
836
  foreach my $name ( keys %data )
837
  {
837
  {
838
    print $OUTF
838
    print $OUTF
839
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
839
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
840
  }
840
  }
841
  print $OUTF
841
  print $OUTF
842
"============================================================================\n";
842
"============================================================================\n";
843
  print $OUTF "Thread subjects\n";
843
  print $OUTF "Thread subjects\n";
844
  print $OUTF
844
  print $OUTF
845
"----------------------------------------------------------------------------\n";
845
"----------------------------------------------------------------------------\n";
846
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
846
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
847
  {
847
  {
848
    print $OUTF
848
    print $OUTF
849
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
849
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
850
  }
850
  }
851
  print $OUTF
851
  print $OUTF
852
"============================================================================\n";
852
"============================================================================\n";
853
  print $OUTF "Cross-posts\n";
853
  print $OUTF "Cross-posts\n";
854
  print $OUTF
854
  print $OUTF
855
"----------------------------------------------------------------------------\n";
855
"----------------------------------------------------------------------------\n";
856
  foreach my $name ( sort keys %crossposts )
856
  foreach my $name ( sort keys %crossposts )
857
  {
857
  {
858
    print $OUTF "$name : $crossposts{$name}\n";
858
    print $OUTF "$name : $crossposts{$name}\n";
859
  }
859
  }
860
  print $OUTF
860
  print $OUTF
861
"============================================================================\n";
861
"============================================================================\n";
862
  print $OUTF "User agents\n";
862
  print $OUTF "User agents\n";
863
  print $OUTF
863
  print $OUTF
864
"----------------------------------------------------------------------------\n";
864
"----------------------------------------------------------------------------\n";
865
  foreach my $name ( sort keys %agents )
865
  foreach my $name ( sort keys %agents )
866
  {
866
  {
867
    print $OUTF "$name : $agents{$name}\n";
867
    print $OUTF "$name : $agents{$name}\n";
868
  }
868
  }
869
  print $OUTF
869
  print $OUTF
870
"============================================================================\n";
870
"============================================================================\n";
871
  print $OUTF "Time zones\n";
871
  print $OUTF "Time zones\n";
872
  print $OUTF
872
  print $OUTF
873
"----------------------------------------------------------------------------\n";
873
"----------------------------------------------------------------------------\n";
874
  foreach my $name ( sort keys %tz )
874
  foreach my $name ( sort keys %tz )
875
  {
875
  {
876
    print $OUTF "$name : $tz{$name}\n";
876
    print $OUTF "$name : $tz{$name}\n";
877
  }
877
  }
878
  close $OUTF;
878
  close $OUTF;
879
}    # write_data
879
}    # write_data
880
 
880