Subversion Repositories LCARS

Rev

Rev 6 | Rev 8 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 6 Rev 7
Line 1... Line 1...
1
#!/usr/bin/perl -w
1
#!/usr/bin/perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
use utf8;
4
use utf8;
5
use Encode;
5
use Encode;
6
6
7
#########################
7
#########################
8
# newsstat.pl version 0.4
8
# newsstat.pl version 0.4
9
9
10
############################################################################
10
############################################################################
11
# Collect statistics about a newsgroup (specified by first argument) in
11
# Collect statistics about a newsgroup (specified by first argument) in
12
# the local news spool. Check all articles in the last 30-day period. 
12
# the local news spool. Check all articles in the last 30-day period.
13
# Rank posters by number of posts and by volume of posts, report on top and
13
# Rank posters by number of posts and by volume of posts, report on top and
14
# bottom 20 posters. Show their name, number of posts, size of posts,
14
# bottom 20 posters. Show their name, number of posts, size of posts,
15
# percentage of quoted lines. Rank user-agents used, by poster rather than
15
# percentage of quoted lines. Rank user-agents used, by poster rather than
16
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
16
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
17
# 
17
#
18
# (Numbers and paths can be configured below.  -- PE)
18
# (Numbers and paths can be configured below.  -- PE)
19
############################################################################
19
############################################################################
20
20
21
############################################################################
21
############################################################################
22
#                       RECENT CHANGES                                     #
22
#                       RECENT CHANGES                                     #
23
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
23
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
24
#                 - Use warnings, utf8 (just in case) 
24
#                 - Use warnings, utf8 (just in case)
25
#                 - Documentation update
25
#                 - Documentation update
26
# N/A         NN  - Take newsgroup name as argument
26
# N/A         NN  - Take newsgroup name as argument
27
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
27
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
28
#                 - Allow command line flags for subtracting
28
#                 - Allow command line flags for subtracting
29
#                   output if not pertinent for a group
29
#                   output if not pertinent for a group
Line 37... Line 37...
37
# 2002-11-05  NN  - moved user configurations to the top
37
# 2002-11-05  NN  - moved user configurations to the top
38
#                 - fixed the cross-posting section
38
#                 - fixed the cross-posting section
39
#                 - introduced the $newsgroup_name variable which
39
#                 - introduced the $newsgroup_name variable which
40
#                   later becomes $news$group
40
#                   later becomes $news$group
41
#                 - changed $name to $agent_name in countagents()
41
#                 - changed $name to $agent_name in countagents()
42
# 
42
#
43
# Contributors
43
# Contributors
44
# -------------
44
# -------------
45
# NN  Nomen nominandum (name to be determined later)
45
# NN  Nomen nominandum (name to be determined later)
46
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
46
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
47
47
Line 73... Line 73...
73
my $ix;
73
my $ix;
74
my $j;
74
my $j;
75
my %skipSec;
75
my %skipSec;
76
my @skiplist;
76
my @skiplist;
77
my $args = @ARGV;
77
my $args = @ARGV;
78
for ( $ix = 1 ; $ix < $args ; $ix++ ) {
78
for ( $ix = 1 ; $ix < $args ; $ix++ )
-
 
79
{
79
       $j = $ix + 1;
80
  $j = $ix + 1;
80
       if ( $ARGV[$ix] eq "-x" ) {
81
  if ( $ARGV[$ix] eq "-x" )
-
 
82
  {
81
               @skiplist = split(",",$ARGV[$j]);
83
    @skiplist = split( ",", $ARGV[$j] );
-
 
84
  }
82
       } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
85
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
-
 
86
  {
83
               @skiplist = split(",",$1);
87
    @skiplist = split( ",", $1 );
84
       }
88
  }
85
}
89
}
86
foreach(@skiplist) {
90
foreach (@skiplist)
-
 
91
{
87
  $skipSec{$_} = 1;
92
  $skipSec{$_} = 1;
88
}
93
}
89
94
90
# Leafnode users will want /var/spool/news for this variable.
95
# Leafnode users will want /var/spool/news for this variable.
91
my $news = "/var/spool/news/";
96
my $news = "/var/spool/news/";
Line 106... Line 111...
106
my $topcrossposts = 10;
111
my $topcrossposts = 10;
107
112
108
# no. of time zones to show
113
# no. of time zones to show
109
my $toptz = 10;
114
my $toptz = 10;
110
115
111
-
 
112
-
 
113
###################### DATA STRUCTURES ######################
116
###################### DATA STRUCTURES ######################
114
my $group = $newsgroup_name;
117
my $group = $newsgroup_name;
115
$group =~ s!\.!/!g;
118
$group =~ s!\.!/!g;
116
my %data;                               # name, count, agent, total, orig, quoted
119
my %data;          # name, count, agent, total, orig, quoted
117
my %threads;                            # subject, count
120
my %threads;       # subject, count
118
my %crossposts;                         # group, count
121
my %crossposts;    # group, count
119
my %tz;                                 # timezones by count
122
my %tz;            # timezones by count
120
my %headers;                            # holds header of current article
123
my %headers;       # holds header of current article
121
my %lcheader;                           # holds lowercase headers
124
my %lcheader;      # holds lowercase headers
122
my @body;                               # holds body of current article
125
my @body;          # holds body of current article
123
my @sig;                                # holds sig text;
126
my @sig;           # holds sig text;
124
my $totalposts;                         # total no. of posts considered
127
my $totalposts;    # total no. of posts considered
125
my $filename;                           # name of current article file
128
my $filename;      # name of current article file
126
my $filesize;                           # size of current article file
129
my $filesize;      # size of current article file
127
my $earliest;                           # earliest article we have found
130
my $earliest;      # earliest article we have found
128
my $latest;                             # latest article we have found
131
my $latest;        # latest article we have found
129
my $poster;                             # poster we are dealing with
132
my $poster;        # poster we are dealing with
130
my $totsize = 0;                        # holds total sizes of all files
133
my $totsize   = 0; # holds total sizes of all files
131
my $totheader = 0;                      # total size of header material
134
my $totheader = 0; # total size of header material
132
my $totbody = 0;                        # total size of body material
135
my $totbody   = 0; # total size of body material
133
my $totsig = 0;                         # total size of sig material
136
my $totsig    = 0; # total size of sig material
134
my $totorig = 0;                        # total size of original material
137
my $totorig   = 0; # total size of original material
135
my $totquoted = 0;                      # total size of quoted material
138
my $totquoted = 0; # total size of quoted material
136
my $origposts = 0;                      # total no. of original posts
139
my $origposts = 0; # total no. of original posts
137
my $replies = 0;                        # total no. of replies
140
my $replies   = 0; # total no. of replies
138
my $i;                                  # general purpose
141
my $i;             # general purpose
139
my %distinct_agent;
142
my %distinct_agent;
140
my %agents =                            # used to hold counts of User Agents used
143
my %agents =       # used to hold counts of User Agents used
-
 
144
  (
141
            (  "KNode"                         => 0,
145
  "KNode"                     => 0,
142
               "Pan"                           => 0,
146
  "Pan"                       => 0,
143
               "Mozilla"                       => 0,
147
  "Mozilla"                   => 0,
144
               "Sylpheed"                      => 0,
148
  "Sylpheed"                  => 0,
145
               "Gnus"                          => 0,
149
  "Gnus"                      => 0,
146
               "Forte Agent"                   => 0,
150
  "Forte Agent"               => 0,
147
               "Forte Free Agent"              => 0,
151
  "Forte Free Agent"          => 0,
148
               "MicroPlanet Gravity"           => 0,
152
  "MicroPlanet Gravity"       => 0,
149
               "Microsoft Outlook Express"     => 0,
153
  "Microsoft Outlook Express" => 0,
150
               "Xnews"                         => 0,
154
  "Xnews"                     => 0,
151
               "slrn"                          => 0,
155
  "slrn"                      => 0,
152
               "tin"                           => 0,
156
  "tin"                       => 0,
153
               "rn"                            => 0,
157
  "rn"                        => 0,
154
               "NN"                            => 0,
158
  "NN"                        => 0,
155
               "MacSOUP"                       => 0,
159
  "MacSOUP"                   => 0,
156
               "Foorum"                        => 0,
160
  "Foorum"                    => 0,
157
               "MT-NewsWatcher"                => 0,
161
  "MT-NewsWatcher"            => 0,
158
               "News Rover"                    => 0,
162
  "News Rover"                => 0,
159
               "WebTV"                         => 0,
163
  "WebTV"                     => 0,
160
               "Compuserver"                   => 0,
164
  "Compuserver"               => 0,
161
               "VSoup"                         => 0);
165
  "VSoup"                     => 0
-
 
166
  );
162
167
163
######################## MAIN CODE ########################
168
######################## MAIN CODE ########################
164
$! = 1;
169
$! = 1;
165
170
166
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
171
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
167
opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
172
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
168
while (defined($filename = readdir(DIR))) {
173
while ( defined( $filename = readdir(DIR) ) )
-
 
174
{
169
 %lcheader = ();
175
  %lcheader = ();
170
 next unless -f $filename;             # only want real files
176
  next unless -f $filename;    # only want real files
171
 next if ($filename eq ".overview");   # real articles only
177
  next if ( $filename eq ".overview" );    # real articles only
172
 next if (-M $filename > $numdays);    # only want articles <= a certain age
178
  next if ( -M $filename > $numdays );     # only want articles <= a certain age
173
 $earliest = (stat $filename)[9] unless defined ($earliest);
179
  $earliest = ( stat $filename )[9] unless defined($earliest);
174
 $latest   = (stat $filename)[9] unless defined ($latest);
180
  $latest   = ( stat $filename )[9] unless defined($latest);
175
 &getarticle($filename);               # read in the article
181
  &getarticle($filename);                  # read in the article
176
 &getdata;                             # grab the data from the article
182
  &getdata;                                # grab the data from the article
177
 $totalposts++;                        # bump count of articles considered
183
  $totalposts++;                           # bump count of articles considered
178
}
184
}
179
closedir(DIR);                          # finished with the directory
185
closedir(DIR);                             # finished with the directory
-
 
186
180
# post-processing
187
# post-processing
181
&countagents;                           # count agents, collapsing versions
188
&countagents;    # count agents, collapsing versions
182
&fixpercent;                            # check percentages orig/total for posters
189
&fixpercent;     # check percentages orig/total for posters
183
190
184
&writedata;
191
&writedata;
185
192
186
#################### DISPLAY RESULTS #####################
193
#################### DISPLAY RESULTS #####################
187
print "=" x 76, "\n";
194
print "=" x 76, "\n";
188
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
195
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
189
print "=" x 76, "\n";
196
print "=" x 76, "\n";
-
 
197
printf "%s\n",
190
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
198
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
191
print "\n\n";
199
print "\n\n";
192
printf "Total posts considered: %s over %d days\n",
200
printf "Total posts considered: %s over %d days\n", commify($totalposts),
193
       commify($totalposts), $numdays;
201
  $numdays;
194
printf "Earliest article: %s\n", scalar localtime($earliest);
202
printf "Earliest article: %s\n",               scalar localtime($earliest);
195
printf "Latest article:   %s\n", scalar localtime($latest);
203
printf "Latest article:   %s\n",               scalar localtime($latest);
196
printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
204
printf "Original articles: %s, replies: %s\n", commify($origposts),
-
 
205
  commify($replies);
197
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
206
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
198
       commify(int($totsize / 1024)), $totsize / 1048576; #
207
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
199
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
208
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
200
       commify(int($totalposts / $numdays)),
209
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
201
       $totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
210
  commify( int( $totsize / $totalposts ) );
202
my $count = keys %data;
211
my $count = keys %data;
203
printf "Total headers: %s KB  bodies: %s KB\n",
212
printf "Total headers: %s KB  bodies: %s KB\n",
204
       commify(int($totheader / 1024)), commify(int($totbody / 1024));
213
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
205
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
214
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
206
       commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
215
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
207
       ($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
216
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
-
 
217
  commify( int( $totsig / 1024 ) );
208
printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
218
printf "Total number of posters: %s, average %s bytes per poster\n",
209
       commify(int($totsize / $count)); #/
219
  commify($count), commify( int( $totsize / $count ) );     #/
210
$count = keys %threads;
220
$count = keys %threads;
211
printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
221
printf "Total number of threads: %s, average %s bytes per thread\n",
212
       commify(int($totsize / $count)); #/
222
  commify($count), commify( int( $totsize / $count ) );     #/
213
printf "Total number of User-Agents: %d\n", scalar keys %agents;
223
printf "Total number of User-Agents: %d\n", scalar keys %agents;
214
print "\n", "=" x 76, "\n";
224
print "\n", "=" x 76, "\n";
215
225
216
###############################
226
###############################
217
# show posters by article count  Sec 1;
227
# show posters by article count  Sec 1;
218
###############################
228
###############################
219
unless ( $skipSec{1} ) {
229
unless ( $skipSec{1} )
-
 
230
{
220
    if (keys %data < $topposters) {
231
  if ( keys %data < $topposters )
-
 
232
  {
221
      $count = keys %data;
233
    $count = keys %data;
-
 
234
  }
222
    } else {
235
  else
-
 
236
  {
223
      $count = $topposters;
237
    $count = $topposters;
224
    }
238
  }
225
    printf "%s\n", &centred("Top $count posters by number of articles", 76);
239
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
226
    print "=" x 76, "\n";
240
  print "=" x 76, "\n";
227
    $i = 0;
241
  $i = 0;
228
    foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
242
  foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
-
 
243
  {
229
    my $name = substr($poster, 0, 65);
244
    my $name = substr( $poster, 0, 65 );
230
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
245
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
-
 
246
      $data{$poster}{count};
231
    last if (++$i == $count);
247
    last if ( ++$i == $count );
232
 }
248
  }
233
 print "\n", "=" x 76, "\n";
249
  print "\n", "=" x 76, "\n";
234
}
250
}
235
251
236
################################
252
################################
237
# show posters by size in Kbytes Sec 2;
253
# show posters by size in Kbytes Sec 2;
238
################################
254
################################
239
unless ( $skipSec{2} ) {
255
unless ( $skipSec{2} )
-
 
256
{
240
  if (keys %data < $topposters) {
257
  if ( keys %data < $topposters )
-
 
258
  {
241
    $count = keys %data;
259
    $count = keys %data;
-
 
260
  }
242
  } else {
261
  else
-
 
262
  {
243
    $count = $topposters;
263
    $count = $topposters;
244
  }
264
  }
245
  printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
265
  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
246
  print "=" x 76, "\n";
266
  print "=" x 76, "\n";
247
  $i = 0;
267
  $i = 0;
248
  foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
268
  foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
-
 
269
  {
249
    my $name = substr($poster, 0, 62);
270
    my $name = substr( $poster, 0, 62 );
250
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
271
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
-
 
272
      $data{$poster}{size} / 1024;    #/
251
    last if (++$i == $count);
273
    last if ( ++$i == $count );
252
  }
274
  }
253
  print "\n", "=" x 76, "\n";
275
  print "\n", "=" x 76, "\n";
254
}
276
}
255
277
256
####################################
278
####################################
257
# show top posters for original text
279
# show top posters for original text
258
####################################
280
####################################
259
unless ( $skipSec{3} ) {
281
unless ( $skipSec{3} )
-
 
282
{
260
   if (keys %data < $topposters) {
283
  if ( keys %data < $topposters )
-
 
284
  {
261
     $count = keys %data;
285
    $count = keys %data;
-
 
286
  }
262
   } else {
287
  else
-
 
288
  {
263
     $count = $topposters;
289
    $count = $topposters;
264
   }
290
  }
-
 
291
  printf "%s\n",
265
   printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
292
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
266
   print "=" x 76, "\n";
293
  print "=" x 76, "\n";
267
   $i = 0;
294
  $i = 0;
268
   foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
295
  foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
-
 
296
    keys %data )
-
 
297
  {
269
     next if $data{$poster}{quoted} == 0;
298
    next if $data{$poster}{quoted} == 0;
270
     next if $data{$poster}{count} < 5;
299
    next if $data{$poster}{count} < 5;
271
     my $name = substr($poster, 0, 63);
300
    my $name = substr( $poster, 0, 63 );
272
     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
301
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
-
 
302
      $data{$poster}{percent};
273
     last if (++$i == $count);
303
    last if ( ++$i == $count );
274
   }
304
  }
275
   print "\n", "=" x 76, "\n";
305
  print "\n", "=" x 76, "\n";
276
}
306
}
277
307
278
#######################################
308
#######################################
279
# show bottom posters for original text
309
# show bottom posters for original text
280
#######################################
310
#######################################
281
unless ( $skipSec{4} ) {
311
unless ( $skipSec{4} )
-
 
312
{
282
  if (keys %data < $topposters) {
313
  if ( keys %data < $topposters )
-
 
314
  {
283
    $count = keys %data;
315
    $count = keys %data;
-
 
316
  }
284
  } else {
317
  else
-
 
318
  {
285
    $count = $topposters;
319
    $count = $topposters;
286
  }
320
  }
-
 
321
  printf "%s\n",
287
  printf "%s\n", &centred("Bottom $count responders by original text  (> 5 posts)", 76);
322
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
288
  print "=" x 76, "\n";
323
  print "=" x 76, "\n";
289
  $i = 0;
324
  $i = 0;
290
  foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
325
  foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
-
 
326
    keys %data )
-
 
327
  {
291
    next if $data{$poster}{quoted} == 0;
328
    next if $data{$poster}{quoted} == 0;
292
    next if $data{$poster}{count} < 5;
329
    next if $data{$poster}{count} < 5;
293
    my $name = substr($poster, 0, 63);
330
    my $name = substr( $poster, 0, 63 );
294
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
331
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
-
 
332
      $data{$poster}{percent};
295
    last if (++$i == $count);
333
    last if ( ++$i == $count );
296
  }
334
  }
297
  print "\n", "=" x 76, "\n";
335
  print "\n", "=" x 76, "\n";
298
}
336
}
299
337
300
####################################
338
####################################
301
# show threads by number of articles
339
# show threads by number of articles
302
####################################
340
####################################
303
unless ( $skipSec{5} ) {
341
unless ( $skipSec{5} )
-
 
342
{
304
  if (keys %threads < $topthreads) {
343
  if ( keys %threads < $topthreads )
-
 
344
  {
305
    $count = keys %threads;
345
    $count = keys %threads;
-
 
346
  }
306
  } else {
347
  else
-
 
348
  {
307
    $count = $topthreads;
349
    $count = $topthreads;
308
  }
350
  }
309
  printf "%s\n", &centred("Top $count threads by no. of articles", 76);
351
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
310
  print "=" x 76, "\n";
352
  print "=" x 76, "\n";
311
  $i = 0;
353
  $i = 0;
312
  foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
354
  foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} }
-
 
355
    keys %threads )
-
 
356
  {
313
    my $name = substr($thread, 0, 65);
357
    my $name = substr( $thread, 0, 65 );
314
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
358
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
-
 
359
      $threads{$thread}{count};
315
    last if (++$i == $count);
360
    last if ( ++$i == $count );
316
  }
361
  }
317
  print "\n", "=" x 76, "\n";
362
  print "\n", "=" x 76, "\n";
318
}
363
}
319
################################
364
################################
320
# show threads by size in Kbytes
365
# show threads by size in Kbytes
321
################################
366
################################
322
unless ( $skipSec{6} ) {
367
unless ( $skipSec{6} )
-
 
368
{
323
  if (keys %threads < $topthreads) {
369
  if ( keys %threads < $topthreads )
-
 
370
  {
324
    $count = keys %threads;
371
    $count = keys %threads;
-
 
372
  }
325
  } else {
373
  else
-
 
374
  {
326
    $count = $topthreads;
375
    $count = $topthreads;
327
  }
376
  }
328
  printf "%s\n", &centred("Top $count threads by size in KB", 76);
377
  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
329
  print "=" x 76, "\n";
378
  print "=" x 76, "\n";
330
  $i = 0;
379
  $i = 0;
331
  foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
380
  foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} }
-
 
381
    keys %threads )
-
 
382
  {
332
    my $name = substr($thread, 0, 65);
383
    my $name = substr( $thread, 0, 65 );
333
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
384
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
-
 
385
      $threads{$thread}{size} / 1024;    #/
334
    last if (++$i == $count);
386
    last if ( ++$i == $count );
335
  }
387
  }
336
  print "\n", "=" x 76, "\n";
388
  print "\n", "=" x 76, "\n";
337
}
389
}
338
390
339
#################################
391
#################################
340
# show top 10 cross-posted groups
392
# show top 10 cross-posted groups
341
#################################
393
#################################
342
unless ( $skipSec{7} ) {
394
unless ( $skipSec{7} )
-
 
395
{
343
delete $crossposts{"$newsgroup_name"};  # don't include ours
396
  delete $crossposts{"$newsgroup_name"};    # don't include ours
344
if (keys %crossposts < $topcrossposts) {
397
  if ( keys %crossposts < $topcrossposts )
-
 
398
  {
345
 $count = keys %crossposts;
399
    $count = keys %crossposts;
-
 
400
  }
346
} else {
401
  else
-
 
402
  {
347
 $count = $topcrossposts;
403
    $count = $topcrossposts;
348
}
404
  }
349
printf "%s\n", &centred("Top $count cross-posted groups", 76);
405
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
350
print "=" x 76, "\n";
406
  print "=" x 76, "\n";
351
$i = 0;
407
  $i = 0;
-
 
408
  foreach
352
foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
409
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
-
 
410
  {
353
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
411
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
-
 
412
      $crossposts{$name};
354
 last if (++$i == $count);
413
    last if ( ++$i == $count );
355
}
414
  }
356
print "\n", "=" x 76, "\n";
415
  print "\n", "=" x 76, "\n";
357
}
416
}
358
#######################
417
#######################
359
#show agents and counts
418
#show agents and counts
360
#######################
419
#######################
361
unless ( $skipSec{8} ) {
420
unless ( $skipSec{8} )
-
 
421
{
362
if (keys %agents < $topagents) {
422
  if ( keys %agents < $topagents )
-
 
423
  {
363
 $count = keys %agents;
424
    $count = keys %agents;
-
 
425
  }
364
} else {
426
  else
-
 
427
  {
365
 $count = $topagents;
428
    $count = $topagents;
366
}
429
  }
367
printf "%s\n", &centred("Top $count User Agents by poster", 76);
430
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
368
print "=" x 76, "\n";
431
  print "=" x 76, "\n";
369
$i = 0;
432
  $i = 0;
370
foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
433
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
-
 
434
  {
371
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
435
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
-
 
436
      $agents{$agent};
372
 last if (++$i == $count);
437
    last if ( ++$i == $count );
373
}
438
  }
374
print "\n", "=" x 76, "\n";
439
  print "\n", "=" x 76, "\n";
375
}
440
}
376
441
377
#######################
442
#######################
378
#show distinct agents
443
#show distinct agents
379
#######################
444
#######################
380
unless ( $skipSec{9} ) {
445
unless ( $skipSec{9} )
-
 
446
{
381
if (keys %distinct_agent < $topagents) {
447
  if ( keys %distinct_agent < $topagents )
-
 
448
  {
382
 $count = keys %distinct_agent;
449
    $count = keys %distinct_agent;
-
 
450
  }
383
} else {
451
  else
-
 
452
  {
384
 $count = $topagents;
453
    $count = $topagents;
385
}
454
  }
386
printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
455
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
387
print "=" x 76, "\n";
456
  print "=" x 76, "\n";
388
$i = 0;
457
  $i = 0;
389
foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
458
  foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
-
 
459
    keys %distinct_agent )
-
 
460
  {
390
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
461
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
-
 
462
      $distinct_agent{$agent},
-
 
463
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
391
 last if (++$i == $count);
464
    last if ( ++$i == $count );
392
}
465
  }
393
print "\n", "=" x 76, "\n";
466
  print "\n", "=" x 76, "\n";
394
}
467
}
395
468
396
##########################
469
##########################
397
#show timezones and counts
470
#show timezones and counts
398
##########################
471
##########################
399
unless ( $skipSec{10} ) {
472
unless ( $skipSec{10} )
-
 
473
{
400
if (keys %tz < $toptz) {
474
  if ( keys %tz < $toptz )
-
 
475
  {
401
 $count = keys %tz;
476
    $count = keys %tz;
-
 
477
  }
402
} else {
478
  else
-
 
479
  {
403
 $count = $toptz;
480
    $count = $toptz;
404
}
481
  }
405
printf "%s\n", &centred("Top 10 time zones", 76);
482
  printf "%s\n", &centred( "Top 10 time zones", 76 );
406
print "=" x 76, "\n";
483
  print "=" x 76, "\n";
407
$i = 0;
484
  $i = 0;
408
foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
485
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
-
 
486
  {
409
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
487
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
410
 last if (++$i == $count);
488
    last if ( ++$i == $count );
411
}
489
  }
412
print "\n", "=" x 76, "\n";
490
  print "\n", "=" x 76, "\n";
413
}
491
}
414
492
415
-
 
416
################################ SUBROUTINES ################################
493
################################ SUBROUTINES ################################
417
494
418
-
 
419
#######################################
495
#######################################
420
# get current article's header and body
496
# get current article's header and body
421
#######################################
497
#######################################
422
sub getarticle {
498
sub getarticle
-
 
499
{
423
 %headers = ();                        # dump old headers
500
  %headers = ();    # dump old headers
424
 my $filename = shift;                 # get the name of the file
501
  my $filename = shift;    # get the name of the file
-
 
502
425
# get stats about the file itself
503
  # get stats about the file itself
426
 $filesize = -s $filename;             # get total size of file
504
  $filesize = -s $filename;    # get total size of file
427
 $totsize += $filesize;                # bump total sizes of all files
505
  $totsize += $filesize;       # bump total sizes of all files
428
506
429
 my $mtime = (stat $filename)[9];
507
  my $mtime = ( stat $filename )[9];
430
 if ( $mtime < $earliest ) {
508
  if ( $mtime < $earliest )
-
 
509
  {
431
    $earliest = $mtime;
510
    $earliest = $mtime;
-
 
511
  }
432
 } elsif ( $mtime > $latest ) {
512
  elsif ( $mtime > $latest )
-
 
513
  {
433
    $latest = $mtime;
514
    $latest = $mtime;
434
 }
515
  }
435
516
436
# now read the file
517
  # now read the file
437
 open(FILE, $filename) or die "Can't open $filename: $!\n";
518
  open( FILE, $filename ) or die "Can't open $filename: $!\n";
438
 while (<FILE>) {
519
  while (<FILE>)
-
 
520
  {
439
   $totheader += length($_);           # bump total header size
521
    $totheader += length($_);    # bump total header size
440
   last if (/^\s*$/);                  # end of header?
522
    last if (/^\s*$/);           # end of header?
441
   if (/^([^:\s]*):\s+(.*)/) {
523
    if (/^([^:\s]*):\s+(.*)/)
-
 
524
    {
442
     my($key,$val) = ($1,$2);
525
      my ( $key, $val ) = ( $1, $2 );
443
     $headers{$key} = decode('MIME-Header', $val);
526
      $headers{$key} = decode( 'MIME-Header', $val );
444
     $lcheader{clean(lc($key))} = clean($val);
527
      $lcheader{ clean( lc($key) ) } = clean($val);
445
  }
528
    }
446
 }
529
  }
447
 @body = <FILE>;                       # slurp up body
530
  @body = <FILE>;                # slurp up body
448
 close(FILE);
531
  close(FILE);
449
} # getarticle
532
}    # getarticle
450
533
451
###################################
534
###################################
452
# get data from the current article
535
# get data from the current article
453
###################################
536
###################################
454
sub getdata {
537
sub getdata
-
 
538
{
455
#### First, analyse header fields ####
539
#### First, analyse header fields ####
456
540
457
# Set up this poster if not defined, get counts, sizes
541
  # Set up this poster if not defined, get counts, sizes
458
 $poster = encode('UTF-8', $headers{From});             # get the poster's name
542
  $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
-
 
543
  if ( !defined( $data{$poster} ) )
459
 if (!defined($data{$poster})) {       # seen this one before?
544
  {                                               # seen this one before?
460
   $data{$poster}{agent} = 'Unknown';  # comes after For: field
545
    $data{$poster}{agent}  = 'Unknown';           # comes after For: field
461
   $data{$poster}{orig} = 0;
546
    $data{$poster}{orig}   = 0;
462
   $data{$poster}{quoted} = 0;
547
    $data{$poster}{quoted} = 0;
463
 }
548
  }
464
 $data{$poster}{count}++;      # bump count for this poster
549
  $data{$poster}{count}++;                        # bump count for this poster
465
 $data{$poster}{size} += $filesize;    # total size of file
550
  $data{$poster}{size} += $filesize;              # total size of file
466
551
467
# The User-Agent and/or X-Newsreader fields
552
  # The User-Agent and/or X-Newsreader fields
468
# for User-Agent by poster
553
  # for User-Agent by poster
469
 if (defined $lcheader{"user-agent"}) {
554
  if ( defined $lcheader{"user-agent"} )
-
 
555
  {
470
   $data{$poster}{agent} = $lcheader{"user-agent"};
556
    $data{$poster}{agent} = $lcheader{"user-agent"};
471
 }
557
  }
472
 if (defined $lcheader{"x-newsreader"}) {
558
  if ( defined $lcheader{"x-newsreader"} )
-
 
559
  {
473
   $data{$poster}{agent} = $lcheader{"x-newsreader"};
560
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
474
 }
561
  }
475
562
476
# The User Agent for User-Agent by number of posts
563
  # The User Agent for User-Agent by number of posts
477
 my $UA = "unknown";
564
  my $UA = "unknown";
478
 foreach my $keys ( keys %lcheader )
565
  foreach my $keys ( keys %lcheader )
479
 {
566
  {
480
   if (defined $lcheader{'user-agent'})
567
    if ( defined $lcheader{'user-agent'} )
481
   {
568
    {
482
           $UA = $lcheader{'user-agent'};
569
      $UA = $lcheader{'user-agent'};
483
   }
570
    }
484
    elsif (defined $lcheader{"x-newsreader"})
571
    elsif ( defined $lcheader{"x-newsreader"} )
485
   {
572
    {
486
           $UA = $lcheader{"x-newsreader"};
573
      $UA = $lcheader{"x-newsreader"};
487
   }
574
    }
488
   elsif (defined $lcheader{'x-mailer'})
575
    elsif ( defined $lcheader{'x-mailer'} )
489
   {
576
    {
490
           $UA = $lcheader{'x-mailer'};
577
      $UA = $lcheader{'x-mailer'};
491
   }
578
    }
-
 
579
    elsif (
492
   elsif ((defined $lcheader{'organization'}) &&
580
      ( defined $lcheader{'organization'} )
-
 
581
      && ( $lcheader{'organization'} =~
493
         ($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/))
582
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
-
 
583
      )
494
   {
584
    {
495
           $UA = $lcheader{'organization'};
585
      $UA = $lcheader{'organization'};
496
   }
586
    }
497
   elsif ( $lcheader{'message-id'} =~ /pine/i )
587
    elsif ( $lcheader{'message-id'} =~ /pine/i )
498
   {
588
    {
499
           $UA = "Pine";
589
      $UA = "Pine";
500
   } ## Hopefully found UA, else set to unknown
590
    }    ## Hopefully found UA, else set to unknown
501
 }
591
  }
502
-
 
503
592
504
$UA = clean($UA);
593
  $UA = clean($UA);
505
$UA = get_agent($UA);
594
  $UA = get_agent($UA);
506
-
 
507
595
508
sub get_agent {
596
  sub get_agent
-
 
597
  {
509
 my $raw = shift;
598
    my $raw   = shift;
510
 my $agent = $raw;
599
    my $agent = $raw;
511
600
512
 ## strip http
601
    ## strip http
513
 if ( $raw =~ /.*http.*/ ) {
602
    if ( $raw =~ /.*http.*/ )
-
 
603
    {
514
   $raw =~ s!posted via!!i;
604
      $raw =~ s!posted via!!i;
515
   $raw =~ s!http://!!g;
605
      $raw =~ s!http://!!g;
516
   $raw =~ s!/!!g;
606
      $raw =~ s!/!!g;
517
   $raw =~ s! !!g;
607
      $raw =~ s! !!g;
518
 }
608
    }
519
609
520
 ## Fix Outlook from Mac
610
    ## Fix Outlook from Mac
521
 if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
611
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
522
612
523
 ## Pick out the popular agents
613
    ## Pick out the popular agents
524
 if ( $raw =~ /(outlook express)/i     ||
614
    if ( $raw =~ /(outlook express)/i
525
      $raw =~ /(microplanet gravity)/i ||
615
      || $raw =~ /(microplanet gravity)/i
526
      $raw =~ /(news rover)/i          ||
616
      || $raw =~ /(news rover)/i
527
      $raw =~ /(forte agent)/i         ||
617
      || $raw =~ /(forte agent)/i
528
      $raw =~ /(forte free agent)/i
618
      || $raw =~ /(forte free agent)/i )
529
    )
619
    {
530
 {
-
 
531
       $agent = $1;
620
      $agent = $1;
532
 }
621
    }
-
 
622
    elsif (
533
 elsif ( $raw =~ /^(
623
      $raw =~ /^(
534
        pan
624
        pan
535
       |sylpheed
625
       |sylpheed
536
       |slrn
626
       |slrn
537
       |mozilla
627
       |mozilla
538
       |knode
628
       |knode
Line 556... Line 646...
556
       |nn
646
       |nn
557
       |rn
647
       |rn
558
       |007
648
       |007
559
       |webtv
649
       |webtv
560
       |compuserve
650
       |compuserve
561
       )/ix )
651
       )/ix
-
 
652
      )
562
 {
653
    {
563
       $agent = $1;
654
      $agent = $1;
564
 }
655
    }
565
 else
656
    else
566
 {
657
    {
567
 ## Clean up unknown agents
658
      ## Clean up unknown agents
568
       if ( $raw =~ m!^(.*?)/! ) {
659
      if ( $raw =~ m!^(.*?)/! )
-
 
660
      {
569
             $agent = $1;
661
        $agent = $1;
570
       }
662
      }
571
       elsif ( $raw =~ /^(\w*)\d.*/ )
663
      elsif ( $raw =~ /^(\w*)\d.*/ )
572
       {
664
      {
573
            $agent = $1;
665
        $agent = $1;
574
       }
666
      }
575
  }
667
    }
576
668
577
$distinct_agent{$agent}++;
669
    $distinct_agent{$agent}++;
578
return $agent;
670
    return $agent;
579
}
671
  }
580
672
-
 
673
  # Get all cross-posted newsgroups
-
 
674
  for ( split /,/, $headers{"Newsgroups"} )
-
 
675
  {
-
 
676
    $crossposts{$_}++;    # bump count for each
-
 
677
  }
581
678
582
# Get all cross-posted newsgroups
-
 
583
 for (split /,/, $headers{"Newsgroups"}) {
-
 
584
   $crossposts{$_}++;          # bump count for each
-
 
585
 }
-
 
586
-
 
587
# Get threads
679
  # Get threads
588
 my $thread = encode('UTF-8', $headers{"Subject"});
680
  my $thread = encode( 'UTF-8', $headers{"Subject"} );
589
 $thread =~ s/^re: //i;                # Remove Re: or re: at start
681
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
590
 $thread =~ s/\s+/ /g;                 # collapse whitespace
682
  $thread =~ s/\s+/ /g;     # collapse whitespace
591
 $threads{$thread}{count} += 1;        # bump count of this subject
683
  $threads{$thread}{count} += 1;            # bump count of this subject
592
 $threads{$thread}{size} += $filesize; # bump bytes for this thread
684
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
593
685
594
# Is this an original post or a reply?
686
  # Is this an original post or a reply?
595
 if (defined $headers{"References"}) {
687
  if ( defined $headers{"References"} )
-
 
688
  {
596
   $replies++;
689
    $replies++;
-
 
690
  }
597
 } else {
691
  else
-
 
692
  {
598
   $origposts++;
693
    $origposts++;
599
 }
694
  }
600
695
601
# Get the time zone
696
  # Get the time zone
602
 $_ = $headers{"Date"};
697
  $_ = $headers{"Date"};
603
 my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
698
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
604
 if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
699
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
-
 
700
  {
605
   $tz = "UTC";
701
    $tz = "UTC";
606
 }
702
  }
607
 $tz{$tz}++;
703
  $tz{$tz}++;
608
704
609
#### Now analyse the body text ####
705
#### Now analyse the body text ####
610
 my $insig = 0;
706
  my $insig = 0;
611
 for (@body) {
707
  for (@body)
-
 
708
  {
612
   $totbody += length($_);             # bump total body size
709
    $totbody += length($_);    # bump total body size
613
   next if (/^$>/);                    # don't count blank lines in body
710
    next if (/^$>/);           # don't count blank lines in body
614
   if ($insig == 1) {
711
    if ( $insig == 1 )
615
     $totsig += length($_);            # bump total sig size
-
 
616
# Bill Unruh uses ] quotes, and another poster uses ::
-
 
617
   } elsif (/^\s*[>\]]/ or /^\s*::/) {         # are we in a quote line?
-
 
618
     $data{$poster}{quoted} += length($_);     # bump count of quoted chrs
-
 
619
     $totquoted += length($_);
-
 
620
   } elsif (/-- /) {
-
 
621
     $insig = 1;
-
 
622
   } else {
712
    {
623
# we must be processing an original line
-
 
624
     $data{$poster}{orig} += length($_); # bump count of original chrs
713
      $totsig += length($_);    # bump total sig size
625
     $totorig += length($_);
-
 
626
   }
-
 
627
 } # end for (@body)
-
 
628
714
-
 
715
      # Bill Unruh uses ] quotes, and another poster uses ::
-
 
716
    }
-
 
717
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
-
 
718
    {                           # are we in a quote line?
-
 
719
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
-
 
720
      $totquoted             += length($_);
-
 
721
    }
-
 
722
    elsif (/-- /)
-
 
723
    {
-
 
724
      $insig = 1;
-
 
725
    }
-
 
726
    else
-
 
727
    {
-
 
728
-
 
729
      # we must be processing an original line
-
 
730
      $data{$poster}{orig} += length($_);      # bump count of original chrs
-
 
731
      $totorig             += length($_);
-
 
732
    }
-
 
733
  }    # end for (@body)
-
 
734
629
} # getdata
735
}    # getdata
630
736
631
########################################
737
########################################
632
# Count the User-Agents used, collapsing
738
# Count the User-Agents used, collapsing
633
# different versions into one per agent.
739
# different versions into one per agent.
634
########################################
740
########################################
635
sub countagents {
741
sub countagents
-
 
742
{
636
POSTER:
743
POSTER:
637
 foreach $poster (keys %data) {
744
  foreach $poster ( keys %data )
-
 
745
  {
638
   foreach my $agent_name (keys %distinct_agent) {     # check against known ones
746
    foreach my $agent_name ( keys %distinct_agent )
-
 
747
    {    # check against known ones
639
     if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) {
748
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
-
 
749
      {
640
       $agents{$agent_name}++;
750
        $agents{$agent_name}++;
641
       next POSTER;
751
        next POSTER;
642
     }
752
      }
643
   }
753
    }
644
   $agents{$data{$poster}{agent}}++;
754
    $agents{ $data{$poster}{agent} }++;
645
 }
755
  }
646
} # countagents
756
}    # countagents
647
757
648
############################################
758
############################################
649
# set orig/total percentages for all posters
759
# set orig/total percentages for all posters
650
############################################
760
############################################
651
sub fixpercent {
761
sub fixpercent
-
 
762
{
652
 foreach $poster (keys %data) {
763
  foreach $poster ( keys %data )
-
 
764
  {
653
   my $percent = 100;
765
    my $percent = 100;
654
   if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) {
766
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
-
 
767
    {
-
 
768
      $percent = $data{$poster}{orig} * 100 /
655
     $percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/
769
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
-
 
770
    }
656
   } elsif ($data{$poster}{orig} == 0) {
771
    elsif ( $data{$poster}{orig} == 0 )
-
 
772
    {
657
     $percent = 0;
773
      $percent = 0;
658
   }
774
    }
659
   $data{$poster}{percent} = $percent;
775
    $data{$poster}{percent} = $percent;
660
 }
776
  }
661
}
777
}
662
778
663
##############################
779
##############################
664
# right pad a string with '.'s
780
# right pad a string with '.'s
665
##############################
781
##############################
666
sub rpad {
782
sub rpad
-
 
783
{
-
 
784
667
# get text to pad, length to pad, pad chr
785
  # get text to pad, length to pad, pad chr
668
 my ($text, $pad_len, $pad_chr) = @_;
786
  my ( $text, $pad_len, $pad_chr ) = @_;
669
 if (length($text) > $pad_len) {
787
  if ( length($text) > $pad_len )
-
 
788
  {
670
   $text = substr($text, 0, $pad_len);
789
    $text = substr( $text, 0, $pad_len );
671
 }
790
  }
672
 my $padded = $text . $pad_chr x ( $pad_len - length( $text ) );
791
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
673
 return $padded;
792
  return $padded;
674
}
793
}
675
794
676
#################
795
#################
677
# centre a string
796
# centre a string
678
#################
797
#################
679
sub centred {
798
sub centred
-
 
799
{
680
 my ($text, $width) = @_;              # text to centre, size of field to centre in
800
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
681
 my $pad_len = ($width - length($text)) / 2;   #/
801
  my $pad_len = ( $width - length($text) ) / 2;    #/
682
 my $centred = " " x $pad_len . $text;
802
  my $centred = " " x $pad_len . $text;
683
 return $centred;
803
  return $centred;
684
}
804
}
685
805
686
##########################
806
##########################
687
# put commas into a number
807
# put commas into a number
688
##########################
808
##########################
689
sub commify {
809
sub commify
-
 
810
{
690
 $_  = shift;
811
  $_ = shift;
691
 1 while s/^(-?\d+)(\d{3})/$1,$2/;
812
  1 while s/^(-?\d+)(\d{3})/$1,$2/;
692
 return $_;
813
  return $_;
693
}
814
}
694
815
695
#########################
816
#########################
696
# clean
817
# clean
697
#########################
818
#########################
698
sub clean {
819
sub clean
-
 
820
{
699
 my $dirty = shift;
821
  my $dirty = shift;
700
 my $clean = $dirty;
822
  my $clean = $dirty;
701
 $clean =~ s/^\s*//;
823
  $clean =~ s/^\s*//;
702
 $clean =~ s/\s*$//;
824
  $clean =~ s/\s*$//;
703
825
704
return $clean;
826
  return $clean;
705
}
827
}
706
828
-
 
829
sub usage
-
 
830
{
707
831
708
sub usage {
-
 
709
-
 
710
 print "usage: newstat.pl newsgroupname\n";
832
  print "usage: newstat.pl newsgroupname\n";
711
 exit 1;
833
  exit 1;
712
}
834
}
713
835
714
###################################
836
###################################
715
# Write data structures to a file #
837
# Write data structures to a file #
716
###################################
838
###################################
717
sub writedata {
839
sub writedata
-
 
840
{
718
 open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
841
  open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
719
 print OUTF "Data collected from alt.os.linux.mandrake\n\n";
842
  print OUTF "Data collected from alt.os.linux.mandrake\n\n";
-
 
843
  print OUTF
720
 print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
844
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
721
 foreach my $name (keys %data) {
845
  foreach my $name ( keys %data )
-
 
846
  {
-
 
847
    print OUTF
722
   print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
848
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
723
 }
849
  }
-
 
850
  print OUTF
724
 print OUTF "============================================================================\n";
851
"============================================================================\n";
725
 print OUTF "Thread subjects\n";
852
  print OUTF "Thread subjects\n";
-
 
853
  print OUTF
726
 print OUTF "----------------------------------------------------------------------------\n";
854
"----------------------------------------------------------------------------\n";
727
 foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {
855
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
-
 
856
  {
728
   print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
857
    print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
729
 }
858
  }
-
 
859
  print OUTF
730
 print OUTF "============================================================================\n";
860
"============================================================================\n";
731
 print OUTF "Cross-posts\n";
861
  print OUTF "Cross-posts\n";
-
 
862
  print OUTF
732
 print OUTF "----------------------------------------------------------------------------\n";
863
"----------------------------------------------------------------------------\n";
733
 foreach my $name (sort keys %crossposts) {
864
  foreach my $name ( sort keys %crossposts )
-
 
865
  {
734
   print OUTF "$name : $crossposts{$name}\n";
866
    print OUTF "$name : $crossposts{$name}\n";
735
 }
867
  }
736
 print OUTF
868
  print OUTF print OUTF
737
 print OUTF "============================================================================\n";
869
"============================================================================\n";
738
 print OUTF "User agents\n";
870
  print OUTF "User agents\n";
-
 
871
  print OUTF
739
 print OUTF "----------------------------------------------------------------------------\n";
872
"----------------------------------------------------------------------------\n";
740
 foreach my $name (sort keys %agents) {
873
  foreach my $name ( sort keys %agents )
-
 
874
  {
741
   print OUTF "$name : $agents{$name}\n";
875
    print OUTF "$name : $agents{$name}\n";
742
 }
876
  }
-
 
877
  print OUTF
743
 print OUTF "============================================================================\n";
878
"============================================================================\n";
744
 print OUTF "Time zones\n";
879
  print OUTF "Time zones\n";
-
 
880
  print OUTF
745
 print OUTF "----------------------------------------------------------------------------\n";
881
"----------------------------------------------------------------------------\n";
746
 foreach my $name (sort keys %tz) {
882
  foreach my $name ( sort keys %tz )
-
 
883
  {
747
   print OUTF "$name : $tz{$name}\n";
884
    print OUTF "$name : $tz{$name}\n";
748
 }
885
  }
749
 close OUTF;
886
  close OUTF;
750
} # writedata
887
}    # writedata