Subversion Repositories LCARS

Rev

Rev 11 | Rev 14 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 11 Rev 13
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
-
 
4
use diagnostics;
4
use utf8;
5
use utf8;
5
use encoding 'utf-8';
-
 
6
use Encode;
6
use Encode;
7
7
-
 
8
## Print out all text to STDOUT UTF-8 encoded
-
 
9
binmode STDOUT, ':encoding(UTF-8)';
-
 
10
8
###########################
11
############################
9
# newsstat.pl version 0.4.2
12
## newsstat.pl version 0.4.3
10
13
11
############################################################################
14
###########################################################################
12
# Collect statistics about a newsgroup (specified by first argument) in
15
## Collect statistics about a newsgroup (specified by first argument)
13
# the local news spool. Check all articles in the last 30-day period.
16
## in the local news spool. Check all articles in the last 30-day period.
14
# Rank posters by number of posts and by volume of posts, report on top and
17
## Rank posters by number of posts and by volume of posts, report on top
15
# bottom 20 posters. Show their name, number of posts, size of posts,
18
## and bottom 20 posters. Show their name, number of posts, size of posts,
16
# percentage of quoted lines. Rank user-agents used, by poster rather than
19
## percentage of quoted lines. Rank user-agents used, by poster rather
17
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
20
## than by post. Rank top 20 threads. Rank top 10 cross-posted groups.
18
#
21
##
19
# (Numbers and paths can be configured below.  -- PE)
22
## Numbers and paths can be configured below.  See ChangeLog and TODO
20
############################################################################
-
 
21
-
 
22
############################################################################
-
 
23
#                       RECENT CHANGES                                     #
-
 
24
# 2011-10-03  PE  - Use more compatible shebang
-
 
25
#                 - Fixed some Perl::Critic-ized code
-
 
26
#                 - Fixed wrong indent for non-ASCII names
-
 
27
#                 - Formatted source code
-
 
28
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
-
 
29
#                 - Use warnings, utf8 (just in case)
-
 
30
#                 - Documentation update
-
 
31
# N/A         NN  - Take newsgroup name as argument
-
 
32
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
-
 
33
#                 - Allow command line flags for subtracting
-
 
34
#                   output if not pertinent for a group
-
 
35
# 2002-11-09  NN  - Put Garry's writedata() function back in.
-
 
36
#                 - added "rn" to my list of UA's
-
 
37
#                 - Started using %distinct_agent for both User agent
-
 
38
#                   sections
-
 
39
#                 - named it newsstat.pl version 0.3
-
 
40
# 2002-11-06  NN  - Fixed the earliest/latest file problem by using
-
 
41
#                   mtime rather than ctime, and simplifying the logic
-
 
42
# 2002-11-05  NN  - moved user configurations to the top
-
 
43
#                 - fixed the cross-posting section
-
 
44
#                 - introduced the $newsgroup_name variable which
-
 
45
#                   later becomes $news$group
-
 
46
#                 - changed $name to $agent_name in countagents()
-
 
47
#
-
 
48
# Contributors
-
 
49
# -------------
-
 
50
# NN  Nomen nominandum (name to be determined later)
-
 
51
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
-
 
52
-
 
53
########### TODO #############
-
 
54
# Commas in bottom section of report
-
 
55
# Show date the figures were compiled
-
 
56
# No. of HTML articles (Content-Type: text/html)
-
 
57
# No. of quoted sigs (/>\s*-- /)
-
 
58
# Per cent of top-posted articles
-
 
59
# Top 10 cross-posters
-
 
60
# Top 20 news posting hosts (from Path)
-
 
61
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
-
 
62
# Count *all* User Agents that each poster uses
-
 
63
# What do we do about Bill Unruh's ] quote style?
-
 
64
# Change the way dates/times are checked
-
 
65
# include % share in posters by no. of arts
-
 
66
# include % share in posters by size
-
 
67
# Total, orig & quoted lines by user agent with per cent
-
 
68
# Take more arguments
23
## for more.  -- PE
69
#######################################################
24
###########################################################################
70
25
71
###################### USER CONFIGURATIONS ############################
26
###################### USER CONFIGURATIONS ############################
72
27
73
# The name of the group to do stats for
28
## The name of the group to do stats for
74
my $newsgroup_name = $ARGV[0];
29
my $newsgroup_name = $ARGV[0];
75
$newsgroup_name or &usage;
30
$newsgroup_name or &usage;
76
31
77
# Check for removal flags
32
## Check for removal flags
78
my $ix;
33
my $ix;
79
my $j;
34
my $j;
80
my %skipSec;
35
my %skipSec;
81
my @skiplist;
36
my @skiplist;
82
my $args = @ARGV;
37
my $args = @ARGV;
83
for ( $ix = 1 ; $ix < $args ; $ix++ )
38
for ( $ix = 1 ; $ix < $args ; $ix++ )
84
{
39
{
85
  $j = $ix + 1;
40
  $j = $ix + 1;
86
  if ( $ARGV[$ix] eq "-x" )
41
  if ( $ARGV[$ix] eq "-x" )
87
  {
42
  {
88
    @skiplist = split( ",", $ARGV[$j] );
43
    @skiplist = split( ",", $ARGV[$j] );
89
  }
44
  }
90
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
45
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
91
  {
46
  {
92
    @skiplist = split( ",", $1 );
47
    @skiplist = split( ",", $1 );
93
  }
48
  }
94
}
49
}
95
foreach (@skiplist)
50
foreach (@skiplist)
96
{
51
{
97
  $skipSec{$_} = 1;
52
  $skipSec{$_} = 1;
98
}
53
}
99
54
100
# Leafnode users will want /var/spool/news for this variable.
55
## Leafnode users will want /var/spool/news for this variable.
101
my $news = "/var/spool/news/";
56
my $news = "/var/spool/news/";
102
57
103
# How many days are we doing statistics for?
58
## How many days are we doing statistics for?
104
my $numdays = 30;
59
my $numdays = 30;
105
60
106
# no. of agents we list
61
## Number of agents we list
107
my $topagents = 10;
62
my $topagents = 10;
108
63
109
# no. of threads we want to know about
64
## Number of threads we want to know about
110
my $topthreads = 20;
65
my $topthreads = 20;
111
66
112
# no. of top or bottom posters to show
67
## Number of top or bottom posters to show
113
my $topposters = 20;
68
my $topposters = 20;
114
69
115
# no. of cross-posted threads to show
70
## Number of cross-posted threads to show
116
my $topcrossposts = 10;
71
my $topcrossposts = 10;
117
72
118
# no. of time zones to show
73
## Number of time zones to show
119
my $toptz = 10;
74
my $toptz = 10;
120
75
121
###################### DATA STRUCTURES ######################
76
###################### DATA STRUCTURES ######################
122
my $group = $newsgroup_name;
77
my $group = $newsgroup_name;
123
$group =~ s!\.!/!g;
78
$group =~ s!\.!/!g;
124
my %data;          # name, count, agent, total, orig, quoted
79
my %data;          # name, count, agent, total, orig, quoted
125
my %threads;       # subject, count
80
my %threads;       # subject, count
126
my %crossposts;    # group, count
81
my %crossposts;    # group, count
127
my %tz;            # timezones by count
82
my %tz;            # timezones by count
128
my %headers;       # holds header of current article
83
my %headers;       # holds header of current article
129
my %lcheader;      # holds lowercase headers
84
my %lcheader;      # holds lowercase headers
130
my @body;          # holds body of current article
85
my @body;          # holds body of current article
131
my @sig;           # holds sig text;
86
my @sig;           # holds sig text;
132
my $totalposts;    # total no. of posts considered
87
my $totalposts;    # total no. of posts considered
133
my $filename;      # name of current article file
88
my $filename;      # name of current article file
134
my $filesize;      # size of current article file
89
my $filesize;      # size of current article file
135
my $earliest;      # earliest article we have found
90
my $earliest;      # earliest article we have found
136
my $latest;        # latest article we have found
91
my $latest;        # latest article we have found
137
my $poster;        # poster we are dealing with
92
my $poster;        # poster we are dealing with
138
my $totsize   = 0; # holds total sizes of all files
93
my $totsize   = 0; # holds total sizes of all files
139
my $totheader = 0; # total size of header material
94
my $totheader = 0; # total size of header material
140
my $totbody   = 0; # total size of body material
95
my $totbody   = 0; # total size of body material
141
my $totsig    = 0; # total size of sig material
96
my $totsig    = 0; # total size of sig material
142
my $totorig   = 0; # total size of original material
97
my $totorig   = 0; # total size of original material
143
my $totquoted = 0; # total size of quoted material
98
my $totquoted = 0; # total size of quoted material
144
my $origposts = 0; # total no. of original posts
99
my $origposts = 0; # total no. of original posts
145
my $replies   = 0; # total no. of replies
100
my $replies   = 0; # total no. of replies
146
my $i;             # general purpose
101
my $i;             # general purpose
147
my %distinct_agent;
102
my %distinct_agent;
148
my %agents =       # used to hold counts of User Agents used
-
 
149
  (
103
150
  "KNode"                     => 0,
104
## Used to hold counts of User Agents used
151
  "Pan"                       => 0,
-
 
152
  "Mozilla"                   => 0,
105
my %agents = (
153
  "Sylpheed"                  => 0,
106
  "Compuserver"               => 0,
154
  "Gnus"                      => 0,
107
  "Foorum"                    => 0,
155
  "Forte Agent"               => 0,
108
  "Forte Agent"               => 0,
156
  "Forte Free Agent"          => 0,
109
  "Forte Free Agent"          => 0,
-
 
110
  "Gnus"                      => 0,
-
 
111
  "KNode"                     => 0,
-
 
112
  "MacSOUP"                   => 0,
-
 
113
  "MT-NewsWatcher"            => 0,
157
  "MicroPlanet Gravity"       => 0,
114
  "MicroPlanet Gravity"       => 0,
158
  "Microsoft Outlook Express" => 0,
115
  "Microsoft Outlook Express" => 0,
-
 
116
  "Microsoft Windows Mail"    => 0,
-
 
117
  "Mozilla"                   => 0,
-
 
118
  "News Rover"                => 0,
-
 
119
  "NN"                        => 0,
-
 
120
  "Pan"                       => 0,
159
  "Xnews"                     => 0,
121
  "rn"                        => 0,
160
  "slrn"                      => 0,
122
  "slrn"                      => 0,
-
 
123
  "Sylpheed"                  => 0,
161
  "tin"                       => 0,
124
  "tin"                       => 0,
162
  "rn"                        => 0,
-
 
163
  "NN"                        => 0,
-
 
164
  "MacSOUP"                   => 0,
-
 
165
  "Foorum"                    => 0,
125
  "VSoup"                     => 0,
166
  "MT-NewsWatcher"            => 0,
-
 
167
  "News Rover"                => 0,
-
 
168
  "WebTV"                     => 0,
126
  "WebTV"                     => 0,
169
  "Compuserver"               => 0,
-
 
170
  "VSoup"                     => 0
127
  "Xnews"                     => 0
171
  );
128
);
172
129
173
######################## MAIN CODE ########################
130
######################## MAIN CODE ########################
174
$! = 1;
131
$! = 1;
175
132
176
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
133
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
177
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
134
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
178
while ( defined( $filename = readdir(DIR) ) )
135
while ( defined( $filename = readdir(DIR) ) )
179
{
136
{
180
  %lcheader = ();
137
  %lcheader = ();
181
  next unless -f $filename;    # only want real files
138
  next unless -f $filename;               # only want real files
182
  next if ( $filename eq ".overview" );    # real articles only
139
  next if ( $filename eq ".overview" );   # real articles only
183
  next if ( -M $filename > $numdays );     # only want articles <= a certain age
140
  next if ( -M $filename > $numdays );    # only want articles <= a certain age
184
  $earliest = ( stat $filename )[9] unless defined($earliest);
141
  $earliest = ( stat $filename )[9] unless defined($earliest);
185
  $latest   = ( stat $filename )[9] unless defined($latest);
142
  $latest   = ( stat $filename )[9] unless defined($latest);
186
  &getarticle($filename);                  # read in the article
143
  &get_article($filename);                 # read in the article
187
  &getdata;                                # grab the data from the article
144
  &get_data;                               # grab the data from the article
188
  $totalposts++;                           # bump count of articles considered
145
  $totalposts++;                          # bump count of articles considered
189
}
146
}
190
closedir(DIR);                             # finished with the directory
147
closedir(DIR);                            # finished with the directory
191
148
192
# post-processing
149
## Post-processing
193
&countagents;    # count agents, collapsing versions
150
&count_agents;    # count agents, collapsing versions
194
&fixpercent;     # check percentages orig/total for posters
151
&fix_percent;     # check percentages orig/total for posters
195
152
196
&writedata;
153
&write_data;
197
154
198
#################### DISPLAY RESULTS #####################
155
#################### DISPLAY RESULTS #####################
199
print "=" x 76, "\n";
156
print "=" x 76, "\n";
200
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
157
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
201
print "=" x 76, "\n";
158
print "=" x 76, "\n";
202
printf "%s\n",
159
printf "%s\n",
203
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
160
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
204
print "\n\n";
161
print "\n\n";
205
printf "Total posts considered: %s over %d days\n", commify($totalposts),
162
printf "Total posts considered: %s over %d days\n", commify($totalposts),
206
  $numdays;
163
  $numdays;
207
printf "Earliest article: %s\n",               scalar localtime($earliest);
164
printf "Earliest article: %s\n",               scalar localtime($earliest);
208
printf "Latest article:   %s\n",               scalar localtime($latest);
165
printf "Latest article:   %s\n",               scalar localtime($latest);
209
printf "Original articles: %s, replies: %s\n", commify($origposts),
166
printf "Original articles: %s, replies: %s\n", commify($origposts),
210
  commify($replies);
167
  commify($replies);
211
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
168
printf "Total size of posts: %s bytes (%s KiB) (%.2f MiB)\n", commify($totsize),
212
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
169
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
213
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
170
printf "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
214
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
171
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
215
  commify( int( $totsize / $totalposts ) );
172
  commify( int( $totsize / $totalposts ) );
216
my $count = keys %data;
173
my $count = keys %data;
217
printf "Total headers: %s KB  bodies: %s KB\n",
174
printf "Total headers: %s KiB  bodies: %s KiB\n",
218
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
175
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
219
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
176
printf "Body text - quoted: %s KiB,  original: %s KiB = %02.2f%%, sigs: %s KiB\n",
220
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
177
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
221
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
178
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
222
  commify( int( $totsig / 1024 ) );
179
  commify( int( $totsig / 1024 ) );
223
printf "Total number of posters: %s, average %s bytes per poster\n",
180
printf "Total number of posters: %s, average %s bytes per poster\n",
224
  commify($count), commify( int( $totsize / $count ) );     #/
181
  commify($count), commify( int( $totsize / $count ) );     #/
225
$count = keys %threads;
182
$count = keys %threads;
226
printf "Total number of threads: %s, average %s bytes per thread\n",
183
printf "Total number of threads: %s, average %s bytes per thread\n",
227
  commify($count), commify( int( $totsize / $count ) );     #/
184
  commify($count), commify( int( $totsize / $count ) );     #/
228
printf "Total number of User-Agents: %d\n", scalar keys %agents;
185
printf "Total number of user agents: %d\n", scalar keys %agents;
229
print "\n", "=" x 76, "\n";
186
print "\n", "=" x 76, "\n";
230
187
231
###############################
188
########################################
232
# show posters by article count  Sec 1;
189
## Show posters by article count  Sec 1;
233
###############################
190
########################################
234
unless ( $skipSec{1} )
191
unless ( $skipSec{1} )
235
{
192
{
236
  if ( keys %data < $topposters )
193
  if ( keys %data < $topposters )
237
  {
194
  {
238
    $count = keys %data;
195
    $count = keys %data;
239
  }
196
  }
240
  else
197
  else
241
  {
198
  {
242
    $count = $topposters;
199
    $count = $topposters;
243
  }
200
  }
244
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
201
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
245
  print "=" x 76, "\n";
202
  print "=" x 76, "\n";
246
  $i = 0;
203
  $i = 0;
247
  foreach
204
  foreach
248
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
205
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
249
  {
206
  {
250
    my $name = substr( $poster, 0, 65 );
207
    my $name = substr( $poster, 0, 65 );
251
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
208
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
252
      $data{$poster}{count};
209
      $data{$poster}{count};
253
    last if ( ++$i == $count );
210
    last if ( ++$i == $count );
254
  }
211
  }
255
  print "\n", "=" x 76, "\n";
212
  print "\n", "=" x 76, "\n";
256
}
213
}
257
214
258
################################
215
######################################
259
# show posters by size in Kbytes Sec 2;
216
## Show posters by size in KiB  Sec 2;
260
################################
217
######################################
261
unless ( $skipSec{2} )
218
unless ( $skipSec{2} )
262
{
219
{
263
  if ( keys %data < $topposters )
220
  if ( keys %data < $topposters )
264
  {
221
  {
265
    $count = keys %data;
222
    $count = keys %data;
266
  }
223
  }
267
  else
224
  else
268
  {
225
  {
269
    $count = $topposters;
226
    $count = $topposters;
270
  }
227
  }
271
  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
228
  printf "%s\n", &centred( "Top $count posters by article size in KiB", 76 );
272
  print "=" x 76, "\n";
229
  print "=" x 76, "\n";
273
  $i = 0;
230
  $i = 0;
274
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
231
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
275
  {
232
  {
276
    my $name = substr( $poster, 0, 62 );
233
    my $name = substr( $poster, 0, 62 );
277
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
234
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
278
      $data{$poster}{size} / 1024;    #/
235
      $data{$poster}{size} / 1024;    #/
279
    last if ( ++$i == $count );
236
    last if ( ++$i == $count );
280
  }
237
  }
281
  print "\n", "=" x 76, "\n";
238
  print "\n", "=" x 76, "\n";
282
}
239
}
283
240
284
####################################
241
#####################################
285
# show top posters for original text
242
## Show top posters for original text
286
####################################
243
#####################################
287
unless ( $skipSec{3} )
244
unless ( $skipSec{3} )
288
{
245
{
289
  if ( keys %data < $topposters )
246
  if ( keys %data < $topposters )
290
  {
247
  {
291
    $count = keys %data;
248
    $count = keys %data;
292
  }
249
  }
293
  else
250
  else
294
  {
251
  {
295
    $count = $topposters;
252
    $count = $topposters;
296
  }
253
  }
297
  printf "%s\n",
254
  printf "%s\n",
298
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
255
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
299
  print "=" x 76, "\n";
256
  print "=" x 76, "\n";
300
  $i = 0;
257
  $i = 0;
301
  foreach my $poster (
258
  foreach my $poster (
302
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
259
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
303
    keys %data
260
    keys %data
304
    )
261
    )
305
  {
262
  {
306
    next if $data{$poster}{quoted} == 0;
263
    next if $data{$poster}{quoted} == 0;
307
    next if $data{$poster}{count} < 5;
264
    next if $data{$poster}{count} < 5;
308
    my $name = substr( $poster, 0, 63 );
265
    my $name = substr( $poster, 0, 63 );
309
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
266
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
310
      $data{$poster}{percent};
267
      $data{$poster}{percent};
311
    last if ( ++$i == $count );
268
    last if ( ++$i == $count );
312
  }
269
  }
313
  print "\n", "=" x 76, "\n";
270
  print "\n", "=" x 76, "\n";
314
}
271
}
315
272
316
#######################################
273
########################################
317
# show bottom posters for original text
274
## Show bottom posters for original text
318
#######################################
275
########################################
319
unless ( $skipSec{4} )
276
unless ( $skipSec{4} )
320
{
277
{
321
  if ( keys %data < $topposters )
278
  if ( keys %data < $topposters )
322
  {
279
  {
323
    $count = keys %data;
280
    $count = keys %data;
324
  }
281
  }
325
  else
282
  else
326
  {
283
  {
327
    $count = $topposters;
284
    $count = $topposters;
328
  }
285
  }
329
  printf "%s\n",
286
  printf "%s\n",
330
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
287
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
331
  print "=" x 76, "\n";
288
  print "=" x 76, "\n";
332
  $i = 0;
289
  $i = 0;
333
  foreach my $poster (
290
  foreach my $poster (
334
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
291
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
335
    keys %data
292
    keys %data
336
    )
293
    )
337
  {
294
  {
338
    next if $data{$poster}{quoted} == 0;
295
    next if $data{$poster}{quoted} == 0;
339
    next if $data{$poster}{count} < 5;
296
    next if $data{$poster}{count} < 5;
340
    my $name = substr( $poster, 0, 63 );
297
    my $name = substr( $poster, 0, 63 );
341
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
298
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
342
      $data{$poster}{percent};
299
      $data{$poster}{percent};
343
    last if ( ++$i == $count );
300
    last if ( ++$i == $count );
344
  }
301
  }
345
  print "\n", "=" x 76, "\n";
302
  print "\n", "=" x 76, "\n";
346
}
303
}
347
304
348
####################################
305
#####################################
349
# show threads by number of articles
306
## Show threads by number of articles
350
####################################
307
#####################################
351
unless ( $skipSec{5} )
308
unless ( $skipSec{5} )
352
{
309
{
353
  if ( keys %threads < $topthreads )
310
  if ( keys %threads < $topthreads )
354
  {
311
  {
355
    $count = keys %threads;
312
    $count = keys %threads;
356
  }
313
  }
357
  else
314
  else
358
  {
315
  {
359
    $count = $topthreads;
316
    $count = $topthreads;
360
  }
317
  }
361
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
318
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
362
  print "=" x 76, "\n";
319
  print "=" x 76, "\n";
363
  $i = 0;
320
  $i = 0;
364
  foreach my $thread (
321
  foreach my $thread (
365
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
322
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
366
    keys %threads
323
    keys %threads
367
    )
324
    )
368
  {
325
  {
369
    my $name = substr( $thread, 0, 65 );
326
    my $name = substr( $thread, 0, 65 );
370
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
327
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
371
      $threads{$thread}{count};
328
      $threads{$thread}{count};
372
    last if ( ++$i == $count );
329
    last if ( ++$i == $count );
373
  }
330
  }
374
  print "\n", "=" x 76, "\n";
331
  print "\n", "=" x 76, "\n";
375
}
332
}
-
 
333
376
################################
334
##############################
377
# show threads by size in Kbytes
335
## Show threads by size in KiB
378
################################
336
##############################
379
unless ( $skipSec{6} )
337
unless ( $skipSec{6} )
380
{
338
{
381
  if ( keys %threads < $topthreads )
339
  if ( keys %threads < $topthreads )
382
  {
340
  {
383
    $count = keys %threads;
341
    $count = keys %threads;
384
  }
342
  }
385
  else
343
  else
386
  {
344
  {
387
    $count = $topthreads;
345
    $count = $topthreads;
388
  }
346
  }
389
  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
347
  printf "%s\n", &centred( "Top $count threads by size in KiB", 76 );
390
  print "=" x 76, "\n";
348
  print "=" x 76, "\n";
391
  $i = 0;
349
  $i = 0;
392
  foreach my $thread (
350
  foreach my $thread (
393
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
351
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
394
    keys %threads
352
    keys %threads
395
    )
353
    )
396
  {
354
  {
397
    my $name = substr( $thread, 0, 65 );
355
    my $name = substr( $thread, 0, 65 );
398
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
356
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
399
      $threads{$thread}{size} / 1024;    #/
357
      $threads{$thread}{size} / 1024;    #/
400
    last if ( ++$i == $count );
358
    last if ( ++$i == $count );
401
  }
359
  }
402
  print "\n", "=" x 76, "\n";
360
  print "\n", "=" x 76, "\n";
403
}
361
}
404
362
405
#################################
363
##################################
406
# show top 10 cross-posted groups
364
## Show top 10 cross-posted groups
407
#################################
365
##################################
408
unless ( $skipSec{7} )
366
unless ( $skipSec{7} )
409
{
367
{
410
  delete $crossposts{"$newsgroup_name"};    # don't include ours
368
  delete $crossposts{"$newsgroup_name"};    # don't include ours
411
  if ( keys %crossposts < $topcrossposts )
369
  if ( keys %crossposts < $topcrossposts )
412
  {
370
  {
413
    $count = keys %crossposts;
371
    $count = keys %crossposts;
414
  }
372
  }
415
  else
373
  else
416
  {
374
  {
417
    $count = $topcrossposts;
375
    $count = $topcrossposts;
418
  }
376
  }
419
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
377
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
420
  print "=" x 76, "\n";
378
  print "=" x 76, "\n";
421
  $i = 0;
379
  $i = 0;
422
  foreach
380
  foreach
423
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
381
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
424
  {
382
  {
425
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
383
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
426
      $crossposts{$name};
384
      $crossposts{$name};
427
    last if ( ++$i == $count );
385
    last if ( ++$i == $count );
428
  }
386
  }
429
  print "\n", "=" x 76, "\n";
387
  print "\n", "=" x 76, "\n";
430
}
388
}
-
 
389
431
#######################
390
#########################
432
#show agents and counts
391
## Show agents and counts
433
#######################
392
#########################
434
unless ( $skipSec{8} )
393
unless ( $skipSec{8} )
435
{
394
{
436
  if ( keys %agents < $topagents )
395
  if ( keys %agents < $topagents )
437
  {
396
  {
438
    $count = keys %agents;
397
    $count = keys %agents;
439
  }
398
  }
440
  else
399
  else
441
  {
400
  {
442
    $count = $topagents;
401
    $count = $topagents;
443
  }
402
  }
444
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
403
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
445
  print "=" x 76, "\n";
404
  print "=" x 76, "\n";
446
  $i = 0;
405
  $i = 0;
447
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
406
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
448
  {
407
  {
449
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
408
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
450
      $agents{$agent};
409
      $agents{$agent};
451
    last if ( ++$i == $count );
410
    last if ( ++$i == $count );
452
  }
411
  }
453
  print "\n", "=" x 76, "\n";
412
  print "\n", "=" x 76, "\n";
454
}
413
}
455
414
456
#######################
415
#######################
457
#show distinct agents
416
## Show distinct agents
458
#######################
417
#######################
459
unless ( $skipSec{9} )
418
unless ( $skipSec{9} )
460
{
419
{
461
  if ( keys %distinct_agent < $topagents )
420
  if ( keys %distinct_agent < $topagents )
462
  {
421
  {
463
    $count = keys %distinct_agent;
422
    $count = keys %distinct_agent;
464
  }
423
  }
465
  else
424
  else
466
  {
425
  {
467
    $count = $topagents;
426
    $count = $topagents;
468
  }
427
  }
469
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
428
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
470
  print "=" x 76, "\n";
429
  print "=" x 76, "\n";
471
  $i = 0;
430
  $i = 0;
472
  foreach my $agent (
431
  foreach my $agent (
473
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
432
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
474
    keys %distinct_agent
433
    keys %distinct_agent
475
    )
434
    )
476
  {
435
  {
477
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
436
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
478
      $distinct_agent{$agent},
437
      $distinct_agent{$agent},
479
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
438
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
480
    last if ( ++$i == $count );
439
    last if ( ++$i == $count );
481
  }
440
  }
482
  print "\n", "=" x 76, "\n";
441
  print "\n", "=" x 76, "\n";
483
}
442
}
484
443
485
##########################
444
############################
486
#show timezones and counts
445
## Show timezones and counts
487
##########################
446
############################
488
unless ( $skipSec{10} )
447
unless ( $skipSec{10} )
489
{
448
{
490
  if ( keys %tz < $toptz )
449
  if ( keys %tz < $toptz )
491
  {
450
  {
492
    $count = keys %tz;
451
    $count = keys %tz;
493
  }
452
  }
494
  else
453
  else
495
  {
454
  {
496
    $count = $toptz;
455
    $count = $toptz;
497
  }
456
  }
498
  printf "%s\n", &centred( "Top 10 time zones", 76 );
457
  printf "%s\n", &centred( "Top 10 time zones", 76 );
499
  print "=" x 76, "\n";
458
  print "=" x 76, "\n";
500
  $i = 0;
459
  $i = 0;
501
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
460
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
502
  {
461
  {
503
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
462
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
504
    last if ( ++$i == $count );
463
    last if ( ++$i == $count );
505
  }
464
  }
506
  print "\n", "=" x 76, "\n";
465
  print "\n", "=" x 76, "\n";
507
}
466
}
508
467
509
################################ SUBROUTINES ################################
468
################################ SUBROUTINES ################################
510
469
511
#######################################
470
########################################
512
# get current article's header and body
471
## Get current article's header and body
513
#######################################
472
########################################
514
sub getarticle
473
sub get_article
515
{
474
{
516
  %headers = ();    # dump old headers
475
  %headers = ();    # dump old headers
517
  my $filename = shift;    # get the name of the file
476
  my $filename = shift;    # get the name of the file
518
477
519
  # get stats about the file itself
478
  ## get stats about the file itself
520
  $filesize = -s $filename;    # get total size of file
479
  $filesize = -s $filename;    # get total size of file
521
  $totsize += $filesize;       # bump total sizes of all files
480
  $totsize += $filesize;       # bump total sizes of all files
522
481
523
  my $mtime = ( stat $filename )[9];
482
  my $mtime = ( stat $filename )[9];
524
  if ( $mtime < $earliest )
483
  if ( $mtime < $earliest )
525
  {
484
  {
526
    $earliest = $mtime;
485
    $earliest = $mtime;
527
  }
486
  }
528
  elsif ( $mtime > $latest )
487
  elsif ( $mtime > $latest )
529
  {
488
  {
530
    $latest = $mtime;
489
    $latest = $mtime;
531
  }
490
  }
532
491
533
  # now read the file
492
  ## now read the file
534
  open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
493
  open( my $FILE, '<', $filename ) or die "Can't open $filename: $!\n";
535
  while (<$FILE>)
494
  while (<$FILE>)
536
  {
495
  {
537
    $totheader += length($_);    # bump total header size
496
    $totheader += length($_);    # bump total header size
538
    last if (/^\s*$/);           # end of header?
497
    last if (/^\s*$/);           # end of header?
539
    if (/^([^:\s]*):\s+(.*)/)
498
    if (/^([^:\s]*):\s*(.*)/)
540
    {
499
    {
541
      my ( $key, $val ) = ( $1, $2 );
500
      my ( $key, $val ) = ( $1, $2 );
542
      $headers{$key} = decode( 'MIME-Header', $val );
501
      $headers{$key} = decode( 'MIME-Header', $val );
543
      $lcheader{ clean( lc($key) ) } = clean($val);
502
      $lcheader{ clean( lc($key) ) } = clean($val);
544
    }
503
    }
545
  }
504
  }
546
  @body = <$FILE>;               # slurp up body
505
  @body = <$FILE>;               # slurp up body
547
  close($FILE);
506
  close($FILE);
548
}    # getarticle
507
}    # get_article
549
508
550
###################################
509
####################################
551
# get data from the current article
510
## Get data from the current article
552
###################################
511
####################################
553
sub getdata
512
sub get_data
554
{
513
{
555
#### First, analyse header fields ####
514
#### First, analyse header fields ####
556
515
557
  # Set up this poster if not defined, get counts, sizes
516
  ## Set up this poster if not defined, get counts, sizes
558
  my $poster = $headers{From};    # get the poster's name
517
  my $poster = $headers{From};    # get the poster's name
-
 
518
 
-
 
519
  # Convert old to new format
-
 
520
  $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/;
-
 
521
 
-
 
522
  # Collapse whitespace
-
 
523
  $poster =~ s/\s+/ /;
-
 
524
 
-
 
525
  # Remove outer quotes
-
 
526
  $poster =~ s/^["'](.+?)["']\s+(.*)/$1 $2/;
-
 
527
 
559
  if ( !defined( $data{$poster} ) )
528
  if ( !defined( $data{$poster} ) )
560
  {                                   # seen this one before?
529
  {                               # seen this one before?
561
    $data{$poster}{agent}  = 'Unknown';    # comes after For: field
530
    $data{$poster}{agent}  = 'Unknown';    # comes after For: field
562
    $data{$poster}{orig}   = 0;
531
    $data{$poster}{orig}   = 0;
563
    $data{$poster}{quoted} = 0;
532
    $data{$poster}{quoted} = 0;
564
  }
533
  }
565
  $data{$poster}{count}++;                 # bump count for this poster
534
  $data{$poster}{count}++;                 # bump count for this poster
566
  $data{$poster}{size} += $filesize;       # total size of file
535
  $data{$poster}{size} += $filesize;       # total size of file
567
536
568
  # The User-Agent and/or X-Newsreader fields
537
  ## The User-Agent and/or X-Newsreader fields
569
  # for User-Agent by poster
538
  ## for User-Agent by poster
570
  if ( defined $lcheader{"user-agent"} )
539
  if ( defined $lcheader{"user-agent"} )
571
  {
540
  {
572
    $data{$poster}{agent} = $lcheader{"user-agent"};
541
    $data{$poster}{agent} = $lcheader{"user-agent"};
573
  }
542
  }
574
  if ( defined $lcheader{"x-newsreader"} )
543
  if ( defined $lcheader{"x-newsreader"} )
575
  {
544
  {
576
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
545
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
577
  }
546
  }
578
547
579
  # The User Agent for User-Agent by number of posts
548
  ## The User Agent for User-Agent by number of posts
580
  my $UA = "unknown";
549
  my $UA = "unknown";
581
  foreach my $keys ( keys %lcheader )
550
  foreach my $keys ( keys %lcheader )
582
  {
551
  {
583
    if ( defined $lcheader{'user-agent'} )
552
    if ( defined $lcheader{'user-agent'} )
584
    {
553
    {
585
      $UA = $lcheader{'user-agent'};
554
      $UA = $lcheader{'user-agent'};
586
    }
555
    }
587
    elsif ( defined $lcheader{"x-newsreader"} )
556
    elsif ( defined $lcheader{"x-newsreader"} )
588
    {
557
    {
589
      $UA = $lcheader{"x-newsreader"};
558
      $UA = $lcheader{"x-newsreader"};
590
    }
559
    }
591
    elsif ( defined $lcheader{'x-mailer'} )
560
    elsif ( defined $lcheader{'x-mailer'} )
592
    {
561
    {
593
      $UA = $lcheader{'x-mailer'};
562
      $UA = $lcheader{'x-mailer'};
594
    }
563
    }
595
    elsif (
564
    elsif (
596
      ( defined $lcheader{'organization'} )
565
      ( defined $lcheader{'organization'} )
597
      && ( $lcheader{'organization'} =~
566
      && ( $lcheader{'organization'} =~
598
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
567
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
599
      )
568
      )
600
    {
569
    {
601
      $UA = $lcheader{'organization'};
570
      $UA = $lcheader{'organization'};
602
    }
571
    }
603
    elsif ( $lcheader{'message-id'} =~ /pine/i )
572
    elsif ( $lcheader{'message-id'} =~ /pine/i )
604
    {
573
    {
605
      $UA = "Pine";
574
      $UA = "Pine";
606
    }    ## Hopefully found UA, else set to unknown
575
    }    ## Hopefully found UA, else set to unknown
607
  }
576
  }
608
577
609
  $UA = clean($UA);
578
  $UA = clean($UA);
610
  $UA = get_agent($UA);
579
  $UA = get_agent($UA);
611
580
612
  sub get_agent
581
  sub get_agent
613
  {
582
  {
614
    my $raw   = shift;
583
    my $raw   = shift;
615
    my $agent = $raw;
584
    my $agent = $raw;
616
585
617
    ## strip http
586
    ## strip http
618
    if ( $raw =~ /.*http.*/ )
587
    if ( $raw =~ /.*http.*/ )
619
    {
588
    {
620
      $raw =~ s!posted via!!i;
589
      $raw =~ s!posted via!!i;
621
      $raw =~ s!http://!!g;
590
      $raw =~ s!http://!!g;
622
      $raw =~ s!/!!g;
591
      $raw =~ s!/!!g;
623
      $raw =~ s! !!g;
592
      $raw =~ s! !!g;
624
    }
593
    }
625
594
626
    ## Fix Outlook from Mac
595
    ## Fix Outlook from Mac
627
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
596
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
628
597
629
    ## Pick out the popular agents
598
    ## Pick out the popular agents
-
 
599
    if (
630
    if ( $raw =~ /(outlook express)/i
600
           $raw =~ /(outlook express)/i
-
 
601
        || $raw =~ /(windows mail)/i
631
      || $raw =~ /(microplanet gravity)/i
602
        || $raw =~ /(microplanet gravity)/i
632
      || $raw =~ /(news rover)/i
603
        || $raw =~ /(news rover)/i
633
      || $raw =~ /(forte agent)/i
604
        || $raw =~ /(forte agent)/i
634
      || $raw =~ /(forte free agent)/i )
605
        || $raw =~ /(forte free agent)/i
-
 
606
      )
635
    {
607
    {
636
      $agent = $1;
608
      $agent = $1;
637
    }
609
    }
638
    elsif (
610
    elsif (
639
      $raw =~ /^(
611
      $raw =~ /^(
640
        pan
612
        pan
641
       |sylpheed
613
       |sylpheed
642
       |slrn
614
       |slrn
643
       |mozilla
615
       |mozilla
644
       |knode
616
       |knode
645
       |tin
617
       |tin
646
       |hamster
618
       |hamster
647
       |xrn
619
       |xrn
648
       |xnews
620
       |xnews
649
       |aol
621
       |aol
650
       |gnus
622
       |gnus
651
       |krn
623
       |krn
652
       |macsoup
624
       |macsoup
653
       |messenger
625
       |messenger
654
       |openxp
626
       |openxp
655
       |pine
627
       |pine
656
       |thoth
628
       |thoth
657
       |turnpike
629
       |turnpike
658
       |winvn
630
       |winvn
659
       |vsoup
631
       |vsoup
660
       |google
632
       |google
661
       |supernews
633
       |supernews
662
       |nn
634
       |nn
663
       |rn
635
       |rn
664
       |007
636
       |007
665
       |webtv
637
       |webtv
666
       |compuserve
638
       |compuserve
667
       )/ix
639
       )/ix
668
      )
640
      )
669
    {
641
    {
670
      $agent = $1;
642
      $agent = $1;
671
    }
643
    }
672
    else
644
    else
673
    {
645
    {
674
      ## Clean up unknown agents
646
      ## Clean up unknown agents
675
      if ( $raw =~ m!^(.*?)/! )
647
      if ( $raw =~ m!^(.*?)/! )
676
      {
648
      {
677
        $agent = $1;
649
        $agent = $1;
678
      }
650
      }
679
      elsif ( $raw =~ /^(\w*)\d.*/ )
651
      elsif ( $raw =~ /^(\w*)\d.*/ )
680
      {
652
      {
681
        $agent = $1;
653
        $agent = $1;
682
      }
654
      }
683
    }
655
    }
684
656
685
    $distinct_agent{$agent}++;
657
    $distinct_agent{$agent}++;
686
    return $agent;
658
    return $agent;
687
  }
659
  }
688
660
689
  # Get all cross-posted newsgroups
661
  ## Get all cross-posted newsgroups
690
  for ( split /,/, $headers{"Newsgroups"} )
662
  for ( split /,/, $headers{"Newsgroups"} )
691
  {
663
  {
692
    $crossposts{$_}++;    # bump count for each
664
    $crossposts{$_}++;    # bump count for each
693
  }
665
  }
694
666
695
  # Get threads
667
  ## Get threads
696
  my $thread = $headers{"Subject"};
668
  my $thread = $headers{"Subject"};
697
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
669
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
698
  $thread =~ s/\s+/ /g;     # collapse whitespace
670
  $thread =~ s/\s+/ /g;     # collapse whitespace
699
  $threads{$thread}{count} += 1;            # bump count of this subject
671
  $threads{$thread}{count} += 1;            # bump count of this subject
700
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
672
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
701
673
702
  # Is this an original post or a reply?
674
  ## Is this an original post or a reply?
703
  if ( defined $headers{"References"} )
675
  if ( defined $headers{"References"} )
704
  {
676
  {
705
    $replies++;
677
    $replies++;
706
  }
678
  }
707
  else
679
  else
708
  {
680
  {
709
    $origposts++;
681
    $origposts++;
710
  }
682
  }
711
683
712
  # Get the time zone
684
  ## Get the time zone
713
  $_ = $headers{"Date"};
685
  $_ = $headers{"Date"};
714
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
686
  my ($tz) = /\d\d:\d\d(?::\d\d)?\s+(.*)/;
715
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
687
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
716
  {
688
  {
717
    $tz = "UTC";
689
    $tz = "UTC";
718
  }
690
  }
719
  $tz{$tz}++;
691
  $tz{$tz}++;
720
692
721
#### Now analyse the body text ####
693
#### Now analyse the body text ####
722
  my $insig = 0;
694
  my $insig = 0;
723
  for (@body)
695
  for (@body)
724
  {
696
  {
725
    $totbody += length($_);    # bump total body size
697
    $totbody += length($_);    # bump total body size
726
    next if (/^$>/);           # don't count blank lines in body
698
    next if (/^$>/);           # don't count blank lines in body
727
    if ( $insig == 1 )
699
    if ( $insig == 1 )
728
    {
700
    {
729
      $totsig += length($_);    # bump total sig size
701
      $totsig += length($_);    # bump total sig size
730
702
731
      # Bill Unruh uses ] quotes, and another poster uses ::
703
      ## Bill Unruh uses ] quotes, and another poster uses ::
732
    }
704
    }
733
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
705
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
734
    {                           # are we in a quote line?
706
    {                           # are we in a quote line?
735
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
707
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
736
      $totquoted += length($_);
708
      $totquoted += length($_);
737
    }
709
    }
738
    elsif (/-- /)
710
    elsif (/-- /)
739
    {
711
    {
740
      $insig = 1;
712
      $insig = 1;
741
    }
713
    }
742
    else
714
    else
743
    {
715
    {
744
716
745
      # we must be processing an original line
717
      ## We must be processing an original line
746
      $data{$poster}{orig} += length($_);      # bump count of original chrs
718
      $data{$poster}{orig} += length($_);      # bump count of original chrs
747
      $totorig += length($_);
719
      $totorig += length($_);
748
    }
720
    }
749
  }    # end for (@body)
721
  }    # end for (@body)
750
722
751
}    # getdata
723
}    # get_data
752
724
753
########################################
725
#########################################
754
# Count the User-Agents used, collapsing
726
## Count the User-Agents used, collapsing
755
# different versions into one per agent.
727
## different versions into one per agent.
756
########################################
728
#########################################
757
sub countagents
729
sub count_agents
758
{
730
{
759
POSTER:
731
POSTER:
760
  foreach my $poster ( keys %data )
732
  foreach my $poster ( keys %data )
761
  {
733
  {
762
    foreach my $agent_name ( keys %distinct_agent )
734
    foreach my $agent_name ( keys %distinct_agent )
763
    {    # check against known ones
735
    {    # check against known ones
764
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
736
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
765
      {
737
      {
766
        $agents{$agent_name}++;
738
        $agents{$agent_name}++;
767
        next POSTER;
739
        next POSTER;
768
      }
740
      }
769
    }
741
    }
770
    $agents{ $data{$poster}{agent} }++;
742
    $agents{ $data{$poster}{agent} }++;
771
  }
743
  }
772
}    # countagents
744
}    # count_agents
773
745
774
############################################
746
#############################################
775
# set orig/total percentages for all posters
747
## Set orig/total percentages for all posters
776
############################################
748
#############################################
777
sub fixpercent
749
sub fix_percent
778
{
750
{
779
  foreach my $poster ( keys %data )
751
  foreach my $poster ( keys %data )
780
  {
752
  {
781
    my $percent = 100;
753
    my $percent = 100;
782
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
754
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
783
    {
755
    {
784
      $percent =
756
      $percent =
785
        $data{$poster}{orig} * 100 /
757
        $data{$poster}{orig} * 100 /
786
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
758
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
787
    }
759
    }
788
    elsif ( $data{$poster}{orig} == 0 )
760
    elsif ( $data{$poster}{orig} == 0 )
789
    {
761
    {
790
      $percent = 0;
762
      $percent = 0;
791
    }
763
    }
792
    $data{$poster}{percent} = $percent;
764
    $data{$poster}{percent} = $percent;
793
  }
765
  }
794
}
766
}
795
767
796
##############################
768
###############################
797
# right pad a string with '.'s
769
## Right pad a string with '.'s
798
##############################
770
###############################
799
sub rpad
771
sub rpad
800
{
772
{
801
  # get text to pad, length to pad, pad chr
773
  ## Get text to pad, length to pad, pad chr
802
  my ( $text, $pad_len, $pad_chr ) = @_;
774
  my ( $text, $pad_len, $pad_chr ) = @_;
803
775
804
  ### DEBUG
776
  ## DEBUG
805
#  printf "|%s| = %d\n", $text, length($text);
777
#printf "|%s| = %d\n", $text, length($text);
806
778
807
  if ( length($text) > $pad_len )
779
  if ( length($text) > $pad_len )
808
  {
780
  {
809
    $text = substr( $text, 0, $pad_len );
781
    $text = substr( $text, 0, $pad_len );
810
  }
782
  }
811
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
783
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
812
  return $padded;
784
  return $padded;
813
}
785
}
814
786
815
#################
787
##################
816
# centre a string
788
## Centre a string
817
#################
789
##################
818
sub centred
790
sub centred
819
{
791
{
820
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
792
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
821
  my $pad_len = ( $width - length($text) ) / 2;    #/
793
  my $pad_len = ( $width - length($text) ) / 2;    #/
822
  my $centred = " " x $pad_len . $text;
794
  my $centred = " " x $pad_len . $text;
823
  return $centred;
795
  return $centred;
824
}
796
}
825
797
826
##########################
798
###########################
827
# put commas into a number
799
## Put commas into a number
828
##########################
800
###########################
829
sub commify
801
sub commify
830
{
802
{
831
  $_ = shift;
803
  local $_ = shift;
832
  1 while s/^(-?\d+)(\d{3})/$1,$2/;
804
  1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
833
  return $_;
805
  return $_;
834
}
806
}
835
807
836
#########################
808
################################################################
837
# clean
809
## Returns a string with leading and trailing whitespace removed
838
#########################
810
################################################################
839
sub clean
811
sub clean
840
{
812
{
841
  my $dirty = shift;
813
  my $dirty = shift;
842
  my $clean = $dirty;
814
  my $clean = $dirty;
843
  $clean =~ s/^\s*//;
-
 
844
  $clean =~ s/\s*$//;
815
  $clean =~ s/^\s*|\s*$//g;
845
816
846
  return $clean;
817
  return $clean;
847
}
818
}
848
819
849
sub usage
820
sub usage
850
{
821
{
851
-
 
852
  print "usage: newstat.pl newsgroupname\n";
822
  print "usage: newstat.pl newsgroupname\n";
853
  exit 1;
823
  exit 1;
854
}
824
}
855
825
856
###################################
826
##################################
857
# Write data structures to a file #
827
## Write data structures to a file
858
###################################
828
##################################
859
sub writedata
829
sub write_data
860
{
830
{
-
 
831
  open my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA"
861
  open my $OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
832
    or die "Can't create XDATA: $!\n";
862
  print $OUTF "Data collected from alt.os.linux.mandrake\n\n";
833
  print $OUTF "Data collected from $newsgroup_name\n\n";
863
  print $OUTF
834
  print $OUTF
864
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
835
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
865
  foreach my $name ( keys %data )
836
  foreach my $name ( keys %data )
866
  {
837
  {
867
    print $OUTF
838
    print $OUTF
868
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
839
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
869
  }
840
  }
870
  print $OUTF
841
  print $OUTF
871
"============================================================================\n";
842
"============================================================================\n";
872
  print $OUTF "Thread subjects\n";
843
  print $OUTF "Thread subjects\n";
873
  print $OUTF
844
  print $OUTF
874
"----------------------------------------------------------------------------\n";
845
"----------------------------------------------------------------------------\n";
875
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
846
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
876
  {
847
  {
877
    print $OUTF
848
    print $OUTF
878
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
849
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
879
  }
850
  }
880
  print $OUTF
851
  print $OUTF
881
"============================================================================\n";
852
"============================================================================\n";
882
  print $OUTF "Cross-posts\n";
853
  print $OUTF "Cross-posts\n";
883
  print $OUTF
854
  print $OUTF
884
"----------------------------------------------------------------------------\n";
855
"----------------------------------------------------------------------------\n";
885
  foreach my $name ( sort keys %crossposts )
856
  foreach my $name ( sort keys %crossposts )
886
  {
857
  {
887
    print $OUTF "$name : $crossposts{$name}\n";
858
    print $OUTF "$name : $crossposts{$name}\n";
888
  }
859
  }
889
  print $OUTF print $OUTF
860
  print $OUTF
890
"============================================================================\n";
861
"============================================================================\n";
891
  print $OUTF "User agents\n";
862
  print $OUTF "User agents\n";
892
  print $OUTF
863
  print $OUTF
893
"----------------------------------------------------------------------------\n";
864
"----------------------------------------------------------------------------\n";
894
  foreach my $name ( sort keys %agents )
865
  foreach my $name ( sort keys %agents )
895
  {
866
  {
896
    print $OUTF "$name : $agents{$name}\n";
867
    print $OUTF "$name : $agents{$name}\n";
897
  }
868
  }
898
  print $OUTF
869
  print $OUTF
899
"============================================================================\n";
870
"============================================================================\n";
900
  print $OUTF "Time zones\n";
871
  print $OUTF "Time zones\n";
901
  print $OUTF
872
  print $OUTF
902
"----------------------------------------------------------------------------\n";
873
"----------------------------------------------------------------------------\n";
903
  foreach my $name ( sort keys %tz )
874
  foreach my $name ( sort keys %tz )
904
  {
875
  {
905
    print $OUTF "$name : $tz{$name}\n";
876
    print $OUTF "$name : $tz{$name}\n";
906
  }
877
  }
907
  close $OUTF;
878
  close $OUTF;
908
}    # writedata
879
}    # write_data
909
 
880