Subversion Repositories LCARS

Rev

Rev 10 | Rev 13 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 10 Rev 11
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
use utf8;
4
use utf8;
5
use encoding 'utf-8';
5
use encoding 'utf-8';
6
use Encode;
6
use Encode;
7
7
8
###########################
8
###########################
9
# newsstat.pl version 0.4.2
9
# newsstat.pl version 0.4.2
10
10
11
############################################################################
11
############################################################################
12
# Collect statistics about a newsgroup (specified by first argument) in
12
# Collect statistics about a newsgroup (specified by first argument) in
13
# the local news spool. Check all articles in the last 30-day period.
13
# the local news spool. Check all articles in the last 30-day period.
14
# Rank posters by number of posts and by volume of posts, report on top and
14
# Rank posters by number of posts and by volume of posts, report on top and
15
# bottom 20 posters. Show their name, number of posts, size of posts,
15
# bottom 20 posters. Show their name, number of posts, size of posts,
16
# percentage of quoted lines. Rank user-agents used, by poster rather than
16
# percentage of quoted lines. Rank user-agents used, by poster rather than
17
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
17
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
18
#
18
#
19
# (Numbers and paths can be configured below.  -- PE)
19
# (Numbers and paths can be configured below.  -- PE)
20
############################################################################
20
############################################################################
21
21
22
############################################################################
22
############################################################################
23
#                       RECENT CHANGES                                     #
23
#                       RECENT CHANGES                                     #
24
# 2011-10-03  PE  - Use more compatible shebang
24
# 2011-10-03  PE  - Use more compatible shebang
25
#                 - Fixed some Perl::Critic-ized code
25
#                 - Fixed some Perl::Critic-ized code
26
#                 - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
26
#                 - Fixed wrong indent for non-ASCII names
27
#                 - Formatted source code
27
#                 - Formatted source code
28
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
28
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
29
#                 - Use warnings, utf8 (just in case)
29
#                 - Use warnings, utf8 (just in case)
30
#                 - Documentation update
30
#                 - Documentation update
31
# N/A         NN  - Take newsgroup name as argument
31
# N/A         NN  - Take newsgroup name as argument
32
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
32
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
33
#                 - Allow command line flags for subtracting
33
#                 - Allow command line flags for subtracting
34
#                   output if not pertinent for a group
34
#                   output if not pertinent for a group
35
# 2002-11-09  NN  - Put Garry's writedata() function back in.
35
# 2002-11-09  NN  - Put Garry's writedata() function back in.
36
#                 - added "rn" to my list of UA's
36
#                 - added "rn" to my list of UA's
37
#                 - Started using %distinct_agent for both User agent
37
#                 - Started using %distinct_agent for both User agent
38
#                   sections
38
#                   sections
39
#                 - named it newsstat.pl version 0.3
39
#                 - named it newsstat.pl version 0.3
40
# 2002-11-06  NN  - Fixed the earliest/latest file problem by using
40
# 2002-11-06  NN  - Fixed the earliest/latest file problem by using
41
#                   mtime rather than ctime, and simplifying the logic
41
#                   mtime rather than ctime, and simplifying the logic
42
# 2002-11-05  NN  - moved user configurations to the top
42
# 2002-11-05  NN  - moved user configurations to the top
43
#                 - fixed the cross-posting section
43
#                 - fixed the cross-posting section
44
#                 - introduced the $newsgroup_name variable which
44
#                 - introduced the $newsgroup_name variable which
45
#                   later becomes $news$group
45
#                   later becomes $news$group
46
#                 - changed $name to $agent_name in countagents()
46
#                 - changed $name to $agent_name in countagents()
47
#
47
#
48
# Contributors
48
# Contributors
49
# -------------
49
# -------------
50
# NN  Nomen nominandum (name to be determined later)
50
# NN  Nomen nominandum (name to be determined later)
51
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
51
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
52
52
53
########### TODO #############
53
########### TODO #############
54
# Commas in bottom section of report
54
# Commas in bottom section of report
55
# Show date the figures were compiled
55
# Show date the figures were compiled
56
# No. of HTML articles (Content-Type: text/html)
56
# No. of HTML articles (Content-Type: text/html)
57
# No. of quoted sigs (/>\s*-- /)
57
# No. of quoted sigs (/>\s*-- /)
58
# Per cent of top-posted articles
58
# Per cent of top-posted articles
59
# Top 10 cross-posters
59
# Top 10 cross-posters
60
# Top 20 news posting hosts (from Path)
60
# Top 20 news posting hosts (from Path)
61
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
61
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
62
# Count *all* User Agents that each poster uses
62
# Count *all* User Agents that each poster uses
63
# What do we do about Bill Unruh's ] quote style?
63
# What do we do about Bill Unruh's ] quote style?
64
# Change the way dates/times are checked
64
# Change the way dates/times are checked
65
# include % share in posters by no. of arts
65
# include % share in posters by no. of arts
66
# include % share in posters by size
66
# include % share in posters by size
67
# Total, orig & quoted lines by user agent with per cent
67
# Total, orig & quoted lines by user agent with per cent
68
# Take more arguments
68
# Take more arguments
69
#######################################################
69
#######################################################
70
70
71
###################### USER CONFIGURATIONS ############################
71
###################### USER CONFIGURATIONS ############################
72
72
73
# The name of the group to do stats for
73
# The name of the group to do stats for
74
my $newsgroup_name = $ARGV[0];
74
my $newsgroup_name = $ARGV[0];
75
$newsgroup_name or &usage;
75
$newsgroup_name or &usage;
76
76
77
# Check for removal flags
77
# Check for removal flags
78
my $ix;
78
my $ix;
79
my $j;
79
my $j;
80
my %skipSec;
80
my %skipSec;
81
my @skiplist;
81
my @skiplist;
82
my $args = @ARGV;
82
my $args = @ARGV;
83
for ( $ix = 1 ; $ix < $args ; $ix++ )
83
for ( $ix = 1 ; $ix < $args ; $ix++ )
84
{
84
{
85
  $j = $ix + 1;
85
  $j = $ix + 1;
86
  if ( $ARGV[$ix] eq "-x" )
86
  if ( $ARGV[$ix] eq "-x" )
87
  {
87
  {
88
    @skiplist = split( ",", $ARGV[$j] );
88
    @skiplist = split( ",", $ARGV[$j] );
89
  }
89
  }
90
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
90
  elsif ( $ARGV[$ix] =~ /-x(\d.*)/ )
91
  {
91
  {
92
    @skiplist = split( ",", $1 );
92
    @skiplist = split( ",", $1 );
93
  }
93
  }
94
}
94
}
95
foreach (@skiplist)
95
foreach (@skiplist)
96
{
96
{
97
  $skipSec{$_} = 1;
97
  $skipSec{$_} = 1;
98
}
98
}
99
99
100
# Leafnode users will want /var/spool/news for this variable.
100
# Leafnode users will want /var/spool/news for this variable.
101
my $news = "/var/spool/news/";
101
my $news = "/var/spool/news/";
102
102
103
# How many days are we doing statistics for?
103
# How many days are we doing statistics for?
104
my $numdays = 30;
104
my $numdays = 30;
105
105
106
# no. of agents we list
106
# no. of agents we list
107
my $topagents = 10;
107
my $topagents = 10;
108
108
109
# no. of threads we want to know about
109
# no. of threads we want to know about
110
my $topthreads = 20;
110
my $topthreads = 20;
111
111
112
# no. of top or bottom posters to show
112
# no. of top or bottom posters to show
113
my $topposters = 20;
113
my $topposters = 20;
114
114
115
# no. of cross-posted threads to show
115
# no. of cross-posted threads to show
116
my $topcrossposts = 10;
116
my $topcrossposts = 10;
117
117
118
# no. of time zones to show
118
# no. of time zones to show
119
my $toptz = 10;
119
my $toptz = 10;
120
120
121
###################### DATA STRUCTURES ######################
121
###################### DATA STRUCTURES ######################
122
my $group = $newsgroup_name;
122
my $group = $newsgroup_name;
123
$group =~ s!\.!/!g;
123
$group =~ s!\.!/!g;
124
my %data;          # name, count, agent, total, orig, quoted
124
my %data;          # name, count, agent, total, orig, quoted
125
my %threads;       # subject, count
125
my %threads;       # subject, count
126
my %crossposts;    # group, count
126
my %crossposts;    # group, count
127
my %tz;            # timezones by count
127
my %tz;            # timezones by count
128
my %headers;       # holds header of current article
128
my %headers;       # holds header of current article
129
my %lcheader;      # holds lowercase headers
129
my %lcheader;      # holds lowercase headers
130
my @body;          # holds body of current article
130
my @body;          # holds body of current article
131
my @sig;           # holds sig text;
131
my @sig;           # holds sig text;
132
my $totalposts;    # total no. of posts considered
132
my $totalposts;    # total no. of posts considered
133
my $filename;      # name of current article file
133
my $filename;      # name of current article file
134
my $filesize;      # size of current article file
134
my $filesize;      # size of current article file
135
my $earliest;      # earliest article we have found
135
my $earliest;      # earliest article we have found
136
my $latest;        # latest article we have found
136
my $latest;        # latest article we have found
137
my $poster;        # poster we are dealing with
137
my $poster;        # poster we are dealing with
138
my $totsize   = 0; # holds total sizes of all files
138
my $totsize   = 0; # holds total sizes of all files
139
my $totheader = 0; # total size of header material
139
my $totheader = 0; # total size of header material
140
my $totbody   = 0; # total size of body material
140
my $totbody   = 0; # total size of body material
141
my $totsig    = 0; # total size of sig material
141
my $totsig    = 0; # total size of sig material
142
my $totorig   = 0; # total size of original material
142
my $totorig   = 0; # total size of original material
143
my $totquoted = 0; # total size of quoted material
143
my $totquoted = 0; # total size of quoted material
144
my $origposts = 0; # total no. of original posts
144
my $origposts = 0; # total no. of original posts
145
my $replies   = 0; # total no. of replies
145
my $replies   = 0; # total no. of replies
146
my $i;             # general purpose
146
my $i;             # general purpose
147
my %distinct_agent;
147
my %distinct_agent;
148
my %agents =       # used to hold counts of User Agents used
148
my %agents =       # used to hold counts of User Agents used
149
  (
149
  (
150
  "KNode"                     => 0,
150
  "KNode"                     => 0,
151
  "Pan"                       => 0,
151
  "Pan"                       => 0,
152
  "Mozilla"                   => 0,
152
  "Mozilla"                   => 0,
153
  "Sylpheed"                  => 0,
153
  "Sylpheed"                  => 0,
154
  "Gnus"                      => 0,
154
  "Gnus"                      => 0,
155
  "Forte Agent"               => 0,
155
  "Forte Agent"               => 0,
156
  "Forte Free Agent"          => 0,
156
  "Forte Free Agent"          => 0,
157
  "MicroPlanet Gravity"       => 0,
157
  "MicroPlanet Gravity"       => 0,
158
  "Microsoft Outlook Express" => 0,
158
  "Microsoft Outlook Express" => 0,
159
  "Xnews"                     => 0,
159
  "Xnews"                     => 0,
160
  "slrn"                      => 0,
160
  "slrn"                      => 0,
161
  "tin"                       => 0,
161
  "tin"                       => 0,
162
  "rn"                        => 0,
162
  "rn"                        => 0,
163
  "NN"                        => 0,
163
  "NN"                        => 0,
164
  "MacSOUP"                   => 0,
164
  "MacSOUP"                   => 0,
165
  "Foorum"                    => 0,
165
  "Foorum"                    => 0,
166
  "MT-NewsWatcher"            => 0,
166
  "MT-NewsWatcher"            => 0,
167
  "News Rover"                => 0,
167
  "News Rover"                => 0,
168
  "WebTV"                     => 0,
168
  "WebTV"                     => 0,
169
  "Compuserver"               => 0,
169
  "Compuserver"               => 0,
170
  "VSoup"                     => 0
170
  "VSoup"                     => 0
171
  );
171
  );
172
172
173
######################## MAIN CODE ########################
173
######################## MAIN CODE ########################
174
$! = 1;
174
$! = 1;
175
175
176
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
176
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
177
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
177
opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n";
178
while ( defined( $filename = readdir(DIR) ) )
178
while ( defined( $filename = readdir(DIR) ) )
179
{
179
{
180
  %lcheader = ();
180
  %lcheader = ();
181
  next unless -f $filename;    # only want real files
181
  next unless -f $filename;    # only want real files
182
  next if ( $filename eq ".overview" );    # real articles only
182
  next if ( $filename eq ".overview" );    # real articles only
183
  next if ( -M $filename > $numdays );     # only want articles <= a certain age
183
  next if ( -M $filename > $numdays );     # only want articles <= a certain age
184
  $earliest = ( stat $filename )[9] unless defined($earliest);
184
  $earliest = ( stat $filename )[9] unless defined($earliest);
185
  $latest   = ( stat $filename )[9] unless defined($latest);
185
  $latest   = ( stat $filename )[9] unless defined($latest);
186
  &getarticle($filename);                  # read in the article
186
  &getarticle($filename);                  # read in the article
187
  &getdata;                                # grab the data from the article
187
  &getdata;                                # grab the data from the article
188
  $totalposts++;                           # bump count of articles considered
188
  $totalposts++;                           # bump count of articles considered
189
}
189
}
190
closedir(DIR);                             # finished with the directory
190
closedir(DIR);                             # finished with the directory
191
191
192
# post-processing
192
# post-processing
193
&countagents;    # count agents, collapsing versions
193
&countagents;    # count agents, collapsing versions
194
&fixpercent;     # check percentages orig/total for posters
194
&fixpercent;     # check percentages orig/total for posters
195
195
196
&writedata;
196
&writedata;
197
197
198
#################### DISPLAY RESULTS #####################
198
#################### DISPLAY RESULTS #####################
199
print "=" x 76, "\n";
199
print "=" x 76, "\n";
200
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
200
printf "%s\n", &centred( "Analysis of posts to $newsgroup_name", 76 );
201
print "=" x 76, "\n";
201
print "=" x 76, "\n";
202
printf "%s\n",
202
printf "%s\n",
203
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
203
  &centred( "(stats compiled with a script by Garry Knight et al.)", 76 );
204
print "\n\n";
204
print "\n\n";
205
printf "Total posts considered: %s over %d days\n", commify($totalposts),
205
printf "Total posts considered: %s over %d days\n", commify($totalposts),
206
  $numdays;
206
  $numdays;
207
printf "Earliest article: %s\n",               scalar localtime($earliest);
207
printf "Earliest article: %s\n",               scalar localtime($earliest);
208
printf "Latest article:   %s\n",               scalar localtime($latest);
208
printf "Latest article:   %s\n",               scalar localtime($latest);
209
printf "Original articles: %s, replies: %s\n", commify($origposts),
209
printf "Original articles: %s, replies: %s\n", commify($origposts),
210
  commify($replies);
210
  commify($replies);
211
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
211
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
212
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
212
  commify( int( $totsize / 1024 ) ), $totsize / 1048576;    #
213
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
213
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
214
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
214
  commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
215
  commify( int( $totsize / $totalposts ) );
215
  commify( int( $totsize / $totalposts ) );
216
my $count = keys %data;
216
my $count = keys %data;
217
printf "Total headers: %s KB  bodies: %s KB\n",
217
printf "Total headers: %s KB  bodies: %s KB\n",
218
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
218
  commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
219
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
219
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
220
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
220
  commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
221
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
221
  ( $totorig * 100 ) / ( $totorig + $totquoted ),
222
  commify( int( $totsig / 1024 ) );
222
  commify( int( $totsig / 1024 ) );
223
printf "Total number of posters: %s, average %s bytes per poster\n",
223
printf "Total number of posters: %s, average %s bytes per poster\n",
224
  commify($count), commify( int( $totsize / $count ) );     #/
224
  commify($count), commify( int( $totsize / $count ) );     #/
225
$count = keys %threads;
225
$count = keys %threads;
226
printf "Total number of threads: %s, average %s bytes per thread\n",
226
printf "Total number of threads: %s, average %s bytes per thread\n",
227
  commify($count), commify( int( $totsize / $count ) );     #/
227
  commify($count), commify( int( $totsize / $count ) );     #/
228
printf "Total number of User-Agents: %d\n", scalar keys %agents;
228
printf "Total number of User-Agents: %d\n", scalar keys %agents;
229
print "\n", "=" x 76, "\n";
229
print "\n", "=" x 76, "\n";
230
230
231
###############################
231
###############################
232
# show posters by article count  Sec 1;
232
# show posters by article count  Sec 1;
233
###############################
233
###############################
234
unless ( $skipSec{1} )
234
unless ( $skipSec{1} )
235
{
235
{
236
  if ( keys %data < $topposters )
236
  if ( keys %data < $topposters )
237
  {
237
  {
238
    $count = keys %data;
238
    $count = keys %data;
239
  }
239
  }
240
  else
240
  else
241
  {
241
  {
242
    $count = $topposters;
242
    $count = $topposters;
243
  }
243
  }
244
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
244
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
245
  print "=" x 76, "\n";
245
  print "=" x 76, "\n";
246
  $i = 0;
246
  $i = 0;
247
  foreach
247
  foreach
248
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
248
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
249
  {
249
  {
250
    my $name = substr( $poster, 0, 65 );
250
    my $name = substr( $poster, 0, 65 );
251
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
251
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
252
      $data{$poster}{count};
252
      $data{$poster}{count};
253
    last if ( ++$i == $count );
253
    last if ( ++$i == $count );
254
  }
254
  }
255
  print "\n", "=" x 76, "\n";
255
  print "\n", "=" x 76, "\n";
256
}
256
}
257
257
258
################################
258
################################
259
# show posters by size in Kbytes Sec 2;
259
# show posters by size in Kbytes Sec 2;
260
################################
260
################################
261
unless ( $skipSec{2} )
261
unless ( $skipSec{2} )
262
{
262
{
263
  if ( keys %data < $topposters )
263
  if ( keys %data < $topposters )
264
  {
264
  {
265
    $count = keys %data;
265
    $count = keys %data;
266
  }
266
  }
267
  else
267
  else
268
  {
268
  {
269
    $count = $topposters;
269
    $count = $topposters;
270
  }
270
  }
271
  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
271
  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
272
  print "=" x 76, "\n";
272
  print "=" x 76, "\n";
273
  $i = 0;
273
  $i = 0;
274
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
274
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
275
  {
275
  {
276
    my $name = substr( $poster, 0, 62 );
276
    my $name = substr( $poster, 0, 62 );
277
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
277
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
278
      $data{$poster}{size} / 1024;    #/
278
      $data{$poster}{size} / 1024;    #/
279
    last if ( ++$i == $count );
279
    last if ( ++$i == $count );
280
  }
280
  }
281
  print "\n", "=" x 76, "\n";
281
  print "\n", "=" x 76, "\n";
282
}
282
}
283
283
284
####################################
284
####################################
285
# show top posters for original text
285
# show top posters for original text
286
####################################
286
####################################
287
unless ( $skipSec{3} )
287
unless ( $skipSec{3} )
288
{
288
{
289
  if ( keys %data < $topposters )
289
  if ( keys %data < $topposters )
290
  {
290
  {
291
    $count = keys %data;
291
    $count = keys %data;
292
  }
292
  }
293
  else
293
  else
294
  {
294
  {
295
    $count = $topposters;
295
    $count = $topposters;
296
  }
296
  }
297
  printf "%s\n",
297
  printf "%s\n",
298
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
298
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
299
  print "=" x 76, "\n";
299
  print "=" x 76, "\n";
300
  $i = 0;
300
  $i = 0;
301
  foreach my $poster (
301
  foreach my $poster (
302
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
302
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
303
    keys %data
303
    keys %data
304
    )
304
    )
305
  {
305
  {
306
    next if $data{$poster}{quoted} == 0;
306
    next if $data{$poster}{quoted} == 0;
307
    next if $data{$poster}{count} < 5;
307
    next if $data{$poster}{count} < 5;
308
    my $name = substr( $poster, 0, 63 );
308
    my $name = substr( $poster, 0, 63 );
309
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
309
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
310
      $data{$poster}{percent};
310
      $data{$poster}{percent};
311
    last if ( ++$i == $count );
311
    last if ( ++$i == $count );
312
  }
312
  }
313
  print "\n", "=" x 76, "\n";
313
  print "\n", "=" x 76, "\n";
314
}
314
}
315
315
316
#######################################
316
#######################################
317
# show bottom posters for original text
317
# show bottom posters for original text
318
#######################################
318
#######################################
319
unless ( $skipSec{4} )
319
unless ( $skipSec{4} )
320
{
320
{
321
  if ( keys %data < $topposters )
321
  if ( keys %data < $topposters )
322
  {
322
  {
323
    $count = keys %data;
323
    $count = keys %data;
324
  }
324
  }
325
  else
325
  else
326
  {
326
  {
327
    $count = $topposters;
327
    $count = $topposters;
328
  }
328
  }
329
  printf "%s\n",
329
  printf "%s\n",
330
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
330
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
331
  print "=" x 76, "\n";
331
  print "=" x 76, "\n";
332
  $i = 0;
332
  $i = 0;
333
  foreach my $poster (
333
  foreach my $poster (
334
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
334
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
335
    keys %data
335
    keys %data
336
    )
336
    )
337
  {
337
  {
338
    next if $data{$poster}{quoted} == 0;
338
    next if $data{$poster}{quoted} == 0;
339
    next if $data{$poster}{count} < 5;
339
    next if $data{$poster}{count} < 5;
340
    my $name = substr( $poster, 0, 63 );
340
    my $name = substr( $poster, 0, 63 );
341
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
341
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
342
      $data{$poster}{percent};
342
      $data{$poster}{percent};
343
    last if ( ++$i == $count );
343
    last if ( ++$i == $count );
344
  }
344
  }
345
  print "\n", "=" x 76, "\n";
345
  print "\n", "=" x 76, "\n";
346
}
346
}
347
347
348
####################################
348
####################################
349
# show threads by number of articles
349
# show threads by number of articles
350
####################################
350
####################################
351
unless ( $skipSec{5} )
351
unless ( $skipSec{5} )
352
{
352
{
353
  if ( keys %threads < $topthreads )
353
  if ( keys %threads < $topthreads )
354
  {
354
  {
355
    $count = keys %threads;
355
    $count = keys %threads;
356
  }
356
  }
357
  else
357
  else
358
  {
358
  {
359
    $count = $topthreads;
359
    $count = $topthreads;
360
  }
360
  }
361
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
361
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
362
  print "=" x 76, "\n";
362
  print "=" x 76, "\n";
363
  $i = 0;
363
  $i = 0;
364
  foreach my $thread (
364
  foreach my $thread (
365
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
365
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
366
    keys %threads
366
    keys %threads
367
    )
367
    )
368
  {
368
  {
369
    my $name = substr( $thread, 0, 65 );
369
    my $name = substr( $thread, 0, 65 );
370
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
370
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
371
      $threads{$thread}{count};
371
      $threads{$thread}{count};
372
    last if ( ++$i == $count );
372
    last if ( ++$i == $count );
373
  }
373
  }
374
  print "\n", "=" x 76, "\n";
374
  print "\n", "=" x 76, "\n";
375
}
375
}
376
################################
376
################################
377
# show threads by size in Kbytes
377
# show threads by size in Kbytes
378
################################
378
################################
379
unless ( $skipSec{6} )
379
unless ( $skipSec{6} )
380
{
380
{
381
  if ( keys %threads < $topthreads )
381
  if ( keys %threads < $topthreads )
382
  {
382
  {
383
    $count = keys %threads;
383
    $count = keys %threads;
384
  }
384
  }
385
  else
385
  else
386
  {
386
  {
387
    $count = $topthreads;
387
    $count = $topthreads;
388
  }
388
  }
389
  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
389
  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
390
  print "=" x 76, "\n";
390
  print "=" x 76, "\n";
391
  $i = 0;
391
  $i = 0;
392
  foreach my $thread (
392
  foreach my $thread (
393
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
393
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
394
    keys %threads
394
    keys %threads
395
    )
395
    )
396
  {
396
  {
397
    my $name = substr( $thread, 0, 65 );
397
    my $name = substr( $thread, 0, 65 );
398
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
398
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
399
      $threads{$thread}{size} / 1024;    #/
399
      $threads{$thread}{size} / 1024;    #/
400
    last if ( ++$i == $count );
400
    last if ( ++$i == $count );
401
  }
401
  }
402
  print "\n", "=" x 76, "\n";
402
  print "\n", "=" x 76, "\n";
403
}
403
}
404
404
405
#################################
405
#################################
406
# show top 10 cross-posted groups
406
# show top 10 cross-posted groups
407
#################################
407
#################################
408
unless ( $skipSec{7} )
408
unless ( $skipSec{7} )
409
{
409
{
410
  delete $crossposts{"$newsgroup_name"};    # don't include ours
410
  delete $crossposts{"$newsgroup_name"};    # don't include ours
411
  if ( keys %crossposts < $topcrossposts )
411
  if ( keys %crossposts < $topcrossposts )
412
  {
412
  {
413
    $count = keys %crossposts;
413
    $count = keys %crossposts;
414
  }
414
  }
415
  else
415
  else
416
  {
416
  {
417
    $count = $topcrossposts;
417
    $count = $topcrossposts;
418
  }
418
  }
419
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
419
  printf "%s\n", &centred( "Top $count cross-posted groups", 76 );
420
  print "=" x 76, "\n";
420
  print "=" x 76, "\n";
421
  $i = 0;
421
  $i = 0;
422
  foreach
422
  foreach
423
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
423
    my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
424
  {
424
  {
425
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
425
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
426
      $crossposts{$name};
426
      $crossposts{$name};
427
    last if ( ++$i == $count );
427
    last if ( ++$i == $count );
428
  }
428
  }
429
  print "\n", "=" x 76, "\n";
429
  print "\n", "=" x 76, "\n";
430
}
430
}
431
#######################
431
#######################
432
#show agents and counts
432
#show agents and counts
433
#######################
433
#######################
434
unless ( $skipSec{8} )
434
unless ( $skipSec{8} )
435
{
435
{
436
  if ( keys %agents < $topagents )
436
  if ( keys %agents < $topagents )
437
  {
437
  {
438
    $count = keys %agents;
438
    $count = keys %agents;
439
  }
439
  }
440
  else
440
  else
441
  {
441
  {
442
    $count = $topagents;
442
    $count = $topagents;
443
  }
443
  }
444
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
444
  printf "%s\n", &centred( "Top $count User Agents by poster", 76 );
445
  print "=" x 76, "\n";
445
  print "=" x 76, "\n";
446
  $i = 0;
446
  $i = 0;
447
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
447
  foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents )
448
  {
448
  {
449
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
449
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ),
450
      $agents{$agent};
450
      $agents{$agent};
451
    last if ( ++$i == $count );
451
    last if ( ++$i == $count );
452
  }
452
  }
453
  print "\n", "=" x 76, "\n";
453
  print "\n", "=" x 76, "\n";
454
}
454
}
455
455
456
#######################
456
#######################
457
#show distinct agents
457
#show distinct agents
458
#######################
458
#######################
459
unless ( $skipSec{9} )
459
unless ( $skipSec{9} )
460
{
460
{
461
  if ( keys %distinct_agent < $topagents )
461
  if ( keys %distinct_agent < $topagents )
462
  {
462
  {
463
    $count = keys %distinct_agent;
463
    $count = keys %distinct_agent;
464
  }
464
  }
465
  else
465
  else
466
  {
466
  {
467
    $count = $topagents;
467
    $count = $topagents;
468
  }
468
  }
469
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
469
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
470
  print "=" x 76, "\n";
470
  print "=" x 76, "\n";
471
  $i = 0;
471
  $i = 0;
472
  foreach my $agent (
472
  foreach my $agent (
473
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
473
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
474
    keys %distinct_agent
474
    keys %distinct_agent
475
    )
475
    )
476
  {
476
  {
477
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
477
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
478
      $distinct_agent{$agent},
478
      $distinct_agent{$agent},
479
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
479
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
480
    last if ( ++$i == $count );
480
    last if ( ++$i == $count );
481
  }
481
  }
482
  print "\n", "=" x 76, "\n";
482
  print "\n", "=" x 76, "\n";
483
}
483
}
484
484
485
##########################
485
##########################
486
#show timezones and counts
486
#show timezones and counts
487
##########################
487
##########################
488
unless ( $skipSec{10} )
488
unless ( $skipSec{10} )
489
{
489
{
490
  if ( keys %tz < $toptz )
490
  if ( keys %tz < $toptz )
491
  {
491
  {
492
    $count = keys %tz;
492
    $count = keys %tz;
493
  }
493
  }
494
  else
494
  else
495
  {
495
  {
496
    $count = $toptz;
496
    $count = $toptz;
497
  }
497
  }
498
  printf "%s\n", &centred( "Top 10 time zones", 76 );
498
  printf "%s\n", &centred( "Top 10 time zones", 76 );
499
  print "=" x 76, "\n";
499
  print "=" x 76, "\n";
500
  $i = 0;
500
  $i = 0;
501
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
501
  foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
502
  {
502
  {
503
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
503
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
504
    last if ( ++$i == $count );
504
    last if ( ++$i == $count );
505
  }
505
  }
506
  print "\n", "=" x 76, "\n";
506
  print "\n", "=" x 76, "\n";
507
}
507
}
508
508
509
################################ SUBROUTINES ################################
509
################################ SUBROUTINES ################################
510
510
511
#######################################
511
#######################################
512
# get current article's header and body
512
# get current article's header and body
513
#######################################
513
#######################################
514
sub getarticle
514
sub getarticle
515
{
515
{
516
  %headers = ();    # dump old headers
516
  %headers = ();    # dump old headers
517
  my $filename = shift;    # get the name of the file
517
  my $filename = shift;    # get the name of the file
518
518
519
  # get stats about the file itself
519
  # get stats about the file itself
520
  $filesize = -s $filename;    # get total size of file
520
  $filesize = -s $filename;    # get total size of file
521
  $totsize += $filesize;       # bump total sizes of all files
521
  $totsize += $filesize;       # bump total sizes of all files
522
522
523
  my $mtime = ( stat $filename )[9];
523
  my $mtime = ( stat $filename )[9];
524
  if ( $mtime < $earliest )
524
  if ( $mtime < $earliest )
525
  {
525
  {
526
    $earliest = $mtime;
526
    $earliest = $mtime;
527
  }
527
  }
528
  elsif ( $mtime > $latest )
528
  elsif ( $mtime > $latest )
529
  {
529
  {
530
    $latest = $mtime;
530
    $latest = $mtime;
531
  }
531
  }
532
532
533
  # now read the file
533
  # now read the file
534
  open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
534
  open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
535
  while (<$FILE>)
535
  while (<$FILE>)
536
  {
536
  {
537
    $totheader += length($_);    # bump total header size
537
    $totheader += length($_);    # bump total header size
538
    last if (/^\s*$/);           # end of header?
538
    last if (/^\s*$/);           # end of header?
539
    if (/^([^:\s]*):\s+(.*)/)
539
    if (/^([^:\s]*):\s+(.*)/)
540
    {
540
    {
541
      my ( $key, $val ) = ( $1, $2 );
541
      my ( $key, $val ) = ( $1, $2 );
542
      $headers{$key} = decode( 'MIME-Header', $val );
542
      $headers{$key} = decode( 'MIME-Header', $val );
543
      $lcheader{ clean( lc($key) ) } = clean($val);
543
      $lcheader{ clean( lc($key) ) } = clean($val);
544
    }
544
    }
545
  }
545
  }
546
  @body = <$FILE>;               # slurp up body
546
  @body = <$FILE>;               # slurp up body
547
  close($FILE);
547
  close($FILE);
548
}    # getarticle
548
}    # getarticle
549
549
550
###################################
550
###################################
551
# get data from the current article
551
# get data from the current article
552
###################################
552
###################################
553
sub getdata
553
sub getdata
554
{
554
{
555
#### First, analyse header fields ####
555
#### First, analyse header fields ####
556
556
557
  # Set up this poster if not defined, get counts, sizes
557
  # Set up this poster if not defined, get counts, sizes
558
  my $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
558
  my $poster = $headers{From};    # get the poster's name
559
  if ( !defined( $data{$poster} ) )
559
  if ( !defined( $data{$poster} ) )
560
  {                                                  # seen this one before?
560
  {                                   # seen this one before?
561
    $data{$poster}{agent}  = 'Unknown';              # comes after For: field
561
    $data{$poster}{agent}  = 'Unknown';    # comes after For: field
562
    $data{$poster}{orig}   = 0;
562
    $data{$poster}{orig}   = 0;
563
    $data{$poster}{quoted} = 0;
563
    $data{$poster}{quoted} = 0;
564
  }
564
  }
565
  $data{$poster}{count}++;    # bump count for this poster
565
  $data{$poster}{count}++;                 # bump count for this poster
566
  $data{$poster}{size} += $filesize;    # total size of file
566
  $data{$poster}{size} += $filesize;       # total size of file
567
567
568
  # The User-Agent and/or X-Newsreader fields
568
  # The User-Agent and/or X-Newsreader fields
569
  # for User-Agent by poster
569
  # for User-Agent by poster
570
  if ( defined $lcheader{"user-agent"} )
570
  if ( defined $lcheader{"user-agent"} )
571
  {
571
  {
572
    $data{$poster}{agent} = $lcheader{"user-agent"};
572
    $data{$poster}{agent} = $lcheader{"user-agent"};
573
  }
573
  }
574
  if ( defined $lcheader{"x-newsreader"} )
574
  if ( defined $lcheader{"x-newsreader"} )
575
  {
575
  {
576
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
576
    $data{$poster}{agent} = $lcheader{"x-newsreader"};
577
  }
577
  }
578
578
579
  # The User Agent for User-Agent by number of posts
579
  # The User Agent for User-Agent by number of posts
580
  my $UA = "unknown";
580
  my $UA = "unknown";
581
  foreach my $keys ( keys %lcheader )
581
  foreach my $keys ( keys %lcheader )
582
  {
582
  {
583
    if ( defined $lcheader{'user-agent'} )
583
    if ( defined $lcheader{'user-agent'} )
584
    {
584
    {
585
      $UA = $lcheader{'user-agent'};
585
      $UA = $lcheader{'user-agent'};
586
    }
586
    }
587
    elsif ( defined $lcheader{"x-newsreader"} )
587
    elsif ( defined $lcheader{"x-newsreader"} )
588
    {
588
    {
589
      $UA = $lcheader{"x-newsreader"};
589
      $UA = $lcheader{"x-newsreader"};
590
    }
590
    }
591
    elsif ( defined $lcheader{'x-mailer'} )
591
    elsif ( defined $lcheader{'x-mailer'} )
592
    {
592
    {
593
      $UA = $lcheader{'x-mailer'};
593
      $UA = $lcheader{'x-mailer'};
594
    }
594
    }
595
    elsif (
595
    elsif (
596
      ( defined $lcheader{'organization'} )
596
      ( defined $lcheader{'organization'} )
597
      && ( $lcheader{'organization'} =~
597
      && ( $lcheader{'organization'} =~
598
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
598
        /groups\.google|AOL|Supernews|WebTV|compuserve/ )
599
      )
599
      )
600
    {
600
    {
601
      $UA = $lcheader{'organization'};
601
      $UA = $lcheader{'organization'};
602
    }
602
    }
603
    elsif ( $lcheader{'message-id'} =~ /pine/i )
603
    elsif ( $lcheader{'message-id'} =~ /pine/i )
604
    {
604
    {
605
      $UA = "Pine";
605
      $UA = "Pine";
606
    }    ## Hopefully found UA, else set to unknown
606
    }    ## Hopefully found UA, else set to unknown
607
  }
607
  }
608
608
609
  $UA = clean($UA);
609
  $UA = clean($UA);
610
  $UA = get_agent($UA);
610
  $UA = get_agent($UA);
611
611
612
  sub get_agent
612
  sub get_agent
613
  {
613
  {
614
    my $raw   = shift;
614
    my $raw   = shift;
615
    my $agent = $raw;
615
    my $agent = $raw;
616
616
617
    ## strip http
617
    ## strip http
618
    if ( $raw =~ /.*http.*/ )
618
    if ( $raw =~ /.*http.*/ )
619
    {
619
    {
620
      $raw =~ s!posted via!!i;
620
      $raw =~ s!posted via!!i;
621
      $raw =~ s!http://!!g;
621
      $raw =~ s!http://!!g;
622
      $raw =~ s!/!!g;
622
      $raw =~ s!/!!g;
623
      $raw =~ s! !!g;
623
      $raw =~ s! !!g;
624
    }
624
    }
625
625
626
    ## Fix Outlook from Mac
626
    ## Fix Outlook from Mac
627
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
627
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }
628
628
629
    ## Pick out the popular agents
629
    ## Pick out the popular agents
630
    if ( $raw =~ /(outlook express)/i
630
    if ( $raw =~ /(outlook express)/i
631
      || $raw =~ /(microplanet gravity)/i
631
      || $raw =~ /(microplanet gravity)/i
632
      || $raw =~ /(news rover)/i
632
      || $raw =~ /(news rover)/i
633
      || $raw =~ /(forte agent)/i
633
      || $raw =~ /(forte agent)/i
634
      || $raw =~ /(forte free agent)/i )
634
      || $raw =~ /(forte free agent)/i )
635
    {
635
    {
636
      $agent = $1;
636
      $agent = $1;
637
    }
637
    }
638
    elsif (
638
    elsif (
639
      $raw =~ /^(
639
      $raw =~ /^(
640
        pan
640
        pan
641
       |sylpheed
641
       |sylpheed
642
       |slrn
642
       |slrn
643
       |mozilla
643
       |mozilla
644
       |knode
644
       |knode
645
       |tin
645
       |tin
646
       |hamster
646
       |hamster
647
       |xrn
647
       |xrn
648
       |xnews
648
       |xnews
649
       |aol
649
       |aol
650
       |gnus
650
       |gnus
651
       |krn
651
       |krn
652
       |macsoup
652
       |macsoup
653
       |messenger
653
       |messenger
654
       |openxp
654
       |openxp
655
       |pine
655
       |pine
656
       |thoth
656
       |thoth
657
       |turnpike
657
       |turnpike
658
       |winvn
658
       |winvn
659
       |vsoup
659
       |vsoup
660
       |google
660
       |google
661
       |supernews
661
       |supernews
662
       |nn
662
       |nn
663
       |rn
663
       |rn
664
       |007
664
       |007
665
       |webtv
665
       |webtv
666
       |compuserve
666
       |compuserve
667
       )/ix
667
       )/ix
668
      )
668
      )
669
    {
669
    {
670
      $agent = $1;
670
      $agent = $1;
671
    }
671
    }
672
    else
672
    else
673
    {
673
    {
674
      ## Clean up unknown agents
674
      ## Clean up unknown agents
675
      if ( $raw =~ m!^(.*?)/! )
675
      if ( $raw =~ m!^(.*?)/! )
676
      {
676
      {
677
        $agent = $1;
677
        $agent = $1;
678
      }
678
      }
679
      elsif ( $raw =~ /^(\w*)\d.*/ )
679
      elsif ( $raw =~ /^(\w*)\d.*/ )
680
      {
680
      {
681
        $agent = $1;
681
        $agent = $1;
682
      }
682
      }
683
    }
683
    }
684
684
685
    $distinct_agent{$agent}++;
685
    $distinct_agent{$agent}++;
686
    return $agent;
686
    return $agent;
687
  }
687
  }
688
688
689
  # Get all cross-posted newsgroups
689
  # Get all cross-posted newsgroups
690
  for ( split /,/, $headers{"Newsgroups"} )
690
  for ( split /,/, $headers{"Newsgroups"} )
691
  {
691
  {
692
    $crossposts{$_}++;    # bump count for each
692
    $crossposts{$_}++;    # bump count for each
693
  }
693
  }
694
694
695
  # Get threads
695
  # Get threads
696
  my $thread = encode( 'UTF-8', $headers{"Subject"} );
696
  my $thread = $headers{"Subject"};
697
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
697
  $thread =~ s/^re: //i;    # Remove Re: or re: at start
698
  $thread =~ s/\s+/ /g;     # collapse whitespace
698
  $thread =~ s/\s+/ /g;     # collapse whitespace
699
  $threads{$thread}{count} += 1;            # bump count of this subject
699
  $threads{$thread}{count} += 1;            # bump count of this subject
700
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
700
  $threads{$thread}{size}  += $filesize;    # bump bytes for this thread
701
701
702
  # Is this an original post or a reply?
702
  # Is this an original post or a reply?
703
  if ( defined $headers{"References"} )
703
  if ( defined $headers{"References"} )
704
  {
704
  {
705
    $replies++;
705
    $replies++;
706
  }
706
  }
707
  else
707
  else
708
  {
708
  {
709
    $origposts++;
709
    $origposts++;
710
  }
710
  }
711
711
712
  # Get the time zone
712
  # Get the time zone
713
  $_ = $headers{"Date"};
713
  $_ = $headers{"Date"};
714
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
714
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
715
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
715
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
716
  {
716
  {
717
    $tz = "UTC";
717
    $tz = "UTC";
718
  }
718
  }
719
  $tz{$tz}++;
719
  $tz{$tz}++;
720
720
721
#### Now analyse the body text ####
721
#### Now analyse the body text ####
722
  my $insig = 0;
722
  my $insig = 0;
723
  for (@body)
723
  for (@body)
724
  {
724
  {
725
    $totbody += length($_);    # bump total body size
725
    $totbody += length($_);    # bump total body size
726
    next if (/^$>/);           # don't count blank lines in body
726
    next if (/^$>/);           # don't count blank lines in body
727
    if ( $insig == 1 )
727
    if ( $insig == 1 )
728
    {
728
    {
729
      $totsig += length($_);    # bump total sig size
729
      $totsig += length($_);    # bump total sig size
730
730
731
      # Bill Unruh uses ] quotes, and another poster uses ::
731
      # Bill Unruh uses ] quotes, and another poster uses ::
732
    }
732
    }
733
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
733
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
734
    {                           # are we in a quote line?
734
    {                           # are we in a quote line?
735
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
735
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
736
      $totquoted += length($_);
736
      $totquoted += length($_);
737
    }
737
    }
738
    elsif (/-- /)
738
    elsif (/-- /)
739
    {
739
    {
740
      $insig = 1;
740
      $insig = 1;
741
    }
741
    }
742
    else
742
    else
743
    {
743
    {
744
744
745
      # we must be processing an original line
745
      # we must be processing an original line
746
      $data{$poster}{orig} += length($_);      # bump count of original chrs
746
      $data{$poster}{orig} += length($_);      # bump count of original chrs
747
      $totorig += length($_);
747
      $totorig += length($_);
748
    }
748
    }
749
  }    # end for (@body)
749
  }    # end for (@body)
750
750
751
}    # getdata
751
}    # getdata
752
752
753
########################################
753
########################################
754
# Count the User-Agents used, collapsing
754
# Count the User-Agents used, collapsing
755
# different versions into one per agent.
755
# different versions into one per agent.
756
########################################
756
########################################
757
sub countagents
757
sub countagents
758
{
758
{
759
POSTER:
759
POSTER:
760
  foreach my $poster ( keys %data )
760
  foreach my $poster ( keys %data )
761
  {
761
  {
762
    foreach my $agent_name ( keys %distinct_agent )
762
    foreach my $agent_name ( keys %distinct_agent )
763
    {    # check against known ones
763
    {    # check against known ones
764
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
764
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
765
      {
765
      {
766
        $agents{$agent_name}++;
766
        $agents{$agent_name}++;
767
        next POSTER;
767
        next POSTER;
768
      }
768
      }
769
    }
769
    }
770
    $agents{ $data{$poster}{agent} }++;
770
    $agents{ $data{$poster}{agent} }++;
771
  }
771
  }
772
}    # countagents
772
}    # countagents
773
773
774
############################################
774
############################################
775
# set orig/total percentages for all posters
775
# set orig/total percentages for all posters
776
############################################
776
############################################
777
sub fixpercent
777
sub fixpercent
778
{
778
{
779
  foreach my $poster ( keys %data )
779
  foreach my $poster ( keys %data )
780
  {
780
  {
781
    my $percent = 100;
781
    my $percent = 100;
782
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
782
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
783
    {
783
    {
784
      $percent =
784
      $percent =
785
        $data{$poster}{orig} * 100 /
785
        $data{$poster}{orig} * 100 /
786
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
786
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
787
    }
787
    }
788
    elsif ( $data{$poster}{orig} == 0 )
788
    elsif ( $data{$poster}{orig} == 0 )
789
    {
789
    {
790
      $percent = 0;
790
      $percent = 0;
791
    }
791
    }
792
    $data{$poster}{percent} = $percent;
792
    $data{$poster}{percent} = $percent;
793
  }
793
  }
794
}
794
}
795
795
796
##############################
796
##############################
797
# right pad a string with '.'s
797
# right pad a string with '.'s
798
##############################
798
##############################
799
sub rpad
799
sub rpad
800
{
800
{
801
-
 
802
  # get text to pad, length to pad, pad chr
801
  # get text to pad, length to pad, pad chr
803
  my ( $text, $pad_len, $pad_chr ) = @_;
802
  my ( $text, $pad_len, $pad_chr ) = @_;
-
 
803
-
 
804
  ### DEBUG
-
 
805
#  printf "|%s| = %d\n", $text, length($text);
-
 
806
804
  if ( length($text) > $pad_len )
807
  if ( length($text) > $pad_len )
805
  {
808
  {
806
    $text = substr( $text, 0, $pad_len );
809
    $text = substr( $text, 0, $pad_len );
807
  }
810
  }
808
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
811
  my $padded = $text . $pad_chr x ( $pad_len - length($text) );
809
  return $padded;
812
  return $padded;
810
}
813
}
811
814
812
#################
815
#################
813
# centre a string
816
# centre a string
814
#################
817
#################
815
sub centred
818
sub centred
816
{
819
{
817
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
820
  my ( $text, $width ) = @_;    # text to centre, size of field to centre in
818
  my $pad_len = ( $width - length($text) ) / 2;    #/
821
  my $pad_len = ( $width - length($text) ) / 2;    #/
819
  my $centred = " " x $pad_len . $text;
822
  my $centred = " " x $pad_len . $text;
820
  return $centred;
823
  return $centred;
821
}
824
}
822
825
823
##########################
826
##########################
824
# put commas into a number
827
# put commas into a number
825
##########################
828
##########################
826
sub commify
829
sub commify
827
{
830
{
828
  $_ = shift;
831
  $_ = shift;
829
  1 while s/^(-?\d+)(\d{3})/$1,$2/;
832
  1 while s/^(-?\d+)(\d{3})/$1,$2/;
830
  return $_;
833
  return $_;
831
}
834
}
832
835
833
#########################
836
#########################
834
# clean
837
# clean
835
#########################
838
#########################
836
sub clean
839
sub clean
837
{
840
{
838
  my $dirty = shift;
841
  my $dirty = shift;
839
  my $clean = $dirty;
842
  my $clean = $dirty;
840
  $clean =~ s/^\s*//;
843
  $clean =~ s/^\s*//;
841
  $clean =~ s/\s*$//;
844
  $clean =~ s/\s*$//;
842
845
843
  return $clean;
846
  return $clean;
844
}
847
}
845
848
846
sub usage
849
sub usage
847
{
850
{
848
851
849
  print "usage: newstat.pl newsgroupname\n";
852
  print "usage: newstat.pl newsgroupname\n";
850
  exit 1;
853
  exit 1;
851
}
854
}
852
855
853
###################################
856
###################################
854
# Write data structures to a file #
857
# Write data structures to a file #
855
###################################
858
###################################
856
sub writedata
859
sub writedata
857
{
860
{
858
  open my $OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
861
  open my $OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
859
  print $OUTF "Data collected from alt.os.linux.mandrake\n\n";
862
  print $OUTF "Data collected from alt.os.linux.mandrake\n\n";
860
  print $OUTF
863
  print $OUTF
861
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
864
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
862
  foreach my $name ( keys %data )
865
  foreach my $name ( keys %data )
863
  {
866
  {
864
    print $OUTF
867
    print $OUTF
865
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
868
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
866
  }
869
  }
867
  print $OUTF
870
  print $OUTF
868
"============================================================================\n";
871
"============================================================================\n";
869
  print $OUTF "Thread subjects\n";
872
  print $OUTF "Thread subjects\n";
870
  print $OUTF
873
  print $OUTF
871
"----------------------------------------------------------------------------\n";
874
"----------------------------------------------------------------------------\n";
872
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
875
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
873
  {
876
  {
874
    print $OUTF
877
    print $OUTF
875
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
878
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
876
  }
879
  }
877
  print $OUTF
880
  print $OUTF
878
"============================================================================\n";
881
"============================================================================\n";
879
  print $OUTF "Cross-posts\n";
882
  print $OUTF "Cross-posts\n";
880
  print $OUTF
883
  print $OUTF
881
"----------------------------------------------------------------------------\n";
884
"----------------------------------------------------------------------------\n";
882
  foreach my $name ( sort keys %crossposts )
885
  foreach my $name ( sort keys %crossposts )
883
  {
886
  {
884
    print $OUTF "$name : $crossposts{$name}\n";
887
    print $OUTF "$name : $crossposts{$name}\n";
885
  }
888
  }
886
  print $OUTF print $OUTF
889
  print $OUTF print $OUTF
887
"============================================================================\n";
890
"============================================================================\n";
888
  print $OUTF "User agents\n";
891
  print $OUTF "User agents\n";
889
  print $OUTF
892
  print $OUTF
890
"----------------------------------------------------------------------------\n";
893
"----------------------------------------------------------------------------\n";
891
  foreach my $name ( sort keys %agents )
894
  foreach my $name ( sort keys %agents )
892
  {
895
  {
893
    print $OUTF "$name : $agents{$name}\n";
896
    print $OUTF "$name : $agents{$name}\n";
894
  }
897
  }
895
  print $OUTF
898
  print $OUTF
896
"============================================================================\n";
899
"============================================================================\n";
897
  print $OUTF "Time zones\n";
900
  print $OUTF "Time zones\n";
898
  print $OUTF
901
  print $OUTF
899
"----------------------------------------------------------------------------\n";
902
"----------------------------------------------------------------------------\n";
900
  foreach my $name ( sort keys %tz )
903
  foreach my $name ( sort keys %tz )
901
  {
904
  {
902
    print $OUTF "$name : $tz{$name}\n";
905
    print $OUTF "$name : $tz{$name}\n";
903
  }
906
  }
904
  close $OUTF;
907
  close $OUTF;
905
}    # writedata
908
}    # writedata
906
 
909