Subversion Repositories LCARS

Rev

Rev 5 | Rev 7 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5 Rev 6
1
#!/usr/bin/perl -w
1
#!/usr/bin/perl -w
2
use strict;
2
use strict;
3
#########################
3
use warnings;
4
# newsstat.pl version 0.3
4
use utf8;
5
5
use Encode;
6
6
-
 
7
#########################
-
 
8
# newsstat.pl version 0.4
7
9
8
###################################################################
10
############################################################################
9
# Collect statistics about the alt.os.linux.mandrake newsgroup.
11
# Collect statistics about a newsgroup (specified by first argument) in
10
# Check all articles in the last 7-day period. Rank posters by
12
# the local news spool. Check all articles in the last 30-day period. 
11
# no. of posts and by volume of posts, report on top and bottom
13
# Rank posters by number of posts and by volume of posts, report on top and
12
# 20 posters. Show their name, no. posts, size of posts, percentage
14
# bottom 20 posters. Show their name, number of posts, size of posts,
13
# quoted lines. Rank user-agents used, by poster rather than by
15
# percentage of quoted lines. Rank user-agents used, by poster rather than
14
# post. Rank top 10 threads. Rank top 10 cross-posted groups.
16
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
-
 
17
# 
-
 
18
# (Numbers and paths can be configured below.  -- PE)
15
###################################################################
19
############################################################################
16
20
17
##################################################################
21
############################################################################
18
#                       RECENT CHANGES                           #
22
#                       RECENT CHANGES                                     #
-
 
23
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
-
 
24
#                 - Use warnings, utf8 (just in case) 
-
 
25
#                 - Documentation update
-
 
26
# N/A         NN  - Take newsgroup name as argument
19
# 2004/06/19    - newsgroup name is $ARGV[0]
27
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
20
#               - Allow command line flags for subtracting
28
#                 - Allow command line flags for subtracting
21
#                 output if not pertinent for a group
29
#                   output if not pertinent for a group
22
# 2002/11/09    - Put Garry's writedata() function back in.
30
# 2002-11-09  NN  - Put Garry's writedata() function back in.
23
#               - added "rn" to my list of UA's
31
#                 - added "rn" to my list of UA's
24
#               - Started using %distinct_agent for both User agent
32
#                 - Started using %distinct_agent for both User agent
25
#               sections
33
#                   sections
26
#               - named it newsstat.pl version 0.3
34
#                 - named it newsstat.pl version 0.3
27
# 2002/11/06    - Fixed the earliest/latest file problem by using
35
# 2002-11-06  NN  - Fixed the earliest/latest file problem by using
28
#               mtime rather than ctime, and simplifying the logic
36
#                   mtime rather than ctime, and simplifying the logic
29
# 2002/11/05    - moved user configurations to the top
37
# 2002-11-05  NN  - moved user configurations to the top
30
#               - fixed the cross-posting section
38
#                 - fixed the cross-posting section
31
#               - introduced the $newsgroup_name variable which
39
#                 - introduced the $newsgroup_name variable which
32
#               later becomes $news$group
40
#                   later becomes $news$group
33
#               - changed $name to $agent_name in countagents()
41
#                 - changed $name to $agent_name in countagents()
-
 
42
# 
-
 
43
# Contributors
-
 
44
# -------------
-
 
45
# NN  Nomen nominandum (name to be determined later)
-
 
46
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
34
47
35
########### NEXT #############
48
########### TODO #############
36
# Commas in bottom section of report
49
# Commas in bottom section of report
37
# Show date the figures were compiled
50
# Show date the figures were compiled
38
# No. of HTML articles (Content-Type: text/html)
51
# No. of HTML articles (Content-Type: text/html)
39
# No. of quoted sigs (/>\s*-- /)
52
# No. of quoted sigs (/>\s*-- /)
40
# Per cent of top-posted articles
53
# Per cent of top-posted articles
41
# Top 10 cross-posters
54
# Top 10 cross-posters
42
# Top 20 news posting hosts (from Path)
55
# Top 20 news posting hosts (from Path)
43
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
56
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
44
# Count *all* User Agents that each poster uses
57
# Count *all* User Agents that each poster uses
45
# What do we do about Bill Unruh's ] quote style?
58
# What do we do about Bill Unruh's ] quote style?
46
# Change the way dates/times are checked
59
# Change the way dates/times are checked
47
# include % share in posters by no. of arts
60
# include % share in posters by no. of arts
48
# include % share in posters by size
61
# include % share in posters by size
49
# Total, orig & quoted lines by user agent with per cent
62
# Total, orig & quoted lines by user agent with per cent
50
# Take arguments, i.e. newsgroup name
63
# Take more arguments
51
#######################################################
64
#######################################################
52
65
53
###################### USER CONFIGURATIONS ############################
66
###################### USER CONFIGURATIONS ############################
54
67
55
# The name of the group to do stats for
68
# The name of the group to do stats for
56
my $newsgroup_name = $ARGV[0];
69
my $newsgroup_name = $ARGV[0];
57
  $newsgroup_name or &usage;
70
$newsgroup_name or &usage;
58
71
59
# Check for removal flags
72
# Check for removal flags
60
my $ix;
73
my $ix;
61
my $j;
74
my $j;
62
my %skipSec;
75
my %skipSec;
63
my @skiplist;
76
my @skiplist;
64
my $args = @ARGV;
77
my $args = @ARGV;
65
for ( $ix = 1 ; $ix < $args ; $ix++ ) {
78
for ( $ix = 1 ; $ix < $args ; $ix++ ) {
66
       $j = $ix + 1;
79
       $j = $ix + 1;
67
       if ( $ARGV[$ix] eq "-x" ) {
80
       if ( $ARGV[$ix] eq "-x" ) {
68
               @skiplist = split(",",$ARGV[$j]);
81
               @skiplist = split(",",$ARGV[$j]);
69
       } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
82
       } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
70
               @skiplist = split(",",$1);
83
               @skiplist = split(",",$1);
71
       }
84
       }
72
}
85
}
73
foreach(@skiplist) {
86
foreach(@skiplist) {
74
  $skipSec{$_} = 1;
87
  $skipSec{$_} = 1;
75
}
88
}
76
89
77
# Leafnode users will want /var/spool/news for this variable.
90
# Leafnode users will want /var/spool/news for this variable.
78
my $news = "/var/spool/news/";
91
my $news = "/var/spool/news/";
79
92
80
# How many days are we doing statistics for?
93
# How many days are we doing statistics for?
81
my $numdays = 30;
94
my $numdays = 30;
82
95
83
# no. of agents we list
96
# no. of agents we list
84
my $topagents = 10;
97
my $topagents = 10;
85
98
86
# no. of threads we want to know about
99
# no. of threads we want to know about
87
my $topthreads = 20;
100
my $topthreads = 20;
88
101
89
# no. of top or bottom posters to show
102
# no. of top or bottom posters to show
90
my $topposters = 20;
103
my $topposters = 20;
91
104
92
# no. of cross-posted threads to show
105
# no. of cross-posted threads to show
93
my $topcrossposts = 10;
106
my $topcrossposts = 10;
94
107
95
# no. of time zones to show
108
# no. of time zones to show
96
my $toptz = 10;
109
my $toptz = 10;
97
110
98
111
99
112
100
###################### DATA STRUCTURES ######################
113
###################### DATA STRUCTURES ######################
101
my $group = $newsgroup_name;
114
my $group = $newsgroup_name;
102
$group =~ s!\.!/!g;
115
$group =~ s!\.!/!g;
103
my %data;                               # name, count, agent, total, orig, quoted
116
my %data;                               # name, count, agent, total, orig, quoted
104
my %threads;                            # subject, count
117
my %threads;                            # subject, count
105
my %crossposts;                         # group, count
118
my %crossposts;                         # group, count
106
my %tz;                                 # timezones by count
119
my %tz;                                 # timezones by count
107
my %headers;                            # holds header of current article
120
my %headers;                            # holds header of current article
108
my %lcheader;                           # holds lowercase headers
121
my %lcheader;                           # holds lowercase headers
109
my @body;                               # holds body of current article
122
my @body;                               # holds body of current article
110
my @sig;                                # holds sig text;
123
my @sig;                                # holds sig text;
111
my $totalposts;                         # total no. of posts considered
124
my $totalposts;                         # total no. of posts considered
112
my $filename;                           # name of current article file
125
my $filename;                           # name of current article file
113
my $filesize;                           # size of current article file
126
my $filesize;                           # size of current article file
114
my $earliest;                           # earliest article we have found
127
my $earliest;                           # earliest article we have found
115
my $latest;                             # latest article we have found
128
my $latest;                             # latest article we have found
116
my $poster;                             # poster we are dealing with
129
my $poster;                             # poster we are dealing with
117
my $totsize = 0;                        # holds total sizes of all files
130
my $totsize = 0;                        # holds total sizes of all files
118
my $totheader = 0;                      # total size of header material
131
my $totheader = 0;                      # total size of header material
119
my $totbody = 0;                        # total size of body material
132
my $totbody = 0;                        # total size of body material
120
my $totsig = 0;                         # total size of sig material
133
my $totsig = 0;                         # total size of sig material
121
my $totorig = 0;                        # total size of original material
134
my $totorig = 0;                        # total size of original material
122
my $totquoted = 0;                      # total size of quoted material
135
my $totquoted = 0;                      # total size of quoted material
123
my $origposts = 0;                      # total no. of original posts
136
my $origposts = 0;                      # total no. of original posts
124
my $replies = 0;                        # total no. of replies
137
my $replies = 0;                        # total no. of replies
125
my $i;                                  # general purpose
138
my $i;                                  # general purpose
126
my %distinct_agent;
139
my %distinct_agent;
127
my %agents =                            # used to hold counts of User Agents used
140
my %agents =                            # used to hold counts of User Agents used
128
            (  "KNode"                         => 0,
141
            (  "KNode"                         => 0,
129
               "Pan"                           => 0,
142
               "Pan"                           => 0,
130
               "Mozilla"                       => 0,
143
               "Mozilla"                       => 0,
131
               "Sylpheed"                      => 0,
144
               "Sylpheed"                      => 0,
132
               "Gnus"                          => 0,
145
               "Gnus"                          => 0,
133
               "Forte Agent"                   => 0,
146
               "Forte Agent"                   => 0,
134
               "Forte Free Agent"              => 0,
147
               "Forte Free Agent"              => 0,
135
               "MicroPlanet Gravity"           => 0,
148
               "MicroPlanet Gravity"           => 0,
136
               "Microsoft Outlook Express"     => 0,
149
               "Microsoft Outlook Express"     => 0,
137
               "Xnews"                         => 0,
150
               "Xnews"                         => 0,
138
               "slrn"                          => 0,
151
               "slrn"                          => 0,
139
               "tin"                           => 0,
152
               "tin"                           => 0,
140
               "rn"                            => 0,
153
               "rn"                            => 0,
141
               "NN"                            => 0,
154
               "NN"                            => 0,
142
               "MacSOUP"                       => 0,
155
               "MacSOUP"                       => 0,
143
               "Foorum"                        => 0,
156
               "Foorum"                        => 0,
144
               "MT-NewsWatcher"                => 0,
157
               "MT-NewsWatcher"                => 0,
145
               "News Rover"                    => 0,
158
               "News Rover"                    => 0,
146
               "WebTV"                         => 0,
159
               "WebTV"                         => 0,
147
               "Compuserver"                   => 0,
160
               "Compuserver"                   => 0,
148
               "VSoup"                         => 0);
161
               "VSoup"                         => 0);
149
162
150
######################## MAIN CODE ########################
163
######################## MAIN CODE ########################
151
$! = 1;
164
$! = 1;
152
165
153
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
166
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
154
opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
167
opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
155
while (defined($filename = readdir(DIR))) {
168
while (defined($filename = readdir(DIR))) {
156
 %lcheader = ();
169
 %lcheader = ();
157
 next unless -f $filename;             # only want real files
170
 next unless -f $filename;             # only want real files
158
 next if ($filename eq ".overview");   # real articles only
171
 next if ($filename eq ".overview");   # real articles only
159
 next if (-M $filename > $numdays);    # only want articles <= a certain age
172
 next if (-M $filename > $numdays);    # only want articles <= a certain age
160
 $earliest = (stat $filename)[9] unless defined ($earliest);
173
 $earliest = (stat $filename)[9] unless defined ($earliest);
161
 $latest   = (stat $filename)[9] unless defined ($latest);
174
 $latest   = (stat $filename)[9] unless defined ($latest);
162
 &getarticle($filename);               # read in the article
175
 &getarticle($filename);               # read in the article
163
 &getdata;                             # grab the data from the article
176
 &getdata;                             # grab the data from the article
164
 $totalposts++;                        # bump count of articles considered
177
 $totalposts++;                        # bump count of articles considered
165
}
178
}
166
closedir(DIR);                          # finished with the directory
179
closedir(DIR);                          # finished with the directory
167
# post-processing
180
# post-processing
168
&countagents;                           # count agents, collapsing versions
181
&countagents;                           # count agents, collapsing versions
169
&fixpercent;                            # check percentages orig/total for posters
182
&fixpercent;                            # check percentages orig/total for posters
170
183
171
&writedata;
184
&writedata;
172
185
173
#################### DISPLAY RESULTS #####################
186
#################### DISPLAY RESULTS #####################
174
print "=" x 76, "\n";
187
print "=" x 76, "\n";
175
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
188
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
176
print "=" x 76, "\n";
189
print "=" x 76, "\n";
177
printf "%s\n", &centred("(stats compiled with a script by Garry Knight)", 76);
190
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
178
print "\n\n";
191
print "\n\n";
179
printf "Total posts considered: %s over %d days\n",
192
printf "Total posts considered: %s over %d days\n",
180
       commify($totalposts), $numdays;
193
       commify($totalposts), $numdays;
181
printf "Earliest article: %s\n", scalar localtime($earliest);
194
printf "Earliest article: %s\n", scalar localtime($earliest);
182
printf "Latest article:   %s\n", scalar localtime($latest);
195
printf "Latest article:   %s\n", scalar localtime($latest);
183
printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
196
printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
184
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
197
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
185
       commify(int($totsize / 1024)), $totsize / 1048576; #
198
       commify(int($totsize / 1024)), $totsize / 1048576; #
186
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
199
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
187
       commify(int($totalposts / $numdays)),
200
       commify(int($totalposts / $numdays)),
188
       $totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
201
       $totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
189
my $count = keys %data;
202
my $count = keys %data;
190
printf "Total headers: %s KB  bodies: %s KB\n",
203
printf "Total headers: %s KB  bodies: %s KB\n",
191
       commify(int($totheader / 1024)), commify(int($totbody / 1024));
204
       commify(int($totheader / 1024)), commify(int($totbody / 1024));
192
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
205
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
193
       commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
206
       commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
194
       ($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
207
       ($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
195
printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
208
printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
196
       commify(int($totsize / $count)); #/
209
       commify(int($totsize / $count)); #/
197
$count = keys %threads;
210
$count = keys %threads;
198
printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
211
printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
199
       commify(int($totsize / $count)); #/
212
       commify(int($totsize / $count)); #/
200
printf "Total number of User-Agents: %d\n", scalar keys %agents;
213
printf "Total number of User-Agents: %d\n", scalar keys %agents;
201
print "\n", "=" x 76, "\n";
214
print "\n", "=" x 76, "\n";
202
215
203
###############################
216
###############################
204
# show posters by article count  Sec 1;
217
# show posters by article count  Sec 1;
205
###############################
218
###############################
206
unless ( $skipSec{1} ) {
219
unless ( $skipSec{1} ) {
207
    if (keys %data < $topposters) {
220
    if (keys %data < $topposters) {
208
      $count = keys %data;
221
      $count = keys %data;
209
    } else {
222
    } else {
210
      $count = $topposters;
223
      $count = $topposters;
211
    }
224
    }
212
    printf "%s\n", &centred("Top $count posters by number of articles", 76);
225
    printf "%s\n", &centred("Top $count posters by number of articles", 76);
213
    print "=" x 76, "\n";
226
    print "=" x 76, "\n";
214
    $i = 0;
227
    $i = 0;
215
    foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
228
    foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
216
    my $name = substr($poster, 0, 65);
229
    my $name = substr($poster, 0, 65);
217
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
230
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
218
    last if (++$i == $count);
231
    last if (++$i == $count);
219
 }
232
 }
220
 print "\n", "=" x 76, "\n";
233
 print "\n", "=" x 76, "\n";
221
}
234
}
222
235
223
################################
236
################################
224
# show posters by size in Kbytes Sec 2;
237
# show posters by size in Kbytes Sec 2;
225
################################
238
################################
226
unless ( $skipSec{2} ) {
239
unless ( $skipSec{2} ) {
227
  if (keys %data < $topposters) {
240
  if (keys %data < $topposters) {
228
    $count = keys %data;
241
    $count = keys %data;
229
  } else {
242
  } else {
230
    $count = $topposters;
243
    $count = $topposters;
231
  }
244
  }
232
  printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
245
  printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
233
  print "=" x 76, "\n";
246
  print "=" x 76, "\n";
234
  $i = 0;
247
  $i = 0;
235
  foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
248
  foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
236
    my $name = substr($poster, 0, 62);
249
    my $name = substr($poster, 0, 62);
237
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
250
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
238
    last if (++$i == $count);
251
    last if (++$i == $count);
239
  }
252
  }
240
  print "\n", "=" x 76, "\n";
253
  print "\n", "=" x 76, "\n";
241
}
254
}
242
255
243
####################################
256
####################################
244
# show top posters for original text
257
# show top posters for original text
245
####################################
258
####################################
246
unless ( $skipSec{3} ) {
259
unless ( $skipSec{3} ) {
247
   if (keys %data < $topposters) {
260
   if (keys %data < $topposters) {
248
     $count = keys %data;
261
     $count = keys %data;
249
   } else {
262
   } else {
250
     $count = $topposters;
263
     $count = $topposters;
251
   }
264
   }
252
   printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
265
   printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
253
   print "=" x 76, "\n";
266
   print "=" x 76, "\n";
254
   $i = 0;
267
   $i = 0;
255
   foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
268
   foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
256
     next if $data{$poster}{quoted} == 0;
269
     next if $data{$poster}{quoted} == 0;
257
     next if $data{$poster}{count} < 5;
270
     next if $data{$poster}{count} < 5;
258
     my $name = substr($poster, 0, 63);
271
     my $name = substr($poster, 0, 63);
259
     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
272
     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
260
     last if (++$i == $count);
273
     last if (++$i == $count);
261
   }
274
   }
262
   print "\n", "=" x 76, "\n";
275
   print "\n", "=" x 76, "\n";
263
}
276
}
264
277
265
#######################################
278
#######################################
266
# show bottom posters for original text
279
# show bottom posters for original text
267
#######################################
280
#######################################
268
unless ( $skipSec{4} ) {
281
unless ( $skipSec{4} ) {
269
  if (keys %data < $topposters) {
282
  if (keys %data < $topposters) {
270
    $count = keys %data;
283
    $count = keys %data;
271
  } else {
284
  } else {
272
    $count = $topposters;
285
    $count = $topposters;
273
  }
286
  }
274
  printf "%s\n", &centred("Bottom $count responders by original text  (> 5 posts)", 76);
287
  printf "%s\n", &centred("Bottom $count responders by original text  (> 5 posts)", 76);
275
  print "=" x 76, "\n";
288
  print "=" x 76, "\n";
276
  $i = 0;
289
  $i = 0;
277
  foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
290
  foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
278
    next if $data{$poster}{quoted} == 0;
291
    next if $data{$poster}{quoted} == 0;
279
    next if $data{$poster}{count} < 5;
292
    next if $data{$poster}{count} < 5;
280
    my $name = substr($poster, 0, 63);
293
    my $name = substr($poster, 0, 63);
281
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
294
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
282
    last if (++$i == $count);
295
    last if (++$i == $count);
283
  }
296
  }
284
  print "\n", "=" x 76, "\n";
297
  print "\n", "=" x 76, "\n";
285
}
298
}
286
299
287
####################################
300
####################################
288
# show threads by number of articles
301
# show threads by number of articles
289
####################################
302
####################################
290
unless ( $skipSec{5} ) {
303
unless ( $skipSec{5} ) {
291
  if (keys %threads < $topthreads) {
304
  if (keys %threads < $topthreads) {
292
    $count = keys %threads;
305
    $count = keys %threads;
293
  } else {
306
  } else {
294
    $count = $topthreads;
307
    $count = $topthreads;
295
  }
308
  }
296
  printf "%s\n", &centred("Top $count threads by no. of articles", 76);
309
  printf "%s\n", &centred("Top $count threads by no. of articles", 76);
297
  print "=" x 76, "\n";
310
  print "=" x 76, "\n";
298
  $i = 0;
311
  $i = 0;
299
  foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
312
  foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
300
    my $name = substr($thread, 0, 65);
313
    my $name = substr($thread, 0, 65);
301
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
314
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
302
    last if (++$i == $count);
315
    last if (++$i == $count);
303
  }
316
  }
304
  print "\n", "=" x 76, "\n";
317
  print "\n", "=" x 76, "\n";
305
}
318
}
306
################################
319
################################
307
# show threads by size in Kbytes
320
# show threads by size in Kbytes
308
################################
321
################################
309
unless ( $skipSec{6} ) {
322
unless ( $skipSec{6} ) {
310
  if (keys %threads < $topthreads) {
323
  if (keys %threads < $topthreads) {
311
    $count = keys %threads;
324
    $count = keys %threads;
312
  } else {
325
  } else {
313
    $count = $topthreads;
326
    $count = $topthreads;
314
  }
327
  }
315
  printf "%s\n", &centred("Top $count threads by size in KB", 76);
328
  printf "%s\n", &centred("Top $count threads by size in KB", 76);
316
  print "=" x 76, "\n";
329
  print "=" x 76, "\n";
317
  $i = 0;
330
  $i = 0;
318
  foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
331
  foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
319
    my $name = substr($thread, 0, 65);
332
    my $name = substr($thread, 0, 65);
320
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
333
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
321
    last if (++$i == $count);
334
    last if (++$i == $count);
322
  }
335
  }
323
  print "\n", "=" x 76, "\n";
336
  print "\n", "=" x 76, "\n";
324
}
337
}
325
338
326
#################################
339
#################################
327
# show top 10 cross-posted groups
340
# show top 10 cross-posted groups
328
#################################
341
#################################
329
unless ( $skipSec{7} ) {
342
unless ( $skipSec{7} ) {
330
delete $crossposts{"$newsgroup_name"};  # don't include ours
343
delete $crossposts{"$newsgroup_name"};  # don't include ours
331
if (keys %crossposts < $topcrossposts) {
344
if (keys %crossposts < $topcrossposts) {
332
 $count = keys %crossposts;
345
 $count = keys %crossposts;
333
} else {
346
} else {
334
 $count = $topcrossposts;
347
 $count = $topcrossposts;
335
}
348
}
336
printf "%s\n", &centred("Top $count cross-posted groups", 76);
349
printf "%s\n", &centred("Top $count cross-posted groups", 76);
337
print "=" x 76, "\n";
350
print "=" x 76, "\n";
338
$i = 0;
351
$i = 0;
339
foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
352
foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
340
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
353
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
341
 last if (++$i == $count);
354
 last if (++$i == $count);
342
}
355
}
343
print "\n", "=" x 76, "\n";
356
print "\n", "=" x 76, "\n";
344
}
357
}
345
#######################
358
#######################
346
#show agents and counts
359
#show agents and counts
347
#######################
360
#######################
348
unless ( $skipSec{8} ) {
361
unless ( $skipSec{8} ) {
349
if (keys %agents < $topagents) {
362
if (keys %agents < $topagents) {
350
 $count = keys %agents;
363
 $count = keys %agents;
351
} else {
364
} else {
352
 $count = $topagents;
365
 $count = $topagents;
353
}
366
}
354
printf "%s\n", &centred("Top $count User Agents by poster", 76);
367
printf "%s\n", &centred("Top $count User Agents by poster", 76);
355
print "=" x 76, "\n";
368
print "=" x 76, "\n";
356
$i = 0;
369
$i = 0;
357
foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
370
foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
358
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
371
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
359
 last if (++$i == $count);
372
 last if (++$i == $count);
360
}
373
}
361
print "\n", "=" x 76, "\n";
374
print "\n", "=" x 76, "\n";
362
}
375
}
363
376
364
#######################
377
#######################
365
#show distinct agents
378
#show distinct agents
366
#######################
379
#######################
367
unless ( $skipSec{9} ) {
380
unless ( $skipSec{9} ) {
368
if (keys %distinct_agent < $topagents) {
381
if (keys %distinct_agent < $topagents) {
369
 $count = keys %distinct_agent;
382
 $count = keys %distinct_agent;
370
} else {
383
} else {
371
 $count = $topagents;
384
 $count = $topagents;
372
}
385
}
373
printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
386
printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
374
print "=" x 76, "\n";
387
print "=" x 76, "\n";
375
$i = 0;
388
$i = 0;
376
foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
389
foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
377
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
390
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
378
 last if (++$i == $count);
391
 last if (++$i == $count);
379
}
392
}
380
print "\n", "=" x 76, "\n";
393
print "\n", "=" x 76, "\n";
381
}
394
}
382
395
383
##########################
396
##########################
384
#show timezones and counts
397
#show timezones and counts
385
##########################
398
##########################
386
unless ( $skipSec{10} ) {
399
unless ( $skipSec{10} ) {
387
if (keys %tz < $toptz) {
400
if (keys %tz < $toptz) {
388
 $count = keys %tz;
401
 $count = keys %tz;
389
} else {
402
} else {
390
 $count = $toptz;
403
 $count = $toptz;
391
}
404
}
392
printf "%s\n", &centred("Top 10 time zones", 76);
405
printf "%s\n", &centred("Top 10 time zones", 76);
393
print "=" x 76, "\n";
406
print "=" x 76, "\n";
394
$i = 0;
407
$i = 0;
395
foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
408
foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
396
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
409
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
397
 last if (++$i == $count);
410
 last if (++$i == $count);
398
}
411
}
399
print "\n", "=" x 76, "\n";
412
print "\n", "=" x 76, "\n";
400
}
413
}
401
414
402
415
403
################################ SUBROUTINES ################################
416
################################ SUBROUTINES ################################
404
417
405
418
406
#######################################
419
#######################################
407
# get current article's header and body
420
# get current article's header and body
408
#######################################
421
#######################################
409
sub getarticle {
422
sub getarticle {
410
 %headers = ();                        # dump old headers
423
 %headers = ();                        # dump old headers
411
 my $filename = shift;                 # get the name of the file
424
 my $filename = shift;                 # get the name of the file
412
# get stats about the file itself
425
# get stats about the file itself
413
 $filesize = -s $filename;             # get total size of file
426
 $filesize = -s $filename;             # get total size of file
414
 $totsize += $filesize;                # bump total sizes of all files
427
 $totsize += $filesize;                # bump total sizes of all files
415
428
416
 my $mtime = (stat $filename)[9];
429
 my $mtime = (stat $filename)[9];
417
 if ( $mtime < $earliest ) {
430
 if ( $mtime < $earliest ) {
418
    $earliest = $mtime;
431
    $earliest = $mtime;
419
 } elsif ( $mtime > $latest ) {
432
 } elsif ( $mtime > $latest ) {
420
    $latest = $mtime;
433
    $latest = $mtime;
421
 }
434
 }
422
435
423
# now read the file
436
# now read the file
424
 open(FILE, $filename) or die "Can't open $filename: $!\n";
437
 open(FILE, $filename) or die "Can't open $filename: $!\n";
425
 while (<FILE>) {
438
 while (<FILE>) {
426
   $totheader += length($_);           # bump total header size
439
   $totheader += length($_);           # bump total header size
427
   last if (/^\s*$/);                  # end of header?
440
   last if (/^\s*$/);                  # end of header?
428
   if (/^([^:\s]*):\s+(.*)/) {
441
   if (/^([^:\s]*):\s+(.*)/) {
429
     my($key,$val) = ($1,$2);
442
     my($key,$val) = ($1,$2);
430
     $headers{$key} = $val;
443
     $headers{$key} = decode('MIME-Header', $val);
431
     $lcheader{clean(lc($key))} = clean($val);
444
     $lcheader{clean(lc($key))} = clean($val);
432
  }
445
  }
433
 }
446
 }
434
 @body = <FILE>;                       # slurp up body
447
 @body = <FILE>;                       # slurp up body
435
 close(FILE);
448
 close(FILE);
436
} # getarticle
449
} # getarticle
437
450
438
###################################
451
###################################
439
# get data from the current article
452
# get data from the current article
440
###################################
453
###################################
441
sub getdata {
454
sub getdata {
442
#### First, analyse header fields ####
455
#### First, analyse header fields ####
443
456
444
# Set up this poster if not defined, get counts, sizes
457
# Set up this poster if not defined, get counts, sizes
445
 $poster = $headers{From};             # get the poster's name
458
 $poster = encode('UTF-8', $headers{From});             # get the poster's name
446
 if (!defined($data{$poster})) {       # seen this one before?
459
 if (!defined($data{$poster})) {       # seen this one before?
447
   $data{$poster}{agent} = 'Unknown';  # comes after For: field
460
   $data{$poster}{agent} = 'Unknown';  # comes after For: field
448
   $data{$poster}{orig} = 0;
461
   $data{$poster}{orig} = 0;
449
   $data{$poster}{quoted} = 0;
462
   $data{$poster}{quoted} = 0;
450
 }
463
 }
451
 $data{$poster}{count}++;      # bump count for this poster
464
 $data{$poster}{count}++;      # bump count for this poster
452
 $data{$poster}{size} += $filesize;    # total size of file
465
 $data{$poster}{size} += $filesize;    # total size of file
453
466
454
# The User-Agent and/or X-Newsreader fields
467
# The User-Agent and/or X-Newsreader fields
455
# for User-Agent by poster
468
# for User-Agent by poster
456
 if (defined $lcheader{"user-agent"}) {
469
 if (defined $lcheader{"user-agent"}) {
457
   $data{$poster}{agent} = $lcheader{"user-agent"};
470
   $data{$poster}{agent} = $lcheader{"user-agent"};
458
 }
471
 }
459
 if (defined $lcheader{"x-newsreader"}) {
472
 if (defined $lcheader{"x-newsreader"}) {
460
   $data{$poster}{agent} = $lcheader{"x-newsreader"};
473
   $data{$poster}{agent} = $lcheader{"x-newsreader"};
461
 }
474
 }
462
475
463
# The User Agent for User-Agent by number of posts
476
# The User Agent for User-Agent by number of posts
464
 my $UA = "unknown";
477
 my $UA = "unknown";
465
 foreach my $keys ( keys %lcheader )
478
 foreach my $keys ( keys %lcheader )
466
 {
479
 {
467
   if (defined $lcheader{'user-agent'})
480
   if (defined $lcheader{'user-agent'})
468
   {
481
   {
469
           $UA = $lcheader{'user-agent'};
482
           $UA = $lcheader{'user-agent'};
470
   }
483
   }
471
    elsif (defined $lcheader{"x-newsreader"})
484
    elsif (defined $lcheader{"x-newsreader"})
472
   {
485
   {
473
           $UA = $lcheader{"x-newsreader"};
486
           $UA = $lcheader{"x-newsreader"};
474
   }
487
   }
475
   elsif (defined $lcheader{'x-mailer'})
488
   elsif (defined $lcheader{'x-mailer'})
476
   {
489
   {
477
           $UA = $lcheader{'x-mailer'};
490
           $UA = $lcheader{'x-mailer'};
478
   }
491
   }
479
   elsif ((defined $lcheader{'organization'}) &&
492
   elsif ((defined $lcheader{'organization'}) &&
480
         ($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/))
493
         ($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/))
481
   {
494
   {
482
           $UA = $lcheader{'organization'};
495
           $UA = $lcheader{'organization'};
483
   }
496
   }
484
   elsif ( $lcheader{'message-id'} =~ /pine/i )
497
   elsif ( $lcheader{'message-id'} =~ /pine/i )
485
   {
498
   {
486
           $UA = "Pine";
499
           $UA = "Pine";
487
   } ## Hopefully found UA, else set to unknown
500
   } ## Hopefully found UA, else set to unknown
488
 }
501
 }
489
502
490
503
491
$UA = clean($UA);
504
$UA = clean($UA);
492
$UA = get_agent($UA);
505
$UA = get_agent($UA);
493
506
494
507
495
sub get_agent {
508
sub get_agent {
496
 my $raw = shift;
509
 my $raw = shift;
497
 my $agent = $raw;
510
 my $agent = $raw;
498
511
499
 ## strip http
512
 ## strip http
500
 if ( $raw =~ /.*http.*/ ) {
513
 if ( $raw =~ /.*http.*/ ) {
501
   $raw =~ s!posted via!!i;
514
   $raw =~ s!posted via!!i;
502
   $raw =~ s!http://!!g;
515
   $raw =~ s!http://!!g;
503
   $raw =~ s!/!!g;
516
   $raw =~ s!/!!g;
504
   $raw =~ s! !!g;
517
   $raw =~ s! !!g;
505
 }
518
 }
506
519
507
 ## Fix Outlook from Mac
520
 ## Fix Outlook from Mac
508
 if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
521
 if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
509
522
510
 ## Pick out the popular agents
523
 ## Pick out the popular agents
511
 if ( $raw =~ /(outlook express)/i     ||
524
 if ( $raw =~ /(outlook express)/i     ||
512
      $raw =~ /(microplanet gravity)/i ||
525
      $raw =~ /(microplanet gravity)/i ||
513
      $raw =~ /(news rover)/i          ||
526
      $raw =~ /(news rover)/i          ||
514
      $raw =~ /(forte agent)/i         ||
527
      $raw =~ /(forte agent)/i         ||
515
      $raw =~ /(forte free agent)/i
528
      $raw =~ /(forte free agent)/i
516
    )
529
    )
517
 {
530
 {
518
       $agent = $1;
531
       $agent = $1;
519
 }
532
 }
520
 elsif ( $raw =~ /^(
533
 elsif ( $raw =~ /^(
521
        pan
534
        pan
522
       |sylpheed
535
       |sylpheed
523
       |slrn
536
       |slrn
524
       |mozilla
537
       |mozilla
525
       |knode
538
       |knode
526
       |tin
539
       |tin
527
       |hamster
540
       |hamster
528
       |xrn
541
       |xrn
529
       |xnews
542
       |xnews
530
       |aol
543
       |aol
531
       |gnus
544
       |gnus
532
       |krn
545
       |krn
533
       |macsoup
546
       |macsoup
534
       |messenger
547
       |messenger
535
       |openxp
548
       |openxp
536
       |pine
549
       |pine
537
       |thoth
550
       |thoth
538
       |turnpike
551
       |turnpike
539
       |winvn
552
       |winvn
540
       |vsoup
553
       |vsoup
541
       |google
554
       |google
542
       |supernews
555
       |supernews
543
       |nn
556
       |nn
544
       |rn
557
       |rn
545
       |007
558
       |007
546
       |webtv
559
       |webtv
547
       |compuserve
560
       |compuserve
548
       )/ix )
561
       )/ix )
549
 {
562
 {
550
       $agent = $1;
563
       $agent = $1;
551
 }
564
 }
552
 else
565
 else
553
 {
566
 {
554
 ## Clean up unknown agents
567
 ## Clean up unknown agents
555
       if ( $raw =~ m!^(.*?)/! ) {
568
       if ( $raw =~ m!^(.*?)/! ) {
556
             $agent = $1;
569
             $agent = $1;
557
       }
570
       }
558
       elsif ( $raw =~ /^(\w*)\d.*/ )
571
       elsif ( $raw =~ /^(\w*)\d.*/ )
559
       {
572
       {
560
            $agent = $1;
573
            $agent = $1;
561
       }
574
       }
562
  }
575
  }
563
576
564
$distinct_agent{$agent}++;
577
$distinct_agent{$agent}++;
565
return $agent;
578
return $agent;
566
}
579
}
567
580
568
581
569
# Get all cross-posted newsgroups
582
# Get all cross-posted newsgroups
570
 for (split /,/, $headers{"Newsgroups"}) {
583
 for (split /,/, $headers{"Newsgroups"}) {
571
   $crossposts{$_}++;          # bump count for each
584
   $crossposts{$_}++;          # bump count for each
572
 }
585
 }
573
586
574
# Get threads
587
# Get threads
575
 my $thread = $headers{"Subject"};
588
 my $thread = encode('UTF-8', $headers{"Subject"});
576
 $thread =~ s/^re: //i;                # Remove Re: or re: at start
589
 $thread =~ s/^re: //i;                # Remove Re: or re: at start
577
 $thread =~ s/\s+/ /g;                 # collapse whitespace
590
 $thread =~ s/\s+/ /g;                 # collapse whitespace
578
 $threads{$thread}{count} += 1;        # bump count of this subject
591
 $threads{$thread}{count} += 1;        # bump count of this subject
579
 $threads{$thread}{size} += $filesize; # bump bytes for this thread
592
 $threads{$thread}{size} += $filesize; # bump bytes for this thread
580
593
581
# Is this an original post or a reply?
594
# Is this an original post or a reply?
582
 if (defined $headers{"References"}) {
595
 if (defined $headers{"References"}) {
583
   $replies++;
596
   $replies++;
584
 } else {
597
 } else {
585
   $origposts++;
598
   $origposts++;
586
 }
599
 }
587
600
588
# Get the time zone
601
# Get the time zone
589
 $_ = $headers{"Date"};
602
 $_ = $headers{"Date"};
590
 my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
603
 my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
591
 if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
604
 if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
592
   $tz = "UTC";
605
   $tz = "UTC";
593
 }
606
 }
594
 $tz{$tz}++;
607
 $tz{$tz}++;
595
608
596
#### Now analyse the body text ####
609
#### Now analyse the body text ####
597
 my $insig = 0;
610
 my $insig = 0;
598
 for (@body) {
611
 for (@body) {
599
   $totbody += length($_);             # bump total body size
612
   $totbody += length($_);             # bump total body size
600
   next if (/^$>/);                    # don't count blank lines in body
613
   next if (/^$>/);                    # don't count blank lines in body
601
   if ($insig == 1) {
614
   if ($insig == 1) {
602
     $totsig += length($_);            # bump total sig size
615
     $totsig += length($_);            # bump total sig size
603
# Bill Unruh uses ] quotes, and another poster uses ::
616
# Bill Unruh uses ] quotes, and another poster uses ::
604
   } elsif (/^\s*[>\]]/ or /^\s*::/) {         # are we in a quote line?
617
   } elsif (/^\s*[>\]]/ or /^\s*::/) {         # are we in a quote line?
605
     $data{$poster}{quoted} += length($_);     # bump count of quoted chrs
618
     $data{$poster}{quoted} += length($_);     # bump count of quoted chrs
606
     $totquoted += length($_);
619
     $totquoted += length($_);
607
   } elsif (/-- /) {
620
   } elsif (/-- /) {
608
     $insig = 1;
621
     $insig = 1;
609
   } else {
622
   } else {
610
# we must be processing an original line
623
# we must be processing an original line
611
     $data{$poster}{orig} += length($_); # bump count of original chrs
624
     $data{$poster}{orig} += length($_); # bump count of original chrs
612
     $totorig += length($_);
625
     $totorig += length($_);
613
   }
626
   }
614
 } # end for (@body)
627
 } # end for (@body)
615
628
616
} # getdata
629
} # getdata
617
630
618
########################################
631
########################################
619
# Count the User-Agents used, collapsing
632
# Count the User-Agents used, collapsing
620
# different versions into one per agent.
633
# different versions into one per agent.
621
########################################
634
########################################
622
sub countagents {
635
sub countagents {
623
POSTER:
636
POSTER:
624
 foreach $poster (keys %data) {
637
 foreach $poster (keys %data) {
625
   foreach my $agent_name (keys %distinct_agent) {     # check against known ones
638
   foreach my $agent_name (keys %distinct_agent) {     # check against known ones
626
     if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) {
639
     if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) {
627
       $agents{$agent_name}++;
640
       $agents{$agent_name}++;
628
       next POSTER;
641
       next POSTER;
629
     }
642
     }
630
   }
643
   }
631
   $agents{$data{$poster}{agent}}++;
644
   $agents{$data{$poster}{agent}}++;
632
 }
645
 }
633
} # countagents
646
} # countagents
634
647
635
############################################
648
############################################
636
# set orig/total percentages for all posters
649
# set orig/total percentages for all posters
637
############################################
650
############################################
638
sub fixpercent {
651
sub fixpercent {
639
 foreach $poster (keys %data) {
652
 foreach $poster (keys %data) {
640
   my $percent = 100;
653
   my $percent = 100;
641
   if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) {
654
   if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) {
642
     $percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/
655
     $percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/
643
   } elsif ($data{$poster}{orig} == 0) {
656
   } elsif ($data{$poster}{orig} == 0) {
644
     $percent = 0;
657
     $percent = 0;
645
   }
658
   }
646
   $data{$poster}{percent} = $percent;
659
   $data{$poster}{percent} = $percent;
647
 }
660
 }
648
}
661
}
649
662
650
##############################
663
##############################
651
# right pad a string with '.'s
664
# right pad a string with '.'s
652
##############################
665
##############################
653
sub rpad {
666
sub rpad {
654
# get text to pad, length to pad, pad chr
667
# get text to pad, length to pad, pad chr
655
 my ($text, $pad_len, $pad_chr) = @_;
668
 my ($text, $pad_len, $pad_chr) = @_;
656
 if (length($text) > $pad_len) {
669
 if (length($text) > $pad_len) {
657
   $text = substr($text, 0, $pad_len);
670
   $text = substr($text, 0, $pad_len);
658
 }
671
 }
659
 my $padded = $text . $pad_chr x ( $pad_len - length( $text ) );
672
 my $padded = $text . $pad_chr x ( $pad_len - length( $text ) );
660
 return $padded;
673
 return $padded;
661
}
674
}
662
675
663
#################
676
#################
664
# centre a string
677
# centre a string
665
#################
678
#################
666
sub centred {
679
sub centred {
667
 my ($text, $width) = @_;              # text to centre, size of field to centre in
680
 my ($text, $width) = @_;              # text to centre, size of field to centre in
668
 my $pad_len = ($width - length($text)) / 2;   #/
681
 my $pad_len = ($width - length($text)) / 2;   #/
669
 my $centred = " " x $pad_len . $text;
682
 my $centred = " " x $pad_len . $text;
670
 return $centred;
683
 return $centred;
671
}
684
}
672
685
673
##########################
686
##########################
674
# put commas into a number
687
# put commas into a number
675
##########################
688
##########################
676
sub commify {
689
sub commify {
677
 $_  = shift;
690
 $_  = shift;
678
 1 while s/^(-?\d+)(\d{3})/$1,$2/;
691
 1 while s/^(-?\d+)(\d{3})/$1,$2/;
679
 return $_;
692
 return $_;
680
}
693
}
681
694
682
#########################
695
#########################
683
# clean
696
# clean
684
#########################
697
#########################
685
sub clean {
698
sub clean {
686
 my $dirty = shift;
699
 my $dirty = shift;
687
 my $clean = $dirty;
700
 my $clean = $dirty;
688
 $clean =~ s/^\s*//;
701
 $clean =~ s/^\s*//;
689
 $clean =~ s/\s*$//;
702
 $clean =~ s/\s*$//;
690
703
691
return $clean;
704
return $clean;
692
}
705
}
693
706
694
707
695
sub usage {
708
sub usage {
696
709
697
 print "usage: newstat.pl newsgroupname\n";
710
 print "usage: newstat.pl newsgroupname\n";
698
 exit 1;
711
 exit 1;
699
}
712
}
700
713
701
###################################
714
###################################
702
# Write data structures to a file #
715
# Write data structures to a file #
703
###################################
716
###################################
704
sub writedata {
717
sub writedata {
705
 open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
718
 open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
706
 print OUTF "Data collected from alt.os.linux.mandrake\n\n";
719
 print OUTF "Data collected from alt.os.linux.mandrake\n\n";
707
 print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
720
 print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
708
 foreach my $name (keys %data) {
721
 foreach my $name (keys %data) {
709
   print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
722
   print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
710
 }
723
 }
711
 print OUTF "============================================================================\n";
724
 print OUTF "============================================================================\n";
712
 print OUTF "Thread subjects\n";
725
 print OUTF "Thread subjects\n";
713
 print OUTF "----------------------------------------------------------------------------\n";
726
 print OUTF "----------------------------------------------------------------------------\n";
714
 foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {
727
 foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {
715
   print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
728
   print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
716
 }
729
 }
717
 print OUTF "============================================================================\n";
730
 print OUTF "============================================================================\n";
718
 print OUTF "Cross-posts\n";
731
 print OUTF "Cross-posts\n";
719
 print OUTF "----------------------------------------------------------------------------\n";
732
 print OUTF "----------------------------------------------------------------------------\n";
720
 foreach my $name (sort keys %crossposts) {
733
 foreach my $name (sort keys %crossposts) {
721
   print OUTF "$name : $crossposts{$name}\n";
734
   print OUTF "$name : $crossposts{$name}\n";
722
 }
735
 }
723
 print OUTF
736
 print OUTF
724
 print OUTF "============================================================================\n";
737
 print OUTF "============================================================================\n";
725
 print OUTF "User agents\n";
738
 print OUTF "User agents\n";
726
 print OUTF "----------------------------------------------------------------------------\n";
739
 print OUTF "----------------------------------------------------------------------------\n";
727
 foreach my $name (sort keys %agents) {
740
 foreach my $name (sort keys %agents) {
728
   print OUTF "$name : $agents{$name}\n";
741
   print OUTF "$name : $agents{$name}\n";
729
 }
742
 }
730
 print OUTF "============================================================================\n";
743
 print OUTF "============================================================================\n";
731
 print OUTF "Time zones\n";
744
 print OUTF "Time zones\n";
732
 print OUTF "----------------------------------------------------------------------------\n";
745
 print OUTF "----------------------------------------------------------------------------\n";
733
 foreach my $name (sort keys %tz) {
746
 foreach my $name (sort keys %tz) {
734
   print OUTF "$name : $tz{$name}\n";
747
   print OUTF "$name : $tz{$name}\n";
735
 }
748
 }
736
 close OUTF;
749
 close OUTF;
737
} # writedata
750
} # writedata
738
 
751