Subversion Repositories LCARS

Rev

Rev 5 | Rev 7 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5 PointedEar 1
#!/usr/bin/perl -w
2
use strict;
6 PointedEar 3
use warnings;
4
use utf8;
5
use Encode;
6
 
5 PointedEar 7
#########################
6 PointedEar 8
# newsstat.pl version 0.4
5 PointedEar 9
 
6 PointedEar 10
############################################################################
11
# Collect statistics about a newsgroup (specified by first argument) in
12
# the local news spool. Check all articles in the last 30-day period. 
13
# Rank posters by number of posts and by volume of posts, report on top and
14
# bottom 20 posters. Show their name, number of posts, size of posts,
15
# percentage of quoted lines. Rank user-agents used, by poster rather than
16
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
17
# 
18
# (Numbers and paths can be configured below.  -- PE)
19
############################################################################
5 PointedEar 20
 
6 PointedEar 21
############################################################################
22
#                       RECENT CHANGES                                     #
23
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
24
#                 - Use warnings, utf8 (just in case) 
25
#                 - Documentation update
26
# N/A         NN  - Take newsgroup name as argument
27
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
28
#                 - Allow command line flags for subtracting
29
#                   output if not pertinent for a group
30
# 2002-11-09  NN  - Put Garry's writedata() function back in.
31
#                 - added "rn" to my list of UA's
32
#                 - Started using %distinct_agent for both User agent
33
#                   sections
34
#                 - named it newsstat.pl version 0.3
35
# 2002-11-06  NN  - Fixed the earliest/latest file problem by using
36
#                   mtime rather than ctime, and simplifying the logic
37
# 2002-11-05  NN  - moved user configurations to the top
38
#                 - fixed the cross-posting section
39
#                 - introduced the $newsgroup_name variable which
40
#                   later becomes $news$group
41
#                 - changed $name to $agent_name in countagents()
42
# 
43
# Contributors
44
# -------------
45
# NN  Nomen nominandum (name to be determined later)
46
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
5 PointedEar 47
 
6 PointedEar 48
########### TODO #############
5 PointedEar 49
# Commas in bottom section of report
50
# Show date the figures were compiled
51
# No. of HTML articles (Content-Type: text/html)
52
# No. of quoted sigs (/>\s*-- /)
53
# Per cent of top-posted articles
54
# Top 10 cross-posters
55
# Top 20 news posting hosts (from Path)
56
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
57
# Count *all* User Agents that each poster uses
58
# What do we do about Bill Unruh's ] quote style?
59
# Change the way dates/times are checked
60
# include % share in posters by no. of arts
61
# include % share in posters by size
62
# Total, orig & quoted lines by user agent with per cent
6 PointedEar 63
# Take more arguments
5 PointedEar 64
#######################################################
65
 
66
###################### USER CONFIGURATIONS ############################
67
 
68
# The name of the group to do stats for
69
my $newsgroup_name = $ARGV[0];
6 PointedEar 70
$newsgroup_name or &usage;
5 PointedEar 71
 
72
# Check for removal flags
73
my $ix;
74
my $j;
75
my %skipSec;
76
my @skiplist;
77
my $args = @ARGV;
78
for ( $ix = 1 ; $ix < $args ; $ix++ ) {
79
       $j = $ix + 1;
80
       if ( $ARGV[$ix] eq "-x" ) {
81
               @skiplist = split(",",$ARGV[$j]);
82
       } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) {
83
               @skiplist = split(",",$1);
84
       }
85
}
86
foreach(@skiplist) {
87
  $skipSec{$_} = 1;
88
}
89
 
90
# Leafnode users will want /var/spool/news for this variable.
91
my $news = "/var/spool/news/";
92
 
93
# How many days are we doing statistics for?
94
my $numdays = 30;
95
 
96
# no. of agents we list
97
my $topagents = 10;
98
 
99
# no. of threads we want to know about
100
my $topthreads = 20;
101
 
102
# no. of top or bottom posters to show
103
my $topposters = 20;
104
 
105
# no. of cross-posted threads to show
106
my $topcrossposts = 10;
107
 
108
# no. of time zones to show
109
my $toptz = 10;
110
 
111
 
112
 
113
###################### DATA STRUCTURES ######################
114
my $group = $newsgroup_name;
115
$group =~ s!\.!/!g;
116
my %data;                               # name, count, agent, total, orig, quoted
117
my %threads;                            # subject, count
118
my %crossposts;                         # group, count
119
my %tz;                                 # timezones by count
120
my %headers;                            # holds header of current article
121
my %lcheader;                           # holds lowercase headers
122
my @body;                               # holds body of current article
123
my @sig;                                # holds sig text;
124
my $totalposts;                         # total no. of posts considered
125
my $filename;                           # name of current article file
126
my $filesize;                           # size of current article file
127
my $earliest;                           # earliest article we have found
128
my $latest;                             # latest article we have found
129
my $poster;                             # poster we are dealing with
130
my $totsize = 0;                        # holds total sizes of all files
131
my $totheader = 0;                      # total size of header material
132
my $totbody = 0;                        # total size of body material
133
my $totsig = 0;                         # total size of sig material
134
my $totorig = 0;                        # total size of original material
135
my $totquoted = 0;                      # total size of quoted material
136
my $origposts = 0;                      # total no. of original posts
137
my $replies = 0;                        # total no. of replies
138
my $i;                                  # general purpose
139
my %distinct_agent;
140
my %agents =                            # used to hold counts of User Agents used
141
            (  "KNode"                         => 0,
142
               "Pan"                           => 0,
143
               "Mozilla"                       => 0,
144
               "Sylpheed"                      => 0,
145
               "Gnus"                          => 0,
146
               "Forte Agent"                   => 0,
147
               "Forte Free Agent"              => 0,
148
               "MicroPlanet Gravity"           => 0,
149
               "Microsoft Outlook Express"     => 0,
150
               "Xnews"                         => 0,
151
               "slrn"                          => 0,
152
               "tin"                           => 0,
153
               "rn"                            => 0,
154
               "NN"                            => 0,
155
               "MacSOUP"                       => 0,
156
               "Foorum"                        => 0,
157
               "MT-NewsWatcher"                => 0,
158
               "News Rover"                    => 0,
159
               "WebTV"                         => 0,
160
               "Compuserver"                   => 0,
161
               "VSoup"                         => 0);
162
 
163
######################## MAIN CODE ########################
164
$! = 1;
165
 
166
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
167
opendir(DIR, ".") or die "Can't open $news$group directory: $!\n";
168
while (defined($filename = readdir(DIR))) {
169
 %lcheader = ();
170
 next unless -f $filename;             # only want real files
171
 next if ($filename eq ".overview");   # real articles only
172
 next if (-M $filename > $numdays);    # only want articles <= a certain age
173
 $earliest = (stat $filename)[9] unless defined ($earliest);
174
 $latest   = (stat $filename)[9] unless defined ($latest);
175
 &getarticle($filename);               # read in the article
176
 &getdata;                             # grab the data from the article
177
 $totalposts++;                        # bump count of articles considered
178
}
179
closedir(DIR);                          # finished with the directory
180
# post-processing
181
&countagents;                           # count agents, collapsing versions
182
&fixpercent;                            # check percentages orig/total for posters
183
 
184
&writedata;
185
 
186
#################### DISPLAY RESULTS #####################
187
print "=" x 76, "\n";
188
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
189
print "=" x 76, "\n";
6 PointedEar 190
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
5 PointedEar 191
print "\n\n";
192
printf "Total posts considered: %s over %d days\n",
193
       commify($totalposts), $numdays;
194
printf "Earliest article: %s\n", scalar localtime($earliest);
195
printf "Latest article:   %s\n", scalar localtime($latest);
196
printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
197
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
198
       commify(int($totsize / 1024)), $totsize / 1048576; #
199
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
200
       commify(int($totalposts / $numdays)),
201
       $totsize / $numdays / 1048576, commify(int($totsize / $totalposts));
202
my $count = keys %data;
203
printf "Total headers: %s KB  bodies: %s KB\n",
204
       commify(int($totheader / 1024)), commify(int($totbody / 1024));
205
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
206
       commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
207
       ($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024));
208
printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
209
       commify(int($totsize / $count)); #/
210
$count = keys %threads;
211
printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
212
       commify(int($totsize / $count)); #/
213
printf "Total number of User-Agents: %d\n", scalar keys %agents;
214
print "\n", "=" x 76, "\n";
215
 
216
###############################
217
# show posters by article count  Sec 1;
218
###############################
219
unless ( $skipSec{1} ) {
220
    if (keys %data < $topposters) {
221
      $count = keys %data;
222
    } else {
223
      $count = $topposters;
224
    }
225
    printf "%s\n", &centred("Top $count posters by number of articles", 76);
226
    print "=" x 76, "\n";
227
    $i = 0;
228
    foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) {
229
    my $name = substr($poster, 0, 65);
230
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count};
231
    last if (++$i == $count);
232
 }
233
 print "\n", "=" x 76, "\n";
234
}
235
 
236
################################
237
# show posters by size in Kbytes Sec 2;
238
################################
239
unless ( $skipSec{2} ) {
240
  if (keys %data < $topposters) {
241
    $count = keys %data;
242
  } else {
243
    $count = $topposters;
244
  }
245
  printf "%s\n", &centred("Top $count posters by article size in Kbytes", 76);
246
  print "=" x 76, "\n";
247
  $i = 0;
248
  foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) {
249
    my $name = substr($poster, 0, 62);
250
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/
251
    last if (++$i == $count);
252
  }
253
  print "\n", "=" x 76, "\n";
254
}
255
 
256
####################################
257
# show top posters for original text
258
####################################
259
unless ( $skipSec{3} ) {
260
   if (keys %data < $topposters) {
261
     $count = keys %data;
262
   } else {
263
     $count = $topposters;
264
   }
265
   printf "%s\n", &centred("Top $count responders by original text (> 5 posts)", 76);
266
   print "=" x 76, "\n";
267
   $i = 0;
268
   foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) {
269
     next if $data{$poster}{quoted} == 0;
270
     next if $data{$poster}{count} < 5;
271
     my $name = substr($poster, 0, 63);
272
     printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
273
     last if (++$i == $count);
274
   }
275
   print "\n", "=" x 76, "\n";
276
}
277
 
278
#######################################
279
# show bottom posters for original text
280
#######################################
281
unless ( $skipSec{4} ) {
282
  if (keys %data < $topposters) {
283
    $count = keys %data;
284
  } else {
285
    $count = $topposters;
286
  }
287
  printf "%s\n", &centred("Bottom $count responders by original text  (> 5 posts)", 76);
288
  print "=" x 76, "\n";
289
  $i = 0;
290
  foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) {
291
    next if $data{$poster}{quoted} == 0;
292
    next if $data{$poster}{count} < 5;
293
    my $name = substr($poster, 0, 63);
294
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent};
295
    last if (++$i == $count);
296
  }
297
  print "\n", "=" x 76, "\n";
298
}
299
 
300
####################################
301
# show threads by number of articles
302
####################################
303
unless ( $skipSec{5} ) {
304
  if (keys %threads < $topthreads) {
305
    $count = keys %threads;
306
  } else {
307
    $count = $topthreads;
308
  }
309
  printf "%s\n", &centred("Top $count threads by no. of articles", 76);
310
  print "=" x 76, "\n";
311
  $i = 0;
312
  foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) {
313
    my $name = substr($thread, 0, 65);
314
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count};
315
    last if (++$i == $count);
316
  }
317
  print "\n", "=" x 76, "\n";
318
}
319
################################
320
# show threads by size in Kbytes
321
################################
322
unless ( $skipSec{6} ) {
323
  if (keys %threads < $topthreads) {
324
    $count = keys %threads;
325
  } else {
326
    $count = $topthreads;
327
  }
328
  printf "%s\n", &centred("Top $count threads by size in KB", 76);
329
  print "=" x 76, "\n";
330
  $i = 0;
331
  foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) {
332
    my $name = substr($thread, 0, 65);
333
    printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/
334
    last if (++$i == $count);
335
  }
336
  print "\n", "=" x 76, "\n";
337
}
338
 
339
#################################
340
# show top 10 cross-posted groups
341
#################################
342
unless ( $skipSec{7} ) {
343
delete $crossposts{"$newsgroup_name"};  # don't include ours
344
if (keys %crossposts < $topcrossposts) {
345
 $count = keys %crossposts;
346
} else {
347
 $count = $topcrossposts;
348
}
349
printf "%s\n", &centred("Top $count cross-posted groups", 76);
350
print "=" x 76, "\n";
351
$i = 0;
352
foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) {
353
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name};
354
 last if (++$i == $count);
355
}
356
print "\n", "=" x 76, "\n";
357
}
358
#######################
359
#show agents and counts
360
#######################
361
unless ( $skipSec{8} ) {
362
if (keys %agents < $topagents) {
363
 $count = keys %agents;
364
} else {
365
 $count = $topagents;
366
}
367
printf "%s\n", &centred("Top $count User Agents by poster", 76);
368
print "=" x 76, "\n";
369
$i = 0;
370
foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) {
371
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent};
372
 last if (++$i == $count);
373
}
374
print "\n", "=" x 76, "\n";
375
}
376
 
377
#######################
378
#show distinct agents
379
#######################
380
unless ( $skipSec{9} ) {
381
if (keys %distinct_agent < $topagents) {
382
 $count = keys %distinct_agent;
383
} else {
384
 $count = $topagents;
385
}
386
printf "%s\n", &centred("Top $count User Agents by number of posts", 76);
387
print "=" x 76, "\n";
388
$i = 0;
389
foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) {
390
printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100);
391
 last if (++$i == $count);
392
}
393
print "\n", "=" x 76, "\n";
394
}
395
 
396
##########################
397
#show timezones and counts
398
##########################
399
unless ( $skipSec{10} ) {
400
if (keys %tz < $toptz) {
401
 $count = keys %tz;
402
} else {
403
 $count = $toptz;
404
}
405
printf "%s\n", &centred("Top 10 time zones", 76);
406
print "=" x 76, "\n";
407
$i = 0;
408
foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) {
409
 printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone};
410
 last if (++$i == $count);
411
}
412
print "\n", "=" x 76, "\n";
413
}
414
 
415
 
416
################################ SUBROUTINES ################################
417
 
418
 
419
#######################################
420
# get current article's header and body
421
#######################################
422
sub getarticle {
423
 %headers = ();                        # dump old headers
424
 my $filename = shift;                 # get the name of the file
425
# get stats about the file itself
426
 $filesize = -s $filename;             # get total size of file
427
 $totsize += $filesize;                # bump total sizes of all files
428
 
429
 my $mtime = (stat $filename)[9];
430
 if ( $mtime < $earliest ) {
431
    $earliest = $mtime;
432
 } elsif ( $mtime > $latest ) {
433
    $latest = $mtime;
434
 }
435
 
436
# now read the file
437
 open(FILE, $filename) or die "Can't open $filename: $!\n";
438
 while (<FILE>) {
439
   $totheader += length($_);           # bump total header size
440
   last if (/^\s*$/);                  # end of header?
441
   if (/^([^:\s]*):\s+(.*)/) {
442
     my($key,$val) = ($1,$2);
6 PointedEar 443
     $headers{$key} = decode('MIME-Header', $val);
5 PointedEar 444
     $lcheader{clean(lc($key))} = clean($val);
445
  }
446
 }
447
 @body = <FILE>;                       # slurp up body
448
 close(FILE);
449
} # getarticle
450
 
451
###################################
452
# get data from the current article
453
###################################
454
sub getdata {
455
#### First, analyse header fields ####
456
 
457
# Set up this poster if not defined, get counts, sizes
6 PointedEar 458
 $poster = encode('UTF-8', $headers{From});             # get the poster's name
5 PointedEar 459
 if (!defined($data{$poster})) {       # seen this one before?
460
   $data{$poster}{agent} = 'Unknown';  # comes after For: field
461
   $data{$poster}{orig} = 0;
462
   $data{$poster}{quoted} = 0;
463
 }
464
 $data{$poster}{count}++;      # bump count for this poster
465
 $data{$poster}{size} += $filesize;    # total size of file
466
 
467
# The User-Agent and/or X-Newsreader fields
468
# for User-Agent by poster
469
 if (defined $lcheader{"user-agent"}) {
470
   $data{$poster}{agent} = $lcheader{"user-agent"};
471
 }
472
 if (defined $lcheader{"x-newsreader"}) {
473
   $data{$poster}{agent} = $lcheader{"x-newsreader"};
474
 }
475
 
476
# The User Agent for User-Agent by number of posts
477
 my $UA = "unknown";
478
 foreach my $keys ( keys %lcheader )
479
 {
480
   if (defined $lcheader{'user-agent'})
481
   {
482
           $UA = $lcheader{'user-agent'};
483
   }
484
    elsif (defined $lcheader{"x-newsreader"})
485
   {
486
           $UA = $lcheader{"x-newsreader"};
487
   }
488
   elsif (defined $lcheader{'x-mailer'})
489
   {
490
           $UA = $lcheader{'x-mailer'};
491
   }
492
   elsif ((defined $lcheader{'organization'}) &&
493
         ($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/))
494
   {
495
           $UA = $lcheader{'organization'};
496
   }
497
   elsif ( $lcheader{'message-id'} =~ /pine/i )
498
   {
499
           $UA = "Pine";
500
   } ## Hopefully found UA, else set to unknown
501
 }
502
 
503
 
504
$UA = clean($UA);
505
$UA = get_agent($UA);
506
 
507
 
508
sub get_agent {
509
 my $raw = shift;
510
 my $agent = $raw;
511
 
512
 ## strip http
513
 if ( $raw =~ /.*http.*/ ) {
514
   $raw =~ s!posted via!!i;
515
   $raw =~ s!http://!!g;
516
   $raw =~ s!/!!g;
517
   $raw =~ s! !!g;
518
 }
519
 
520
 ## Fix Outlook from Mac
521
 if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;}
522
 
523
 ## Pick out the popular agents
524
 if ( $raw =~ /(outlook express)/i     ||
525
      $raw =~ /(microplanet gravity)/i ||
526
      $raw =~ /(news rover)/i          ||
527
      $raw =~ /(forte agent)/i         ||
528
      $raw =~ /(forte free agent)/i
529
    )
530
 {
531
       $agent = $1;
532
 }
533
 elsif ( $raw =~ /^(
534
        pan
535
       |sylpheed
536
       |slrn
537
       |mozilla
538
       |knode
539
       |tin
540
       |hamster
541
       |xrn
542
       |xnews
543
       |aol
544
       |gnus
545
       |krn
546
       |macsoup
547
       |messenger
548
       |openxp
549
       |pine
550
       |thoth
551
       |turnpike
552
       |winvn
553
       |vsoup
554
       |google
555
       |supernews
556
       |nn
557
       |rn
558
       |007
559
       |webtv
560
       |compuserve
561
       )/ix )
562
 {
563
       $agent = $1;
564
 }
565
 else
566
 {
567
 ## Clean up unknown agents
568
       if ( $raw =~ m!^(.*?)/! ) {
569
             $agent = $1;
570
       }
571
       elsif ( $raw =~ /^(\w*)\d.*/ )
572
       {
573
            $agent = $1;
574
       }
575
  }
576
 
577
$distinct_agent{$agent}++;
578
return $agent;
579
}
580
 
581
 
582
# Get all cross-posted newsgroups
583
 for (split /,/, $headers{"Newsgroups"}) {
584
   $crossposts{$_}++;          # bump count for each
585
 }
586
 
587
# Get threads
6 PointedEar 588
 my $thread = encode('UTF-8', $headers{"Subject"});
5 PointedEar 589
 $thread =~ s/^re: //i;                # Remove Re: or re: at start
590
 $thread =~ s/\s+/ /g;                 # collapse whitespace
591
 $threads{$thread}{count} += 1;        # bump count of this subject
592
 $threads{$thread}{size} += $filesize; # bump bytes for this thread
593
 
594
# Is this an original post or a reply?
595
 if (defined $headers{"References"}) {
596
   $replies++;
597
 } else {
598
   $origposts++;
599
 }
600
 
601
# Get the time zone
602
 $_ = $headers{"Date"};
603
 my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
604
 if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) {
605
   $tz = "UTC";
606
 }
607
 $tz{$tz}++;
608
 
609
#### Now analyse the body text ####
610
 my $insig = 0;
611
 for (@body) {
612
   $totbody += length($_);             # bump total body size
613
   next if (/^$>/);                    # don't count blank lines in body
614
   if ($insig == 1) {
615
     $totsig += length($_);            # bump total sig size
616
# Bill Unruh uses ] quotes, and another poster uses ::
617
   } elsif (/^\s*[>\]]/ or /^\s*::/) {         # are we in a quote line?
618
     $data{$poster}{quoted} += length($_);     # bump count of quoted chrs
619
     $totquoted += length($_);
620
   } elsif (/-- /) {
621
     $insig = 1;
622
   } else {
623
# we must be processing an original line
624
     $data{$poster}{orig} += length($_); # bump count of original chrs
625
     $totorig += length($_);
626
   }
627
 } # end for (@body)
628
 
629
} # getdata
630
 
631
########################################
632
# Count the User-Agents used, collapsing
633
# different versions into one per agent.
634
########################################
635
sub countagents {
636
POSTER:
637
 foreach $poster (keys %data) {
638
   foreach my $agent_name (keys %distinct_agent) {     # check against known ones
639
     if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) {
640
       $agents{$agent_name}++;
641
       next POSTER;
642
     }
643
   }
644
   $agents{$data{$poster}{agent}}++;
645
 }
646
} # countagents
647
 
648
############################################
649
# set orig/total percentages for all posters
650
############################################
651
sub fixpercent {
652
 foreach $poster (keys %data) {
653
   my $percent = 100;
654
   if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) {
655
     $percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/
656
   } elsif ($data{$poster}{orig} == 0) {
657
     $percent = 0;
658
   }
659
   $data{$poster}{percent} = $percent;
660
 }
661
}
662
 
663
##############################
664
# right pad a string with '.'s
665
##############################
666
sub rpad {
667
# get text to pad, length to pad, pad chr
668
 my ($text, $pad_len, $pad_chr) = @_;
669
 if (length($text) > $pad_len) {
670
   $text = substr($text, 0, $pad_len);
671
 }
672
 my $padded = $text . $pad_chr x ( $pad_len - length( $text ) );
673
 return $padded;
674
}
675
 
676
#################
677
# centre a string
678
#################
679
sub centred {
680
 my ($text, $width) = @_;              # text to centre, size of field to centre in
681
 my $pad_len = ($width - length($text)) / 2;   #/
682
 my $centred = " " x $pad_len . $text;
683
 return $centred;
684
}
685
 
686
##########################
687
# put commas into a number
688
##########################
689
sub commify {
690
 $_  = shift;
691
 1 while s/^(-?\d+)(\d{3})/$1,$2/;
692
 return $_;
693
}
694
 
695
#########################
696
# clean
697
#########################
698
sub clean {
699
 my $dirty = shift;
700
 my $clean = $dirty;
701
 $clean =~ s/^\s*//;
702
 $clean =~ s/\s*$//;
703
 
704
return $clean;
705
}
706
 
707
 
708
sub usage {
709
 
710
 print "usage: newstat.pl newsgroupname\n";
711
 exit 1;
712
}
713
 
714
###################################
715
# Write data structures to a file #
716
###################################
717
sub writedata {
718
 open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
719
 print OUTF "Data collected from alt.os.linux.mandrake\n\n";
720
 print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
721
 foreach my $name (keys %data) {
722
   print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
723
 }
724
 print OUTF "============================================================================\n";
725
 print OUTF "Thread subjects\n";
726
 print OUTF "----------------------------------------------------------------------------\n";
727
 foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {
728
   print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
729
 }
730
 print OUTF "============================================================================\n";
731
 print OUTF "Cross-posts\n";
732
 print OUTF "----------------------------------------------------------------------------\n";
733
 foreach my $name (sort keys %crossposts) {
734
   print OUTF "$name : $crossposts{$name}\n";
735
 }
736
 print OUTF
737
 print OUTF "============================================================================\n";
738
 print OUTF "User agents\n";
739
 print OUTF "----------------------------------------------------------------------------\n";
740
 foreach my $name (sort keys %agents) {
741
   print OUTF "$name : $agents{$name}\n";
742
 }
743
 print OUTF "============================================================================\n";
744
 print OUTF "Time zones\n";
745
 print OUTF "----------------------------------------------------------------------------\n";
746
 foreach my $name (sort keys %tz) {
747
   print OUTF "$name : $tz{$name}\n";
748
 }
749
 close OUTF;
750
} # writedata