Subversion Repositories LCARS

Rev

Rev 6 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5 PointedEar 1
#!/usr/bin/perl -w
2
use strict;
3
#########################
4
# newsstat.pl version 0.3
5
 
6
 
7
 
8
###################################################################
9
# Collect statistics about the newsgroup named on the command line.
# Check all articles from the last $numdays days (30 by default). Rank
# posters by no. of posts and by volume of posts, report on top and bottom
# 20 posters. Show their name, no. posts, size of posts, percentage
# of original text. Rank user-agents used, both by poster and by
# post. Rank top 20 threads. Rank top 10 cross-posted groups and time zones.
15
###################################################################
16
 
17
##################################################################
18
#                       RECENT CHANGES                           #
19
# 2004/06/19    - newsgroup name is $ARGV[0]
20
#               - Allow command line flags for subtracting
21
#                 output if not pertinent for a group
22
# 2002/11/09    - Put Garry's writedata() function back in.
23
#               - added "rn" to my list of UA's
24
#               - Started using %distinct_agent for both User agent
25
#               sections
26
#               - named it newsstat.pl version 0.3
27
# 2002/11/06    - Fixed the earliest/latest file problem by using
28
#               mtime rather than ctime, and simplifying the logic
29
# 2002/11/05    - moved user configurations to the top
30
#               - fixed the cross-posting section
31
#               - introduced the $newsgroup_name variable which
32
#               later becomes $news$group
33
#               - changed $name to $agent_name in countagents()
34
 
35
########### NEXT #############
36
# Commas in bottom section of report
37
# Show date the figures were compiled
38
# No. of HTML articles (Content-Type: text/html)
39
# No. of quoted sigs (/>\s*-- /)
40
# Per cent of top-posted articles
41
# Top 10 cross-posters
42
# Top 20 news posting hosts (from Path)
43
# Count of certain subject words: newbie, kde, burner, sendmail, etc.
44
# Count *all* User Agents that each poster uses
45
# What do we do about Bill Unruh's ] quote style?
46
# Change the way dates/times are checked
47
# include % share in posters by no. of arts
48
# include % share in posters by size
49
# Total, orig & quoted lines by user agent with per cent
50
# Take arguments, i.e. newsgroup name
51
#######################################################
52
 
53
###################### USER CONFIGURATIONS ############################
54
 
55
# The name of the group to do stats for (first command-line argument).
# Without it there is nothing to do, so print the usage line and exit.
my $newsgroup_name = $ARGV[0];
$newsgroup_name or usage();

# Check for removal flags.  "-x 3,4" and "-x3,4" both name report sections
# (by number) that are to be left out of the output.  If the flag is given
# more than once, the last occurrence wins.
my %skipSec;
my @skiplist;
for my $ix ( 1 .. $#ARGV ) {
    if ( $ARGV[$ix] eq "-x" ) {
        # Only consume a value when one really follows the flag; a trailing
        # bare "-x" used to hand undef to split().
        my $value = $ARGV[ $ix + 1 ];
        @skiplist = split( ",", $value ) if defined $value;
    }
    elsif ( $ARGV[$ix] =~ /^-x(\d.*)/ ) {
        @skiplist = split( ",", $1 );
    }
}
foreach my $section (@skiplist) {
    $skipSec{$section} = 1;
}

# Leafnode users will want /var/spool/news for this variable.
# The trailing slash matters: the group path is appended directly.
my $news = "/var/spool/news/";

# How many days are we doing statistics for?
my $numdays = 30;

# no. of agents we list
my $topagents = 10;

# no. of threads we want to know about
my $topthreads = 20;

# no. of top or bottom posters to show
my $topposters = 20;

# no. of cross-posted threads to show
my $topcrossposts = 10;

# no. of time zones to show
my $toptz = 10;
97
 
98
 
99
 
100
###################### DATA STRUCTURES ######################
101
# Spool sub-directory of the group: dots in the name become path separators
# (e.g. "alt.os.linux" -> "alt/os/linux").
my $group = $newsgroup_name;
$group =~ s!\.!/!g;

# Per-key tallies filled in while the articles are read.
my %data;                               # name, count, agent, total, orig, quoted
my %threads;                            # subject, count
my %crossposts;                         # group, count
my %tz;                                 # timezones by count

# State of the article currently being processed.
my %headers;                            # holds header of current article
my %lcheader;                           # holds lowercase headers
my @body;                               # holds body of current article
my @sig;                                # holds sig text;
my $filename;                           # name of current article file
my $filesize;                           # size of current article file
my $poster;                             # poster we are dealing with

# Running totals.  All start at zero so that an empty spool directory
# produces zeros rather than undef in the report.
my $totalposts = 0;                     # total no. of posts considered
my $earliest;                           # earliest article we have found
my $latest;                             # latest article we have found
my $totsize = 0;                        # holds total sizes of all files
my $totheader = 0;                      # total size of header material
my $totbody = 0;                        # total size of body material
my $totsig = 0;                         # total size of sig material
my $totorig = 0;                        # total size of original material
my $totquoted = 0;                      # total size of quoted material
my $origposts = 0;                      # total no. of original posts
my $replies = 0;                        # total no. of replies
my $i;                                  # general purpose

# Posts per normalised user agent (filled by get_agent).
my %distinct_agent;

# Posters per user agent (filled by countagents).  The well-known agents are
# pre-seeded with zero so that they are present in the hash from the start.
my %agents =                            # used to hold counts of User Agents used
            (  "KNode"                         => 0,
               "Pan"                           => 0,
               "Mozilla"                       => 0,
               "Sylpheed"                      => 0,
               "Gnus"                          => 0,
               "Forte Agent"                   => 0,
               "Forte Free Agent"              => 0,
               "MicroPlanet Gravity"           => 0,
               "Microsoft Outlook Express"     => 0,
               "Xnews"                         => 0,
               "slrn"                          => 0,
               "tin"                           => 0,
               "rn"                            => 0,
               "NN"                            => 0,
               "MacSOUP"                       => 0,
               "Foorum"                        => 0,
               "MT-NewsWatcher"                => 0,
               "News Rover"                    => 0,
               "WebTV"                         => 0,
               "Compuserver"                   => 0,
               "VSoup"                         => 0);
149
 
150
######################## MAIN CODE ########################
151
# Flush STDOUT after every write.  The original assigned to $! (errno),
# which has no useful effect; $| is the autoflush switch.
$| = 1;

# Work inside the group's spool directory so that the bare names returned
# by readdir can be used directly.
chdir("$news$group") or die "Can't cd to $news$group: $!\n";
opendir(my $spool, ".") or die "Can't open $news$group directory: $!\n";
while (defined($filename = readdir($spool))) {
    %lcheader = ();                       # getarticle only adds to this hash
    next unless -f $filename;             # only want real files
    next if ($filename eq ".overview");   # real articles only
    next if (-M $filename > $numdays);    # only want articles <= a certain age

    # Seed the date range from the first qualifying article.
    unless (defined $earliest and defined $latest) {
        my $mtime = (stat $filename)[9];
        $earliest = $mtime unless defined $earliest;
        $latest   = $mtime unless defined $latest;
    }

    getarticle($filename);                # read in the article
    getdata();                            # grab the data from the article
    $totalposts++;                        # bump count of articles considered
}
closedir($spool);                         # finished with the directory

# post-processing
countagents();                            # count agents, collapsing versions
fixpercent();                             # check percentages orig/total for posters

writedata();                              # dump the raw tallies to a file
172
 
173
#################### DISPLAY RESULTS #####################
174
# Overall summary at the top of the report.
#
# Every average goes through $ratio, which yields 0 when the divisor is 0.
# Without it the script died with "Illegal division by zero" whenever no
# article fell inside the reporting window.
my $ratio = sub {
    my ($dividend, $divisor) = @_;
    return $divisor ? $dividend / $divisor : 0;
};
my $posts = $totalposts || 0;             # undef when nothing was read

print "=" x 76, "\n";
printf "%s\n", centred("Analysis of posts to $newsgroup_name", 76);
print "=" x 76, "\n";
printf "%s\n", centred("(stats compiled with a script by Garry Knight)", 76);
print "\n\n";
printf "Total posts considered: %s over %d days\n",
       commify($posts), $numdays;
printf "Earliest article: %s\n",
       defined $earliest ? scalar(localtime($earliest)) : "n/a";
printf "Latest article:   %s\n",
       defined $latest ? scalar(localtime($latest)) : "n/a";
printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies);
printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize),
       commify(int($totsize / 1024)), $totsize / 1048576;
printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n",
       commify(int($ratio->($posts, $numdays))),
       $ratio->($totsize, $numdays) / 1048576,
       commify(int($ratio->($totsize, $posts)));

# $count is reused by the report sections that follow.
my $count = keys %data;
printf "Total headers: %s KB  bodies: %s KB\n",
       commify(int($totheader / 1024)), commify(int($totbody / 1024));
printf "Body text - quoted: %s KB,  original: %s KB = %02.2f%%, sigs: %s KB\n",
       commify(int($totquoted / 1024)), commify(int($totorig / 1024)),
       $ratio->($totorig * 100, $totorig + $totquoted),
       commify(int($totsig / 1024));
printf "Total number of posters: %s, average %s bytes per poster\n", commify($count),
       commify(int($ratio->($totsize, $count)));
$count = keys %threads;
printf "Total number of threads: %s, average %s bytes per thread\n", commify($count),
       commify(int($ratio->($totsize, $count)));
printf "Total number of User-Agents: %d\n", scalar keys %agents;
print "\n", "=" x 76, "\n";
202
 
203
###############################
204
# show posters by article count  Sec 1;
205
###############################
206
# Each numbered section can be suppressed with the -x command-line flag.
# All ten sections share one layout, produced by print_ranking() below.

###############################
# show posters by article count  Sec 1;
###############################
unless ( $skipSec{1} ) {
    print_ranking(
        "Top %s posters by number of articles",
        $topposters, scalar(keys %data), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $data{$_}{count} ] }
        sort { $data{$b}{count} <=> $data{$a}{count} } keys %data
    );
}

################################
# show posters by size in Kbytes Sec 2;
################################
unless ( $skipSec{2} ) {
    print_ranking(
        "Top %s posters by article size in Kbytes",
        $topposters, scalar(keys %data), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $data{$_}{size} / 1024 ] }
        sort { $data{$b}{size} <=> $data{$a}{size} } keys %data
    );
}

####################################
# show top posters for original text  Sec 3;
####################################
# NOTE(review): the title says "> 5 posts" but posters with exactly 5 posts
# are included (only counts below 5 are dropped) - confirm which is meant.
unless ( $skipSec{3} ) {
    print_ranking(
        "Top %s responders by original text (> 5 posts)",
        $topposters, scalar(keys %data), "%2d: %-63s : %02.2f%%\n", 63,
        map  { [ $_, $data{$_}{percent} ] }
        grep { $data{$_}{quoted} != 0 and $data{$_}{count} >= 5 }
        sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data
    );
}

#######################################
# show bottom posters for original text  Sec 4;
#######################################
unless ( $skipSec{4} ) {
    print_ranking(
        "Bottom %s responders by original text  (> 5 posts)",
        $topposters, scalar(keys %data), "%2d: %-63s : %02.2f%%\n", 63,
        map  { [ $_, $data{$_}{percent} ] }
        grep { $data{$_}{quoted} != 0 and $data{$_}{count} >= 5 }
        sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data
    );
}

####################################
# show threads by number of articles  Sec 5;
####################################
unless ( $skipSec{5} ) {
    print_ranking(
        "Top %s threads by no. of articles",
        $topthreads, scalar(keys %threads), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $threads{$_}{count} ] }
        sort { $threads{$b}{count} <=> $threads{$a}{count} } keys %threads
    );
}

################################
# show threads by size in Kbytes  Sec 6;
################################
unless ( $skipSec{6} ) {
    print_ranking(
        "Top %s threads by size in KB",
        $topthreads, scalar(keys %threads), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $threads{$_}{size} / 1024 ] }
        sort { $threads{$b}{size} <=> $threads{$a}{size} } keys %threads
    );
}

#################################
# show top cross-posted groups  Sec 7;
#################################
unless ( $skipSec{7} ) {
    delete $crossposts{"$newsgroup_name"};  # don't include ours
    print_ranking(
        "Top %s cross-posted groups",
        $topcrossposts, scalar(keys %crossposts), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $crossposts{$_} ] }
        sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts
    );
}

#######################
# show agents and counts  Sec 8;
#######################
unless ( $skipSec{8} ) {
    print_ranking(
        "Top %s User Agents by poster",
        $topagents, scalar(keys %agents), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $agents{$_} ] }
        sort { $agents{$b} <=> $agents{$a} } keys %agents
    );
}

#######################
# show distinct agents  Sec 9;
#######################
# The second figure is the agent's share of all posts, in per cent.
unless ( $skipSec{9} ) {
    print_ranking(
        "Top %s User Agents by number of posts",
        $topagents, scalar(keys %distinct_agent),
        "%2d: %-58s : %5d (%2.f%%)\n", 58,
        map  { [ $_, $distinct_agent{$_},
                 ( $distinct_agent{$_} / $totalposts ) * 100 ] }
        sort { $distinct_agent{$b} <=> $distinct_agent{$a} } keys %distinct_agent
    );
}

##########################
# show timezones and counts  Sec 10;
##########################
# The title now reports the number of zones actually listed instead of a
# hard-coded "10".
unless ( $skipSec{10} ) {
    print_ranking(
        "Top %s time zones",
        $toptz, scalar(keys %tz), "%2d: %-63s : %6d\n", 63,
        map  { [ $_, $tz{$_} ] }
        sort { $tz{$b} <=> $tz{$a} } keys %tz
    );
}

#############################################################
# Print one ranked table of the report.
#   $title       - sprintf template; %s receives the number of rows announced
#   $limit       - configured maximum number of rows
#   $population  - number of keys in the hash being ranked
#   $row_format  - printf format: rank, padded label, then the row's values
#   $label_width - width the label is cut/padded to with '.'
#   @rows        - array refs [ label, value, ... ], already sorted/filtered
# The announced row count is the smaller of $limit and $population, as in
# the original per-section code; filtering may leave fewer rows than that.
#############################################################
sub print_ranking {
    my ($title, $limit, $population, $row_format, $label_width, @rows) = @_;
    my $shown = $population < $limit ? $population : $limit;

    printf "%s\n", centred(sprintf($title, $shown), 76);
    print "=" x 76, "\n";

    my $rank = 0;
    foreach my $row (@rows) {
        my ($label, @values) = @$row;
        printf $row_format, $rank + 1, rpad($label, $label_width, "."), @values;
        last if (++$rank == $shown);
    }
    print "\n", "=" x 76, "\n";
    return;
}
401
 
402
 
403
################################ SUBROUTINES ################################
404
 
405
 
406
#######################################
407
# get current article's header and body
408
#######################################
409
sub getarticle {
    # Read one article file into the file-level %headers, %lcheader and @body
    # and update the size/date totals.
    #
    # Argument: name of the article file (relative to the current directory).
    # Dies if the file cannot be opened.
    #
    # %lcheader is only added to here; the caller clears it between articles.
    # Folded (continuation) header lines are not joined to their field.
    my $filename = shift;                 # get the name of the file
    %headers = ();                        # dump old headers

    # get stats about the file itself
    my @st = stat $filename;
    $filesize = @st ? $st[7] : 0;         # total size of file
    $totsize += $filesize;                # bump total sizes of all files

    # Widen the earliest/latest range.  The defined() checks let this sub
    # work even if the caller has not seeded the range.
    my $mtime = $st[9];
    if (defined $mtime) {
        $earliest = $mtime if !defined $earliest or $mtime < $earliest;
        $latest   = $mtime if !defined $latest   or $mtime > $latest;
    }

    # Now read the file.  Three-argument open: the name comes straight from
    # readdir, so it must never be interpreted as a mode or a pipe.
    open(my $article, '<', $filename) or die "Can't open $filename: $!\n";
    while (my $line = <$article>) {
        $totheader += length($line);      # bump total header size
        last if ($line =~ /^\s*$/);       # end of header?
        if ($line =~ /^([^:\s]*):\s+(.*)/) {
            my ($key, $val) = ($1, $2);
            $headers{$key} = $val;                      # as written
            $lcheader{clean(lc($key))} = clean($val);   # case-insensitive lookup
        }
    }
    @body = <$article>;                   # slurp up body
    close($article);
    return;
} # getarticle
437
 
438
###################################
439
# get data from the current article
440
###################################
441
sub getdata {
    # Fold the article currently held in %headers, %lcheader and @body into
    # the running statistics: per-poster counts and sizes, user agents,
    # cross-posted groups, threads, reply/original counts, time zones and the
    # quoted/original/signature byte totals.
    #
    # Missing header fields are tallied under the empty key, exactly as
    # before, but no longer trigger "uninitialized value" warnings.

    #### First, analyse header fields ####

    # Set up this poster if not defined, get counts, sizes
    $poster = defined $headers{From} ? $headers{From} : '';
    if (!defined($data{$poster})) {       # seen this one before?
        $data{$poster}{agent}  = 'Unknown';
        $data{$poster}{orig}   = 0;
        $data{$poster}{quoted} = 0;
    }
    $data{$poster}{count}++;              # bump count for this poster
    $data{$poster}{size} += $filesize;    # total size of file

    # User agent per poster: X-Newsreader overrides User-Agent when both
    # are present, and the latest article wins.
    if (defined $lcheader{"user-agent"}) {
        $data{$poster}{agent} = $lcheader{"user-agent"};
    }
    if (defined $lcheader{"x-newsreader"}) {
        $data{$poster}{agent} = $lcheader{"x-newsreader"};
    }

    # User agent per post: first header in this priority order that is
    # usable.  (The original repeated this chain once per header key, with
    # the same outcome each time.)
    my $UA = "unknown";
    if (defined $lcheader{'user-agent'}) {
        $UA = $lcheader{'user-agent'};
    }
    elsif (defined $lcheader{'x-newsreader'}) {
        $UA = $lcheader{'x-newsreader'};
    }
    elsif (defined $lcheader{'x-mailer'}) {
        $UA = $lcheader{'x-mailer'};
    }
    elsif (defined $lcheader{'organization'} and
           $lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/) {
        $UA = $lcheader{'organization'};
    }
    elsif (defined $lcheader{'message-id'} and $lcheader{'message-id'} =~ /pine/i) {
        $UA = "Pine";
    }
    get_agent(clean($UA));                # called for its %distinct_agent tally

    # Get all cross-posted newsgroups
    if (defined $headers{"Newsgroups"}) {
        for my $crossgroup (split /,/, $headers{"Newsgroups"}) {
            $crossposts{$crossgroup}++;   # bump count for each
        }
    }

    # Get threads
    my $thread = defined $headers{"Subject"} ? $headers{"Subject"} : '';
    $thread =~ s/^re: //i;                # Remove Re: or re: at start
    $thread =~ s/\s+/ /g;                 # collapse whitespace
    $threads{$thread}{count} += 1;        # bump count of this subject
    $threads{$thread}{size} += $filesize; # bump bytes for this thread

    # Is this an original post or a reply?
    if (defined $headers{"References"}) {
        $replies++;
    } else {
        $origposts++;
    }

    # Get the time zone: whatever follows hh:mm:ss in the Date header.
    my $zone;
    if (defined $headers{"Date"}) {
        ($zone) = $headers{"Date"} =~ /\d\d:\d\d:\d\d\s+(.*)/;
    }
    $zone = '' unless defined $zone;
    if (($zone =~ /UTC/) or ($zone =~ /GMT/) or ($zone =~ /0000/)) {
        $zone = "UTC";
    }
    $tz{$zone}++;

    #### Now analyse the body text ####
    my $insig = 0;
    for my $line (@body) {
        my $len = length($line);
        $totbody += $len;                 # bump total body size

        # Don't count blank lines in body.  The original pattern /^$>/
        # interpolated $> (the effective user id), so it never matched
        # blank lines at all.
        next if ($line =~ /^\s*$/);

        if ($insig == 1) {
            $totsig += $len;              # bump total sig size
        # Bill Unruh uses ] quotes, and another poster uses ::
        } elsif ($line =~ /^\s*[>\]]/ or $line =~ /^\s*::/) {
            $data{$poster}{quoted} += $len;   # bump count of quoted chrs
            $totquoted += $len;
        } elsif ($line =~ /^-- \r?$/) {
            # Signature delimiter: a line consisting of "-- " only.  The
            # unanchored /-- / used before fired on any line merely
            # containing that sequence.  The delimiter itself is not counted.
            $insig = 1;
        } else {
            # we must be processing an original line
            $data{$poster}{orig} += $len;     # bump count of original chrs
            $totorig += $len;
        }
    }
    return;
} # getdata

sub get_agent {
    # Reduce a raw user-agent string to a short agent name, count one post
    # for that name in %distinct_agent and return the name.
    #
    # Argument: the raw string (already trimmed by the caller).
    # If no rule below applies, the raw string itself is the name.
    my $raw = shift;
    my $agent = $raw;

    ## strip http
    if ( $raw =~ /.*http.*/ ) {
        $raw =~ s!posted via!!i;
        $raw =~ s!http://!!g;
        $raw =~ s!/!!g;
        $raw =~ s! !!g;
    }

    ## Fix Outlook from Mac
    if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; }

    ## Pick out the popular agents
    if ( $raw =~ /(outlook express)/i     ||
         $raw =~ /(microplanet gravity)/i ||
         $raw =~ /(news rover)/i          ||
         $raw =~ /(forte agent)/i         ||
         $raw =~ /(forte free agent)/i
       )
    {
        $agent = $1;                      # name as spelled in the header
    }
    elsif ( $raw =~ /^(
             pan
            |sylpheed
            |slrn
            |mozilla
            |knode
            |tin
            |hamster
            |xrn
            |xnews
            |aol
            |gnus
            |krn
            |macsoup
            |messenger
            |openxp
            |pine
            |thoth
            |turnpike
            |winvn
            |vsoup
            |google
            |supernews
            |nn
            |rn
            |007
            |webtv
            |compuserve
            )/ix )
    {
        $agent = $1;                      # known name at the very start
    }
    else
    {
        ## Clean up unknown agents
        if ( $raw =~ m!^(.*?)/! ) {       # "Name/version" -> "Name"
            $agent = $1;
        }
        elsif ( $raw =~ /^(\w*)\d.*/ )    # leading word up to its last digit
        {
            $agent = $1;
        }
    }

    $distinct_agent{$agent}++;
    return $agent;
}
617
 
618
########################################
619
# Count the User-Agents used, collapsing
620
# different versions into one per agent.
621
########################################
622
sub countagents {
    # Count posters per user agent in %agents.  A poster is credited to the
    # first known agent name (a key of %distinct_agent) contained in the
    # poster's agent string; otherwise the full agent string is counted.
    #
    # Candidates are tried longest first, ties alphabetically.  Plain hash
    # order made the outcome vary between runs whenever several names
    # matched (e.g. "rn" inside "slrn").  Empty names are skipped: inside a
    # regex an empty pattern means "reuse the last successful pattern".
    my @known = sort { length($b) <=> length($a) or $a cmp $b }
                grep { length } keys %distinct_agent;

  POSTER:
    foreach my $who (keys %data) {
        my $agent = $data{$who}{agent};
        foreach my $agent_name (@known) {     # check against known ones
            if ( index($agent, $agent_name) >= 0 ) {   # literal substring
                $agents{$agent_name}++;
                next POSTER;
            }
        }
        $agents{$agent}++;
    }
    return;
} # countagents
634
 
635
############################################
636
# set orig/total percentages for all posters
637
############################################
638
sub fixpercent {
    # Store each poster's share of original text, in per cent of
    # original + quoted characters, under $data{poster}{percent}.
    # No original text gives 0; original text with nothing quoted gives 100.
    for my $who (keys %data) {
        my ($orig, $quoted) = @{ $data{$who} }{qw(orig quoted)};

        my $share;
        if ($orig == 0) {
            $share = 0;
        }
        elsif ($quoted == 0) {
            $share = 100;
        }
        else {
            $share = $orig * 100 / ($quoted + $orig);
        }
        $data{$who}{percent} = $share;
    }
}
649
 
650
##############################
651
# right pad a string with '.'s
652
##############################
653
sub rpad {
    # Cut $text to at most $pad_len characters, then fill it up on the
    # right with $pad_chr until it is exactly $pad_len characters long.
    my ($text, $pad_len, $pad_chr) = @_;
    my $clipped = length($text) > $pad_len
                ? substr($text, 0, $pad_len)
                : $text;
    return $clipped . $pad_chr x ($pad_len - length($clipped));
}
662
 
663
#################
664
# centre a string
665
#################
666
sub centred {
    # Return $text preceded by enough blanks to centre it in a field of
    # $width characters.  Nothing is appended on the right, and text wider
    # than the field is returned unchanged.
    my ($text, $width) = @_;
    my $indent = ($width - length($text)) / 2;    # fraction is dropped by x
    return " " x $indent . $text;
}
672
 
673
##########################
674
# put commas into a number
675
##########################
676
sub commify {
    # Return the number with a comma between each group of three digits of
    # its integer part, e.g. 1234567 -> "1,234,567".  An optional leading
    # minus sign is kept.  Works on a private copy: the original assigned to
    # the global $_ and so clobbered whatever the caller had in it.
    my $number = shift;
    return $number unless defined $number;

    # Each pass splits off the rightmost three-digit group that is not yet
    # separated; repeat until nothing is left to split.
    1 while $number =~ s/^(-?\d+)(\d{3})/$1,$2/;
    return $number;
}
681
 
682
#########################
683
# clean
684
#########################
685
sub clean {
    # Return a copy of the argument without leading and trailing whitespace.
    my ($clean) = @_;
    for ($clean) {
        s/^\s*//;
        s/\s*$//;
    }
    return $clean;
}
693
 
694
 
695
sub usage {
    # Print the command-line synopsis and exit with status 1.  The message
    # now carries the script's real name (newsstat.pl) and mentions the -x
    # flag, which takes a comma-separated list of section numbers to omit.
    print "usage: newsstat.pl newsgroupname [-x section[,section...]]\n";
    exit 1;
}
700
 
701
###################################
702
# Write data structures to a file #
703
###################################
704
sub writedata {
    # Dump the collected tallies (posters, threads, cross-posts, user agents,
    # time zones) to /tmp/XDATA as plain text.  Dies if the file cannot be
    # created or completely written.
    #
    # Changes from the original: the heading names the group actually
    # analysed instead of a hard-coded one, and the stray "print OUTF"
    # without a semicolon is gone (it swallowed the following print and
    # wrote an extra "1" into the file).
    #
    # NOTE(review): a fixed name in /tmp can be pre-created by another user
    # (e.g. as a symlink) - consider a per-user location.
    my $double_rule = "============================================================================\n";
    my $single_rule = "----------------------------------------------------------------------------\n";

    open(my $out, '>', "/tmp/XDATA") or die "Can't create XDATA: $!\n";

    print {$out} "Data collected from $newsgroup_name\n\n";
    print {$out} "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
    foreach my $name (keys %data) {
        print {$out} "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
    }

    print {$out} $double_rule;
    print {$out} "Thread subjects\n";
    print {$out} $single_rule;
    foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) {   # case-insensitive
        print {$out} "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
    }

    print {$out} $double_rule;
    print {$out} "Cross-posts\n";
    print {$out} $single_rule;
    foreach my $name (sort keys %crossposts) {
        print {$out} "$name : $crossposts{$name}\n";
    }

    print {$out} $double_rule;
    print {$out} "User agents\n";
    print {$out} $single_rule;
    foreach my $name (sort keys %agents) {
        print {$out} "$name : $agents{$name}\n";
    }

    print {$out} $double_rule;
    print {$out} "Time zones\n";
    print {$out} $single_rule;
    foreach my $name (sort keys %tz) {
        print {$out} "$name : $tz{$name}\n";
    }

    # Buffered write errors only surface at close.
    close($out) or die "Can't write XDATA: $!\n";
    return;
} # writedata