Subversion Repositories LCARS

Rev

Rev 5 | Rev 7 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5 Rev 6
Line 1... Line 1...
1
#!/usr/bin/perl -w
1
#!/usr/bin/perl -w
2
use strict;
2
use strict;
3
#########################
3
use warnings;
4
# newsstat.pl version 0.3
4
use utf8;
5
5
use Encode;
6
6
-
 
7
#########################
-
 
8
# newsstat.pl version 0.4
7
9
8
###################################################################
10
############################################################################
9
# Collect statistics about the alt.os.linux.mandrake newsgroup.
11
# Collect statistics about a newsgroup (specified by first argument) in
10
# Check all articles in the last 7-day period. Rank posters by
12
# the local news spool. Check all articles in the last 30-day period. 
11
# no. of posts and by volume of posts, report on top and bottom
13
# Rank posters by number of posts and by volume of posts, report on top and
12
# 20 posters. Show their name, no. posts, size of posts, percentage
14
# bottom 20 posters. Show their name, number of posts, size of posts,
13
# quoted lines. Rank user-agents used, by poster rather than by
15
# percentage of quoted lines. Rank user-agents used, by poster rather than
14
# post. Rank top 10 threads. Rank top 10 cross-posted groups.
16
# by post. Rank top 20 threads. Rank top 10 cross-posted groups.
-
 
17
# 
-
 
18
# (Numbers and paths can be configured below.  -- PE)
15
###################################################################
19
############################################################################
16
20
17
##################################################################
21
############################################################################
18
#                       RECENT CHANGES                           #
22
#                       RECENT CHANGES                                     #
-
 
23
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
-
 
24
#                 - Use warnings, utf8 (just in case) 
-
 
25
#                 - Documentation update
-
 
26
# N/A         NN  - Take newsgroup name as argument
19
# 2004/06/19    - newsgroup name is $ARGV[0]
27
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
20
#               - Allow command line flags for subtracting
28
#                 - Allow command line flags for subtracting
21
#                 output if not pertinent for a group
29
#                   output if not pertinent for a group
22
# 2002/11/09    - Put Garry's writedata() function back in.
30
# 2002-11-09  NN  - Put Garry's writedata() function back in.
23
#               - added "rn" to my list of UA's
31
#                 - added "rn" to my list of UA's
24
#               - Started using %distinct_agent for both User agent
32
#                 - Started using %distinct_agent for both User agent
25
#               sections
33
#                   sections
26
#               - named it newsstat.pl version 0.3
34
#                 - named it newsstat.pl version 0.3
27
# 2002/11/06    - Fixed the earliest/latest file problem by using
35
# 2002-11-06  NN  - Fixed the earliest/latest file problem by using
28
#               mtime rather than ctime, and simplifying the logic
36
#                   mtime rather than ctime, and simplifying the logic
29
# 2002/11/05    - moved user configurations to the top
37
# 2002-11-05  NN  - moved user configurations to the top
30
#               - fixed the cross-posting section
38
#                 - fixed the cross-posting section
31
#               - introduced the $newsgroup_name variable which
39
#                 - introduced the $newsgroup_name variable which
32
#               later becomes $news$group
40
#                   later becomes $news$group
33
#               - changed $name to $agent_name in countagents()
41
#                 - changed $name to $agent_name in countagents()
-
 
42
# 
-
 
43
# Contributors
-
 
44
# -------------
-
 
45
# NN  Nomen nominandum (name to be determined later)
-
 
46
# PE  Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
34
47
35
########### NEXT #############
48
########### TODO #############
36
# Commas in bottom section of report
49
# Commas in bottom section of report
37
# Show date the figures were compiled
50
# Show date the figures were compiled
38
# No. of HTML articles (Content-Type: text/html)
51
# No. of HTML articles (Content-Type: text/html)
39
# No. of quoted sigs (/>\s*-- /)
52
# No. of quoted sigs (/>\s*-- /)
40
# Per cent of top-posted articles
53
# Per cent of top-posted articles
Line 45... Line 58...
45
# What do we do about Bill Unruh's ] quote style?
58
# What do we do about Bill Unruh's ] quote style?
46
# Change the way dates/times are checked
59
# Change the way dates/times are checked
47
# include % share in posters by no. of arts
60
# include % share in posters by no. of arts
48
# include % share in posters by size
61
# include % share in posters by size
49
# Total, orig & quoted lines by user agent with per cent
62
# Total, orig & quoted lines by user agent with per cent
50
# Take arguments, i.e. newsgroup name
63
# Take more arguments
51
#######################################################
64
#######################################################
52
65
53
###################### USER CONFIGURATIONS ############################
66
###################### USER CONFIGURATIONS ############################
54
67
55
# The name of the group to do stats for
68
# The name of the group to do stats for
56
my $newsgroup_name = $ARGV[0];
69
my $newsgroup_name = $ARGV[0];
57
  $newsgroup_name or &usage;
70
$newsgroup_name or &usage;
58
71
59
# Check for removal flags
72
# Check for removal flags
60
my $ix;
73
my $ix;
61
my $j;
74
my $j;
62
my %skipSec;
75
my %skipSec;
Line 172... Line 185...
172
185
173
#################### DISPLAY RESULTS #####################
186
#################### DISPLAY RESULTS #####################
174
print "=" x 76, "\n";
187
print "=" x 76, "\n";
175
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
188
printf "%s\n", &centred("Analysis of posts to $newsgroup_name", 76);
176
print "=" x 76, "\n";
189
print "=" x 76, "\n";
177
printf "%s\n", &centred("(stats compiled with a script by Garry Knight)", 76);
190
printf "%s\n", &centred("(stats compiled with a script by Garry Knight et al.)", 76);
178
print "\n\n";
191
print "\n\n";
179
printf "Total posts considered: %s over %d days\n",
192
printf "Total posts considered: %s over %d days\n",
180
       commify($totalposts), $numdays;
193
       commify($totalposts), $numdays;
181
printf "Earliest article: %s\n", scalar localtime($earliest);
194
printf "Earliest article: %s\n", scalar localtime($earliest);
182
printf "Latest article:   %s\n", scalar localtime($latest);
195
printf "Latest article:   %s\n", scalar localtime($latest);
Line 425... Line 438...
425
 while (<FILE>) {
438
 while (<FILE>) {
426
   $totheader += length($_);           # bump total header size
439
   $totheader += length($_);           # bump total header size
427
   last if (/^\s*$/);                  # end of header?
440
   last if (/^\s*$/);                  # end of header?
428
   if (/^([^:\s]*):\s+(.*)/) {
441
   if (/^([^:\s]*):\s+(.*)/) {
429
     my($key,$val) = ($1,$2);
442
     my($key,$val) = ($1,$2);
430
     $headers{$key} = $val;
443
     $headers{$key} = decode('MIME-Header', $val);
431
     $lcheader{clean(lc($key))} = clean($val);
444
     $lcheader{clean(lc($key))} = clean($val);
432
  }
445
  }
433
 }
446
 }
434
 @body = <FILE>;                       # slurp up body
447
 @body = <FILE>;                       # slurp up body
435
 close(FILE);
448
 close(FILE);
Line 440... Line 453...
440
###################################
453
###################################
441
sub getdata {
454
sub getdata {
442
#### First, analyse header fields ####
455
#### First, analyse header fields ####
443
456
444
# Set up this poster if not defined, get counts, sizes
457
# Set up this poster if not defined, get counts, sizes
445
 $poster = $headers{From};             # get the poster's name
458
 $poster = encode('UTF-8', $headers{From});             # get the poster's name
446
 if (!defined($data{$poster})) {       # seen this one before?
459
 if (!defined($data{$poster})) {       # seen this one before?
447
   $data{$poster}{agent} = 'Unknown';  # comes after For: field
460
   $data{$poster}{agent} = 'Unknown';  # comes after For: field
448
   $data{$poster}{orig} = 0;
461
   $data{$poster}{orig} = 0;
449
   $data{$poster}{quoted} = 0;
462
   $data{$poster}{quoted} = 0;
450
 }
463
 }
Line 570... Line 583...
570
 for (split /,/, $headers{"Newsgroups"}) {
583
 for (split /,/, $headers{"Newsgroups"}) {
571
   $crossposts{$_}++;          # bump count for each
584
   $crossposts{$_}++;          # bump count for each
572
 }
585
 }
573
586
574
# Get threads
587
# Get threads
575
 my $thread = $headers{"Subject"};
588
 my $thread = encode('UTF-8', $headers{"Subject"});
576
 $thread =~ s/^re: //i;                # Remove Re: or re: at start
589
 $thread =~ s/^re: //i;                # Remove Re: or re: at start
577
 $thread =~ s/\s+/ /g;                 # collapse whitespace
590
 $thread =~ s/\s+/ /g;                 # collapse whitespace
578
 $threads{$thread}{count} += 1;        # bump count of this subject
591
 $threads{$thread}{count} += 1;        # bump count of this subject
579
 $threads{$thread}{size} += $filesize; # bump bytes for this thread
592
 $threads{$thread}{size} += $filesize; # bump bytes for this thread
580
593