Subversion Repositories LCARS

Rev

Rev 7 | Rev 9 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7 Rev 8
Line 1... Line 1...
1
#!/usr/bin/perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
use utf8;
4
use utf8;
5
use Encode;
5
use Encode;
6
6
7
#########################
7
###########################
8
# newsstat.pl version 0.4
8
# newsstat.pl version 0.4.1
9
9
10
############################################################################
10
############################################################################
11
# Collect statistics about a newsgroup (specified by first argument) in
11
# Collect statistics about a newsgroup (specified by first argument) in
12
# the local news spool. Check all articles in the last 30-day period.
12
# the local news spool. Check all articles in the last 30-day period.
13
# Rank posters by number of posts and by volume of posts, report on top and
13
# Rank posters by number of posts and by volume of posts, report on top and
Line 237... Line 237...
237
    $count = $topposters;
237
    $count = $topposters;
238
  }
238
  }
239
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
239
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
240
  print "=" x 76, "\n";
240
  print "=" x 76, "\n";
241
  $i = 0;
241
  $i = 0;
242
  foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
242
  foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
243
  {
243
  {
244
    my $name = substr( $poster, 0, 65 );
244
    my $name = substr( $poster, 0, 65 );
245
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
245
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
246
      $data{$poster}{count};
246
      $data{$poster}{count};
247
    last if ( ++$i == $count );
247
    last if ( ++$i == $count );
Line 263... Line 263...
263
    $count = $topposters;
263
    $count = $topposters;
264
  }
264
  }
265
  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
265
  printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 );
266
  print "=" x 76, "\n";
266
  print "=" x 76, "\n";
267
  $i = 0;
267
  $i = 0;
268
  foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
268
  foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
269
  {
269
  {
270
    my $name = substr( $poster, 0, 62 );
270
    my $name = substr( $poster, 0, 62 );
271
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
271
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
272
      $data{$poster}{size} / 1024;    #/
272
      $data{$poster}{size} / 1024;    #/
273
    last if ( ++$i == $count );
273
    last if ( ++$i == $count );
Line 290... Line 290...
290
  }
290
  }
291
  printf "%s\n",
291
  printf "%s\n",
292
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
292
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
293
  print "=" x 76, "\n";
293
  print "=" x 76, "\n";
294
  $i = 0;
294
  $i = 0;
295
  foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
295
  foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
296
    keys %data )
296
    keys %data )
297
  {
297
  {
298
    next if $data{$poster}{quoted} == 0;
298
    next if $data{$poster}{quoted} == 0;
299
    next if $data{$poster}{count} < 5;
299
    next if $data{$poster}{count} < 5;
300
    my $name = substr( $poster, 0, 63 );
300
    my $name = substr( $poster, 0, 63 );
Line 320... Line 320...
320
  }
320
  }
321
  printf "%s\n",
321
  printf "%s\n",
322
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
322
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
323
  print "=" x 76, "\n";
323
  print "=" x 76, "\n";
324
  $i = 0;
324
  $i = 0;
325
  foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
325
  foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
326
    keys %data )
326
    keys %data )
327
  {
327
  {
328
    next if $data{$poster}{quoted} == 0;
328
    next if $data{$poster}{quoted} == 0;
329
    next if $data{$poster}{count} < 5;
329
    next if $data{$poster}{count} < 5;
330
    my $name = substr( $poster, 0, 63 );
330
    my $name = substr( $poster, 0, 63 );
Line 513... Line 513...
513
  {
513
  {
514
    $latest = $mtime;
514
    $latest = $mtime;
515
  }
515
  }
516
516
517
  # now read the file
517
  # now read the file
518
  open( FILE, $filename ) or die "Can't open $filename: $!\n";
518
  open(my $FILE, $filename) or die "Can't open $filename: $!\n";
519
  while (<FILE>)
519
  while (<$FILE>)
520
  {
520
  {
521
    $totheader += length($_);    # bump total header size
521
    $totheader += length($_);    # bump total header size
522
    last if (/^\s*$/);           # end of header?
522
    last if (/^\s*$/);           # end of header?
523
    if (/^([^:\s]*):\s+(.*)/)
523
    if (/^([^:\s]*):\s+(.*)/)
524
    {
524
    {
525
      my ( $key, $val ) = ( $1, $2 );
525
      my ( $key, $val ) = ( $1, $2 );
526
      $headers{$key} = decode( 'MIME-Header', $val );
526
      $headers{$key} = decode( 'MIME-Header', $val );
527
      $lcheader{ clean( lc($key) ) } = clean($val);
527
      $lcheader{ clean( lc($key) ) } = clean($val);
528
    }
528
    }
529
  }
529
  }
530
  @body = <FILE>;                # slurp up body
530
  @body = <$FILE>;                # slurp up body
531
  close(FILE);
531
  close($FILE);
532
}    # getarticle
532
}    # getarticle
533
533
534
###################################
534
###################################
535
# get data from the current article
535
# get data from the current article
536
###################################
536
###################################
Line 694... Line 694...
694
  }
694
  }
695
695
696
  # Get the time zone
696
  # Get the time zone
697
  $_ = $headers{"Date"};
697
  $_ = $headers{"Date"};
698
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
698
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
699
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
699
  if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
700
  {
700
  {
701
    $tz = "UTC";
701
    $tz = "UTC";
702
  }
702
  }
703
  $tz{$tz}++;
703
  $tz{$tz}++;
704
704
Line 739... Line 739...
739
# different versions into one per agent.
739
# different versions into one per agent.
740
########################################
740
########################################
741
sub countagents
741
sub countagents
742
{
742
{
743
POSTER:
743
POSTER:
744
  foreach $poster ( keys %data )
744
  foreach my $poster ( keys %data )
745
  {
745
  {
746
    foreach my $agent_name ( keys %distinct_agent )
746
    foreach my $agent_name ( keys %distinct_agent )
747
    {    # check against known ones
747
    {    # check against known ones
748
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
748
      if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ )
749
      {
749
      {
Line 758... Line 758...
758
############################################
758
############################################
759
# set orig/total percentages for all posters
759
# set orig/total percentages for all posters
760
############################################
760
############################################
761
sub fixpercent
761
sub fixpercent
762
{
762
{
763
  foreach $poster ( keys %data )
763
  foreach my $poster ( keys %data )
764
  {
764
  {
765
    my $percent = 100;
765
    my $percent = 100;
766
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
766
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
767
    {
767
    {
768
      $percent = $data{$poster}{orig} * 100 /
768
      $percent = $data{$poster}{orig} * 100 /
Line 836... Line 836...
836
###################################
836
###################################
837
# Write data structures to a file #
837
# Write data structures to a file #
838
###################################
838
###################################
839
sub writedata
839
sub writedata
840
{
840
{
841
  open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
841
  open my $OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n";
842
  print OUTF "Data collected from alt.os.linux.mandrake\n\n";
842
  print $OUTF "Data collected from alt.os.linux.mandrake\n\n";
843
  print OUTF
843
  print $OUTF
844
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
844
    "Poster Data\nname : agent : count : size: orig : quoted : per cent\n";
845
  foreach my $name ( keys %data )
845
  foreach my $name ( keys %data )
846
  {
846
  {
847
    print OUTF
847
    print $OUTF
848
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
848
"$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n";
849
  }
849
  }
850
  print OUTF
850
  print $OUTF
851
"============================================================================\n";
851
"============================================================================\n";
852
  print OUTF "Thread subjects\n";
852
  print $OUTF "Thread subjects\n";
853
  print OUTF
853
  print $OUTF
854
"----------------------------------------------------------------------------\n";
854
"----------------------------------------------------------------------------\n";
855
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
855
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
856
  {
856
  {
857
    print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
857
    print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
858
  }
858
  }
859
  print OUTF
859
  print $OUTF
860
"============================================================================\n";
860
"============================================================================\n";
861
  print OUTF "Cross-posts\n";
861
  print $OUTF "Cross-posts\n";
862
  print OUTF
862
  print $OUTF
863
"----------------------------------------------------------------------------\n";
863
"----------------------------------------------------------------------------\n";
864
  foreach my $name ( sort keys %crossposts )
864
  foreach my $name ( sort keys %crossposts )
865
  {
865
  {
866
    print OUTF "$name : $crossposts{$name}\n";
866
    print $OUTF "$name : $crossposts{$name}\n";
867
  }
867
  }
868
  print OUTF print OUTF
868
  print $OUTF print $OUTF
869
"============================================================================\n";
869
"============================================================================\n";
870
  print OUTF "User agents\n";
870
  print $OUTF "User agents\n";
871
  print OUTF
871
  print $OUTF
872
"----------------------------------------------------------------------------\n";
872
"----------------------------------------------------------------------------\n";
873
  foreach my $name ( sort keys %agents )
873
  foreach my $name ( sort keys %agents )
874
  {
874
  {
875
    print OUTF "$name : $agents{$name}\n";
875
    print $OUTF "$name : $agents{$name}\n";
876
  }
876
  }
877
  print OUTF
877
  print $OUTF
878
"============================================================================\n";
878
"============================================================================\n";
879
  print OUTF "Time zones\n";
879
  print $OUTF "Time zones\n";
880
  print OUTF
880
  print $OUTF
881
"----------------------------------------------------------------------------\n";
881
"----------------------------------------------------------------------------\n";
882
  foreach my $name ( sort keys %tz )
882
  foreach my $name ( sort keys %tz )
883
  {
883
  {
884
    print OUTF "$name : $tz{$name}\n";
884
    print $OUTF "$name : $tz{$name}\n";
885
  }
885
  }
886
  close OUTF;
886
  close $OUTF;
887
}    # writedata
887
}    # writedata