Rev 7 | Rev 9 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 7 | Rev 8 | ||
|---|---|---|---|
| Line 1... | Line 1... | ||
| 1 | #!/usr/bin/perl
|
1 | #!/usr/bin/env perl
|
| 2 | use strict; |
2 | use strict; |
| 3 | use warnings; |
3 | use warnings; |
| 4 | use utf8; |
4 | use utf8; |
| 5 | use Encode; |
5 | use Encode; |
| 6 | 6 | ||
| 7 | #########################
|
7 | ###########################
|
| 8 | # newsstat.pl version 0.4
|
8 | # newsstat.pl version 0.4.1
|
| 9 | 9 | ||
| 10 | ############################################################################
|
10 | ############################################################################
|
| 11 | # Collect statistics about a newsgroup (specified by first argument) in
|
11 | # Collect statistics about a newsgroup (specified by first argument) in
|
| 12 | # the local news spool. Check all articles in the last 30-day period.
|
12 | # the local news spool. Check all articles in the last 30-day period.
|
| 13 | # Rank posters by number of posts and by volume of posts, report on top and
|
13 | # Rank posters by number of posts and by volume of posts, report on top and
|
| Line 237... | Line 237... | ||
| 237 | $count = $topposters; |
237 | $count = $topposters; |
| 238 | }
|
238 | }
|
| 239 | printf "%s\n", &centred( "Top $count posters by number of articles", 76 ); |
239 | printf "%s\n", &centred( "Top $count posters by number of articles", 76 ); |
| 240 | print "=" x 76, "\n"; |
240 | print "=" x 76, "\n"; |
| 241 | $i = 0; |
241 | $i = 0; |
| 242 | foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
242 | foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| 243 | {
|
243 | {
|
| 244 | my $name = substr( $poster, 0, 65 ); |
244 | my $name = substr( $poster, 0, 65 ); |
| 245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| 246 | $data{$poster}{count}; |
246 | $data{$poster}{count}; |
| 247 | last if ( ++$i == $count ); |
247 | last if ( ++$i == $count ); |
| Line 263... | Line 263... | ||
| 263 | $count = $topposters; |
263 | $count = $topposters; |
| 264 | }
|
264 | }
|
| 265 | printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 ); |
265 | printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 ); |
| 266 | print "=" x 76, "\n"; |
266 | print "=" x 76, "\n"; |
| 267 | $i = 0; |
267 | $i = 0; |
| 268 | foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
268 | foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
| 269 | {
|
269 | {
|
| 270 | my $name = substr( $poster, 0, 62 ); |
270 | my $name = substr( $poster, 0, 62 ); |
| 271 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
271 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| 272 | $data{$poster}{size} / 1024; #/ |
272 | $data{$poster}{size} / 1024; #/ |
| 273 | last if ( ++$i == $count ); |
273 | last if ( ++$i == $count ); |
| Line 290... | Line 290... | ||
| 290 | }
|
290 | }
|
| 291 | printf "%s\n", |
291 | printf "%s\n", |
| 292 | &centred( "Top $count responders by original text (> 5 posts)", 76 ); |
292 | &centred( "Top $count responders by original text (> 5 posts)", 76 ); |
| 293 | print "=" x 76, "\n"; |
293 | print "=" x 76, "\n"; |
| 294 | $i = 0; |
294 | $i = 0; |
| 295 | foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
295 | foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| 296 | keys %data ) |
296 | keys %data ) |
| 297 | {
|
297 | {
|
| 298 | next if $data{$poster}{quoted} == 0; |
298 | next if $data{$poster}{quoted} == 0; |
| 299 | next if $data{$poster}{count} < 5; |
299 | next if $data{$poster}{count} < 5; |
| 300 | my $name = substr( $poster, 0, 63 ); |
300 | my $name = substr( $poster, 0, 63 ); |
| Line 320... | Line 320... | ||
| 320 | }
|
320 | }
|
| 321 | printf "%s\n", |
321 | printf "%s\n", |
| 322 | &centred( "Bottom $count responders by original text (> 5 posts)", 76 ); |
322 | &centred( "Bottom $count responders by original text (> 5 posts)", 76 ); |
| 323 | print "=" x 76, "\n"; |
323 | print "=" x 76, "\n"; |
| 324 | $i = 0; |
324 | $i = 0; |
| 325 | foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
325 | foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| 326 | keys %data ) |
326 | keys %data ) |
| 327 | {
|
327 | {
|
| 328 | next if $data{$poster}{quoted} == 0; |
328 | next if $data{$poster}{quoted} == 0; |
| 329 | next if $data{$poster}{count} < 5; |
329 | next if $data{$poster}{count} < 5; |
| 330 | my $name = substr( $poster, 0, 63 ); |
330 | my $name = substr( $poster, 0, 63 ); |
| Line 513... | Line 513... | ||
| 513 | {
|
513 | {
|
| 514 | $latest = $mtime; |
514 | $latest = $mtime; |
| 515 | }
|
515 | }
|
| 516 | 516 | ||
| 517 | # now read the file
|
517 | # now read the file
|
| 518 | open( FILE, $filename ) or die "Can't open $filename: $!\n"; |
518 | open(my $FILE, $filename) or die "Can't open $filename: $!\n"; |
| 519 | while (<FILE>) |
519 | while (<$FILE>) |
| 520 | {
|
520 | {
|
| 521 | $totheader += length($_); # bump total header size |
521 | $totheader += length($_); # bump total header size |
| 522 | last if (/^\s*$/); # end of header? |
522 | last if (/^\s*$/); # end of header? |
| 523 | if (/^([^:\s]*):\s+(.*)/) |
523 | if (/^([^:\s]*):\s+(.*)/) |
| 524 | {
|
524 | {
|
| 525 | my ( $key, $val ) = ( $1, $2 ); |
525 | my ( $key, $val ) = ( $1, $2 ); |
| 526 | $headers{$key} = decode( 'MIME-Header', $val ); |
526 | $headers{$key} = decode( 'MIME-Header', $val ); |
| 527 | $lcheader{ clean( lc($key) ) } = clean($val); |
527 | $lcheader{ clean( lc($key) ) } = clean($val); |
| 528 | }
|
528 | }
|
| 529 | }
|
529 | }
|
| 530 | @body = <FILE>; # slurp up body |
530 | @body = <$FILE>; # slurp up body |
| 531 | close(FILE); |
531 | close($FILE); |
| 532 | } # getarticle |
532 | } # getarticle |
| 533 | 533 | ||
| 534 | ###################################
|
534 | ###################################
|
| 535 | # get data from the current article
|
535 | # get data from the current article
|
| 536 | ###################################
|
536 | ###################################
|
| Line 694... | Line 694... | ||
| 694 | }
|
694 | }
|
| 695 | 695 | ||
| 696 | # Get the time zone
|
696 | # Get the time zone
|
| 697 | $_ = $headers{"Date"}; |
697 | $_ = $headers{"Date"}; |
| 698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
| 699 | if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
699 | if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
| 700 | {
|
700 | {
|
| 701 | $tz = "UTC"; |
701 | $tz = "UTC"; |
| 702 | }
|
702 | }
|
| 703 | $tz{$tz}++; |
703 | $tz{$tz}++; |
| 704 | 704 | ||
| Line 739... | Line 739... | ||
| 739 | # different versions into one per agent.
|
739 | # different versions into one per agent.
|
| 740 | ########################################
|
740 | ########################################
|
| 741 | sub countagents
|
741 | sub countagents
|
| 742 | {
|
742 | {
|
| 743 | POSTER:
|
743 | POSTER:
|
| 744 | foreach $poster ( keys %data ) |
744 | foreach my $poster ( keys %data ) |
| 745 | {
|
745 | {
|
| 746 | foreach my $agent_name ( keys %distinct_agent ) |
746 | foreach my $agent_name ( keys %distinct_agent ) |
| 747 | { # check against known ones |
747 | { # check against known ones |
| 748 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
748 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
| 749 | {
|
749 | {
|
| Line 758... | Line 758... | ||
| 758 | ############################################
|
758 | ############################################
|
| 759 | # set orig/total percentages for all posters
|
759 | # set orig/total percentages for all posters
|
| 760 | ############################################
|
760 | ############################################
|
| 761 | sub fixpercent
|
761 | sub fixpercent
|
| 762 | {
|
762 | {
|
| 763 | foreach $poster ( keys %data ) |
763 | foreach my $poster ( keys %data ) |
| 764 | {
|
764 | {
|
| 765 | my $percent = 100; |
765 | my $percent = 100; |
| 766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
| 767 | {
|
767 | {
|
| 768 | $percent = $data{$poster}{orig} * 100 / |
768 | $percent = $data{$poster}{orig} * 100 / |
| Line 836... | Line 836... | ||
| 836 | ###################################
|
836 | ###################################
|
| 837 | # Write data structures to a file #
|
837 | # Write data structures to a file #
|
| 838 | ###################################
|
838 | ###################################
|
| 839 | sub writedata
|
839 | sub writedata
|
| 840 | {
|
840 | {
|
| 841 | open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
841 | open my $OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
| 842 | print OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
842 | print $OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
| 843 | print OUTF |
843 | print $OUTF |
| 844 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
844 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
| 845 | foreach my $name ( keys %data ) |
845 | foreach my $name ( keys %data ) |
| 846 | {
|
846 | {
|
| 847 | print OUTF |
847 | print $OUTF |
| 848 | "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
848 | "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
| 849 | }
|
849 | }
|
| 850 | print OUTF |
850 | print $OUTF |
| 851 | "============================================================================\n"; |
851 | "============================================================================\n"; |
| 852 | print OUTF "Thread subjects\n"; |
852 | print $OUTF "Thread subjects\n"; |
| 853 | print OUTF |
853 | print $OUTF |
| 854 | "----------------------------------------------------------------------------\n"; |
854 | "----------------------------------------------------------------------------\n"; |
| 855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| 856 | {
|
856 | {
|
| 857 | print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
857 | print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
| 858 | }
|
858 | }
|
| 859 | print OUTF |
859 | print $OUTF |
| 860 | "============================================================================\n"; |
860 | "============================================================================\n"; |
| 861 | print OUTF "Cross-posts\n"; |
861 | print $OUTF "Cross-posts\n"; |
| 862 | print OUTF |
862 | print $OUTF |
| 863 | "----------------------------------------------------------------------------\n"; |
863 | "----------------------------------------------------------------------------\n"; |
| 864 | foreach my $name ( sort keys %crossposts ) |
864 | foreach my $name ( sort keys %crossposts ) |
| 865 | {
|
865 | {
|
| 866 | print OUTF "$name : $crossposts{$name}\n"; |
866 | print $OUTF "$name : $crossposts{$name}\n"; |
| 867 | }
|
867 | }
|
| 868 | print OUTF print OUTF |
868 | print $OUTF print $OUTF |
| 869 | "============================================================================\n"; |
869 | "============================================================================\n"; |
| 870 | print OUTF "User agents\n"; |
870 | print $OUTF "User agents\n"; |
| 871 | print OUTF |
871 | print $OUTF |
| 872 | "----------------------------------------------------------------------------\n"; |
872 | "----------------------------------------------------------------------------\n"; |
| 873 | foreach my $name ( sort keys %agents ) |
873 | foreach my $name ( sort keys %agents ) |
| 874 | {
|
874 | {
|
| 875 | print OUTF "$name : $agents{$name}\n"; |
875 | print $OUTF "$name : $agents{$name}\n"; |
| 876 | }
|
876 | }
|
| 877 | print OUTF |
877 | print $OUTF |
| 878 | "============================================================================\n"; |
878 | "============================================================================\n"; |
| 879 | print OUTF "Time zones\n"; |
879 | print $OUTF "Time zones\n"; |
| 880 | print OUTF |
880 | print $OUTF |
| 881 | "----------------------------------------------------------------------------\n"; |
881 | "----------------------------------------------------------------------------\n"; |
| 882 | foreach my $name ( sort keys %tz ) |
882 | foreach my $name ( sort keys %tz ) |
| 883 | {
|
883 | {
|
| 884 | print OUTF "$name : $tz{$name}\n"; |
884 | print $OUTF "$name : $tz{$name}\n"; |
| 885 | }
|
885 | }
|
| 886 | close OUTF; |
886 | close $OUTF; |
| 887 | } # writedata |
887 | } # writedata |