Rev 7 | Rev 9 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 7 | Rev 8 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | #!/usr/bin/perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
4 | use utf8; |
4 | use utf8; |
5 | use Encode; |
5 | use Encode; |
6 | 6 | ||
7 | #########################
|
7 | ###########################
|
8 | # newsstat.pl version 0.4
|
8 | # newsstat.pl version 0.4.1
|
9 | 9 | ||
10 | ############################################################################
|
10 | ############################################################################
|
11 | # Collect statistics about a newsgroup (specified by first argument) in
|
11 | # Collect statistics about a newsgroup (specified by first argument) in
|
12 | # the local news spool. Check all articles in the last 30-day period.
|
12 | # the local news spool. Check all articles in the last 30-day period.
|
13 | # Rank posters by number of posts and by volume of posts, report on top and
|
13 | # Rank posters by number of posts and by volume of posts, report on top and
|
Line 237... | Line 237... | ||
237 | $count = $topposters; |
237 | $count = $topposters; |
238 | }
|
238 | }
|
239 | printf "%s\n", &centred( "Top $count posters by number of articles", 76 ); |
239 | printf "%s\n", &centred( "Top $count posters by number of articles", 76 ); |
240 | print "=" x 76, "\n"; |
240 | print "=" x 76, "\n"; |
241 | $i = 0; |
241 | $i = 0; |
242 | foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
242 | foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
243 | {
|
243 | {
|
244 | my $name = substr( $poster, 0, 65 ); |
244 | my $name = substr( $poster, 0, 65 ); |
245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
246 | $data{$poster}{count}; |
246 | $data{$poster}{count}; |
247 | last if ( ++$i == $count ); |
247 | last if ( ++$i == $count ); |
Line 263... | Line 263... | ||
263 | $count = $topposters; |
263 | $count = $topposters; |
264 | }
|
264 | }
|
265 | printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 ); |
265 | printf "%s\n", &centred( "Top $count posters by article size in Kbytes", 76 ); |
266 | print "=" x 76, "\n"; |
266 | print "=" x 76, "\n"; |
267 | $i = 0; |
267 | $i = 0; |
268 | foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
268 | foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
269 | {
|
269 | {
|
270 | my $name = substr( $poster, 0, 62 ); |
270 | my $name = substr( $poster, 0, 62 ); |
271 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
271 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
272 | $data{$poster}{size} / 1024; #/ |
272 | $data{$poster}{size} / 1024; #/ |
273 | last if ( ++$i == $count ); |
273 | last if ( ++$i == $count ); |
Line 290... | Line 290... | ||
290 | }
|
290 | }
|
291 | printf "%s\n", |
291 | printf "%s\n", |
292 | &centred( "Top $count responders by original text (> 5 posts)", 76 ); |
292 | &centred( "Top $count responders by original text (> 5 posts)", 76 ); |
293 | print "=" x 76, "\n"; |
293 | print "=" x 76, "\n"; |
294 | $i = 0; |
294 | $i = 0; |
295 | foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
295 | foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
296 | keys %data ) |
296 | keys %data ) |
297 | {
|
297 | {
|
298 | next if $data{$poster}{quoted} == 0; |
298 | next if $data{$poster}{quoted} == 0; |
299 | next if $data{$poster}{count} < 5; |
299 | next if $data{$poster}{count} < 5; |
300 | my $name = substr( $poster, 0, 63 ); |
300 | my $name = substr( $poster, 0, 63 ); |
Line 320... | Line 320... | ||
320 | }
|
320 | }
|
321 | printf "%s\n", |
321 | printf "%s\n", |
322 | &centred( "Bottom $count responders by original text (> 5 posts)", 76 ); |
322 | &centred( "Bottom $count responders by original text (> 5 posts)", 76 ); |
323 | print "=" x 76, "\n"; |
323 | print "=" x 76, "\n"; |
324 | $i = 0; |
324 | $i = 0; |
325 | foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
325 | foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
326 | keys %data ) |
326 | keys %data ) |
327 | {
|
327 | {
|
328 | next if $data{$poster}{quoted} == 0; |
328 | next if $data{$poster}{quoted} == 0; |
329 | next if $data{$poster}{count} < 5; |
329 | next if $data{$poster}{count} < 5; |
330 | my $name = substr( $poster, 0, 63 ); |
330 | my $name = substr( $poster, 0, 63 ); |
Line 513... | Line 513... | ||
513 | {
|
513 | {
|
514 | $latest = $mtime; |
514 | $latest = $mtime; |
515 | }
|
515 | }
|
516 | 516 | ||
517 | # now read the file
|
517 | # now read the file
|
518 | open( FILE, $filename ) or die "Can't open $filename: $!\n"; |
518 | open(my $FILE, $filename) or die "Can't open $filename: $!\n"; |
519 | while (<FILE>) |
519 | while (<$FILE>) |
520 | {
|
520 | {
|
521 | $totheader += length($_); # bump total header size |
521 | $totheader += length($_); # bump total header size |
522 | last if (/^\s*$/); # end of header? |
522 | last if (/^\s*$/); # end of header? |
523 | if (/^([^:\s]*):\s+(.*)/) |
523 | if (/^([^:\s]*):\s+(.*)/) |
524 | {
|
524 | {
|
525 | my ( $key, $val ) = ( $1, $2 ); |
525 | my ( $key, $val ) = ( $1, $2 ); |
526 | $headers{$key} = decode( 'MIME-Header', $val ); |
526 | $headers{$key} = decode( 'MIME-Header', $val ); |
527 | $lcheader{ clean( lc($key) ) } = clean($val); |
527 | $lcheader{ clean( lc($key) ) } = clean($val); |
528 | }
|
528 | }
|
529 | }
|
529 | }
|
530 | @body = <FILE>; # slurp up body |
530 | @body = <$FILE>; # slurp up body |
531 | close(FILE); |
531 | close($FILE); |
532 | } # getarticle |
532 | } # getarticle |
533 | 533 | ||
534 | ###################################
|
534 | ###################################
|
535 | # get data from the current article
|
535 | # get data from the current article
|
536 | ###################################
|
536 | ###################################
|
Line 694... | Line 694... | ||
694 | }
|
694 | }
|
695 | 695 | ||
696 | # Get the time zone
|
696 | # Get the time zone
|
697 | $_ = $headers{"Date"}; |
697 | $_ = $headers{"Date"}; |
698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
699 | if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
699 | if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
700 | {
|
700 | {
|
701 | $tz = "UTC"; |
701 | $tz = "UTC"; |
702 | }
|
702 | }
|
703 | $tz{$tz}++; |
703 | $tz{$tz}++; |
704 | 704 | ||
Line 739... | Line 739... | ||
739 | # different versions into one per agent.
|
739 | # different versions into one per agent.
|
740 | ########################################
|
740 | ########################################
|
741 | sub countagents
|
741 | sub countagents
|
742 | {
|
742 | {
|
743 | POSTER:
|
743 | POSTER:
|
744 | foreach $poster ( keys %data ) |
744 | foreach my $poster ( keys %data ) |
745 | {
|
745 | {
|
746 | foreach my $agent_name ( keys %distinct_agent ) |
746 | foreach my $agent_name ( keys %distinct_agent ) |
747 | { # check against known ones |
747 | { # check against known ones |
748 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
748 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
749 | {
|
749 | {
|
Line 758... | Line 758... | ||
758 | ############################################
|
758 | ############################################
|
759 | # set orig/total percentages for all posters
|
759 | # set orig/total percentages for all posters
|
760 | ############################################
|
760 | ############################################
|
761 | sub fixpercent
|
761 | sub fixpercent
|
762 | {
|
762 | {
|
763 | foreach $poster ( keys %data ) |
763 | foreach my $poster ( keys %data ) |
764 | {
|
764 | {
|
765 | my $percent = 100; |
765 | my $percent = 100; |
766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
767 | {
|
767 | {
|
768 | $percent = $data{$poster}{orig} * 100 / |
768 | $percent = $data{$poster}{orig} * 100 / |
Line 836... | Line 836... | ||
836 | ###################################
|
836 | ###################################
|
837 | # Write data structures to a file #
|
837 | # Write data structures to a file #
|
838 | ###################################
|
838 | ###################################
|
839 | sub writedata
|
839 | sub writedata
|
840 | {
|
840 | {
|
841 | open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
841 | open my $OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
842 | print OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
842 | print $OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
843 | print OUTF |
843 | print $OUTF |
844 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
844 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
845 | foreach my $name ( keys %data ) |
845 | foreach my $name ( keys %data ) |
846 | {
|
846 | {
|
847 | print OUTF |
847 | print $OUTF |
848 | "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
848 | "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
849 | }
|
849 | }
|
850 | print OUTF |
850 | print $OUTF |
851 | "============================================================================\n"; |
851 | "============================================================================\n"; |
852 | print OUTF "Thread subjects\n"; |
852 | print $OUTF "Thread subjects\n"; |
853 | print OUTF |
853 | print $OUTF |
854 | "----------------------------------------------------------------------------\n"; |
854 | "----------------------------------------------------------------------------\n"; |
855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
856 | {
|
856 | {
|
857 | print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
857 | print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
858 | }
|
858 | }
|
859 | print OUTF |
859 | print $OUTF |
860 | "============================================================================\n"; |
860 | "============================================================================\n"; |
861 | print OUTF "Cross-posts\n"; |
861 | print $OUTF "Cross-posts\n"; |
862 | print OUTF |
862 | print $OUTF |
863 | "----------------------------------------------------------------------------\n"; |
863 | "----------------------------------------------------------------------------\n"; |
864 | foreach my $name ( sort keys %crossposts ) |
864 | foreach my $name ( sort keys %crossposts ) |
865 | {
|
865 | {
|
866 | print OUTF "$name : $crossposts{$name}\n"; |
866 | print $OUTF "$name : $crossposts{$name}\n"; |
867 | }
|
867 | }
|
868 | print OUTF print OUTF |
868 | print $OUTF print $OUTF |
869 | "============================================================================\n"; |
869 | "============================================================================\n"; |
870 | print OUTF "User agents\n"; |
870 | print $OUTF "User agents\n"; |
871 | print OUTF |
871 | print $OUTF |
872 | "----------------------------------------------------------------------------\n"; |
872 | "----------------------------------------------------------------------------\n"; |
873 | foreach my $name ( sort keys %agents ) |
873 | foreach my $name ( sort keys %agents ) |
874 | {
|
874 | {
|
875 | print OUTF "$name : $agents{$name}\n"; |
875 | print $OUTF "$name : $agents{$name}\n"; |
876 | }
|
876 | }
|
877 | print OUTF |
877 | print $OUTF |
878 | "============================================================================\n"; |
878 | "============================================================================\n"; |
879 | print OUTF "Time zones\n"; |
879 | print $OUTF "Time zones\n"; |
880 | print OUTF |
880 | print $OUTF |
881 | "----------------------------------------------------------------------------\n"; |
881 | "----------------------------------------------------------------------------\n"; |
882 | foreach my $name ( sort keys %tz ) |
882 | foreach my $name ( sort keys %tz ) |
883 | {
|
883 | {
|
884 | print OUTF "$name : $tz{$name}\n"; |
884 | print $OUTF "$name : $tz{$name}\n"; |
885 | }
|
885 | }
|
886 | close OUTF; |
886 | close $OUTF; |
887 | } # writedata |
887 | } # writedata |