Rev 8 | Rev 10 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8 | Rev 9 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
4 | use utf8; |
4 | use utf8; |
- | 5 | use encoding 'utf-8'; |
|
5 | use Encode; |
6 | use Encode; |
6 | 7 | ||
7 | ###########################
|
8 | ###########################
|
8 | # newsstat.pl version 0.4.1
|
9 | # newsstat.pl version 0.4.1
|
9 | 10 | ||
Line 18... | Line 19... | ||
18 | # (Numbers and paths can be configured below. -- PE)
|
19 | # (Numbers and paths can be configured below. -- PE)
|
19 | ############################################################################
|
20 | ############################################################################
|
20 | 21 | ||
21 | ############################################################################
|
22 | ############################################################################
|
22 | # RECENT CHANGES #
|
23 | # RECENT CHANGES #
|
- | 24 | # 2011-10-03 PE - Use more compatible shebang
|
|
- | 25 | # - Fixed some Perl::Critic-ized code
|
|
- | 26 | # - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
|
|
- | 27 | # - Formatted source code
|
|
23 | # 2011-07-03 PE - Use Encode to decode/encode MIME encodings
|
28 | # 2011-07-03 PE - Use Encode to decode/encode MIME encodings
|
24 | # - Use warnings, utf8 (just in case)
|
29 | # - Use warnings, utf8 (just in case)
|
25 | # - Documentation update
|
30 | # - Documentation update
|
26 | # N/A NN - Take newsgroup name as argument
|
31 | # N/A NN - Take newsgroup name as argument
|
27 | # 2004-06-19 NN - newsgroup name is $ARGV[0]
|
32 | # 2004-06-19 NN - newsgroup name is $ARGV[0]
|
Line 237... | Line 242... | ||
237 | $count = $topposters; |
242 | $count = $topposters; |
238 | }
|
243 | }
|
239 | printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
244 | printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
240 | print "=" x 76, "\n"; |
245 | print "=" x 76, "\n"; |
241 | $i = 0; |
246 | $i = 0; |
- | 247 | foreach
|
|
242 | foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
248 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
243 | {
|
249 | {
|
244 | my $name = substr( $poster, 0, 65 ); |
250 | my $name = substr( $poster, 0, 65 ); |
245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
251 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
246 | $data{$poster}{count}; |
252 | $data{$poster}{count}; |
247 | last if ( ++$i == $count ); |
253 | last if ( ++$i == $count ); |
Line 290... | Line 296... | ||
290 | }
|
296 | }
|
291 | printf "%s\n", |
297 | printf "%s\n", |
292 | ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
298 | ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
293 | print "=" x 76, "\n"; |
299 | print "=" x 76, "\n"; |
294 | $i = 0; |
300 | $i = 0; |
- | 301 | foreach my $poster ( |
|
295 | foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
302 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
296 | keys %data ) |
303 | keys %data |
- | 304 | )
|
|
297 | {
|
305 | {
|
298 | next if $data{$poster}{quoted} == 0; |
306 | next if $data{$poster}{quoted} == 0; |
299 | next if $data{$poster}{count} < 5; |
307 | next if $data{$poster}{count} < 5; |
300 | my $name = substr( $poster, 0, 63 ); |
308 | my $name = substr( $poster, 0, 63 ); |
301 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
309 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
Line 320... | Line 328... | ||
320 | }
|
328 | }
|
321 | printf "%s\n", |
329 | printf "%s\n", |
322 | ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
330 | ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
323 | print "=" x 76, "\n"; |
331 | print "=" x 76, "\n"; |
324 | $i = 0; |
332 | $i = 0; |
- | 333 | foreach my $poster ( |
|
325 | foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
334 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
326 | keys %data ) |
335 | keys %data |
- | 336 | )
|
|
327 | {
|
337 | {
|
328 | next if $data{$poster}{quoted} == 0; |
338 | next if $data{$poster}{quoted} == 0; |
329 | next if $data{$poster}{count} < 5; |
339 | next if $data{$poster}{count} < 5; |
330 | my $name = substr( $poster, 0, 63 ); |
340 | my $name = substr( $poster, 0, 63 ); |
331 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
341 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
Line 349... | Line 359... | ||
349 | $count = $topthreads; |
359 | $count = $topthreads; |
350 | }
|
360 | }
|
351 | printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
361 | printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
352 | print "=" x 76, "\n"; |
362 | print "=" x 76, "\n"; |
353 | $i = 0; |
363 | $i = 0; |
- | 364 | foreach my $thread ( |
|
354 | foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} } |
365 | sort { $threads{$b}{count} <=> $threads{$a}{count} } |
355 | keys %threads ) |
366 | keys %threads |
- | 367 | )
|
|
356 | {
|
368 | {
|
357 | my $name = substr( $thread, 0, 65 ); |
369 | my $name = substr( $thread, 0, 65 ); |
358 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
370 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
359 | $threads{$thread}{count}; |
371 | $threads{$thread}{count}; |
360 | last if ( ++$i == $count ); |
372 | last if ( ++$i == $count ); |
Line 375... | Line 387... | ||
375 | $count = $topthreads; |
387 | $count = $topthreads; |
376 | }
|
388 | }
|
377 | printf "%s\n", ¢red( "Top $count threads by size in KB", 76 ); |
389 | printf "%s\n", ¢red( "Top $count threads by size in KB", 76 ); |
378 | print "=" x 76, "\n"; |
390 | print "=" x 76, "\n"; |
379 | $i = 0; |
391 | $i = 0; |
- | 392 | foreach my $thread ( |
|
380 | foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} } |
393 | sort { $threads{$b}{size} <=> $threads{$a}{size} } |
381 | keys %threads ) |
394 | keys %threads |
- | 395 | )
|
|
382 | {
|
396 | {
|
383 | my $name = substr( $thread, 0, 65 ); |
397 | my $name = substr( $thread, 0, 65 ); |
384 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
398 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
385 | $threads{$thread}{size} / 1024; #/ |
399 | $threads{$thread}{size} / 1024; #/ |
386 | last if ( ++$i == $count ); |
400 | last if ( ++$i == $count ); |
Line 453... | Line 467... | ||
453 | $count = $topagents; |
467 | $count = $topagents; |
454 | }
|
468 | }
|
455 | printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
469 | printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
456 | print "=" x 76, "\n"; |
470 | print "=" x 76, "\n"; |
457 | $i = 0; |
471 | $i = 0; |
- | 472 | foreach my $agent ( |
|
458 | foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
473 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
459 | keys %distinct_agent ) |
474 | keys %distinct_agent |
- | 475 | )
|
|
460 | {
|
476 | {
|
461 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
477 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
462 | $distinct_agent{$agent}, |
478 | $distinct_agent{$agent}, |
463 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
479 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
464 | last if ( ++$i == $count ); |
480 | last if ( ++$i == $count ); |
Line 513... | Line 529... | ||
513 | {
|
529 | {
|
514 | $latest = $mtime; |
530 | $latest = $mtime; |
515 | }
|
531 | }
|
516 | 532 | ||
517 | # now read the file
|
533 | # now read the file
|
518 | open(my $FILE, $filename) or die "Can't open $filename: $!\n"; |
534 | open( my $FILE, $filename ) or die "Can't open $filename: $!\n"; |
519 | while (<$FILE>) |
535 | while (<$FILE>) |
520 | {
|
536 | {
|
521 | $totheader += length($_); # bump total header size |
537 | $totheader += length($_); # bump total header size |
522 | last if (/^\s*$/); # end of header? |
538 | last if (/^\s*$/); # end of header? |
523 | if (/^([^:\s]*):\s+(.*)/) |
539 | if (/^([^:\s]*):\s+(.*)/) |
Line 525... | Line 541... | ||
525 | my ( $key, $val ) = ( $1, $2 ); |
541 | my ( $key, $val ) = ( $1, $2 ); |
526 | $headers{$key} = decode( 'MIME-Header', $val ); |
542 | $headers{$key} = decode( 'MIME-Header', $val ); |
527 | $lcheader{ clean( lc($key) ) } = clean($val); |
543 | $lcheader{ clean( lc($key) ) } = clean($val); |
528 | }
|
544 | }
|
529 | }
|
545 | }
|
530 | @body = <$FILE>; # slurp up body |
546 | @body = <$FILE>; # slurp up body |
531 | close($FILE); |
547 | close($FILE); |
532 | } # getarticle |
548 | } # getarticle |
533 | 549 | ||
534 | ###################################
|
550 | ###################################
|
535 | # get data from the current article
|
551 | # get data from the current article
|
Line 537... | Line 553... | ||
537 | sub getdata
|
553 | sub getdata
|
538 | {
|
554 | {
|
539 | #### First, analyse header fields ####
|
555 | #### First, analyse header fields ####
|
540 | 556 | ||
541 | # Set up this poster if not defined, get counts, sizes
|
557 | # Set up this poster if not defined, get counts, sizes
|
542 | $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
558 | my $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
543 | if ( !defined( $data{$poster} ) ) |
559 | if ( !defined( $data{$poster} ) ) |
544 | { # seen this one before? |
560 | { # seen this one before? |
545 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
561 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
546 | $data{$poster}{orig} = 0; |
562 | $data{$poster}{orig} = 0; |
547 | $data{$poster}{quoted} = 0; |
563 | $data{$poster}{quoted} = 0; |
548 | }
|
564 | }
|
549 | $data{$poster}{count}++; # bump count for this poster |
565 | $data{$poster}{count}++; # bump count for this poster |
550 | $data{$poster}{size} += $filesize; # total size of file |
566 | $data{$poster}{size} += $filesize; # total size of file |
551 | 567 | ||
552 | # The User-Agent and/or X-Newsreader fields
|
568 | # The User-Agent and/or X-Newsreader fields
|
553 | # for User-Agent by poster
|
569 | # for User-Agent by poster
|
554 | if ( defined $lcheader{"user-agent"} ) |
570 | if ( defined $lcheader{"user-agent"} ) |
555 | {
|
571 | {
|
Line 694... | Line 710... | ||
694 | }
|
710 | }
|
695 | 711 | ||
696 | # Get the time zone
|
712 | # Get the time zone
|
697 | $_ = $headers{"Date"}; |
713 | $_ = $headers{"Date"}; |
698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
714 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
699 | if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
715 | if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
700 | {
|
716 | {
|
701 | $tz = "UTC"; |
717 | $tz = "UTC"; |
702 | }
|
718 | }
|
703 | $tz{$tz}++; |
719 | $tz{$tz}++; |
704 | 720 | ||
Line 715... | Line 731... | ||
715 | # Bill Unruh uses ] quotes, and another poster uses ::
|
731 | # Bill Unruh uses ] quotes, and another poster uses ::
|
716 | }
|
732 | }
|
717 | elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
733 | elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
718 | { # are we in a quote line? |
734 | { # are we in a quote line? |
719 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
735 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
720 | $totquoted += length($_); |
736 | $totquoted += length($_); |
721 | }
|
737 | }
|
722 | elsif (/-- /) |
738 | elsif (/-- /) |
723 | {
|
739 | {
|
724 | $insig = 1; |
740 | $insig = 1; |
725 | }
|
741 | }
|
726 | else
|
742 | else
|
727 | {
|
743 | {
|
728 | 744 | ||
729 | # we must be processing an original line
|
745 | # we must be processing an original line
|
730 | $data{$poster}{orig} += length($_); # bump count of original chrs |
746 | $data{$poster}{orig} += length($_); # bump count of original chrs |
731 | $totorig += length($_); |
747 | $totorig += length($_); |
732 | }
|
748 | }
|
733 | } # end for (@body) |
749 | } # end for (@body) |
734 | 750 | ||
735 | } # getdata |
751 | } # getdata |
736 | 752 | ||
Line 763... | Line 779... | ||
763 | foreach my $poster ( keys %data ) |
779 | foreach my $poster ( keys %data ) |
764 | {
|
780 | {
|
765 | my $percent = 100; |
781 | my $percent = 100; |
766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
782 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
767 | {
|
783 | {
|
- | 784 | $percent = |
|
768 | $percent = $data{$poster}{orig} * 100 / |
785 | $data{$poster}{orig} * 100 / |
769 | ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
786 | ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
770 | }
|
787 | }
|
771 | elsif ( $data{$poster}{orig} == 0 ) |
788 | elsif ( $data{$poster}{orig} == 0 ) |
772 | {
|
789 | {
|
773 | $percent = 0; |
790 | $percent = 0; |
Line 852... | Line 869... | ||
852 | print $OUTF "Thread subjects\n"; |
869 | print $OUTF "Thread subjects\n"; |
853 | print $OUTF |
870 | print $OUTF |
854 | "----------------------------------------------------------------------------\n"; |
871 | "----------------------------------------------------------------------------\n"; |
855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
872 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
856 | {
|
873 | {
|
- | 874 | print $OUTF |
|
857 | print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
875 | "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
858 | }
|
876 | }
|
859 | print $OUTF |
877 | print $OUTF |
860 | "============================================================================\n"; |
878 | "============================================================================\n"; |
861 | print $OUTF "Cross-posts\n"; |
879 | print $OUTF "Cross-posts\n"; |
862 | print $OUTF |
880 | print $OUTF |