Rev 8 | Rev 10 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 8 | Rev 9 | ||
|---|---|---|---|
| Line 1... | Line 1... | ||
| 1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
| 2 | use strict; |
2 | use strict; |
| 3 | use warnings; |
3 | use warnings; |
| 4 | use utf8; |
4 | use utf8; |
| - | 5 | use encoding 'utf-8'; |
|
| 5 | use Encode; |
6 | use Encode; |
| 6 | 7 | ||
| 7 | ###########################
|
8 | ###########################
|
| 8 | # newsstat.pl version 0.4.1
|
9 | # newsstat.pl version 0.4.1
|
| 9 | 10 | ||
| Line 18... | Line 19... | ||
| 18 | # (Numbers and paths can be configured below. -- PE)
|
19 | # (Numbers and paths can be configured below. -- PE)
|
| 19 | ############################################################################
|
20 | ############################################################################
|
| 20 | 21 | ||
| 21 | ############################################################################
|
22 | ############################################################################
|
| 22 | # RECENT CHANGES #
|
23 | # RECENT CHANGES #
|
| - | 24 | # 2011-10-03 PE - Use more compatible shebang
|
|
| - | 25 | # - Fixed some Perl::Critic-ized code
|
|
| - | 26 | # - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
|
|
| - | 27 | # - Formatted source code
|
|
| 23 | # 2011-07-03 PE - Use Encode to decode/encode MIME encodings
|
28 | # 2011-07-03 PE - Use Encode to decode/encode MIME encodings
|
| 24 | # - Use warnings, utf8 (just in case)
|
29 | # - Use warnings, utf8 (just in case)
|
| 25 | # - Documentation update
|
30 | # - Documentation update
|
| 26 | # N/A NN - Take newsgroup name as argument
|
31 | # N/A NN - Take newsgroup name as argument
|
| 27 | # 2004-06-19 NN - newsgroup name is $ARGV[0]
|
32 | # 2004-06-19 NN - newsgroup name is $ARGV[0]
|
| Line 237... | Line 242... | ||
| 237 | $count = $topposters; |
242 | $count = $topposters; |
| 238 | }
|
243 | }
|
| 239 | printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
244 | printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
| 240 | print "=" x 76, "\n"; |
245 | print "=" x 76, "\n"; |
| 241 | $i = 0; |
246 | $i = 0; |
| - | 247 | foreach
|
|
| 242 | foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
248 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| 243 | {
|
249 | {
|
| 244 | my $name = substr( $poster, 0, 65 ); |
250 | my $name = substr( $poster, 0, 65 ); |
| 245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
251 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| 246 | $data{$poster}{count}; |
252 | $data{$poster}{count}; |
| 247 | last if ( ++$i == $count ); |
253 | last if ( ++$i == $count ); |
| Line 290... | Line 296... | ||
| 290 | }
|
296 | }
|
| 291 | printf "%s\n", |
297 | printf "%s\n", |
| 292 | ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
298 | ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
| 293 | print "=" x 76, "\n"; |
299 | print "=" x 76, "\n"; |
| 294 | $i = 0; |
300 | $i = 0; |
| - | 301 | foreach my $poster ( |
|
| 295 | foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
302 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| 296 | keys %data ) |
303 | keys %data |
| - | 304 | )
|
|
| 297 | {
|
305 | {
|
| 298 | next if $data{$poster}{quoted} == 0; |
306 | next if $data{$poster}{quoted} == 0; |
| 299 | next if $data{$poster}{count} < 5; |
307 | next if $data{$poster}{count} < 5; |
| 300 | my $name = substr( $poster, 0, 63 ); |
308 | my $name = substr( $poster, 0, 63 ); |
| 301 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
309 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| Line 320... | Line 328... | ||
| 320 | }
|
328 | }
|
| 321 | printf "%s\n", |
329 | printf "%s\n", |
| 322 | ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
330 | ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
| 323 | print "=" x 76, "\n"; |
331 | print "=" x 76, "\n"; |
| 324 | $i = 0; |
332 | $i = 0; |
| - | 333 | foreach my $poster ( |
|
| 325 | foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
334 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| 326 | keys %data ) |
335 | keys %data |
| - | 336 | )
|
|
| 327 | {
|
337 | {
|
| 328 | next if $data{$poster}{quoted} == 0; |
338 | next if $data{$poster}{quoted} == 0; |
| 329 | next if $data{$poster}{count} < 5; |
339 | next if $data{$poster}{count} < 5; |
| 330 | my $name = substr( $poster, 0, 63 ); |
340 | my $name = substr( $poster, 0, 63 ); |
| 331 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
341 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| Line 349... | Line 359... | ||
| 349 | $count = $topthreads; |
359 | $count = $topthreads; |
| 350 | }
|
360 | }
|
| 351 | printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
361 | printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
| 352 | print "=" x 76, "\n"; |
362 | print "=" x 76, "\n"; |
| 353 | $i = 0; |
363 | $i = 0; |
| - | 364 | foreach my $thread ( |
|
| 354 | foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} } |
365 | sort { $threads{$b}{count} <=> $threads{$a}{count} } |
| 355 | keys %threads ) |
366 | keys %threads |
| - | 367 | )
|
|
| 356 | {
|
368 | {
|
| 357 | my $name = substr( $thread, 0, 65 ); |
369 | my $name = substr( $thread, 0, 65 ); |
| 358 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
370 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 359 | $threads{$thread}{count}; |
371 | $threads{$thread}{count}; |
| 360 | last if ( ++$i == $count ); |
372 | last if ( ++$i == $count ); |
| Line 375... | Line 387... | ||
| 375 | $count = $topthreads; |
387 | $count = $topthreads; |
| 376 | }
|
388 | }
|
| 377 | printf "%s\n", ¢red( "Top $count threads by size in KB", 76 ); |
389 | printf "%s\n", ¢red( "Top $count threads by size in KB", 76 ); |
| 378 | print "=" x 76, "\n"; |
390 | print "=" x 76, "\n"; |
| 379 | $i = 0; |
391 | $i = 0; |
| - | 392 | foreach my $thread ( |
|
| 380 | foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} } |
393 | sort { $threads{$b}{size} <=> $threads{$a}{size} } |
| 381 | keys %threads ) |
394 | keys %threads |
| - | 395 | )
|
|
| 382 | {
|
396 | {
|
| 383 | my $name = substr( $thread, 0, 65 ); |
397 | my $name = substr( $thread, 0, 65 ); |
| 384 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
398 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 385 | $threads{$thread}{size} / 1024; #/ |
399 | $threads{$thread}{size} / 1024; #/ |
| 386 | last if ( ++$i == $count ); |
400 | last if ( ++$i == $count ); |
| Line 453... | Line 467... | ||
| 453 | $count = $topagents; |
467 | $count = $topagents; |
| 454 | }
|
468 | }
|
| 455 | printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
469 | printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
| 456 | print "=" x 76, "\n"; |
470 | print "=" x 76, "\n"; |
| 457 | $i = 0; |
471 | $i = 0; |
| - | 472 | foreach my $agent ( |
|
| 458 | foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
473 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| 459 | keys %distinct_agent ) |
474 | keys %distinct_agent |
| - | 475 | )
|
|
| 460 | {
|
476 | {
|
| 461 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
477 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
| 462 | $distinct_agent{$agent}, |
478 | $distinct_agent{$agent}, |
| 463 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
479 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
| 464 | last if ( ++$i == $count ); |
480 | last if ( ++$i == $count ); |
| Line 513... | Line 529... | ||
| 513 | {
|
529 | {
|
| 514 | $latest = $mtime; |
530 | $latest = $mtime; |
| 515 | }
|
531 | }
|
| 516 | 532 | ||
| 517 | # now read the file
|
533 | # now read the file
|
| 518 | open(my $FILE, $filename) or die "Can't open $filename: $!\n"; |
534 | open( my $FILE, $filename ) or die "Can't open $filename: $!\n"; |
| 519 | while (<$FILE>) |
535 | while (<$FILE>) |
| 520 | {
|
536 | {
|
| 521 | $totheader += length($_); # bump total header size |
537 | $totheader += length($_); # bump total header size |
| 522 | last if (/^\s*$/); # end of header? |
538 | last if (/^\s*$/); # end of header? |
| 523 | if (/^([^:\s]*):\s+(.*)/) |
539 | if (/^([^:\s]*):\s+(.*)/) |
| Line 525... | Line 541... | ||
| 525 | my ( $key, $val ) = ( $1, $2 ); |
541 | my ( $key, $val ) = ( $1, $2 ); |
| 526 | $headers{$key} = decode( 'MIME-Header', $val ); |
542 | $headers{$key} = decode( 'MIME-Header', $val ); |
| 527 | $lcheader{ clean( lc($key) ) } = clean($val); |
543 | $lcheader{ clean( lc($key) ) } = clean($val); |
| 528 | }
|
544 | }
|
| 529 | }
|
545 | }
|
| 530 | @body = <$FILE>; # slurp up body |
546 | @body = <$FILE>; # slurp up body |
| 531 | close($FILE); |
547 | close($FILE); |
| 532 | } # getarticle |
548 | } # getarticle |
| 533 | 549 | ||
| 534 | ###################################
|
550 | ###################################
|
| 535 | # get data from the current article
|
551 | # get data from the current article
|
| Line 537... | Line 553... | ||
| 537 | sub getdata
|
553 | sub getdata
|
| 538 | {
|
554 | {
|
| 539 | #### First, analyse header fields ####
|
555 | #### First, analyse header fields ####
|
| 540 | 556 | ||
| 541 | # Set up this poster if not defined, get counts, sizes
|
557 | # Set up this poster if not defined, get counts, sizes
|
| 542 | $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
558 | my $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
| 543 | if ( !defined( $data{$poster} ) ) |
559 | if ( !defined( $data{$poster} ) ) |
| 544 | { # seen this one before? |
560 | { # seen this one before? |
| 545 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
561 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
| 546 | $data{$poster}{orig} = 0; |
562 | $data{$poster}{orig} = 0; |
| 547 | $data{$poster}{quoted} = 0; |
563 | $data{$poster}{quoted} = 0; |
| 548 | }
|
564 | }
|
| 549 | $data{$poster}{count}++; # bump count for this poster |
565 | $data{$poster}{count}++; # bump count for this poster |
| 550 | $data{$poster}{size} += $filesize; # total size of file |
566 | $data{$poster}{size} += $filesize; # total size of file |
| 551 | 567 | ||
| 552 | # The User-Agent and/or X-Newsreader fields
|
568 | # The User-Agent and/or X-Newsreader fields
|
| 553 | # for User-Agent by poster
|
569 | # for User-Agent by poster
|
| 554 | if ( defined $lcheader{"user-agent"} ) |
570 | if ( defined $lcheader{"user-agent"} ) |
| 555 | {
|
571 | {
|
| Line 694... | Line 710... | ||
| 694 | }
|
710 | }
|
| 695 | 711 | ||
| 696 | # Get the time zone
|
712 | # Get the time zone
|
| 697 | $_ = $headers{"Date"}; |
713 | $_ = $headers{"Date"}; |
| 698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
714 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
| 699 | if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
715 | if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
| 700 | {
|
716 | {
|
| 701 | $tz = "UTC"; |
717 | $tz = "UTC"; |
| 702 | }
|
718 | }
|
| 703 | $tz{$tz}++; |
719 | $tz{$tz}++; |
| 704 | 720 | ||
| Line 715... | Line 731... | ||
| 715 | # Bill Unruh uses ] quotes, and another poster uses ::
|
731 | # Bill Unruh uses ] quotes, and another poster uses ::
|
| 716 | }
|
732 | }
|
| 717 | elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
733 | elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
| 718 | { # are we in a quote line? |
734 | { # are we in a quote line? |
| 719 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
735 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
| 720 | $totquoted += length($_); |
736 | $totquoted += length($_); |
| 721 | }
|
737 | }
|
| 722 | elsif (/-- /) |
738 | elsif (/-- /) |
| 723 | {
|
739 | {
|
| 724 | $insig = 1; |
740 | $insig = 1; |
| 725 | }
|
741 | }
|
| 726 | else
|
742 | else
|
| 727 | {
|
743 | {
|
| 728 | 744 | ||
| 729 | # we must be processing an original line
|
745 | # we must be processing an original line
|
| 730 | $data{$poster}{orig} += length($_); # bump count of original chrs |
746 | $data{$poster}{orig} += length($_); # bump count of original chrs |
| 731 | $totorig += length($_); |
747 | $totorig += length($_); |
| 732 | }
|
748 | }
|
| 733 | } # end for (@body) |
749 | } # end for (@body) |
| 734 | 750 | ||
| 735 | } # getdata |
751 | } # getdata |
| 736 | 752 | ||
| Line 763... | Line 779... | ||
| 763 | foreach my $poster ( keys %data ) |
779 | foreach my $poster ( keys %data ) |
| 764 | {
|
780 | {
|
| 765 | my $percent = 100; |
781 | my $percent = 100; |
| 766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
782 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
| 767 | {
|
783 | {
|
| - | 784 | $percent = |
|
| 768 | $percent = $data{$poster}{orig} * 100 / |
785 | $data{$poster}{orig} * 100 / |
| 769 | ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
786 | ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
| 770 | }
|
787 | }
|
| 771 | elsif ( $data{$poster}{orig} == 0 ) |
788 | elsif ( $data{$poster}{orig} == 0 ) |
| 772 | {
|
789 | {
|
| 773 | $percent = 0; |
790 | $percent = 0; |
| Line 852... | Line 869... | ||
| 852 | print $OUTF "Thread subjects\n"; |
869 | print $OUTF "Thread subjects\n"; |
| 853 | print $OUTF |
870 | print $OUTF |
| 854 | "----------------------------------------------------------------------------\n"; |
871 | "----------------------------------------------------------------------------\n"; |
| 855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
872 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| 856 | {
|
873 | {
|
| - | 874 | print $OUTF |
|
| 857 | print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
875 | "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
| 858 | }
|
876 | }
|
| 859 | print $OUTF |
877 | print $OUTF |
| 860 | "============================================================================\n"; |
878 | "============================================================================\n"; |
| 861 | print $OUTF "Cross-posts\n"; |
879 | print $OUTF "Cross-posts\n"; |
| 862 | print $OUTF |
880 | print $OUTF |