Rev 30 | Rev 33 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 30 | Rev 31 | ||
|---|---|---|---|
| Line 13... | Line 13... | ||
| 13 | use constant DEBUG => 0; |
13 | use constant DEBUG => 0; |
| 14 | 14 | ||
| 15 | ## newsstat.pl
|
15 | ## newsstat.pl
|
| 16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
| 17 | ## Based on work by Garry Knight et al.
|
17 | ## Based on work by Garry Knight et al.
|
| 18 | ##
|
18 | ##
|
| 19 | ## This program is free software: you can redistribute it and/or modify
|
19 | ## This program is free software: you can redistribute it and/or modify
|
| 20 | ## it under the terms of the GNU General Public License as published by
|
20 | ## it under the terms of the GNU General Public License as published by
|
| 21 | ## the Free Software Foundation, either version 3 of the License, or
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
| 22 | ## (at your option) any later version.
|
22 | ## (at your option) any later version.
|
| 23 | ##
|
23 | ##
|
| 24 | ## This program is distributed in the hope that it will be useful,
|
24 | ## This program is distributed in the hope that it will be useful,
|
| 25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 27 | ## GNU General Public License for more details.
|
27 | ## GNU General Public License for more details.
|
| 28 | ##
|
28 | ##
|
| 29 | ## You should have received a copy of the GNU General Public License
|
29 | ## You should have received a copy of the GNU General Public License
|
| 30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 31 | 31 | ||
| 32 | ## Print out all text to STDOUT UTF-8 encoded
|
32 | ## Print out all text to STDOUT UTF-8 encoded
|
| 33 | binmode STDOUT, ':encoding(UTF-8)'; |
33 | binmode STDOUT, ':encoding(UTF-8)'; |
| Line 96... | Line 96... | ||
| 96 | 96 | ||
| 97 | ###################### USER CONFIGURATIONS ############################
|
97 | ###################### USER CONFIGURATIONS ############################
|
| 98 | 98 | ||
| 99 | ## The name of the group to do stats for
|
99 | ## The name of the group to do stats for
|
| 100 | my $newsgroup_name = $ARGV[0]; |
100 | my $newsgroup_name = $ARGV[0]; |
| 101 | $newsgroup_name or usage(); |
101 | $newsgroup_name // usage(); |
| 102 | 102 | ||
| 103 | ## Check for removal flags
|
103 | ## Check for removal flags
|
| 104 | my $ix; |
104 | my $ix; |
| 105 | my $j; |
105 | my $j; |
| 106 | my %skipSec; |
106 | my %skipSec; |
| Line 247... | Line 247... | ||
| 247 | my $timestamp = $msg->timestamp(); |
247 | my $timestamp = $msg->timestamp(); |
| 248 | my $date = $msg->study('Date'); |
248 | my $date = $msg->study('Date'); |
| 249 | 249 | ||
| 250 | ## Disregard article if timestamp is not in range
|
250 | ## Disregard article if timestamp is not in range
|
| 251 | dmsg($timestamp) if DEBUG; |
251 | dmsg($timestamp) if DEBUG; |
| 252 | if ( $timestamp < $start or $timestamp >= $end ) |
252 | if ( $timestamp < $start || $timestamp >= $end ) |
| 253 | {
|
253 | {
|
| 254 | dmsg("Posting on $date ignored.") if DEBUG; |
254 | dmsg("Posting on $date ignored.") if DEBUG; |
| 255 | return; |
255 | return; |
| 256 | }
|
256 | }
|
| 257 | 257 | ||
| Line 262... | Line 262... | ||
| 262 | 262 | ||
| 263 | ## get stats about the file itself
|
263 | ## get stats about the file itself
|
| 264 | my $filesize = -s $filename; # get total size of file |
264 | my $filesize = -s $filename; # get total size of file |
| 265 | $totsize += $filesize; # bump total sizes of all files |
265 | $totsize += $filesize; # bump total sizes of all files |
| 266 | 266 | ||
| 267 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
267 | if ( ( not defined $earliest ) || $timestamp < $earliest ) |
| 268 | {
|
268 | {
|
| 269 | $earliest = $timestamp; |
269 | $earliest = $timestamp; |
| 270 | }
|
270 | }
|
| 271 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
271 | elsif ( ( not defined $latest ) || $timestamp > $latest ) |
| 272 | {
|
272 | {
|
| 273 | $latest = $timestamp; |
273 | $latest = $timestamp; |
| 274 | }
|
274 | }
|
| 275 | 275 | ||
| 276 | #print "timestamp: $timestamp\n";
|
276 | #print "timestamp: $timestamp\n";
|
| Line 304... | Line 304... | ||
| 304 | $data{$poster}{'count'}++; # bump count for this poster |
304 | $data{$poster}{'count'}++; # bump count for this poster |
| 305 | $data{$poster}{'size'} += $filesize; # total size of file |
305 | $data{$poster}{'size'} += $filesize; # total size of file |
| 306 | 306 | ||
| 307 | ## The User-Agent and/or X-Newsreader fields
|
307 | ## The User-Agent and/or X-Newsreader fields
|
| 308 | ## for User-Agent by poster
|
308 | ## for User-Agent by poster
|
| 309 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
309 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); |
| 310 | if ( defined $ua ) |
310 | if ( defined $ua ) |
| 311 | {
|
311 | {
|
| 312 | $data{$poster}{'agent'} = $ua; |
312 | $data{$poster}{'agent'} = $ua; |
| 313 | 313 | ||
| 314 | ## DEBUG
|
314 | ## DEBUG
|
| Line 365... | Line 365... | ||
| 365 | # bump total sig size
|
365 | # bump total sig size
|
| 366 | $totsig += length($_); |
366 | $totsig += length($_); |
| 367 | }
|
367 | }
|
| 368 | ## are we in a quote line?
|
368 | ## are we in a quote line?
|
| 369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
| 370 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
370 | elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) |
| 371 | {
|
371 | {
|
| 372 | ## bump count of quoted chrs
|
372 | ## bump count of quoted chrs
|
| 373 | $data{$poster}{'quoted'} += length($_); |
373 | $data{$poster}{'quoted'} += length($_); |
| 374 | $totquoted += length($_); |
374 | $totquoted += length($_); |
| 375 | }
|
375 | }
|
| Line 393... | Line 393... | ||
| 393 | 393 | ||
| 394 | sub get_agent
|
394 | sub get_agent
|
| 395 | {
|
395 | {
|
| 396 | my $msg = shift; |
396 | my $msg = shift; |
| 397 | 397 | ||
| 398 | my $ua = |
- | |
| 399 | $msg->study('User-Agent') |
- | |
| 400 | or $msg->study('X-Newsreader') |
398 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') |
| 401 | or $msg->study('X-Mailer'); |
399 | // $msg->study('X-Mailer'); |
| - | 400 | ||
| 402 | if ( not defined $ua ) |
401 | if ( not defined $ua ) |
| 403 | {
|
402 | {
|
| 404 | my $org = $msg->study('Organization'); |
403 | my $org = $msg->study('Organization'); |
| 405 | if ( defined $org |
404 | if ( defined $org |
| 406 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
405 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
| Line 670... | Line 669... | ||
| 670 | $formatter->format_number($count), |
669 | $formatter->format_number($count), |
| 671 | $formatter->format_bytes( $totsize / $count, |
670 | $formatter->format_bytes( $totsize / $count, |
| 672 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
671 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
| 673 | printf __"Total number of user agents: %d\n", |
672 | printf __"Total number of user agents: %d\n", |
| 674 | $formatter->format_number( scalar keys %agents ); |
673 | $formatter->format_number( scalar keys %agents ); |
| 675 | print "\n", "=" x 76, "\n" ; |
674 | print "\n", "=" x 76, "\n"; |
| 676 | ########################################
|
675 | ########################################
|
| 677 | ## Show posters by article count Sec 1;
|
676 | ## Show posters by article count Sec 1;
|
| 678 | ########################################
|
677 | ########################################
|
| 679 | unless ( $skipSec{1} ) |
678 | unless ( $skipSec{1} ) |
| 680 | {
|
679 | {
|
| Line 736... | Line 735... | ||
| 736 | }
|
735 | }
|
| 737 | 736 | ||
| 738 | #####################################
|
737 | #####################################
|
| 739 | ## Show top posters for original text
|
738 | ## Show top posters for original text
|
| 740 | #####################################
|
739 | #####################################
|
| - | 740 | my $topposters_real = 0; |
|
| - | 741 | ||
| 741 | unless ( $skipSec{3} ) |
742 | unless ( $skipSec{3} ) |
| 742 | {
|
743 | {
|
| 743 | if ( keys %data < $topposters ) |
744 | if ( keys %data < $topposters ) |
| 744 | {
|
745 | {
|
| 745 | $count = keys %data; |
746 | $count = keys %data; |
| 746 | }
|
747 | }
|
| 747 | else
|
748 | else
|
| 748 | {
|
749 | {
|
| 749 | $count = $topposters; |
750 | $count = $topposters; |
| 750 | }
|
751 | }
|
| - | 752 | ||
| 751 | printf "%s\n", |
753 | printf "%s\n", |
| 752 | centred(
|
754 | centred(
|
| 753 | __x(
|
755 | __x(
|
| 754 | "Top {count} responders by original text (> 5 posts)", |
756 | "Top {count} responders by original text (> 5 posts)", |
| 755 | count => $topposters |
757 | count => $topposters |
| 756 | ), |
758 | ), |
| 757 | 76
|
759 | 76
|
| 758 | ); |
760 | ); |
| 759 | print "=" x 76, "\n"; |
761 | print "=" x 76, "\n"; |
| 760 | my $i = 0; |
- | |
| 761 | foreach my $poster ( |
762 | foreach my $poster ( |
| 762 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
763 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| 763 | keys %data |
764 | keys %data |
| 764 | )
|
765 | )
|
| 765 | {
|
766 | {
|
| 766 | next if $data{$poster}{quoted} == 0; |
767 | next if $data{$poster}{quoted} == 0; |
| 767 | next if $data{$poster}{count} < 5; |
768 | next if $data{$poster}{count} < 5; |
| 768 | my $name = substr( $poster, 0, 63 ); |
769 | my $name = substr( $poster, 0, 63 ); |
| 769 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
770 | printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, |
| - | 771 | rpad( $poster, 63, "." ), |
|
| 770 | $data{$poster}{percent}; |
772 | $data{$poster}{percent}; |
| 771 | last if ( ++$i == $count ); |
773 | last if ( ++$topposters_real == $count ); |
| 772 | }
|
774 | }
|
| 773 | print "\n", "=" x 76, "\n"; |
775 | print "\n", "=" x 76, "\n"; |
| 774 | }
|
776 | }
|
| 775 | 777 | ||
| 776 | ########################################
|
778 | ########################################
|
| 777 | ## Show bottom posters for original text
|
779 | ## Show bottom posters for original text
|
| 778 | ########################################
|
780 | ########################################
|
| - | 781 | ||
| - | 782 | $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; |
|
| - | 783 | ||
| 779 | unless ( $skipSec{4} ) |
784 | unless ( $skipSec{4} ) |
| 780 | {
|
785 | {
|
| 781 | if ( keys %data < $topposters ) |
786 | if ( keys %data < $topposters ) |
| 782 | {
|
787 | {
|
| 783 | $count = keys %data; |
788 | $count = keys %data; |
| 784 | }
|
789 | }
|
| 785 | else
|
790 | else
|
| 786 | {
|
791 | {
|
| 787 | $count = $topposters; |
792 | $count = $topposters; |
| 788 | }
|
793 | }
|
| - | 794 | ||
| 789 | printf "%s\n", |
795 | printf "%s\n", |
| 790 | centred(
|
796 | centred(
|
| 791 | __x(
|
797 | __x(
|
| 792 | "Bottom {count} responders by original text (> 5 posts)", |
798 | "Bottom {count} responders by original text (> 5 posts)", |
| 793 | count => $topposters |
799 | count => $topposters |