Rev 30 | Rev 33 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 30 | Rev 31 | ||
---|---|---|---|
Line 13... | Line 13... | ||
13 | use constant DEBUG => 0; |
13 | use constant DEBUG => 0; |
14 | 14 | ||
15 | ## newsstat.pl
|
15 | ## newsstat.pl
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
17 | ## Based on work by Garry Knight et al.
|
17 | ## Based on work by Garry Knight et al.
|
18 | ##
|
18 | ##
|
19 | ## This program is free software: you can redistribute it and/or modify
|
19 | ## This program is free software: you can redistribute it and/or modify
|
20 | ## it under the terms of the GNU General Public License as published by
|
20 | ## it under the terms of the GNU General Public License as published by
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
22 | ## (at your option) any later version.
|
22 | ## (at your option) any later version.
|
23 | ##
|
23 | ##
|
24 | ## This program is distributed in the hope that it will be useful,
|
24 | ## This program is distributed in the hope that it will be useful,
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
27 | ## GNU General Public License for more details.
|
27 | ## GNU General Public License for more details.
|
28 | ##
|
28 | ##
|
29 | ## You should have received a copy of the GNU General Public License
|
29 | ## You should have received a copy of the GNU General Public License
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
31 | 31 | ||
32 | ## Print out all text to STDOUT UTF-8 encoded
|
32 | ## Print out all text to STDOUT UTF-8 encoded
|
33 | binmode STDOUT, ':encoding(UTF-8)'; |
33 | binmode STDOUT, ':encoding(UTF-8)'; |
Line 96... | Line 96... | ||
96 | 96 | ||
97 | ###################### USER CONFIGURATIONS ############################
|
97 | ###################### USER CONFIGURATIONS ############################
|
98 | 98 | ||
99 | ## The name of the group to do stats for
|
99 | ## The name of the group to do stats for
|
100 | my $newsgroup_name = $ARGV[0]; |
100 | my $newsgroup_name = $ARGV[0]; |
101 | $newsgroup_name or usage(); |
101 | $newsgroup_name // usage(); |
102 | 102 | ||
103 | ## Check for removal flags
|
103 | ## Check for removal flags
|
104 | my $ix; |
104 | my $ix; |
105 | my $j; |
105 | my $j; |
106 | my %skipSec; |
106 | my %skipSec; |
Line 247... | Line 247... | ||
247 | my $timestamp = $msg->timestamp(); |
247 | my $timestamp = $msg->timestamp(); |
248 | my $date = $msg->study('Date'); |
248 | my $date = $msg->study('Date'); |
249 | 249 | ||
250 | ## Disregard article if timestamp is not in range
|
250 | ## Disregard article if timestamp is not in range
|
251 | dmsg($timestamp) if DEBUG; |
251 | dmsg($timestamp) if DEBUG; |
252 | if ( $timestamp < $start or $timestamp >= $end ) |
252 | if ( $timestamp < $start || $timestamp >= $end ) |
253 | {
|
253 | {
|
254 | dmsg("Posting on $date ignored.") if DEBUG; |
254 | dmsg("Posting on $date ignored.") if DEBUG; |
255 | return; |
255 | return; |
256 | }
|
256 | }
|
257 | 257 | ||
Line 262... | Line 262... | ||
262 | 262 | ||
263 | ## get stats about the file itself
|
263 | ## get stats about the file itself
|
264 | my $filesize = -s $filename; # get total size of file |
264 | my $filesize = -s $filename; # get total size of file |
265 | $totsize += $filesize; # bump total sizes of all files |
265 | $totsize += $filesize; # bump total sizes of all files |
266 | 266 | ||
267 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
267 | if ( ( not defined $earliest ) || $timestamp < $earliest ) |
268 | {
|
268 | {
|
269 | $earliest = $timestamp; |
269 | $earliest = $timestamp; |
270 | }
|
270 | }
|
271 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
271 | elsif ( ( not defined $latest ) || $timestamp > $latest ) |
272 | {
|
272 | {
|
273 | $latest = $timestamp; |
273 | $latest = $timestamp; |
274 | }
|
274 | }
|
275 | 275 | ||
276 | #print "timestamp: $timestamp\n";
|
276 | #print "timestamp: $timestamp\n";
|
Line 304... | Line 304... | ||
304 | $data{$poster}{'count'}++; # bump count for this poster |
304 | $data{$poster}{'count'}++; # bump count for this poster |
305 | $data{$poster}{'size'} += $filesize; # total size of file |
305 | $data{$poster}{'size'} += $filesize; # total size of file |
306 | 306 | ||
307 | ## The User-Agent and/or X-Newsreader fields
|
307 | ## The User-Agent and/or X-Newsreader fields
|
308 | ## for User-Agent by poster
|
308 | ## for User-Agent by poster
|
309 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
309 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); |
310 | if ( defined $ua ) |
310 | if ( defined $ua ) |
311 | {
|
311 | {
|
312 | $data{$poster}{'agent'} = $ua; |
312 | $data{$poster}{'agent'} = $ua; |
313 | 313 | ||
314 | ## DEBUG
|
314 | ## DEBUG
|
Line 365... | Line 365... | ||
365 | # bump total sig size
|
365 | # bump total sig size
|
366 | $totsig += length($_); |
366 | $totsig += length($_); |
367 | }
|
367 | }
|
368 | ## are we in a quote line?
|
368 | ## are we in a quote line?
|
369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
370 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
370 | elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) |
371 | {
|
371 | {
|
372 | ## bump count of quoted chrs
|
372 | ## bump count of quoted chrs
|
373 | $data{$poster}{'quoted'} += length($_); |
373 | $data{$poster}{'quoted'} += length($_); |
374 | $totquoted += length($_); |
374 | $totquoted += length($_); |
375 | }
|
375 | }
|
Line 393... | Line 393... | ||
393 | 393 | ||
394 | sub get_agent
|
394 | sub get_agent
|
395 | {
|
395 | {
|
396 | my $msg = shift; |
396 | my $msg = shift; |
397 | 397 | ||
398 | my $ua = |
- | |
399 | $msg->study('User-Agent') |
- | |
400 | or $msg->study('X-Newsreader') |
398 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') |
401 | or $msg->study('X-Mailer'); |
399 | // $msg->study('X-Mailer'); |
- | 400 | ||
402 | if ( not defined $ua ) |
401 | if ( not defined $ua ) |
403 | {
|
402 | {
|
404 | my $org = $msg->study('Organization'); |
403 | my $org = $msg->study('Organization'); |
405 | if ( defined $org |
404 | if ( defined $org |
406 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
405 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
Line 670... | Line 669... | ||
670 | $formatter->format_number($count), |
669 | $formatter->format_number($count), |
671 | $formatter->format_bytes( $totsize / $count, |
670 | $formatter->format_bytes( $totsize / $count, |
672 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
671 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
673 | printf __"Total number of user agents: %d\n", |
672 | printf __"Total number of user agents: %d\n", |
674 | $formatter->format_number( scalar keys %agents ); |
673 | $formatter->format_number( scalar keys %agents ); |
675 | print "\n", "=" x 76, "\n" ; |
674 | print "\n", "=" x 76, "\n"; |
676 | ########################################
|
675 | ########################################
|
677 | ## Show posters by article count Sec 1;
|
676 | ## Show posters by article count Sec 1;
|
678 | ########################################
|
677 | ########################################
|
679 | unless ( $skipSec{1} ) |
678 | unless ( $skipSec{1} ) |
680 | {
|
679 | {
|
Line 736... | Line 735... | ||
736 | }
|
735 | }
|
737 | 736 | ||
738 | #####################################
|
737 | #####################################
|
739 | ## Show top posters for original text
|
738 | ## Show top posters for original text
|
740 | #####################################
|
739 | #####################################
|
- | 740 | my $topposters_real = 0; |
|
- | 741 | ||
741 | unless ( $skipSec{3} ) |
742 | unless ( $skipSec{3} ) |
742 | {
|
743 | {
|
743 | if ( keys %data < $topposters ) |
744 | if ( keys %data < $topposters ) |
744 | {
|
745 | {
|
745 | $count = keys %data; |
746 | $count = keys %data; |
746 | }
|
747 | }
|
747 | else
|
748 | else
|
748 | {
|
749 | {
|
749 | $count = $topposters; |
750 | $count = $topposters; |
750 | }
|
751 | }
|
- | 752 | ||
751 | printf "%s\n", |
753 | printf "%s\n", |
752 | centred(
|
754 | centred(
|
753 | __x(
|
755 | __x(
|
754 | "Top {count} responders by original text (> 5 posts)", |
756 | "Top {count} responders by original text (> 5 posts)", |
755 | count => $topposters |
757 | count => $topposters |
756 | ), |
758 | ), |
757 | 76
|
759 | 76
|
758 | ); |
760 | ); |
759 | print "=" x 76, "\n"; |
761 | print "=" x 76, "\n"; |
760 | my $i = 0; |
- | |
761 | foreach my $poster ( |
762 | foreach my $poster ( |
762 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
763 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
763 | keys %data |
764 | keys %data |
764 | )
|
765 | )
|
765 | {
|
766 | {
|
766 | next if $data{$poster}{quoted} == 0; |
767 | next if $data{$poster}{quoted} == 0; |
767 | next if $data{$poster}{count} < 5; |
768 | next if $data{$poster}{count} < 5; |
768 | my $name = substr( $poster, 0, 63 ); |
769 | my $name = substr( $poster, 0, 63 ); |
769 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
770 | printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, |
- | 771 | rpad( $poster, 63, "." ), |
|
770 | $data{$poster}{percent}; |
772 | $data{$poster}{percent}; |
771 | last if ( ++$i == $count ); |
773 | last if ( ++$topposters_real == $count ); |
772 | }
|
774 | }
|
773 | print "\n", "=" x 76, "\n"; |
775 | print "\n", "=" x 76, "\n"; |
774 | }
|
776 | }
|
775 | 777 | ||
776 | ########################################
|
778 | ########################################
|
777 | ## Show bottom posters for original text
|
779 | ## Show bottom posters for original text
|
778 | ########################################
|
780 | ########################################
|
- | 781 | ||
- | 782 | $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; |
|
- | 783 | ||
779 | unless ( $skipSec{4} ) |
784 | unless ( $skipSec{4} ) |
780 | {
|
785 | {
|
781 | if ( keys %data < $topposters ) |
786 | if ( keys %data < $topposters ) |
782 | {
|
787 | {
|
783 | $count = keys %data; |
788 | $count = keys %data; |
784 | }
|
789 | }
|
785 | else
|
790 | else
|
786 | {
|
791 | {
|
787 | $count = $topposters; |
792 | $count = $topposters; |
788 | }
|
793 | }
|
- | 794 | ||
789 | printf "%s\n", |
795 | printf "%s\n", |
790 | centred(
|
796 | centred(
|
791 | __x(
|
797 | __x(
|
792 | "Bottom {count} responders by original text (> 5 posts)", |
798 | "Bottom {count} responders by original text (> 5 posts)", |
793 | count => $topposters |
799 | count => $topposters |