Rev 22 | Rev 24 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 22 | Rev 23 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
- | 4 | require 5.004; |
|
4 | 5 | ||
5 | #use diagnostics;
|
6 | #use diagnostics;
|
6 | use utf8; |
7 | use utf8; |
7 | use Encode; |
8 | use Encode; |
8 | 9 | ||
Line 10... | Line 11... | ||
10 | 11 | ||
11 | ## Print out all text to STDOUT UTF-8 encoded
|
12 | ## Print out all text to STDOUT UTF-8 encoded
|
12 | binmode STDOUT, ':encoding(UTF-8)'; |
13 | binmode STDOUT, ':encoding(UTF-8)'; |
13 | binmode STDERR, ':encoding(UTF-8)'; |
14 | binmode STDERR, ':encoding(UTF-8)'; |
14 | 15 | ||
- | 16 | ## L10n
|
|
- | 17 | use locale ':not_characters'; |
|
- | 18 | ||
- | 19 | # setlocale( LC_MESSAGES, '' );
|
|
- | 20 | require Number::Format; |
|
- | 21 | ||
- | 22 | ## i18n
|
|
15 | # FIXME: Automatically include resolved '.' in @INC
|
23 | ## FIXME: Automatically include resolved '.' in @INC
|
16 | # print join "\n", @INC;
|
24 | # print join "\n", @INC;
|
17 | 25 | ||
18 | use locale ':not_characters'; |
- | |
19 | use Locale::TextDomain ('de.pointedears.newsstat'); |
26 | use Locale::TextDomain ('de.pointedears.newsstat'); |
20 | use POSIX ('locale_h'); |
27 | use POSIX ('locale_h'); |
21 | use Locale::Messages qw (bind_textdomain_filter |
28 | use Locale::Messages qw (bind_textdomain_filter |
22 | bind_textdomain_codeset |
29 | bind_textdomain_codeset |
23 | turn_utf_8_on); |
30 | turn_utf_8_on); |
24 | #setlocale( LC_MESSAGES, '' );
|
- | |
- | 31 | ||
25 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
32 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
26 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
33 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
27 | 34 | ||
28 | require Mail::Message; |
35 | require Mail::Message; |
29 | require DateTime; |
36 | require DateTime; |
30 | require DateTime::Format::Mail; |
37 | require DateTime::Format::Mail; |
- | 38 | ||
- | 39 | # See comments in previous example
|
|
- | 40 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
|
- | 41 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
|
- | 42 | 'decimal_point' }; |
|
- | 43 | ||
- | 44 | # Apply defaults if values are missing
|
|
- | 45 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
|
- | 46 | $thousands_sep = ' ' unless $thousands_sep; |
|
- | 47 | ||
- | 48 | # grouping and mon_grouping are packed lists
|
|
- | 49 | # of small integers (characters) telling the
|
|
- | 50 | # grouping (thousand_seps and mon_thousand_seps
|
|
- | 51 | # being the group dividers) of numbers and
|
|
- | 52 | # monetary quantities. The integers' meanings:
|
|
- | 53 | # 255 means no more grouping, 0 means repeat
|
|
- | 54 | # the previous grouping, 1-254 means use that
|
|
- | 55 | # as the current grouping. Grouping goes from
|
|
- | 56 | # right to left (low to high digits). In the
|
|
- | 57 | # below we cheat slightly by never using anything
|
|
- | 58 | # else than the first grouping (whatever that is).
|
|
- | 59 | my @grouping; |
|
- | 60 | if ($grouping) |
|
- | 61 | {
|
|
- | 62 | @grouping = unpack( "C*", $grouping ); |
|
- | 63 | }
|
|
- | 64 | else
|
|
- | 65 | {
|
|
- | 66 | @grouping = (3); |
|
- | 67 | }
|
|
- | 68 | ||
- | 69 | ## FIXME: Why don't the defaults work already?
|
|
- | 70 | my $formatter = new Number::Format( |
|
- | 71 | -decimal_point => $decimal_point, |
|
- | 72 | -thousands_sep => $thousands_sep, |
|
- | 73 | ||
- | 74 | # -grouping => $grouping[0]
|
|
- | 75 | ); |
|
31 | 76 | ||
32 | ###################### USER CONFIGURATIONS ############################
|
77 | ###################### USER CONFIGURATIONS ############################
|
33 | 78 | ||
34 | ## The name of the group to do stats for
|
79 | ## The name of the group to do stats for
|
35 | my $newsgroup_name = $ARGV[0]; |
80 | my $newsgroup_name = $ARGV[0]; |
36 | $newsgroup_name or usage(); |
81 | $newsgroup_name or usage(); |
Line 536... | Line 581... | ||
536 | } # write_data |
581 | } # write_data |
537 | 582 | ||
538 | sub display_results
|
583 | sub display_results
|
539 | {
|
584 | {
|
540 | #################### DISPLAY RESULTS #####################
|
585 | #################### DISPLAY RESULTS #####################
|
541 | print "=" x 76, "\n"; |
586 | println( "=" x 76 ); |
542 | printf "%s\n", |
587 | printf "%s\n", |
543 | centred(
|
588 | centred(
|
544 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
589 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
545 | 76 ); |
590 | 76 ); |
546 | print "=" x 76, "\n"; |
591 | println( "=" x 76 ); |
547 | printf "%s\n", |
592 | printf "%s\n", |
548 | centred(
|
593 | centred(
|
549 | __ |
594 | __(
|
550 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)", |
595 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
- | 596 | ), |
|
551 | 76
|
597 | 76
|
552 | ); |
598 | ); |
553 | print "\n\n"; |
599 | print "\n\n"; |
554 | printf __"Total posts considered: %s over %d days" . "\n", |
600 | printf __"Total posts considered: %s over %d days\n", |
555 | commify($totalposts), |
601 | $formatter->format_number($totalposts), |
556 | $numdays; |
602 | $formatter->format_number($numdays); |
557 | my $time_locale = setlocale(LC_TIME); |
603 | my $time_locale = setlocale(LC_TIME); |
558 | my $earliest_datetime = DateTime->from_epoch( |
604 | my $earliest_datetime = DateTime->from_epoch( |
559 | epoch => $earliest, |
605 | epoch => $earliest, |
560 | locale => $time_locale, |
606 | locale => $time_locale, |
561 | time_zone => 'UTC', |
607 | time_zone => 'UTC', |
562 | ); |
608 | ); |
563 | my $latest_datetime = DateTime->from_epoch( |
609 | my $latest_datetime = DateTime->from_epoch( |
564 | epoch => $latest, |
610 | epoch => $latest, |
565 | locale => $time_locale, |
611 | locale => $time_locale, |
566 | time_zone => 'UTC', |
612 | time_zone => 'UTC', |
567 | ); |
613 | ); |
568 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
614 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
- | 615 | printf __"Earliest article: %s\n", |
|
569 | printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format); |
616 | $earliest_datetime->strftime($datetime_format); |
- | 617 | printf __"Latest article: %s\n", |
|
570 | printf __"Latest article" . ": %s\n", $latest_datetime->strftime($datetime_format); |
618 | $latest_datetime->strftime($datetime_format); |
571 | printf __"Original articles: %s; replies" . ": %s\n", |
619 | printf __"Original articles: %s; replies: %s\n", |
572 | commify($origposts), |
620 | $formatter->format_number($origposts), |
573 | commify($replies); |
621 | $formatter->format_number($replies); |
574 | printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n", |
622 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
- | 623 | $formatter->format_number($totsize), |
|
575 | commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
624 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
576 | printf __ |
- | |
577 | "Average %s articles per day, %.2f MiB per day, %s bytes per article\n", |
625 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
578 | commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
626 | $formatter->format_number( int( $totalposts / $numdays ) ), |
- | 627 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
|
579 | commify( int( $totsize / $totalposts ) ); |
628 | $formatter->format_number( int( $totsize / $totalposts ) ); |
- | 629 | ||
580 | my $count = keys %data; |
630 | my $count = keys %data; |
581 | printf __"Total headers: %s KiB; bodies: %s KiB\n", |
631 | printf __"Total headers: %s; bodies: %s\n", |
- | 632 | $formatter->format_bytes( |
|
- | 633 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
|
- | 634 | ), |
|
582 | commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
635 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
583 | printf __ |
636 | printf __ |
584 | "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n", |
637 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
- | 638 | $formatter->format_bytes( |
|
- | 639 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
|
- | 640 | ), |
|
585 | commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
641 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
586 | ( $totorig * 100 ) / ( $totorig + $totquoted ), |
642 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
587 | commify( int( $totsig / 1024 ) ); |
643 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
588 | printf __"Total number of posters: %s, average %s bytes per poster\n", |
644 | printf __"Total number of posters: %s, average %s per poster\n", |
- | 645 | $formatter->format_number($count), |
|
589 | commify($count), commify( int( $totsize / $count ) ); #/ |
646 | $formatter->format_bytes( $totsize / $count, |
- | 647 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
|
590 | $count = keys %threads; |
648 | $count = keys %threads; |
591 | printf __"Total number of threads: %s, average %s bytes per thread\n", |
649 | printf __"Total number of threads: %s, average %s per thread\n", |
- | 650 | $formatter->format_number($count), |
|
592 | commify($count), commify( int( $totsize / $count ) ); #/ |
651 | $formatter->format_bytes( $totsize / $count, |
- | 652 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
|
593 | printf __"Total number of user agents: %d\n", scalar keys %agents; |
653 | printf __"Total number of user agents: %d\n", |
- | 654 | $formatter->format_number( scalar keys %agents ); |
|
594 | print "\n", "=" x 76, "\n"; |
655 | print "\n", "=" x 76, "\n" ; |
595 | - | ||
596 | ########################################
|
656 | ########################################
|
597 | ## Show posters by article count Sec 1;
|
657 | ## Show posters by article count Sec 1;
|
598 | ########################################
|
658 | ########################################
|
599 | unless ( $skipSec{1} ) |
659 | unless ( $skipSec{1} ) |
600 | {
|
660 | {
|
Line 606... | Line 666... | ||
606 | {
|
666 | {
|
607 | $count = $topposters; |
667 | $count = $topposters; |
608 | }
|
668 | }
|
609 | printf "%s\n", |
669 | printf "%s\n", |
610 | centred(
|
670 | centred(
|
611 | __x( "Top {count} posters by number of articles", count => $topposters ), 76 ); |
671 | __x( "Top {count} posters by number of articles", count => $topposters ), |
- | 672 | 76
|
|
- | 673 | ); |
|
612 | print "=" x 76, "\n"; |
674 | print "=" x 76, "\n"; |
613 | my $i = 0; |
675 | my $i = 0; |
614 | foreach
|
676 | foreach
|
615 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
677 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
616 | {
|
678 | {
|
Line 636... | Line 698... | ||
636 | $count = $topposters; |
698 | $count = $topposters; |
637 | }
|
699 | }
|
638 | printf "%s\n", |
700 | printf "%s\n", |
639 | centred(
|
701 | centred(
|
640 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
702 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
- | 703 | 76
|
|
641 | 76 ); |
704 | ); |
642 | print "=" x 76, "\n"; |
705 | print "=" x 76, "\n"; |
643 | my $i = 0; |
706 | my $i = 0; |
644 | foreach
|
707 | foreach
|
645 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
708 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
646 | {
|
709 | {
|
Line 740... | Line 803... | ||
740 | else
|
803 | else
|
741 | {
|
804 | {
|
742 | $count = $topthreads; |
805 | $count = $topthreads; |
743 | }
|
806 | }
|
744 | printf "%s\n", |
807 | printf "%s\n", |
- | 808 | centred(
|
|
745 | centred( __x( "Top {count} threads by no. of articles", count => $topthreads ), |
809 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
746 | 76 ); |
810 | 76 ); |
747 | print "=" x 76, "\n"; |
811 | print "=" x 76, "\n"; |
748 | my $i = 0; |
812 | my $i = 0; |
749 | foreach my $thread ( |
813 | foreach my $thread ( |
750 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
814 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
Line 771... | Line 835... | ||
771 | else
|
835 | else
|
772 | {
|
836 | {
|
773 | $count = $topthreads; |
837 | $count = $topthreads; |
774 | }
|
838 | }
|
775 | printf "%s\n", |
839 | printf "%s\n", |
- | 840 | centred(
|
|
776 | centred( __x( "Top {count} threads by size in KiB", count => $topthreads ), |
841 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
777 | 76 ); |
- | |
778 | print "=" x 76, "\n"; |
842 | print "=" x 76, "\n"; |
779 | my $i = 0; |
843 | my $i = 0; |
780 | foreach my $thread ( |
844 | foreach my $thread ( |
781 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
845 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
782 | keys %threads |
846 | keys %threads |
Line 803... | Line 867... | ||
803 | else
|
867 | else
|
804 | {
|
868 | {
|
805 | $count = $topcrossposts; |
869 | $count = $topcrossposts; |
806 | }
|
870 | }
|
807 | printf "%s\n", |
871 | printf "%s\n", |
- | 872 | centred(
|
|
808 | centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
873 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
809 | print "=" x 76, "\n"; |
874 | print "=" x 76, "\n"; |
810 | my $i = 0; |
875 | my $i = 0; |
811 | foreach
|
876 | foreach
|
812 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
877 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
813 | {
|
878 | {
|
Line 859... | Line 924... | ||
859 | $count = $topagents; |
924 | $count = $topagents; |
860 | }
|
925 | }
|
861 | printf "%s\n", |
926 | printf "%s\n", |
862 | centred(
|
927 | centred(
|
863 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
928 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
- | 929 | 76
|
|
864 | 76 ); |
930 | ); |
865 | print "=" x 76, "\n"; |
931 | print "=" x 76, "\n"; |
866 | my $i = 0; |
932 | my $i = 0; |
867 | foreach my $agent ( |
933 | foreach my $agent ( |
868 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
934 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
869 | keys %distinct_agent |
935 | keys %distinct_agent |
Line 888... | Line 954... | ||
888 | }
|
954 | }
|
889 | else
|
955 | else
|
890 | {
|
956 | {
|
891 | $count = $toptz; |
957 | $count = $toptz; |
892 | }
|
958 | }
|
- | 959 | printf "%s\n", |
|
893 | printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 ); |
960 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
894 | print "=" x 76, "\n"; |
961 | print "=" x 76, "\n"; |
895 | my $i = 0; |
962 | my $i = 0; |
896 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
963 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
897 | {
|
964 | {
|
898 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
965 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
Line 935... | Line 1002... | ||
935 | }
|
1002 | }
|
936 | 1003 | ||
937 | ###########################
|
1004 | ###########################
|
938 | ## Put commas into a number
|
1005 | ## Put commas into a number
|
939 | ###########################
|
1006 | ###########################
|
940 | # Get some of locale's numeric formatting parameters
|
- | |
941 | my ($thousands_sep, $grouping) = |
- | |
942 | @{localeconv()}{'thousands_sep', 'grouping'}; |
- | |
943 | # Apply defaults if values are missing
|
- | |
944 | $thousands_sep = ',' unless $thousands_sep; |
- | |
945 | # grouping and mon_grouping are packed lists
|
- | |
946 | # of small integers (characters) telling the
|
- | |
947 | # grouping (thousand_seps and mon_thousand_seps
|
- | |
948 | # being the group dividers) of numbers and
|
- | |
949 | # monetary quantities. The integers' meanings:
|
- | |
950 | # 255 means no more grouping, 0 means repeat
|
- | |
951 | # the previous grouping, 1-254 means use that
|
- | |
952 | # as the current grouping. Grouping goes from
|
- | |
953 | # right to left (low to high digits). In the
|
- | |
954 | # below we cheat slightly by never using anything
|
- | |
955 | # else than the first grouping (whatever that is).
|
- | |
956 | my @grouping; |
- | |
957 | if ($grouping) { |
- | |
958 | @grouping = unpack("C*", $grouping); |
- | |
959 | } else { |
- | |
960 | @grouping = (3); |
- | |
961 | }
|
- | |
962 | - | ||
963 | sub commify
|
1007 | sub commify
|
964 | {
|
1008 | {
|
965 | local $_ = shift; |
1009 | local $_ = shift; |
966 | #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
|
1010 | my $number = $_; |
967 | $_ = int; # Chop non-integer part |
1011 | $_ = int; # Chop non-integer part |
968 | 1 while |
1012 | 1 while |
969 | s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/; |
1013 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
- | 1014 | my $int_part = $_; |
|
- | 1015 | my $real_part = ''; |
|
- | 1016 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
|
- | 1017 | {
|
|
970 | return $_; |
1018 | $real_part = $1; |
- | 1019 | }
|
|
- | 1020 | return $int_part . $real_part; |
|
971 | }
|
1021 | }
|
972 | 1022 | ||
973 | ################################################################
|
1023 | ################################################################
|
974 | ## Returns a string with leading and trailing whitespace removed
|
1024 | ## Returns a string with leading and trailing whitespace removed
|
975 | ################################################################
|
1025 | ################################################################
|
Line 982... | Line 1032... | ||
982 | return $clean; |
1032 | return $clean; |
983 | }
|
1033 | }
|
984 | 1034 | ||
985 | sub usage
|
1035 | sub usage
|
986 | {
|
1036 | {
|
987 | print __"usage: newsstat.pl NEWS.GROUP\n"; |
1037 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
988 | exit 1; |
1038 | exit 1; |
989 | }
|
1039 | }
|
990 | 1040 | ||
991 | sub dmsg
|
1041 | sub dmsg
|
992 | {
|
1042 | {
|
Line 996... | Line 1046... | ||
996 | sub dmsg2
|
1046 | sub dmsg2
|
997 | {
|
1047 | {
|
998 | my ( $level, @msg ) = @_; |
1048 | my ( $level, @msg ) = @_; |
999 | print STDERR @msg, "\n" if $level >= DEBUG; |
1049 | print STDERR @msg, "\n" if $level >= DEBUG; |
1000 | }
|
1050 | }
|
- | 1051 | ||
- | 1052 | sub println
|
|
- | 1053 | {
|
|
- | 1054 | print @_, "\n"; |
|
- | 1055 | }
|