Subversion Repositories LCARS

Rev

Rev 22 | Rev 24 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 22 Rev 23
Line 1... Line 1...
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
-
 
4
require 5.004;
4
5
5
#use diagnostics;
6
#use diagnostics;
6
use utf8;
7
use utf8;
7
use Encode;
8
use Encode;
8
9
Line 10... Line 11...
10
11
11
## Print out all text to STDOUT UTF-8 encoded
12
## Print out all text to STDOUT UTF-8 encoded
12
binmode STDOUT, ':encoding(UTF-8)';
13
binmode STDOUT, ':encoding(UTF-8)';
13
binmode STDERR, ':encoding(UTF-8)';
14
binmode STDERR, ':encoding(UTF-8)';
14
15
-
 
16
## L10n
-
 
17
use locale ':not_characters';
-
 
18
-
 
19
# setlocale( LC_MESSAGES, '' );
-
 
20
require Number::Format;
-
 
21
-
 
22
## i18n
15
# FIXME: Automatically include resolved '.' in @INC
23
## FIXME: Automatically include resolved '.' in @INC
16
# print join "\n", @INC;
24
# print join "\n", @INC;
17
25
18
use locale ':not_characters';
-
 
19
use Locale::TextDomain ('de.pointedears.newsstat');
26
use Locale::TextDomain ('de.pointedears.newsstat');
20
use POSIX ('locale_h');
27
use POSIX              ('locale_h');
21
use Locale::Messages qw (bind_textdomain_filter
28
use Locale::Messages qw (bind_textdomain_filter
22
                         bind_textdomain_codeset
29
  bind_textdomain_codeset
23
                         turn_utf_8_on);
30
  turn_utf_8_on);
24
#setlocale( LC_MESSAGES, '' );
-
 
-
 
31
25
bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
32
bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
26
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
33
bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
27
 
34
28
require Mail::Message;
35
require Mail::Message;
29
require DateTime;
36
require DateTime;
30
require DateTime::Format::Mail;
37
require DateTime::Format::Mail;
-
 
38
-
 
39
# See comments in previous example
-
 
40
my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
-
 
41
  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
-
 
42
  'decimal_point' };
-
 
43
-
 
44
# Apply defaults if values are missing
-
 
45
$thousands_sep = $mon_thousands_sep unless $thousands_sep;
-
 
46
$thousands_sep = ' ' unless $thousands_sep;
-
 
47
-
 
48
# grouping and mon_grouping are packed lists
-
 
49
# of small integers (characters) telling the
-
 
50
# grouping (thousand_seps and mon_thousand_seps
-
 
51
# being the group dividers) of numbers and
-
 
52
# monetary quantities.  The integers' meanings:
-
 
53
# 255 means no more grouping, 0 means repeat
-
 
54
# the previous grouping, 1-254 means use that
-
 
55
# as the current grouping.  Grouping goes from
-
 
56
# right to left (low to high digits).  In the
-
 
57
# below we cheat slightly by never using anything
-
 
58
# else than the first grouping (whatever that is).
-
 
59
my @grouping;
-
 
60
if ($grouping)
-
 
61
{
-
 
62
  @grouping = unpack( "C*", $grouping );
-
 
63
}
-
 
64
else
-
 
65
{
-
 
66
  @grouping = (3);
-
 
67
}
-
 
68
-
 
69
## FIXME: Why don't the defaults work already?
-
 
70
my $formatter = new Number::Format(
-
 
71
  -decimal_point => $decimal_point,
-
 
72
  -thousands_sep => $thousands_sep,
-
 
73
-
 
74
  # -grouping      => $grouping[0]
-
 
75
);
31
 
76
32
###################### USER CONFIGURATIONS ############################
77
###################### USER CONFIGURATIONS ############################
33
78
34
## The name of the group to do stats for
79
## The name of the group to do stats for
35
my $newsgroup_name = $ARGV[0];
80
my $newsgroup_name = $ARGV[0];
36
$newsgroup_name or usage();
81
$newsgroup_name or usage();
Line 536... Line 581...
536
}    # write_data
581
}    # write_data
537
582
538
sub display_results
583
sub display_results
539
{
584
{
540
  #################### DISPLAY RESULTS #####################
585
  #################### DISPLAY RESULTS #####################
541
  print "=" x 76, "\n";
586
  println( "=" x 76 );
542
  printf "%s\n",
587
  printf "%s\n",
543
    centred(
588
    centred(
544
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
589
    __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
545
    76 );
590
    76 );
546
  print "=" x 76, "\n";
591
  println( "=" x 76 );
547
  printf "%s\n",
592
  printf "%s\n",
548
    centred(
593
    centred(
549
    __
594
    __(
550
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)",
595
"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
-
 
596
    ),
551
    76
597
    76
552
    );
598
    );
553
  print "\n\n";
599
  print "\n\n";
554
  printf __"Total posts considered: %s over %d days" . "\n",
600
  printf __"Total posts considered: %s over %d days\n",
555
    commify($totalposts),
601
    $formatter->format_number($totalposts),
556
    $numdays;
602
    $formatter->format_number($numdays);
557
  my $time_locale = setlocale(LC_TIME);
603
  my $time_locale       = setlocale(LC_TIME);
558
  my $earliest_datetime = DateTime->from_epoch(
604
  my $earliest_datetime = DateTime->from_epoch(
559
    epoch => $earliest,
605
    epoch     => $earliest,
560
    locale => $time_locale,
606
    locale    => $time_locale,
561
    time_zone => 'UTC',
607
    time_zone => 'UTC',
562
  );
608
  );
563
  my $latest_datetime = DateTime->from_epoch(
609
  my $latest_datetime = DateTime->from_epoch(
564
    epoch => $latest,
610
    epoch     => $latest,
565
    locale => $time_locale,
611
    locale    => $time_locale,
566
    time_zone => 'UTC',
612
    time_zone => 'UTC',
567
  );
613
  );
568
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
614
  my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
-
 
615
  printf __"Earliest article: %s\n",
569
  printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format);
616
    $earliest_datetime->strftime($datetime_format);
-
 
617
  printf __"Latest article:   %s\n",
570
  printf __"Latest article" . ":   %s\n", $latest_datetime->strftime($datetime_format);
618
    $latest_datetime->strftime($datetime_format);
571
  printf __"Original articles: %s; replies" . ": %s\n",
619
  printf __"Original articles: %s; replies: %s\n",
572
    commify($origposts),
620
    $formatter->format_number($origposts),
573
    commify($replies);
621
    $formatter->format_number($replies);
574
  printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n",
622
  printf __"Total size of posts: %s bytes (%s)" . "\n",
-
 
623
    $formatter->format_number($totsize),
575
    commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576;  #
624
    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
576
  printf __
-
 
577
    "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
625
  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
578
    commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
626
    $formatter->format_number( int( $totalposts / $numdays ) ),
-
 
627
    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
579
    commify( int( $totsize / $totalposts ) );
628
    $formatter->format_number( int( $totsize / $totalposts ) );
-
 
629
580
  my $count = keys %data;
630
  my $count = keys %data;
581
  printf __"Total headers: %s KiB; bodies: %s KiB\n",
631
  printf __"Total headers: %s; bodies: %s\n",
-
 
632
    $formatter->format_bytes(
-
 
633
    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
-
 
634
    ),
582
    commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
635
    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
583
  printf __
636
  printf __
584
    "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n",
637
    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
-
 
638
    $formatter->format_bytes(
-
 
639
    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
-
 
640
    ),
585
    commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
641
    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
586
    ( $totorig * 100 ) / ( $totorig + $totquoted ),
642
    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
587
    commify( int( $totsig / 1024 ) );
643
    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
588
  printf __"Total number of posters: %s, average %s bytes per poster\n",
644
  printf __"Total number of posters: %s, average %s per poster\n",
-
 
645
    $formatter->format_number($count),
589
    commify($count), commify( int( $totsize / $count ) );    #/
646
    $formatter->format_bytes( $totsize / $count,
-
 
647
    ( 'precision' => 1, 'mode' => 'iec' ) );
590
  $count = keys %threads;
648
  $count = keys %threads;
591
  printf __"Total number of threads: %s, average %s bytes per thread\n",
649
  printf __"Total number of threads: %s, average %s per thread\n",
-
 
650
    $formatter->format_number($count),
592
    commify($count), commify( int( $totsize / $count ) );    #/
651
    $formatter->format_bytes( $totsize / $count,
-
 
652
    ( 'precision' => 1, 'mode' => 'iec' ) );
593
  printf __"Total number of user agents: %d\n", scalar keys %agents;
653
  printf __"Total number of user agents: %d\n",
-
 
654
    $formatter->format_number( scalar keys %agents );
594
  print "\n", "=" x 76, "\n";
655
  print "\n", "=" x 76, "\n"  ;
595
-
 
596
  ########################################
656
  ########################################
597
  ## Show posters by article count  Sec 1;
657
  ## Show posters by article count  Sec 1;
598
  ########################################
658
  ########################################
599
  unless ( $skipSec{1} )
659
  unless ( $skipSec{1} )
600
  {
660
  {
Line 606... Line 666...
606
    {
666
    {
607
      $count = $topposters;
667
      $count = $topposters;
608
    }
668
    }
609
    printf "%s\n",
669
    printf "%s\n",
610
      centred(
670
      centred(
611
      __x( "Top {count} posters by number of articles", count => $topposters ), 76 );
671
      __x( "Top {count} posters by number of articles", count => $topposters ),
-
 
672
      76
-
 
673
      );
612
    print "=" x 76, "\n";
674
    print "=" x 76, "\n";
613
    my $i = 0;
675
    my $i = 0;
614
    foreach
676
    foreach
615
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
677
      my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
616
    {
678
    {
Line 636... Line 698...
636
      $count = $topposters;
698
      $count = $topposters;
637
    }
699
    }
638
    printf "%s\n",
700
    printf "%s\n",
639
      centred(
701
      centred(
640
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
702
      __x( "Top {count} posters by article size in KiB", count => $topposters ),
-
 
703
      76
641
      76 );
704
      );
642
    print "=" x 76, "\n";
705
    print "=" x 76, "\n";
643
    my $i = 0;
706
    my $i = 0;
644
    foreach
707
    foreach
645
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
708
      my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
646
    {
709
    {
Line 740... Line 803...
740
    else
803
    else
741
    {
804
    {
742
      $count = $topthreads;
805
      $count = $topthreads;
743
    }
806
    }
744
    printf "%s\n",
807
    printf "%s\n",
-
 
808
      centred(
745
      centred( __x( "Top {count} threads by no. of articles", count => $topthreads ),
809
      __x( "Top {count} threads by no. of articles", count => $topthreads ),
746
      76 );
810
      76 );
747
    print "=" x 76, "\n";
811
    print "=" x 76, "\n";
748
    my $i = 0;
812
    my $i = 0;
749
    foreach my $thread (
813
    foreach my $thread (
750
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
814
      sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
Line 771... Line 835...
771
    else
835
    else
772
    {
836
    {
773
      $count = $topthreads;
837
      $count = $topthreads;
774
    }
838
    }
775
    printf "%s\n",
839
    printf "%s\n",
-
 
840
      centred(
776
      centred( __x( "Top {count} threads by size in KiB", count => $topthreads ),
841
      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
777
      76 );
-
 
778
    print "=" x 76, "\n";
842
    print "=" x 76, "\n";
779
    my $i = 0;
843
    my $i = 0;
780
    foreach my $thread (
844
    foreach my $thread (
781
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
845
      sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
782
      keys %threads
846
      keys %threads
Line 803... Line 867...
803
    else
867
    else
804
    {
868
    {
805
      $count = $topcrossposts;
869
      $count = $topcrossposts;
806
    }
870
    }
807
    printf "%s\n",
871
    printf "%s\n",
-
 
872
      centred(
808
      centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
873
      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
809
    print "=" x 76, "\n";
874
    print "=" x 76, "\n";
810
    my $i = 0;
875
    my $i = 0;
811
    foreach
876
    foreach
812
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
877
      my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
813
    {
878
    {
Line 859... Line 924...
859
      $count = $topagents;
924
      $count = $topagents;
860
    }
925
    }
861
    printf "%s\n",
926
    printf "%s\n",
862
      centred(
927
      centred(
863
      __x( "Top {count} user agents by number of posts", count => $topagents ),
928
      __x( "Top {count} user agents by number of posts", count => $topagents ),
-
 
929
      76
864
      76 );
930
      );
865
    print "=" x 76, "\n";
931
    print "=" x 76, "\n";
866
    my $i = 0;
932
    my $i = 0;
867
    foreach my $agent (
933
    foreach my $agent (
868
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
934
      sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
869
      keys %distinct_agent
935
      keys %distinct_agent
Line 888... Line 954...
888
    }
954
    }
889
    else
955
    else
890
    {
956
    {
891
      $count = $toptz;
957
      $count = $toptz;
892
    }
958
    }
-
 
959
    printf "%s\n",
893
    printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 );
960
      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
894
    print "=" x 76, "\n";
961
    print "=" x 76, "\n";
895
    my $i = 0;
962
    my $i = 0;
896
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
963
    foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
897
    {
964
    {
898
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
965
      printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
Line 935... Line 1002...
935
}
1002
}
936
1003
937
###########################
1004
###########################
938
## Put commas into a number
1005
## Put commas into a number
939
###########################
1006
###########################
940
# Get some of locale's numeric formatting parameters
-
 
941
my ($thousands_sep, $grouping) =
-
 
942
        @{localeconv()}{'thousands_sep', 'grouping'};
-
 
943
# Apply defaults if values are missing
-
 
944
$thousands_sep = ',' unless $thousands_sep;
-
 
945
# grouping and mon_grouping are packed lists
-
 
946
# of small integers (characters) telling the
-
 
947
# grouping (thousand_seps and mon_thousand_seps
-
 
948
# being the group dividers) of numbers and
-
 
949
# monetary quantities.  The integers' meanings:
-
 
950
# 255 means no more grouping, 0 means repeat
-
 
951
# the previous grouping, 1-254 means use that
-
 
952
# as the current grouping.  Grouping goes from
-
 
953
# right to left (low to high digits).  In the
-
 
954
# below we cheat slightly by never using anything
-
 
955
# else than the first grouping (whatever that is).
-
 
956
my @grouping;
-
 
957
if ($grouping) {
-
 
958
    @grouping = unpack("C*", $grouping);
-
 
959
} else {
-
 
960
    @grouping = (3);
-
 
961
}
-
 
962
-
 
963
sub commify
1007
sub commify
964
{
1008
{
965
  local $_ = shift;
1009
  local $_ = shift;
966
  #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
1010
  my $number = $_;
967
  $_ = int;    # Chop non-integer part
1011
  $_ = int;                                        # Chop non-integer part
968
  1 while
1012
  1 while
969
  s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/;
1013
    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
-
 
1014
  my $int_part  = $_;
-
 
1015
  my $real_part = '';
-
 
1016
  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
-
 
1017
  {
970
  return $_;
1018
    $real_part = $1;
-
 
1019
  }
-
 
1020
  return $int_part . $real_part;
971
}
1021
}
972
1022
973
################################################################
1023
################################################################
974
## Returns a string with leading and trailing whitespace removed
1024
## Returns a string with leading and trailing whitespace removed
975
################################################################
1025
################################################################
Line 982... Line 1032...
982
  return $clean;
1032
  return $clean;
983
}
1033
}
984
1034
985
sub usage
1035
sub usage
986
{
1036
{
987
  print __"usage: newsstat.pl NEWS.GROUP\n";
1037
  println( __ "usage: newsstat.pl NEWS.GROUP" );
988
  exit 1;
1038
  exit 1;
989
}
1039
}
990
1040
991
sub dmsg
1041
sub dmsg
992
{
1042
{
Line 996... Line 1046...
996
sub dmsg2
1046
sub dmsg2
997
{
1047
{
998
  my ( $level, @msg ) = @_;
1048
  my ( $level, @msg ) = @_;
999
  print STDERR @msg, "\n" if $level >= DEBUG;
1049
  print STDERR @msg, "\n" if $level >= DEBUG;
1000
}
1050
}
-
 
1051
-
 
1052
sub println
-
 
1053
{
-
 
1054
  print @_, "\n";
-
 
1055
}