WebSVN - LCARS - Diff - Rev 22 and 23 - /trunk/tools/network/news/newsstat/newsstat.pl

 #!/usr/bin/env perl
 use strict;
 use warnings;
+require 5.004;
 #use diagnostics;
 use utf8;
 use Encode;
 ## Print out all text to STDOUT UTF-8 encoded
 binmode STDOUT, ':encoding(UTF-8)';
 binmode STDERR, ':encoding(UTF-8)';
+## L10n
+use locale ':not_characters';
+# setlocale( LC_MESSAGES, '' );
+require Number::Format;
+## i18n
-# FIXME: Automatically include resolved '.' in @INC
+## FIXME: Automatically include resolved '.' in @INC
 # print join "\n", @INC;
-use locale ':not_characters';
 use Locale::TextDomain ('de.pointedears.newsstat');
-use POSIX ('locale_h');
+use POSIX              ('locale_h');
 use Locale::Messages qw (bind_textdomain_filter
-                         bind_textdomain_codeset
+  bind_textdomain_codeset
-                         turn_utf_8_on);
+  turn_utf_8_on);
-#setlocale( LC_MESSAGES, '' );
-bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on;
+bind_textdomain_filter 'de.pointedears.newsstat',  \&turn_utf_8_on;
 bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8';
 require Mail::Message;
 require DateTime;
 require DateTime::Format::Mail;
+# See comments in previous example
+my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) =
+  @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping',
+  'decimal_point' };
+# Apply defaults if values are missing
+$thousands_sep = $mon_thousands_sep unless $thousands_sep;
+$thousands_sep = ' ' unless $thousands_sep;
+# grouping and mon_grouping are packed lists
+# of small integers (characters) telling the
+# grouping (thousand_seps and mon_thousand_seps
+# being the group dividers) of numbers and
+# monetary quantities.  The integers' meanings:
+# 255 means no more grouping, 0 means repeat
+# the previous grouping, 1-254 means use that
+# as the current grouping.  Grouping goes from
+# right to left (low to high digits).  In the
+# below we cheat slightly by never using anything
+# else than the first grouping (whatever that is).
+my @grouping;
+if ($grouping)
+{
+  @grouping = unpack( "C*", $grouping );
+}
+else
+{
+  @grouping = (3);
+}
+## FIXME: Why don't the defaults work already?
+my $formatter = new Number::Format(
+  -decimal_point => $decimal_point,
+  -thousands_sep => $thousands_sep,
+  # -grouping      => $grouping[0]
+);
 ###################### USER CONFIGURATIONS ############################
 ## The name of the group to do stats for
 my $newsgroup_name = $ARGV[0];
 $newsgroup_name or usage();
 }    # write_data
 sub display_results
+{
   #################### DISPLAY RESULTS #####################
-  print "=" x 76, "\n";
+  println( "=" x 76 );
   printf "%s\n",
     centred(
     __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ),
 );
-  print "=" x 76, "\n";
+  println( "=" x 76 );
   printf "%s\n",
     centred(
-    __
+    __(
-"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)",
+"(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
+    ),
     );
   print "\n\n";
-  printf __"Total posts considered: %s over %d days" . "\n",
+  printf __"Total posts considered: %s over %d days\n",
-    commify($totalposts),
+    $formatter->format_number($totalposts),
-    $numdays;
+    $formatter->format_number($numdays);
-  my $time_locale = setlocale(LC_TIME);
+  my $time_locale       = setlocale(LC_TIME);
   my $earliest_datetime = DateTime->from_epoch(
-    epoch => $earliest,
+    epoch     => $earliest,
-    locale => $time_locale,
+    locale    => $time_locale,
     time_zone => 'UTC',
   );
   my $latest_datetime = DateTime->from_epoch(
-    epoch => $latest,
+    epoch     => $latest,
-    locale => $time_locale,
+    locale    => $time_locale,
     time_zone => 'UTC',
   );
   my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z';
+  printf __"Earliest article: %s\n",
-  printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format);
+    $earliest_datetime->strftime($datetime_format);
+  printf __"Latest article:   %s\n",
-  printf __"Latest article" . ":   %s\n", $latest_datetime->strftime($datetime_format);
+    $latest_datetime->strftime($datetime_format);
-  printf __"Original articles: %s; replies" . ": %s\n",
+  printf __"Original articles: %s; replies: %s\n",
-    commify($origposts),
+    $formatter->format_number($origposts),
-    commify($replies);
+    $formatter->format_number($replies);
-  printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n",
+  printf __"Total size of posts: %s bytes (%s)" . "\n",
+    $formatter->format_number($totsize),
-    commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576;  #
+    $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) );
-  printf __
-    "Average %s articles per day, %.2f MiB per day, %s bytes per article\n",
+  printf __"Average %s articles per day, %s per day, %s bytes per article\n",
-    commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576,
+    $formatter->format_number( int( $totalposts / $numdays ) ),
+    $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ),
-    commify( int( $totsize / $totalposts ) );
+    $formatter->format_number( int( $totsize / $totalposts ) );
   my $count = keys %data;
-  printf __"Total headers: %s KiB; bodies: %s KiB\n",
+  printf __"Total headers: %s; bodies: %s\n",
+    $formatter->format_bytes(
+    $totheader, ( 'precision' => 1, 'mode' => 'iec' )
+    ),
-    commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) );
+    $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) );
   printf __
-    "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n",
+    "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n",
+    $formatter->format_bytes(
+    $totquoted, ( 'precision' => 1, 'mode' => 'iec' )
+    ),
-    commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ),
+    $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ),
-    ( $totorig * 100 ) / ( $totorig + $totquoted ),
+    $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ),
-    commify( int( $totsig / 1024 ) );
+    $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) );
-  printf __"Total number of posters: %s, average %s bytes per poster\n",
+  printf __"Total number of posters: %s, average %s per poster\n",
+    $formatter->format_number($count),
-    commify($count), commify( int( $totsize / $count ) );    #/
+    $formatter->format_bytes( $totsize / $count,
+    ( 'precision' => 1, 'mode' => 'iec' ) );
   $count = keys %threads;
-  printf __"Total number of threads: %s, average %s bytes per thread\n",
+  printf __"Total number of threads: %s, average %s per thread\n",
+    $formatter->format_number($count),
-    commify($count), commify( int( $totsize / $count ) );    #/
+    $formatter->format_bytes( $totsize / $count,
+    ( 'precision' => 1, 'mode' => 'iec' ) );
-  printf __"Total number of user agents: %d\n", scalar keys %agents;
+  printf __"Total number of user agents: %d\n",
+    $formatter->format_number( scalar keys %agents );
-  print "\n", "=" x 76, "\n";
+  print "\n", "=" x 76, "\n"  ;
   ########################################
   ## Show posters by article count  Sec 1;
   ########################################
   unless ( $skipSec{1} )
+  {
+    {
       $count = $topposters;
+    }
     printf "%s\n",
       centred(
-      __x( "Top {count} posters by number of articles", count => $topposters ), 76 );
+      __x( "Top {count} posters by number of articles", count => $topposters ),
+      );
     print "=" x 76, "\n";
     my $i = 0;
     foreach
       my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
+    {
       $count = $topposters;
+    }
     printf "%s\n",
       centred(
       __x( "Top {count} posters by article size in KiB", count => $topposters ),
-);
+      );
     print "=" x 76, "\n";
     my $i = 0;
     foreach
       my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data )
+    {
     else
+    {
       $count = $topthreads;
+    }
     printf "%s\n",
+      centred(
-      centred( __x( "Top {count} threads by no. of articles", count => $topthreads ),
+      __x( "Top {count} threads by no. of articles", count => $topthreads ),
 );
     print "=" x 76, "\n";
     my $i = 0;
     foreach my $thread (
       sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} }
     else
+    {
       $count = $topthreads;
+    }
     printf "%s\n",
+      centred(
-      centred( __x( "Top {count} threads by size in KiB", count => $topthreads ),
+      __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 );
-);
     print "=" x 76, "\n";
     my $i = 0;
     foreach my $thread (
       sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} }
       keys %threads
     else
+    {
       $count = $topcrossposts;
+    }
     printf "%s\n",
+      centred(
-      centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
+      __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 );
     print "=" x 76, "\n";
     my $i = 0;
     foreach
       my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts )
+    {
       $count = $topagents;
+    }
     printf "%s\n",
       centred(
       __x( "Top {count} user agents by number of posts", count => $topagents ),
-);
+      );
     print "=" x 76, "\n";
     my $i = 0;
     foreach my $agent (
       sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
       keys %distinct_agent
+    }
     else
+    {
       $count = $toptz;
+    }
+    printf "%s\n",
-    printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 );
+      centred( __x( "Top {count} time zones", count => $toptz ), 76 );
     print "=" x 76, "\n";
     my $i = 0;
     foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz )
+    {
       printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone};
+}
 ###########################
 ## Put commas into a number
 ###########################
-# Get some of locale's numeric formatting parameters
-my ($thousands_sep, $grouping) =
-        @{localeconv()}{'thousands_sep', 'grouping'};
-# Apply defaults if values are missing
-$thousands_sep = ',' unless $thousands_sep;
-# grouping and mon_grouping are packed lists
-# of small integers (characters) telling the
-# grouping (thousand_seps and mon_thousand_seps
-# being the group dividers) of numbers and
-# monetary quantities.  The integers' meanings:
-# 255 means no more grouping, 0 means repeat
-# the previous grouping, 1-254 means use that
-# as the current grouping.  Grouping goes from
-# right to left (low to high digits).  In the
-# below we cheat slightly by never using anything
-# else than the first grouping (whatever that is).
-my @grouping;
-if ($grouping) {
-    @grouping = unpack("C*", $grouping);
-} else {
-    @grouping = (3);
-}
 sub commify
+{
   local $_ = shift;
-  #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
+  my $number = $_;
-  $_ = int;    # Chop non-integer part
+  $_ = int;                                        # Chop non-integer part
 while
-  s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/;
+    s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/;
+  my $int_part  = $_;
+  my $real_part = '';
+  if ( $number =~ /(\Q$decimal_point\E\d+)$/ )
+  {
-  return $_;
+    $real_part = $1;
+  }
+  return $int_part . $real_part;
+}
 ################################################################
 ## Returns a string with leading and trailing whitespace removed
 ################################################################
   return $clean;
+}
 sub usage
+{
-  print __"usage: newsstat.pl NEWS.GROUP\n";
+  println( __ "usage: newsstat.pl NEWS.GROUP" );
   exit 1;
+}
 sub dmsg
+{
 sub dmsg2
+{
   my ( $level, @msg ) = @_;
   print STDERR @msg, "\n" if $level >= DEBUG;
+}
+sub println
+{
+  print @_, "\n";
+}

Subversion Repositories LCARS

(root)/trunk/tools/network/news/newsstat/newsstat.pl - Rev 22 → 23