#!/usr/bin/env perl use strict; use warnings; require 5.004; #use diagnostics; use utf8; ## NOTE: ## Enable and remove binmode when utf8::all has actually become lexically scoped # use utf8:all; use constant DEBUG => 0; ## newsstat.pl ## Copyright (C) 2011, 2012 Thomas Lahn ## Based on work by Garry Knight et al. ## ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . ## Print out all text to STDOUT UTF-8 encoded binmode STDOUT, ':encoding(UTF-8)'; binmode STDERR, ':encoding(UTF-8)'; ## L10n use locale ':not_characters'; # setlocale( LC_MESSAGES, '' ); require Number::Format; ## i18n ## FIXME: Automatically include resolved '.' in @INC # print join "\n", @INC; use Locale::TextDomain ('de.pointedears.newsstat'); use POSIX ('locale_h'); use Locale::Messages qw (bind_textdomain_filter bind_textdomain_codeset turn_utf_8_on); bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; require Mail::Message; require DateTime; require DateTime::Format::Mail; # See comments in previous example my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', 'decimal_point' }; # Apply defaults if values are missing $thousands_sep = $mon_thousands_sep unless $thousands_sep; $thousands_sep = ' ' unless $thousands_sep; # grouping and mon_grouping are packed lists # of small integers (characters) telling the # grouping (thousand_seps and mon_thousand_seps # being the group dividers) of numbers and # monetary quantities. The integers' meanings: # 255 means no more grouping, 0 means repeat # the previous grouping, 1-254 means use that # as the current grouping. Grouping goes from # right to left (low to high digits). In the # below we cheat slightly by never using anything # else than the first grouping (whatever that is). my @grouping; if ($grouping) { @grouping = unpack( "C*", $grouping ); } else { @grouping = (3); } ## FIXME: Why don't the defaults work already? my $formatter = new Number::Format( -decimal_point => $decimal_point, -thousands_sep => $thousands_sep, # -grouping => $grouping[0] ); ###################### USER CONFIGURATIONS ############################ ## The name of the group to do stats for my $newsgroup_name = $ARGV[0]; $newsgroup_name // usage(); ## Check for removal flags my $ix; my $j; my %skipSec; my @skiplist; my $args = @ARGV; for ( $ix = 1 ; $ix < $args ; $ix++ ) { $j = $ix + 1; if ( $ARGV[$ix] eq "-x" ) { @skiplist = split( ",", $ARGV[$j] ); } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) { @skiplist = split( ",", $1 ); } } foreach (@skiplist) { $skipSec{$_} = 1; } ## Leafnode users will want /var/spool/news for this variable. my $news = "/var/spool/news/"; ## Number of top or bottom posters to show my $topposters = 20; ## Number of threads we want to know about my $topthreads = 20; ## Number of cross-posted threads to show my $topcrossposts = 10; ## Number of agents we list my $topagents = 10; ## Number of time zones to show my $toptz = 10; ###################### DATA STRUCTURES ###################### my $group = $newsgroup_name; $group =~ s!\.!/!g; my %data; # name, count, agent, total, orig, quoted my $totsize = 0; # holds total sizes of all files my %crossposts; # group, count my %threads; # subject, count my $replies = 0; # total no. of replies my $origposts = 0; # total no. of original posts my %tz; # timezones by count my $earliest; # earliest article we have found my $latest; # latest article we have found my $totheader = 0; # total size of header material my $totbody = 0; # total size of body material my $totsig = 0; # total size of sig material my $totquoted = 0; # total size of quoted material my $totorig = 0; # total size of original material my $totalposts; # total no. of posts considered my %distinct_agent; ## Used to hold counts of User Agents used my %agents = ( "Compuserver" => 0, "Foorum" => 0, "Forte Agent" => 0, "Forte Free Agent" => 0, "Gnus" => 0, "KNode" => 0, "MacSOUP" => 0, "MT-NewsWatcher" => 0, "MicroPlanet Gravity" => 0, "Microsoft Outlook Express" => 0, "Microsoft Windows Mail" => 0, "Mozilla" => 0, "News Rover" => 0, "NN" => 0, "Pan" => 0, "rn" => 0, "slrn" => 0, "Sylpheed" => 0, "tin" => 0, "VSoup" => 0, "WebTV" => 0, "Xnews" => 0, ); my $datetime_parser = DateTime::Format::Mail->new(); $datetime_parser->loose(); my $today = DateTime->today( time_zone => 'UTC' ); my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); my $start = int $prev_month->strftime('%s'); my $numdays = int DateTime->last_day_of_month( year => $prev_month->year(), month => $prev_month->month(), time_zone => $prev_month->time_zone(), )->day(); my $end = int $today->clone()->set_day(1)->strftime('%s'); dmsg( $start, " to ", $end ) if DEBUG; chdir("$news$group") or die __x( "Can't cd to {newsgroup}: {error}\n", newsgroup => "$news$group", error => $! ); opendir( DIR, "." ) or die __x( "Can't open {newsgroup}: {error}\n", newsgroup => "$news$group", error => $! ); while ( defined( my $filename = readdir(DIR) ) ) { next unless -f $filename; # only want real files next if ( $filename eq ".overview" ); # real articles only get_article($filename); # read in the article } closedir(DIR); # finished with the directory dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; ## Post-processing count_agents(); # count agents, collapsing versions fix_percent(); write_data(); display_results(); ######################################## ## Get current article's header and body ######################################## sub get_article { my $filename = shift; open( my $FILE, '<', $filename ) or die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); my $msg = Mail::Message->read($FILE); my $timestamp = $msg->timestamp(); my $date = $msg->study('Date'); ## Disregard article if timestamp is not in range dmsg($timestamp) if DEBUG; if ( $timestamp < $start || $timestamp >= $end ) { dmsg("Posting on $date ignored.") if DEBUG; return; } $totalposts++; # bump count of articles considered ## DEBUG dmsg($date) if DEBUG; ## get stats about the file itself my $filesize = -s $filename; # get total size of file $totsize += $filesize; # bump total sizes of all files if ( ( not defined $earliest ) || $timestamp < $earliest ) { $earliest = $timestamp; } elsif ( ( not defined $latest ) || $timestamp > $latest ) { $latest = $timestamp; } #print "timestamp: $timestamp\n"; ## count header size $totheader += $msg->head()->size(); ## get the poster's name (MIME-decoded, in UTF-8) my $poster = $msg->study('From'); if ( defined $poster ) { ## Convert old to new format $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; ## Collapse whitespace $poster =~ s/\s+/ /g; ## Remove outer quotes; TODO: observe RFC 5322 strictly $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; ## DEBUG dmsg($poster) if DEBUG; ## seen this one before? if ( !defined( $data{$poster} ) ) { $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field $data{$poster}{'orig'} = 0; $data{$poster}{'quoted'} = 0; } $data{$poster}{'count'}++; # bump count for this poster $data{$poster}{'size'} += $filesize; # total size of file ## The User-Agent and/or X-Newsreader fields ## for User-Agent by poster my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); if ( defined $ua ) { $data{$poster}{'agent'} = $ua; ## DEBUG dmsg($ua) if DEBUG; } ## The User Agent for User-Agent by number of posts get_agent($msg); ## Get all cross-posted newsgroups for ( split( /,/, $msg->study('Newsgroups') ) ) { $crossposts{$_}++; # bump count for each } ## Get threads my $thread = $msg->study('Subject'); $thread =~ s/^\s*re:\s*//i; # Remove Re: or re: at the start $thread =~ s/\s*\(was:\s*.*\)\s*$//i; # Remove (was: ...) at the end $thread =~ s/\s+/ /g; # collapse whitespace $threads{$thread}{'count'}++; # bump count of this subject $threads{$thread}{'size'} += $filesize; # bump bytes for this thread ## Is this an original post or a reply? if ( defined $msg->study('References') ) { $replies++; } else { $origposts++; } ## Get the time zone my $datetime = $datetime_parser->parse_datetime($date); my $tz = $datetime->strftime('%z'); $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; $tz{$tz}++; ## DEBUG dmsg($tz) if DEBUG; #### Now analyse the body text #### my $body = $msg->body(); my $insig = 0; my @body = $body->lines; for (@body) { $totbody += length($_); # bump total body size next if (m{^$>}o); # don't count blank lines in body if ( $insig == 1 ) { # bump total sig size $totsig += length($_); } ## are we in a quote line? ## Bill Unruh uses ] quotes, and another poster uses :: elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) { ## bump count of quoted chrs $data{$poster}{'quoted'} += length($_); $totquoted += length($_); } elsif (/^-- $/) { $insig = 1; } else { ## We must be processing an original line $data{$poster}{'orig'} += length($_); # bump count of original chrs $totorig += length($_); } } # end for (@body) } close($FILE); } sub get_agent { my $msg = shift; my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') // $msg->study('X-Mailer'); if ( not defined $ua ) { my $org = $msg->study('Organization'); if ( defined $org and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) { $ua = $org; } elsif ( $msg->study('Message-ID') =~ /pine/i ) { $ua = "Pine"; } } ## Hopefully found UA, else set to unknown if ( not defined $ua ) { $ua = __ "unknown"; } $ua = clean($ua); my $raw = $ua; my $agent = $raw; ## strip http if ( $raw =~ /.*http.*/ ) { $raw =~ s!posted via!!i; $raw =~ s!http://!!g; $raw =~ s!/!!g; $raw =~ s! !!g; } ## Fix Outlook from Mac if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; } ## Pick out the popular agents if ( $raw =~ /(outlook express)/i || $raw =~ /(windows mail)/i || $raw =~ /(microplanet gravity)/i || $raw =~ /(news rover)/i || $raw =~ /(forte agent)/i || $raw =~ /(forte free agent)/i ) { $agent = $1; } elsif ( $raw =~ /^( pan |sylpheed |slrn |mozilla |knode |tin |hamster |xrn |xnews |aol |gnus |krn |macsoup |messenger |openxp |pine |thoth |turnpike |winvn |vsoup |google |supernews |nn |rn |007 |webtv |compuserve )/ix ) { $agent = $1; } else { ## Clean up unknown agents if ( $raw =~ m!^(.*?)/! ) { $agent = $1; } elsif ( $raw =~ /^(\w*)\d.*/ ) { $agent = $1; } } $distinct_agent{$agent}++; return $agent; } ## get_agent ######################################### ## Count the User-Agents used, collapsing ## different versions into one per agent. ######################################### sub count_agents { POSTER: foreach my $poster ( keys %data ) { foreach my $agent_name ( keys %distinct_agent ) { # check against known ones if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) { $agents{$agent_name}++; next POSTER; } } $agents{ $data{$poster}{'agent'} }++; } } # count_agents ############################################# ## Set orig/total percentages for all posters ############################################# sub fix_percent { foreach my $poster ( keys %data ) { my $percent = 100; if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) { $percent = $data{$poster}{'orig'} * 100 / ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ } elsif ( $data{$poster}{'orig'} == 0 ) { $percent = 0; } $data{$poster}{'percent'} = $percent; } } ## fix_percent ################################## ## Write data structures to a file ################################## sub write_data { open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) or die __x( "Can't create XDATA: {error}\n", error => $! ); print $OUTF "Data collected from $newsgroup_name\n\n"; print $OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; foreach my $name ( keys %data ) { print $OUTF "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; } print $OUTF "============================================================================\n"; print $OUTF "Thread subjects\n"; print $OUTF "----------------------------------------------------------------------------\n"; foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) { print $OUTF "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; } print $OUTF "============================================================================\n"; print $OUTF "Cross-posts\n"; print $OUTF "----------------------------------------------------------------------------\n"; foreach my $name ( sort keys %crossposts ) { print $OUTF "$name : $crossposts{$name}\n"; } print $OUTF "============================================================================\n"; print $OUTF "User agents\n"; print $OUTF "----------------------------------------------------------------------------\n"; foreach my $name ( sort keys %agents ) { print $OUTF "$name : $agents{$name}\n"; } print $OUTF "============================================================================\n"; print $OUTF "Time zones\n"; print $OUTF "----------------------------------------------------------------------------\n"; foreach my $name ( sort keys %tz ) { print $OUTF "$name : $tz{$name}\n"; } close $OUTF; } # write_data sub display_results { #################### DISPLAY RESULTS ##################### print "=" x 76, "\n"; printf "%s\n", centred( __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), 76 ); print "=" x 76, "\n"; printf "%s\n", centred( __( "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)" ), 76 ); print "\n\n"; printf __"Total posts considered: %s over %d days\n", $formatter->format_number($totalposts), $formatter->format_number($numdays); my $time_locale = setlocale(LC_TIME); my $earliest_datetime = DateTime->from_epoch( epoch => $earliest, locale => $time_locale, time_zone => 'UTC', ); my $latest_datetime = DateTime->from_epoch( epoch => $latest, locale => $time_locale, time_zone => 'UTC', ); my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; printf __"Earliest article: %s\n", $earliest_datetime->strftime($datetime_format); printf __"Latest article: %s\n", $latest_datetime->strftime($datetime_format); printf __"Original articles: %s; replies: %s\n", $formatter->format_number($origposts), $formatter->format_number($replies); printf __"Total size of posts: %s bytes (%s)" . "\n", $formatter->format_number($totsize), $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); printf __"Average %s articles per day, %s per day, %s bytes per article\n", $formatter->format_number( int( $totalposts / $numdays ) ), $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), $formatter->format_number( int( $totsize / $totalposts ) ); my $count = keys %data; printf __"Total headers: %s; bodies: %s\n", $formatter->format_bytes( $totheader, ( 'precision' => 1, 'mode' => 'iec' ) ), $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); printf __ "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", $formatter->format_bytes( $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) ), $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); printf __"Total number of posters: %s, average %s per poster\n", $formatter->format_number($count), $formatter->format_bytes( $totsize / $count, ( 'precision' => 1, 'mode' => 'iec' ) ); $count = keys %threads; printf __"Total number of threads: %s, average %s per thread\n", $formatter->format_number($count), $formatter->format_bytes( $totsize / $count, ( 'precision' => 1, 'mode' => 'iec' ) ); printf __"Total number of user agents: %d\n", $formatter->format_number( scalar keys %agents ); print "\n", "=" x 76, "\n"; ######################################## ## Show posters by article count Sec 1; ######################################## unless ( $skipSec{1} ) { if ( keys %data < $topposters ) { $count = keys %data; } else { $count = $topposters; } printf "%s\n", centred( __x( "Top {count} posters by number of articles", count => $topposters ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) { my $name = substr( $poster, 0, 65 ); printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), $data{$poster}{count}; last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ###################################### ## Show posters by size in KiB Sec 2; ###################################### unless ( $skipSec{2} ) { if ( keys %data < $topposters ) { $count = keys %data; } else { $count = $topposters; } printf "%s\n", centred( __x( "Top {count} posters by article size in KiB", count => $topposters ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) { my $name = substr( $poster, 0, 62 ); printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), $data{$poster}{size} / 1024; #/ last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ##################################### ## Show top posters for original text ##################################### my $topposters_real = 0; unless ( $skipSec{3} ) { if ( keys %data < $topposters ) { $count = keys %data; } else { $count = $topposters; } printf "%s\n", centred( __x( "Top {count} responders by original text (> 5 posts)", count => $topposters ), 76 ); print "=" x 76, "\n"; foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data ) { next if $data{$poster}{quoted} == 0; next if $data{$poster}{count} < 5; my $name = substr( $poster, 0, 63 ); printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, rpad( $poster, 63, "." ), $data{$poster}{percent}; last if ( ++$topposters_real == $count ); } print "\n", "=" x 76, "\n"; } ######################################## ## Show bottom posters for original text ######################################## $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; unless ( $skipSec{4} ) { if ( keys %data < $topposters ) { $count = keys %data; } else { $count = $topposters; } printf "%s\n", centred( __x( "Bottom {count} responders by original text (> 5 posts)", count => $topposters ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data ) { next if $data{$poster}{quoted} == 0; next if $data{$poster}{count} < 5; my $name = substr( $poster, 0, 63 ); printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), $data{$poster}{percent}; last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ##################################### ## Show threads by number of articles ##################################### unless ( $skipSec{5} ) { if ( keys %threads < $topthreads ) { $count = keys %threads; } else { $count = $topthreads; } printf "%s\n", centred( __x( "Top {count} threads by no. of articles", count => $topthreads ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $thread ( sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } keys %threads ) { my $name = substr( $thread, 0, 65 ); printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), $threads{$thread}{'count'}; last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ############################## ## Show threads by size in KiB ############################## unless ( $skipSec{6} ) { if ( keys %threads < $topthreads ) { $count = keys %threads; } else { $count = $topthreads; } printf "%s\n", centred( __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $thread ( sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } keys %threads ) { my $name = substr( $thread, 0, 65 ); printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), $threads{$thread}{'size'} / 1024; #/ last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ################################## ## Show top 10 cross-posted groups ################################## unless ( $skipSec{7} ) { delete $crossposts{"$newsgroup_name"}; # don't include ours if ( keys %crossposts < $topcrossposts ) { $count = keys %crossposts; } else { $count = $topcrossposts; } printf "%s\n", centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) { printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), $crossposts{$name}; last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ######################### ## Show agents and counts ######################### unless ( $skipSec{8} ) { if ( keys %agents < $topagents ) { $count = keys %agents; } else { $count = $topagents; } printf "%s\n", centred( __x( "Top {count} user agents by poster", count => $topagents ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) { printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), $agents{$agent}; last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ####################### ## Show distinct agents ####################### unless ( $skipSec{9} ) { if ( keys %distinct_agent < $topagents ) { $count = keys %distinct_agent; } else { $count = $topagents; } printf "%s\n", centred( __x( "Top {count} user agents by number of posts", count => $topagents ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } keys %distinct_agent ) { printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), $distinct_agent{$agent}, ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } ############################ ## Show timezones and counts ############################ unless ( $skipSec{10} ) { if ( keys %tz < $toptz ) { $count = keys %tz; } else { $count = $toptz; } printf "%s\n", centred( __x( "Top {count} time zones", count => $toptz ), 76 ); print "=" x 76, "\n"; my $i = 0; foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) { printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; last if ( ++$i == $count ); } print "\n", "=" x 76, "\n"; } } ## helper subs ############################### ## Right pad a string with '.'s ############################### sub rpad { ## Get text to pad, length to pad, pad chr my ( $text, $pad_len, $pad_chr ) = @_; ## DEBUG printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; if ( length($text) > $pad_len ) { $text = substr( $text, 0, $pad_len ); } my $padded = $text . $pad_chr x ( $pad_len - length($text) ); return $padded; } ################## ## Centre a string ################## sub centred { my ( $text, $width ) = @_; # text to centre, size of field to centre in my $pad_len = ( $width - length($text) ) / 2; #/ my $centred = " " x $pad_len . $text; return $centred; } ########################### ## Put commas into a number ########################### sub commify { local $_ = shift; my $number = $_; $_ = int; # Chop non-integer part 1 while s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; my $int_part = $_; my $real_part = ''; if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) { $real_part = $1; } return $int_part . $real_part; } ################################################################ ## Returns a string with leading and trailing whitespace removed ################################################################ sub clean { my $dirty = shift; my $clean = $dirty; $clean =~ s/^\s+|\s+$//g; return $clean; } sub usage { print __ "usage: newsstat.pl NEWS.GROUP", "\n"; exit 1; } sub dmsg { print STDERR @_, "\n"; } sub dmsg2 { my ( $level, @msg ) = @_; print STDERR @msg, "\n" if $level >= DEBUG; }