Rev 23 | Rev 25 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 8 | PointedEar | 1 | #!/usr/bin/env perl |
| 5 | PointedEar | 2 | use strict; |
| 6 | PointedEar | 3 | use warnings; |
| 23 | PointedEar | 4 | require 5.004; |
| 22 | PointedEar | 5 | |
| 6 | #use diagnostics; |
||
| 6 | PointedEar | 7 | use utf8; |
| 8 | use Encode; |
||
| 9 | |||
| 22 | PointedEar | 10 | use constant DEBUG => 0; |
| 11 | |||
| 24 | PointedEar | 12 | ## newsstat.pl |
| 13 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de> |
||
| 14 | ## Based on work by Garry Knight et al. |
||
| 15 | ## |
||
| 16 | ## This program is free software: you can redistribute it and/or modify |
||
| 17 | ## it under the terms of the GNU General Public License as published by |
||
| 18 | ## the Free Software Foundation, either version 3 of the License, or |
||
| 19 | ## (at your option) any later version. |
||
| 20 | ## |
||
| 21 | ## This program is distributed in the hope that it will be useful, |
||
| 22 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 23 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 24 | ## GNU General Public License for more details. |
||
| 25 | ## |
||
| 26 | ## You should have received a copy of the GNU General Public License |
||
| 27 | ## along with this program. If not, see <http://www.gnu.org/licenses/>. |
||
| 28 | |||
| 13 | PointedEar | 29 | ## Print out all text to STDOUT UTF-8 encoded |
| 30 | binmode STDOUT, ':encoding(UTF-8)'; |
||
| 22 | PointedEar | 31 | binmode STDERR, ':encoding(UTF-8)'; |
| 5 | PointedEar | 32 | |
| 23 | PointedEar | 33 | ## L10n |
| 34 | use locale ':not_characters'; |
||
| 35 | |||
| 36 | # setlocale( LC_MESSAGES, '' ); |
||
| 37 | require Number::Format; |
||
| 38 | |||
| 39 | ## i18n |
||
| 40 | ## FIXME: Automatically include resolved '.' in @INC |
||
| 22 | PointedEar | 41 | # print join "\n", @INC; |
| 5 | PointedEar | 42 | |
| 22 | PointedEar | 43 | use Locale::TextDomain ('de.pointedears.newsstat'); |
| 23 | PointedEar | 44 | use POSIX ('locale_h'); |
| 22 | PointedEar | 45 | use Locale::Messages qw (bind_textdomain_filter |
| 23 | PointedEar | 46 | bind_textdomain_codeset |
| 47 | turn_utf_8_on); |
||
| 48 | |||
| 49 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
||
| 22 | PointedEar | 50 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
| 23 | PointedEar | 51 | |
| 22 | PointedEar | 52 | require Mail::Message; |
| 53 | require DateTime; |
||
| 54 | require DateTime::Format::Mail; |
||
| 23 | PointedEar | 55 | |
| 56 | # See comments in previous example |
||
| 57 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
||
| 58 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
||
| 59 | 'decimal_point' }; |
||
| 60 | |||
| 61 | # Apply defaults if values are missing |
||
| 62 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
||
| 63 | $thousands_sep = ' ' unless $thousands_sep; |
||
| 64 | |||
| 65 | # grouping and mon_grouping are packed lists |
||
| 66 | # of small integers (characters) telling the |
||
| 67 | # grouping (thousand_seps and mon_thousand_seps |
||
| 68 | # being the group dividers) of numbers and |
||
| 69 | # monetary quantities. The integers' meanings: |
||
| 70 | # 255 means no more grouping, 0 means repeat |
||
| 71 | # the previous grouping, 1-254 means use that |
||
| 72 | # as the current grouping. Grouping goes from |
||
| 73 | # right to left (low to high digits). In the |
||
| 74 | # below we cheat slightly by never using anything |
||
| 75 | # else than the first grouping (whatever that is). |
||
| 76 | my @grouping; |
||
| 77 | if ($grouping) |
||
| 78 | { |
||
| 79 | @grouping = unpack( "C*", $grouping ); |
||
| 80 | } |
||
| 81 | else |
||
| 82 | { |
||
| 83 | @grouping = (3); |
||
| 84 | } |
||
| 85 | |||
| 86 | ## FIXME: Why don't the defaults work already? |
||
| 87 | my $formatter = new Number::Format( |
||
| 88 | -decimal_point => $decimal_point, |
||
| 89 | -thousands_sep => $thousands_sep, |
||
| 90 | |||
| 91 | # -grouping => $grouping[0] |
||
| 92 | ); |
||
| 93 | |||
| 5 | PointedEar | 94 | ###################### USER CONFIGURATIONS ############################ |
| 95 | |||
| 13 | PointedEar | 96 | ## The name of the group to do stats for |
| 5 | PointedEar | 97 | my $newsgroup_name = $ARGV[0]; |
| 22 | PointedEar | 98 | $newsgroup_name or usage(); |
| 5 | PointedEar | 99 | |
| 13 | PointedEar | 100 | ## Check for removal flags |
| 5 | PointedEar | 101 | my $ix; |
| 102 | my $j; |
||
| 103 | my %skipSec; |
||
| 104 | my @skiplist; |
||
| 105 | my $args = @ARGV; |
||
| 7 | PointedEar | 106 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
| 107 | { |
||
| 108 | $j = $ix + 1; |
||
| 109 | if ( $ARGV[$ix] eq "-x" ) |
||
| 110 | { |
||
| 111 | @skiplist = split( ",", $ARGV[$j] ); |
||
| 112 | } |
||
| 113 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
||
| 114 | { |
||
| 115 | @skiplist = split( ",", $1 ); |
||
| 116 | } |
||
| 5 | PointedEar | 117 | } |
| 7 | PointedEar | 118 | foreach (@skiplist) |
| 119 | { |
||
| 5 | PointedEar | 120 | $skipSec{$_} = 1; |
| 121 | } |
||
| 122 | |||
| 13 | PointedEar | 123 | ## Leafnode users will want /var/spool/news for this variable. |
| 5 | PointedEar | 124 | my $news = "/var/spool/news/"; |
| 125 | |||
| 22 | PointedEar | 126 | ## Number of top or bottom posters to show |
| 127 | my $topposters = 20; |
||
| 5 | PointedEar | 128 | |
| 13 | PointedEar | 129 | ## Number of threads we want to know about |
| 5 | PointedEar | 130 | my $topthreads = 20; |
| 131 | |||
| 13 | PointedEar | 132 | ## Number of cross-posted threads to show |
| 5 | PointedEar | 133 | my $topcrossposts = 10; |
| 134 | |||
| 22 | PointedEar | 135 | ## Number of agents we list |
| 136 | my $topagents = 10; |
||
| 137 | |||
| 13 | PointedEar | 138 | ## Number of time zones to show |
| 5 | PointedEar | 139 | my $toptz = 10; |
| 140 | |||
| 141 | ###################### DATA STRUCTURES ###################### |
||
| 142 | my $group = $newsgroup_name; |
||
| 143 | $group =~ s!\.!/!g; |
||
| 22 | PointedEar | 144 | my %data; # name, count, agent, total, orig, quoted |
| 145 | my $totsize = 0; # holds total sizes of all files |
||
| 146 | my %crossposts; # group, count |
||
| 147 | my %threads; # subject, count |
||
| 148 | my $replies = 0; # total no. of replies |
||
| 149 | my $origposts = 0; # total no. of original posts |
||
| 150 | my %tz; # timezones by count |
||
| 151 | my $earliest; # earliest article we have found |
||
| 152 | my $latest; # latest article we have found |
||
| 153 | my $totheader = 0; # total size of header material |
||
| 154 | my $totbody = 0; # total size of body material |
||
| 155 | my $totsig = 0; # total size of sig material |
||
| 156 | my $totquoted = 0; # total size of quoted material |
||
| 157 | my $totorig = 0; # total size of original material |
||
| 158 | my $totalposts; # total no. of posts considered |
||
| 5 | PointedEar | 159 | my %distinct_agent; |
| 13 | PointedEar | 160 | |
| 161 | ## Used to hold counts of User Agents used |
||
| 162 | my %agents = ( |
||
| 163 | "Compuserver" => 0, |
||
| 164 | "Foorum" => 0, |
||
| 7 | PointedEar | 165 | "Forte Agent" => 0, |
| 166 | "Forte Free Agent" => 0, |
||
| 13 | PointedEar | 167 | "Gnus" => 0, |
| 168 | "KNode" => 0, |
||
| 169 | "MacSOUP" => 0, |
||
| 170 | "MT-NewsWatcher" => 0, |
||
| 7 | PointedEar | 171 | "MicroPlanet Gravity" => 0, |
| 172 | "Microsoft Outlook Express" => 0, |
||
| 13 | PointedEar | 173 | "Microsoft Windows Mail" => 0, |
| 174 | "Mozilla" => 0, |
||
| 175 | "News Rover" => 0, |
||
| 176 | "NN" => 0, |
||
| 177 | "Pan" => 0, |
||
| 178 | "rn" => 0, |
||
| 7 | PointedEar | 179 | "slrn" => 0, |
| 13 | PointedEar | 180 | "Sylpheed" => 0, |
| 7 | PointedEar | 181 | "tin" => 0, |
| 13 | PointedEar | 182 | "VSoup" => 0, |
| 7 | PointedEar | 183 | "WebTV" => 0, |
| 22 | PointedEar | 184 | "Xnews" => 0, |
| 13 | PointedEar | 185 | ); |
| 5 | PointedEar | 186 | |
| 22 | PointedEar | 187 | my $datetime_parser = DateTime::Format::Mail->new(); |
| 188 | $datetime_parser->loose(); |
||
| 5 | PointedEar | 189 | |
| 22 | PointedEar | 190 | my $today = DateTime->today( time_zone => 'UTC' ); |
| 191 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
||
| 192 | my $start = int $prev_month->strftime('%s'); |
||
| 193 | my $numdays = int DateTime->last_day_of_month( |
||
| 194 | year => $prev_month->year(), |
||
| 195 | month => $prev_month->month(), |
||
| 196 | time_zone => $prev_month->time_zone(), |
||
| 197 | )->day(); |
||
| 198 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
||
| 199 | |||
| 200 | dmsg( $start, " to ", $end ) if DEBUG; |
||
| 201 | |||
| 202 | chdir("$news$group") |
||
| 203 | or die __x( |
||
| 204 | "Can't cd to {newsgroup}: {error}\n", |
||
| 205 | newsgroup => "$news$group", |
||
| 206 | error => $! |
||
| 207 | ); |
||
| 208 | opendir( DIR, "." ) |
||
| 209 | or die __x( |
||
| 210 | "Can't open {newsgroup}: {error}\n", |
||
| 211 | newsgroup => "$news$group", |
||
| 212 | error => $! |
||
| 213 | ); |
||
| 214 | |||
| 215 | while ( defined( my $filename = readdir(DIR) ) ) |
||
| 7 | PointedEar | 216 | { |
| 22 | PointedEar | 217 | next unless -f $filename; # only want real files |
| 218 | next if ( $filename eq ".overview" ); # real articles only |
||
| 219 | |||
| 220 | get_article($filename); # read in the article |
||
| 5 | PointedEar | 221 | } |
| 22 | PointedEar | 222 | closedir(DIR); # finished with the directory |
| 7 | PointedEar | 223 | |
| 22 | PointedEar | 224 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
| 225 | |||
| 13 | PointedEar | 226 | ## Post-processing |
| 22 | PointedEar | 227 | count_agents(); # count agents, collapsing versions |
| 228 | fix_percent(); |
||
| 5 | PointedEar | 229 | |
| 22 | PointedEar | 230 | write_data(); |
| 231 | display_results(); |
||
| 5 | PointedEar | 232 | |
| 13 | PointedEar | 233 | ######################################## |
| 22 | PointedEar | 234 | ## Get current article's header and body |
| 13 | PointedEar | 235 | ######################################## |
| 22 | PointedEar | 236 | sub get_article |
| 7 | PointedEar | 237 | { |
| 22 | PointedEar | 238 | my $filename = shift; |
| 5 | PointedEar | 239 | |
| 22 | PointedEar | 240 | open( my $FILE, '<', $filename ) |
| 241 | or |
||
| 242 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
||
| 243 | my $msg = Mail::Message->read($FILE); |
||
| 244 | my $timestamp = $msg->timestamp(); |
||
| 245 | my $date = $msg->study('Date'); |
||
| 5 | PointedEar | 246 | |
| 22 | PointedEar | 247 | ## Disregard article if timestamp is not in range |
| 248 | dmsg($timestamp) if DEBUG; |
||
| 249 | if ( $timestamp < $start or $timestamp >= $end ) |
||
| 7 | PointedEar | 250 | { |
| 22 | PointedEar | 251 | dmsg("Posting on $date ignored.") if DEBUG; |
| 252 | return; |
||
| 7 | PointedEar | 253 | } |
| 5 | PointedEar | 254 | |
| 22 | PointedEar | 255 | $totalposts++; # bump count of articles considered |
| 5 | PointedEar | 256 | |
| 22 | PointedEar | 257 | ## DEBUG |
| 258 | dmsg($date) if DEBUG; |
||
| 13 | PointedEar | 259 | |
| 22 | PointedEar | 260 | ## get stats about the file itself |
| 261 | my $filesize = -s $filename; # get total size of file |
||
| 262 | $totsize += $filesize; # bump total sizes of all files |
||
| 5 | PointedEar | 263 | |
| 22 | PointedEar | 264 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
| 7 | PointedEar | 265 | { |
| 22 | PointedEar | 266 | $earliest = $timestamp; |
| 7 | PointedEar | 267 | } |
| 22 | PointedEar | 268 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
| 7 | PointedEar | 269 | { |
| 22 | PointedEar | 270 | $latest = $timestamp; |
| 7 | PointedEar | 271 | } |
| 13 | PointedEar | 272 | |
| 22 | PointedEar | 273 | #print "timestamp: $timestamp\n"; |
| 5 | PointedEar | 274 | |
| 22 | PointedEar | 275 | ## count header size |
| 276 | $totheader += $msg->head()->size(); |
||
| 5 | PointedEar | 277 | |
| 22 | PointedEar | 278 | ## get the poster's name (MIME-decoded, in UTF-8) |
| 279 | my $poster = $msg->study('From'); |
||
| 280 | if ( defined $poster ) |
||
| 7 | PointedEar | 281 | { |
| 22 | PointedEar | 282 | ## Convert old to new format |
| 283 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
||
| 5 | PointedEar | 284 | |
| 22 | PointedEar | 285 | ## Collapse whitespace |
| 286 | $poster =~ s/\s+/ /g; |
||
| 5 | PointedEar | 287 | |
| 22 | PointedEar | 288 | ## Remove outer quotes; TODO: observe RFC 5322 strictly |
| 289 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
||
| 5 | PointedEar | 290 | |
| 22 | PointedEar | 291 | ## DEBUG |
| 292 | dmsg($poster) if DEBUG; |
||
| 7 | PointedEar | 293 | |
| 22 | PointedEar | 294 | ## seen this one before? |
| 295 | if ( !defined( $data{$poster} ) ) |
||
| 296 | { |
||
| 297 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
||
| 298 | $data{$poster}{'orig'} = 0; |
||
| 299 | $data{$poster}{'quoted'} = 0; |
||
| 300 | } |
||
| 301 | $data{$poster}{'count'}++; # bump count for this poster |
||
| 302 | $data{$poster}{'size'} += $filesize; # total size of file |
||
| 5 | PointedEar | 303 | |
| 22 | PointedEar | 304 | ## The User-Agent and/or X-Newsreader fields |
| 305 | ## for User-Agent by poster |
||
| 306 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
||
| 307 | if ( defined $ua ) |
||
| 7 | PointedEar | 308 | { |
| 22 | PointedEar | 309 | $data{$poster}{'agent'} = $ua; |
| 310 | |||
| 311 | ## DEBUG |
||
| 312 | dmsg($ua) if DEBUG; |
||
| 7 | PointedEar | 313 | } |
| 5 | PointedEar | 314 | |
| 22 | PointedEar | 315 | ## The User Agent for User-Agent by number of posts |
| 316 | get_agent($msg); |
||
| 5 | PointedEar | 317 | |
| 22 | PointedEar | 318 | ## Get all cross-posted newsgroups |
| 319 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
||
| 320 | { |
||
| 321 | $crossposts{$_}++; # bump count for each |
||
| 322 | } |
||
| 5 | PointedEar | 323 | |
| 22 | PointedEar | 324 | ## Get threads |
| 325 | my $thread = $msg->study('Subject'); |
||
| 326 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
||
| 327 | $thread =~ s/\s+/ /g; # collapse whitespace |
||
| 328 | $threads{$thread}{'count'}++; # bump count of this subject |
||
| 329 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
||
| 5 | PointedEar | 330 | |
| 22 | PointedEar | 331 | ## Is this an original post or a reply? |
| 332 | if ( defined $msg->study('References') ) |
||
| 7 | PointedEar | 333 | { |
| 22 | PointedEar | 334 | $replies++; |
| 7 | PointedEar | 335 | } |
| 22 | PointedEar | 336 | else |
| 7 | PointedEar | 337 | { |
| 22 | PointedEar | 338 | $origposts++; |
| 7 | PointedEar | 339 | } |
| 22 | PointedEar | 340 | |
| 341 | ## Get the time zone |
||
| 342 | my $datetime = $datetime_parser->parse_datetime($date); |
||
| 343 | my $tz = $datetime->strftime('%z'); |
||
| 344 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
||
| 345 | $tz{$tz}++; |
||
| 346 | |||
| 347 | ## DEBUG |
||
| 348 | dmsg($tz) if DEBUG; |
||
| 349 | |||
| 350 | #### Now analyse the body text #### |
||
| 351 | my $body = $msg->body(); |
||
| 352 | |||
| 353 | my $insig = 0; |
||
| 354 | my @body = $body->lines; |
||
| 355 | for (@body) |
||
| 7 | PointedEar | 356 | { |
| 22 | PointedEar | 357 | $totbody += length($_); # bump total body size |
| 358 | next if (m{^$>}o); # don't count blank lines in body |
||
| 359 | if ( $insig == 1 ) |
||
| 360 | { |
||
| 361 | |||
| 362 | # bump total sig size |
||
| 363 | $totsig += length($_); |
||
| 364 | } |
||
| 365 | ## are we in a quote line? |
||
| 366 | ## Bill Unruh uses ] quotes, and another poster uses :: |
||
| 367 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
||
| 368 | { |
||
| 369 | ## bump count of quoted chrs |
||
| 370 | $data{$poster}{'quoted'} += length($_); |
||
| 371 | $totquoted += length($_); |
||
| 372 | } |
||
| 373 | elsif (/^-- $/) |
||
| 374 | { |
||
| 375 | $insig = 1; |
||
| 376 | } |
||
| 377 | else |
||
| 378 | { |
||
| 379 | ## We must be processing an original line |
||
| 380 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
||
| 381 | $totorig += length($_); |
||
| 382 | } |
||
| 7 | PointedEar | 383 | } |
| 22 | PointedEar | 384 | |
| 385 | # end for (@body) |
||
| 386 | } |
||
| 387 | |||
| 388 | close($FILE); |
||
| 389 | } |
||
| 390 | |||
| 391 | sub get_agent |
||
| 392 | { |
||
| 393 | my $msg = shift; |
||
| 394 | |||
| 395 | my $ua = |
||
| 396 | $msg->study('User-Agent') |
||
| 397 | or $msg->study('X-Newsreader') |
||
| 398 | or $msg->study('X-Mailer'); |
||
| 399 | if ( not defined $ua ) |
||
| 400 | { |
||
| 401 | my $org = $msg->study('Organization'); |
||
| 402 | if ( defined $org |
||
| 403 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
||
| 7 | PointedEar | 404 | { |
| 22 | PointedEar | 405 | $ua = $org; |
| 7 | PointedEar | 406 | } |
| 22 | PointedEar | 407 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
| 7 | PointedEar | 408 | { |
| 22 | PointedEar | 409 | $ua = "Pine"; |
| 410 | } |
||
| 7 | PointedEar | 411 | } |
| 5 | PointedEar | 412 | |
| 22 | PointedEar | 413 | ## Hopefully found UA, else set to unknown |
| 414 | if ( not defined $ua ) |
||
| 415 | { |
||
| 416 | $ua = __ "unknown"; |
||
| 417 | } |
||
| 5 | PointedEar | 418 | |
| 22 | PointedEar | 419 | $ua = clean($ua); |
| 420 | |||
| 421 | my $raw = $ua; |
||
| 422 | my $agent = $raw; |
||
| 423 | |||
| 424 | ## strip http |
||
| 425 | if ( $raw =~ /.*http.*/ ) |
||
| 7 | PointedEar | 426 | { |
| 22 | PointedEar | 427 | $raw =~ s!posted via!!i; |
| 428 | $raw =~ s!http://!!g; |
||
| 429 | $raw =~ s!/!!g; |
||
| 430 | $raw =~ s! !!g; |
||
| 431 | } |
||
| 5 | PointedEar | 432 | |
| 22 | PointedEar | 433 | ## Fix Outlook from Mac |
| 434 | if ( $raw =~ /^microsoft/i ) |
||
| 435 | { |
||
| 436 | $raw =~ s/-/ /g; |
||
| 437 | } |
||
| 5 | PointedEar | 438 | |
| 22 | PointedEar | 439 | ## Pick out the popular agents |
| 440 | if ( $raw =~ /(outlook express)/i |
||
| 441 | || $raw =~ /(windows mail)/i |
||
| 442 | || $raw =~ /(microplanet gravity)/i |
||
| 443 | || $raw =~ /(news rover)/i |
||
| 444 | || $raw =~ /(forte agent)/i |
||
| 445 | || $raw =~ /(forte free agent)/i ) |
||
| 446 | { |
||
| 447 | $agent = $1; |
||
| 448 | } |
||
| 449 | elsif ( |
||
| 450 | $raw =~ /^( |
||
| 5 | PointedEar | 451 | pan |
| 452 | |sylpheed |
||
| 453 | |slrn |
||
| 454 | |mozilla |
||
| 455 | |knode |
||
| 456 | |tin |
||
| 457 | |hamster |
||
| 458 | |xrn |
||
| 459 | |xnews |
||
| 460 | |aol |
||
| 461 | |gnus |
||
| 462 | |krn |
||
| 463 | |macsoup |
||
| 464 | |messenger |
||
| 465 | |openxp |
||
| 466 | |pine |
||
| 467 | |thoth |
||
| 468 | |turnpike |
||
| 469 | |winvn |
||
| 470 | |vsoup |
||
| 471 | |google |
||
| 472 | |supernews |
||
| 473 | |nn |
||
| 474 | |rn |
||
| 475 | |007 |
||
| 476 | |webtv |
||
| 477 | |compuserve |
||
| 7 | PointedEar | 478 | )/ix |
| 22 | PointedEar | 479 | ) |
| 480 | { |
||
| 481 | $agent = $1; |
||
| 482 | } |
||
| 483 | else |
||
| 484 | { |
||
| 485 | ## Clean up unknown agents |
||
| 486 | if ( $raw =~ m!^(.*?)/! ) |
||
| 7 | PointedEar | 487 | { |
| 488 | $agent = $1; |
||
| 489 | } |
||
| 22 | PointedEar | 490 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
| 7 | PointedEar | 491 | { |
| 22 | PointedEar | 492 | $agent = $1; |
| 493 | } |
||
| 494 | } |
||
| 495 | |||
| 496 | $distinct_agent{$agent}++; |
||
| 497 | return $agent; |
||
| 498 | } |
||
| 499 | ## get_agent |
||
| 500 | |||
| 501 | ######################################### |
||
| 502 | ## Count the User-Agents used, collapsing |
||
| 503 | ## different versions into one per agent. |
||
| 504 | ######################################### |
||
| 505 | sub count_agents |
||
| 506 | { |
||
| 507 | POSTER: |
||
| 508 | foreach my $poster ( keys %data ) |
||
| 509 | { |
||
| 510 | foreach my $agent_name ( keys %distinct_agent ) |
||
| 511 | { # check against known ones |
||
| 512 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
||
| 7 | PointedEar | 513 | { |
| 22 | PointedEar | 514 | $agents{$agent_name}++; |
| 515 | next POSTER; |
||
| 7 | PointedEar | 516 | } |
| 517 | } |
||
| 22 | PointedEar | 518 | $agents{ $data{$poster}{'agent'} }++; |
| 519 | } |
||
| 520 | } # count_agents |
||
| 7 | PointedEar | 521 | |
| 22 | PointedEar | 522 | ############################################# |
| 523 | ## Set orig/total percentages for all posters |
||
| 524 | ############################################# |
||
| 525 | sub fix_percent |
||
| 526 | { |
||
| 527 | foreach my $poster ( keys %data ) |
||
| 528 | { |
||
| 529 | my $percent = 100; |
||
| 530 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
||
| 531 | { |
||
| 532 | $percent = |
||
| 533 | $data{$poster}{'orig'} * 100 / |
||
| 534 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
||
| 535 | } |
||
| 536 | elsif ( $data{$poster}{'orig'} == 0 ) |
||
| 537 | { |
||
| 538 | $percent = 0; |
||
| 539 | } |
||
| 540 | $data{$poster}{'percent'} = $percent; |
||
| 5 | PointedEar | 541 | } |
| 22 | PointedEar | 542 | } |
| 543 | ## fix_percent |
||
| 5 | PointedEar | 544 | |
| 22 | PointedEar | 545 | ################################## |
| 546 | ## Write data structures to a file |
||
| 547 | ################################## |
||
| 548 | sub write_data |
||
| 549 | { |
||
| 550 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
||
| 551 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
||
| 552 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
||
| 553 | print $OUTF |
||
| 554 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
||
| 555 | foreach my $name ( keys %data ) |
||
| 7 | PointedEar | 556 | { |
| 22 | PointedEar | 557 | print $OUTF |
| 558 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
||
| 7 | PointedEar | 559 | } |
| 22 | PointedEar | 560 | print $OUTF |
| 561 | "============================================================================\n"; |
||
| 562 | print $OUTF "Thread subjects\n"; |
||
| 563 | print $OUTF |
||
| 564 | "----------------------------------------------------------------------------\n"; |
||
| 565 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
||
| 566 | { |
||
| 567 | print $OUTF |
||
| 568 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
||
| 569 | } |
||
| 570 | print $OUTF |
||
| 571 | "============================================================================\n"; |
||
| 572 | print $OUTF "Cross-posts\n"; |
||
| 573 | print $OUTF |
||
| 574 | "----------------------------------------------------------------------------\n"; |
||
| 575 | foreach my $name ( sort keys %crossposts ) |
||
| 576 | { |
||
| 577 | print $OUTF "$name : $crossposts{$name}\n"; |
||
| 578 | } |
||
| 579 | print $OUTF |
||
| 580 | "============================================================================\n"; |
||
| 581 | print $OUTF "User agents\n"; |
||
| 582 | print $OUTF |
||
| 583 | "----------------------------------------------------------------------------\n"; |
||
| 584 | foreach my $name ( sort keys %agents ) |
||
| 585 | { |
||
| 586 | print $OUTF "$name : $agents{$name}\n"; |
||
| 587 | } |
||
| 588 | print $OUTF |
||
| 589 | "============================================================================\n"; |
||
| 590 | print $OUTF "Time zones\n"; |
||
| 591 | print $OUTF |
||
| 592 | "----------------------------------------------------------------------------\n"; |
||
| 593 | foreach my $name ( sort keys %tz ) |
||
| 594 | { |
||
| 595 | print $OUTF "$name : $tz{$name}\n"; |
||
| 596 | } |
||
| 597 | close $OUTF; |
||
| 598 | } # write_data |
||
| 5 | PointedEar | 599 | |
| 22 | PointedEar | 600 | sub display_results |
| 601 | { |
||
| 602 | #################### DISPLAY RESULTS ##################### |
||
| 23 | PointedEar | 603 | println( "=" x 76 ); |
| 22 | PointedEar | 604 | printf "%s\n", |
| 605 | centred( |
||
| 606 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
||
| 607 | 76 ); |
||
| 23 | PointedEar | 608 | println( "=" x 76 ); |
| 22 | PointedEar | 609 | printf "%s\n", |
| 610 | centred( |
||
| 23 | PointedEar | 611 | __( |
| 612 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)" |
||
| 613 | ), |
||
| 22 | PointedEar | 614 | 76 |
| 615 | ); |
||
| 616 | print "\n\n"; |
||
| 23 | PointedEar | 617 | printf __"Total posts considered: %s over %d days\n", |
| 618 | $formatter->format_number($totalposts), |
||
| 619 | $formatter->format_number($numdays); |
||
| 620 | my $time_locale = setlocale(LC_TIME); |
||
| 22 | PointedEar | 621 | my $earliest_datetime = DateTime->from_epoch( |
| 23 | PointedEar | 622 | epoch => $earliest, |
| 623 | locale => $time_locale, |
||
| 22 | PointedEar | 624 | time_zone => 'UTC', |
| 625 | ); |
||
| 626 | my $latest_datetime = DateTime->from_epoch( |
||
| 23 | PointedEar | 627 | epoch => $latest, |
| 628 | locale => $time_locale, |
||
| 22 | PointedEar | 629 | time_zone => 'UTC', |
| 630 | ); |
||
| 631 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
||
| 23 | PointedEar | 632 | printf __"Earliest article: %s\n", |
| 633 | $earliest_datetime->strftime($datetime_format); |
||
| 634 | printf __"Latest article: %s\n", |
||
| 635 | $latest_datetime->strftime($datetime_format); |
||
| 636 | printf __"Original articles: %s; replies: %s\n", |
||
| 637 | $formatter->format_number($origposts), |
||
| 638 | $formatter->format_number($replies); |
||
| 639 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
||
| 640 | $formatter->format_number($totsize), |
||
| 641 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
| 642 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
||
| 643 | $formatter->format_number( int( $totalposts / $numdays ) ), |
||
| 644 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
||
| 645 | $formatter->format_number( int( $totsize / $totalposts ) ); |
||
| 646 | |||
| 22 | PointedEar | 647 | my $count = keys %data; |
| 23 | PointedEar | 648 | printf __"Total headers: %s; bodies: %s\n", |
| 649 | $formatter->format_bytes( |
||
| 650 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
||
| 651 | ), |
||
| 652 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
| 22 | PointedEar | 653 | printf __ |
| 23 | PointedEar | 654 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
| 655 | $formatter->format_bytes( |
||
| 656 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
||
| 657 | ), |
||
| 658 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
||
| 659 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
||
| 660 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
| 661 | printf __"Total number of posters: %s, average %s per poster\n", |
||
| 662 | $formatter->format_number($count), |
||
| 663 | $formatter->format_bytes( $totsize / $count, |
||
| 664 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
| 22 | PointedEar | 665 | $count = keys %threads; |
| 23 | PointedEar | 666 | printf __"Total number of threads: %s, average %s per thread\n", |
| 667 | $formatter->format_number($count), |
||
| 668 | $formatter->format_bytes( $totsize / $count, |
||
| 669 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
| 670 | printf __"Total number of user agents: %d\n", |
||
| 671 | $formatter->format_number( scalar keys %agents ); |
||
| 672 | print "\n", "=" x 76, "\n" ; |
||
| 22 | PointedEar | 673 | ######################################## |
| 674 | ## Show posters by article count Sec 1; |
||
| 675 | ######################################## |
||
| 676 | unless ( $skipSec{1} ) |
||
| 7 | PointedEar | 677 | { |
| 22 | PointedEar | 678 | if ( keys %data < $topposters ) |
| 679 | { |
||
| 680 | $count = keys %data; |
||
| 681 | } |
||
| 682 | else |
||
| 683 | { |
||
| 684 | $count = $topposters; |
||
| 685 | } |
||
| 686 | printf "%s\n", |
||
| 687 | centred( |
||
| 23 | PointedEar | 688 | __x( "Top {count} posters by number of articles", count => $topposters ), |
| 689 | 76 |
||
| 690 | ); |
||
| 22 | PointedEar | 691 | print "=" x 76, "\n"; |
| 692 | my $i = 0; |
||
| 693 | foreach |
||
| 694 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
||
| 695 | { |
||
| 696 | my $name = substr( $poster, 0, 65 ); |
||
| 697 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
||
| 698 | $data{$poster}{count}; |
||
| 699 | last if ( ++$i == $count ); |
||
| 700 | } |
||
| 701 | print "\n", "=" x 76, "\n"; |
||
| 7 | PointedEar | 702 | } |
| 22 | PointedEar | 703 | |
| 704 | ###################################### |
||
| 705 | ## Show posters by size in KiB Sec 2; |
||
| 706 | ###################################### |
||
| 707 | unless ( $skipSec{2} ) |
||
| 7 | PointedEar | 708 | { |
| 22 | PointedEar | 709 | if ( keys %data < $topposters ) |
| 710 | { |
||
| 711 | $count = keys %data; |
||
| 712 | } |
||
| 713 | else |
||
| 714 | { |
||
| 715 | $count = $topposters; |
||
| 716 | } |
||
| 717 | printf "%s\n", |
||
| 718 | centred( |
||
| 719 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
||
| 23 | PointedEar | 720 | 76 |
| 721 | ); |
||
| 22 | PointedEar | 722 | print "=" x 76, "\n"; |
| 723 | my $i = 0; |
||
| 724 | foreach |
||
| 725 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
||
| 726 | { |
||
| 727 | my $name = substr( $poster, 0, 62 ); |
||
| 728 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
||
| 729 | $data{$poster}{size} / 1024; #/ |
||
| 730 | last if ( ++$i == $count ); |
||
| 731 | } |
||
| 732 | print "\n", "=" x 76, "\n"; |
||
| 7 | PointedEar | 733 | } |
| 5 | PointedEar | 734 | |
| 22 | PointedEar | 735 | ##################################### |
| 736 | ## Show top posters for original text |
||
| 737 | ##################################### |
||
| 738 | unless ( $skipSec{3} ) |
||
| 7 | PointedEar | 739 | { |
| 22 | PointedEar | 740 | if ( keys %data < $topposters ) |
| 741 | { |
||
| 742 | $count = keys %data; |
||
| 743 | } |
||
| 744 | else |
||
| 745 | { |
||
| 746 | $count = $topposters; |
||
| 747 | } |
||
| 748 | printf "%s\n", |
||
| 749 | centred( |
||
| 750 | __x( |
||
| 751 | "Top {count} responders by original text (> 5 posts)", |
||
| 752 | count => $topposters |
||
| 753 | ), |
||
| 754 | 76 |
||
| 755 | ); |
||
| 756 | print "=" x 76, "\n"; |
||
| 757 | my $i = 0; |
||
| 758 | foreach my $poster ( |
||
| 759 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
||
| 760 | keys %data |
||
| 761 | ) |
||
| 762 | { |
||
| 763 | next if $data{$poster}{quoted} == 0; |
||
| 764 | next if $data{$poster}{count} < 5; |
||
| 765 | my $name = substr( $poster, 0, 63 ); |
||
| 766 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
||
| 767 | $data{$poster}{percent}; |
||
| 768 | last if ( ++$i == $count ); |
||
| 769 | } |
||
| 770 | print "\n", "=" x 76, "\n"; |
||
| 7 | PointedEar | 771 | } |
| 5 | PointedEar | 772 | |
| 22 | PointedEar | 773 | ######################################## |
| 774 | ## Show bottom posters for original text |
||
| 775 | ######################################## |
||
| 776 | unless ( $skipSec{4} ) |
||
| 7 | PointedEar | 777 | { |
| 22 | PointedEar | 778 | if ( keys %data < $topposters ) |
| 7 | PointedEar | 779 | { |
| 22 | PointedEar | 780 | $count = keys %data; |
| 781 | } |
||
| 782 | else |
||
| 783 | { |
||
| 784 | $count = $topposters; |
||
| 785 | } |
||
| 786 | printf "%s\n", |
||
| 787 | centred( |
||
| 788 | __x( |
||
| 789 | "Bottom {count} responders by original text (> 5 posts)", |
||
| 790 | count => $topposters |
||
| 791 | ), |
||
| 792 | 76 |
||
| 793 | ); |
||
| 794 | print "=" x 76, "\n"; |
||
| 795 | my $i = 0; |
||
| 796 | foreach my $poster ( |
||
| 797 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
||
| 798 | keys %data |
||
| 799 | ) |
||
| 800 | { |
||
| 801 | next if $data{$poster}{quoted} == 0; |
||
| 802 | next if $data{$poster}{count} < 5; |
||
| 803 | my $name = substr( $poster, 0, 63 ); |
||
| 804 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
||
| 805 | $data{$poster}{percent}; |
||
| 806 | last if ( ++$i == $count ); |
||
| 807 | } |
||
| 808 | print "\n", "=" x 76, "\n"; |
||
| 809 | } |
||
| 5 | PointedEar | 810 | |
| 22 | PointedEar | 811 | ##################################### |
| 812 | ## Show threads by number of articles |
||
| 813 | ##################################### |
||
| 814 | unless ( $skipSec{5} ) |
||
| 815 | { |
||
| 816 | if ( keys %threads < $topthreads ) |
||
| 817 | { |
||
| 818 | $count = keys %threads; |
||
| 7 | PointedEar | 819 | } |
| 22 | PointedEar | 820 | else |
| 821 | { |
||
| 822 | $count = $topthreads; |
||
| 7 | PointedEar | 823 | } |
| 22 | PointedEar | 824 | printf "%s\n", |
| 23 | PointedEar | 825 | centred( |
| 826 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
||
| 22 | PointedEar | 827 | 76 ); |
| 828 | print "=" x 76, "\n"; |
||
| 829 | my $i = 0; |
||
| 830 | foreach my $thread ( |
||
| 831 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
||
| 832 | keys %threads |
||
| 833 | ) |
||
| 7 | PointedEar | 834 | { |
| 22 | PointedEar | 835 | my $name = substr( $thread, 0, 65 ); |
| 836 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
||
| 837 | $threads{$thread}{'count'}; |
||
| 838 | last if ( ++$i == $count ); |
||
| 7 | PointedEar | 839 | } |
| 22 | PointedEar | 840 | print "\n", "=" x 76, "\n"; |
| 841 | } |
||
| 842 | |||
| 843 | ############################## |
||
| 844 | ## Show threads by size in KiB |
||
| 845 | ############################## |
||
| 846 | unless ( $skipSec{6} ) |
||
| 847 | { |
||
| 848 | if ( keys %threads < $topthreads ) |
||
| 849 | { |
||
| 850 | $count = keys %threads; |
||
| 851 | } |
||
| 7 | PointedEar | 852 | else |
| 853 | { |
||
| 22 | PointedEar | 854 | $count = $topthreads; |
| 855 | } |
||
| 856 | printf "%s\n", |
||
| 23 | PointedEar | 857 | centred( |
| 858 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
||
| 22 | PointedEar | 859 | print "=" x 76, "\n"; |
| 860 | my $i = 0; |
||
| 861 | foreach my $thread ( |
||
| 862 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
||
| 863 | keys %threads |
||
| 864 | ) |
||
| 865 | { |
||
| 866 | my $name = substr( $thread, 0, 65 ); |
||
| 867 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
||
| 868 | $threads{$thread}{'size'} / 1024; #/ |
||
| 869 | last if ( ++$i == $count ); |
||
| 870 | } |
||
| 871 | print "\n", "=" x 76, "\n"; |
||
| 872 | } |
||
| 5 | PointedEar | 873 | |
| 22 | PointedEar | 874 | ################################## |
| 875 | ## Show top 10 cross-posted groups |
||
| 876 | ################################## |
||
| 877 | unless ( $skipSec{7} ) |
||
| 878 | { |
||
| 879 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
||
| 880 | if ( keys %crossposts < $topcrossposts ) |
||
| 881 | { |
||
| 882 | $count = keys %crossposts; |
||
| 7 | PointedEar | 883 | } |
| 22 | PointedEar | 884 | else |
| 885 | { |
||
| 886 | $count = $topcrossposts; |
||
| 887 | } |
||
| 888 | printf "%s\n", |
||
| 23 | PointedEar | 889 | centred( |
| 890 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
||
| 22 | PointedEar | 891 | print "=" x 76, "\n"; |
| 892 | my $i = 0; |
||
| 893 | foreach |
||
| 894 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
||
| 895 | { |
||
| 896 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
||
| 897 | $crossposts{$name}; |
||
| 898 | last if ( ++$i == $count ); |
||
| 899 | } |
||
| 900 | print "\n", "=" x 76, "\n"; |
||
| 901 | } |
||
| 5 | PointedEar | 902 | |
| 22 | PointedEar | 903 | ######################### |
| 904 | ## Show agents and counts |
||
| 905 | ######################### |
||
| 906 | unless ( $skipSec{8} ) |
||
| 907 | { |
||
| 908 | if ( keys %agents < $topagents ) |
||
| 909 | { |
||
| 910 | $count = keys %agents; |
||
| 911 | } |
||
| 912 | else |
||
| 913 | { |
||
| 914 | $count = $topagents; |
||
| 915 | } |
||
| 916 | printf "%s\n", |
||
| 917 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
||
| 918 | 76 ); |
||
| 919 | print "=" x 76, "\n"; |
||
| 920 | my $i = 0; |
||
| 921 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
||
| 922 | { |
||
| 923 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
||
| 924 | $agents{$agent}; |
||
| 925 | last if ( ++$i == $count ); |
||
| 926 | } |
||
| 927 | print "\n", "=" x 76, "\n"; |
||
| 928 | } |
||
| 5 | PointedEar | 929 | |
| 22 | PointedEar | 930 | ####################### |
| 931 | ## Show distinct agents |
||
| 932 | ####################### |
||
| 933 | unless ( $skipSec{9} ) |
||
| 7 | PointedEar | 934 | { |
| 22 | PointedEar | 935 | if ( keys %distinct_agent < $topagents ) |
| 936 | { |
||
| 937 | $count = keys %distinct_agent; |
||
| 7 | PointedEar | 938 | } |
| 22 | PointedEar | 939 | else |
| 940 | { |
||
| 941 | $count = $topagents; |
||
| 942 | } |
||
| 943 | printf "%s\n", |
||
| 944 | centred( |
||
| 945 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
||
| 23 | PointedEar | 946 | 76 |
| 947 | ); |
||
| 22 | PointedEar | 948 | print "=" x 76, "\n"; |
| 949 | my $i = 0; |
||
| 950 | foreach my $agent ( |
||
| 951 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
||
| 952 | keys %distinct_agent |
||
| 953 | ) |
||
| 954 | { |
||
| 955 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
||
| 956 | $distinct_agent{$agent}, |
||
| 957 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
||
| 958 | last if ( ++$i == $count ); |
||
| 959 | } |
||
| 960 | print "\n", "=" x 76, "\n"; |
||
| 7 | PointedEar | 961 | } |
| 5 | PointedEar | 962 | |
| 22 | PointedEar | 963 | ############################ |
| 964 | ## Show timezones and counts |
||
| 965 | ############################ |
||
| 966 | unless ( $skipSec{10} ) |
||
| 7 | PointedEar | 967 | { |
| 22 | PointedEar | 968 | if ( keys %tz < $toptz ) |
| 7 | PointedEar | 969 | { |
| 22 | PointedEar | 970 | $count = keys %tz; |
| 7 | PointedEar | 971 | } |
| 22 | PointedEar | 972 | else |
| 7 | PointedEar | 973 | { |
| 22 | PointedEar | 974 | $count = $toptz; |
| 7 | PointedEar | 975 | } |
| 23 | PointedEar | 976 | printf "%s\n", |
| 977 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
||
| 22 | PointedEar | 978 | print "=" x 76, "\n"; |
| 979 | my $i = 0; |
||
| 980 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
||
| 981 | { |
||
| 982 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
||
| 983 | last if ( ++$i == $count ); |
||
| 984 | } |
||
| 985 | print "\n", "=" x 76, "\n"; |
||
| 7 | PointedEar | 986 | } |
| 5 | PointedEar | 987 | } |
| 988 | |||
| 22 | PointedEar | 989 | ## helper subs |
| 990 | |||
| 13 | PointedEar | 991 | ############################### |
| 992 | ## Right pad a string with '.'s |
||
| 993 | ############################### |
||
| 7 | PointedEar | 994 | sub rpad |
| 995 | { |
||
| 13 | PointedEar | 996 | ## Get text to pad, length to pad, pad chr |
| 7 | PointedEar | 997 | my ( $text, $pad_len, $pad_chr ) = @_; |
| 11 | PointedEar | 998 | |
| 13 | PointedEar | 999 | ## DEBUG |
| 22 | PointedEar | 1000 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
| 11 | PointedEar | 1001 | |
| 7 | PointedEar | 1002 | if ( length($text) > $pad_len ) |
| 1003 | { |
||
| 1004 | $text = substr( $text, 0, $pad_len ); |
||
| 1005 | } |
||
| 1006 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
||
| 1007 | return $padded; |
||
| 5 | PointedEar | 1008 | } |
| 1009 | |||
| 13 | PointedEar | 1010 | ################## |
| 1011 | ## Centre a string |
||
| 1012 | ################## |
||
| 7 | PointedEar | 1013 | sub centred |
| 1014 | { |
||
| 1015 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
||
| 1016 | my $pad_len = ( $width - length($text) ) / 2; #/ |
||
| 1017 | my $centred = " " x $pad_len . $text; |
||
| 1018 | return $centred; |
||
| 5 | PointedEar | 1019 | } |
| 1020 | |||
| 13 | PointedEar | 1021 | ########################### |
| 1022 | ## Put commas into a number |
||
| 1023 | ########################### |
||
| 7 | PointedEar | 1024 | sub commify |
| 1025 | { |
||
| 13 | PointedEar | 1026 | local $_ = shift; |
| 23 | PointedEar | 1027 | my $number = $_; |
| 1028 | $_ = int; # Chop non-integer part |
||
| 22 | PointedEar | 1029 | 1 while |
| 23 | PointedEar | 1030 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
| 1031 | my $int_part = $_; |
||
| 1032 | my $real_part = ''; |
||
| 1033 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
||
| 1034 | { |
||
| 1035 | $real_part = $1; |
||
| 1036 | } |
||
| 1037 | return $int_part . $real_part; |
||
| 5 | PointedEar | 1038 | } |
| 1039 | |||
| 13 | PointedEar | 1040 | ################################################################ |
| 1041 | ## Returns a string with leading and trailing whitespace removed |
||
| 1042 | ################################################################ |
||
| 7 | PointedEar | 1043 | sub clean |
| 1044 | { |
||
| 1045 | my $dirty = shift; |
||
| 1046 | my $clean = $dirty; |
||
| 14 | PointedEar | 1047 | $clean =~ s/^\s+|\s+$//g; |
| 5 | PointedEar | 1048 | |
| 7 | PointedEar | 1049 | return $clean; |
| 5 | PointedEar | 1050 | } |
| 1051 | |||
| 7 | PointedEar | 1052 | sub usage |
| 1053 | { |
||
| 23 | PointedEar | 1054 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
| 7 | PointedEar | 1055 | exit 1; |
| 5 | PointedEar | 1056 | } |
| 1057 | |||
| 22 | PointedEar | 1058 | sub dmsg |
| 7 | PointedEar | 1059 | { |
| 22 | PointedEar | 1060 | print STDERR @_, "\n"; |
| 1061 | } |
||
| 1062 | |||
| 1063 | sub dmsg2 |
||
| 1064 | { |
||
| 1065 | my ( $level, @msg ) = @_; |
||
| 1066 | print STDERR @msg, "\n" if $level >= DEBUG; |
||
| 1067 | } |
||
| 23 | PointedEar | 1068 | |
| 1069 | sub println |
||
| 1070 | { |
||
| 1071 | print @_, "\n"; |
||
| 1072 | } |