Rev 48 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8 | PointedEar | 1 | #!/usr/bin/env perl |
5 | PointedEar | 2 | use strict; |
6 | PointedEar | 3 | use warnings; |
23 | PointedEar | 4 | require 5.004; |
22 | PointedEar | 5 | |
6 | #use diagnostics; |
||
6 | PointedEar | 7 | use utf8; |
8 | |||
30 | PointedEar | 9 | ## NOTE: |
10 | ## Enable and remove binmode when utf8::all has actually become lexically scoped |
||
11 | # use utf8:all; |
||
12 | |||
22 | PointedEar | 13 | use constant DEBUG => 0; |
14 | |||
24 | PointedEar | 15 | ## newsstat.pl |
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de> |
||
17 | ## Based on work by Garry Knight et al. |
||
31 | PointedEar | 18 | ## |
24 | PointedEar | 19 | ## This program is free software: you can redistribute it and/or modify |
20 | ## it under the terms of the GNU General Public License as published by |
||
21 | ## the Free Software Foundation, either version 3 of the License, or |
||
22 | ## (at your option) any later version. |
||
31 | PointedEar | 23 | ## |
24 | PointedEar | 24 | ## This program is distributed in the hope that it will be useful, |
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
27 | ## GNU General Public License for more details. |
||
31 | PointedEar | 28 | ## |
24 | PointedEar | 29 | ## You should have received a copy of the GNU General Public License |
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>. |
||
31 | |||
13 | PointedEar | 32 | ## Print out all text to STDOUT UTF-8 encoded |
33 | binmode STDOUT, ':encoding(UTF-8)'; |
||
22 | PointedEar | 34 | binmode STDERR, ':encoding(UTF-8)'; |
5 | PointedEar | 35 | |
23 | PointedEar | 36 | ## L10n |
37 | use locale ':not_characters'; |
||
38 | |||
39 | # setlocale( LC_MESSAGES, '' ); |
||
40 | require Number::Format; |
||
41 | |||
42 | ## i18n |
||
43 | ## FIXME: Automatically include resolved '.' in @INC |
||
22 | PointedEar | 44 | # print join "\n", @INC; |
5 | PointedEar | 45 | |
22 | PointedEar | 46 | use Locale::TextDomain ('de.pointedears.newsstat'); |
23 | PointedEar | 47 | use POSIX ('locale_h'); |
22 | PointedEar | 48 | use Locale::Messages qw (bind_textdomain_filter |
23 | PointedEar | 49 | bind_textdomain_codeset |
50 | turn_utf_8_on); |
||
51 | |||
52 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
||
22 | PointedEar | 53 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
23 | PointedEar | 54 | |
22 | PointedEar | 55 | require Mail::Message; |
56 | require DateTime; |
||
57 | require DateTime::Format::Mail; |
||
23 | PointedEar | 58 | |
59 | # See comments in previous example |
||
60 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
||
61 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
||
62 | 'decimal_point' }; |
||
63 | |||
64 | # Apply defaults if values are missing |
||
65 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
||
66 | $thousands_sep = ' ' unless $thousands_sep; |
||
67 | |||
68 | # grouping and mon_grouping are packed lists |
||
69 | # of small integers (characters) telling the |
||
70 | # grouping (thousand_seps and mon_thousand_seps |
||
71 | # being the group dividers) of numbers and |
||
72 | # monetary quantities. The integers' meanings: |
||
73 | # 255 means no more grouping, 0 means repeat |
||
74 | # the previous grouping, 1-254 means use that |
||
75 | # as the current grouping. Grouping goes from |
||
76 | # right to left (low to high digits). In the |
||
77 | # below we cheat slightly by never using anything |
||
78 | # else than the first grouping (whatever that is). |
||
79 | my @grouping; |
||
80 | if ($grouping) |
||
81 | { |
||
82 | @grouping = unpack( "C*", $grouping ); |
||
83 | } |
||
84 | else |
||
85 | { |
||
86 | @grouping = (3); |
||
87 | } |
||
88 | |||
89 | ## FIXME: Why don't the defaults work already? |
||
90 | my $formatter = new Number::Format( |
||
91 | -decimal_point => $decimal_point, |
||
92 | -thousands_sep => $thousands_sep, |
||
49 | PointedEar | 93 | -kibi_suffix => ' KiB', |
94 | -mebi_suffix => ' MiB', |
||
95 | -gibi_suffix => ' GiB', |
||
96 | |||
23 | PointedEar | 97 | # -grouping => $grouping[0] |
98 | ); |
||
99 | |||
5 | PointedEar | 100 | ###################### USER CONFIGURATIONS ############################ |
101 | |||
13 | PointedEar | 102 | ## The name of the group to do stats for |
5 | PointedEar | 103 | my $newsgroup_name = $ARGV[0]; |
31 | PointedEar | 104 | $newsgroup_name // usage(); |
5 | PointedEar | 105 | |
13 | PointedEar | 106 | ## Check for removal flags |
5 | PointedEar | 107 | my $ix; |
108 | my $j; |
||
109 | my %skipSec; |
||
110 | my @skiplist; |
||
111 | my $args = @ARGV; |
||
7 | PointedEar | 112 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
113 | { |
||
114 | $j = $ix + 1; |
||
115 | if ( $ARGV[$ix] eq "-x" ) |
||
116 | { |
||
117 | @skiplist = split( ",", $ARGV[$j] ); |
||
118 | } |
||
119 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
||
120 | { |
||
121 | @skiplist = split( ",", $1 ); |
||
122 | } |
||
5 | PointedEar | 123 | } |
7 | PointedEar | 124 | foreach (@skiplist) |
125 | { |
||
5 | PointedEar | 126 | $skipSec{$_} = 1; |
127 | } |
||
128 | |||
13 | PointedEar | 129 | ## Leafnode users will want /var/spool/news for this variable. |
5 | PointedEar | 130 | my $news = "/var/spool/news/"; |
131 | |||
22 | PointedEar | 132 | ## Number of top or bottom posters to show |
133 | my $topposters = 20; |
||
5 | PointedEar | 134 | |
13 | PointedEar | 135 | ## Number of threads we want to know about |
5 | PointedEar | 136 | my $topthreads = 20; |
137 | |||
13 | PointedEar | 138 | ## Number of cross-posted threads to show |
5 | PointedEar | 139 | my $topcrossposts = 10; |
140 | |||
22 | PointedEar | 141 | ## Number of agents we list |
142 | my $topagents = 10; |
||
143 | |||
13 | PointedEar | 144 | ## Number of time zones to show |
5 | PointedEar | 145 | my $toptz = 10; |
146 | |||
147 | ###################### DATA STRUCTURES ###################### |
||
148 | my $group = $newsgroup_name; |
||
149 | $group =~ s!\.!/!g; |
||
22 | PointedEar | 150 | my %data; # name, count, agent, total, orig, quoted |
151 | my $totsize = 0; # holds total sizes of all files |
||
152 | my %crossposts; # group, count |
||
153 | my %threads; # subject, count |
||
154 | my $replies = 0; # total no. of replies |
||
155 | my $origposts = 0; # total no. of original posts |
||
156 | my %tz; # timezones by count |
||
157 | my $earliest; # earliest article we have found |
||
158 | my $latest; # latest article we have found |
||
159 | my $totheader = 0; # total size of header material |
||
160 | my $totbody = 0; # total size of body material |
||
161 | my $totsig = 0; # total size of sig material |
||
162 | my $totquoted = 0; # total size of quoted material |
||
163 | my $totorig = 0; # total size of original material |
||
164 | my $totalposts; # total no. of posts considered |
||
5 | PointedEar | 165 | my %distinct_agent; |
13 | PointedEar | 166 | |
167 | ## Used to hold counts of User Agents used |
||
168 | my %agents = ( |
||
169 | "Compuserver" => 0, |
||
170 | "Foorum" => 0, |
||
7 | PointedEar | 171 | "Forte Agent" => 0, |
172 | "Forte Free Agent" => 0, |
||
13 | PointedEar | 173 | "Gnus" => 0, |
174 | "KNode" => 0, |
||
175 | "MacSOUP" => 0, |
||
176 | "MT-NewsWatcher" => 0, |
||
7 | PointedEar | 177 | "MicroPlanet Gravity" => 0, |
178 | "Microsoft Outlook Express" => 0, |
||
13 | PointedEar | 179 | "Microsoft Windows Mail" => 0, |
180 | "Mozilla" => 0, |
||
181 | "News Rover" => 0, |
||
182 | "NN" => 0, |
||
183 | "Pan" => 0, |
||
184 | "rn" => 0, |
||
7 | PointedEar | 185 | "slrn" => 0, |
13 | PointedEar | 186 | "Sylpheed" => 0, |
7 | PointedEar | 187 | "tin" => 0, |
13 | PointedEar | 188 | "VSoup" => 0, |
7 | PointedEar | 189 | "WebTV" => 0, |
22 | PointedEar | 190 | "Xnews" => 0, |
13 | PointedEar | 191 | ); |
5 | PointedEar | 192 | |
22 | PointedEar | 193 | my $datetime_parser = DateTime::Format::Mail->new(); |
194 | $datetime_parser->loose(); |
||
5 | PointedEar | 195 | |
22 | PointedEar | 196 | my $today = DateTime->today( time_zone => 'UTC' ); |
197 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
||
198 | my $start = int $prev_month->strftime('%s'); |
||
199 | my $numdays = int DateTime->last_day_of_month( |
||
200 | year => $prev_month->year(), |
||
201 | month => $prev_month->month(), |
||
202 | time_zone => $prev_month->time_zone(), |
||
203 | )->day(); |
||
204 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
||
205 | |||
206 | dmsg( $start, " to ", $end ) if DEBUG; |
||
207 | |||
208 | chdir("$news$group") |
||
209 | or die __x( |
||
210 | "Can't cd to {newsgroup}: {error}\n", |
||
211 | newsgroup => "$news$group", |
||
212 | error => $! |
||
213 | ); |
||
214 | opendir( DIR, "." ) |
||
215 | or die __x( |
||
216 | "Can't open {newsgroup}: {error}\n", |
||
217 | newsgroup => "$news$group", |
||
218 | error => $! |
||
219 | ); |
||
220 | |||
221 | while ( defined( my $filename = readdir(DIR) ) ) |
||
7 | PointedEar | 222 | { |
22 | PointedEar | 223 | next unless -f $filename; # only want real files |
224 | next if ( $filename eq ".overview" ); # real articles only |
||
225 | |||
226 | get_article($filename); # read in the article |
||
5 | PointedEar | 227 | } |
22 | PointedEar | 228 | closedir(DIR); # finished with the directory |
7 | PointedEar | 229 | |
22 | PointedEar | 230 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
231 | |||
13 | PointedEar | 232 | ## Post-processing |
22 | PointedEar | 233 | count_agents(); # count agents, collapsing versions |
234 | fix_percent(); |
||
5 | PointedEar | 235 | |
22 | PointedEar | 236 | write_data(); |
237 | display_results(); |
||
5 | PointedEar | 238 | |
13 | PointedEar | 239 | ######################################## |
22 | PointedEar | 240 | ## Get current article's header and body |
13 | PointedEar | 241 | ######################################## |
22 | PointedEar | 242 | sub get_article |
7 | PointedEar | 243 | { |
22 | PointedEar | 244 | my $filename = shift; |
5 | PointedEar | 245 | |
22 | PointedEar | 246 | open( my $FILE, '<', $filename ) |
247 | or |
||
248 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
||
249 | my $msg = Mail::Message->read($FILE); |
||
250 | my $timestamp = $msg->timestamp(); |
||
251 | my $date = $msg->study('Date'); |
||
5 | PointedEar | 252 | |
22 | PointedEar | 253 | ## Disregard article if timestamp is not in range |
254 | dmsg($timestamp) if DEBUG; |
||
31 | PointedEar | 255 | if ( $timestamp < $start || $timestamp >= $end ) |
7 | PointedEar | 256 | { |
22 | PointedEar | 257 | dmsg("Posting on $date ignored.") if DEBUG; |
258 | return; |
||
7 | PointedEar | 259 | } |
5 | PointedEar | 260 | |
22 | PointedEar | 261 | $totalposts++; # bump count of articles considered |
5 | PointedEar | 262 | |
22 | PointedEar | 263 | ## DEBUG |
264 | dmsg($date) if DEBUG; |
||
13 | PointedEar | 265 | |
22 | PointedEar | 266 | ## get stats about the file itself |
267 | my $filesize = -s $filename; # get total size of file |
||
268 | $totsize += $filesize; # bump total sizes of all files |
||
5 | PointedEar | 269 | |
31 | PointedEar | 270 | if ( ( not defined $earliest ) || $timestamp < $earliest ) |
7 | PointedEar | 271 | { |
22 | PointedEar | 272 | $earliest = $timestamp; |
7 | PointedEar | 273 | } |
31 | PointedEar | 274 | elsif ( ( not defined $latest ) || $timestamp > $latest ) |
7 | PointedEar | 275 | { |
22 | PointedEar | 276 | $latest = $timestamp; |
7 | PointedEar | 277 | } |
13 | PointedEar | 278 | |
22 | PointedEar | 279 | #print "timestamp: $timestamp\n"; |
5 | PointedEar | 280 | |
22 | PointedEar | 281 | ## count header size |
282 | $totheader += $msg->head()->size(); |
||
5 | PointedEar | 283 | |
22 | PointedEar | 284 | ## get the poster's name (MIME-decoded, in UTF-8) |
285 | my $poster = $msg->study('From'); |
||
286 | if ( defined $poster ) |
||
7 | PointedEar | 287 | { |
22 | PointedEar | 288 | ## Convert old to new format |
289 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
||
5 | PointedEar | 290 | |
22 | PointedEar | 291 | ## Collapse whitespace |
292 | $poster =~ s/\s+/ /g; |
||
5 | PointedEar | 293 | |
22 | PointedEar | 294 | ## Remove outer quotes; TODO: observe RFC 5322 strictly |
295 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
||
5 | PointedEar | 296 | |
22 | PointedEar | 297 | ## DEBUG |
298 | dmsg($poster) if DEBUG; |
||
7 | PointedEar | 299 | |
22 | PointedEar | 300 | ## seen this one before? |
301 | if ( !defined( $data{$poster} ) ) |
||
302 | { |
||
303 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
||
304 | $data{$poster}{'orig'} = 0; |
||
305 | $data{$poster}{'quoted'} = 0; |
||
306 | } |
||
307 | $data{$poster}{'count'}++; # bump count for this poster |
||
308 | $data{$poster}{'size'} += $filesize; # total size of file |
||
5 | PointedEar | 309 | |
22 | PointedEar | 310 | ## The User-Agent and/or X-Newsreader fields |
311 | ## for User-Agent by poster |
||
31 | PointedEar | 312 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); |
22 | PointedEar | 313 | if ( defined $ua ) |
7 | PointedEar | 314 | { |
22 | PointedEar | 315 | $data{$poster}{'agent'} = $ua; |
316 | |||
317 | ## DEBUG |
||
318 | dmsg($ua) if DEBUG; |
||
7 | PointedEar | 319 | } |
5 | PointedEar | 320 | |
48 | PointedEar | 321 | ## The User Agent for User-Agent by number of articles |
22 | PointedEar | 322 | get_agent($msg); |
5 | PointedEar | 323 | |
22 | PointedEar | 324 | ## Get all cross-posted newsgroups |
325 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
||
326 | { |
||
327 | $crossposts{$_}++; # bump count for each |
||
328 | } |
||
5 | PointedEar | 329 | |
22 | PointedEar | 330 | ## Get threads |
331 | my $thread = $msg->study('Subject'); |
||
48 | PointedEar | 332 | $thread =~ s/^\s*re:\s*//i; # Remove Re: or re: at the start |
33 | PointedEar | 333 | $thread =~ s/\s*\(was:\s*.*\)\s*$//i; # Remove (was: ...) at the end |
48 | PointedEar | 334 | $thread =~ s/\s+/ /g; # collapse whitespace |
335 | $threads{$thread}{'count'}++; # bump count of this subject |
||
336 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
||
5 | PointedEar | 337 | |
22 | PointedEar | 338 | ## Is this an original post or a reply? |
339 | if ( defined $msg->study('References') ) |
||
7 | PointedEar | 340 | { |
22 | PointedEar | 341 | $replies++; |
7 | PointedEar | 342 | } |
22 | PointedEar | 343 | else |
7 | PointedEar | 344 | { |
22 | PointedEar | 345 | $origposts++; |
7 | PointedEar | 346 | } |
22 | PointedEar | 347 | |
348 | ## Get the time zone |
||
349 | my $datetime = $datetime_parser->parse_datetime($date); |
||
350 | my $tz = $datetime->strftime('%z'); |
||
351 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
||
352 | $tz{$tz}++; |
||
353 | |||
354 | ## DEBUG |
||
355 | dmsg($tz) if DEBUG; |
||
356 | |||
357 | #### Now analyse the body text #### |
||
358 | my $body = $msg->body(); |
||
359 | |||
360 | my $insig = 0; |
||
361 | my @body = $body->lines; |
||
362 | for (@body) |
||
7 | PointedEar | 363 | { |
22 | PointedEar | 364 | $totbody += length($_); # bump total body size |
365 | next if (m{^$>}o); # don't count blank lines in body |
||
366 | if ( $insig == 1 ) |
||
367 | { |
||
368 | |||
369 | # bump total sig size |
||
370 | $totsig += length($_); |
||
371 | } |
||
372 | ## are we in a quote line? |
||
373 | ## Bill Unruh uses ] quotes, and another poster uses :: |
||
31 | PointedEar | 374 | elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) |
22 | PointedEar | 375 | { |
376 | ## bump count of quoted chrs |
||
377 | $data{$poster}{'quoted'} += length($_); |
||
378 | $totquoted += length($_); |
||
379 | } |
||
380 | elsif (/^-- $/) |
||
381 | { |
||
382 | $insig = 1; |
||
383 | } |
||
384 | else |
||
385 | { |
||
386 | ## We must be processing an original line |
||
387 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
||
388 | $totorig += length($_); |
||
389 | } |
||
7 | PointedEar | 390 | } |
22 | PointedEar | 391 | |
392 | # end for (@body) |
||
393 | } |
||
394 | |||
395 | close($FILE); |
||
396 | } |
||
397 | |||
398 | sub get_agent |
||
399 | { |
||
400 | my $msg = shift; |
||
401 | |||
31 | PointedEar | 402 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') |
403 | // $msg->study('X-Mailer'); |
||
404 | |||
22 | PointedEar | 405 | if ( not defined $ua ) |
406 | { |
||
407 | my $org = $msg->study('Organization'); |
||
408 | if ( defined $org |
||
409 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
||
7 | PointedEar | 410 | { |
22 | PointedEar | 411 | $ua = $org; |
7 | PointedEar | 412 | } |
22 | PointedEar | 413 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
7 | PointedEar | 414 | { |
22 | PointedEar | 415 | $ua = "Pine"; |
416 | } |
||
7 | PointedEar | 417 | } |
5 | PointedEar | 418 | |
22 | PointedEar | 419 | ## Hopefully found UA, else set to unknown |
420 | if ( not defined $ua ) |
||
421 | { |
||
422 | $ua = __ "unknown"; |
||
423 | } |
||
5 | PointedEar | 424 | |
22 | PointedEar | 425 | $ua = clean($ua); |
426 | |||
427 | my $raw = $ua; |
||
428 | my $agent = $raw; |
||
429 | |||
430 | ## strip http |
||
431 | if ( $raw =~ /.*http.*/ ) |
||
7 | PointedEar | 432 | { |
22 | PointedEar | 433 | $raw =~ s!posted via!!i; |
434 | $raw =~ s!http://!!g; |
||
435 | $raw =~ s!/!!g; |
||
436 | $raw =~ s! !!g; |
||
437 | } |
||
5 | PointedEar | 438 | |
22 | PointedEar | 439 | ## Fix Outlook from Mac |
440 | if ( $raw =~ /^microsoft/i ) |
||
441 | { |
||
442 | $raw =~ s/-/ /g; |
||
443 | } |
||
5 | PointedEar | 444 | |
22 | PointedEar | 445 | ## Pick out the popular agents |
446 | if ( $raw =~ /(outlook express)/i |
||
447 | || $raw =~ /(windows mail)/i |
||
448 | || $raw =~ /(microplanet gravity)/i |
||
449 | || $raw =~ /(news rover)/i |
||
450 | || $raw =~ /(forte agent)/i |
||
451 | || $raw =~ /(forte free agent)/i ) |
||
452 | { |
||
453 | $agent = $1; |
||
454 | } |
||
455 | elsif ( |
||
456 | $raw =~ /^( |
||
5 | PointedEar | 457 | pan |
458 | |sylpheed |
||
459 | |slrn |
||
460 | |mozilla |
||
461 | |knode |
||
462 | |tin |
||
463 | |hamster |
||
464 | |xrn |
||
465 | |xnews |
||
466 | |aol |
||
467 | |gnus |
||
468 | |krn |
||
469 | |macsoup |
||
470 | |messenger |
||
471 | |openxp |
||
472 | |pine |
||
473 | |thoth |
||
474 | |turnpike |
||
475 | |winvn |
||
476 | |vsoup |
||
477 | |google |
||
478 | |supernews |
||
479 | |nn |
||
480 | |rn |
||
481 | |007 |
||
482 | |webtv |
||
483 | |compuserve |
||
7 | PointedEar | 484 | )/ix |
22 | PointedEar | 485 | ) |
486 | { |
||
487 | $agent = $1; |
||
488 | } |
||
489 | else |
||
490 | { |
||
491 | ## Clean up unknown agents |
||
492 | if ( $raw =~ m!^(.*?)/! ) |
||
7 | PointedEar | 493 | { |
494 | $agent = $1; |
||
495 | } |
||
22 | PointedEar | 496 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
7 | PointedEar | 497 | { |
22 | PointedEar | 498 | $agent = $1; |
499 | } |
||
500 | } |
||
501 | |||
502 | $distinct_agent{$agent}++; |
||
503 | return $agent; |
||
504 | } |
||
505 | ## get_agent |
||
506 | |||
507 | ######################################### |
||
508 | ## Count the User-Agents used, collapsing |
||
509 | ## different versions into one per agent. |
||
510 | ######################################### |
||
511 | sub count_agents |
||
512 | { |
||
513 | POSTER: |
||
514 | foreach my $poster ( keys %data ) |
||
515 | { |
||
516 | foreach my $agent_name ( keys %distinct_agent ) |
||
517 | { # check against known ones |
||
518 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
||
7 | PointedEar | 519 | { |
22 | PointedEar | 520 | $agents{$agent_name}++; |
521 | next POSTER; |
||
7 | PointedEar | 522 | } |
523 | } |
||
22 | PointedEar | 524 | $agents{ $data{$poster}{'agent'} }++; |
525 | } |
||
526 | } # count_agents |
||
7 | PointedEar | 527 | |
22 | PointedEar | 528 | ############################################# |
529 | ## Set orig/total percentages for all posters |
||
530 | ############################################# |
||
531 | sub fix_percent |
||
532 | { |
||
533 | foreach my $poster ( keys %data ) |
||
534 | { |
||
535 | my $percent = 100; |
||
536 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
||
537 | { |
||
538 | $percent = |
||
539 | $data{$poster}{'orig'} * 100 / |
||
540 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
||
541 | } |
||
542 | elsif ( $data{$poster}{'orig'} == 0 ) |
||
543 | { |
||
544 | $percent = 0; |
||
545 | } |
||
546 | $data{$poster}{'percent'} = $percent; |
||
5 | PointedEar | 547 | } |
22 | PointedEar | 548 | } |
549 | ## fix_percent |
||
5 | PointedEar | 550 | |
22 | PointedEar | 551 | ################################## |
552 | ## Write data structures to a file |
||
553 | ################################## |
||
554 | sub write_data |
||
555 | { |
||
556 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
||
557 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
||
558 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
||
559 | print $OUTF |
||
560 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
||
561 | foreach my $name ( keys %data ) |
||
7 | PointedEar | 562 | { |
22 | PointedEar | 563 | print $OUTF |
564 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
||
7 | PointedEar | 565 | } |
22 | PointedEar | 566 | print $OUTF |
567 | "============================================================================\n"; |
||
568 | print $OUTF "Thread subjects\n"; |
||
569 | print $OUTF |
||
570 | "----------------------------------------------------------------------------\n"; |
||
571 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
||
572 | { |
||
573 | print $OUTF |
||
574 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
||
575 | } |
||
576 | print $OUTF |
||
577 | "============================================================================\n"; |
||
578 | print $OUTF "Cross-posts\n"; |
||
579 | print $OUTF |
||
580 | "----------------------------------------------------------------------------\n"; |
||
581 | foreach my $name ( sort keys %crossposts ) |
||
582 | { |
||
583 | print $OUTF "$name : $crossposts{$name}\n"; |
||
584 | } |
||
585 | print $OUTF |
||
586 | "============================================================================\n"; |
||
587 | print $OUTF "User agents\n"; |
||
588 | print $OUTF |
||
589 | "----------------------------------------------------------------------------\n"; |
||
590 | foreach my $name ( sort keys %agents ) |
||
591 | { |
||
592 | print $OUTF "$name : $agents{$name}\n"; |
||
593 | } |
||
594 | print $OUTF |
||
595 | "============================================================================\n"; |
||
596 | print $OUTF "Time zones\n"; |
||
597 | print $OUTF |
||
598 | "----------------------------------------------------------------------------\n"; |
||
599 | foreach my $name ( sort keys %tz ) |
||
600 | { |
||
601 | print $OUTF "$name : $tz{$name}\n"; |
||
602 | } |
||
603 | close $OUTF; |
||
604 | } # write_data |
||
5 | PointedEar | 605 | |
22 | PointedEar | 606 | sub display_results |
607 | { |
||
608 | #################### DISPLAY RESULTS ##################### |
||
45 | PointedEar | 609 | print "=" x 76, "\n"; |
22 | PointedEar | 610 | printf "%s\n", |
611 | centred( |
||
48 | PointedEar | 612 | __x( "Analysis of articles to {newsgroup}", newsgroup => $newsgroup_name ), |
613 | 76 |
||
614 | ); |
||
45 | PointedEar | 615 | print "=" x 76, "\n"; |
22 | PointedEar | 616 | printf "%s\n", |
617 | centred( |
||
23 | PointedEar | 618 | __( |
619 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)" |
||
620 | ), |
||
22 | PointedEar | 621 | 76 |
622 | ); |
||
49 | PointedEar | 623 | print "\n"; |
624 | printf __"Total articles considered: %s over %d days\n", |
||
23 | PointedEar | 625 | $formatter->format_number($totalposts), |
626 | $formatter->format_number($numdays); |
||
627 | my $time_locale = setlocale(LC_TIME); |
||
22 | PointedEar | 628 | my $earliest_datetime = DateTime->from_epoch( |
23 | PointedEar | 629 | epoch => $earliest, |
630 | locale => $time_locale, |
||
22 | PointedEar | 631 | time_zone => 'UTC', |
632 | ); |
||
633 | my $latest_datetime = DateTime->from_epoch( |
||
23 | PointedEar | 634 | epoch => $latest, |
635 | locale => $time_locale, |
||
22 | PointedEar | 636 | time_zone => 'UTC', |
637 | ); |
||
638 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
||
49 | PointedEar | 639 | printf __"Earliest article: %s" . "\n", |
23 | PointedEar | 640 | $earliest_datetime->strftime($datetime_format); |
49 | PointedEar | 641 | printf __"Latest article: %s" . "\n", |
23 | PointedEar | 642 | $latest_datetime->strftime($datetime_format); |
49 | PointedEar | 643 | printf __"Original articles: %s; replies: %s" . "\n", |
23 | PointedEar | 644 | $formatter->format_number($origposts), |
645 | $formatter->format_number($replies); |
||
49 | PointedEar | 646 | printf __"Total size of articles: %s bytes (%s)" . "\n", |
23 | PointedEar | 647 | $formatter->format_number($totsize), |
648 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
49 | PointedEar | 649 | printf __"Average %s articles per day, %s per day, %s bytes per article.\n", |
23 | PointedEar | 650 | $formatter->format_number( int( $totalposts / $numdays ) ), |
651 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
||
652 | $formatter->format_number( int( $totsize / $totalposts ) ); |
||
653 | |||
22 | PointedEar | 654 | my $count = keys %data; |
49 | PointedEar | 655 | print "\n"; |
656 | printf __"Total headers: %s; bodies: %s\n", |
||
23 | PointedEar | 657 | $formatter->format_bytes( |
658 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
||
659 | ), |
||
660 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
22 | PointedEar | 661 | printf __ |
23 | PointedEar | 662 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
663 | $formatter->format_bytes( |
||
664 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
||
665 | ), |
||
666 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
||
49 | PointedEar | 667 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ) |
668 | , |
||
23 | PointedEar | 669 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
49 | PointedEar | 670 | print "\n"; |
671 | printf __"Total number of posters: %s, average %s per poster\n", |
||
23 | PointedEar | 672 | $formatter->format_number($count), |
673 | $formatter->format_bytes( $totsize / $count, |
||
674 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
22 | PointedEar | 675 | $count = keys %threads; |
49 | PointedEar | 676 | printf __"Total number of threads: %s, average %s per thread\n", |
23 | PointedEar | 677 | $formatter->format_number($count), |
678 | $formatter->format_bytes( $totsize / $count, |
||
679 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
||
680 | printf __"Total number of user agents: %d\n", |
||
681 | $formatter->format_number( scalar keys %agents ); |
||
31 | PointedEar | 682 | print "\n", "=" x 76, "\n"; |
22 | PointedEar | 683 | ######################################## |
684 | ## Show posters by article count Sec 1; |
||
685 | ######################################## |
||
686 | unless ( $skipSec{1} ) |
||
7 | PointedEar | 687 | { |
22 | PointedEar | 688 | if ( keys %data < $topposters ) |
689 | { |
||
690 | $count = keys %data; |
||
691 | } |
||
692 | else |
||
693 | { |
||
694 | $count = $topposters; |
||
695 | } |
||
696 | printf "%s\n", |
||
697 | centred( |
||
23 | PointedEar | 698 | __x( "Top {count} posters by number of articles", count => $topposters ), |
699 | 76 |
||
700 | ); |
||
22 | PointedEar | 701 | print "=" x 76, "\n"; |
702 | my $i = 0; |
||
703 | foreach |
||
704 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
||
705 | { |
||
706 | my $name = substr( $poster, 0, 65 ); |
||
25 | PointedEar | 707 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
22 | PointedEar | 708 | $data{$poster}{count}; |
709 | last if ( ++$i == $count ); |
||
710 | } |
||
711 | print "\n", "=" x 76, "\n"; |
||
7 | PointedEar | 712 | } |
22 | PointedEar | 713 | |
714 | ###################################### |
||
715 | ## Show posters by size in KiB Sec 2; |
||
716 | ###################################### |
||
717 | unless ( $skipSec{2} ) |
||
7 | PointedEar | 718 | { |
22 | PointedEar | 719 | if ( keys %data < $topposters ) |
720 | { |
||
721 | $count = keys %data; |
||
722 | } |
||
723 | else |
||
724 | { |
||
725 | $count = $topposters; |
||
726 | } |
||
727 | printf "%s\n", |
||
728 | centred( |
||
729 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
||
23 | PointedEar | 730 | 76 |
731 | ); |
||
22 | PointedEar | 732 | print "=" x 76, "\n"; |
733 | my $i = 0; |
||
734 | foreach |
||
735 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
||
736 | { |
||
737 | my $name = substr( $poster, 0, 62 ); |
||
25 | PointedEar | 738 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
22 | PointedEar | 739 | $data{$poster}{size} / 1024; #/ |
740 | last if ( ++$i == $count ); |
||
741 | } |
||
742 | print "\n", "=" x 76, "\n"; |
||
7 | PointedEar | 743 | } |
5 | PointedEar | 744 | |
22 | PointedEar | 745 | ##################################### |
746 | ## Show top posters for original text |
||
747 | ##################################### |
||
31 | PointedEar | 748 | my $topposters_real = 0; |
749 | |||
22 | PointedEar | 750 | unless ( $skipSec{3} ) |
7 | PointedEar | 751 | { |
22 | PointedEar | 752 | if ( keys %data < $topposters ) |
753 | { |
||
754 | $count = keys %data; |
||
755 | } |
||
756 | else |
||
757 | { |
||
758 | $count = $topposters; |
||
759 | } |
||
31 | PointedEar | 760 | |
22 | PointedEar | 761 | printf "%s\n", |
762 | centred( |
||
763 | __x( |
||
48 | PointedEar | 764 | "Top {count} responders by original text (> 5 articles)", |
22 | PointedEar | 765 | count => $topposters |
766 | ), |
||
767 | 76 |
||
768 | ); |
||
769 | print "=" x 76, "\n"; |
||
770 | foreach my $poster ( |
||
771 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
||
772 | keys %data |
||
773 | ) |
||
774 | { |
||
775 | next if $data{$poster}{quoted} == 0; |
||
776 | next if $data{$poster}{count} < 5; |
||
777 | my $name = substr( $poster, 0, 63 ); |
||
31 | PointedEar | 778 | printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, |
779 | rpad( $poster, 63, "." ), |
||
22 | PointedEar | 780 | $data{$poster}{percent}; |
31 | PointedEar | 781 | last if ( ++$topposters_real == $count ); |
22 | PointedEar | 782 | } |
783 | print "\n", "=" x 76, "\n"; |
||
7 | PointedEar | 784 | } |
5 | PointedEar | 785 | |
22 | PointedEar | 786 | ######################################## |
787 | ## Show bottom posters for original text |
||
788 | ######################################## |
||
31 | PointedEar | 789 | |
790 | $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; |
||
791 | |||
22 | PointedEar | 792 | unless ( $skipSec{4} ) |
7 | PointedEar | 793 | { |
22 | PointedEar | 794 | if ( keys %data < $topposters ) |
7 | PointedEar | 795 | { |
22 | PointedEar | 796 | $count = keys %data; |
797 | } |
||
798 | else |
||
799 | { |
||
800 | $count = $topposters; |
||
801 | } |
||
31 | PointedEar | 802 | |
22 | PointedEar | 803 | printf "%s\n", |
804 | centred( |
||
805 | __x( |
||
48 | PointedEar | 806 | "Bottom {count} responders by original text (> 5 articles)", |
22 | PointedEar | 807 | count => $topposters |
808 | ), |
||
809 | 76 |
||
810 | ); |
||
811 | print "=" x 76, "\n"; |
||
812 | my $i = 0; |
||
813 | foreach my $poster ( |
||
814 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
||
815 | keys %data |
||
816 | ) |
||
817 | { |
||
818 | next if $data{$poster}{quoted} == 0; |
||
819 | next if $data{$poster}{count} < 5; |
||
820 | my $name = substr( $poster, 0, 63 ); |
||
25 | PointedEar | 821 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
22 | PointedEar | 822 | $data{$poster}{percent}; |
823 | last if ( ++$i == $count ); |
||
824 | } |
||
825 | print "\n", "=" x 76, "\n"; |
||
826 | } |
||
5 | PointedEar | 827 | |
22 | PointedEar | 828 | ##################################### |
829 | ## Show threads by number of articles |
||
830 | ##################################### |
||
831 | unless ( $skipSec{5} ) |
||
832 | { |
||
833 | if ( keys %threads < $topthreads ) |
||
834 | { |
||
835 | $count = keys %threads; |
||
7 | PointedEar | 836 | } |
22 | PointedEar | 837 | else |
838 | { |
||
839 | $count = $topthreads; |
||
7 | PointedEar | 840 | } |
22 | PointedEar | 841 | printf "%s\n", |
23 | PointedEar | 842 | centred( |
843 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
||
22 | PointedEar | 844 | 76 ); |
845 | print "=" x 76, "\n"; |
||
846 | my $i = 0; |
||
847 | foreach my $thread ( |
||
848 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
||
849 | keys %threads |
||
850 | ) |
||
7 | PointedEar | 851 | { |
22 | PointedEar | 852 | my $name = substr( $thread, 0, 65 ); |
25 | PointedEar | 853 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
22 | PointedEar | 854 | $threads{$thread}{'count'}; |
855 | last if ( ++$i == $count ); |
||
7 | PointedEar | 856 | } |
22 | PointedEar | 857 | print "\n", "=" x 76, "\n"; |
858 | } |
||
859 | |||
860 | ############################## |
||
861 | ## Show threads by size in KiB |
||
862 | ############################## |
||
863 | unless ( $skipSec{6} ) |
||
864 | { |
||
865 | if ( keys %threads < $topthreads ) |
||
866 | { |
||
867 | $count = keys %threads; |
||
868 | } |
||
7 | PointedEar | 869 | else |
870 | { |
||
22 | PointedEar | 871 | $count = $topthreads; |
872 | } |
||
873 | printf "%s\n", |
||
23 | PointedEar | 874 | centred( |
875 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
||
22 | PointedEar | 876 | print "=" x 76, "\n"; |
877 | my $i = 0; |
||
878 | foreach my $thread ( |
||
879 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
||
880 | keys %threads |
||
881 | ) |
||
882 | { |
||
883 | my $name = substr( $thread, 0, 65 ); |
||
25 | PointedEar | 884 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
22 | PointedEar | 885 | $threads{$thread}{'size'} / 1024; #/ |
886 | last if ( ++$i == $count ); |
||
887 | } |
||
888 | print "\n", "=" x 76, "\n"; |
||
889 | } |
||
5 | PointedEar | 890 | |
22 | PointedEar | 891 | ################################## |
892 | ## Show top 10 cross-posted groups |
||
893 | ################################## |
||
894 | unless ( $skipSec{7} ) |
||
895 | { |
||
896 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
||
897 | if ( keys %crossposts < $topcrossposts ) |
||
898 | { |
||
899 | $count = keys %crossposts; |
||
7 | PointedEar | 900 | } |
22 | PointedEar | 901 | else |
902 | { |
||
903 | $count = $topcrossposts; |
||
904 | } |
||
905 | printf "%s\n", |
||
23 | PointedEar | 906 | centred( |
907 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
||
22 | PointedEar | 908 | print "=" x 76, "\n"; |
909 | my $i = 0; |
||
910 | foreach |
||
911 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
||
912 | { |
||
25 | PointedEar | 913 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
22 | PointedEar | 914 | $crossposts{$name}; |
915 | last if ( ++$i == $count ); |
||
916 | } |
||
917 | print "\n", "=" x 76, "\n"; |
||
918 | } |
||
5 | PointedEar | 919 | |
22 | PointedEar | 920 | ######################### |
921 | ## Show agents and counts |
||
922 | ######################### |
||
923 | unless ( $skipSec{8} ) |
||
924 | { |
||
925 | if ( keys %agents < $topagents ) |
||
926 | { |
||
927 | $count = keys %agents; |
||
928 | } |
||
929 | else |
||
930 | { |
||
931 | $count = $topagents; |
||
932 | } |
||
933 | printf "%s\n", |
||
934 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
||
935 | 76 ); |
||
936 | print "=" x 76, "\n"; |
||
937 | my $i = 0; |
||
938 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
||
939 | { |
||
25 | PointedEar | 940 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
22 | PointedEar | 941 | $agents{$agent}; |
942 | last if ( ++$i == $count ); |
||
943 | } |
||
944 | print "\n", "=" x 76, "\n"; |
||
945 | } |
||
5 | PointedEar | 946 | |
22 | PointedEar | 947 | ####################### |
948 | ## Show distinct agents |
||
949 | ####################### |
||
950 | unless ( $skipSec{9} ) |
||
7 | PointedEar | 951 | { |
22 | PointedEar | 952 | if ( keys %distinct_agent < $topagents ) |
953 | { |
||
954 | $count = keys %distinct_agent; |
||
7 | PointedEar | 955 | } |
22 | PointedEar | 956 | else |
957 | { |
||
958 | $count = $topagents; |
||
959 | } |
||
960 | printf "%s\n", |
||
961 | centred( |
||
48 | PointedEar | 962 | __x( |
963 | "Top {count} user agents by number of articles", |
||
964 | count => $topagents |
||
965 | ), |
||
23 | PointedEar | 966 | 76 |
967 | ); |
||
22 | PointedEar | 968 | print "=" x 76, "\n"; |
969 | my $i = 0; |
||
970 | foreach my $agent ( |
||
971 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
||
972 | keys %distinct_agent |
||
973 | ) |
||
974 | { |
||
25 | PointedEar | 975 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
22 | PointedEar | 976 | $distinct_agent{$agent}, |
977 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
||
978 | last if ( ++$i == $count ); |
||
979 | } |
||
980 | print "\n", "=" x 76, "\n"; |
||
7 | PointedEar | 981 | } |
5 | PointedEar | 982 | |
22 | PointedEar | 983 | ############################ |
984 | ## Show timezones and counts |
||
985 | ############################ |
||
986 | unless ( $skipSec{10} ) |
||
7 | PointedEar | 987 | { |
22 | PointedEar | 988 | if ( keys %tz < $toptz ) |
7 | PointedEar | 989 | { |
22 | PointedEar | 990 | $count = keys %tz; |
7 | PointedEar | 991 | } |
22 | PointedEar | 992 | else |
7 | PointedEar | 993 | { |
22 | PointedEar | 994 | $count = $toptz; |
7 | PointedEar | 995 | } |
23 | PointedEar | 996 | printf "%s\n", |
48 | PointedEar | 997 | centred( |
998 | __x( "Top {count} time zones by number of articles", count => $toptz ), |
||
999 | 76 ); |
||
22 | PointedEar | 1000 | print "=" x 76, "\n"; |
1001 | my $i = 0; |
||
1002 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
||
1003 | { |
||
25 | PointedEar | 1004 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
22 | PointedEar | 1005 | last if ( ++$i == $count ); |
1006 | } |
||
1007 | print "\n", "=" x 76, "\n"; |
||
7 | PointedEar | 1008 | } |
5 | PointedEar | 1009 | } |
1010 | |||
22 | PointedEar | 1011 | ## helper subs |
1012 | |||
13 | PointedEar | 1013 | ############################### |
1014 | ## Right pad a string with '.'s |
||
1015 | ############################### |
||
7 | PointedEar | 1016 | sub rpad |
1017 | { |
||
13 | PointedEar | 1018 | ## Get text to pad, length to pad, pad chr |
7 | PointedEar | 1019 | my ( $text, $pad_len, $pad_chr ) = @_; |
11 | PointedEar | 1020 | |
13 | PointedEar | 1021 | ## DEBUG |
22 | PointedEar | 1022 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
11 | PointedEar | 1023 | |
7 | PointedEar | 1024 | if ( length($text) > $pad_len ) |
1025 | { |
||
1026 | $text = substr( $text, 0, $pad_len ); |
||
1027 | } |
||
1028 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
||
1029 | return $padded; |
||
5 | PointedEar | 1030 | } |
1031 | |||
13 | PointedEar | 1032 | ################## |
1033 | ## Centre a string |
||
1034 | ################## |
||
7 | PointedEar | 1035 | sub centred |
1036 | { |
||
1037 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
||
1038 | my $pad_len = ( $width - length($text) ) / 2; #/ |
||
1039 | my $centred = " " x $pad_len . $text; |
||
1040 | return $centred; |
||
5 | PointedEar | 1041 | } |
1042 | |||
13 | PointedEar | 1043 | ########################### |
1044 | ## Put commas into a number |
||
1045 | ########################### |
||
7 | PointedEar | 1046 | sub commify |
1047 | { |
||
13 | PointedEar | 1048 | local $_ = shift; |
23 | PointedEar | 1049 | my $number = $_; |
1050 | $_ = int; # Chop non-integer part |
||
22 | PointedEar | 1051 | 1 while |
23 | PointedEar | 1052 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
1053 | my $int_part = $_; |
||
1054 | my $real_part = ''; |
||
1055 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
||
1056 | { |
||
1057 | $real_part = $1; |
||
1058 | } |
||
1059 | return $int_part . $real_part; |
||
5 | PointedEar | 1060 | } |
1061 | |||
13 | PointedEar | 1062 | ################################################################ |
1063 | ## Returns a string with leading and trailing whitespace removed |
||
1064 | ################################################################ |
||
7 | PointedEar | 1065 | sub clean |
1066 | { |
||
1067 | my $dirty = shift; |
||
1068 | my $clean = $dirty; |
||
14 | PointedEar | 1069 | $clean =~ s/^\s+|\s+$//g; |
5 | PointedEar | 1070 | |
7 | PointedEar | 1071 | return $clean; |
5 | PointedEar | 1072 | } |
1073 | |||
7 | PointedEar | 1074 | sub usage |
1075 | { |
||
45 | PointedEar | 1076 | print __ "usage: newsstat.pl NEWS.GROUP", "\n"; |
7 | PointedEar | 1077 | exit 1; |
5 | PointedEar | 1078 | } |
1079 | |||
22 | PointedEar | 1080 | sub dmsg |
7 | PointedEar | 1081 | { |
22 | PointedEar | 1082 | print STDERR @_, "\n"; |
1083 | } |
||
1084 | |||
1085 | sub dmsg2 |
||
1086 | { |
||
1087 | my ( $level, @msg ) = @_; |
||
1088 | print STDERR @msg, "\n" if $level >= DEBUG; |
||
48 | PointedEar | 1089 | } |