Rev 25 | Rev 31 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 25 | Rev 30 | ||
---|---|---|---|
1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
4 | require 5.004; |
4 | require 5.004; |
5 | 5 | ||
6 | #use diagnostics;
|
6 | #use diagnostics;
|
7 | use utf8; |
7 | use utf8; |
- | 8 | ||
- | 9 | ## NOTE:
|
|
- | 10 | ## Enable and remove binmode when utf8::all has actually become lexically scoped
|
|
8 | use Encode; |
11 | # use utf8:all;
|
9 | 12 | ||
10 | use constant DEBUG => 0; |
13 | use constant DEBUG => 0; |
11 | 14 | ||
12 | ## newsstat.pl
|
15 | ## newsstat.pl
|
13 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
14 | ## Based on work by Garry Knight et al.
|
17 | ## Based on work by Garry Knight et al.
|
15 | ##
|
18 | ##
|
16 | ## This program is free software: you can redistribute it and/or modify
|
19 | ## This program is free software: you can redistribute it and/or modify
|
17 | ## it under the terms of the GNU General Public License as published by
|
20 | ## it under the terms of the GNU General Public License as published by
|
18 | ## the Free Software Foundation, either version 3 of the License, or
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
19 | ## (at your option) any later version.
|
22 | ## (at your option) any later version.
|
20 | ##
|
23 | ##
|
21 | ## This program is distributed in the hope that it will be useful,
|
24 | ## This program is distributed in the hope that it will be useful,
|
22 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
23 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
24 | ## GNU General Public License for more details.
|
27 | ## GNU General Public License for more details.
|
25 | ##
|
28 | ##
|
26 | ## You should have received a copy of the GNU General Public License
|
29 | ## You should have received a copy of the GNU General Public License
|
27 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
28 | 31 | ||
29 | ## Print out all text to STDOUT UTF-8 encoded
|
32 | ## Print out all text to STDOUT UTF-8 encoded
|
30 | binmode STDOUT, ':encoding(UTF-8)'; |
33 | binmode STDOUT, ':encoding(UTF-8)'; |
31 | binmode STDERR, ':encoding(UTF-8)'; |
34 | binmode STDERR, ':encoding(UTF-8)'; |
32 | 35 | ||
33 | ## L10n
|
36 | ## L10n
|
34 | use locale ':not_characters'; |
37 | use locale ':not_characters'; |
35 | 38 | ||
36 | # setlocale( LC_MESSAGES, '' );
|
39 | # setlocale( LC_MESSAGES, '' );
|
37 | require Number::Format; |
40 | require Number::Format; |
38 | 41 | ||
39 | ## i18n
|
42 | ## i18n
|
40 | ## FIXME: Automatically include resolved '.' in @INC
|
43 | ## FIXME: Automatically include resolved '.' in @INC
|
41 | # print join "\n", @INC;
|
44 | # print join "\n", @INC;
|
42 | 45 | ||
43 | use Locale::TextDomain ('de.pointedears.newsstat'); |
46 | use Locale::TextDomain ('de.pointedears.newsstat'); |
44 | use POSIX ('locale_h'); |
47 | use POSIX ('locale_h'); |
45 | use Locale::Messages qw (bind_textdomain_filter |
48 | use Locale::Messages qw (bind_textdomain_filter |
46 | bind_textdomain_codeset |
49 | bind_textdomain_codeset |
47 | turn_utf_8_on); |
50 | turn_utf_8_on); |
48 | 51 | ||
49 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
52 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
50 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
53 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
51 | 54 | ||
52 | require Mail::Message; |
55 | require Mail::Message; |
53 | require DateTime; |
56 | require DateTime; |
54 | require DateTime::Format::Mail; |
57 | require DateTime::Format::Mail; |
55 | 58 | ||
56 | # See comments in previous example
|
59 | # See comments in previous example
|
57 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
60 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
58 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
61 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
59 | 'decimal_point' }; |
62 | 'decimal_point' }; |
60 | 63 | ||
61 | # Apply defaults if values are missing
|
64 | # Apply defaults if values are missing
|
62 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
65 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
63 | $thousands_sep = ' ' unless $thousands_sep; |
66 | $thousands_sep = ' ' unless $thousands_sep; |
64 | 67 | ||
65 | # grouping and mon_grouping are packed lists
|
68 | # grouping and mon_grouping are packed lists
|
66 | # of small integers (characters) telling the
|
69 | # of small integers (characters) telling the
|
67 | # grouping (thousand_seps and mon_thousand_seps
|
70 | # grouping (thousand_seps and mon_thousand_seps
|
68 | # being the group dividers) of numbers and
|
71 | # being the group dividers) of numbers and
|
69 | # monetary quantities. The integers' meanings:
|
72 | # monetary quantities. The integers' meanings:
|
70 | # 255 means no more grouping, 0 means repeat
|
73 | # 255 means no more grouping, 0 means repeat
|
71 | # the previous grouping, 1-254 means use that
|
74 | # the previous grouping, 1-254 means use that
|
72 | # as the current grouping. Grouping goes from
|
75 | # as the current grouping. Grouping goes from
|
73 | # right to left (low to high digits). In the
|
76 | # right to left (low to high digits). In the
|
74 | # below we cheat slightly by never using anything
|
77 | # below we cheat slightly by never using anything
|
75 | # else than the first grouping (whatever that is).
|
78 | # else than the first grouping (whatever that is).
|
76 | my @grouping; |
79 | my @grouping; |
77 | if ($grouping) |
80 | if ($grouping) |
78 | {
|
81 | {
|
79 | @grouping = unpack( "C*", $grouping ); |
82 | @grouping = unpack( "C*", $grouping ); |
80 | }
|
83 | }
|
81 | else
|
84 | else
|
82 | {
|
85 | {
|
83 | @grouping = (3); |
86 | @grouping = (3); |
84 | }
|
87 | }
|
85 | 88 | ||
86 | ## FIXME: Why don't the defaults work already?
|
89 | ## FIXME: Why don't the defaults work already?
|
87 | my $formatter = new Number::Format( |
90 | my $formatter = new Number::Format( |
88 | -decimal_point => $decimal_point, |
91 | -decimal_point => $decimal_point, |
89 | -thousands_sep => $thousands_sep, |
92 | -thousands_sep => $thousands_sep, |
90 | 93 | ||
91 | # -grouping => $grouping[0]
|
94 | # -grouping => $grouping[0]
|
92 | ); |
95 | ); |
93 | 96 | ||
94 | ###################### USER CONFIGURATIONS ############################
|
97 | ###################### USER CONFIGURATIONS ############################
|
95 | 98 | ||
96 | ## The name of the group to do stats for
|
99 | ## The name of the group to do stats for
|
97 | my $newsgroup_name = $ARGV[0]; |
100 | my $newsgroup_name = $ARGV[0]; |
98 | $newsgroup_name or usage(); |
101 | $newsgroup_name or usage(); |
99 | 102 | ||
100 | ## Check for removal flags
|
103 | ## Check for removal flags
|
101 | my $ix; |
104 | my $ix; |
102 | my $j; |
105 | my $j; |
103 | my %skipSec; |
106 | my %skipSec; |
104 | my @skiplist; |
107 | my @skiplist; |
105 | my $args = @ARGV; |
108 | my $args = @ARGV; |
106 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
109 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
107 | {
|
110 | {
|
108 | $j = $ix + 1; |
111 | $j = $ix + 1; |
109 | if ( $ARGV[$ix] eq "-x" ) |
112 | if ( $ARGV[$ix] eq "-x" ) |
110 | {
|
113 | {
|
111 | @skiplist = split( ",", $ARGV[$j] ); |
114 | @skiplist = split( ",", $ARGV[$j] ); |
112 | }
|
115 | }
|
113 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
116 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
114 | {
|
117 | {
|
115 | @skiplist = split( ",", $1 ); |
118 | @skiplist = split( ",", $1 ); |
116 | }
|
119 | }
|
117 | }
|
120 | }
|
118 | foreach (@skiplist) |
121 | foreach (@skiplist) |
119 | {
|
122 | {
|
120 | $skipSec{$_} = 1; |
123 | $skipSec{$_} = 1; |
121 | }
|
124 | }
|
122 | 125 | ||
123 | ## Leafnode users will want /var/spool/news for this variable.
|
126 | ## Leafnode users will want /var/spool/news for this variable.
|
124 | my $news = "/var/spool/news/"; |
127 | my $news = "/var/spool/news/"; |
125 | 128 | ||
126 | ## Number of top or bottom posters to show
|
129 | ## Number of top or bottom posters to show
|
127 | my $topposters = 20; |
130 | my $topposters = 20; |
128 | 131 | ||
129 | ## Number of threads we want to know about
|
132 | ## Number of threads we want to know about
|
130 | my $topthreads = 20; |
133 | my $topthreads = 20; |
131 | 134 | ||
132 | ## Number of cross-posted threads to show
|
135 | ## Number of cross-posted threads to show
|
133 | my $topcrossposts = 10; |
136 | my $topcrossposts = 10; |
134 | 137 | ||
135 | ## Number of agents we list
|
138 | ## Number of agents we list
|
136 | my $topagents = 10; |
139 | my $topagents = 10; |
137 | 140 | ||
138 | ## Number of time zones to show
|
141 | ## Number of time zones to show
|
139 | my $toptz = 10; |
142 | my $toptz = 10; |
140 | 143 | ||
141 | ###################### DATA STRUCTURES ######################
|
144 | ###################### DATA STRUCTURES ######################
|
142 | my $group = $newsgroup_name; |
145 | my $group = $newsgroup_name; |
143 | $group =~ s!\.!/!g; |
146 | $group =~ s!\.!/!g; |
144 | my %data; # name, count, agent, total, orig, quoted |
147 | my %data; # name, count, agent, total, orig, quoted |
145 | my $totsize = 0; # holds total sizes of all files |
148 | my $totsize = 0; # holds total sizes of all files |
146 | my %crossposts; # group, count |
149 | my %crossposts; # group, count |
147 | my %threads; # subject, count |
150 | my %threads; # subject, count |
148 | my $replies = 0; # total no. of replies |
151 | my $replies = 0; # total no. of replies |
149 | my $origposts = 0; # total no. of original posts |
152 | my $origposts = 0; # total no. of original posts |
150 | my %tz; # timezones by count |
153 | my %tz; # timezones by count |
151 | my $earliest; # earliest article we have found |
154 | my $earliest; # earliest article we have found |
152 | my $latest; # latest article we have found |
155 | my $latest; # latest article we have found |
153 | my $totheader = 0; # total size of header material |
156 | my $totheader = 0; # total size of header material |
154 | my $totbody = 0; # total size of body material |
157 | my $totbody = 0; # total size of body material |
155 | my $totsig = 0; # total size of sig material |
158 | my $totsig = 0; # total size of sig material |
156 | my $totquoted = 0; # total size of quoted material |
159 | my $totquoted = 0; # total size of quoted material |
157 | my $totorig = 0; # total size of original material |
160 | my $totorig = 0; # total size of original material |
158 | my $totalposts; # total no. of posts considered |
161 | my $totalposts; # total no. of posts considered |
159 | my %distinct_agent; |
162 | my %distinct_agent; |
160 | 163 | ||
161 | ## Used to hold counts of User Agents used
|
164 | ## Used to hold counts of User Agents used
|
162 | my %agents = ( |
165 | my %agents = ( |
163 | "Compuserver" => 0, |
166 | "Compuserver" => 0, |
164 | "Foorum" => 0, |
167 | "Foorum" => 0, |
165 | "Forte Agent" => 0, |
168 | "Forte Agent" => 0, |
166 | "Forte Free Agent" => 0, |
169 | "Forte Free Agent" => 0, |
167 | "Gnus" => 0, |
170 | "Gnus" => 0, |
168 | "KNode" => 0, |
171 | "KNode" => 0, |
169 | "MacSOUP" => 0, |
172 | "MacSOUP" => 0, |
170 | "MT-NewsWatcher" => 0, |
173 | "MT-NewsWatcher" => 0, |
171 | "MicroPlanet Gravity" => 0, |
174 | "MicroPlanet Gravity" => 0, |
172 | "Microsoft Outlook Express" => 0, |
175 | "Microsoft Outlook Express" => 0, |
173 | "Microsoft Windows Mail" => 0, |
176 | "Microsoft Windows Mail" => 0, |
174 | "Mozilla" => 0, |
177 | "Mozilla" => 0, |
175 | "News Rover" => 0, |
178 | "News Rover" => 0, |
176 | "NN" => 0, |
179 | "NN" => 0, |
177 | "Pan" => 0, |
180 | "Pan" => 0, |
178 | "rn" => 0, |
181 | "rn" => 0, |
179 | "slrn" => 0, |
182 | "slrn" => 0, |
180 | "Sylpheed" => 0, |
183 | "Sylpheed" => 0, |
181 | "tin" => 0, |
184 | "tin" => 0, |
182 | "VSoup" => 0, |
185 | "VSoup" => 0, |
183 | "WebTV" => 0, |
186 | "WebTV" => 0, |
184 | "Xnews" => 0, |
187 | "Xnews" => 0, |
185 | ); |
188 | ); |
186 | 189 | ||
187 | my $datetime_parser = DateTime::Format::Mail->new(); |
190 | my $datetime_parser = DateTime::Format::Mail->new(); |
188 | $datetime_parser->loose(); |
191 | $datetime_parser->loose(); |
189 | 192 | ||
190 | my $today = DateTime->today( time_zone => 'UTC' ); |
193 | my $today = DateTime->today( time_zone => 'UTC' ); |
191 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
194 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
192 | my $start = int $prev_month->strftime('%s'); |
195 | my $start = int $prev_month->strftime('%s'); |
193 | my $numdays = int DateTime->last_day_of_month( |
196 | my $numdays = int DateTime->last_day_of_month( |
194 | year => $prev_month->year(), |
197 | year => $prev_month->year(), |
195 | month => $prev_month->month(), |
198 | month => $prev_month->month(), |
196 | time_zone => $prev_month->time_zone(), |
199 | time_zone => $prev_month->time_zone(), |
197 | )->day(); |
200 | )->day(); |
198 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
201 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
199 | 202 | ||
200 | dmsg( $start, " to ", $end ) if DEBUG; |
203 | dmsg( $start, " to ", $end ) if DEBUG; |
201 | 204 | ||
202 | chdir("$news$group") |
205 | chdir("$news$group") |
203 | or die __x( |
206 | or die __x( |
204 | "Can't cd to {newsgroup}: {error}\n", |
207 | "Can't cd to {newsgroup}: {error}\n", |
205 | newsgroup => "$news$group", |
208 | newsgroup => "$news$group", |
206 | error => $! |
209 | error => $! |
207 | ); |
210 | ); |
208 | opendir( DIR, "." ) |
211 | opendir( DIR, "." ) |
209 | or die __x( |
212 | or die __x( |
210 | "Can't open {newsgroup}: {error}\n", |
213 | "Can't open {newsgroup}: {error}\n", |
211 | newsgroup => "$news$group", |
214 | newsgroup => "$news$group", |
212 | error => $! |
215 | error => $! |
213 | ); |
216 | ); |
214 | 217 | ||
215 | while ( defined( my $filename = readdir(DIR) ) ) |
218 | while ( defined( my $filename = readdir(DIR) ) ) |
216 | {
|
219 | {
|
217 | next unless -f $filename; # only want real files |
220 | next unless -f $filename; # only want real files |
218 | next if ( $filename eq ".overview" ); # real articles only |
221 | next if ( $filename eq ".overview" ); # real articles only |
219 | 222 | ||
220 | get_article($filename); # read in the article |
223 | get_article($filename); # read in the article |
221 | }
|
224 | }
|
222 | closedir(DIR); # finished with the directory |
225 | closedir(DIR); # finished with the directory |
223 | 226 | ||
224 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
227 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
225 | 228 | ||
226 | ## Post-processing
|
229 | ## Post-processing
|
227 | count_agents(); # count agents, collapsing versions |
230 | count_agents(); # count agents, collapsing versions |
228 | fix_percent(); |
231 | fix_percent(); |
229 | 232 | ||
230 | write_data(); |
233 | write_data(); |
231 | display_results(); |
234 | display_results(); |
232 | 235 | ||
233 | ########################################
|
236 | ########################################
|
234 | ## Get current article's header and body
|
237 | ## Get current article's header and body
|
235 | ########################################
|
238 | ########################################
|
236 | sub get_article
|
239 | sub get_article
|
237 | {
|
240 | {
|
238 | my $filename = shift; |
241 | my $filename = shift; |
239 | 242 | ||
240 | open( my $FILE, '<', $filename ) |
243 | open( my $FILE, '<', $filename ) |
241 | or
|
244 | or
|
242 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
245 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
243 | my $msg = Mail::Message->read($FILE); |
246 | my $msg = Mail::Message->read($FILE); |
244 | my $timestamp = $msg->timestamp(); |
247 | my $timestamp = $msg->timestamp(); |
245 | my $date = $msg->study('Date'); |
248 | my $date = $msg->study('Date'); |
246 | 249 | ||
247 | ## Disregard article if timestamp is not in range
|
250 | ## Disregard article if timestamp is not in range
|
248 | dmsg($timestamp) if DEBUG; |
251 | dmsg($timestamp) if DEBUG; |
249 | if ( $timestamp < $start or $timestamp >= $end ) |
252 | if ( $timestamp < $start or $timestamp >= $end ) |
250 | {
|
253 | {
|
251 | dmsg("Posting on $date ignored.") if DEBUG; |
254 | dmsg("Posting on $date ignored.") if DEBUG; |
252 | return; |
255 | return; |
253 | }
|
256 | }
|
254 | 257 | ||
255 | $totalposts++; # bump count of articles considered |
258 | $totalposts++; # bump count of articles considered |
256 | 259 | ||
257 | ## DEBUG
|
260 | ## DEBUG
|
258 | dmsg($date) if DEBUG; |
261 | dmsg($date) if DEBUG; |
259 | 262 | ||
260 | ## get stats about the file itself
|
263 | ## get stats about the file itself
|
261 | my $filesize = -s $filename; # get total size of file |
264 | my $filesize = -s $filename; # get total size of file |
262 | $totsize += $filesize; # bump total sizes of all files |
265 | $totsize += $filesize; # bump total sizes of all files |
263 | 266 | ||
264 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
267 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
265 | {
|
268 | {
|
266 | $earliest = $timestamp; |
269 | $earliest = $timestamp; |
267 | }
|
270 | }
|
268 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
271 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
269 | {
|
272 | {
|
270 | $latest = $timestamp; |
273 | $latest = $timestamp; |
271 | }
|
274 | }
|
272 | 275 | ||
273 | #print "timestamp: $timestamp\n";
|
276 | #print "timestamp: $timestamp\n";
|
274 | 277 | ||
275 | ## count header size
|
278 | ## count header size
|
276 | $totheader += $msg->head()->size(); |
279 | $totheader += $msg->head()->size(); |
277 | 280 | ||
278 | ## get the poster's name (MIME-decoded, in UTF-8)
|
281 | ## get the poster's name (MIME-decoded, in UTF-8)
|
279 | my $poster = $msg->study('From'); |
282 | my $poster = $msg->study('From'); |
280 | if ( defined $poster ) |
283 | if ( defined $poster ) |
281 | {
|
284 | {
|
282 | ## Convert old to new format
|
285 | ## Convert old to new format
|
283 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
286 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
284 | 287 | ||
285 | ## Collapse whitespace
|
288 | ## Collapse whitespace
|
286 | $poster =~ s/\s+/ /g; |
289 | $poster =~ s/\s+/ /g; |
287 | 290 | ||
288 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
291 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
289 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
292 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
290 | 293 | ||
291 | ## DEBUG
|
294 | ## DEBUG
|
292 | dmsg($poster) if DEBUG; |
295 | dmsg($poster) if DEBUG; |
293 | 296 | ||
294 | ## seen this one before?
|
297 | ## seen this one before?
|
295 | if ( !defined( $data{$poster} ) ) |
298 | if ( !defined( $data{$poster} ) ) |
296 | {
|
299 | {
|
297 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
300 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
298 | $data{$poster}{'orig'} = 0; |
301 | $data{$poster}{'orig'} = 0; |
299 | $data{$poster}{'quoted'} = 0; |
302 | $data{$poster}{'quoted'} = 0; |
300 | }
|
303 | }
|
301 | $data{$poster}{'count'}++; # bump count for this poster |
304 | $data{$poster}{'count'}++; # bump count for this poster |
302 | $data{$poster}{'size'} += $filesize; # total size of file |
305 | $data{$poster}{'size'} += $filesize; # total size of file |
303 | 306 | ||
304 | ## The User-Agent and/or X-Newsreader fields
|
307 | ## The User-Agent and/or X-Newsreader fields
|
305 | ## for User-Agent by poster
|
308 | ## for User-Agent by poster
|
306 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
309 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
307 | if ( defined $ua ) |
310 | if ( defined $ua ) |
308 | {
|
311 | {
|
309 | $data{$poster}{'agent'} = $ua; |
312 | $data{$poster}{'agent'} = $ua; |
310 | 313 | ||
311 | ## DEBUG
|
314 | ## DEBUG
|
312 | dmsg($ua) if DEBUG; |
315 | dmsg($ua) if DEBUG; |
313 | }
|
316 | }
|
314 | 317 | ||
315 | ## The User Agent for User-Agent by number of posts
|
318 | ## The User Agent for User-Agent by number of posts
|
316 | get_agent($msg); |
319 | get_agent($msg); |
317 | 320 | ||
318 | ## Get all cross-posted newsgroups
|
321 | ## Get all cross-posted newsgroups
|
319 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
322 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
320 | {
|
323 | {
|
321 | $crossposts{$_}++; # bump count for each |
324 | $crossposts{$_}++; # bump count for each |
322 | }
|
325 | }
|
323 | 326 | ||
324 | ## Get threads
|
327 | ## Get threads
|
325 | my $thread = $msg->study('Subject'); |
328 | my $thread = $msg->study('Subject'); |
326 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
329 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
327 | $thread =~ s/\s+/ /g; # collapse whitespace |
330 | $thread =~ s/\s+/ /g; # collapse whitespace |
328 | $threads{$thread}{'count'}++; # bump count of this subject |
331 | $threads{$thread}{'count'}++; # bump count of this subject |
329 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
332 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
330 | 333 | ||
331 | ## Is this an original post or a reply?
|
334 | ## Is this an original post or a reply?
|
332 | if ( defined $msg->study('References') ) |
335 | if ( defined $msg->study('References') ) |
333 | {
|
336 | {
|
334 | $replies++; |
337 | $replies++; |
335 | }
|
338 | }
|
336 | else
|
339 | else
|
337 | {
|
340 | {
|
338 | $origposts++; |
341 | $origposts++; |
339 | }
|
342 | }
|
340 | 343 | ||
341 | ## Get the time zone
|
344 | ## Get the time zone
|
342 | my $datetime = $datetime_parser->parse_datetime($date); |
345 | my $datetime = $datetime_parser->parse_datetime($date); |
343 | my $tz = $datetime->strftime('%z'); |
346 | my $tz = $datetime->strftime('%z'); |
344 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
347 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
345 | $tz{$tz}++; |
348 | $tz{$tz}++; |
346 | 349 | ||
347 | ## DEBUG
|
350 | ## DEBUG
|
348 | dmsg($tz) if DEBUG; |
351 | dmsg($tz) if DEBUG; |
349 | 352 | ||
350 | #### Now analyse the body text ####
|
353 | #### Now analyse the body text ####
|
351 | my $body = $msg->body(); |
354 | my $body = $msg->body(); |
352 | 355 | ||
353 | my $insig = 0; |
356 | my $insig = 0; |
354 | my @body = $body->lines; |
357 | my @body = $body->lines; |
355 | for (@body) |
358 | for (@body) |
356 | {
|
359 | {
|
357 | $totbody += length($_); # bump total body size |
360 | $totbody += length($_); # bump total body size |
358 | next if (m{^$>}o); # don't count blank lines in body |
361 | next if (m{^$>}o); # don't count blank lines in body |
359 | if ( $insig == 1 ) |
362 | if ( $insig == 1 ) |
360 | {
|
363 | {
|
361 | 364 | ||
362 | # bump total sig size
|
365 | # bump total sig size
|
363 | $totsig += length($_); |
366 | $totsig += length($_); |
364 | }
|
367 | }
|
365 | ## are we in a quote line?
|
368 | ## are we in a quote line?
|
366 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
367 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
370 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
368 | {
|
371 | {
|
369 | ## bump count of quoted chrs
|
372 | ## bump count of quoted chrs
|
370 | $data{$poster}{'quoted'} += length($_); |
373 | $data{$poster}{'quoted'} += length($_); |
371 | $totquoted += length($_); |
374 | $totquoted += length($_); |
372 | }
|
375 | }
|
373 | elsif (/^-- $/) |
376 | elsif (/^-- $/) |
374 | {
|
377 | {
|
375 | $insig = 1; |
378 | $insig = 1; |
376 | }
|
379 | }
|
377 | else
|
380 | else
|
378 | {
|
381 | {
|
379 | ## We must be processing an original line
|
382 | ## We must be processing an original line
|
380 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
383 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
381 | $totorig += length($_); |
384 | $totorig += length($_); |
382 | }
|
385 | }
|
383 | }
|
386 | }
|
384 | 387 | ||
385 | # end for (@body)
|
388 | # end for (@body)
|
386 | }
|
389 | }
|
387 | 390 | ||
388 | close($FILE); |
391 | close($FILE); |
389 | }
|
392 | }
|
390 | 393 | ||
391 | sub get_agent
|
394 | sub get_agent
|
392 | {
|
395 | {
|
393 | my $msg = shift; |
396 | my $msg = shift; |
394 | 397 | ||
395 | my $ua = |
398 | my $ua = |
396 | $msg->study('User-Agent') |
399 | $msg->study('User-Agent') |
397 | or $msg->study('X-Newsreader') |
400 | or $msg->study('X-Newsreader') |
398 | or $msg->study('X-Mailer'); |
401 | or $msg->study('X-Mailer'); |
399 | if ( not defined $ua ) |
402 | if ( not defined $ua ) |
400 | {
|
403 | {
|
401 | my $org = $msg->study('Organization'); |
404 | my $org = $msg->study('Organization'); |
402 | if ( defined $org |
405 | if ( defined $org |
403 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
406 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
404 | {
|
407 | {
|
405 | $ua = $org; |
408 | $ua = $org; |
406 | }
|
409 | }
|
407 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
410 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
408 | {
|
411 | {
|
409 | $ua = "Pine"; |
412 | $ua = "Pine"; |
410 | }
|
413 | }
|
411 | }
|
414 | }
|
412 | 415 | ||
413 | ## Hopefully found UA, else set to unknown
|
416 | ## Hopefully found UA, else set to unknown
|
414 | if ( not defined $ua ) |
417 | if ( not defined $ua ) |
415 | {
|
418 | {
|
416 | $ua = __ "unknown"; |
419 | $ua = __ "unknown"; |
417 | }
|
420 | }
|
418 | 421 | ||
419 | $ua = clean($ua); |
422 | $ua = clean($ua); |
420 | 423 | ||
421 | my $raw = $ua; |
424 | my $raw = $ua; |
422 | my $agent = $raw; |
425 | my $agent = $raw; |
423 | 426 | ||
424 | ## strip http
|
427 | ## strip http
|
425 | if ( $raw =~ /.*http.*/ ) |
428 | if ( $raw =~ /.*http.*/ ) |
426 | {
|
429 | {
|
427 | $raw =~ s!posted via!!i; |
430 | $raw =~ s!posted via!!i; |
428 | $raw =~ s!http://!!g; |
431 | $raw =~ s!http://!!g; |
429 | $raw =~ s!/!!g; |
432 | $raw =~ s!/!!g; |
430 | $raw =~ s! !!g; |
433 | $raw =~ s! !!g; |
431 | }
|
434 | }
|
432 | 435 | ||
433 | ## Fix Outlook from Mac
|
436 | ## Fix Outlook from Mac
|
434 | if ( $raw =~ /^microsoft/i ) |
437 | if ( $raw =~ /^microsoft/i ) |
435 | {
|
438 | {
|
436 | $raw =~ s/-/ /g; |
439 | $raw =~ s/-/ /g; |
437 | }
|
440 | }
|
438 | 441 | ||
439 | ## Pick out the popular agents
|
442 | ## Pick out the popular agents
|
440 | if ( $raw =~ /(outlook express)/i |
443 | if ( $raw =~ /(outlook express)/i |
441 | || $raw =~ /(windows mail)/i |
444 | || $raw =~ /(windows mail)/i |
442 | || $raw =~ /(microplanet gravity)/i |
445 | || $raw =~ /(microplanet gravity)/i |
443 | || $raw =~ /(news rover)/i |
446 | || $raw =~ /(news rover)/i |
444 | || $raw =~ /(forte agent)/i |
447 | || $raw =~ /(forte agent)/i |
445 | || $raw =~ /(forte free agent)/i ) |
448 | || $raw =~ /(forte free agent)/i ) |
446 | {
|
449 | {
|
447 | $agent = $1; |
450 | $agent = $1; |
448 | }
|
451 | }
|
449 | elsif ( |
452 | elsif ( |
450 | $raw =~ /^( |
453 | $raw =~ /^( |
451 | pan |
454 | pan |
452 | |sylpheed
|
455 | |sylpheed
|
453 | |slrn
|
456 | |slrn
|
454 | |mozilla
|
457 | |mozilla
|
455 | |knode
|
458 | |knode
|
456 | |tin
|
459 | |tin
|
457 | |hamster
|
460 | |hamster
|
458 | |xrn
|
461 | |xrn
|
459 | |xnews
|
462 | |xnews
|
460 | |aol
|
463 | |aol
|
461 | |gnus
|
464 | |gnus
|
462 | |krn
|
465 | |krn
|
463 | |macsoup
|
466 | |macsoup
|
464 | |messenger
|
467 | |messenger
|
465 | |openxp
|
468 | |openxp
|
466 | |pine
|
469 | |pine
|
467 | |thoth
|
470 | |thoth
|
468 | |turnpike
|
471 | |turnpike
|
469 | |winvn
|
472 | |winvn
|
470 | |vsoup
|
473 | |vsoup
|
471 | |google
|
474 | |google
|
472 | |supernews
|
475 | |supernews
|
473 | |nn
|
476 | |nn
|
474 | |rn
|
477 | |rn
|
475 | |007
|
478 | |007
|
476 | |webtv
|
479 | |webtv
|
477 | |compuserve
|
480 | |compuserve
|
478 | )/ix |
481 | )/ix |
479 | )
|
482 | )
|
480 | {
|
483 | {
|
481 | $agent = $1; |
484 | $agent = $1; |
482 | }
|
485 | }
|
483 | else
|
486 | else
|
484 | {
|
487 | {
|
485 | ## Clean up unknown agents
|
488 | ## Clean up unknown agents
|
486 | if ( $raw =~ m!^(.*?)/! ) |
489 | if ( $raw =~ m!^(.*?)/! ) |
487 | {
|
490 | {
|
488 | $agent = $1; |
491 | $agent = $1; |
489 | }
|
492 | }
|
490 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
493 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
491 | {
|
494 | {
|
492 | $agent = $1; |
495 | $agent = $1; |
493 | }
|
496 | }
|
494 | }
|
497 | }
|
495 | 498 | ||
496 | $distinct_agent{$agent}++; |
499 | $distinct_agent{$agent}++; |
497 | return $agent; |
500 | return $agent; |
498 | }
|
501 | }
|
499 | ## get_agent
|
502 | ## get_agent
|
500 | 503 | ||
501 | #########################################
|
504 | #########################################
|
502 | ## Count the User-Agents used, collapsing
|
505 | ## Count the User-Agents used, collapsing
|
503 | ## different versions into one per agent.
|
506 | ## different versions into one per agent.
|
504 | #########################################
|
507 | #########################################
|
505 | sub count_agents
|
508 | sub count_agents
|
506 | {
|
509 | {
|
507 | POSTER:
|
510 | POSTER:
|
508 | foreach my $poster ( keys %data ) |
511 | foreach my $poster ( keys %data ) |
509 | {
|
512 | {
|
510 | foreach my $agent_name ( keys %distinct_agent ) |
513 | foreach my $agent_name ( keys %distinct_agent ) |
511 | { # check against known ones |
514 | { # check against known ones |
512 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
515 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
513 | {
|
516 | {
|
514 | $agents{$agent_name}++; |
517 | $agents{$agent_name}++; |
515 | next POSTER; |
518 | next POSTER; |
516 | }
|
519 | }
|
517 | }
|
520 | }
|
518 | $agents{ $data{$poster}{'agent'} }++; |
521 | $agents{ $data{$poster}{'agent'} }++; |
519 | }
|
522 | }
|
520 | } # count_agents |
523 | } # count_agents |
521 | 524 | ||
522 | #############################################
|
525 | #############################################
|
523 | ## Set orig/total percentages for all posters
|
526 | ## Set orig/total percentages for all posters
|
524 | #############################################
|
527 | #############################################
|
525 | sub fix_percent
|
528 | sub fix_percent
|
526 | {
|
529 | {
|
527 | foreach my $poster ( keys %data ) |
530 | foreach my $poster ( keys %data ) |
528 | {
|
531 | {
|
529 | my $percent = 100; |
532 | my $percent = 100; |
530 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
533 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
531 | {
|
534 | {
|
532 | $percent = |
535 | $percent = |
533 | $data{$poster}{'orig'} * 100 / |
536 | $data{$poster}{'orig'} * 100 / |
534 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
537 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
535 | }
|
538 | }
|
536 | elsif ( $data{$poster}{'orig'} == 0 ) |
539 | elsif ( $data{$poster}{'orig'} == 0 ) |
537 | {
|
540 | {
|
538 | $percent = 0; |
541 | $percent = 0; |
539 | }
|
542 | }
|
540 | $data{$poster}{'percent'} = $percent; |
543 | $data{$poster}{'percent'} = $percent; |
541 | }
|
544 | }
|
542 | }
|
545 | }
|
543 | ## fix_percent
|
546 | ## fix_percent
|
544 | 547 | ||
545 | ##################################
|
548 | ##################################
|
546 | ## Write data structures to a file
|
549 | ## Write data structures to a file
|
547 | ##################################
|
550 | ##################################
|
548 | sub write_data
|
551 | sub write_data
|
549 | {
|
552 | {
|
550 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
553 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
551 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
554 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
552 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
555 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
553 | print $OUTF |
556 | print $OUTF |
554 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
557 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
555 | foreach my $name ( keys %data ) |
558 | foreach my $name ( keys %data ) |
556 | {
|
559 | {
|
557 | print $OUTF |
560 | print $OUTF |
558 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
561 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
559 | }
|
562 | }
|
560 | print $OUTF |
563 | print $OUTF |
561 | "============================================================================\n"; |
564 | "============================================================================\n"; |
562 | print $OUTF "Thread subjects\n"; |
565 | print $OUTF "Thread subjects\n"; |
563 | print $OUTF |
566 | print $OUTF |
564 | "----------------------------------------------------------------------------\n"; |
567 | "----------------------------------------------------------------------------\n"; |
565 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
568 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
566 | {
|
569 | {
|
567 | print $OUTF |
570 | print $OUTF |
568 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
571 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
569 | }
|
572 | }
|
570 | print $OUTF |
573 | print $OUTF |
571 | "============================================================================\n"; |
574 | "============================================================================\n"; |
572 | print $OUTF "Cross-posts\n"; |
575 | print $OUTF "Cross-posts\n"; |
573 | print $OUTF |
576 | print $OUTF |
574 | "----------------------------------------------------------------------------\n"; |
577 | "----------------------------------------------------------------------------\n"; |
575 | foreach my $name ( sort keys %crossposts ) |
578 | foreach my $name ( sort keys %crossposts ) |
576 | {
|
579 | {
|
577 | print $OUTF "$name : $crossposts{$name}\n"; |
580 | print $OUTF "$name : $crossposts{$name}\n"; |
578 | }
|
581 | }
|
579 | print $OUTF |
582 | print $OUTF |
580 | "============================================================================\n"; |
583 | "============================================================================\n"; |
581 | print $OUTF "User agents\n"; |
584 | print $OUTF "User agents\n"; |
582 | print $OUTF |
585 | print $OUTF |
583 | "----------------------------------------------------------------------------\n"; |
586 | "----------------------------------------------------------------------------\n"; |
584 | foreach my $name ( sort keys %agents ) |
587 | foreach my $name ( sort keys %agents ) |
585 | {
|
588 | {
|
586 | print $OUTF "$name : $agents{$name}\n"; |
589 | print $OUTF "$name : $agents{$name}\n"; |
587 | }
|
590 | }
|
588 | print $OUTF |
591 | print $OUTF |
589 | "============================================================================\n"; |
592 | "============================================================================\n"; |
590 | print $OUTF "Time zones\n"; |
593 | print $OUTF "Time zones\n"; |
591 | print $OUTF |
594 | print $OUTF |
592 | "----------------------------------------------------------------------------\n"; |
595 | "----------------------------------------------------------------------------\n"; |
593 | foreach my $name ( sort keys %tz ) |
596 | foreach my $name ( sort keys %tz ) |
594 | {
|
597 | {
|
595 | print $OUTF "$name : $tz{$name}\n"; |
598 | print $OUTF "$name : $tz{$name}\n"; |
596 | }
|
599 | }
|
597 | close $OUTF; |
600 | close $OUTF; |
598 | } # write_data |
601 | } # write_data |
599 | 602 | ||
600 | sub display_results
|
603 | sub display_results
|
601 | {
|
604 | {
|
602 | #################### DISPLAY RESULTS #####################
|
605 | #################### DISPLAY RESULTS #####################
|
603 | println( "=" x 76 ); |
606 | println( "=" x 76 ); |
604 | printf "%s\n", |
607 | printf "%s\n", |
605 | centred(
|
608 | centred(
|
606 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
609 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
607 | 76 ); |
610 | 76 ); |
608 | println( "=" x 76 ); |
611 | println( "=" x 76 ); |
609 | printf "%s\n", |
612 | printf "%s\n", |
610 | centred(
|
613 | centred(
|
611 | __(
|
614 | __(
|
612 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
615 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
613 | ), |
616 | ), |
614 | 76
|
617 | 76
|
615 | ); |
618 | ); |
616 | print "\n\n"; |
619 | print "\n\n"; |
617 | printf __"Total posts considered: %s over %d days\n", |
620 | printf __"Total posts considered: %s over %d days\n", |
618 | $formatter->format_number($totalposts), |
621 | $formatter->format_number($totalposts), |
619 | $formatter->format_number($numdays); |
622 | $formatter->format_number($numdays); |
620 | my $time_locale = setlocale(LC_TIME); |
623 | my $time_locale = setlocale(LC_TIME); |
621 | my $earliest_datetime = DateTime->from_epoch( |
624 | my $earliest_datetime = DateTime->from_epoch( |
622 | epoch => $earliest, |
625 | epoch => $earliest, |
623 | locale => $time_locale, |
626 | locale => $time_locale, |
624 | time_zone => 'UTC', |
627 | time_zone => 'UTC', |
625 | ); |
628 | ); |
626 | my $latest_datetime = DateTime->from_epoch( |
629 | my $latest_datetime = DateTime->from_epoch( |
627 | epoch => $latest, |
630 | epoch => $latest, |
628 | locale => $time_locale, |
631 | locale => $time_locale, |
629 | time_zone => 'UTC', |
632 | time_zone => 'UTC', |
630 | ); |
633 | ); |
631 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
634 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
632 | printf __"Earliest article: %s\n", |
635 | printf __"Earliest article: %s\n", |
633 | $earliest_datetime->strftime($datetime_format); |
636 | $earliest_datetime->strftime($datetime_format); |
634 | printf __"Latest article: %s\n", |
637 | printf __"Latest article: %s\n", |
635 | $latest_datetime->strftime($datetime_format); |
638 | $latest_datetime->strftime($datetime_format); |
636 | printf __"Original articles: %s; replies: %s\n", |
639 | printf __"Original articles: %s; replies: %s\n", |
637 | $formatter->format_number($origposts), |
640 | $formatter->format_number($origposts), |
638 | $formatter->format_number($replies); |
641 | $formatter->format_number($replies); |
639 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
642 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
640 | $formatter->format_number($totsize), |
643 | $formatter->format_number($totsize), |
641 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
644 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
642 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
645 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
643 | $formatter->format_number( int( $totalposts / $numdays ) ), |
646 | $formatter->format_number( int( $totalposts / $numdays ) ), |
644 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
647 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
645 | $formatter->format_number( int( $totsize / $totalposts ) ); |
648 | $formatter->format_number( int( $totsize / $totalposts ) ); |
646 | 649 | ||
647 | my $count = keys %data; |
650 | my $count = keys %data; |
648 | printf __"Total headers: %s; bodies: %s\n", |
651 | printf __"Total headers: %s; bodies: %s\n", |
649 | $formatter->format_bytes( |
652 | $formatter->format_bytes( |
650 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
653 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
651 | ), |
654 | ), |
652 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
655 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
653 | printf __ |
656 | printf __ |
654 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
657 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
655 | $formatter->format_bytes( |
658 | $formatter->format_bytes( |
656 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
659 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
657 | ), |
660 | ), |
658 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
661 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
659 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
662 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
660 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
663 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
661 | printf __"Total number of posters: %s, average %s per poster\n", |
664 | printf __"Total number of posters: %s, average %s per poster\n", |
662 | $formatter->format_number($count), |
665 | $formatter->format_number($count), |
663 | $formatter->format_bytes( $totsize / $count, |
666 | $formatter->format_bytes( $totsize / $count, |
664 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
667 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
665 | $count = keys %threads; |
668 | $count = keys %threads; |
666 | printf __"Total number of threads: %s, average %s per thread\n", |
669 | printf __"Total number of threads: %s, average %s per thread\n", |
667 | $formatter->format_number($count), |
670 | $formatter->format_number($count), |
668 | $formatter->format_bytes( $totsize / $count, |
671 | $formatter->format_bytes( $totsize / $count, |
669 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
672 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
670 | printf __"Total number of user agents: %d\n", |
673 | printf __"Total number of user agents: %d\n", |
671 | $formatter->format_number( scalar keys %agents ); |
674 | $formatter->format_number( scalar keys %agents ); |
672 | print "\n", "=" x 76, "\n" ; |
675 | print "\n", "=" x 76, "\n" ; |
673 | ########################################
|
676 | ########################################
|
674 | ## Show posters by article count Sec 1;
|
677 | ## Show posters by article count Sec 1;
|
675 | ########################################
|
678 | ########################################
|
676 | unless ( $skipSec{1} ) |
679 | unless ( $skipSec{1} ) |
677 | {
|
680 | {
|
678 | if ( keys %data < $topposters ) |
681 | if ( keys %data < $topposters ) |
679 | {
|
682 | {
|
680 | $count = keys %data; |
683 | $count = keys %data; |
681 | }
|
684 | }
|
682 | else
|
685 | else
|
683 | {
|
686 | {
|
684 | $count = $topposters; |
687 | $count = $topposters; |
685 | }
|
688 | }
|
686 | printf "%s\n", |
689 | printf "%s\n", |
687 | centred(
|
690 | centred(
|
688 | __x( "Top {count} posters by number of articles", count => $topposters ), |
691 | __x( "Top {count} posters by number of articles", count => $topposters ), |
689 | 76
|
692 | 76
|
690 | ); |
693 | ); |
691 | print "=" x 76, "\n"; |
694 | print "=" x 76, "\n"; |
692 | my $i = 0; |
695 | my $i = 0; |
693 | foreach
|
696 | foreach
|
694 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
697 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
695 | {
|
698 | {
|
696 | my $name = substr( $poster, 0, 65 ); |
699 | my $name = substr( $poster, 0, 65 ); |
697 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
700 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
698 | $data{$poster}{count}; |
701 | $data{$poster}{count}; |
699 | last if ( ++$i == $count ); |
702 | last if ( ++$i == $count ); |
700 | }
|
703 | }
|
701 | print "\n", "=" x 76, "\n"; |
704 | print "\n", "=" x 76, "\n"; |
702 | }
|
705 | }
|
703 | 706 | ||
704 | ######################################
|
707 | ######################################
|
705 | ## Show posters by size in KiB Sec 2;
|
708 | ## Show posters by size in KiB Sec 2;
|
706 | ######################################
|
709 | ######################################
|
707 | unless ( $skipSec{2} ) |
710 | unless ( $skipSec{2} ) |
708 | {
|
711 | {
|
709 | if ( keys %data < $topposters ) |
712 | if ( keys %data < $topposters ) |
710 | {
|
713 | {
|
711 | $count = keys %data; |
714 | $count = keys %data; |
712 | }
|
715 | }
|
713 | else
|
716 | else
|
714 | {
|
717 | {
|
715 | $count = $topposters; |
718 | $count = $topposters; |
716 | }
|
719 | }
|
717 | printf "%s\n", |
720 | printf "%s\n", |
718 | centred(
|
721 | centred(
|
719 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
722 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
720 | 76
|
723 | 76
|
721 | ); |
724 | ); |
722 | print "=" x 76, "\n"; |
725 | print "=" x 76, "\n"; |
723 | my $i = 0; |
726 | my $i = 0; |
724 | foreach
|
727 | foreach
|
725 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
728 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
726 | {
|
729 | {
|
727 | my $name = substr( $poster, 0, 62 ); |
730 | my $name = substr( $poster, 0, 62 ); |
728 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
731 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
729 | $data{$poster}{size} / 1024; #/ |
732 | $data{$poster}{size} / 1024; #/ |
730 | last if ( ++$i == $count ); |
733 | last if ( ++$i == $count ); |
731 | }
|
734 | }
|
732 | print "\n", "=" x 76, "\n"; |
735 | print "\n", "=" x 76, "\n"; |
733 | }
|
736 | }
|
734 | 737 | ||
735 | #####################################
|
738 | #####################################
|
736 | ## Show top posters for original text
|
739 | ## Show top posters for original text
|
737 | #####################################
|
740 | #####################################
|
738 | unless ( $skipSec{3} ) |
741 | unless ( $skipSec{3} ) |
739 | {
|
742 | {
|
740 | if ( keys %data < $topposters ) |
743 | if ( keys %data < $topposters ) |
741 | {
|
744 | {
|
742 | $count = keys %data; |
745 | $count = keys %data; |
743 | }
|
746 | }
|
744 | else
|
747 | else
|
745 | {
|
748 | {
|
746 | $count = $topposters; |
749 | $count = $topposters; |
747 | }
|
750 | }
|
748 | printf "%s\n", |
751 | printf "%s\n", |
749 | centred(
|
752 | centred(
|
750 | __x(
|
753 | __x(
|
751 | "Top {count} responders by original text (> 5 posts)", |
754 | "Top {count} responders by original text (> 5 posts)", |
752 | count => $topposters |
755 | count => $topposters |
753 | ), |
756 | ), |
754 | 76
|
757 | 76
|
755 | ); |
758 | ); |
756 | print "=" x 76, "\n"; |
759 | print "=" x 76, "\n"; |
757 | my $i = 0; |
760 | my $i = 0; |
758 | foreach my $poster ( |
761 | foreach my $poster ( |
759 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
762 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
760 | keys %data |
763 | keys %data |
761 | )
|
764 | )
|
762 | {
|
765 | {
|
763 | next if $data{$poster}{quoted} == 0; |
766 | next if $data{$poster}{quoted} == 0; |
764 | next if $data{$poster}{count} < 5; |
767 | next if $data{$poster}{count} < 5; |
765 | my $name = substr( $poster, 0, 63 ); |
768 | my $name = substr( $poster, 0, 63 ); |
766 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
769 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
767 | $data{$poster}{percent}; |
770 | $data{$poster}{percent}; |
768 | last if ( ++$i == $count ); |
771 | last if ( ++$i == $count ); |
769 | }
|
772 | }
|
770 | print "\n", "=" x 76, "\n"; |
773 | print "\n", "=" x 76, "\n"; |
771 | }
|
774 | }
|
772 | 775 | ||
773 | ########################################
|
776 | ########################################
|
774 | ## Show bottom posters for original text
|
777 | ## Show bottom posters for original text
|
775 | ########################################
|
778 | ########################################
|
776 | unless ( $skipSec{4} ) |
779 | unless ( $skipSec{4} ) |
777 | {
|
780 | {
|
778 | if ( keys %data < $topposters ) |
781 | if ( keys %data < $topposters ) |
779 | {
|
782 | {
|
780 | $count = keys %data; |
783 | $count = keys %data; |
781 | }
|
784 | }
|
782 | else
|
785 | else
|
783 | {
|
786 | {
|
784 | $count = $topposters; |
787 | $count = $topposters; |
785 | }
|
788 | }
|
786 | printf "%s\n", |
789 | printf "%s\n", |
787 | centred(
|
790 | centred(
|
788 | __x(
|
791 | __x(
|
789 | "Bottom {count} responders by original text (> 5 posts)", |
792 | "Bottom {count} responders by original text (> 5 posts)", |
790 | count => $topposters |
793 | count => $topposters |
791 | ), |
794 | ), |
792 | 76
|
795 | 76
|
793 | ); |
796 | ); |
794 | print "=" x 76, "\n"; |
797 | print "=" x 76, "\n"; |
795 | my $i = 0; |
798 | my $i = 0; |
796 | foreach my $poster ( |
799 | foreach my $poster ( |
797 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
800 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
798 | keys %data |
801 | keys %data |
799 | )
|
802 | )
|
800 | {
|
803 | {
|
801 | next if $data{$poster}{quoted} == 0; |
804 | next if $data{$poster}{quoted} == 0; |
802 | next if $data{$poster}{count} < 5; |
805 | next if $data{$poster}{count} < 5; |
803 | my $name = substr( $poster, 0, 63 ); |
806 | my $name = substr( $poster, 0, 63 ); |
804 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
807 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
805 | $data{$poster}{percent}; |
808 | $data{$poster}{percent}; |
806 | last if ( ++$i == $count ); |
809 | last if ( ++$i == $count ); |
807 | }
|
810 | }
|
808 | print "\n", "=" x 76, "\n"; |
811 | print "\n", "=" x 76, "\n"; |
809 | }
|
812 | }
|
810 | 813 | ||
811 | #####################################
|
814 | #####################################
|
812 | ## Show threads by number of articles
|
815 | ## Show threads by number of articles
|
813 | #####################################
|
816 | #####################################
|
814 | unless ( $skipSec{5} ) |
817 | unless ( $skipSec{5} ) |
815 | {
|
818 | {
|
816 | if ( keys %threads < $topthreads ) |
819 | if ( keys %threads < $topthreads ) |
817 | {
|
820 | {
|
818 | $count = keys %threads; |
821 | $count = keys %threads; |
819 | }
|
822 | }
|
820 | else
|
823 | else
|
821 | {
|
824 | {
|
822 | $count = $topthreads; |
825 | $count = $topthreads; |
823 | }
|
826 | }
|
824 | printf "%s\n", |
827 | printf "%s\n", |
825 | centred(
|
828 | centred(
|
826 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
829 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
827 | 76 ); |
830 | 76 ); |
828 | print "=" x 76, "\n"; |
831 | print "=" x 76, "\n"; |
829 | my $i = 0; |
832 | my $i = 0; |
830 | foreach my $thread ( |
833 | foreach my $thread ( |
831 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
834 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
832 | keys %threads |
835 | keys %threads |
833 | )
|
836 | )
|
834 | {
|
837 | {
|
835 | my $name = substr( $thread, 0, 65 ); |
838 | my $name = substr( $thread, 0, 65 ); |
836 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
839 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
837 | $threads{$thread}{'count'}; |
840 | $threads{$thread}{'count'}; |
838 | last if ( ++$i == $count ); |
841 | last if ( ++$i == $count ); |
839 | }
|
842 | }
|
840 | print "\n", "=" x 76, "\n"; |
843 | print "\n", "=" x 76, "\n"; |
841 | }
|
844 | }
|
842 | 845 | ||
843 | ##############################
|
846 | ##############################
|
844 | ## Show threads by size in KiB
|
847 | ## Show threads by size in KiB
|
845 | ##############################
|
848 | ##############################
|
846 | unless ( $skipSec{6} ) |
849 | unless ( $skipSec{6} ) |
847 | {
|
850 | {
|
848 | if ( keys %threads < $topthreads ) |
851 | if ( keys %threads < $topthreads ) |
849 | {
|
852 | {
|
850 | $count = keys %threads; |
853 | $count = keys %threads; |
851 | }
|
854 | }
|
852 | else
|
855 | else
|
853 | {
|
856 | {
|
854 | $count = $topthreads; |
857 | $count = $topthreads; |
855 | }
|
858 | }
|
856 | printf "%s\n", |
859 | printf "%s\n", |
857 | centred(
|
860 | centred(
|
858 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
861 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
859 | print "=" x 76, "\n"; |
862 | print "=" x 76, "\n"; |
860 | my $i = 0; |
863 | my $i = 0; |
861 | foreach my $thread ( |
864 | foreach my $thread ( |
862 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
865 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
863 | keys %threads |
866 | keys %threads |
864 | )
|
867 | )
|
865 | {
|
868 | {
|
866 | my $name = substr( $thread, 0, 65 ); |
869 | my $name = substr( $thread, 0, 65 ); |
867 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
870 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
868 | $threads{$thread}{'size'} / 1024; #/ |
871 | $threads{$thread}{'size'} / 1024; #/ |
869 | last if ( ++$i == $count ); |
872 | last if ( ++$i == $count ); |
870 | }
|
873 | }
|
871 | print "\n", "=" x 76, "\n"; |
874 | print "\n", "=" x 76, "\n"; |
872 | }
|
875 | }
|
873 | 876 | ||
874 | ##################################
|
877 | ##################################
|
875 | ## Show top 10 cross-posted groups
|
878 | ## Show top 10 cross-posted groups
|
876 | ##################################
|
879 | ##################################
|
877 | unless ( $skipSec{7} ) |
880 | unless ( $skipSec{7} ) |
878 | {
|
881 | {
|
879 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
882 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
880 | if ( keys %crossposts < $topcrossposts ) |
883 | if ( keys %crossposts < $topcrossposts ) |
881 | {
|
884 | {
|
882 | $count = keys %crossposts; |
885 | $count = keys %crossposts; |
883 | }
|
886 | }
|
884 | else
|
887 | else
|
885 | {
|
888 | {
|
886 | $count = $topcrossposts; |
889 | $count = $topcrossposts; |
887 | }
|
890 | }
|
888 | printf "%s\n", |
891 | printf "%s\n", |
889 | centred(
|
892 | centred(
|
890 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
893 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
891 | print "=" x 76, "\n"; |
894 | print "=" x 76, "\n"; |
892 | my $i = 0; |
895 | my $i = 0; |
893 | foreach
|
896 | foreach
|
894 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
897 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
895 | {
|
898 | {
|
896 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
899 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
897 | $crossposts{$name}; |
900 | $crossposts{$name}; |
898 | last if ( ++$i == $count ); |
901 | last if ( ++$i == $count ); |
899 | }
|
902 | }
|
900 | print "\n", "=" x 76, "\n"; |
903 | print "\n", "=" x 76, "\n"; |
901 | }
|
904 | }
|
902 | 905 | ||
903 | #########################
|
906 | #########################
|
904 | ## Show agents and counts
|
907 | ## Show agents and counts
|
905 | #########################
|
908 | #########################
|
906 | unless ( $skipSec{8} ) |
909 | unless ( $skipSec{8} ) |
907 | {
|
910 | {
|
908 | if ( keys %agents < $topagents ) |
911 | if ( keys %agents < $topagents ) |
909 | {
|
912 | {
|
910 | $count = keys %agents; |
913 | $count = keys %agents; |
911 | }
|
914 | }
|
912 | else
|
915 | else
|
913 | {
|
916 | {
|
914 | $count = $topagents; |
917 | $count = $topagents; |
915 | }
|
918 | }
|
916 | printf "%s\n", |
919 | printf "%s\n", |
917 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
920 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
918 | 76 ); |
921 | 76 ); |
919 | print "=" x 76, "\n"; |
922 | print "=" x 76, "\n"; |
920 | my $i = 0; |
923 | my $i = 0; |
921 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
924 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
922 | {
|
925 | {
|
923 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
926 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
924 | $agents{$agent}; |
927 | $agents{$agent}; |
925 | last if ( ++$i == $count ); |
928 | last if ( ++$i == $count ); |
926 | }
|
929 | }
|
927 | print "\n", "=" x 76, "\n"; |
930 | print "\n", "=" x 76, "\n"; |
928 | }
|
931 | }
|
929 | 932 | ||
930 | #######################
|
933 | #######################
|
931 | ## Show distinct agents
|
934 | ## Show distinct agents
|
932 | #######################
|
935 | #######################
|
933 | unless ( $skipSec{9} ) |
936 | unless ( $skipSec{9} ) |
934 | {
|
937 | {
|
935 | if ( keys %distinct_agent < $topagents ) |
938 | if ( keys %distinct_agent < $topagents ) |
936 | {
|
939 | {
|
937 | $count = keys %distinct_agent; |
940 | $count = keys %distinct_agent; |
938 | }
|
941 | }
|
939 | else
|
942 | else
|
940 | {
|
943 | {
|
941 | $count = $topagents; |
944 | $count = $topagents; |
942 | }
|
945 | }
|
943 | printf "%s\n", |
946 | printf "%s\n", |
944 | centred(
|
947 | centred(
|
945 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
948 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
946 | 76
|
949 | 76
|
947 | ); |
950 | ); |
948 | print "=" x 76, "\n"; |
951 | print "=" x 76, "\n"; |
949 | my $i = 0; |
952 | my $i = 0; |
950 | foreach my $agent ( |
953 | foreach my $agent ( |
951 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
954 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
952 | keys %distinct_agent |
955 | keys %distinct_agent |
953 | )
|
956 | )
|
954 | {
|
957 | {
|
955 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
958 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
956 | $distinct_agent{$agent}, |
959 | $distinct_agent{$agent}, |
957 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
960 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
958 | last if ( ++$i == $count ); |
961 | last if ( ++$i == $count ); |
959 | }
|
962 | }
|
960 | print "\n", "=" x 76, "\n"; |
963 | print "\n", "=" x 76, "\n"; |
961 | }
|
964 | }
|
962 | 965 | ||
963 | ############################
|
966 | ############################
|
964 | ## Show timezones and counts
|
967 | ## Show timezones and counts
|
965 | ############################
|
968 | ############################
|
966 | unless ( $skipSec{10} ) |
969 | unless ( $skipSec{10} ) |
967 | {
|
970 | {
|
968 | if ( keys %tz < $toptz ) |
971 | if ( keys %tz < $toptz ) |
969 | {
|
972 | {
|
970 | $count = keys %tz; |
973 | $count = keys %tz; |
971 | }
|
974 | }
|
972 | else
|
975 | else
|
973 | {
|
976 | {
|
974 | $count = $toptz; |
977 | $count = $toptz; |
975 | }
|
978 | }
|
976 | printf "%s\n", |
979 | printf "%s\n", |
977 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
980 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
978 | print "=" x 76, "\n"; |
981 | print "=" x 76, "\n"; |
979 | my $i = 0; |
982 | my $i = 0; |
980 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
983 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
981 | {
|
984 | {
|
982 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
985 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
983 | last if ( ++$i == $count ); |
986 | last if ( ++$i == $count ); |
984 | }
|
987 | }
|
985 | print "\n", "=" x 76, "\n"; |
988 | print "\n", "=" x 76, "\n"; |
986 | }
|
989 | }
|
987 | }
|
990 | }
|
988 | 991 | ||
989 | ## helper subs
|
992 | ## helper subs
|
990 | 993 | ||
991 | ###############################
|
994 | ###############################
|
992 | ## Right pad a string with '.'s
|
995 | ## Right pad a string with '.'s
|
993 | ###############################
|
996 | ###############################
|
994 | sub rpad
|
997 | sub rpad
|
995 | {
|
998 | {
|
996 | ## Get text to pad, length to pad, pad chr
|
999 | ## Get text to pad, length to pad, pad chr
|
997 | my ( $text, $pad_len, $pad_chr ) = @_; |
1000 | my ( $text, $pad_len, $pad_chr ) = @_; |
998 | 1001 | ||
999 | ## DEBUG
|
1002 | ## DEBUG
|
1000 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
1003 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
1001 | 1004 | ||
1002 | if ( length($text) > $pad_len ) |
1005 | if ( length($text) > $pad_len ) |
1003 | {
|
1006 | {
|
1004 | $text = substr( $text, 0, $pad_len ); |
1007 | $text = substr( $text, 0, $pad_len ); |
1005 | }
|
1008 | }
|
1006 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
1009 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
1007 | return $padded; |
1010 | return $padded; |
1008 | }
|
1011 | }
|
1009 | 1012 | ||
1010 | ##################
|
1013 | ##################
|
1011 | ## Centre a string
|
1014 | ## Centre a string
|
1012 | ##################
|
1015 | ##################
|
1013 | sub centred
|
1016 | sub centred
|
1014 | {
|
1017 | {
|
1015 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
1018 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
1016 | my $pad_len = ( $width - length($text) ) / 2; #/ |
1019 | my $pad_len = ( $width - length($text) ) / 2; #/ |
1017 | my $centred = " " x $pad_len . $text; |
1020 | my $centred = " " x $pad_len . $text; |
1018 | return $centred; |
1021 | return $centred; |
1019 | }
|
1022 | }
|
1020 | 1023 | ||
1021 | ###########################
|
1024 | ###########################
|
1022 | ## Put commas into a number
|
1025 | ## Put commas into a number
|
1023 | ###########################
|
1026 | ###########################
|
1024 | sub commify
|
1027 | sub commify
|
1025 | {
|
1028 | {
|
1026 | local $_ = shift; |
1029 | local $_ = shift; |
1027 | my $number = $_; |
1030 | my $number = $_; |
1028 | $_ = int; # Chop non-integer part |
1031 | $_ = int; # Chop non-integer part |
1029 | 1 while |
1032 | 1 while |
1030 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
1033 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
1031 | my $int_part = $_; |
1034 | my $int_part = $_; |
1032 | my $real_part = ''; |
1035 | my $real_part = ''; |
1033 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
1036 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
1034 | {
|
1037 | {
|
1035 | $real_part = $1; |
1038 | $real_part = $1; |
1036 | }
|
1039 | }
|
1037 | return $int_part . $real_part; |
1040 | return $int_part . $real_part; |
1038 | }
|
1041 | }
|
1039 | 1042 | ||
1040 | ################################################################
|
1043 | ################################################################
|
1041 | ## Returns a string with leading and trailing whitespace removed
|
1044 | ## Returns a string with leading and trailing whitespace removed
|
1042 | ################################################################
|
1045 | ################################################################
|
1043 | sub clean
|
1046 | sub clean
|
1044 | {
|
1047 | {
|
1045 | my $dirty = shift; |
1048 | my $dirty = shift; |
1046 | my $clean = $dirty; |
1049 | my $clean = $dirty; |
1047 | $clean =~ s/^\s+|\s+$//g; |
1050 | $clean =~ s/^\s+|\s+$//g; |
1048 | 1051 | ||
1049 | return $clean; |
1052 | return $clean; |
1050 | }
|
1053 | }
|
1051 | 1054 | ||
1052 | sub usage
|
1055 | sub usage
|
1053 | {
|
1056 | {
|
1054 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
1057 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
1055 | exit 1; |
1058 | exit 1; |
1056 | }
|
1059 | }
|
1057 | 1060 | ||
1058 | sub dmsg
|
1061 | sub dmsg
|
1059 | {
|
1062 | {
|
1060 | print STDERR @_, "\n"; |
1063 | print STDERR @_, "\n"; |
1061 | }
|
1064 | }
|
1062 | 1065 | ||
1063 | sub dmsg2
|
1066 | sub dmsg2
|
1064 | {
|
1067 | {
|
1065 | my ( $level, @msg ) = @_; |
1068 | my ( $level, @msg ) = @_; |
1066 | print STDERR @msg, "\n" if $level >= DEBUG; |
1069 | print STDERR @msg, "\n" if $level >= DEBUG; |
1067 | }
|
1070 | }
|
1068 | 1071 | ||
1069 | sub println
|
1072 | sub println
|
1070 | {
|
1073 | {
|
1071 | print @_, "\n"; |
1074 | print @_, "\n"; |
1072 | }
|
1075 | }
|
1073 | 1076 |