Rev 49 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 49 | Rev 198 | ||
---|---|---|---|
1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
4 | require 5.004; |
4 | require 5.004; |
5 | 5 | ||
6 | #use diagnostics;
|
6 | #use diagnostics;
|
7 | use utf8; |
7 | use utf8; |
8 | 8 | ||
9 | ## NOTE:
|
9 | ## NOTE:
|
10 | ## Enable and remove binmode when utf8::all has actually become lexically scoped
|
10 | ## Enable and remove binmode when utf8::all has actually become lexically scoped
|
11 | # use utf8:all;
|
11 | # use utf8:all;
|
12 | 12 | ||
13 | use constant DEBUG => 0; |
13 | use constant DEBUG => 0; |
14 | 14 | ||
15 | ## newsstat.pl
|
15 | ## newsstat.pl
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
17 | ## Based on work by Garry Knight et al.
|
17 | ## Based on work by Garry Knight et al.
|
18 | ##
|
18 | ##
|
19 | ## This program is free software: you can redistribute it and/or modify
|
19 | ## This program is free software: you can redistribute it and/or modify
|
20 | ## it under the terms of the GNU General Public License as published by
|
20 | ## it under the terms of the GNU General Public License as published by
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
22 | ## (at your option) any later version.
|
22 | ## (at your option) any later version.
|
23 | ##
|
23 | ##
|
24 | ## This program is distributed in the hope that it will be useful,
|
24 | ## This program is distributed in the hope that it will be useful,
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
27 | ## GNU General Public License for more details.
|
27 | ## GNU General Public License for more details.
|
28 | ##
|
28 | ##
|
29 | ## You should have received a copy of the GNU General Public License
|
29 | ## You should have received a copy of the GNU General Public License
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
31 | 31 | ||
32 | ## Print out all text to STDOUT UTF-8 encoded
|
32 | ## Print out all text to STDOUT UTF-8 encoded
|
33 | binmode STDOUT, ':encoding(UTF-8)'; |
33 | binmode STDOUT, ':encoding(UTF-8)'; |
34 | binmode STDERR, ':encoding(UTF-8)'; |
34 | binmode STDERR, ':encoding(UTF-8)'; |
35 | 35 | ||
36 | ## L10n
|
36 | ## L10n
|
37 | use locale ':not_characters'; |
37 | use locale ':not_characters'; |
38 | 38 | ||
39 | # setlocale( LC_MESSAGES, '' );
|
39 | # setlocale( LC_MESSAGES, '' );
|
40 | require Number::Format; |
40 | require Number::Format; |
41 | 41 | ||
42 | ## i18n
|
42 | ## i18n
|
43 | ## FIXME: Automatically include resolved '.' in @INC
|
43 | ## FIXME: Automatically include resolved '.' in @INC
|
44 | # print join "\n", @INC;
|
44 | # print join "\n", @INC;
|
45 | 45 | ||
46 | use Locale::TextDomain ('de.pointedears.newsstat'); |
46 | use Locale::TextDomain ('de.pointedears.newsstat'); |
47 | use POSIX ('locale_h'); |
47 | use POSIX ('locale_h'); |
48 | use Locale::Messages qw (bind_textdomain_filter |
48 | use Locale::Messages qw (bind_textdomain_filter |
49 | bind_textdomain_codeset |
49 | bind_textdomain_codeset |
50 | turn_utf_8_on); |
50 | turn_utf_8_on); |
51 | 51 | ||
52 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
52 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
53 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
53 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
54 | 54 | ||
55 | require Mail::Message; |
55 | require Mail::Message; |
56 | require DateTime; |
56 | require DateTime; |
57 | require DateTime::Format::Mail; |
57 | require DateTime::Format::Mail; |
58 | 58 | ||
59 | # See comments in previous example
|
59 | # See comments in previous example
|
60 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
60 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
61 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
61 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
62 | 'decimal_point' }; |
62 | 'decimal_point' }; |
63 | 63 | ||
64 | # Apply defaults if values are missing
|
64 | # Apply defaults if values are missing
|
65 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
65 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
66 | $thousands_sep = ' ' unless $thousands_sep; |
66 | $thousands_sep = ' ' unless $thousands_sep; |
67 | 67 | ||
68 | # grouping and mon_grouping are packed lists
|
68 | # grouping and mon_grouping are packed lists
|
69 | # of small integers (characters) telling the
|
69 | # of small integers (characters) telling the
|
70 | # grouping (thousand_seps and mon_thousand_seps
|
70 | # grouping (thousand_seps and mon_thousand_seps
|
71 | # being the group dividers) of numbers and
|
71 | # being the group dividers) of numbers and
|
72 | # monetary quantities. The integers' meanings:
|
72 | # monetary quantities. The integers' meanings:
|
73 | # 255 means no more grouping, 0 means repeat
|
73 | # 255 means no more grouping, 0 means repeat
|
74 | # the previous grouping, 1-254 means use that
|
74 | # the previous grouping, 1-254 means use that
|
75 | # as the current grouping. Grouping goes from
|
75 | # as the current grouping. Grouping goes from
|
76 | # right to left (low to high digits). In the
|
76 | # right to left (low to high digits). In the
|
77 | # below we cheat slightly by never using anything
|
77 | # below we cheat slightly by never using anything
|
78 | # else than the first grouping (whatever that is).
|
78 | # else than the first grouping (whatever that is).
|
79 | my @grouping; |
79 | my @grouping; |
80 | if ($grouping) |
80 | if ($grouping) |
81 | {
|
81 | {
|
82 | @grouping = unpack( "C*", $grouping ); |
82 | @grouping = unpack( "C*", $grouping ); |
83 | }
|
83 | }
|
84 | else
|
84 | else
|
85 | {
|
85 | {
|
86 | @grouping = (3); |
86 | @grouping = (3); |
87 | }
|
87 | }
|
88 | 88 | ||
89 | ## FIXME: Why don't the defaults work already?
|
89 | ## FIXME: Why don't the defaults work already?
|
90 | my $formatter = new Number::Format( |
90 | my $formatter = new Number::Format( |
91 | -decimal_point => $decimal_point, |
91 | -decimal_point => $decimal_point, |
92 | -thousands_sep => $thousands_sep, |
92 | -thousands_sep => $thousands_sep, |
93 | -kibi_suffix => ' KiB', |
93 | -kibi_suffix => ' KiB', |
94 | -mebi_suffix => ' MiB', |
94 | -mebi_suffix => ' MiB', |
95 | -gibi_suffix => ' GiB', |
95 | -gibi_suffix => ' GiB', |
96 | 96 | ||
97 | # -grouping => $grouping[0]
|
97 | # -grouping => $grouping[0]
|
98 | ); |
98 | ); |
99 | 99 | ||
100 | ###################### USER CONFIGURATIONS ############################
|
100 | ###################### USER CONFIGURATIONS ############################
|
101 | 101 | ||
102 | ## The name of the group to do stats for
|
102 | ## The name of the group to do stats for
|
103 | my $newsgroup_name = $ARGV[0]; |
103 | my $newsgroup_name = $ARGV[0]; |
104 | $newsgroup_name // usage(); |
104 | $newsgroup_name // usage(); |
105 | 105 | ||
106 | ## Check for removal flags
|
106 | ## Check for removal flags
|
107 | my $ix; |
107 | my $ix; |
108 | my $j; |
108 | my $j; |
109 | my %skipSec; |
109 | my %skipSec; |
110 | my @skiplist; |
110 | my @skiplist; |
111 | my $args = @ARGV; |
111 | my $args = @ARGV; |
112 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
112 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
113 | {
|
113 | {
|
114 | $j = $ix + 1; |
114 | $j = $ix + 1; |
115 | if ( $ARGV[$ix] eq "-x" ) |
115 | if ( $ARGV[$ix] eq "-x" ) |
116 | {
|
116 | {
|
117 | @skiplist = split( ",", $ARGV[$j] ); |
117 | @skiplist = split( ",", $ARGV[$j] ); |
118 | }
|
118 | }
|
119 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
119 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
120 | {
|
120 | {
|
121 | @skiplist = split( ",", $1 ); |
121 | @skiplist = split( ",", $1 ); |
122 | }
|
122 | }
|
123 | }
|
123 | }
|
124 | foreach (@skiplist) |
124 | foreach (@skiplist) |
125 | {
|
125 | {
|
126 | $skipSec{$_} = 1; |
126 | $skipSec{$_} = 1; |
127 | }
|
127 | }
|
128 | 128 | ||
129 | ## Leafnode users will want /var/spool/news for this variable.
|
129 | ## Leafnode users will want /var/spool/news for this variable.
|
130 | my $news = "/var/spool/news/"; |
130 | my $news = "/var/spool/news/"; |
131 | 131 | ||
132 | ## Number of top or bottom posters to show
|
132 | ## Number of top or bottom posters to show
|
133 | my $topposters = 20; |
133 | my $topposters = 20; |
134 | 134 | ||
135 | ## Number of threads we want to know about
|
135 | ## Number of threads we want to know about
|
136 | my $topthreads = 20; |
136 | my $topthreads = 20; |
137 | 137 | ||
138 | ## Number of cross-posted threads to show
|
138 | ## Number of cross-posted threads to show
|
139 | my $topcrossposts = 10; |
139 | my $topcrossposts = 10; |
140 | 140 | ||
141 | ## Number of agents we list
|
141 | ## Number of agents we list
|
142 | my $topagents = 10; |
142 | my $topagents = 10; |
143 | 143 | ||
144 | ## Number of time zones to show
|
144 | ## Number of time zones to show
|
145 | my $toptz = 10; |
145 | my $toptz = 10; |
146 | 146 | ||
147 | ###################### DATA STRUCTURES ######################
|
147 | ###################### DATA STRUCTURES ######################
|
148 | my $group = $newsgroup_name; |
148 | my $group = $newsgroup_name; |
149 | $group =~ s!\.!/!g; |
149 | $group =~ s!\.!/!g; |
150 | my %data; # name, count, agent, total, orig, quoted |
150 | my %data; # name, count, agent, total, orig, quoted |
151 | my $totsize = 0; # holds total sizes of all files |
151 | my $totsize = 0; # holds total sizes of all files |
152 | my %crossposts; # group, count |
152 | my %crossposts; # group, count |
153 | my %threads; # subject, count |
153 | my %threads; # subject, count |
154 | my $replies = 0; # total no. of replies |
154 | my $replies = 0; # total no. of replies |
155 | my $origposts = 0; # total no. of original posts |
155 | my $origposts = 0; # total no. of original posts |
156 | my %tz; # timezones by count |
156 | my %tz; # timezones by count |
157 | my $earliest; # earliest article we have found |
157 | my $earliest; # earliest article we have found |
158 | my $latest; # latest article we have found |
158 | my $latest; # latest article we have found |
159 | my $totheader = 0; # total size of header material |
159 | my $totheader = 0; # total size of header material |
160 | my $totbody = 0; # total size of body material |
160 | my $totbody = 0; # total size of body material |
161 | my $totsig = 0; # total size of sig material |
161 | my $totsig = 0; # total size of sig material |
162 | my $totquoted = 0; # total size of quoted material |
162 | my $totquoted = 0; # total size of quoted material |
163 | my $totorig = 0; # total size of original material |
163 | my $totorig = 0; # total size of original material |
164 | my $totalposts; # total no. of posts considered |
164 | my $totalposts; # total no. of posts considered |
165 | my %distinct_agent; |
165 | my %distinct_agent; |
166 | 166 | ||
167 | ## Used to hold counts of User Agents used
|
167 | ## Used to hold counts of User Agents used
|
168 | my %agents = ( |
168 | my %agents = ( |
169 | "Compuserver" => 0, |
169 | "Compuserver" => 0, |
170 | "Foorum" => 0, |
170 | "Foorum" => 0, |
171 | "Forte Agent" => 0, |
171 | "Forte Agent" => 0, |
172 | "Forte Free Agent" => 0, |
172 | "Forte Free Agent" => 0, |
173 | "Gnus" => 0, |
173 | "Gnus" => 0, |
174 | "KNode" => 0, |
174 | "KNode" => 0, |
175 | "MacSOUP" => 0, |
175 | "MacSOUP" => 0, |
176 | "MT-NewsWatcher" => 0, |
176 | "MT-NewsWatcher" => 0, |
177 | "MicroPlanet Gravity" => 0, |
177 | "MicroPlanet Gravity" => 0, |
178 | "Microsoft Outlook Express" => 0, |
178 | "Microsoft Outlook Express" => 0, |
179 | "Microsoft Windows Mail" => 0, |
179 | "Microsoft Windows Mail" => 0, |
180 | "Mozilla" => 0, |
180 | "Mozilla" => 0, |
181 | "News Rover" => 0, |
181 | "News Rover" => 0, |
182 | "NN" => 0, |
182 | "NN" => 0, |
183 | "Pan" => 0, |
183 | "Pan" => 0, |
184 | "rn" => 0, |
184 | "rn" => 0, |
185 | "slrn" => 0, |
185 | "slrn" => 0, |
186 | "Sylpheed" => 0, |
186 | "Sylpheed" => 0, |
187 | "tin" => 0, |
187 | "tin" => 0, |
188 | "VSoup" => 0, |
188 | "VSoup" => 0, |
189 | "WebTV" => 0, |
189 | "WebTV" => 0, |
190 | "Xnews" => 0, |
190 | "Xnews" => 0, |
191 | ); |
191 | ); |
192 | 192 | ||
193 | my $datetime_parser = DateTime::Format::Mail->new(); |
193 | my $datetime_parser = DateTime::Format::Mail->new(); |
194 | $datetime_parser->loose(); |
194 | $datetime_parser->loose(); |
195 | 195 | ||
196 | my $today = DateTime->today( time_zone => 'UTC' ); |
196 | my $today = DateTime->today( time_zone => 'UTC' ); |
197 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
197 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
198 | my $start = int $prev_month->strftime('%s'); |
198 | my $start = int $prev_month->strftime('%s'); |
199 | my $numdays = int DateTime->last_day_of_month( |
199 | my $numdays = int DateTime->last_day_of_month( |
200 | year => $prev_month->year(), |
200 | year => $prev_month->year(), |
201 | month => $prev_month->month(), |
201 | month => $prev_month->month(), |
202 | time_zone => $prev_month->time_zone(), |
202 | time_zone => $prev_month->time_zone(), |
203 | )->day(); |
203 | )->day(); |
204 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
204 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
205 | 205 | ||
206 | dmsg( $start, " to ", $end ) if DEBUG; |
206 | dmsg( $start, " to ", $end ) if DEBUG; |
207 | 207 | ||
208 | chdir("$news$group") |
208 | chdir("$news$group") |
209 | or die __x( |
209 | or die __x( |
210 | "Can't cd to {newsgroup}: {error}\n", |
210 | "Can't cd to {newsgroup}: {error}\n", |
211 | newsgroup => "$news$group", |
211 | newsgroup => "$news$group", |
212 | error => $! |
212 | error => $! |
213 | ); |
213 | ); |
214 | opendir( DIR, "." ) |
214 | opendir( DIR, "." ) |
215 | or die __x( |
215 | or die __x( |
216 | "Can't open {newsgroup}: {error}\n", |
216 | "Can't open {newsgroup}: {error}\n", |
217 | newsgroup => "$news$group", |
217 | newsgroup => "$news$group", |
218 | error => $! |
218 | error => $! |
219 | ); |
219 | ); |
220 | 220 | ||
221 | while ( defined( my $filename = readdir(DIR) ) ) |
221 | while ( defined( my $filename = readdir(DIR) ) ) |
222 | {
|
222 | {
|
223 | next unless -f $filename; # only want real files |
223 | next unless -f $filename; # only want real files |
224 | next if ( $filename eq ".overview" ); # real articles only |
224 | next if ( $filename eq ".overview" ); # real articles only |
225 | 225 | ||
226 | get_article($filename); # read in the article |
226 | get_article($filename); # read in the article |
227 | }
|
227 | }
|
228 | closedir(DIR); # finished with the directory |
228 | closedir(DIR); # finished with the directory |
229 | 229 | ||
230 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
230 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
231 | 231 | ||
232 | ## Post-processing
|
232 | ## Post-processing
|
233 | count_agents(); # count agents, collapsing versions |
233 | count_agents(); # count agents, collapsing versions |
234 | fix_percent(); |
234 | fix_percent(); |
235 | 235 | ||
236 | write_data(); |
236 | write_data(); |
237 | display_results(); |
237 | display_results(); |
238 | 238 | ||
239 | ########################################
|
239 | ########################################
|
240 | ## Get current article's header and body
|
240 | ## Get current article's header and body
|
241 | ########################################
|
241 | ########################################
|
242 | sub get_article
|
242 | sub get_article
|
243 | {
|
243 | {
|
244 | my $filename = shift; |
244 | my $filename = shift; |
245 | 245 | ||
246 | open( my $FILE, '<', $filename ) |
246 | open( my $FILE, '<', $filename ) |
247 | or
|
247 | or
|
248 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
248 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
249 | my $msg = Mail::Message->read($FILE); |
249 | my $msg = Mail::Message->read($FILE); |
250 | my $timestamp = $msg->timestamp(); |
250 | my $timestamp = $msg->timestamp(); |
251 | my $date = $msg->study('Date'); |
251 | my $date = $msg->study('Date'); |
252 | 252 | ||
253 | ## Disregard article if timestamp is not in range
|
253 | ## Disregard article if timestamp is not in range
|
254 | dmsg($timestamp) if DEBUG; |
254 | dmsg($timestamp) if DEBUG; |
255 | if ( $timestamp < $start || $timestamp >= $end ) |
255 | if ( $timestamp < $start || $timestamp >= $end ) |
256 | {
|
256 | {
|
257 | dmsg("Posting on $date ignored.") if DEBUG; |
257 | dmsg("Posting on $date ignored.") if DEBUG; |
258 | return; |
258 | return; |
259 | }
|
259 | }
|
260 | 260 | ||
261 | $totalposts++; # bump count of articles considered |
261 | $totalposts++; # bump count of articles considered |
262 | 262 | ||
263 | ## DEBUG
|
263 | ## DEBUG
|
264 | dmsg($date) if DEBUG; |
264 | dmsg($date) if DEBUG; |
265 | 265 | ||
266 | ## get stats about the file itself
|
266 | ## get stats about the file itself
|
267 | my $filesize = -s $filename; # get total size of file |
267 | my $filesize = -s $filename; # get total size of file |
268 | $totsize += $filesize; # bump total sizes of all files |
268 | $totsize += $filesize; # bump total sizes of all files |
269 | 269 | ||
270 | if ( ( not defined $earliest ) || $timestamp < $earliest ) |
270 | if ( ( not defined $earliest ) || $timestamp < $earliest ) |
271 | {
|
271 | {
|
272 | $earliest = $timestamp; |
272 | $earliest = $timestamp; |
273 | }
|
273 | }
|
274 | elsif ( ( not defined $latest ) || $timestamp > $latest ) |
274 | elsif ( ( not defined $latest ) || $timestamp > $latest ) |
275 | {
|
275 | {
|
276 | $latest = $timestamp; |
276 | $latest = $timestamp; |
277 | }
|
277 | }
|
278 | 278 | ||
279 | #print "timestamp: $timestamp\n";
|
279 | #print "timestamp: $timestamp\n";
|
280 | 280 | ||
281 | ## count header size
|
281 | ## count header size
|
282 | $totheader += $msg->head()->size(); |
282 | $totheader += $msg->head()->size(); |
283 | 283 | ||
284 | ## get the poster's name (MIME-decoded, in UTF-8)
|
284 | ## get the poster's name (MIME-decoded, in UTF-8)
|
285 | my $poster = $msg->study('From'); |
285 | my $poster = $msg->study('From'); |
286 | if ( defined $poster ) |
286 | if ( defined $poster ) |
287 | {
|
287 | {
|
288 | ## Convert old to new format
|
288 | ## Convert old to new format
|
289 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
289 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
290 | 290 | ||
291 | ## Collapse whitespace
|
291 | ## Collapse whitespace
|
292 | $poster =~ s/\s+/ /g; |
292 | $poster =~ s/\s+/ /g; |
293 | 293 | ||
294 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
294 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
295 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
295 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
296 | 296 | ||
297 | ## DEBUG
|
297 | ## DEBUG
|
298 | dmsg($poster) if DEBUG; |
298 | dmsg($poster) if DEBUG; |
299 | 299 | ||
300 | ## seen this one before?
|
300 | ## seen this one before?
|
301 | if ( !defined( $data{$poster} ) ) |
301 | if ( !defined( $data{$poster} ) ) |
302 | {
|
302 | {
|
303 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
303 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
304 | $data{$poster}{'orig'} = 0; |
304 | $data{$poster}{'orig'} = 0; |
305 | $data{$poster}{'quoted'} = 0; |
305 | $data{$poster}{'quoted'} = 0; |
306 | }
|
306 | }
|
307 | $data{$poster}{'count'}++; # bump count for this poster |
307 | $data{$poster}{'count'}++; # bump count for this poster |
308 | $data{$poster}{'size'} += $filesize; # total size of file |
308 | $data{$poster}{'size'} += $filesize; # total size of file |
309 | 309 | ||
310 | ## The User-Agent and/or X-Newsreader fields
|
310 | ## The User-Agent and/or X-Newsreader fields
|
311 | ## for User-Agent by poster
|
311 | ## for User-Agent by poster
|
312 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); |
312 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); |
313 | if ( defined $ua ) |
313 | if ( defined $ua ) |
314 | {
|
314 | {
|
315 | $data{$poster}{'agent'} = $ua; |
315 | $data{$poster}{'agent'} = $ua; |
316 | 316 | ||
317 | ## DEBUG
|
317 | ## DEBUG
|
318 | dmsg($ua) if DEBUG; |
318 | dmsg($ua) if DEBUG; |
319 | }
|
319 | }
|
320 | 320 | ||
321 | ## The User Agent for User-Agent by number of articles
|
321 | ## The User Agent for User-Agent by number of articles
|
322 | get_agent($msg); |
322 | get_agent($msg); |
323 | 323 | ||
324 | ## Get all cross-posted newsgroups
|
324 | ## Get all cross-posted newsgroups
|
325 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
325 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
326 | {
|
326 | {
|
327 | $crossposts{$_}++; # bump count for each |
327 | $crossposts{$_}++; # bump count for each |
328 | }
|
328 | }
|
329 | 329 | ||
330 | ## Get threads
|
330 | ## Get threads
|
331 | my $thread = $msg->study('Subject'); |
331 | my $thread = $msg->study('Subject'); |
332 | $thread =~ s/^\s*re:\s*//i; # Remove Re: or re: at the start |
332 | $thread =~ s/^\s*re:\s*//i; # Remove Re: or re: at the start |
333 | $thread =~ s/\s*\(was:\s*.*\)\s*$//i; # Remove (was: ...) at the end |
333 | $thread =~ s/\s*\(was:\s*.*\)\s*$//i; # Remove (was: ...) at the end |
334 | $thread =~ s/\s+/ /g; # collapse whitespace |
334 | $thread =~ s/\s+/ /g; # collapse whitespace |
335 | $threads{$thread}{'count'}++; # bump count of this subject |
335 | $threads{$thread}{'count'}++; # bump count of this subject |
336 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
336 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
337 | 337 | ||
338 | ## Is this an original post or a reply?
|
338 | ## Is this an original post or a reply?
|
339 | if ( defined $msg->study('References') ) |
339 | if ( defined $msg->study('References') ) |
340 | {
|
340 | {
|
341 | $replies++; |
341 | $replies++; |
342 | }
|
342 | }
|
343 | else
|
343 | else
|
344 | {
|
344 | {
|
345 | $origposts++; |
345 | $origposts++; |
346 | }
|
346 | }
|
347 | 347 | ||
348 | ## Get the time zone
|
348 | ## Get the time zone
|
349 | my $datetime = $datetime_parser->parse_datetime($date); |
349 | my $datetime = $datetime_parser->parse_datetime($date); |
350 | my $tz = $datetime->strftime('%z'); |
350 | my $tz = $datetime->strftime('%z'); |
351 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
351 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
352 | $tz{$tz}++; |
352 | $tz{$tz}++; |
353 | 353 | ||
354 | ## DEBUG
|
354 | ## DEBUG
|
355 | dmsg($tz) if DEBUG; |
355 | dmsg($tz) if DEBUG; |
356 | 356 | ||
357 | #### Now analyse the body text ####
|
357 | #### Now analyse the body text ####
|
358 | my $body = $msg->body(); |
358 | my $body = $msg->body(); |
359 | 359 | ||
360 | my $insig = 0; |
360 | my $insig = 0; |
361 | my @body = $body->lines; |
361 | my @body = $body->lines; |
362 | for (@body) |
362 | for (@body) |
363 | {
|
363 | {
|
364 | $totbody += length($_); # bump total body size |
364 | $totbody += length($_); # bump total body size |
365 | next if (m{^$>}o); # don't count blank lines in body |
365 | next if (m{^$>}o); # don't count blank lines in body |
366 | if ( $insig == 1 ) |
366 | if ( $insig == 1 ) |
367 | {
|
367 | {
|
368 | 368 | ||
369 | # bump total sig size
|
369 | # bump total sig size
|
370 | $totsig += length($_); |
370 | $totsig += length($_); |
371 | }
|
371 | }
|
372 | ## are we in a quote line?
|
372 | ## are we in a quote line?
|
373 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
373 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
374 | elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) |
374 | elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) |
375 | {
|
375 | {
|
376 | ## bump count of quoted chrs
|
376 | ## bump count of quoted chrs
|
377 | $data{$poster}{'quoted'} += length($_); |
377 | $data{$poster}{'quoted'} += length($_); |
378 | $totquoted += length($_); |
378 | $totquoted += length($_); |
379 | }
|
379 | }
|
380 | elsif (/^-- $/) |
380 | elsif (/^-- $/) |
381 | {
|
381 | {
|
382 | $insig = 1; |
382 | $insig = 1; |
383 | }
|
383 | }
|
384 | else
|
384 | else
|
385 | {
|
385 | {
|
386 | ## We must be processing an original line
|
386 | ## We must be processing an original line
|
387 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
387 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
388 | $totorig += length($_); |
388 | $totorig += length($_); |
389 | }
|
389 | }
|
390 | }
|
390 | }
|
391 | 391 | ||
392 | # end for (@body)
|
392 | # end for (@body)
|
393 | }
|
393 | }
|
394 | 394 | ||
395 | close($FILE); |
395 | close($FILE); |
396 | }
|
396 | }
|
397 | 397 | ||
398 | sub get_agent
|
398 | sub get_agent
|
399 | {
|
399 | {
|
400 | my $msg = shift; |
400 | my $msg = shift; |
401 | 401 | ||
402 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') |
402 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') |
403 | // $msg->study('X-Mailer'); |
403 | // $msg->study('X-Mailer'); |
404 | 404 | ||
405 | if ( not defined $ua ) |
405 | if ( not defined $ua ) |
406 | {
|
406 | {
|
407 | my $org = $msg->study('Organization'); |
407 | my $org = $msg->study('Organization'); |
408 | if ( defined $org |
408 | if ( defined $org |
409 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
409 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
410 | {
|
410 | {
|
411 | $ua = $org; |
411 | $ua = $org; |
412 | }
|
412 | }
|
413 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
413 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
414 | {
|
414 | {
|
415 | $ua = "Pine"; |
415 | $ua = "Pine"; |
416 | }
|
416 | }
|
417 | }
|
417 | }
|
418 | 418 | ||
419 | ## Hopefully found UA, else set to unknown
|
419 | ## Hopefully found UA, else set to unknown
|
420 | if ( not defined $ua ) |
420 | if ( not defined $ua ) |
421 | {
|
421 | {
|
422 | $ua = __ "unknown"; |
422 | $ua = __ "unknown"; |
423 | }
|
423 | }
|
424 | 424 | ||
425 | $ua = clean($ua); |
425 | $ua = clean($ua); |
426 | 426 | ||
427 | my $raw = $ua; |
427 | my $raw = $ua; |
428 | my $agent = $raw; |
428 | my $agent = $raw; |
429 | 429 | ||
430 | ## strip http
|
430 | ## strip http
|
431 | if ( $raw =~ /.*http.*/ ) |
431 | if ( $raw =~ /.*http.*/ ) |
432 | {
|
432 | {
|
433 | $raw =~ s!posted via!!i; |
433 | $raw =~ s!posted via!!i; |
434 | $raw =~ s!http://!!g; |
434 | $raw =~ s!http://!!g; |
435 | $raw =~ s!/!!g; |
435 | $raw =~ s!/!!g; |
436 | $raw =~ s! !!g; |
436 | $raw =~ s! !!g; |
437 | }
|
437 | }
|
438 | 438 | ||
439 | ## Fix Outlook from Mac
|
439 | ## Fix Outlook from Mac
|
440 | if ( $raw =~ /^microsoft/i ) |
440 | if ( $raw =~ /^microsoft/i ) |
441 | {
|
441 | {
|
442 | $raw =~ s/-/ /g; |
442 | $raw =~ s/-/ /g; |
443 | }
|
443 | }
|
444 | 444 | ||
445 | ## Pick out the popular agents
|
445 | ## Pick out the popular agents
|
446 | if ( $raw =~ /(outlook express)/i |
446 | if ( $raw =~ /(outlook express)/i |
447 | || $raw =~ /(windows mail)/i |
447 | || $raw =~ /(windows mail)/i |
448 | || $raw =~ /(microplanet gravity)/i |
448 | || $raw =~ /(microplanet gravity)/i |
449 | || $raw =~ /(news rover)/i |
449 | || $raw =~ /(news rover)/i |
450 | || $raw =~ /(forte agent)/i |
450 | || $raw =~ /(forte agent)/i |
451 | || $raw =~ /(forte free agent)/i ) |
451 | || $raw =~ /(forte free agent)/i ) |
452 | {
|
452 | {
|
453 | $agent = $1; |
453 | $agent = $1; |
454 | }
|
454 | }
|
455 | elsif ( |
455 | elsif ( |
456 | $raw =~ /^( |
456 | $raw =~ /^( |
457 | pan |
457 | pan |
458 | |sylpheed
|
458 | |sylpheed
|
459 | |slrn
|
459 | |slrn
|
460 | |mozilla
|
460 | |mozilla
|
461 | |knode
|
461 | |knode
|
462 | |tin
|
462 | |tin
|
463 | |hamster
|
463 | |hamster
|
464 | |xrn
|
464 | |xrn
|
465 | |xnews
|
465 | |xnews
|
466 | |aol
|
466 | |aol
|
467 | |gnus
|
467 | |gnus
|
468 | |krn
|
468 | |krn
|
469 | |macsoup
|
469 | |macsoup
|
470 | |messenger
|
470 | |messenger
|
471 | |openxp
|
471 | |openxp
|
472 | |pine
|
472 | |pine
|
473 | |thoth
|
473 | |thoth
|
474 | |turnpike
|
474 | |turnpike
|
475 | |winvn
|
475 | |winvn
|
476 | |vsoup
|
476 | |vsoup
|
477 | |google
|
477 | |google
|
478 | |supernews
|
478 | |supernews
|
479 | |nn
|
479 | |nn
|
480 | |rn
|
480 | |rn
|
481 | |007
|
481 | |007
|
482 | |webtv
|
482 | |webtv
|
483 | |compuserve
|
483 | |compuserve
|
484 | )/ix |
484 | )/ix |
485 | )
|
485 | )
|
486 | {
|
486 | {
|
487 | $agent = $1; |
487 | $agent = $1; |
488 | }
|
488 | }
|
489 | else
|
489 | else
|
490 | {
|
490 | {
|
491 | ## Clean up unknown agents
|
491 | ## Clean up unknown agents
|
492 | if ( $raw =~ m!^(.*?)/! ) |
492 | if ( $raw =~ m!^(.*?)/! ) |
493 | {
|
493 | {
|
494 | $agent = $1; |
494 | $agent = $1; |
495 | }
|
495 | }
|
496 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
496 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
497 | {
|
497 | {
|
498 | $agent = $1; |
498 | $agent = $1; |
499 | }
|
499 | }
|
500 | }
|
500 | }
|
501 | 501 | ||
502 | $distinct_agent{$agent}++; |
502 | $distinct_agent{$agent}++; |
503 | return $agent; |
503 | return $agent; |
504 | }
|
504 | }
|
505 | ## get_agent
|
505 | ## get_agent
|
506 | 506 | ||
507 | #########################################
|
507 | #########################################
|
508 | ## Count the User-Agents used, collapsing
|
508 | ## Count the User-Agents used, collapsing
|
509 | ## different versions into one per agent.
|
509 | ## different versions into one per agent.
|
510 | #########################################
|
510 | #########################################
|
511 | sub count_agents
|
511 | sub count_agents
|
512 | {
|
512 | {
|
513 | POSTER:
|
513 | POSTER:
|
514 | foreach my $poster ( keys %data ) |
514 | foreach my $poster ( keys %data ) |
515 | {
|
515 | {
|
516 | foreach my $agent_name ( keys %distinct_agent ) |
516 | foreach my $agent_name ( keys %distinct_agent ) |
517 | { # check against known ones |
517 | { # check against known ones |
518 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
518 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
519 | {
|
519 | {
|
520 | $agents{$agent_name}++; |
520 | $agents{$agent_name}++; |
521 | next POSTER; |
521 | next POSTER; |
522 | }
|
522 | }
|
523 | }
|
523 | }
|
524 | $agents{ $data{$poster}{'agent'} }++; |
524 | $agents{ $data{$poster}{'agent'} }++; |
525 | }
|
525 | }
|
526 | } # count_agents |
526 | } # count_agents |
527 | 527 | ||
528 | #############################################
|
528 | #############################################
|
529 | ## Set orig/total percentages for all posters
|
529 | ## Set orig/total percentages for all posters
|
530 | #############################################
|
530 | #############################################
|
531 | sub fix_percent
|
531 | sub fix_percent
|
532 | {
|
532 | {
|
533 | foreach my $poster ( keys %data ) |
533 | foreach my $poster ( keys %data ) |
534 | {
|
534 | {
|
535 | my $percent = 100; |
535 | my $percent = 100; |
536 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
536 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
537 | {
|
537 | {
|
538 | $percent = |
538 | $percent = |
539 | $data{$poster}{'orig'} * 100 / |
539 | $data{$poster}{'orig'} * 100 / |
540 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
540 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
541 | }
|
541 | }
|
542 | elsif ( $data{$poster}{'orig'} == 0 ) |
542 | elsif ( $data{$poster}{'orig'} == 0 ) |
543 | {
|
543 | {
|
544 | $percent = 0; |
544 | $percent = 0; |
545 | }
|
545 | }
|
546 | $data{$poster}{'percent'} = $percent; |
546 | $data{$poster}{'percent'} = $percent; |
547 | }
|
547 | }
|
548 | }
|
548 | }
|
549 | ## fix_percent
|
549 | ## fix_percent
|
550 | 550 | ||
551 | ##################################
|
551 | ##################################
|
552 | ## Write data structures to a file
|
552 | ## Write data structures to a file
|
553 | ##################################
|
553 | ##################################
|
554 | sub write_data
|
554 | sub write_data
|
555 | {
|
555 | {
|
556 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
556 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
557 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
557 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
558 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
558 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
559 | print $OUTF |
559 | print $OUTF |
560 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
560 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
561 | foreach my $name ( keys %data ) |
561 | foreach my $name ( keys %data ) |
562 | {
|
562 | {
|
563 | print $OUTF |
563 | print $OUTF |
564 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
564 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
565 | }
|
565 | }
|
566 | print $OUTF |
566 | print $OUTF |
567 | "============================================================================\n"; |
567 | "============================================================================\n"; |
568 | print $OUTF "Thread subjects\n"; |
568 | print $OUTF "Thread subjects\n"; |
569 | print $OUTF |
569 | print $OUTF |
570 | "----------------------------------------------------------------------------\n"; |
570 | "----------------------------------------------------------------------------\n"; |
571 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
571 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
572 | {
|
572 | {
|
573 | print $OUTF |
573 | print $OUTF |
574 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
574 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
575 | }
|
575 | }
|
576 | print $OUTF |
576 | print $OUTF |
577 | "============================================================================\n"; |
577 | "============================================================================\n"; |
578 | print $OUTF "Cross-posts\n"; |
578 | print $OUTF "Cross-posts\n"; |
579 | print $OUTF |
579 | print $OUTF |
580 | "----------------------------------------------------------------------------\n"; |
580 | "----------------------------------------------------------------------------\n"; |
581 | foreach my $name ( sort keys %crossposts ) |
581 | foreach my $name ( sort keys %crossposts ) |
582 | {
|
582 | {
|
583 | print $OUTF "$name : $crossposts{$name}\n"; |
583 | print $OUTF "$name : $crossposts{$name}\n"; |
584 | }
|
584 | }
|
585 | print $OUTF |
585 | print $OUTF |
586 | "============================================================================\n"; |
586 | "============================================================================\n"; |
587 | print $OUTF "User agents\n"; |
587 | print $OUTF "User agents\n"; |
588 | print $OUTF |
588 | print $OUTF |
589 | "----------------------------------------------------------------------------\n"; |
589 | "----------------------------------------------------------------------------\n"; |
590 | foreach my $name ( sort keys %agents ) |
590 | foreach my $name ( sort keys %agents ) |
591 | {
|
591 | {
|
592 | print $OUTF "$name : $agents{$name}\n"; |
592 | print $OUTF "$name : $agents{$name}\n"; |
593 | }
|
593 | }
|
594 | print $OUTF |
594 | print $OUTF |
595 | "============================================================================\n"; |
595 | "============================================================================\n"; |
596 | print $OUTF "Time zones\n"; |
596 | print $OUTF "Time zones\n"; |
597 | print $OUTF |
597 | print $OUTF |
598 | "----------------------------------------------------------------------------\n"; |
598 | "----------------------------------------------------------------------------\n"; |
599 | foreach my $name ( sort keys %tz ) |
599 | foreach my $name ( sort keys %tz ) |
600 | {
|
600 | {
|
601 | print $OUTF "$name : $tz{$name}\n"; |
601 | print $OUTF "$name : $tz{$name}\n"; |
602 | }
|
602 | }
|
603 | close $OUTF; |
603 | close $OUTF; |
604 | } # write_data |
604 | } # write_data |
605 | 605 | ||
606 | sub display_results
|
606 | sub display_results
|
607 | {
|
607 | {
|
608 | #################### DISPLAY RESULTS #####################
|
608 | #################### DISPLAY RESULTS #####################
|
609 | print "=" x 76, "\n"; |
609 | print "=" x 76, "\n"; |
610 | printf "%s\n", |
610 | printf "%s\n", |
611 | centred(
|
611 | centred(
|
612 | __x( "Analysis of articles to {newsgroup}", newsgroup => $newsgroup_name ), |
612 | __x( "Analysis of articles to {newsgroup}", newsgroup => $newsgroup_name ), |
613 | 76
|
613 | 76
|
614 | ); |
614 | ); |
615 | print "=" x 76, "\n"; |
615 | print "=" x 76, "\n"; |
616 | printf "%s\n", |
616 | printf "%s\n", |
617 | centred(
|
617 | centred(
|
618 | __(
|
618 | __(
|
619 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
619 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
620 | ), |
620 | ), |
621 | 76
|
621 | 76
|
622 | ); |
622 | ); |
623 | print "\n"; |
623 | print "\n"; |
624 | printf __"Total articles considered: %s over %d days\n", |
624 | printf __"Total articles considered: %s over %d days\n", |
625 | $formatter->format_number($totalposts), |
625 | $formatter->format_number($totalposts), |
626 | $formatter->format_number($numdays); |
626 | $formatter->format_number($numdays); |
627 | my $time_locale = setlocale(LC_TIME); |
627 | my $time_locale = setlocale(LC_TIME); |
628 | my $earliest_datetime = DateTime->from_epoch( |
628 | my $earliest_datetime = DateTime->from_epoch( |
629 | epoch => $earliest, |
629 | epoch => $earliest, |
630 | locale => $time_locale, |
630 | locale => $time_locale, |
631 | time_zone => 'UTC', |
631 | time_zone => 'UTC', |
632 | ); |
632 | ); |
633 | my $latest_datetime = DateTime->from_epoch( |
633 | my $latest_datetime = DateTime->from_epoch( |
634 | epoch => $latest, |
634 | epoch => $latest, |
635 | locale => $time_locale, |
635 | locale => $time_locale, |
636 | time_zone => 'UTC', |
636 | time_zone => 'UTC', |
637 | ); |
637 | ); |
638 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
638 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
639 | printf __"Earliest article: %s" . "\n", |
639 | printf __"Earliest article: %s" . "\n", |
640 | $earliest_datetime->strftime($datetime_format); |
640 | $earliest_datetime->strftime($datetime_format); |
641 | printf __"Latest article: %s" . "\n", |
641 | printf __"Latest article: %s" . "\n", |
642 | $latest_datetime->strftime($datetime_format); |
642 | $latest_datetime->strftime($datetime_format); |
643 | printf __"Original articles: %s; replies: %s" . "\n", |
643 | printf __"Original articles: %s; replies: %s" . "\n", |
644 | $formatter->format_number($origposts), |
644 | $formatter->format_number($origposts), |
645 | $formatter->format_number($replies); |
645 | $formatter->format_number($replies); |
646 | printf __"Total size of articles: %s bytes (%s)" . "\n", |
646 | printf __"Total size of articles: %s bytes (%s)" . "\n", |
647 | $formatter->format_number($totsize), |
647 | $formatter->format_number($totsize), |
648 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
648 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
649 | printf __"Average %s articles per day, %s per day, %s bytes per article.\n", |
649 | printf __"Average %s articles per day, %s per day, %s bytes per article.\n", |
650 | $formatter->format_number( int( $totalposts / $numdays ) ), |
650 | $formatter->format_number( int( $totalposts / $numdays ) ), |
651 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
651 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
652 | $formatter->format_number( int( $totsize / $totalposts ) ); |
652 | $formatter->format_number( int( $totsize / $totalposts ) ); |
653 | 653 | ||
654 | my $count = keys %data; |
654 | my $count = keys %data; |
655 | print "\n"; |
655 | print "\n"; |
656 | printf __"Total headers: %s; bodies: %s\n", |
656 | printf __"Total headers: %s; bodies: %s\n", |
657 | $formatter->format_bytes( |
657 | $formatter->format_bytes( |
658 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
658 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
659 | ), |
659 | ), |
660 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
660 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
661 | printf __ |
661 | printf __ |
662 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
662 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
663 | $formatter->format_bytes( |
663 | $formatter->format_bytes( |
664 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
664 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
665 | ), |
665 | ), |
666 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
666 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
667 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ) |
667 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ) |
668 | ,
|
668 | ,
|
669 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
669 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
670 | print "\n"; |
670 | print "\n"; |
671 | printf __"Total number of posters: %s, average %s per poster\n", |
671 | printf __"Total number of posters: %s, average %s per poster\n", |
672 | $formatter->format_number($count), |
672 | $formatter->format_number($count), |
673 | $formatter->format_bytes( $totsize / $count, |
673 | $formatter->format_bytes( $totsize / $count, |
674 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
674 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
675 | $count = keys %threads; |
675 | $count = keys %threads; |
676 | printf __"Total number of threads: %s, average %s per thread\n", |
676 | printf __"Total number of threads: %s, average %s per thread\n", |
677 | $formatter->format_number($count), |
677 | $formatter->format_number($count), |
678 | $formatter->format_bytes( $totsize / $count, |
678 | $formatter->format_bytes( $totsize / $count, |
679 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
679 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
680 | printf __"Total number of user agents: %d\n", |
680 | printf __"Total number of user agents: %d\n", |
681 | $formatter->format_number( scalar keys %agents ); |
681 | $formatter->format_number( scalar keys %agents ); |
682 | print "\n", "=" x 76, "\n"; |
682 | print "\n", "=" x 76, "\n"; |
683 | ########################################
|
683 | ########################################
|
684 | ## Show posters by article count Sec 1;
|
684 | ## Show posters by article count Sec 1;
|
685 | ########################################
|
685 | ########################################
|
686 | unless ( $skipSec{1} ) |
686 | unless ( $skipSec{1} ) |
687 | {
|
687 | {
|
688 | if ( keys %data < $topposters ) |
688 | if ( keys %data < $topposters ) |
689 | {
|
689 | {
|
690 | $count = keys %data; |
690 | $count = keys %data; |
691 | }
|
691 | }
|
692 | else
|
692 | else
|
693 | {
|
693 | {
|
694 | $count = $topposters; |
694 | $count = $topposters; |
695 | }
|
695 | }
|
696 | printf "%s\n", |
696 | printf "%s\n", |
697 | centred(
|
697 | centred(
|
698 | __x( "Top {count} posters by number of articles", count => $topposters ), |
698 | __x( "Top {count} posters by number of articles", count => $topposters ), |
699 | 76
|
699 | 76
|
700 | ); |
700 | ); |
701 | print "=" x 76, "\n"; |
701 | print "=" x 76, "\n"; |
702 | my $i = 0; |
702 | my $i = 0; |
703 | foreach
|
703 | foreach
|
704 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
704 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
705 | {
|
705 | {
|
706 | my $name = substr( $poster, 0, 65 ); |
706 | my $name = substr( $poster, 0, 65 ); |
707 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
707 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
708 | $data{$poster}{count}; |
708 | $data{$poster}{count}; |
709 | last if ( ++$i == $count ); |
709 | last if ( ++$i == $count ); |
710 | }
|
710 | }
|
711 | print "\n", "=" x 76, "\n"; |
711 | print "\n", "=" x 76, "\n"; |
712 | }
|
712 | }
|
713 | 713 | ||
714 | ######################################
|
714 | ######################################
|
715 | ## Show posters by size in KiB Sec 2;
|
715 | ## Show posters by size in KiB Sec 2;
|
716 | ######################################
|
716 | ######################################
|
717 | unless ( $skipSec{2} ) |
717 | unless ( $skipSec{2} ) |
718 | {
|
718 | {
|
719 | if ( keys %data < $topposters ) |
719 | if ( keys %data < $topposters ) |
720 | {
|
720 | {
|
721 | $count = keys %data; |
721 | $count = keys %data; |
722 | }
|
722 | }
|
723 | else
|
723 | else
|
724 | {
|
724 | {
|
725 | $count = $topposters; |
725 | $count = $topposters; |
726 | }
|
726 | }
|
727 | printf "%s\n", |
727 | printf "%s\n", |
728 | centred(
|
728 | centred(
|
729 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
729 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
730 | 76
|
730 | 76
|
731 | ); |
731 | ); |
732 | print "=" x 76, "\n"; |
732 | print "=" x 76, "\n"; |
733 | my $i = 0; |
733 | my $i = 0; |
734 | foreach
|
734 | foreach
|
735 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
735 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
736 | {
|
736 | {
|
737 | my $name = substr( $poster, 0, 62 ); |
737 | my $name = substr( $poster, 0, 62 ); |
738 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
738 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
739 | $data{$poster}{size} / 1024; #/ |
739 | $data{$poster}{size} / 1024; #/ |
740 | last if ( ++$i == $count ); |
740 | last if ( ++$i == $count ); |
741 | }
|
741 | }
|
742 | print "\n", "=" x 76, "\n"; |
742 | print "\n", "=" x 76, "\n"; |
743 | }
|
743 | }
|
744 | 744 | ||
745 | #####################################
|
745 | #####################################
|
746 | ## Show top posters for original text
|
746 | ## Show top posters for original text
|
747 | #####################################
|
747 | #####################################
|
748 | my $topposters_real = 0; |
748 | my $topposters_real = 0; |
749 | 749 | ||
750 | unless ( $skipSec{3} ) |
750 | unless ( $skipSec{3} ) |
751 | {
|
751 | {
|
752 | if ( keys %data < $topposters ) |
752 | if ( keys %data < $topposters ) |
753 | {
|
753 | {
|
754 | $count = keys %data; |
754 | $count = keys %data; |
755 | }
|
755 | }
|
756 | else
|
756 | else
|
757 | {
|
757 | {
|
758 | $count = $topposters; |
758 | $count = $topposters; |
759 | }
|
759 | }
|
760 | 760 | ||
761 | printf "%s\n", |
761 | printf "%s\n", |
762 | centred(
|
762 | centred(
|
763 | __x(
|
763 | __x(
|
764 | "Top {count} responders by original text (> 5 articles)", |
764 | "Top {count} responders by original text (> 5 articles)", |
765 | count => $topposters |
765 | count => $topposters |
766 | ), |
766 | ), |
767 | 76
|
767 | 76
|
768 | ); |
768 | ); |
769 | print "=" x 76, "\n"; |
769 | print "=" x 76, "\n"; |
770 | foreach my $poster ( |
770 | foreach my $poster ( |
771 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
771 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
772 | keys %data |
772 | keys %data |
773 | )
|
773 | )
|
774 | {
|
774 | {
|
775 | next if $data{$poster}{quoted} == 0; |
775 | next if $data{$poster}{quoted} == 0; |
776 | next if $data{$poster}{count} < 5; |
776 | next if $data{$poster}{count} < 5; |
777 | my $name = substr( $poster, 0, 63 ); |
777 | my $name = substr( $poster, 0, 63 ); |
778 | printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, |
778 | printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, |
779 | rpad( $poster, 63, "." ), |
779 | rpad( $poster, 63, "." ), |
780 | $data{$poster}{percent}; |
780 | $data{$poster}{percent}; |
781 | last if ( ++$topposters_real == $count ); |
781 | last if ( ++$topposters_real == $count ); |
782 | }
|
782 | }
|
783 | print "\n", "=" x 76, "\n"; |
783 | print "\n", "=" x 76, "\n"; |
784 | }
|
784 | }
|
785 | 785 | ||
786 | ########################################
|
786 | ########################################
|
787 | ## Show bottom posters for original text
|
787 | ## Show bottom posters for original text
|
788 | ########################################
|
788 | ########################################
|
789 | 789 | ||
790 | $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; |
790 | $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; |
791 | 791 | ||
792 | unless ( $skipSec{4} ) |
792 | unless ( $skipSec{4} ) |
793 | {
|
793 | {
|
794 | if ( keys %data < $topposters ) |
794 | if ( keys %data < $topposters ) |
795 | {
|
795 | {
|
796 | $count = keys %data; |
796 | $count = keys %data; |
797 | }
|
797 | }
|
798 | else
|
798 | else
|
799 | {
|
799 | {
|
800 | $count = $topposters; |
800 | $count = $topposters; |
801 | }
|
801 | }
|
802 | 802 | ||
803 | printf "%s\n", |
803 | printf "%s\n", |
804 | centred(
|
804 | centred(
|
805 | __x(
|
805 | __x(
|
806 | "Bottom {count} responders by original text (> 5 articles)", |
806 | "Bottom {count} responders by original text (> 5 articles)", |
807 | count => $topposters |
807 | count => $topposters |
808 | ), |
808 | ), |
809 | 76
|
809 | 76
|
810 | ); |
810 | ); |
811 | print "=" x 76, "\n"; |
811 | print "=" x 76, "\n"; |
812 | my $i = 0; |
812 | my $i = 0; |
813 | foreach my $poster ( |
813 | foreach my $poster ( |
814 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
814 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
815 | keys %data |
815 | keys %data |
816 | )
|
816 | )
|
817 | {
|
817 | {
|
818 | next if $data{$poster}{quoted} == 0; |
818 | next if $data{$poster}{quoted} == 0; |
819 | next if $data{$poster}{count} < 5; |
819 | next if $data{$poster}{count} < 5; |
820 | my $name = substr( $poster, 0, 63 ); |
820 | my $name = substr( $poster, 0, 63 ); |
821 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
821 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
822 | $data{$poster}{percent}; |
822 | $data{$poster}{percent}; |
823 | last if ( ++$i == $count ); |
823 | last if ( ++$i == $count ); |
824 | }
|
824 | }
|
825 | print "\n", "=" x 76, "\n"; |
825 | print "\n", "=" x 76, "\n"; |
826 | }
|
826 | }
|
827 | 827 | ||
828 | #####################################
|
828 | #####################################
|
829 | ## Show threads by number of articles
|
829 | ## Show threads by number of articles
|
830 | #####################################
|
830 | #####################################
|
831 | unless ( $skipSec{5} ) |
831 | unless ( $skipSec{5} ) |
832 | {
|
832 | {
|
833 | if ( keys %threads < $topthreads ) |
833 | if ( keys %threads < $topthreads ) |
834 | {
|
834 | {
|
835 | $count = keys %threads; |
835 | $count = keys %threads; |
836 | }
|
836 | }
|
837 | else
|
837 | else
|
838 | {
|
838 | {
|
839 | $count = $topthreads; |
839 | $count = $topthreads; |
840 | }
|
840 | }
|
841 | printf "%s\n", |
841 | printf "%s\n", |
842 | centred(
|
842 | centred(
|
843 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
843 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
844 | 76 ); |
844 | 76 ); |
845 | print "=" x 76, "\n"; |
845 | print "=" x 76, "\n"; |
846 | my $i = 0; |
846 | my $i = 0; |
847 | foreach my $thread ( |
847 | foreach my $thread ( |
848 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
848 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
849 | keys %threads |
849 | keys %threads |
850 | )
|
850 | )
|
851 | {
|
851 | {
|
852 | my $name = substr( $thread, 0, 65 ); |
852 | my $name = substr( $thread, 0, 65 ); |
853 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
853 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
854 | $threads{$thread}{'count'}; |
854 | $threads{$thread}{'count'}; |
855 | last if ( ++$i == $count ); |
855 | last if ( ++$i == $count ); |
856 | }
|
856 | }
|
857 | print "\n", "=" x 76, "\n"; |
857 | print "\n", "=" x 76, "\n"; |
858 | }
|
858 | }
|
859 | 859 | ||
860 | ##############################
|
860 | ##############################
|
861 | ## Show threads by size in KiB
|
861 | ## Show threads by size in KiB
|
862 | ##############################
|
862 | ##############################
|
863 | unless ( $skipSec{6} ) |
863 | unless ( $skipSec{6} ) |
864 | {
|
864 | {
|
865 | if ( keys %threads < $topthreads ) |
865 | if ( keys %threads < $topthreads ) |
866 | {
|
866 | {
|
867 | $count = keys %threads; |
867 | $count = keys %threads; |
868 | }
|
868 | }
|
869 | else
|
869 | else
|
870 | {
|
870 | {
|
871 | $count = $topthreads; |
871 | $count = $topthreads; |
872 | }
|
872 | }
|
873 | printf "%s\n", |
873 | printf "%s\n", |
874 | centred(
|
874 | centred(
|
875 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
875 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
876 | print "=" x 76, "\n"; |
876 | print "=" x 76, "\n"; |
877 | my $i = 0; |
877 | my $i = 0; |
878 | foreach my $thread ( |
878 | foreach my $thread ( |
879 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
879 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
880 | keys %threads |
880 | keys %threads |
881 | )
|
881 | )
|
882 | {
|
882 | {
|
883 | my $name = substr( $thread, 0, 65 ); |
883 | my $name = substr( $thread, 0, 65 ); |
884 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
884 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
885 | $threads{$thread}{'size'} / 1024; #/ |
885 | $threads{$thread}{'size'} / 1024; #/ |
886 | last if ( ++$i == $count ); |
886 | last if ( ++$i == $count ); |
887 | }
|
887 | }
|
888 | print "\n", "=" x 76, "\n"; |
888 | print "\n", "=" x 76, "\n"; |
889 | }
|
889 | }
|
890 | 890 | ||
891 | ##################################
|
891 | ##################################
|
892 | ## Show top 10 cross-posted groups
|
892 | ## Show top 10 cross-posted groups
|
893 | ##################################
|
893 | ##################################
|
894 | unless ( $skipSec{7} ) |
894 | unless ( $skipSec{7} ) |
895 | {
|
895 | {
|
896 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
896 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
897 | if ( keys %crossposts < $topcrossposts ) |
897 | if ( keys %crossposts < $topcrossposts ) |
898 | {
|
898 | {
|
899 | $count = keys %crossposts; |
899 | $count = keys %crossposts; |
900 | }
|
900 | }
|
901 | else
|
901 | else
|
902 | {
|
902 | {
|
903 | $count = $topcrossposts; |
903 | $count = $topcrossposts; |
904 | }
|
904 | }
|
905 | printf "%s\n", |
905 | printf "%s\n", |
906 | centred(
|
906 | centred(
|
907 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
907 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
908 | print "=" x 76, "\n"; |
908 | print "=" x 76, "\n"; |
909 | my $i = 0; |
909 | my $i = 0; |
910 | foreach
|
910 | foreach
|
911 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
911 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
912 | {
|
912 | {
|
913 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
913 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
914 | $crossposts{$name}; |
914 | $crossposts{$name}; |
915 | last if ( ++$i == $count ); |
915 | last if ( ++$i == $count ); |
916 | }
|
916 | }
|
917 | print "\n", "=" x 76, "\n"; |
917 | print "\n", "=" x 76, "\n"; |
918 | }
|
918 | }
|
919 | 919 | ||
920 | #########################
|
920 | #########################
|
921 | ## Show agents and counts
|
921 | ## Show agents and counts
|
922 | #########################
|
922 | #########################
|
923 | unless ( $skipSec{8} ) |
923 | unless ( $skipSec{8} ) |
924 | {
|
924 | {
|
925 | if ( keys %agents < $topagents ) |
925 | if ( keys %agents < $topagents ) |
926 | {
|
926 | {
|
927 | $count = keys %agents; |
927 | $count = keys %agents; |
928 | }
|
928 | }
|
929 | else
|
929 | else
|
930 | {
|
930 | {
|
931 | $count = $topagents; |
931 | $count = $topagents; |
932 | }
|
932 | }
|
933 | printf "%s\n", |
933 | printf "%s\n", |
934 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
934 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
935 | 76 ); |
935 | 76 ); |
936 | print "=" x 76, "\n"; |
936 | print "=" x 76, "\n"; |
937 | my $i = 0; |
937 | my $i = 0; |
938 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
938 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
939 | {
|
939 | {
|
940 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
940 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
941 | $agents{$agent}; |
941 | $agents{$agent}; |
942 | last if ( ++$i == $count ); |
942 | last if ( ++$i == $count ); |
943 | }
|
943 | }
|
944 | print "\n", "=" x 76, "\n"; |
944 | print "\n", "=" x 76, "\n"; |
945 | }
|
945 | }
|
946 | 946 | ||
947 | #######################
|
947 | #######################
|
948 | ## Show distinct agents
|
948 | ## Show distinct agents
|
949 | #######################
|
949 | #######################
|
950 | unless ( $skipSec{9} ) |
950 | unless ( $skipSec{9} ) |
951 | {
|
951 | {
|
952 | if ( keys %distinct_agent < $topagents ) |
952 | if ( keys %distinct_agent < $topagents ) |
953 | {
|
953 | {
|
954 | $count = keys %distinct_agent; |
954 | $count = keys %distinct_agent; |
955 | }
|
955 | }
|
956 | else
|
956 | else
|
957 | {
|
957 | {
|
958 | $count = $topagents; |
958 | $count = $topagents; |
959 | }
|
959 | }
|
960 | printf "%s\n", |
960 | printf "%s\n", |
961 | centred(
|
961 | centred(
|
962 | __x(
|
962 | __x(
|
963 | "Top {count} user agents by number of articles", |
963 | "Top {count} user agents by number of articles", |
964 | count => $topagents |
964 | count => $topagents |
965 | ), |
965 | ), |
966 | 76
|
966 | 76
|
967 | ); |
967 | ); |
968 | print "=" x 76, "\n"; |
968 | print "=" x 76, "\n"; |
969 | my $i = 0; |
969 | my $i = 0; |
970 | foreach my $agent ( |
970 | foreach my $agent ( |
971 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
971 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
972 | keys %distinct_agent |
972 | keys %distinct_agent |
973 | )
|
973 | )
|
974 | {
|
974 | {
|
975 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
975 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
976 | $distinct_agent{$agent}, |
976 | $distinct_agent{$agent}, |
977 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
977 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
978 | last if ( ++$i == $count ); |
978 | last if ( ++$i == $count ); |
979 | }
|
979 | }
|
980 | print "\n", "=" x 76, "\n"; |
980 | print "\n", "=" x 76, "\n"; |
981 | }
|
981 | }
|
982 | 982 | ||
983 | ############################
|
983 | ############################
|
984 | ## Show timezones and counts
|
984 | ## Show timezones and counts
|
985 | ############################
|
985 | ############################
|
986 | unless ( $skipSec{10} ) |
986 | unless ( $skipSec{10} ) |
987 | {
|
987 | {
|
988 | if ( keys %tz < $toptz ) |
988 | if ( keys %tz < $toptz ) |
989 | {
|
989 | {
|
990 | $count = keys %tz; |
990 | $count = keys %tz; |
991 | }
|
991 | }
|
992 | else
|
992 | else
|
993 | {
|
993 | {
|
994 | $count = $toptz; |
994 | $count = $toptz; |
995 | }
|
995 | }
|
996 | printf "%s\n", |
996 | printf "%s\n", |
997 | centred(
|
997 | centred(
|
998 | __x( "Top {count} time zones by number of articles", count => $toptz ), |
998 | __x( "Top {count} time zones by number of articles", count => $toptz ), |
999 | 76 ); |
999 | 76 ); |
1000 | print "=" x 76, "\n"; |
1000 | print "=" x 76, "\n"; |
1001 | my $i = 0; |
1001 | my $i = 0; |
1002 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
1002 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
1003 | {
|
1003 | {
|
1004 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
1004 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
1005 | last if ( ++$i == $count ); |
1005 | last if ( ++$i == $count ); |
1006 | }
|
1006 | }
|
1007 | print "\n", "=" x 76, "\n"; |
1007 | print "\n", "=" x 76, "\n"; |
1008 | }
|
1008 | }
|
1009 | }
|
1009 | }
|
1010 | 1010 | ||
1011 | ## helper subs
|
1011 | ## helper subs
|
1012 | 1012 | ||
1013 | ###############################
|
1013 | ###############################
|
1014 | ## Right pad a string with '.'s
|
1014 | ## Right pad a string with '.'s
|
1015 | ###############################
|
1015 | ###############################
|
1016 | sub rpad
|
1016 | sub rpad
|
1017 | {
|
1017 | {
|
1018 | ## Get text to pad, length to pad, pad chr
|
1018 | ## Get text to pad, length to pad, pad chr
|
1019 | my ( $text, $pad_len, $pad_chr ) = @_; |
1019 | my ( $text, $pad_len, $pad_chr ) = @_; |
1020 | 1020 | ||
1021 | ## DEBUG
|
1021 | ## DEBUG
|
1022 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
1022 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
1023 | 1023 | ||
1024 | if ( length($text) > $pad_len ) |
1024 | if ( length($text) > $pad_len ) |
1025 | {
|
1025 | {
|
1026 | $text = substr( $text, 0, $pad_len ); |
1026 | $text = substr( $text, 0, $pad_len ); |
1027 | }
|
1027 | }
|
1028 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
1028 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
1029 | return $padded; |
1029 | return $padded; |
1030 | }
|
1030 | }
|
1031 | 1031 | ||
1032 | ##################
|
1032 | ##################
|
1033 | ## Centre a string
|
1033 | ## Centre a string
|
1034 | ##################
|
1034 | ##################
|
1035 | sub centred
|
1035 | sub centred
|
1036 | {
|
1036 | {
|
1037 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
1037 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
1038 | my $pad_len = ( $width - length($text) ) / 2; #/ |
1038 | my $pad_len = ( $width - length($text) ) / 2; #/ |
1039 | my $centred = " " x $pad_len . $text; |
1039 | my $centred = " " x $pad_len . $text; |
1040 | return $centred; |
1040 | return $centred; |
1041 | }
|
1041 | }
|
1042 | 1042 | ||
1043 | ###########################
|
1043 | ###########################
|
1044 | ## Put commas into a number
|
1044 | ## Put commas into a number
|
1045 | ###########################
|
1045 | ###########################
|
1046 | sub commify
|
1046 | sub commify
|
1047 | {
|
1047 | {
|
1048 | local $_ = shift; |
1048 | local $_ = shift; |
1049 | my $number = $_; |
1049 | my $number = $_; |
1050 | $_ = int; # Chop non-integer part |
1050 | $_ = int; # Chop non-integer part |
1051 | 1 while |
1051 | 1 while |
1052 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
1052 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
1053 | my $int_part = $_; |
1053 | my $int_part = $_; |
1054 | my $real_part = ''; |
1054 | my $real_part = ''; |
1055 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
1055 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
1056 | {
|
1056 | {
|
1057 | $real_part = $1; |
1057 | $real_part = $1; |
1058 | }
|
1058 | }
|
1059 | return $int_part . $real_part; |
1059 | return $int_part . $real_part; |
1060 | }
|
1060 | }
|
1061 | 1061 | ||
1062 | ################################################################
|
1062 | ################################################################
|
1063 | ## Returns a string with leading and trailing whitespace removed
|
1063 | ## Returns a string with leading and trailing whitespace removed
|
1064 | ################################################################
|
1064 | ################################################################
|
1065 | sub clean
|
1065 | sub clean
|
1066 | {
|
1066 | {
|
1067 | my $dirty = shift; |
1067 | my $dirty = shift; |
1068 | my $clean = $dirty; |
1068 | my $clean = $dirty; |
1069 | $clean =~ s/^\s+|\s+$//g; |
1069 | $clean =~ s/^\s+|\s+$//g; |
1070 | 1070 | ||
1071 | return $clean; |
1071 | return $clean; |
1072 | }
|
1072 | }
|
1073 | 1073 | ||
1074 | sub usage
|
1074 | sub usage
|
1075 | {
|
1075 | {
|
1076 | print __ "usage: newsstat.pl NEWS.GROUP", "\n"; |
1076 | print __ "usage: newsstat.pl NEWS.GROUP", "\n"; |
1077 | exit 1; |
1077 | exit 1; |
1078 | }
|
1078 | }
|
1079 | 1079 | ||
1080 | sub dmsg
|
1080 | sub dmsg
|
1081 | {
|
1081 | {
|
1082 | print STDERR @_, "\n"; |
1082 | print STDERR @_, "\n"; |
1083 | }
|
1083 | }
|
1084 | 1084 | ||
1085 | sub dmsg2
|
1085 | sub dmsg2
|
1086 | {
|
1086 | {
|
1087 | my ( $level, @msg ) = @_; |
1087 | my ( $level, @msg ) = @_; |
1088 | print STDERR @msg, "\n" if $level >= DEBUG; |
1088 | print STDERR @msg, "\n" if $level >= DEBUG; |
1089 | }
|
1089 | }
|
1090 | 1090 |