Rev 22 | Rev 24 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 22 | Rev 23 | ||
---|---|---|---|
1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
- | 4 | require 5.004; |
|
4 | 5 | ||
5 | #use diagnostics;
|
6 | #use diagnostics;
|
6 | use utf8; |
7 | use utf8; |
7 | use Encode; |
8 | use Encode; |
8 | 9 | ||
9 | use constant DEBUG => 0; |
10 | use constant DEBUG => 0; |
10 | 11 | ||
11 | ## Print out all text to STDOUT UTF-8 encoded
|
12 | ## Print out all text to STDOUT UTF-8 encoded
|
12 | binmode STDOUT, ':encoding(UTF-8)'; |
13 | binmode STDOUT, ':encoding(UTF-8)'; |
13 | binmode STDERR, ':encoding(UTF-8)'; |
14 | binmode STDERR, ':encoding(UTF-8)'; |
14 | 15 | ||
- | 16 | ## L10n
|
|
- | 17 | use locale ':not_characters'; |
|
- | 18 | ||
- | 19 | # setlocale( LC_MESSAGES, '' );
|
|
- | 20 | require Number::Format; |
|
- | 21 | ||
- | 22 | ## i18n
|
|
15 | # FIXME: Automatically include resolved '.' in @INC
|
23 | ## FIXME: Automatically include resolved '.' in @INC
|
16 | # print join "\n", @INC;
|
24 | # print join "\n", @INC;
|
17 | 25 | ||
18 | use locale ':not_characters'; |
- | |
19 | use Locale::TextDomain ('de.pointedears.newsstat'); |
26 | use Locale::TextDomain ('de.pointedears.newsstat'); |
20 | use POSIX ('locale_h'); |
27 | use POSIX ('locale_h'); |
21 | use Locale::Messages qw (bind_textdomain_filter |
28 | use Locale::Messages qw (bind_textdomain_filter |
22 | bind_textdomain_codeset |
29 | bind_textdomain_codeset |
23 | turn_utf_8_on); |
30 | turn_utf_8_on); |
24 | #setlocale( LC_MESSAGES, '' );
|
- | |
- | 31 | ||
25 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
32 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
26 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
33 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
27 | 34 | ||
28 | require Mail::Message; |
35 | require Mail::Message; |
29 | require DateTime; |
36 | require DateTime; |
30 | require DateTime::Format::Mail; |
37 | require DateTime::Format::Mail; |
- | 38 | ||
- | 39 | # See comments in previous example
|
|
- | 40 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
|
- | 41 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
|
- | 42 | 'decimal_point' }; |
|
- | 43 | ||
- | 44 | # Apply defaults if values are missing
|
|
- | 45 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
|
- | 46 | $thousands_sep = ' ' unless $thousands_sep; |
|
- | 47 | ||
- | 48 | # grouping and mon_grouping are packed lists
|
|
- | 49 | # of small integers (characters) telling the
|
|
- | 50 | # grouping (thousand_seps and mon_thousand_seps
|
|
- | 51 | # being the group dividers) of numbers and
|
|
- | 52 | # monetary quantities. The integers' meanings:
|
|
- | 53 | # 255 means no more grouping, 0 means repeat
|
|
- | 54 | # the previous grouping, 1-254 means use that
|
|
- | 55 | # as the current grouping. Grouping goes from
|
|
- | 56 | # right to left (low to high digits). In the
|
|
- | 57 | # below we cheat slightly by never using anything
|
|
- | 58 | # else than the first grouping (whatever that is).
|
|
- | 59 | my @grouping; |
|
- | 60 | if ($grouping) |
|
- | 61 | {
|
|
- | 62 | @grouping = unpack( "C*", $grouping ); |
|
- | 63 | }
|
|
- | 64 | else
|
|
- | 65 | {
|
|
- | 66 | @grouping = (3); |
|
- | 67 | }
|
|
- | 68 | ||
- | 69 | ## FIXME: Why don't the defaults work already?
|
|
- | 70 | my $formatter = new Number::Format( |
|
- | 71 | -decimal_point => $decimal_point, |
|
- | 72 | -thousands_sep => $thousands_sep, |
|
- | 73 | ||
- | 74 | # -grouping => $grouping[0]
|
|
- | 75 | ); |
|
31 | 76 | ||
32 | ###################### USER CONFIGURATIONS ############################
|
77 | ###################### USER CONFIGURATIONS ############################
|
33 | 78 | ||
34 | ## The name of the group to do stats for
|
79 | ## The name of the group to do stats for
|
35 | my $newsgroup_name = $ARGV[0]; |
80 | my $newsgroup_name = $ARGV[0]; |
36 | $newsgroup_name or usage(); |
81 | $newsgroup_name or usage(); |
37 | 82 | ||
38 | ## Check for removal flags
|
83 | ## Check for removal flags
|
39 | my $ix; |
84 | my $ix; |
40 | my $j; |
85 | my $j; |
41 | my %skipSec; |
86 | my %skipSec; |
42 | my @skiplist; |
87 | my @skiplist; |
43 | my $args = @ARGV; |
88 | my $args = @ARGV; |
44 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
89 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
45 | {
|
90 | {
|
46 | $j = $ix + 1; |
91 | $j = $ix + 1; |
47 | if ( $ARGV[$ix] eq "-x" ) |
92 | if ( $ARGV[$ix] eq "-x" ) |
48 | {
|
93 | {
|
49 | @skiplist = split( ",", $ARGV[$j] ); |
94 | @skiplist = split( ",", $ARGV[$j] ); |
50 | }
|
95 | }
|
51 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
96 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
52 | {
|
97 | {
|
53 | @skiplist = split( ",", $1 ); |
98 | @skiplist = split( ",", $1 ); |
54 | }
|
99 | }
|
55 | }
|
100 | }
|
56 | foreach (@skiplist) |
101 | foreach (@skiplist) |
57 | {
|
102 | {
|
58 | $skipSec{$_} = 1; |
103 | $skipSec{$_} = 1; |
59 | }
|
104 | }
|
60 | 105 | ||
61 | ## Leafnode users will want /var/spool/news for this variable.
|
106 | ## Leafnode users will want /var/spool/news for this variable.
|
62 | my $news = "/var/spool/news/"; |
107 | my $news = "/var/spool/news/"; |
63 | 108 | ||
64 | ## Number of top or bottom posters to show
|
109 | ## Number of top or bottom posters to show
|
65 | my $topposters = 20; |
110 | my $topposters = 20; |
66 | 111 | ||
67 | ## Number of threads we want to know about
|
112 | ## Number of threads we want to know about
|
68 | my $topthreads = 20; |
113 | my $topthreads = 20; |
69 | 114 | ||
70 | ## Number of cross-posted threads to show
|
115 | ## Number of cross-posted threads to show
|
71 | my $topcrossposts = 10; |
116 | my $topcrossposts = 10; |
72 | 117 | ||
73 | ## Number of agents we list
|
118 | ## Number of agents we list
|
74 | my $topagents = 10; |
119 | my $topagents = 10; |
75 | 120 | ||
76 | ## Number of time zones to show
|
121 | ## Number of time zones to show
|
77 | my $toptz = 10; |
122 | my $toptz = 10; |
78 | 123 | ||
79 | ###################### DATA STRUCTURES ######################
|
124 | ###################### DATA STRUCTURES ######################
|
80 | my $group = $newsgroup_name; |
125 | my $group = $newsgroup_name; |
81 | $group =~ s!\.!/!g; |
126 | $group =~ s!\.!/!g; |
82 | my %data; # name, count, agent, total, orig, quoted |
127 | my %data; # name, count, agent, total, orig, quoted |
83 | my $totsize = 0; # holds total sizes of all files |
128 | my $totsize = 0; # holds total sizes of all files |
84 | my %crossposts; # group, count |
129 | my %crossposts; # group, count |
85 | my %threads; # subject, count |
130 | my %threads; # subject, count |
86 | my $replies = 0; # total no. of replies |
131 | my $replies = 0; # total no. of replies |
87 | my $origposts = 0; # total no. of original posts |
132 | my $origposts = 0; # total no. of original posts |
88 | my %tz; # timezones by count |
133 | my %tz; # timezones by count |
89 | my $earliest; # earliest article we have found |
134 | my $earliest; # earliest article we have found |
90 | my $latest; # latest article we have found |
135 | my $latest; # latest article we have found |
91 | my $totheader = 0; # total size of header material |
136 | my $totheader = 0; # total size of header material |
92 | my $totbody = 0; # total size of body material |
137 | my $totbody = 0; # total size of body material |
93 | my $totsig = 0; # total size of sig material |
138 | my $totsig = 0; # total size of sig material |
94 | my $totquoted = 0; # total size of quoted material |
139 | my $totquoted = 0; # total size of quoted material |
95 | my $totorig = 0; # total size of original material |
140 | my $totorig = 0; # total size of original material |
96 | my $totalposts; # total no. of posts considered |
141 | my $totalposts; # total no. of posts considered |
97 | my %distinct_agent; |
142 | my %distinct_agent; |
98 | 143 | ||
99 | ## Used to hold counts of User Agents used
|
144 | ## Used to hold counts of User Agents used
|
100 | my %agents = ( |
145 | my %agents = ( |
101 | "Compuserver" => 0, |
146 | "Compuserver" => 0, |
102 | "Foorum" => 0, |
147 | "Foorum" => 0, |
103 | "Forte Agent" => 0, |
148 | "Forte Agent" => 0, |
104 | "Forte Free Agent" => 0, |
149 | "Forte Free Agent" => 0, |
105 | "Gnus" => 0, |
150 | "Gnus" => 0, |
106 | "KNode" => 0, |
151 | "KNode" => 0, |
107 | "MacSOUP" => 0, |
152 | "MacSOUP" => 0, |
108 | "MT-NewsWatcher" => 0, |
153 | "MT-NewsWatcher" => 0, |
109 | "MicroPlanet Gravity" => 0, |
154 | "MicroPlanet Gravity" => 0, |
110 | "Microsoft Outlook Express" => 0, |
155 | "Microsoft Outlook Express" => 0, |
111 | "Microsoft Windows Mail" => 0, |
156 | "Microsoft Windows Mail" => 0, |
112 | "Mozilla" => 0, |
157 | "Mozilla" => 0, |
113 | "News Rover" => 0, |
158 | "News Rover" => 0, |
114 | "NN" => 0, |
159 | "NN" => 0, |
115 | "Pan" => 0, |
160 | "Pan" => 0, |
116 | "rn" => 0, |
161 | "rn" => 0, |
117 | "slrn" => 0, |
162 | "slrn" => 0, |
118 | "Sylpheed" => 0, |
163 | "Sylpheed" => 0, |
119 | "tin" => 0, |
164 | "tin" => 0, |
120 | "VSoup" => 0, |
165 | "VSoup" => 0, |
121 | "WebTV" => 0, |
166 | "WebTV" => 0, |
122 | "Xnews" => 0, |
167 | "Xnews" => 0, |
123 | ); |
168 | ); |
124 | 169 | ||
125 | my $datetime_parser = DateTime::Format::Mail->new(); |
170 | my $datetime_parser = DateTime::Format::Mail->new(); |
126 | $datetime_parser->loose(); |
171 | $datetime_parser->loose(); |
127 | 172 | ||
128 | my $today = DateTime->today( time_zone => 'UTC' ); |
173 | my $today = DateTime->today( time_zone => 'UTC' ); |
129 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
174 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
130 | my $start = int $prev_month->strftime('%s'); |
175 | my $start = int $prev_month->strftime('%s'); |
131 | my $numdays = int DateTime->last_day_of_month( |
176 | my $numdays = int DateTime->last_day_of_month( |
132 | year => $prev_month->year(), |
177 | year => $prev_month->year(), |
133 | month => $prev_month->month(), |
178 | month => $prev_month->month(), |
134 | time_zone => $prev_month->time_zone(), |
179 | time_zone => $prev_month->time_zone(), |
135 | )->day(); |
180 | )->day(); |
136 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
181 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
137 | 182 | ||
138 | dmsg( $start, " to ", $end ) if DEBUG; |
183 | dmsg( $start, " to ", $end ) if DEBUG; |
139 | 184 | ||
140 | chdir("$news$group") |
185 | chdir("$news$group") |
141 | or die __x( |
186 | or die __x( |
142 | "Can't cd to {newsgroup}: {error}\n", |
187 | "Can't cd to {newsgroup}: {error}\n", |
143 | newsgroup => "$news$group", |
188 | newsgroup => "$news$group", |
144 | error => $! |
189 | error => $! |
145 | ); |
190 | ); |
146 | opendir( DIR, "." ) |
191 | opendir( DIR, "." ) |
147 | or die __x( |
192 | or die __x( |
148 | "Can't open {newsgroup}: {error}\n", |
193 | "Can't open {newsgroup}: {error}\n", |
149 | newsgroup => "$news$group", |
194 | newsgroup => "$news$group", |
150 | error => $! |
195 | error => $! |
151 | ); |
196 | ); |
152 | 197 | ||
153 | while ( defined( my $filename = readdir(DIR) ) ) |
198 | while ( defined( my $filename = readdir(DIR) ) ) |
154 | {
|
199 | {
|
155 | next unless -f $filename; # only want real files |
200 | next unless -f $filename; # only want real files |
156 | next if ( $filename eq ".overview" ); # real articles only |
201 | next if ( $filename eq ".overview" ); # real articles only |
157 | 202 | ||
158 | get_article($filename); # read in the article |
203 | get_article($filename); # read in the article |
159 | }
|
204 | }
|
160 | closedir(DIR); # finished with the directory |
205 | closedir(DIR); # finished with the directory |
161 | 206 | ||
162 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
207 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
163 | 208 | ||
164 | ## Post-processing
|
209 | ## Post-processing
|
165 | count_agents(); # count agents, collapsing versions |
210 | count_agents(); # count agents, collapsing versions |
166 | fix_percent(); |
211 | fix_percent(); |
167 | 212 | ||
168 | write_data(); |
213 | write_data(); |
169 | display_results(); |
214 | display_results(); |
170 | 215 | ||
171 | ########################################
|
216 | ########################################
|
172 | ## Get current article's header and body
|
217 | ## Get current article's header and body
|
173 | ########################################
|
218 | ########################################
|
174 | sub get_article
|
219 | sub get_article
|
175 | {
|
220 | {
|
176 | my $filename = shift; |
221 | my $filename = shift; |
177 | 222 | ||
178 | open( my $FILE, '<', $filename ) |
223 | open( my $FILE, '<', $filename ) |
179 | or
|
224 | or
|
180 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
225 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
181 | my $msg = Mail::Message->read($FILE); |
226 | my $msg = Mail::Message->read($FILE); |
182 | my $timestamp = $msg->timestamp(); |
227 | my $timestamp = $msg->timestamp(); |
183 | my $date = $msg->study('Date'); |
228 | my $date = $msg->study('Date'); |
184 | 229 | ||
185 | ## Disregard article if timestamp is not in range
|
230 | ## Disregard article if timestamp is not in range
|
186 | dmsg($timestamp) if DEBUG; |
231 | dmsg($timestamp) if DEBUG; |
187 | if ( $timestamp < $start or $timestamp >= $end ) |
232 | if ( $timestamp < $start or $timestamp >= $end ) |
188 | {
|
233 | {
|
189 | dmsg("Posting on $date ignored.") if DEBUG; |
234 | dmsg("Posting on $date ignored.") if DEBUG; |
190 | return; |
235 | return; |
191 | }
|
236 | }
|
192 | 237 | ||
193 | $totalposts++; # bump count of articles considered |
238 | $totalposts++; # bump count of articles considered |
194 | 239 | ||
195 | ## DEBUG
|
240 | ## DEBUG
|
196 | dmsg($date) if DEBUG; |
241 | dmsg($date) if DEBUG; |
197 | 242 | ||
198 | ## get stats about the file itself
|
243 | ## get stats about the file itself
|
199 | my $filesize = -s $filename; # get total size of file |
244 | my $filesize = -s $filename; # get total size of file |
200 | $totsize += $filesize; # bump total sizes of all files |
245 | $totsize += $filesize; # bump total sizes of all files |
201 | 246 | ||
202 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
247 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
203 | {
|
248 | {
|
204 | $earliest = $timestamp; |
249 | $earliest = $timestamp; |
205 | }
|
250 | }
|
206 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
251 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
207 | {
|
252 | {
|
208 | $latest = $timestamp; |
253 | $latest = $timestamp; |
209 | }
|
254 | }
|
210 | 255 | ||
211 | #print "timestamp: $timestamp\n";
|
256 | #print "timestamp: $timestamp\n";
|
212 | 257 | ||
213 | ## count header size
|
258 | ## count header size
|
214 | $totheader += $msg->head()->size(); |
259 | $totheader += $msg->head()->size(); |
215 | 260 | ||
216 | ## get the poster's name (MIME-decoded, in UTF-8)
|
261 | ## get the poster's name (MIME-decoded, in UTF-8)
|
217 | my $poster = $msg->study('From'); |
262 | my $poster = $msg->study('From'); |
218 | if ( defined $poster ) |
263 | if ( defined $poster ) |
219 | {
|
264 | {
|
220 | ## Convert old to new format
|
265 | ## Convert old to new format
|
221 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
266 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
222 | 267 | ||
223 | ## Collapse whitespace
|
268 | ## Collapse whitespace
|
224 | $poster =~ s/\s+/ /g; |
269 | $poster =~ s/\s+/ /g; |
225 | 270 | ||
226 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
271 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
227 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
272 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
228 | 273 | ||
229 | ## DEBUG
|
274 | ## DEBUG
|
230 | dmsg($poster) if DEBUG; |
275 | dmsg($poster) if DEBUG; |
231 | 276 | ||
232 | ## seen this one before?
|
277 | ## seen this one before?
|
233 | if ( !defined( $data{$poster} ) ) |
278 | if ( !defined( $data{$poster} ) ) |
234 | {
|
279 | {
|
235 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
280 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
236 | $data{$poster}{'orig'} = 0; |
281 | $data{$poster}{'orig'} = 0; |
237 | $data{$poster}{'quoted'} = 0; |
282 | $data{$poster}{'quoted'} = 0; |
238 | }
|
283 | }
|
239 | $data{$poster}{'count'}++; # bump count for this poster |
284 | $data{$poster}{'count'}++; # bump count for this poster |
240 | $data{$poster}{'size'} += $filesize; # total size of file |
285 | $data{$poster}{'size'} += $filesize; # total size of file |
241 | 286 | ||
242 | ## The User-Agent and/or X-Newsreader fields
|
287 | ## The User-Agent and/or X-Newsreader fields
|
243 | ## for User-Agent by poster
|
288 | ## for User-Agent by poster
|
244 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
289 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
245 | if ( defined $ua ) |
290 | if ( defined $ua ) |
246 | {
|
291 | {
|
247 | $data{$poster}{'agent'} = $ua; |
292 | $data{$poster}{'agent'} = $ua; |
248 | 293 | ||
249 | ## DEBUG
|
294 | ## DEBUG
|
250 | dmsg($ua) if DEBUG; |
295 | dmsg($ua) if DEBUG; |
251 | }
|
296 | }
|
252 | 297 | ||
253 | ## The User Agent for User-Agent by number of posts
|
298 | ## The User Agent for User-Agent by number of posts
|
254 | get_agent($msg); |
299 | get_agent($msg); |
255 | 300 | ||
256 | ## Get all cross-posted newsgroups
|
301 | ## Get all cross-posted newsgroups
|
257 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
302 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
258 | {
|
303 | {
|
259 | $crossposts{$_}++; # bump count for each |
304 | $crossposts{$_}++; # bump count for each |
260 | }
|
305 | }
|
261 | 306 | ||
262 | ## Get threads
|
307 | ## Get threads
|
263 | my $thread = $msg->study('Subject'); |
308 | my $thread = $msg->study('Subject'); |
264 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
309 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
265 | $thread =~ s/\s+/ /g; # collapse whitespace |
310 | $thread =~ s/\s+/ /g; # collapse whitespace |
266 | $threads{$thread}{'count'}++; # bump count of this subject |
311 | $threads{$thread}{'count'}++; # bump count of this subject |
267 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
312 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
268 | 313 | ||
269 | ## Is this an original post or a reply?
|
314 | ## Is this an original post or a reply?
|
270 | if ( defined $msg->study('References') ) |
315 | if ( defined $msg->study('References') ) |
271 | {
|
316 | {
|
272 | $replies++; |
317 | $replies++; |
273 | }
|
318 | }
|
274 | else
|
319 | else
|
275 | {
|
320 | {
|
276 | $origposts++; |
321 | $origposts++; |
277 | }
|
322 | }
|
278 | 323 | ||
279 | ## Get the time zone
|
324 | ## Get the time zone
|
280 | my $datetime = $datetime_parser->parse_datetime($date); |
325 | my $datetime = $datetime_parser->parse_datetime($date); |
281 | my $tz = $datetime->strftime('%z'); |
326 | my $tz = $datetime->strftime('%z'); |
282 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
327 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
283 | $tz{$tz}++; |
328 | $tz{$tz}++; |
284 | 329 | ||
285 | ## DEBUG
|
330 | ## DEBUG
|
286 | dmsg($tz) if DEBUG; |
331 | dmsg($tz) if DEBUG; |
287 | 332 | ||
288 | #### Now analyse the body text ####
|
333 | #### Now analyse the body text ####
|
289 | my $body = $msg->body(); |
334 | my $body = $msg->body(); |
290 | 335 | ||
291 | my $insig = 0; |
336 | my $insig = 0; |
292 | my @body = $body->lines; |
337 | my @body = $body->lines; |
293 | for (@body) |
338 | for (@body) |
294 | {
|
339 | {
|
295 | $totbody += length($_); # bump total body size |
340 | $totbody += length($_); # bump total body size |
296 | next if (m{^$>}o); # don't count blank lines in body |
341 | next if (m{^$>}o); # don't count blank lines in body |
297 | if ( $insig == 1 ) |
342 | if ( $insig == 1 ) |
298 | {
|
343 | {
|
299 | 344 | ||
300 | # bump total sig size
|
345 | # bump total sig size
|
301 | $totsig += length($_); |
346 | $totsig += length($_); |
302 | }
|
347 | }
|
303 | ## are we in a quote line?
|
348 | ## are we in a quote line?
|
304 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
349 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
305 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
350 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
306 | {
|
351 | {
|
307 | ## bump count of quoted chrs
|
352 | ## bump count of quoted chrs
|
308 | $data{$poster}{'quoted'} += length($_); |
353 | $data{$poster}{'quoted'} += length($_); |
309 | $totquoted += length($_); |
354 | $totquoted += length($_); |
310 | }
|
355 | }
|
311 | elsif (/^-- $/) |
356 | elsif (/^-- $/) |
312 | {
|
357 | {
|
313 | $insig = 1; |
358 | $insig = 1; |
314 | }
|
359 | }
|
315 | else
|
360 | else
|
316 | {
|
361 | {
|
317 | ## We must be processing an original line
|
362 | ## We must be processing an original line
|
318 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
363 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
319 | $totorig += length($_); |
364 | $totorig += length($_); |
320 | }
|
365 | }
|
321 | }
|
366 | }
|
322 | 367 | ||
323 | # end for (@body)
|
368 | # end for (@body)
|
324 | }
|
369 | }
|
325 | 370 | ||
326 | close($FILE); |
371 | close($FILE); |
327 | }
|
372 | }
|
328 | 373 | ||
329 | sub get_agent
|
374 | sub get_agent
|
330 | {
|
375 | {
|
331 | my $msg = shift; |
376 | my $msg = shift; |
332 | 377 | ||
333 | my $ua = |
378 | my $ua = |
334 | $msg->study('User-Agent') |
379 | $msg->study('User-Agent') |
335 | or $msg->study('X-Newsreader') |
380 | or $msg->study('X-Newsreader') |
336 | or $msg->study('X-Mailer'); |
381 | or $msg->study('X-Mailer'); |
337 | if ( not defined $ua ) |
382 | if ( not defined $ua ) |
338 | {
|
383 | {
|
339 | my $org = $msg->study('Organization'); |
384 | my $org = $msg->study('Organization'); |
340 | if ( defined $org |
385 | if ( defined $org |
341 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
386 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
342 | {
|
387 | {
|
343 | $ua = $org; |
388 | $ua = $org; |
344 | }
|
389 | }
|
345 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
390 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
346 | {
|
391 | {
|
347 | $ua = "Pine"; |
392 | $ua = "Pine"; |
348 | }
|
393 | }
|
349 | }
|
394 | }
|
350 | 395 | ||
351 | ## Hopefully found UA, else set to unknown
|
396 | ## Hopefully found UA, else set to unknown
|
352 | if ( not defined $ua ) |
397 | if ( not defined $ua ) |
353 | {
|
398 | {
|
354 | $ua = __ "unknown"; |
399 | $ua = __ "unknown"; |
355 | }
|
400 | }
|
356 | 401 | ||
357 | $ua = clean($ua); |
402 | $ua = clean($ua); |
358 | 403 | ||
359 | my $raw = $ua; |
404 | my $raw = $ua; |
360 | my $agent = $raw; |
405 | my $agent = $raw; |
361 | 406 | ||
362 | ## strip http
|
407 | ## strip http
|
363 | if ( $raw =~ /.*http.*/ ) |
408 | if ( $raw =~ /.*http.*/ ) |
364 | {
|
409 | {
|
365 | $raw =~ s!posted via!!i; |
410 | $raw =~ s!posted via!!i; |
366 | $raw =~ s!http://!!g; |
411 | $raw =~ s!http://!!g; |
367 | $raw =~ s!/!!g; |
412 | $raw =~ s!/!!g; |
368 | $raw =~ s! !!g; |
413 | $raw =~ s! !!g; |
369 | }
|
414 | }
|
370 | 415 | ||
371 | ## Fix Outlook from Mac
|
416 | ## Fix Outlook from Mac
|
372 | if ( $raw =~ /^microsoft/i ) |
417 | if ( $raw =~ /^microsoft/i ) |
373 | {
|
418 | {
|
374 | $raw =~ s/-/ /g; |
419 | $raw =~ s/-/ /g; |
375 | }
|
420 | }
|
376 | 421 | ||
377 | ## Pick out the popular agents
|
422 | ## Pick out the popular agents
|
378 | if ( $raw =~ /(outlook express)/i |
423 | if ( $raw =~ /(outlook express)/i |
379 | || $raw =~ /(windows mail)/i |
424 | || $raw =~ /(windows mail)/i |
380 | || $raw =~ /(microplanet gravity)/i |
425 | || $raw =~ /(microplanet gravity)/i |
381 | || $raw =~ /(news rover)/i |
426 | || $raw =~ /(news rover)/i |
382 | || $raw =~ /(forte agent)/i |
427 | || $raw =~ /(forte agent)/i |
383 | || $raw =~ /(forte free agent)/i ) |
428 | || $raw =~ /(forte free agent)/i ) |
384 | {
|
429 | {
|
385 | $agent = $1; |
430 | $agent = $1; |
386 | }
|
431 | }
|
387 | elsif ( |
432 | elsif ( |
388 | $raw =~ /^( |
433 | $raw =~ /^( |
389 | pan |
434 | pan |
390 | |sylpheed
|
435 | |sylpheed
|
391 | |slrn
|
436 | |slrn
|
392 | |mozilla
|
437 | |mozilla
|
393 | |knode
|
438 | |knode
|
394 | |tin
|
439 | |tin
|
395 | |hamster
|
440 | |hamster
|
396 | |xrn
|
441 | |xrn
|
397 | |xnews
|
442 | |xnews
|
398 | |aol
|
443 | |aol
|
399 | |gnus
|
444 | |gnus
|
400 | |krn
|
445 | |krn
|
401 | |macsoup
|
446 | |macsoup
|
402 | |messenger
|
447 | |messenger
|
403 | |openxp
|
448 | |openxp
|
404 | |pine
|
449 | |pine
|
405 | |thoth
|
450 | |thoth
|
406 | |turnpike
|
451 | |turnpike
|
407 | |winvn
|
452 | |winvn
|
408 | |vsoup
|
453 | |vsoup
|
409 | |google
|
454 | |google
|
410 | |supernews
|
455 | |supernews
|
411 | |nn
|
456 | |nn
|
412 | |rn
|
457 | |rn
|
413 | |007
|
458 | |007
|
414 | |webtv
|
459 | |webtv
|
415 | |compuserve
|
460 | |compuserve
|
416 | )/ix |
461 | )/ix |
417 | )
|
462 | )
|
418 | {
|
463 | {
|
419 | $agent = $1; |
464 | $agent = $1; |
420 | }
|
465 | }
|
421 | else
|
466 | else
|
422 | {
|
467 | {
|
423 | ## Clean up unknown agents
|
468 | ## Clean up unknown agents
|
424 | if ( $raw =~ m!^(.*?)/! ) |
469 | if ( $raw =~ m!^(.*?)/! ) |
425 | {
|
470 | {
|
426 | $agent = $1; |
471 | $agent = $1; |
427 | }
|
472 | }
|
428 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
473 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
429 | {
|
474 | {
|
430 | $agent = $1; |
475 | $agent = $1; |
431 | }
|
476 | }
|
432 | }
|
477 | }
|
433 | 478 | ||
434 | $distinct_agent{$agent}++; |
479 | $distinct_agent{$agent}++; |
435 | return $agent; |
480 | return $agent; |
436 | }
|
481 | }
|
437 | ## get_agent
|
482 | ## get_agent
|
438 | 483 | ||
439 | #########################################
|
484 | #########################################
|
440 | ## Count the User-Agents used, collapsing
|
485 | ## Count the User-Agents used, collapsing
|
441 | ## different versions into one per agent.
|
486 | ## different versions into one per agent.
|
442 | #########################################
|
487 | #########################################
|
443 | sub count_agents
|
488 | sub count_agents
|
444 | {
|
489 | {
|
445 | POSTER:
|
490 | POSTER:
|
446 | foreach my $poster ( keys %data ) |
491 | foreach my $poster ( keys %data ) |
447 | {
|
492 | {
|
448 | foreach my $agent_name ( keys %distinct_agent ) |
493 | foreach my $agent_name ( keys %distinct_agent ) |
449 | { # check against known ones |
494 | { # check against known ones |
450 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
495 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
451 | {
|
496 | {
|
452 | $agents{$agent_name}++; |
497 | $agents{$agent_name}++; |
453 | next POSTER; |
498 | next POSTER; |
454 | }
|
499 | }
|
455 | }
|
500 | }
|
456 | $agents{ $data{$poster}{'agent'} }++; |
501 | $agents{ $data{$poster}{'agent'} }++; |
457 | }
|
502 | }
|
458 | } # count_agents |
503 | } # count_agents |
459 | 504 | ||
460 | #############################################
|
505 | #############################################
|
461 | ## Set orig/total percentages for all posters
|
506 | ## Set orig/total percentages for all posters
|
462 | #############################################
|
507 | #############################################
|
463 | sub fix_percent
|
508 | sub fix_percent
|
464 | {
|
509 | {
|
465 | foreach my $poster ( keys %data ) |
510 | foreach my $poster ( keys %data ) |
466 | {
|
511 | {
|
467 | my $percent = 100; |
512 | my $percent = 100; |
468 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
513 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
469 | {
|
514 | {
|
470 | $percent = |
515 | $percent = |
471 | $data{$poster}{'orig'} * 100 / |
516 | $data{$poster}{'orig'} * 100 / |
472 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
517 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
473 | }
|
518 | }
|
474 | elsif ( $data{$poster}{'orig'} == 0 ) |
519 | elsif ( $data{$poster}{'orig'} == 0 ) |
475 | {
|
520 | {
|
476 | $percent = 0; |
521 | $percent = 0; |
477 | }
|
522 | }
|
478 | $data{$poster}{'percent'} = $percent; |
523 | $data{$poster}{'percent'} = $percent; |
479 | }
|
524 | }
|
480 | }
|
525 | }
|
481 | ## fix_percent
|
526 | ## fix_percent
|
482 | 527 | ||
483 | ##################################
|
528 | ##################################
|
484 | ## Write data structures to a file
|
529 | ## Write data structures to a file
|
485 | ##################################
|
530 | ##################################
|
486 | sub write_data
|
531 | sub write_data
|
487 | {
|
532 | {
|
488 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
533 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
489 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
534 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
490 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
535 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
491 | print $OUTF |
536 | print $OUTF |
492 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
537 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
493 | foreach my $name ( keys %data ) |
538 | foreach my $name ( keys %data ) |
494 | {
|
539 | {
|
495 | print $OUTF |
540 | print $OUTF |
496 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
541 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
497 | }
|
542 | }
|
498 | print $OUTF |
543 | print $OUTF |
499 | "============================================================================\n"; |
544 | "============================================================================\n"; |
500 | print $OUTF "Thread subjects\n"; |
545 | print $OUTF "Thread subjects\n"; |
501 | print $OUTF |
546 | print $OUTF |
502 | "----------------------------------------------------------------------------\n"; |
547 | "----------------------------------------------------------------------------\n"; |
503 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
548 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
504 | {
|
549 | {
|
505 | print $OUTF |
550 | print $OUTF |
506 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
551 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
507 | }
|
552 | }
|
508 | print $OUTF |
553 | print $OUTF |
509 | "============================================================================\n"; |
554 | "============================================================================\n"; |
510 | print $OUTF "Cross-posts\n"; |
555 | print $OUTF "Cross-posts\n"; |
511 | print $OUTF |
556 | print $OUTF |
512 | "----------------------------------------------------------------------------\n"; |
557 | "----------------------------------------------------------------------------\n"; |
513 | foreach my $name ( sort keys %crossposts ) |
558 | foreach my $name ( sort keys %crossposts ) |
514 | {
|
559 | {
|
515 | print $OUTF "$name : $crossposts{$name}\n"; |
560 | print $OUTF "$name : $crossposts{$name}\n"; |
516 | }
|
561 | }
|
517 | print $OUTF |
562 | print $OUTF |
518 | "============================================================================\n"; |
563 | "============================================================================\n"; |
519 | print $OUTF "User agents\n"; |
564 | print $OUTF "User agents\n"; |
520 | print $OUTF |
565 | print $OUTF |
521 | "----------------------------------------------------------------------------\n"; |
566 | "----------------------------------------------------------------------------\n"; |
522 | foreach my $name ( sort keys %agents ) |
567 | foreach my $name ( sort keys %agents ) |
523 | {
|
568 | {
|
524 | print $OUTF "$name : $agents{$name}\n"; |
569 | print $OUTF "$name : $agents{$name}\n"; |
525 | }
|
570 | }
|
526 | print $OUTF |
571 | print $OUTF |
527 | "============================================================================\n"; |
572 | "============================================================================\n"; |
528 | print $OUTF "Time zones\n"; |
573 | print $OUTF "Time zones\n"; |
529 | print $OUTF |
574 | print $OUTF |
530 | "----------------------------------------------------------------------------\n"; |
575 | "----------------------------------------------------------------------------\n"; |
531 | foreach my $name ( sort keys %tz ) |
576 | foreach my $name ( sort keys %tz ) |
532 | {
|
577 | {
|
533 | print $OUTF "$name : $tz{$name}\n"; |
578 | print $OUTF "$name : $tz{$name}\n"; |
534 | }
|
579 | }
|
535 | close $OUTF; |
580 | close $OUTF; |
536 | } # write_data |
581 | } # write_data |
537 | 582 | ||
538 | sub display_results
|
583 | sub display_results
|
539 | {
|
584 | {
|
540 | #################### DISPLAY RESULTS #####################
|
585 | #################### DISPLAY RESULTS #####################
|
541 | print "=" x 76, "\n"; |
586 | println( "=" x 76 ); |
542 | printf "%s\n", |
587 | printf "%s\n", |
543 | centred(
|
588 | centred(
|
544 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
589 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
545 | 76 ); |
590 | 76 ); |
546 | print "=" x 76, "\n"; |
591 | println( "=" x 76 ); |
547 | printf "%s\n", |
592 | printf "%s\n", |
548 | centred(
|
593 | centred(
|
549 | __ |
594 | __(
|
550 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)", |
595 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
- | 596 | ), |
|
551 | 76
|
597 | 76
|
552 | ); |
598 | ); |
553 | print "\n\n"; |
599 | print "\n\n"; |
554 | printf __"Total posts considered: %s over %d days" . "\n", |
600 | printf __"Total posts considered: %s over %d days\n", |
555 | commify($totalposts), |
601 | $formatter->format_number($totalposts), |
556 | $numdays; |
602 | $formatter->format_number($numdays); |
557 | my $time_locale = setlocale(LC_TIME); |
603 | my $time_locale = setlocale(LC_TIME); |
558 | my $earliest_datetime = DateTime->from_epoch( |
604 | my $earliest_datetime = DateTime->from_epoch( |
559 | epoch => $earliest, |
605 | epoch => $earliest, |
560 | locale => $time_locale, |
606 | locale => $time_locale, |
561 | time_zone => 'UTC', |
607 | time_zone => 'UTC', |
562 | ); |
608 | ); |
563 | my $latest_datetime = DateTime->from_epoch( |
609 | my $latest_datetime = DateTime->from_epoch( |
564 | epoch => $latest, |
610 | epoch => $latest, |
565 | locale => $time_locale, |
611 | locale => $time_locale, |
566 | time_zone => 'UTC', |
612 | time_zone => 'UTC', |
567 | ); |
613 | ); |
568 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
614 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
- | 615 | printf __"Earliest article: %s\n", |
|
569 | printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format); |
616 | $earliest_datetime->strftime($datetime_format); |
- | 617 | printf __"Latest article: %s\n", |
|
570 | printf __"Latest article" . ": %s\n", $latest_datetime->strftime($datetime_format); |
618 | $latest_datetime->strftime($datetime_format); |
571 | printf __"Original articles: %s; replies" . ": %s\n", |
619 | printf __"Original articles: %s; replies: %s\n", |
572 | commify($origposts), |
620 | $formatter->format_number($origposts), |
573 | commify($replies); |
621 | $formatter->format_number($replies); |
574 | printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n", |
622 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
- | 623 | $formatter->format_number($totsize), |
|
575 | commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
624 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
576 | printf __ |
- | |
577 | "Average %s articles per day, %.2f MiB per day, %s bytes per article\n", |
625 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
578 | commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
626 | $formatter->format_number( int( $totalposts / $numdays ) ), |
- | 627 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
|
579 | commify( int( $totsize / $totalposts ) ); |
628 | $formatter->format_number( int( $totsize / $totalposts ) ); |
- | 629 | ||
580 | my $count = keys %data; |
630 | my $count = keys %data; |
581 | printf __"Total headers: %s KiB; bodies: %s KiB\n", |
631 | printf __"Total headers: %s; bodies: %s\n", |
- | 632 | $formatter->format_bytes( |
|
- | 633 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
|
- | 634 | ), |
|
582 | commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
635 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
583 | printf __ |
636 | printf __ |
584 | "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n", |
637 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
- | 638 | $formatter->format_bytes( |
|
- | 639 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
|
- | 640 | ), |
|
585 | commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
641 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
586 | ( $totorig * 100 ) / ( $totorig + $totquoted ), |
642 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
587 | commify( int( $totsig / 1024 ) ); |
643 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
588 | printf __"Total number of posters: %s, average %s bytes per poster\n", |
644 | printf __"Total number of posters: %s, average %s per poster\n", |
- | 645 | $formatter->format_number($count), |
|
589 | commify($count), commify( int( $totsize / $count ) ); #/ |
646 | $formatter->format_bytes( $totsize / $count, |
- | 647 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
|
590 | $count = keys %threads; |
648 | $count = keys %threads; |
591 | printf __"Total number of threads: %s, average %s bytes per thread\n", |
649 | printf __"Total number of threads: %s, average %s per thread\n", |
- | 650 | $formatter->format_number($count), |
|
592 | commify($count), commify( int( $totsize / $count ) ); #/ |
651 | $formatter->format_bytes( $totsize / $count, |
- | 652 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
|
593 | printf __"Total number of user agents: %d\n", scalar keys %agents; |
653 | printf __"Total number of user agents: %d\n", |
- | 654 | $formatter->format_number( scalar keys %agents ); |
|
594 | print "\n", "=" x 76, "\n"; |
655 | print "\n", "=" x 76, "\n" ; |
595 | - | ||
596 | ########################################
|
656 | ########################################
|
597 | ## Show posters by article count Sec 1;
|
657 | ## Show posters by article count Sec 1;
|
598 | ########################################
|
658 | ########################################
|
599 | unless ( $skipSec{1} ) |
659 | unless ( $skipSec{1} ) |
600 | {
|
660 | {
|
601 | if ( keys %data < $topposters ) |
661 | if ( keys %data < $topposters ) |
602 | {
|
662 | {
|
603 | $count = keys %data; |
663 | $count = keys %data; |
604 | }
|
664 | }
|
605 | else
|
665 | else
|
606 | {
|
666 | {
|
607 | $count = $topposters; |
667 | $count = $topposters; |
608 | }
|
668 | }
|
609 | printf "%s\n", |
669 | printf "%s\n", |
610 | centred(
|
670 | centred(
|
611 | __x( "Top {count} posters by number of articles", count => $topposters ), 76 ); |
671 | __x( "Top {count} posters by number of articles", count => $topposters ), |
- | 672 | 76
|
|
- | 673 | ); |
|
612 | print "=" x 76, "\n"; |
674 | print "=" x 76, "\n"; |
613 | my $i = 0; |
675 | my $i = 0; |
614 | foreach
|
676 | foreach
|
615 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
677 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
616 | {
|
678 | {
|
617 | my $name = substr( $poster, 0, 65 ); |
679 | my $name = substr( $poster, 0, 65 ); |
618 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
680 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
619 | $data{$poster}{count}; |
681 | $data{$poster}{count}; |
620 | last if ( ++$i == $count ); |
682 | last if ( ++$i == $count ); |
621 | }
|
683 | }
|
622 | print "\n", "=" x 76, "\n"; |
684 | print "\n", "=" x 76, "\n"; |
623 | }
|
685 | }
|
624 | 686 | ||
625 | ######################################
|
687 | ######################################
|
626 | ## Show posters by size in KiB Sec 2;
|
688 | ## Show posters by size in KiB Sec 2;
|
627 | ######################################
|
689 | ######################################
|
628 | unless ( $skipSec{2} ) |
690 | unless ( $skipSec{2} ) |
629 | {
|
691 | {
|
630 | if ( keys %data < $topposters ) |
692 | if ( keys %data < $topposters ) |
631 | {
|
693 | {
|
632 | $count = keys %data; |
694 | $count = keys %data; |
633 | }
|
695 | }
|
634 | else
|
696 | else
|
635 | {
|
697 | {
|
636 | $count = $topposters; |
698 | $count = $topposters; |
637 | }
|
699 | }
|
638 | printf "%s\n", |
700 | printf "%s\n", |
639 | centred(
|
701 | centred(
|
640 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
702 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
- | 703 | 76
|
|
641 | 76 ); |
704 | ); |
642 | print "=" x 76, "\n"; |
705 | print "=" x 76, "\n"; |
643 | my $i = 0; |
706 | my $i = 0; |
644 | foreach
|
707 | foreach
|
645 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
708 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
646 | {
|
709 | {
|
647 | my $name = substr( $poster, 0, 62 ); |
710 | my $name = substr( $poster, 0, 62 ); |
648 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
711 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
649 | $data{$poster}{size} / 1024; #/ |
712 | $data{$poster}{size} / 1024; #/ |
650 | last if ( ++$i == $count ); |
713 | last if ( ++$i == $count ); |
651 | }
|
714 | }
|
652 | print "\n", "=" x 76, "\n"; |
715 | print "\n", "=" x 76, "\n"; |
653 | }
|
716 | }
|
654 | 717 | ||
655 | #####################################
|
718 | #####################################
|
656 | ## Show top posters for original text
|
719 | ## Show top posters for original text
|
657 | #####################################
|
720 | #####################################
|
658 | unless ( $skipSec{3} ) |
721 | unless ( $skipSec{3} ) |
659 | {
|
722 | {
|
660 | if ( keys %data < $topposters ) |
723 | if ( keys %data < $topposters ) |
661 | {
|
724 | {
|
662 | $count = keys %data; |
725 | $count = keys %data; |
663 | }
|
726 | }
|
664 | else
|
727 | else
|
665 | {
|
728 | {
|
666 | $count = $topposters; |
729 | $count = $topposters; |
667 | }
|
730 | }
|
668 | printf "%s\n", |
731 | printf "%s\n", |
669 | centred(
|
732 | centred(
|
670 | __x(
|
733 | __x(
|
671 | "Top {count} responders by original text (> 5 posts)", |
734 | "Top {count} responders by original text (> 5 posts)", |
672 | count => $topposters |
735 | count => $topposters |
673 | ), |
736 | ), |
674 | 76
|
737 | 76
|
675 | ); |
738 | ); |
676 | print "=" x 76, "\n"; |
739 | print "=" x 76, "\n"; |
677 | my $i = 0; |
740 | my $i = 0; |
678 | foreach my $poster ( |
741 | foreach my $poster ( |
679 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
742 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
680 | keys %data |
743 | keys %data |
681 | )
|
744 | )
|
682 | {
|
745 | {
|
683 | next if $data{$poster}{quoted} == 0; |
746 | next if $data{$poster}{quoted} == 0; |
684 | next if $data{$poster}{count} < 5; |
747 | next if $data{$poster}{count} < 5; |
685 | my $name = substr( $poster, 0, 63 ); |
748 | my $name = substr( $poster, 0, 63 ); |
686 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
749 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
687 | $data{$poster}{percent}; |
750 | $data{$poster}{percent}; |
688 | last if ( ++$i == $count ); |
751 | last if ( ++$i == $count ); |
689 | }
|
752 | }
|
690 | print "\n", "=" x 76, "\n"; |
753 | print "\n", "=" x 76, "\n"; |
691 | }
|
754 | }
|
692 | 755 | ||
693 | ########################################
|
756 | ########################################
|
694 | ## Show bottom posters for original text
|
757 | ## Show bottom posters for original text
|
695 | ########################################
|
758 | ########################################
|
696 | unless ( $skipSec{4} ) |
759 | unless ( $skipSec{4} ) |
697 | {
|
760 | {
|
698 | if ( keys %data < $topposters ) |
761 | if ( keys %data < $topposters ) |
699 | {
|
762 | {
|
700 | $count = keys %data; |
763 | $count = keys %data; |
701 | }
|
764 | }
|
702 | else
|
765 | else
|
703 | {
|
766 | {
|
704 | $count = $topposters; |
767 | $count = $topposters; |
705 | }
|
768 | }
|
706 | printf "%s\n", |
769 | printf "%s\n", |
707 | centred(
|
770 | centred(
|
708 | __x(
|
771 | __x(
|
709 | "Bottom {count} responders by original text (> 5 posts)", |
772 | "Bottom {count} responders by original text (> 5 posts)", |
710 | count => $topposters |
773 | count => $topposters |
711 | ), |
774 | ), |
712 | 76
|
775 | 76
|
713 | ); |
776 | ); |
714 | print "=" x 76, "\n"; |
777 | print "=" x 76, "\n"; |
715 | my $i = 0; |
778 | my $i = 0; |
716 | foreach my $poster ( |
779 | foreach my $poster ( |
717 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
780 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
718 | keys %data |
781 | keys %data |
719 | )
|
782 | )
|
720 | {
|
783 | {
|
721 | next if $data{$poster}{quoted} == 0; |
784 | next if $data{$poster}{quoted} == 0; |
722 | next if $data{$poster}{count} < 5; |
785 | next if $data{$poster}{count} < 5; |
723 | my $name = substr( $poster, 0, 63 ); |
786 | my $name = substr( $poster, 0, 63 ); |
724 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
787 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
725 | $data{$poster}{percent}; |
788 | $data{$poster}{percent}; |
726 | last if ( ++$i == $count ); |
789 | last if ( ++$i == $count ); |
727 | }
|
790 | }
|
728 | print "\n", "=" x 76, "\n"; |
791 | print "\n", "=" x 76, "\n"; |
729 | }
|
792 | }
|
730 | 793 | ||
731 | #####################################
|
794 | #####################################
|
732 | ## Show threads by number of articles
|
795 | ## Show threads by number of articles
|
733 | #####################################
|
796 | #####################################
|
734 | unless ( $skipSec{5} ) |
797 | unless ( $skipSec{5} ) |
735 | {
|
798 | {
|
736 | if ( keys %threads < $topthreads ) |
799 | if ( keys %threads < $topthreads ) |
737 | {
|
800 | {
|
738 | $count = keys %threads; |
801 | $count = keys %threads; |
739 | }
|
802 | }
|
740 | else
|
803 | else
|
741 | {
|
804 | {
|
742 | $count = $topthreads; |
805 | $count = $topthreads; |
743 | }
|
806 | }
|
744 | printf "%s\n", |
807 | printf "%s\n", |
- | 808 | centred(
|
|
745 | centred( __x( "Top {count} threads by no. of articles", count => $topthreads ), |
809 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
746 | 76 ); |
810 | 76 ); |
747 | print "=" x 76, "\n"; |
811 | print "=" x 76, "\n"; |
748 | my $i = 0; |
812 | my $i = 0; |
749 | foreach my $thread ( |
813 | foreach my $thread ( |
750 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
814 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
751 | keys %threads |
815 | keys %threads |
752 | )
|
816 | )
|
753 | {
|
817 | {
|
754 | my $name = substr( $thread, 0, 65 ); |
818 | my $name = substr( $thread, 0, 65 ); |
755 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
819 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
756 | $threads{$thread}{'count'}; |
820 | $threads{$thread}{'count'}; |
757 | last if ( ++$i == $count ); |
821 | last if ( ++$i == $count ); |
758 | }
|
822 | }
|
759 | print "\n", "=" x 76, "\n"; |
823 | print "\n", "=" x 76, "\n"; |
760 | }
|
824 | }
|
761 | 825 | ||
762 | ##############################
|
826 | ##############################
|
763 | ## Show threads by size in KiB
|
827 | ## Show threads by size in KiB
|
764 | ##############################
|
828 | ##############################
|
765 | unless ( $skipSec{6} ) |
829 | unless ( $skipSec{6} ) |
766 | {
|
830 | {
|
767 | if ( keys %threads < $topthreads ) |
831 | if ( keys %threads < $topthreads ) |
768 | {
|
832 | {
|
769 | $count = keys %threads; |
833 | $count = keys %threads; |
770 | }
|
834 | }
|
771 | else
|
835 | else
|
772 | {
|
836 | {
|
773 | $count = $topthreads; |
837 | $count = $topthreads; |
774 | }
|
838 | }
|
775 | printf "%s\n", |
839 | printf "%s\n", |
- | 840 | centred(
|
|
776 | centred( __x( "Top {count} threads by size in KiB", count => $topthreads ), |
841 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
777 | 76 ); |
- | |
778 | print "=" x 76, "\n"; |
842 | print "=" x 76, "\n"; |
779 | my $i = 0; |
843 | my $i = 0; |
780 | foreach my $thread ( |
844 | foreach my $thread ( |
781 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
845 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
782 | keys %threads |
846 | keys %threads |
783 | )
|
847 | )
|
784 | {
|
848 | {
|
785 | my $name = substr( $thread, 0, 65 ); |
849 | my $name = substr( $thread, 0, 65 ); |
786 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
850 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
787 | $threads{$thread}{'size'} / 1024; #/ |
851 | $threads{$thread}{'size'} / 1024; #/ |
788 | last if ( ++$i == $count ); |
852 | last if ( ++$i == $count ); |
789 | }
|
853 | }
|
790 | print "\n", "=" x 76, "\n"; |
854 | print "\n", "=" x 76, "\n"; |
791 | }
|
855 | }
|
792 | 856 | ||
793 | ##################################
|
857 | ##################################
|
794 | ## Show top 10 cross-posted groups
|
858 | ## Show top 10 cross-posted groups
|
795 | ##################################
|
859 | ##################################
|
796 | unless ( $skipSec{7} ) |
860 | unless ( $skipSec{7} ) |
797 | {
|
861 | {
|
798 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
862 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
799 | if ( keys %crossposts < $topcrossposts ) |
863 | if ( keys %crossposts < $topcrossposts ) |
800 | {
|
864 | {
|
801 | $count = keys %crossposts; |
865 | $count = keys %crossposts; |
802 | }
|
866 | }
|
803 | else
|
867 | else
|
804 | {
|
868 | {
|
805 | $count = $topcrossposts; |
869 | $count = $topcrossposts; |
806 | }
|
870 | }
|
807 | printf "%s\n", |
871 | printf "%s\n", |
- | 872 | centred(
|
|
808 | centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
873 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
809 | print "=" x 76, "\n"; |
874 | print "=" x 76, "\n"; |
810 | my $i = 0; |
875 | my $i = 0; |
811 | foreach
|
876 | foreach
|
812 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
877 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
813 | {
|
878 | {
|
814 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
879 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
815 | $crossposts{$name}; |
880 | $crossposts{$name}; |
816 | last if ( ++$i == $count ); |
881 | last if ( ++$i == $count ); |
817 | }
|
882 | }
|
818 | print "\n", "=" x 76, "\n"; |
883 | print "\n", "=" x 76, "\n"; |
819 | }
|
884 | }
|
820 | 885 | ||
821 | #########################
|
886 | #########################
|
822 | ## Show agents and counts
|
887 | ## Show agents and counts
|
823 | #########################
|
888 | #########################
|
824 | unless ( $skipSec{8} ) |
889 | unless ( $skipSec{8} ) |
825 | {
|
890 | {
|
826 | if ( keys %agents < $topagents ) |
891 | if ( keys %agents < $topagents ) |
827 | {
|
892 | {
|
828 | $count = keys %agents; |
893 | $count = keys %agents; |
829 | }
|
894 | }
|
830 | else
|
895 | else
|
831 | {
|
896 | {
|
832 | $count = $topagents; |
897 | $count = $topagents; |
833 | }
|
898 | }
|
834 | printf "%s\n", |
899 | printf "%s\n", |
835 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
900 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
836 | 76 ); |
901 | 76 ); |
837 | print "=" x 76, "\n"; |
902 | print "=" x 76, "\n"; |
838 | my $i = 0; |
903 | my $i = 0; |
839 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
904 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
840 | {
|
905 | {
|
841 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
906 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
842 | $agents{$agent}; |
907 | $agents{$agent}; |
843 | last if ( ++$i == $count ); |
908 | last if ( ++$i == $count ); |
844 | }
|
909 | }
|
845 | print "\n", "=" x 76, "\n"; |
910 | print "\n", "=" x 76, "\n"; |
846 | }
|
911 | }
|
847 | 912 | ||
848 | #######################
|
913 | #######################
|
849 | ## Show distinct agents
|
914 | ## Show distinct agents
|
850 | #######################
|
915 | #######################
|
851 | unless ( $skipSec{9} ) |
916 | unless ( $skipSec{9} ) |
852 | {
|
917 | {
|
853 | if ( keys %distinct_agent < $topagents ) |
918 | if ( keys %distinct_agent < $topagents ) |
854 | {
|
919 | {
|
855 | $count = keys %distinct_agent; |
920 | $count = keys %distinct_agent; |
856 | }
|
921 | }
|
857 | else
|
922 | else
|
858 | {
|
923 | {
|
859 | $count = $topagents; |
924 | $count = $topagents; |
860 | }
|
925 | }
|
861 | printf "%s\n", |
926 | printf "%s\n", |
862 | centred(
|
927 | centred(
|
863 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
928 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
- | 929 | 76
|
|
864 | 76 ); |
930 | ); |
865 | print "=" x 76, "\n"; |
931 | print "=" x 76, "\n"; |
866 | my $i = 0; |
932 | my $i = 0; |
867 | foreach my $agent ( |
933 | foreach my $agent ( |
868 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
934 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
869 | keys %distinct_agent |
935 | keys %distinct_agent |
870 | )
|
936 | )
|
871 | {
|
937 | {
|
872 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
938 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
873 | $distinct_agent{$agent}, |
939 | $distinct_agent{$agent}, |
874 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
940 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
875 | last if ( ++$i == $count ); |
941 | last if ( ++$i == $count ); |
876 | }
|
942 | }
|
877 | print "\n", "=" x 76, "\n"; |
943 | print "\n", "=" x 76, "\n"; |
878 | }
|
944 | }
|
879 | 945 | ||
880 | ############################
|
946 | ############################
|
881 | ## Show timezones and counts
|
947 | ## Show timezones and counts
|
882 | ############################
|
948 | ############################
|
883 | unless ( $skipSec{10} ) |
949 | unless ( $skipSec{10} ) |
884 | {
|
950 | {
|
885 | if ( keys %tz < $toptz ) |
951 | if ( keys %tz < $toptz ) |
886 | {
|
952 | {
|
887 | $count = keys %tz; |
953 | $count = keys %tz; |
888 | }
|
954 | }
|
889 | else
|
955 | else
|
890 | {
|
956 | {
|
891 | $count = $toptz; |
957 | $count = $toptz; |
892 | }
|
958 | }
|
- | 959 | printf "%s\n", |
|
893 | printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 ); |
960 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
894 | print "=" x 76, "\n"; |
961 | print "=" x 76, "\n"; |
895 | my $i = 0; |
962 | my $i = 0; |
896 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
963 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
897 | {
|
964 | {
|
898 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
965 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
899 | last if ( ++$i == $count ); |
966 | last if ( ++$i == $count ); |
900 | }
|
967 | }
|
901 | print "\n", "=" x 76, "\n"; |
968 | print "\n", "=" x 76, "\n"; |
902 | }
|
969 | }
|
903 | }
|
970 | }
|
904 | 971 | ||
905 | ## helper subs
|
972 | ## helper subs
|
906 | 973 | ||
907 | ###############################
|
974 | ###############################
|
908 | ## Right pad a string with '.'s
|
975 | ## Right pad a string with '.'s
|
909 | ###############################
|
976 | ###############################
|
910 | sub rpad
|
977 | sub rpad
|
911 | {
|
978 | {
|
912 | ## Get text to pad, length to pad, pad chr
|
979 | ## Get text to pad, length to pad, pad chr
|
913 | my ( $text, $pad_len, $pad_chr ) = @_; |
980 | my ( $text, $pad_len, $pad_chr ) = @_; |
914 | 981 | ||
915 | ## DEBUG
|
982 | ## DEBUG
|
916 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
983 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
917 | 984 | ||
918 | if ( length($text) > $pad_len ) |
985 | if ( length($text) > $pad_len ) |
919 | {
|
986 | {
|
920 | $text = substr( $text, 0, $pad_len ); |
987 | $text = substr( $text, 0, $pad_len ); |
921 | }
|
988 | }
|
922 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
989 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
923 | return $padded; |
990 | return $padded; |
924 | }
|
991 | }
|
925 | 992 | ||
926 | ##################
|
993 | ##################
|
927 | ## Centre a string
|
994 | ## Centre a string
|
928 | ##################
|
995 | ##################
|
929 | sub centred
|
996 | sub centred
|
930 | {
|
997 | {
|
931 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
998 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
932 | my $pad_len = ( $width - length($text) ) / 2; #/ |
999 | my $pad_len = ( $width - length($text) ) / 2; #/ |
933 | my $centred = " " x $pad_len . $text; |
1000 | my $centred = " " x $pad_len . $text; |
934 | return $centred; |
1001 | return $centred; |
935 | }
|
1002 | }
|
936 | 1003 | ||
937 | ###########################
|
1004 | ###########################
|
938 | ## Put commas into a number
|
1005 | ## Put commas into a number
|
939 | ###########################
|
1006 | ###########################
|
940 | # Get some of locale's numeric formatting parameters
|
- | |
941 | my ($thousands_sep, $grouping) = |
- | |
942 | @{localeconv()}{'thousands_sep', 'grouping'}; |
- | |
943 | # Apply defaults if values are missing
|
- | |
944 | $thousands_sep = ',' unless $thousands_sep; |
- | |
945 | # grouping and mon_grouping are packed lists
|
- | |
946 | # of small integers (characters) telling the
|
- | |
947 | # grouping (thousand_seps and mon_thousand_seps
|
- | |
948 | # being the group dividers) of numbers and
|
- | |
949 | # monetary quantities. The integers' meanings:
|
- | |
950 | # 255 means no more grouping, 0 means repeat
|
- | |
951 | # the previous grouping, 1-254 means use that
|
- | |
952 | # as the current grouping. Grouping goes from
|
- | |
953 | # right to left (low to high digits). In the
|
- | |
954 | # below we cheat slightly by never using anything
|
- | |
955 | # else than the first grouping (whatever that is).
|
- | |
956 | my @grouping; |
- | |
957 | if ($grouping) { |
- | |
958 | @grouping = unpack("C*", $grouping); |
- | |
959 | } else { |
- | |
960 | @grouping = (3); |
- | |
961 | }
|
- | |
962 | - | ||
963 | sub commify
|
1007 | sub commify
|
964 | {
|
1008 | {
|
965 | local $_ = shift; |
1009 | local $_ = shift; |
966 | #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
|
1010 | my $number = $_; |
967 | $_ = int; # Chop non-integer part |
1011 | $_ = int; # Chop non-integer part |
968 | 1 while |
1012 | 1 while |
969 | s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/; |
1013 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
- | 1014 | my $int_part = $_; |
|
- | 1015 | my $real_part = ''; |
|
- | 1016 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
|
- | 1017 | {
|
|
970 | return $_; |
1018 | $real_part = $1; |
- | 1019 | }
|
|
- | 1020 | return $int_part . $real_part; |
|
971 | }
|
1021 | }
|
972 | 1022 | ||
973 | ################################################################
|
1023 | ################################################################
|
974 | ## Returns a string with leading and trailing whitespace removed
|
1024 | ## Returns a string with leading and trailing whitespace removed
|
975 | ################################################################
|
1025 | ################################################################
|
976 | sub clean
|
1026 | sub clean
|
977 | {
|
1027 | {
|
978 | my $dirty = shift; |
1028 | my $dirty = shift; |
979 | my $clean = $dirty; |
1029 | my $clean = $dirty; |
980 | $clean =~ s/^\s+|\s+$//g; |
1030 | $clean =~ s/^\s+|\s+$//g; |
981 | 1031 | ||
982 | return $clean; |
1032 | return $clean; |
983 | }
|
1033 | }
|
984 | 1034 | ||
985 | sub usage
|
1035 | sub usage
|
986 | {
|
1036 | {
|
987 | print __"usage: newsstat.pl NEWS.GROUP\n"; |
1037 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
988 | exit 1; |
1038 | exit 1; |
989 | }
|
1039 | }
|
990 | 1040 | ||
991 | sub dmsg
|
1041 | sub dmsg
|
992 | {
|
1042 | {
|
993 | print STDERR @_, "\n"; |
1043 | print STDERR @_, "\n"; |
994 | }
|
1044 | }
|
995 | 1045 | ||
996 | sub dmsg2
|
1046 | sub dmsg2
|
997 | {
|
1047 | {
|
998 | my ( $level, @msg ) = @_; |
1048 | my ( $level, @msg ) = @_; |
999 | print STDERR @msg, "\n" if $level >= DEBUG; |
1049 | print STDERR @msg, "\n" if $level >= DEBUG; |
1000 | }
|
1050 | }
|
- | 1051 | ||
- | 1052 | sub println
|
|
- | 1053 | {
|
|
- | 1054 | print @_, "\n"; |
|
- | 1055 | }
|
|
1001 | 1056 |