Rev 30 | Rev 33 | Go to most recent revision | Only display areas with differences | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 30 | Rev 31 | ||
|---|---|---|---|
| 1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
| 2 | use strict; |
2 | use strict; |
| 3 | use warnings; |
3 | use warnings; |
| 4 | require 5.004; |
4 | require 5.004; |
| 5 | 5 | ||
| 6 | #use diagnostics;
|
6 | #use diagnostics;
|
| 7 | use utf8; |
7 | use utf8; |
| 8 | 8 | ||
| 9 | ## NOTE:
|
9 | ## NOTE:
|
| 10 | ## Enable and remove binmode when utf8::all has actually become lexically scoped
|
10 | ## Enable and remove binmode when utf8::all has actually become lexically scoped
|
| 11 | # use utf8:all;
|
11 | # use utf8:all;
|
| 12 | 12 | ||
| 13 | use constant DEBUG => 0; |
13 | use constant DEBUG => 0; |
| 14 | 14 | ||
| 15 | ## newsstat.pl
|
15 | ## newsstat.pl
|
| 16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
16 | ## Copyright (C) 2011, 2012 Thomas Lahn <startrek@PointedEars.de>
|
| 17 | ## Based on work by Garry Knight et al.
|
17 | ## Based on work by Garry Knight et al.
|
| 18 | ##
|
18 | ##
|
| 19 | ## This program is free software: you can redistribute it and/or modify
|
19 | ## This program is free software: you can redistribute it and/or modify
|
| 20 | ## it under the terms of the GNU General Public License as published by
|
20 | ## it under the terms of the GNU General Public License as published by
|
| 21 | ## the Free Software Foundation, either version 3 of the License, or
|
21 | ## the Free Software Foundation, either version 3 of the License, or
|
| 22 | ## (at your option) any later version.
|
22 | ## (at your option) any later version.
|
| 23 | ##
|
23 | ##
|
| 24 | ## This program is distributed in the hope that it will be useful,
|
24 | ## This program is distributed in the hope that it will be useful,
|
| 25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
25 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
26 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 27 | ## GNU General Public License for more details.
|
27 | ## GNU General Public License for more details.
|
| 28 | ##
|
28 | ##
|
| 29 | ## You should have received a copy of the GNU General Public License
|
29 | ## You should have received a copy of the GNU General Public License
|
| 30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
30 | ## along with this program. If not, see <http://www.gnu.org/licenses/>.
|
| 31 | 31 | ||
| 32 | ## Print out all text to STDOUT UTF-8 encoded
|
32 | ## Print out all text to STDOUT UTF-8 encoded
|
| 33 | binmode STDOUT, ':encoding(UTF-8)'; |
33 | binmode STDOUT, ':encoding(UTF-8)'; |
| 34 | binmode STDERR, ':encoding(UTF-8)'; |
34 | binmode STDERR, ':encoding(UTF-8)'; |
| 35 | 35 | ||
| 36 | ## L10n
|
36 | ## L10n
|
| 37 | use locale ':not_characters'; |
37 | use locale ':not_characters'; |
| 38 | 38 | ||
| 39 | # setlocale( LC_MESSAGES, '' );
|
39 | # setlocale( LC_MESSAGES, '' );
|
| 40 | require Number::Format; |
40 | require Number::Format; |
| 41 | 41 | ||
| 42 | ## i18n
|
42 | ## i18n
|
| 43 | ## FIXME: Automatically include resolved '.' in @INC
|
43 | ## FIXME: Automatically include resolved '.' in @INC
|
| 44 | # print join "\n", @INC;
|
44 | # print join "\n", @INC;
|
| 45 | 45 | ||
| 46 | use Locale::TextDomain ('de.pointedears.newsstat'); |
46 | use Locale::TextDomain ('de.pointedears.newsstat'); |
| 47 | use POSIX ('locale_h'); |
47 | use POSIX ('locale_h'); |
| 48 | use Locale::Messages qw (bind_textdomain_filter |
48 | use Locale::Messages qw (bind_textdomain_filter |
| 49 | bind_textdomain_codeset |
49 | bind_textdomain_codeset |
| 50 | turn_utf_8_on); |
50 | turn_utf_8_on); |
| 51 | 51 | ||
| 52 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
52 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
| 53 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
53 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
| 54 | 54 | ||
| 55 | require Mail::Message; |
55 | require Mail::Message; |
| 56 | require DateTime; |
56 | require DateTime; |
| 57 | require DateTime::Format::Mail; |
57 | require DateTime::Format::Mail; |
| 58 | 58 | ||
| 59 | # See comments in previous example
|
59 | # See comments in previous example
|
| 60 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
60 | my ( $thousands_sep, $mon_thousands_sep, $grouping, $decimal_point ) = |
| 61 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
61 | @{ localeconv() }{ 'thousands_sep', 'mon_thousands_sep', 'grouping', |
| 62 | 'decimal_point' }; |
62 | 'decimal_point' }; |
| 63 | 63 | ||
| 64 | # Apply defaults if values are missing
|
64 | # Apply defaults if values are missing
|
| 65 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
65 | $thousands_sep = $mon_thousands_sep unless $thousands_sep; |
| 66 | $thousands_sep = ' ' unless $thousands_sep; |
66 | $thousands_sep = ' ' unless $thousands_sep; |
| 67 | 67 | ||
| 68 | # grouping and mon_grouping are packed lists
|
68 | # grouping and mon_grouping are packed lists
|
| 69 | # of small integers (characters) telling the
|
69 | # of small integers (characters) telling the
|
| 70 | # grouping (thousand_seps and mon_thousand_seps
|
70 | # grouping (thousand_seps and mon_thousand_seps
|
| 71 | # being the group dividers) of numbers and
|
71 | # being the group dividers) of numbers and
|
| 72 | # monetary quantities. The integers' meanings:
|
72 | # monetary quantities. The integers' meanings:
|
| 73 | # 255 means no more grouping, 0 means repeat
|
73 | # 255 means no more grouping, 0 means repeat
|
| 74 | # the previous grouping, 1-254 means use that
|
74 | # the previous grouping, 1-254 means use that
|
| 75 | # as the current grouping. Grouping goes from
|
75 | # as the current grouping. Grouping goes from
|
| 76 | # right to left (low to high digits). In the
|
76 | # right to left (low to high digits). In the
|
| 77 | # below we cheat slightly by never using anything
|
77 | # below we cheat slightly by never using anything
|
| 78 | # else than the first grouping (whatever that is).
|
78 | # else than the first grouping (whatever that is).
|
| 79 | my @grouping; |
79 | my @grouping; |
| 80 | if ($grouping) |
80 | if ($grouping) |
| 81 | {
|
81 | {
|
| 82 | @grouping = unpack( "C*", $grouping ); |
82 | @grouping = unpack( "C*", $grouping ); |
| 83 | }
|
83 | }
|
| 84 | else
|
84 | else
|
| 85 | {
|
85 | {
|
| 86 | @grouping = (3); |
86 | @grouping = (3); |
| 87 | }
|
87 | }
|
| 88 | 88 | ||
| 89 | ## FIXME: Why don't the defaults work already?
|
89 | ## FIXME: Why don't the defaults work already?
|
| 90 | my $formatter = new Number::Format( |
90 | my $formatter = new Number::Format( |
| 91 | -decimal_point => $decimal_point, |
91 | -decimal_point => $decimal_point, |
| 92 | -thousands_sep => $thousands_sep, |
92 | -thousands_sep => $thousands_sep, |
| 93 | 93 | ||
| 94 | # -grouping => $grouping[0]
|
94 | # -grouping => $grouping[0]
|
| 95 | ); |
95 | ); |
| 96 | 96 | ||
| 97 | ###################### USER CONFIGURATIONS ############################
|
97 | ###################### USER CONFIGURATIONS ############################
|
| 98 | 98 | ||
| 99 | ## The name of the group to do stats for
|
99 | ## The name of the group to do stats for
|
| 100 | my $newsgroup_name = $ARGV[0]; |
100 | my $newsgroup_name = $ARGV[0]; |
| 101 | $newsgroup_name or usage(); |
101 | $newsgroup_name // usage(); |
| 102 | 102 | ||
| 103 | ## Check for removal flags
|
103 | ## Check for removal flags
|
| 104 | my $ix; |
104 | my $ix; |
| 105 | my $j; |
105 | my $j; |
| 106 | my %skipSec; |
106 | my %skipSec; |
| 107 | my @skiplist; |
107 | my @skiplist; |
| 108 | my $args = @ARGV; |
108 | my $args = @ARGV; |
| 109 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
109 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
| 110 | {
|
110 | {
|
| 111 | $j = $ix + 1; |
111 | $j = $ix + 1; |
| 112 | if ( $ARGV[$ix] eq "-x" ) |
112 | if ( $ARGV[$ix] eq "-x" ) |
| 113 | {
|
113 | {
|
| 114 | @skiplist = split( ",", $ARGV[$j] ); |
114 | @skiplist = split( ",", $ARGV[$j] ); |
| 115 | }
|
115 | }
|
| 116 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
116 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
| 117 | {
|
117 | {
|
| 118 | @skiplist = split( ",", $1 ); |
118 | @skiplist = split( ",", $1 ); |
| 119 | }
|
119 | }
|
| 120 | }
|
120 | }
|
| 121 | foreach (@skiplist) |
121 | foreach (@skiplist) |
| 122 | {
|
122 | {
|
| 123 | $skipSec{$_} = 1; |
123 | $skipSec{$_} = 1; |
| 124 | }
|
124 | }
|
| 125 | 125 | ||
| 126 | ## Leafnode users will want /var/spool/news for this variable.
|
126 | ## Leafnode users will want /var/spool/news for this variable.
|
| 127 | my $news = "/var/spool/news/"; |
127 | my $news = "/var/spool/news/"; |
| 128 | 128 | ||
| 129 | ## Number of top or bottom posters to show
|
129 | ## Number of top or bottom posters to show
|
| 130 | my $topposters = 20; |
130 | my $topposters = 20; |
| 131 | 131 | ||
| 132 | ## Number of threads we want to know about
|
132 | ## Number of threads we want to know about
|
| 133 | my $topthreads = 20; |
133 | my $topthreads = 20; |
| 134 | 134 | ||
| 135 | ## Number of cross-posted threads to show
|
135 | ## Number of cross-posted threads to show
|
| 136 | my $topcrossposts = 10; |
136 | my $topcrossposts = 10; |
| 137 | 137 | ||
| 138 | ## Number of agents we list
|
138 | ## Number of agents we list
|
| 139 | my $topagents = 10; |
139 | my $topagents = 10; |
| 140 | 140 | ||
| 141 | ## Number of time zones to show
|
141 | ## Number of time zones to show
|
| 142 | my $toptz = 10; |
142 | my $toptz = 10; |
| 143 | 143 | ||
| 144 | ###################### DATA STRUCTURES ######################
|
144 | ###################### DATA STRUCTURES ######################
|
| 145 | my $group = $newsgroup_name; |
145 | my $group = $newsgroup_name; |
| 146 | $group =~ s!\.!/!g; |
146 | $group =~ s!\.!/!g; |
| 147 | my %data; # name, count, agent, total, orig, quoted |
147 | my %data; # name, count, agent, total, orig, quoted |
| 148 | my $totsize = 0; # holds total sizes of all files |
148 | my $totsize = 0; # holds total sizes of all files |
| 149 | my %crossposts; # group, count |
149 | my %crossposts; # group, count |
| 150 | my %threads; # subject, count |
150 | my %threads; # subject, count |
| 151 | my $replies = 0; # total no. of replies |
151 | my $replies = 0; # total no. of replies |
| 152 | my $origposts = 0; # total no. of original posts |
152 | my $origposts = 0; # total no. of original posts |
| 153 | my %tz; # timezones by count |
153 | my %tz; # timezones by count |
| 154 | my $earliest; # earliest article we have found |
154 | my $earliest; # earliest article we have found |
| 155 | my $latest; # latest article we have found |
155 | my $latest; # latest article we have found |
| 156 | my $totheader = 0; # total size of header material |
156 | my $totheader = 0; # total size of header material |
| 157 | my $totbody = 0; # total size of body material |
157 | my $totbody = 0; # total size of body material |
| 158 | my $totsig = 0; # total size of sig material |
158 | my $totsig = 0; # total size of sig material |
| 159 | my $totquoted = 0; # total size of quoted material |
159 | my $totquoted = 0; # total size of quoted material |
| 160 | my $totorig = 0; # total size of original material |
160 | my $totorig = 0; # total size of original material |
| 161 | my $totalposts; # total no. of posts considered |
161 | my $totalposts; # total no. of posts considered |
| 162 | my %distinct_agent; |
162 | my %distinct_agent; |
| 163 | 163 | ||
| 164 | ## Used to hold counts of User Agents used
|
164 | ## Used to hold counts of User Agents used
|
| 165 | my %agents = ( |
165 | my %agents = ( |
| 166 | "Compuserver" => 0, |
166 | "Compuserver" => 0, |
| 167 | "Foorum" => 0, |
167 | "Foorum" => 0, |
| 168 | "Forte Agent" => 0, |
168 | "Forte Agent" => 0, |
| 169 | "Forte Free Agent" => 0, |
169 | "Forte Free Agent" => 0, |
| 170 | "Gnus" => 0, |
170 | "Gnus" => 0, |
| 171 | "KNode" => 0, |
171 | "KNode" => 0, |
| 172 | "MacSOUP" => 0, |
172 | "MacSOUP" => 0, |
| 173 | "MT-NewsWatcher" => 0, |
173 | "MT-NewsWatcher" => 0, |
| 174 | "MicroPlanet Gravity" => 0, |
174 | "MicroPlanet Gravity" => 0, |
| 175 | "Microsoft Outlook Express" => 0, |
175 | "Microsoft Outlook Express" => 0, |
| 176 | "Microsoft Windows Mail" => 0, |
176 | "Microsoft Windows Mail" => 0, |
| 177 | "Mozilla" => 0, |
177 | "Mozilla" => 0, |
| 178 | "News Rover" => 0, |
178 | "News Rover" => 0, |
| 179 | "NN" => 0, |
179 | "NN" => 0, |
| 180 | "Pan" => 0, |
180 | "Pan" => 0, |
| 181 | "rn" => 0, |
181 | "rn" => 0, |
| 182 | "slrn" => 0, |
182 | "slrn" => 0, |
| 183 | "Sylpheed" => 0, |
183 | "Sylpheed" => 0, |
| 184 | "tin" => 0, |
184 | "tin" => 0, |
| 185 | "VSoup" => 0, |
185 | "VSoup" => 0, |
| 186 | "WebTV" => 0, |
186 | "WebTV" => 0, |
| 187 | "Xnews" => 0, |
187 | "Xnews" => 0, |
| 188 | ); |
188 | ); |
| 189 | 189 | ||
| 190 | my $datetime_parser = DateTime::Format::Mail->new(); |
190 | my $datetime_parser = DateTime::Format::Mail->new(); |
| 191 | $datetime_parser->loose(); |
191 | $datetime_parser->loose(); |
| 192 | 192 | ||
| 193 | my $today = DateTime->today( time_zone => 'UTC' ); |
193 | my $today = DateTime->today( time_zone => 'UTC' ); |
| 194 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
194 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
| 195 | my $start = int $prev_month->strftime('%s'); |
195 | my $start = int $prev_month->strftime('%s'); |
| 196 | my $numdays = int DateTime->last_day_of_month( |
196 | my $numdays = int DateTime->last_day_of_month( |
| 197 | year => $prev_month->year(), |
197 | year => $prev_month->year(), |
| 198 | month => $prev_month->month(), |
198 | month => $prev_month->month(), |
| 199 | time_zone => $prev_month->time_zone(), |
199 | time_zone => $prev_month->time_zone(), |
| 200 | )->day(); |
200 | )->day(); |
| 201 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
201 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
| 202 | 202 | ||
| 203 | dmsg( $start, " to ", $end ) if DEBUG; |
203 | dmsg( $start, " to ", $end ) if DEBUG; |
| 204 | 204 | ||
| 205 | chdir("$news$group") |
205 | chdir("$news$group") |
| 206 | or die __x( |
206 | or die __x( |
| 207 | "Can't cd to {newsgroup}: {error}\n", |
207 | "Can't cd to {newsgroup}: {error}\n", |
| 208 | newsgroup => "$news$group", |
208 | newsgroup => "$news$group", |
| 209 | error => $! |
209 | error => $! |
| 210 | ); |
210 | ); |
| 211 | opendir( DIR, "." ) |
211 | opendir( DIR, "." ) |
| 212 | or die __x( |
212 | or die __x( |
| 213 | "Can't open {newsgroup}: {error}\n", |
213 | "Can't open {newsgroup}: {error}\n", |
| 214 | newsgroup => "$news$group", |
214 | newsgroup => "$news$group", |
| 215 | error => $! |
215 | error => $! |
| 216 | ); |
216 | ); |
| 217 | 217 | ||
| 218 | while ( defined( my $filename = readdir(DIR) ) ) |
218 | while ( defined( my $filename = readdir(DIR) ) ) |
| 219 | {
|
219 | {
|
| 220 | next unless -f $filename; # only want real files |
220 | next unless -f $filename; # only want real files |
| 221 | next if ( $filename eq ".overview" ); # real articles only |
221 | next if ( $filename eq ".overview" ); # real articles only |
| 222 | 222 | ||
| 223 | get_article($filename); # read in the article |
223 | get_article($filename); # read in the article |
| 224 | }
|
224 | }
|
| 225 | closedir(DIR); # finished with the directory |
225 | closedir(DIR); # finished with the directory |
| 226 | 226 | ||
| 227 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
227 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
| 228 | 228 | ||
| 229 | ## Post-processing
|
229 | ## Post-processing
|
| 230 | count_agents(); # count agents, collapsing versions |
230 | count_agents(); # count agents, collapsing versions |
| 231 | fix_percent(); |
231 | fix_percent(); |
| 232 | 232 | ||
| 233 | write_data(); |
233 | write_data(); |
| 234 | display_results(); |
234 | display_results(); |
| 235 | 235 | ||
| 236 | ########################################
|
236 | ########################################
|
| 237 | ## Get current article's header and body
|
237 | ## Get current article's header and body
|
| 238 | ########################################
|
238 | ########################################
|
| 239 | sub get_article
|
239 | sub get_article
|
| 240 | {
|
240 | {
|
| 241 | my $filename = shift; |
241 | my $filename = shift; |
| 242 | 242 | ||
| 243 | open( my $FILE, '<', $filename ) |
243 | open( my $FILE, '<', $filename ) |
| 244 | or
|
244 | or
|
| 245 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
245 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
| 246 | my $msg = Mail::Message->read($FILE); |
246 | my $msg = Mail::Message->read($FILE); |
| 247 | my $timestamp = $msg->timestamp(); |
247 | my $timestamp = $msg->timestamp(); |
| 248 | my $date = $msg->study('Date'); |
248 | my $date = $msg->study('Date'); |
| 249 | 249 | ||
| 250 | ## Disregard article if timestamp is not in range
|
250 | ## Disregard article if timestamp is not in range
|
| 251 | dmsg($timestamp) if DEBUG; |
251 | dmsg($timestamp) if DEBUG; |
| 252 | if ( $timestamp < $start or $timestamp >= $end ) |
252 | if ( $timestamp < $start || $timestamp >= $end ) |
| 253 | {
|
253 | {
|
| 254 | dmsg("Posting on $date ignored.") if DEBUG; |
254 | dmsg("Posting on $date ignored.") if DEBUG; |
| 255 | return; |
255 | return; |
| 256 | }
|
256 | }
|
| 257 | 257 | ||
| 258 | $totalposts++; # bump count of articles considered |
258 | $totalposts++; # bump count of articles considered |
| 259 | 259 | ||
| 260 | ## DEBUG
|
260 | ## DEBUG
|
| 261 | dmsg($date) if DEBUG; |
261 | dmsg($date) if DEBUG; |
| 262 | 262 | ||
| 263 | ## get stats about the file itself
|
263 | ## get stats about the file itself
|
| 264 | my $filesize = -s $filename; # get total size of file |
264 | my $filesize = -s $filename; # get total size of file |
| 265 | $totsize += $filesize; # bump total sizes of all files |
265 | $totsize += $filesize; # bump total sizes of all files |
| 266 | 266 | ||
| 267 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
267 | if ( ( not defined $earliest ) || $timestamp < $earliest ) |
| 268 | {
|
268 | {
|
| 269 | $earliest = $timestamp; |
269 | $earliest = $timestamp; |
| 270 | }
|
270 | }
|
| 271 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
271 | elsif ( ( not defined $latest ) || $timestamp > $latest ) |
| 272 | {
|
272 | {
|
| 273 | $latest = $timestamp; |
273 | $latest = $timestamp; |
| 274 | }
|
274 | }
|
| 275 | 275 | ||
| 276 | #print "timestamp: $timestamp\n";
|
276 | #print "timestamp: $timestamp\n";
|
| 277 | 277 | ||
| 278 | ## count header size
|
278 | ## count header size
|
| 279 | $totheader += $msg->head()->size(); |
279 | $totheader += $msg->head()->size(); |
| 280 | 280 | ||
| 281 | ## get the poster's name (MIME-decoded, in UTF-8)
|
281 | ## get the poster's name (MIME-decoded, in UTF-8)
|
| 282 | my $poster = $msg->study('From'); |
282 | my $poster = $msg->study('From'); |
| 283 | if ( defined $poster ) |
283 | if ( defined $poster ) |
| 284 | {
|
284 | {
|
| 285 | ## Convert old to new format
|
285 | ## Convert old to new format
|
| 286 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
286 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
| 287 | 287 | ||
| 288 | ## Collapse whitespace
|
288 | ## Collapse whitespace
|
| 289 | $poster =~ s/\s+/ /g; |
289 | $poster =~ s/\s+/ /g; |
| 290 | 290 | ||
| 291 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
291 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
| 292 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
292 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
| 293 | 293 | ||
| 294 | ## DEBUG
|
294 | ## DEBUG
|
| 295 | dmsg($poster) if DEBUG; |
295 | dmsg($poster) if DEBUG; |
| 296 | 296 | ||
| 297 | ## seen this one before?
|
297 | ## seen this one before?
|
| 298 | if ( !defined( $data{$poster} ) ) |
298 | if ( !defined( $data{$poster} ) ) |
| 299 | {
|
299 | {
|
| 300 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
300 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
| 301 | $data{$poster}{'orig'} = 0; |
301 | $data{$poster}{'orig'} = 0; |
| 302 | $data{$poster}{'quoted'} = 0; |
302 | $data{$poster}{'quoted'} = 0; |
| 303 | }
|
303 | }
|
| 304 | $data{$poster}{'count'}++; # bump count for this poster |
304 | $data{$poster}{'count'}++; # bump count for this poster |
| 305 | $data{$poster}{'size'} += $filesize; # total size of file |
305 | $data{$poster}{'size'} += $filesize; # total size of file |
| 306 | 306 | ||
| 307 | ## The User-Agent and/or X-Newsreader fields
|
307 | ## The User-Agent and/or X-Newsreader fields
|
| 308 | ## for User-Agent by poster
|
308 | ## for User-Agent by poster
|
| 309 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
309 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader'); |
| 310 | if ( defined $ua ) |
310 | if ( defined $ua ) |
| 311 | {
|
311 | {
|
| 312 | $data{$poster}{'agent'} = $ua; |
312 | $data{$poster}{'agent'} = $ua; |
| 313 | 313 | ||
| 314 | ## DEBUG
|
314 | ## DEBUG
|
| 315 | dmsg($ua) if DEBUG; |
315 | dmsg($ua) if DEBUG; |
| 316 | }
|
316 | }
|
| 317 | 317 | ||
| 318 | ## The User Agent for User-Agent by number of posts
|
318 | ## The User Agent for User-Agent by number of posts
|
| 319 | get_agent($msg); |
319 | get_agent($msg); |
| 320 | 320 | ||
| 321 | ## Get all cross-posted newsgroups
|
321 | ## Get all cross-posted newsgroups
|
| 322 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
322 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
| 323 | {
|
323 | {
|
| 324 | $crossposts{$_}++; # bump count for each |
324 | $crossposts{$_}++; # bump count for each |
| 325 | }
|
325 | }
|
| 326 | 326 | ||
| 327 | ## Get threads
|
327 | ## Get threads
|
| 328 | my $thread = $msg->study('Subject'); |
328 | my $thread = $msg->study('Subject'); |
| 329 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
329 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
| 330 | $thread =~ s/\s+/ /g; # collapse whitespace |
330 | $thread =~ s/\s+/ /g; # collapse whitespace |
| 331 | $threads{$thread}{'count'}++; # bump count of this subject |
331 | $threads{$thread}{'count'}++; # bump count of this subject |
| 332 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
332 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
| 333 | 333 | ||
| 334 | ## Is this an original post or a reply?
|
334 | ## Is this an original post or a reply?
|
| 335 | if ( defined $msg->study('References') ) |
335 | if ( defined $msg->study('References') ) |
| 336 | {
|
336 | {
|
| 337 | $replies++; |
337 | $replies++; |
| 338 | }
|
338 | }
|
| 339 | else
|
339 | else
|
| 340 | {
|
340 | {
|
| 341 | $origposts++; |
341 | $origposts++; |
| 342 | }
|
342 | }
|
| 343 | 343 | ||
| 344 | ## Get the time zone
|
344 | ## Get the time zone
|
| 345 | my $datetime = $datetime_parser->parse_datetime($date); |
345 | my $datetime = $datetime_parser->parse_datetime($date); |
| 346 | my $tz = $datetime->strftime('%z'); |
346 | my $tz = $datetime->strftime('%z'); |
| 347 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
347 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
| 348 | $tz{$tz}++; |
348 | $tz{$tz}++; |
| 349 | 349 | ||
| 350 | ## DEBUG
|
350 | ## DEBUG
|
| 351 | dmsg($tz) if DEBUG; |
351 | dmsg($tz) if DEBUG; |
| 352 | 352 | ||
| 353 | #### Now analyse the body text ####
|
353 | #### Now analyse the body text ####
|
| 354 | my $body = $msg->body(); |
354 | my $body = $msg->body(); |
| 355 | 355 | ||
| 356 | my $insig = 0; |
356 | my $insig = 0; |
| 357 | my @body = $body->lines; |
357 | my @body = $body->lines; |
| 358 | for (@body) |
358 | for (@body) |
| 359 | {
|
359 | {
|
| 360 | $totbody += length($_); # bump total body size |
360 | $totbody += length($_); # bump total body size |
| 361 | next if (m{^$>}o); # don't count blank lines in body |
361 | next if (m{^$>}o); # don't count blank lines in body |
| 362 | if ( $insig == 1 ) |
362 | if ( $insig == 1 ) |
| 363 | {
|
363 | {
|
| 364 | 364 | ||
| 365 | # bump total sig size
|
365 | # bump total sig size
|
| 366 | $totsig += length($_); |
366 | $totsig += length($_); |
| 367 | }
|
367 | }
|
| 368 | ## are we in a quote line?
|
368 | ## are we in a quote line?
|
| 369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
369 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
| 370 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
370 | elsif ( m{^\s*[>\]]}o || m{^\s*::}o ) |
| 371 | {
|
371 | {
|
| 372 | ## bump count of quoted chrs
|
372 | ## bump count of quoted chrs
|
| 373 | $data{$poster}{'quoted'} += length($_); |
373 | $data{$poster}{'quoted'} += length($_); |
| 374 | $totquoted += length($_); |
374 | $totquoted += length($_); |
| 375 | }
|
375 | }
|
| 376 | elsif (/^-- $/) |
376 | elsif (/^-- $/) |
| 377 | {
|
377 | {
|
| 378 | $insig = 1; |
378 | $insig = 1; |
| 379 | }
|
379 | }
|
| 380 | else
|
380 | else
|
| 381 | {
|
381 | {
|
| 382 | ## We must be processing an original line
|
382 | ## We must be processing an original line
|
| 383 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
383 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
| 384 | $totorig += length($_); |
384 | $totorig += length($_); |
| 385 | }
|
385 | }
|
| 386 | }
|
386 | }
|
| 387 | 387 | ||
| 388 | # end for (@body)
|
388 | # end for (@body)
|
| 389 | }
|
389 | }
|
| 390 | 390 | ||
| 391 | close($FILE); |
391 | close($FILE); |
| 392 | }
|
392 | }
|
| 393 | 393 | ||
| 394 | sub get_agent
|
394 | sub get_agent
|
| 395 | {
|
395 | {
|
| 396 | my $msg = shift; |
396 | my $msg = shift; |
| 397 | 397 | ||
| 398 | my $ua = |
- | |
| 399 | $msg->study('User-Agent') |
- | |
| 400 | or $msg->study('X-Newsreader') |
398 | my $ua = $msg->study('User-Agent') // $msg->study('X-Newsreader') |
| 401 | or $msg->study('X-Mailer'); |
399 | // $msg->study('X-Mailer'); |
| - | 400 | ||
| 402 | if ( not defined $ua ) |
401 | if ( not defined $ua ) |
| 403 | {
|
402 | {
|
| 404 | my $org = $msg->study('Organization'); |
403 | my $org = $msg->study('Organization'); |
| 405 | if ( defined $org |
404 | if ( defined $org |
| 406 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
405 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
| 407 | {
|
406 | {
|
| 408 | $ua = $org; |
407 | $ua = $org; |
| 409 | }
|
408 | }
|
| 410 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
409 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
| 411 | {
|
410 | {
|
| 412 | $ua = "Pine"; |
411 | $ua = "Pine"; |
| 413 | }
|
412 | }
|
| 414 | }
|
413 | }
|
| 415 | 414 | ||
| 416 | ## Hopefully found UA, else set to unknown
|
415 | ## Hopefully found UA, else set to unknown
|
| 417 | if ( not defined $ua ) |
416 | if ( not defined $ua ) |
| 418 | {
|
417 | {
|
| 419 | $ua = __ "unknown"; |
418 | $ua = __ "unknown"; |
| 420 | }
|
419 | }
|
| 421 | 420 | ||
| 422 | $ua = clean($ua); |
421 | $ua = clean($ua); |
| 423 | 422 | ||
| 424 | my $raw = $ua; |
423 | my $raw = $ua; |
| 425 | my $agent = $raw; |
424 | my $agent = $raw; |
| 426 | 425 | ||
| 427 | ## strip http
|
426 | ## strip http
|
| 428 | if ( $raw =~ /.*http.*/ ) |
427 | if ( $raw =~ /.*http.*/ ) |
| 429 | {
|
428 | {
|
| 430 | $raw =~ s!posted via!!i; |
429 | $raw =~ s!posted via!!i; |
| 431 | $raw =~ s!http://!!g; |
430 | $raw =~ s!http://!!g; |
| 432 | $raw =~ s!/!!g; |
431 | $raw =~ s!/!!g; |
| 433 | $raw =~ s! !!g; |
432 | $raw =~ s! !!g; |
| 434 | }
|
433 | }
|
| 435 | 434 | ||
| 436 | ## Fix Outlook from Mac
|
435 | ## Fix Outlook from Mac
|
| 437 | if ( $raw =~ /^microsoft/i ) |
436 | if ( $raw =~ /^microsoft/i ) |
| 438 | {
|
437 | {
|
| 439 | $raw =~ s/-/ /g; |
438 | $raw =~ s/-/ /g; |
| 440 | }
|
439 | }
|
| 441 | 440 | ||
| 442 | ## Pick out the popular agents
|
441 | ## Pick out the popular agents
|
| 443 | if ( $raw =~ /(outlook express)/i |
442 | if ( $raw =~ /(outlook express)/i |
| 444 | || $raw =~ /(windows mail)/i |
443 | || $raw =~ /(windows mail)/i |
| 445 | || $raw =~ /(microplanet gravity)/i |
444 | || $raw =~ /(microplanet gravity)/i |
| 446 | || $raw =~ /(news rover)/i |
445 | || $raw =~ /(news rover)/i |
| 447 | || $raw =~ /(forte agent)/i |
446 | || $raw =~ /(forte agent)/i |
| 448 | || $raw =~ /(forte free agent)/i ) |
447 | || $raw =~ /(forte free agent)/i ) |
| 449 | {
|
448 | {
|
| 450 | $agent = $1; |
449 | $agent = $1; |
| 451 | }
|
450 | }
|
| 452 | elsif ( |
451 | elsif ( |
| 453 | $raw =~ /^( |
452 | $raw =~ /^( |
| 454 | pan |
453 | pan |
| 455 | |sylpheed
|
454 | |sylpheed
|
| 456 | |slrn
|
455 | |slrn
|
| 457 | |mozilla
|
456 | |mozilla
|
| 458 | |knode
|
457 | |knode
|
| 459 | |tin
|
458 | |tin
|
| 460 | |hamster
|
459 | |hamster
|
| 461 | |xrn
|
460 | |xrn
|
| 462 | |xnews
|
461 | |xnews
|
| 463 | |aol
|
462 | |aol
|
| 464 | |gnus
|
463 | |gnus
|
| 465 | |krn
|
464 | |krn
|
| 466 | |macsoup
|
465 | |macsoup
|
| 467 | |messenger
|
466 | |messenger
|
| 468 | |openxp
|
467 | |openxp
|
| 469 | |pine
|
468 | |pine
|
| 470 | |thoth
|
469 | |thoth
|
| 471 | |turnpike
|
470 | |turnpike
|
| 472 | |winvn
|
471 | |winvn
|
| 473 | |vsoup
|
472 | |vsoup
|
| 474 | |google
|
473 | |google
|
| 475 | |supernews
|
474 | |supernews
|
| 476 | |nn
|
475 | |nn
|
| 477 | |rn
|
476 | |rn
|
| 478 | |007
|
477 | |007
|
| 479 | |webtv
|
478 | |webtv
|
| 480 | |compuserve
|
479 | |compuserve
|
| 481 | )/ix |
480 | )/ix |
| 482 | )
|
481 | )
|
| 483 | {
|
482 | {
|
| 484 | $agent = $1; |
483 | $agent = $1; |
| 485 | }
|
484 | }
|
| 486 | else
|
485 | else
|
| 487 | {
|
486 | {
|
| 488 | ## Clean up unknown agents
|
487 | ## Clean up unknown agents
|
| 489 | if ( $raw =~ m!^(.*?)/! ) |
488 | if ( $raw =~ m!^(.*?)/! ) |
| 490 | {
|
489 | {
|
| 491 | $agent = $1; |
490 | $agent = $1; |
| 492 | }
|
491 | }
|
| 493 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
492 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
| 494 | {
|
493 | {
|
| 495 | $agent = $1; |
494 | $agent = $1; |
| 496 | }
|
495 | }
|
| 497 | }
|
496 | }
|
| 498 | 497 | ||
| 499 | $distinct_agent{$agent}++; |
498 | $distinct_agent{$agent}++; |
| 500 | return $agent; |
499 | return $agent; |
| 501 | }
|
500 | }
|
| 502 | ## get_agent
|
501 | ## get_agent
|
| 503 | 502 | ||
| 504 | #########################################
|
503 | #########################################
|
| 505 | ## Count the User-Agents used, collapsing
|
504 | ## Count the User-Agents used, collapsing
|
| 506 | ## different versions into one per agent.
|
505 | ## different versions into one per agent.
|
| 507 | #########################################
|
506 | #########################################
|
| 508 | sub count_agents
|
507 | sub count_agents
|
| 509 | {
|
508 | {
|
| 510 | POSTER:
|
509 | POSTER:
|
| 511 | foreach my $poster ( keys %data ) |
510 | foreach my $poster ( keys %data ) |
| 512 | {
|
511 | {
|
| 513 | foreach my $agent_name ( keys %distinct_agent ) |
512 | foreach my $agent_name ( keys %distinct_agent ) |
| 514 | { # check against known ones |
513 | { # check against known ones |
| 515 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
514 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
| 516 | {
|
515 | {
|
| 517 | $agents{$agent_name}++; |
516 | $agents{$agent_name}++; |
| 518 | next POSTER; |
517 | next POSTER; |
| 519 | }
|
518 | }
|
| 520 | }
|
519 | }
|
| 521 | $agents{ $data{$poster}{'agent'} }++; |
520 | $agents{ $data{$poster}{'agent'} }++; |
| 522 | }
|
521 | }
|
| 523 | } # count_agents |
522 | } # count_agents |
| 524 | 523 | ||
| 525 | #############################################
|
524 | #############################################
|
| 526 | ## Set orig/total percentages for all posters
|
525 | ## Set orig/total percentages for all posters
|
| 527 | #############################################
|
526 | #############################################
|
| 528 | sub fix_percent
|
527 | sub fix_percent
|
| 529 | {
|
528 | {
|
| 530 | foreach my $poster ( keys %data ) |
529 | foreach my $poster ( keys %data ) |
| 531 | {
|
530 | {
|
| 532 | my $percent = 100; |
531 | my $percent = 100; |
| 533 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
532 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
| 534 | {
|
533 | {
|
| 535 | $percent = |
534 | $percent = |
| 536 | $data{$poster}{'orig'} * 100 / |
535 | $data{$poster}{'orig'} * 100 / |
| 537 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
536 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
| 538 | }
|
537 | }
|
| 539 | elsif ( $data{$poster}{'orig'} == 0 ) |
538 | elsif ( $data{$poster}{'orig'} == 0 ) |
| 540 | {
|
539 | {
|
| 541 | $percent = 0; |
540 | $percent = 0; |
| 542 | }
|
541 | }
|
| 543 | $data{$poster}{'percent'} = $percent; |
542 | $data{$poster}{'percent'} = $percent; |
| 544 | }
|
543 | }
|
| 545 | }
|
544 | }
|
| 546 | ## fix_percent
|
545 | ## fix_percent
|
| 547 | 546 | ||
| 548 | ##################################
|
547 | ##################################
|
| 549 | ## Write data structures to a file
|
548 | ## Write data structures to a file
|
| 550 | ##################################
|
549 | ##################################
|
| 551 | sub write_data
|
550 | sub write_data
|
| 552 | {
|
551 | {
|
| 553 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
552 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
| 554 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
553 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
| 555 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
554 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
| 556 | print $OUTF |
555 | print $OUTF |
| 557 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
556 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
| 558 | foreach my $name ( keys %data ) |
557 | foreach my $name ( keys %data ) |
| 559 | {
|
558 | {
|
| 560 | print $OUTF |
559 | print $OUTF |
| 561 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
560 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
| 562 | }
|
561 | }
|
| 563 | print $OUTF |
562 | print $OUTF |
| 564 | "============================================================================\n"; |
563 | "============================================================================\n"; |
| 565 | print $OUTF "Thread subjects\n"; |
564 | print $OUTF "Thread subjects\n"; |
| 566 | print $OUTF |
565 | print $OUTF |
| 567 | "----------------------------------------------------------------------------\n"; |
566 | "----------------------------------------------------------------------------\n"; |
| 568 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
567 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| 569 | {
|
568 | {
|
| 570 | print $OUTF |
569 | print $OUTF |
| 571 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
570 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
| 572 | }
|
571 | }
|
| 573 | print $OUTF |
572 | print $OUTF |
| 574 | "============================================================================\n"; |
573 | "============================================================================\n"; |
| 575 | print $OUTF "Cross-posts\n"; |
574 | print $OUTF "Cross-posts\n"; |
| 576 | print $OUTF |
575 | print $OUTF |
| 577 | "----------------------------------------------------------------------------\n"; |
576 | "----------------------------------------------------------------------------\n"; |
| 578 | foreach my $name ( sort keys %crossposts ) |
577 | foreach my $name ( sort keys %crossposts ) |
| 579 | {
|
578 | {
|
| 580 | print $OUTF "$name : $crossposts{$name}\n"; |
579 | print $OUTF "$name : $crossposts{$name}\n"; |
| 581 | }
|
580 | }
|
| 582 | print $OUTF |
581 | print $OUTF |
| 583 | "============================================================================\n"; |
582 | "============================================================================\n"; |
| 584 | print $OUTF "User agents\n"; |
583 | print $OUTF "User agents\n"; |
| 585 | print $OUTF |
584 | print $OUTF |
| 586 | "----------------------------------------------------------------------------\n"; |
585 | "----------------------------------------------------------------------------\n"; |
| 587 | foreach my $name ( sort keys %agents ) |
586 | foreach my $name ( sort keys %agents ) |
| 588 | {
|
587 | {
|
| 589 | print $OUTF "$name : $agents{$name}\n"; |
588 | print $OUTF "$name : $agents{$name}\n"; |
| 590 | }
|
589 | }
|
| 591 | print $OUTF |
590 | print $OUTF |
| 592 | "============================================================================\n"; |
591 | "============================================================================\n"; |
| 593 | print $OUTF "Time zones\n"; |
592 | print $OUTF "Time zones\n"; |
| 594 | print $OUTF |
593 | print $OUTF |
| 595 | "----------------------------------------------------------------------------\n"; |
594 | "----------------------------------------------------------------------------\n"; |
| 596 | foreach my $name ( sort keys %tz ) |
595 | foreach my $name ( sort keys %tz ) |
| 597 | {
|
596 | {
|
| 598 | print $OUTF "$name : $tz{$name}\n"; |
597 | print $OUTF "$name : $tz{$name}\n"; |
| 599 | }
|
598 | }
|
| 600 | close $OUTF; |
599 | close $OUTF; |
| 601 | } # write_data |
600 | } # write_data |
| 602 | 601 | ||
| 603 | sub display_results
|
602 | sub display_results
|
| 604 | {
|
603 | {
|
| 605 | #################### DISPLAY RESULTS #####################
|
604 | #################### DISPLAY RESULTS #####################
|
| 606 | println( "=" x 76 ); |
605 | println( "=" x 76 ); |
| 607 | printf "%s\n", |
606 | printf "%s\n", |
| 608 | centred(
|
607 | centred(
|
| 609 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
608 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
| 610 | 76 ); |
609 | 76 ); |
| 611 | println( "=" x 76 ); |
610 | println( "=" x 76 ); |
| 612 | printf "%s\n", |
611 | printf "%s\n", |
| 613 | centred(
|
612 | centred(
|
| 614 | __(
|
613 | __(
|
| 615 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
614 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)"
|
| 616 | ), |
615 | ), |
| 617 | 76
|
616 | 76
|
| 618 | ); |
617 | ); |
| 619 | print "\n\n"; |
618 | print "\n\n"; |
| 620 | printf __"Total posts considered: %s over %d days\n", |
619 | printf __"Total posts considered: %s over %d days\n", |
| 621 | $formatter->format_number($totalposts), |
620 | $formatter->format_number($totalposts), |
| 622 | $formatter->format_number($numdays); |
621 | $formatter->format_number($numdays); |
| 623 | my $time_locale = setlocale(LC_TIME); |
622 | my $time_locale = setlocale(LC_TIME); |
| 624 | my $earliest_datetime = DateTime->from_epoch( |
623 | my $earliest_datetime = DateTime->from_epoch( |
| 625 | epoch => $earliest, |
624 | epoch => $earliest, |
| 626 | locale => $time_locale, |
625 | locale => $time_locale, |
| 627 | time_zone => 'UTC', |
626 | time_zone => 'UTC', |
| 628 | ); |
627 | ); |
| 629 | my $latest_datetime = DateTime->from_epoch( |
628 | my $latest_datetime = DateTime->from_epoch( |
| 630 | epoch => $latest, |
629 | epoch => $latest, |
| 631 | locale => $time_locale, |
630 | locale => $time_locale, |
| 632 | time_zone => 'UTC', |
631 | time_zone => 'UTC', |
| 633 | ); |
632 | ); |
| 634 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
633 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
| 635 | printf __"Earliest article: %s\n", |
634 | printf __"Earliest article: %s\n", |
| 636 | $earliest_datetime->strftime($datetime_format); |
635 | $earliest_datetime->strftime($datetime_format); |
| 637 | printf __"Latest article: %s\n", |
636 | printf __"Latest article: %s\n", |
| 638 | $latest_datetime->strftime($datetime_format); |
637 | $latest_datetime->strftime($datetime_format); |
| 639 | printf __"Original articles: %s; replies: %s\n", |
638 | printf __"Original articles: %s; replies: %s\n", |
| 640 | $formatter->format_number($origposts), |
639 | $formatter->format_number($origposts), |
| 641 | $formatter->format_number($replies); |
640 | $formatter->format_number($replies); |
| 642 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
641 | printf __"Total size of posts: %s bytes (%s)" . "\n", |
| 643 | $formatter->format_number($totsize), |
642 | $formatter->format_number($totsize), |
| 644 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
643 | $formatter->format_bytes( $totsize, ( 'precision' => 1, 'mode' => 'iec' ) ); |
| 645 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
644 | printf __"Average %s articles per day, %s per day, %s bytes per article\n", |
| 646 | $formatter->format_number( int( $totalposts / $numdays ) ), |
645 | $formatter->format_number( int( $totalposts / $numdays ) ), |
| 647 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
646 | $formatter->format_bytes( $totsize / $numdays, ( 'mode' => 'iec' ) ), |
| 648 | $formatter->format_number( int( $totsize / $totalposts ) ); |
647 | $formatter->format_number( int( $totsize / $totalposts ) ); |
| 649 | 648 | ||
| 650 | my $count = keys %data; |
649 | my $count = keys %data; |
| 651 | printf __"Total headers: %s; bodies: %s\n", |
650 | printf __"Total headers: %s; bodies: %s\n", |
| 652 | $formatter->format_bytes( |
651 | $formatter->format_bytes( |
| 653 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
652 | $totheader, ( 'precision' => 1, 'mode' => 'iec' ) |
| 654 | ), |
653 | ), |
| 655 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
654 | $formatter->format_bytes( $totbody, ( 'precision' => 1, 'mode' => 'iec' ) ); |
| 656 | printf __ |
655 | printf __ |
| 657 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
656 | "Body text - quoted: %s; original: %s = %s%%; sigs: %s\n", |
| 658 | $formatter->format_bytes( |
657 | $formatter->format_bytes( |
| 659 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
658 | $totquoted, ( 'precision' => 1, 'mode' => 'iec' ) |
| 660 | ), |
659 | ), |
| 661 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
660 | $formatter->format_bytes( $totorig, ( 'precision' => 1, 'mode' => 'iec' ) ), |
| 662 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
661 | $formatter->format_number( ( $totorig * 100 ) / ( $totorig + $totquoted ) ), |
| 663 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
662 | $formatter->format_bytes( $totsig, ( 'precision' => 1, 'mode' => 'iec' ) ); |
| 664 | printf __"Total number of posters: %s, average %s per poster\n", |
663 | printf __"Total number of posters: %s, average %s per poster\n", |
| 665 | $formatter->format_number($count), |
664 | $formatter->format_number($count), |
| 666 | $formatter->format_bytes( $totsize / $count, |
665 | $formatter->format_bytes( $totsize / $count, |
| 667 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
666 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
| 668 | $count = keys %threads; |
667 | $count = keys %threads; |
| 669 | printf __"Total number of threads: %s, average %s per thread\n", |
668 | printf __"Total number of threads: %s, average %s per thread\n", |
| 670 | $formatter->format_number($count), |
669 | $formatter->format_number($count), |
| 671 | $formatter->format_bytes( $totsize / $count, |
670 | $formatter->format_bytes( $totsize / $count, |
| 672 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
671 | ( 'precision' => 1, 'mode' => 'iec' ) ); |
| 673 | printf __"Total number of user agents: %d\n", |
672 | printf __"Total number of user agents: %d\n", |
| 674 | $formatter->format_number( scalar keys %agents ); |
673 | $formatter->format_number( scalar keys %agents ); |
| 675 | print "\n", "=" x 76, "\n" ; |
674 | print "\n", "=" x 76, "\n"; |
| 676 | ########################################
|
675 | ########################################
|
| 677 | ## Show posters by article count Sec 1;
|
676 | ## Show posters by article count Sec 1;
|
| 678 | ########################################
|
677 | ########################################
|
| 679 | unless ( $skipSec{1} ) |
678 | unless ( $skipSec{1} ) |
| 680 | {
|
679 | {
|
| 681 | if ( keys %data < $topposters ) |
680 | if ( keys %data < $topposters ) |
| 682 | {
|
681 | {
|
| 683 | $count = keys %data; |
682 | $count = keys %data; |
| 684 | }
|
683 | }
|
| 685 | else
|
684 | else
|
| 686 | {
|
685 | {
|
| 687 | $count = $topposters; |
686 | $count = $topposters; |
| 688 | }
|
687 | }
|
| 689 | printf "%s\n", |
688 | printf "%s\n", |
| 690 | centred(
|
689 | centred(
|
| 691 | __x( "Top {count} posters by number of articles", count => $topposters ), |
690 | __x( "Top {count} posters by number of articles", count => $topposters ), |
| 692 | 76
|
691 | 76
|
| 693 | ); |
692 | ); |
| 694 | print "=" x 76, "\n"; |
693 | print "=" x 76, "\n"; |
| 695 | my $i = 0; |
694 | my $i = 0; |
| 696 | foreach
|
695 | foreach
|
| 697 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
696 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| 698 | {
|
697 | {
|
| 699 | my $name = substr( $poster, 0, 65 ); |
698 | my $name = substr( $poster, 0, 65 ); |
| 700 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
699 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| 701 | $data{$poster}{count}; |
700 | $data{$poster}{count}; |
| 702 | last if ( ++$i == $count ); |
701 | last if ( ++$i == $count ); |
| 703 | }
|
702 | }
|
| 704 | print "\n", "=" x 76, "\n"; |
703 | print "\n", "=" x 76, "\n"; |
| 705 | }
|
704 | }
|
| 706 | 705 | ||
| 707 | ######################################
|
706 | ######################################
|
| 708 | ## Show posters by size in KiB Sec 2;
|
707 | ## Show posters by size in KiB Sec 2;
|
| 709 | ######################################
|
708 | ######################################
|
| 710 | unless ( $skipSec{2} ) |
709 | unless ( $skipSec{2} ) |
| 711 | {
|
710 | {
|
| 712 | if ( keys %data < $topposters ) |
711 | if ( keys %data < $topposters ) |
| 713 | {
|
712 | {
|
| 714 | $count = keys %data; |
713 | $count = keys %data; |
| 715 | }
|
714 | }
|
| 716 | else
|
715 | else
|
| 717 | {
|
716 | {
|
| 718 | $count = $topposters; |
717 | $count = $topposters; |
| 719 | }
|
718 | }
|
| 720 | printf "%s\n", |
719 | printf "%s\n", |
| 721 | centred(
|
720 | centred(
|
| 722 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
721 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
| 723 | 76
|
722 | 76
|
| 724 | ); |
723 | ); |
| 725 | print "=" x 76, "\n"; |
724 | print "=" x 76, "\n"; |
| 726 | my $i = 0; |
725 | my $i = 0; |
| 727 | foreach
|
726 | foreach
|
| 728 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
727 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
| 729 | {
|
728 | {
|
| 730 | my $name = substr( $poster, 0, 62 ); |
729 | my $name = substr( $poster, 0, 62 ); |
| 731 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
730 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| 732 | $data{$poster}{size} / 1024; #/ |
731 | $data{$poster}{size} / 1024; #/ |
| 733 | last if ( ++$i == $count ); |
732 | last if ( ++$i == $count ); |
| 734 | }
|
733 | }
|
| 735 | print "\n", "=" x 76, "\n"; |
734 | print "\n", "=" x 76, "\n"; |
| 736 | }
|
735 | }
|
| 737 | 736 | ||
| 738 | #####################################
|
737 | #####################################
|
| 739 | ## Show top posters for original text
|
738 | ## Show top posters for original text
|
| 740 | #####################################
|
739 | #####################################
|
| - | 740 | my $topposters_real = 0; |
|
| - | 741 | ||
| 741 | unless ( $skipSec{3} ) |
742 | unless ( $skipSec{3} ) |
| 742 | {
|
743 | {
|
| 743 | if ( keys %data < $topposters ) |
744 | if ( keys %data < $topposters ) |
| 744 | {
|
745 | {
|
| 745 | $count = keys %data; |
746 | $count = keys %data; |
| 746 | }
|
747 | }
|
| 747 | else
|
748 | else
|
| 748 | {
|
749 | {
|
| 749 | $count = $topposters; |
750 | $count = $topposters; |
| 750 | }
|
751 | }
|
| - | 752 | ||
| 751 | printf "%s\n", |
753 | printf "%s\n", |
| 752 | centred(
|
754 | centred(
|
| 753 | __x(
|
755 | __x(
|
| 754 | "Top {count} responders by original text (> 5 posts)", |
756 | "Top {count} responders by original text (> 5 posts)", |
| 755 | count => $topposters |
757 | count => $topposters |
| 756 | ), |
758 | ), |
| 757 | 76
|
759 | 76
|
| 758 | ); |
760 | ); |
| 759 | print "=" x 76, "\n"; |
761 | print "=" x 76, "\n"; |
| 760 | my $i = 0; |
- | |
| 761 | foreach my $poster ( |
762 | foreach my $poster ( |
| 762 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
763 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| 763 | keys %data |
764 | keys %data |
| 764 | )
|
765 | )
|
| 765 | {
|
766 | {
|
| 766 | next if $data{$poster}{quoted} == 0; |
767 | next if $data{$poster}{quoted} == 0; |
| 767 | next if $data{$poster}{count} < 5; |
768 | next if $data{$poster}{count} < 5; |
| 768 | my $name = substr( $poster, 0, 63 ); |
769 | my $name = substr( $poster, 0, 63 ); |
| 769 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
770 | printf "%2d. %-63s : %02.2f%%\n", $topposters_real + 1, |
| - | 771 | rpad( $poster, 63, "." ), |
|
| 770 | $data{$poster}{percent}; |
772 | $data{$poster}{percent}; |
| 771 | last if ( ++$i == $count ); |
773 | last if ( ++$topposters_real == $count ); |
| 772 | }
|
774 | }
|
| 773 | print "\n", "=" x 76, "\n"; |
775 | print "\n", "=" x 76, "\n"; |
| 774 | }
|
776 | }
|
| 775 | 777 | ||
| 776 | ########################################
|
778 | ########################################
|
| 777 | ## Show bottom posters for original text
|
779 | ## Show bottom posters for original text
|
| 778 | ########################################
|
780 | ########################################
|
| - | 781 | ||
| - | 782 | $skipSec{4} = ( $topposters_real <= $topposters ) unless defined $skipSec{4}; |
|
| - | 783 | ||
| 779 | unless ( $skipSec{4} ) |
784 | unless ( $skipSec{4} ) |
| 780 | {
|
785 | {
|
| 781 | if ( keys %data < $topposters ) |
786 | if ( keys %data < $topposters ) |
| 782 | {
|
787 | {
|
| 783 | $count = keys %data; |
788 | $count = keys %data; |
| 784 | }
|
789 | }
|
| 785 | else
|
790 | else
|
| 786 | {
|
791 | {
|
| 787 | $count = $topposters; |
792 | $count = $topposters; |
| 788 | }
|
793 | }
|
| - | 794 | ||
| 789 | printf "%s\n", |
795 | printf "%s\n", |
| 790 | centred(
|
796 | centred(
|
| 791 | __x(
|
797 | __x(
|
| 792 | "Bottom {count} responders by original text (> 5 posts)", |
798 | "Bottom {count} responders by original text (> 5 posts)", |
| 793 | count => $topposters |
799 | count => $topposters |
| 794 | ), |
800 | ), |
| 795 | 76
|
801 | 76
|
| 796 | ); |
802 | ); |
| 797 | print "=" x 76, "\n"; |
803 | print "=" x 76, "\n"; |
| 798 | my $i = 0; |
804 | my $i = 0; |
| 799 | foreach my $poster ( |
805 | foreach my $poster ( |
| 800 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
806 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| 801 | keys %data |
807 | keys %data |
| 802 | )
|
808 | )
|
| 803 | {
|
809 | {
|
| 804 | next if $data{$poster}{quoted} == 0; |
810 | next if $data{$poster}{quoted} == 0; |
| 805 | next if $data{$poster}{count} < 5; |
811 | next if $data{$poster}{count} < 5; |
| 806 | my $name = substr( $poster, 0, 63 ); |
812 | my $name = substr( $poster, 0, 63 ); |
| 807 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
813 | printf "%2d. %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| 808 | $data{$poster}{percent}; |
814 | $data{$poster}{percent}; |
| 809 | last if ( ++$i == $count ); |
815 | last if ( ++$i == $count ); |
| 810 | }
|
816 | }
|
| 811 | print "\n", "=" x 76, "\n"; |
817 | print "\n", "=" x 76, "\n"; |
| 812 | }
|
818 | }
|
| 813 | 819 | ||
| 814 | #####################################
|
820 | #####################################
|
| 815 | ## Show threads by number of articles
|
821 | ## Show threads by number of articles
|
| 816 | #####################################
|
822 | #####################################
|
| 817 | unless ( $skipSec{5} ) |
823 | unless ( $skipSec{5} ) |
| 818 | {
|
824 | {
|
| 819 | if ( keys %threads < $topthreads ) |
825 | if ( keys %threads < $topthreads ) |
| 820 | {
|
826 | {
|
| 821 | $count = keys %threads; |
827 | $count = keys %threads; |
| 822 | }
|
828 | }
|
| 823 | else
|
829 | else
|
| 824 | {
|
830 | {
|
| 825 | $count = $topthreads; |
831 | $count = $topthreads; |
| 826 | }
|
832 | }
|
| 827 | printf "%s\n", |
833 | printf "%s\n", |
| 828 | centred(
|
834 | centred(
|
| 829 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
835 | __x( "Top {count} threads by no. of articles", count => $topthreads ), |
| 830 | 76 ); |
836 | 76 ); |
| 831 | print "=" x 76, "\n"; |
837 | print "=" x 76, "\n"; |
| 832 | my $i = 0; |
838 | my $i = 0; |
| 833 | foreach my $thread ( |
839 | foreach my $thread ( |
| 834 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
840 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
| 835 | keys %threads |
841 | keys %threads |
| 836 | )
|
842 | )
|
| 837 | {
|
843 | {
|
| 838 | my $name = substr( $thread, 0, 65 ); |
844 | my $name = substr( $thread, 0, 65 ); |
| 839 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
845 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 840 | $threads{$thread}{'count'}; |
846 | $threads{$thread}{'count'}; |
| 841 | last if ( ++$i == $count ); |
847 | last if ( ++$i == $count ); |
| 842 | }
|
848 | }
|
| 843 | print "\n", "=" x 76, "\n"; |
849 | print "\n", "=" x 76, "\n"; |
| 844 | }
|
850 | }
|
| 845 | 851 | ||
| 846 | ##############################
|
852 | ##############################
|
| 847 | ## Show threads by size in KiB
|
853 | ## Show threads by size in KiB
|
| 848 | ##############################
|
854 | ##############################
|
| 849 | unless ( $skipSec{6} ) |
855 | unless ( $skipSec{6} ) |
| 850 | {
|
856 | {
|
| 851 | if ( keys %threads < $topthreads ) |
857 | if ( keys %threads < $topthreads ) |
| 852 | {
|
858 | {
|
| 853 | $count = keys %threads; |
859 | $count = keys %threads; |
| 854 | }
|
860 | }
|
| 855 | else
|
861 | else
|
| 856 | {
|
862 | {
|
| 857 | $count = $topthreads; |
863 | $count = $topthreads; |
| 858 | }
|
864 | }
|
| 859 | printf "%s\n", |
865 | printf "%s\n", |
| 860 | centred(
|
866 | centred(
|
| 861 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
867 | __x( "Top {count} threads by size in KiB", count => $topthreads ), 76 ); |
| 862 | print "=" x 76, "\n"; |
868 | print "=" x 76, "\n"; |
| 863 | my $i = 0; |
869 | my $i = 0; |
| 864 | foreach my $thread ( |
870 | foreach my $thread ( |
| 865 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
871 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
| 866 | keys %threads |
872 | keys %threads |
| 867 | )
|
873 | )
|
| 868 | {
|
874 | {
|
| 869 | my $name = substr( $thread, 0, 65 ); |
875 | my $name = substr( $thread, 0, 65 ); |
| 870 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
876 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 871 | $threads{$thread}{'size'} / 1024; #/ |
877 | $threads{$thread}{'size'} / 1024; #/ |
| 872 | last if ( ++$i == $count ); |
878 | last if ( ++$i == $count ); |
| 873 | }
|
879 | }
|
| 874 | print "\n", "=" x 76, "\n"; |
880 | print "\n", "=" x 76, "\n"; |
| 875 | }
|
881 | }
|
| 876 | 882 | ||
| 877 | ##################################
|
883 | ##################################
|
| 878 | ## Show top 10 cross-posted groups
|
884 | ## Show top 10 cross-posted groups
|
| 879 | ##################################
|
885 | ##################################
|
| 880 | unless ( $skipSec{7} ) |
886 | unless ( $skipSec{7} ) |
| 881 | {
|
887 | {
|
| 882 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
888 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
| 883 | if ( keys %crossposts < $topcrossposts ) |
889 | if ( keys %crossposts < $topcrossposts ) |
| 884 | {
|
890 | {
|
| 885 | $count = keys %crossposts; |
891 | $count = keys %crossposts; |
| 886 | }
|
892 | }
|
| 887 | else
|
893 | else
|
| 888 | {
|
894 | {
|
| 889 | $count = $topcrossposts; |
895 | $count = $topcrossposts; |
| 890 | }
|
896 | }
|
| 891 | printf "%s\n", |
897 | printf "%s\n", |
| 892 | centred(
|
898 | centred(
|
| 893 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
899 | __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
| 894 | print "=" x 76, "\n"; |
900 | print "=" x 76, "\n"; |
| 895 | my $i = 0; |
901 | my $i = 0; |
| 896 | foreach
|
902 | foreach
|
| 897 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
903 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
| 898 | {
|
904 | {
|
| 899 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
905 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| 900 | $crossposts{$name}; |
906 | $crossposts{$name}; |
| 901 | last if ( ++$i == $count ); |
907 | last if ( ++$i == $count ); |
| 902 | }
|
908 | }
|
| 903 | print "\n", "=" x 76, "\n"; |
909 | print "\n", "=" x 76, "\n"; |
| 904 | }
|
910 | }
|
| 905 | 911 | ||
| 906 | #########################
|
912 | #########################
|
| 907 | ## Show agents and counts
|
913 | ## Show agents and counts
|
| 908 | #########################
|
914 | #########################
|
| 909 | unless ( $skipSec{8} ) |
915 | unless ( $skipSec{8} ) |
| 910 | {
|
916 | {
|
| 911 | if ( keys %agents < $topagents ) |
917 | if ( keys %agents < $topagents ) |
| 912 | {
|
918 | {
|
| 913 | $count = keys %agents; |
919 | $count = keys %agents; |
| 914 | }
|
920 | }
|
| 915 | else
|
921 | else
|
| 916 | {
|
922 | {
|
| 917 | $count = $topagents; |
923 | $count = $topagents; |
| 918 | }
|
924 | }
|
| 919 | printf "%s\n", |
925 | printf "%s\n", |
| 920 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
926 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
| 921 | 76 ); |
927 | 76 ); |
| 922 | print "=" x 76, "\n"; |
928 | print "=" x 76, "\n"; |
| 923 | my $i = 0; |
929 | my $i = 0; |
| 924 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
930 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
| 925 | {
|
931 | {
|
| 926 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
932 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
| 927 | $agents{$agent}; |
933 | $agents{$agent}; |
| 928 | last if ( ++$i == $count ); |
934 | last if ( ++$i == $count ); |
| 929 | }
|
935 | }
|
| 930 | print "\n", "=" x 76, "\n"; |
936 | print "\n", "=" x 76, "\n"; |
| 931 | }
|
937 | }
|
| 932 | 938 | ||
| 933 | #######################
|
939 | #######################
|
| 934 | ## Show distinct agents
|
940 | ## Show distinct agents
|
| 935 | #######################
|
941 | #######################
|
| 936 | unless ( $skipSec{9} ) |
942 | unless ( $skipSec{9} ) |
| 937 | {
|
943 | {
|
| 938 | if ( keys %distinct_agent < $topagents ) |
944 | if ( keys %distinct_agent < $topagents ) |
| 939 | {
|
945 | {
|
| 940 | $count = keys %distinct_agent; |
946 | $count = keys %distinct_agent; |
| 941 | }
|
947 | }
|
| 942 | else
|
948 | else
|
| 943 | {
|
949 | {
|
| 944 | $count = $topagents; |
950 | $count = $topagents; |
| 945 | }
|
951 | }
|
| 946 | printf "%s\n", |
952 | printf "%s\n", |
| 947 | centred(
|
953 | centred(
|
| 948 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
954 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
| 949 | 76
|
955 | 76
|
| 950 | ); |
956 | ); |
| 951 | print "=" x 76, "\n"; |
957 | print "=" x 76, "\n"; |
| 952 | my $i = 0; |
958 | my $i = 0; |
| 953 | foreach my $agent ( |
959 | foreach my $agent ( |
| 954 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
960 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| 955 | keys %distinct_agent |
961 | keys %distinct_agent |
| 956 | )
|
962 | )
|
| 957 | {
|
963 | {
|
| 958 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
964 | printf "%2d. %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
| 959 | $distinct_agent{$agent}, |
965 | $distinct_agent{$agent}, |
| 960 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
966 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
| 961 | last if ( ++$i == $count ); |
967 | last if ( ++$i == $count ); |
| 962 | }
|
968 | }
|
| 963 | print "\n", "=" x 76, "\n"; |
969 | print "\n", "=" x 76, "\n"; |
| 964 | }
|
970 | }
|
| 965 | 971 | ||
| 966 | ############################
|
972 | ############################
|
| 967 | ## Show timezones and counts
|
973 | ## Show timezones and counts
|
| 968 | ############################
|
974 | ############################
|
| 969 | unless ( $skipSec{10} ) |
975 | unless ( $skipSec{10} ) |
| 970 | {
|
976 | {
|
| 971 | if ( keys %tz < $toptz ) |
977 | if ( keys %tz < $toptz ) |
| 972 | {
|
978 | {
|
| 973 | $count = keys %tz; |
979 | $count = keys %tz; |
| 974 | }
|
980 | }
|
| 975 | else
|
981 | else
|
| 976 | {
|
982 | {
|
| 977 | $count = $toptz; |
983 | $count = $toptz; |
| 978 | }
|
984 | }
|
| 979 | printf "%s\n", |
985 | printf "%s\n", |
| 980 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
986 | centred( __x( "Top {count} time zones", count => $toptz ), 76 ); |
| 981 | print "=" x 76, "\n"; |
987 | print "=" x 76, "\n"; |
| 982 | my $i = 0; |
988 | my $i = 0; |
| 983 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
989 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
| 984 | {
|
990 | {
|
| 985 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
991 | printf "%2d. %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
| 986 | last if ( ++$i == $count ); |
992 | last if ( ++$i == $count ); |
| 987 | }
|
993 | }
|
| 988 | print "\n", "=" x 76, "\n"; |
994 | print "\n", "=" x 76, "\n"; |
| 989 | }
|
995 | }
|
| 990 | }
|
996 | }
|
| 991 | 997 | ||
| 992 | ## helper subs
|
998 | ## helper subs
|
| 993 | 999 | ||
| 994 | ###############################
|
1000 | ###############################
|
| 995 | ## Right pad a string with '.'s
|
1001 | ## Right pad a string with '.'s
|
| 996 | ###############################
|
1002 | ###############################
|
| 997 | sub rpad
|
1003 | sub rpad
|
| 998 | {
|
1004 | {
|
| 999 | ## Get text to pad, length to pad, pad chr
|
1005 | ## Get text to pad, length to pad, pad chr
|
| 1000 | my ( $text, $pad_len, $pad_chr ) = @_; |
1006 | my ( $text, $pad_len, $pad_chr ) = @_; |
| 1001 | 1007 | ||
| 1002 | ## DEBUG
|
1008 | ## DEBUG
|
| 1003 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
1009 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
| 1004 | 1010 | ||
| 1005 | if ( length($text) > $pad_len ) |
1011 | if ( length($text) > $pad_len ) |
| 1006 | {
|
1012 | {
|
| 1007 | $text = substr( $text, 0, $pad_len ); |
1013 | $text = substr( $text, 0, $pad_len ); |
| 1008 | }
|
1014 | }
|
| 1009 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
1015 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
| 1010 | return $padded; |
1016 | return $padded; |
| 1011 | }
|
1017 | }
|
| 1012 | 1018 | ||
| 1013 | ##################
|
1019 | ##################
|
| 1014 | ## Centre a string
|
1020 | ## Centre a string
|
| 1015 | ##################
|
1021 | ##################
|
| 1016 | sub centred
|
1022 | sub centred
|
| 1017 | {
|
1023 | {
|
| 1018 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
1024 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
| 1019 | my $pad_len = ( $width - length($text) ) / 2; #/ |
1025 | my $pad_len = ( $width - length($text) ) / 2; #/ |
| 1020 | my $centred = " " x $pad_len . $text; |
1026 | my $centred = " " x $pad_len . $text; |
| 1021 | return $centred; |
1027 | return $centred; |
| 1022 | }
|
1028 | }
|
| 1023 | 1029 | ||
| 1024 | ###########################
|
1030 | ###########################
|
| 1025 | ## Put commas into a number
|
1031 | ## Put commas into a number
|
| 1026 | ###########################
|
1032 | ###########################
|
| 1027 | sub commify
|
1033 | sub commify
|
| 1028 | {
|
1034 | {
|
| 1029 | local $_ = shift; |
1035 | local $_ = shift; |
| 1030 | my $number = $_; |
1036 | my $number = $_; |
| 1031 | $_ = int; # Chop non-integer part |
1037 | $_ = int; # Chop non-integer part |
| 1032 | 1 while |
1038 | 1 while |
| 1033 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
1039 | s/([-+]?\d)(\d{$grouping[0]}($|\Q$thousands_sep\E))/$1$thousands_sep$2/; |
| 1034 | my $int_part = $_; |
1040 | my $int_part = $_; |
| 1035 | my $real_part = ''; |
1041 | my $real_part = ''; |
| 1036 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
1042 | if ( $number =~ /(\Q$decimal_point\E\d+)$/ ) |
| 1037 | {
|
1043 | {
|
| 1038 | $real_part = $1; |
1044 | $real_part = $1; |
| 1039 | }
|
1045 | }
|
| 1040 | return $int_part . $real_part; |
1046 | return $int_part . $real_part; |
| 1041 | }
|
1047 | }
|
| 1042 | 1048 | ||
| 1043 | ################################################################
|
1049 | ################################################################
|
| 1044 | ## Returns a string with leading and trailing whitespace removed
|
1050 | ## Returns a string with leading and trailing whitespace removed
|
| 1045 | ################################################################
|
1051 | ################################################################
|
| 1046 | sub clean
|
1052 | sub clean
|
| 1047 | {
|
1053 | {
|
| 1048 | my $dirty = shift; |
1054 | my $dirty = shift; |
| 1049 | my $clean = $dirty; |
1055 | my $clean = $dirty; |
| 1050 | $clean =~ s/^\s+|\s+$//g; |
1056 | $clean =~ s/^\s+|\s+$//g; |
| 1051 | 1057 | ||
| 1052 | return $clean; |
1058 | return $clean; |
| 1053 | }
|
1059 | }
|
| 1054 | 1060 | ||
| 1055 | sub usage
|
1061 | sub usage
|
| 1056 | {
|
1062 | {
|
| 1057 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
1063 | println( __ "usage: newsstat.pl NEWS.GROUP" ); |
| 1058 | exit 1; |
1064 | exit 1; |
| 1059 | }
|
1065 | }
|
| 1060 | 1066 | ||
| 1061 | sub dmsg
|
1067 | sub dmsg
|
| 1062 | {
|
1068 | {
|
| 1063 | print STDERR @_, "\n"; |
1069 | print STDERR @_, "\n"; |
| 1064 | }
|
1070 | }
|
| 1065 | 1071 | ||
| 1066 | sub dmsg2
|
1072 | sub dmsg2
|
| 1067 | {
|
1073 | {
|
| 1068 | my ( $level, @msg ) = @_; |
1074 | my ( $level, @msg ) = @_; |
| 1069 | print STDERR @msg, "\n" if $level >= DEBUG; |
1075 | print STDERR @msg, "\n" if $level >= DEBUG; |
| 1070 | }
|
1076 | }
|
| 1071 | 1077 | ||
| 1072 | sub println
|
1078 | sub println
|
| 1073 | {
|
1079 | {
|
| 1074 | print @_, "\n"; |
1080 | print @_, "\n"; |
| 1075 | }
|
1081 | }
|
| 1076 | 1082 | ||