Rev 14 | Rev 23 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 14 | Rev 22 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | #!/usr/bin/env perl
|
1 | #!/usr/bin/env perl
|
2 | use strict; |
2 | use strict; |
3 | use warnings; |
3 | use warnings; |
- | 4 | ||
4 | use diagnostics; |
5 | #use diagnostics;
|
5 | use utf8; |
6 | use utf8; |
6 | use Encode; |
7 | use Encode; |
7 | 8 | ||
- | 9 | use constant DEBUG => 0; |
|
- | 10 | ||
8 | ## Print out all text to STDOUT UTF-8 encoded
|
11 | ## Print out all text to STDOUT UTF-8 encoded
|
9 | binmode STDOUT, ':encoding(UTF-8)'; |
12 | binmode STDOUT, ':encoding(UTF-8)'; |
- | 13 | binmode STDERR, ':encoding(UTF-8)'; |
|
10 | 14 | ||
11 | ##############################
|
- | |
12 | ## newsstat.pl version 0.4.3.1
|
- | |
13 | - | ||
14 | ###########################################################################
|
- | |
15 | ## Collect statistics about a newsgroup (specified by first argument)
|
- | |
16 | ## in the local news spool. Check all articles in the last 30-day period.
|
- | |
17 | ## Rank posters by number of posts and by volume of posts, report on top
|
- | |
18 | ## and bottom 20 posters. Show their name, number of posts, size of posts,
|
- | |
19 | ## percentage of quoted lines. Rank user-agents used, by poster rather
|
- | |
20 | ## than by post. Rank top 20 threads. Rank top 10 cross-posted groups.
|
- | |
21 | ##
|
- | |
22 | ## Numbers and paths can be configured below. See ChangeLog and TODO
|
15 | # FIXME: Automatically include resolved '.' in @INC
|
23 | ## for more. -- PE
|
16 | # print join "\n", @INC;
|
24 | ###########################################################################
|
- | |
25 | 17 | ||
- | 18 | use locale ':not_characters'; |
|
- | 19 | use Locale::TextDomain ('de.pointedears.newsstat'); |
|
- | 20 | use POSIX ('locale_h'); |
|
- | 21 | use Locale::Messages qw (bind_textdomain_filter |
|
- | 22 | bind_textdomain_codeset |
|
- | 23 | turn_utf_8_on); |
|
- | 24 | #setlocale( LC_MESSAGES, '' );
|
|
- | 25 | bind_textdomain_filter 'de.pointedears.newsstat', \&turn_utf_8_on; |
|
- | 26 | bind_textdomain_codeset 'de.pointedears.newsstat', 'utf-8'; |
|
- | 27 | ||
- | 28 | require Mail::Message; |
|
- | 29 | require DateTime; |
|
- | 30 | require DateTime::Format::Mail; |
|
- | 31 | ||
26 | ###################### USER CONFIGURATIONS ############################
|
32 | ###################### USER CONFIGURATIONS ############################
|
27 | 33 | ||
28 | ## The name of the group to do stats for
|
34 | ## The name of the group to do stats for
|
29 | my $newsgroup_name = $ARGV[0]; |
35 | my $newsgroup_name = $ARGV[0]; |
30 | $newsgroup_name or &usage; |
36 | $newsgroup_name or usage(); |
31 | 37 | ||
32 | ## Check for removal flags
|
38 | ## Check for removal flags
|
33 | my $ix; |
39 | my $ix; |
34 | my $j; |
40 | my $j; |
35 | my %skipSec; |
41 | my %skipSec; |
Line 53... | Line 59... | ||
53 | }
|
59 | }
|
54 | 60 | ||
55 | ## Leafnode users will want /var/spool/news for this variable.
|
61 | ## Leafnode users will want /var/spool/news for this variable.
|
56 | my $news = "/var/spool/news/"; |
62 | my $news = "/var/spool/news/"; |
57 | 63 | ||
58 | ## How many days are we doing statistics for?
|
- | |
59 | my $numdays = 30; |
- | |
60 | - | ||
61 | ## Number of agents we list
|
64 | ## Number of top or bottom posters to show
|
62 | my $topagents = 10; |
65 | my $topposters = 20; |
63 | 66 | ||
64 | ## Number of threads we want to know about
|
67 | ## Number of threads we want to know about
|
65 | my $topthreads = 20; |
68 | my $topthreads = 20; |
66 | 69 | ||
67 | ## Number of top or bottom posters to show
|
- | |
68 | my $topposters = 20; |
- | |
69 | - | ||
70 | ## Number of cross-posted threads to show
|
70 | ## Number of cross-posted threads to show
|
71 | my $topcrossposts = 10; |
71 | my $topcrossposts = 10; |
72 | 72 | ||
- | 73 | ## Number of agents we list
|
|
- | 74 | my $topagents = 10; |
|
- | 75 | ||
73 | ## Number of time zones to show
|
76 | ## Number of time zones to show
|
74 | my $toptz = 10; |
77 | my $toptz = 10; |
75 | 78 | ||
76 | ###################### DATA STRUCTURES ######################
|
79 | ###################### DATA STRUCTURES ######################
|
77 | my $group = $newsgroup_name; |
80 | my $group = $newsgroup_name; |
78 | $group =~ s!\.!/!g; |
81 | $group =~ s!\.!/!g; |
79 | my %data; # name, count, agent, total, orig, quoted |
82 | my %data; # name, count, agent, total, orig, quoted |
80 | my %threads; # subject, count |
83 | my $totsize = 0; # holds total sizes of all files |
81 | my %crossposts; # group, count |
84 | my %crossposts; # group, count |
82 | my %tz; # timezones by count |
85 | my %threads; # subject, count |
83 | my %headers; # holds header of current article |
- | |
84 | my %lcheader; # holds lowercase headers |
- | |
85 | my @body; # holds body of current article |
86 | my $replies = 0; # total no. of replies |
86 | my @sig; # holds sig text; |
- | |
87 | my $totalposts; # total no. of posts considered |
87 | my $origposts = 0; # total no. of original posts |
88 | my $filename; # name of current article file |
- | |
89 | my $filesize; # size of current article file |
88 | my %tz; # timezones by count |
90 | my $earliest; # earliest article we have found |
89 | my $earliest; # earliest article we have found |
91 | my $latest; # latest article we have found |
90 | my $latest; # latest article we have found |
92 | my $poster; # poster we are dealing with |
- | |
93 | my $totsize = 0; # holds total sizes of all files |
- | |
94 | my $totheader = 0; # total size of header material |
91 | my $totheader = 0; # total size of header material |
95 | my $totbody = 0; # total size of body material |
92 | my $totbody = 0; # total size of body material |
96 | my $totsig = 0; # total size of sig material |
93 | my $totsig = 0; # total size of sig material |
97 | my $totorig = 0; # total size of original material |
- | |
98 | my $totquoted = 0; # total size of quoted material |
94 | my $totquoted = 0; # total size of quoted material |
99 | my $origposts = 0; # total no. of original posts |
95 | my $totorig = 0; # total size of original material |
100 | my $replies = 0; # total no. of replies |
96 | my $totalposts; # total no. of posts considered |
101 | my $i; # general purpose |
- | |
102 | my %distinct_agent; |
97 | my %distinct_agent; |
103 | 98 | ||
104 | ## Used to hold counts of User Agents used
|
99 | ## Used to hold counts of User Agents used
|
105 | my %agents = ( |
100 | my %agents = ( |
106 | "Compuserver" => 0, |
101 | "Compuserver" => 0, |
Line 122... | Line 117... | ||
122 | "slrn" => 0, |
117 | "slrn" => 0, |
123 | "Sylpheed" => 0, |
118 | "Sylpheed" => 0, |
124 | "tin" => 0, |
119 | "tin" => 0, |
125 | "VSoup" => 0, |
120 | "VSoup" => 0, |
126 | "WebTV" => 0, |
121 | "WebTV" => 0, |
127 | "Xnews" => 0 |
122 | "Xnews" => 0, |
128 | ); |
123 | ); |
129 | 124 | ||
- | 125 | my $datetime_parser = DateTime::Format::Mail->new(); |
|
- | 126 | $datetime_parser->loose(); |
|
- | 127 | ||
- | 128 | my $today = DateTime->today( time_zone => 'UTC' ); |
|
- | 129 | my $prev_month = $today->clone()->subtract( months => 1 )->set_day(1); |
|
- | 130 | my $start = int $prev_month->strftime('%s'); |
|
- | 131 | my $numdays = int DateTime->last_day_of_month( |
|
- | 132 | year => $prev_month->year(), |
|
- | 133 | month => $prev_month->month(), |
|
- | 134 | time_zone => $prev_month->time_zone(), |
|
- | 135 | )->day(); |
|
130 | ######################## MAIN CODE ########################
|
136 | my $end = int $today->clone()->set_day(1)->strftime('%s'); |
- | 137 | ||
- | 138 | dmsg( $start, " to ", $end ) if DEBUG; |
|
- | 139 | ||
- | 140 | chdir("$news$group") |
|
- | 141 | or die __x( |
|
- | 142 | "Can't cd to {newsgroup}: {error}\n", |
|
- | 143 | newsgroup => "$news$group", |
|
- | 144 | error => $! |
|
- | 145 | ); |
|
- | 146 | opendir( DIR, "." ) |
|
- | 147 | or die __x( |
|
- | 148 | "Can't open {newsgroup}: {error}\n", |
|
- | 149 | newsgroup => "$news$group", |
|
- | 150 | error => $! |
|
131 | $! = 1; |
151 | ); |
- | 152 | ||
- | 153 | while ( defined( my $filename = readdir(DIR) ) ) |
|
- | 154 | {
|
|
- | 155 | next unless -f $filename; # only want real files |
|
- | 156 | next if ( $filename eq ".overview" ); # real articles only |
|
132 | 157 | ||
133 | chdir("$news$group") or die "Can't cd to $news$group: $!\n"; |
- | |
134 | opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n"; |
- | |
135 | while ( defined( $filename = readdir(DIR) ) ) |
- | |
136 | {
|
- | |
137 | %lcheader = (); |
- | |
138 | next unless -f $filename; # only want real files |
- | |
139 | next if ( $filename eq ".overview" ); # real articles only |
- | |
140 | next if ( -M $filename > $numdays ); # only want articles <= a certain age |
- | |
141 | $earliest = ( stat $filename )[9] unless defined($earliest); |
- | |
142 | $latest = ( stat $filename )[9] unless defined($latest); |
- | |
143 | &get_article($filename); # read in the article |
158 | get_article($filename); # read in the article |
144 | &get_data; # grab the data from the article |
- | |
145 | $totalposts++; # bump count of articles considered |
- | |
146 | }
|
159 | }
|
147 | closedir(DIR); # finished with the directory |
160 | closedir(DIR); # finished with the directory |
148 | 161 | ||
149 | ## Post-processing
|
- | |
150 | &count_agents; # count agents, collapsing versions |
162 | dmsg("\nearliest: $earliest\nlatest: $latest") if DEBUG; |
151 | &fix_percent; # check percentages orig/total for posters |
- | |
152 | 163 | ||
- | 164 | ## Post-processing
|
|
- | 165 | count_agents(); # count agents, collapsing versions |
|
153 | &write_data; |
166 | fix_percent(); |
154 | 167 | ||
155 | #################### DISPLAY RESULTS #####################
|
- | |
156 | print "=" x 76, "\n"; |
- | |
157 | printf "%s\n", ¢red( "Analysis of posts to $newsgroup_name", 76 ); |
- | |
158 | print "=" x 76, "\n"; |
- | |
159 | printf "%s\n", |
- | |
160 | ¢red( "(stats compiled with a script by Garry Knight et al.)", 76 ); |
- | |
161 | print "\n\n"; |
168 | write_data(); |
162 | printf "Total posts considered: %s over %d days\n", commify($totalposts), |
- | |
163 | $numdays; |
- | |
164 | printf "Earliest article: %s\n", scalar localtime($earliest); |
- | |
165 | printf "Latest article: %s\n", scalar localtime($latest); |
- | |
166 | printf "Original articles: %s, replies: %s\n", commify($origposts), |
- | |
167 | commify($replies); |
169 | display_results(); |
168 | printf "Total size of posts: %s bytes (%s KiB) (%.2f MiB)\n", commify($totsize), |
- | |
169 | commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
- | |
170 | printf "Average %s articles per day, %.2f MiB per day, %s bytes per article\n", |
- | |
171 | commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
- | |
172 | commify( int( $totsize / $totalposts ) ); |
- | |
173 | my $count = keys %data; |
- | |
174 | printf "Total headers: %s KiB bodies: %s KiB\n", |
- | |
175 | commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
- | |
176 | printf "Body text - quoted: %s KiB, original: %s KiB = %02.2f%%, sigs: %s KiB\n", |
- | |
177 | commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
- | |
178 | ( $totorig * 100 ) / ( $totorig + $totquoted ), |
- | |
179 | commify( int( $totsig / 1024 ) ); |
- | |
180 | printf "Total number of posters: %s, average %s bytes per poster\n", |
- | |
181 | commify($count), commify( int( $totsize / $count ) ); #/ |
- | |
182 | $count = keys %threads; |
- | |
183 | printf "Total number of threads: %s, average %s bytes per thread\n", |
- | |
184 | commify($count), commify( int( $totsize / $count ) ); #/ |
- | |
185 | printf "Total number of user agents: %d\n", scalar keys %agents; |
- | |
186 | print "\n", "=" x 76, "\n"; |
- | |
187 | 170 | ||
188 | ########################################
|
171 | ########################################
|
189 | ## Show posters by article count Sec 1;
|
172 | ## Get current article's header and body
|
190 | ########################################
|
173 | ########################################
|
191 | unless ( $skipSec{1} ) |
174 | sub get_article
|
192 | {
|
175 | {
|
193 | if ( keys %data < $topposters ) |
- | |
194 | {
|
- | |
195 | $count = keys %data; |
176 | my $filename = shift; |
196 | }
|
- | |
197 | else
|
- | |
198 | {
|
- | |
199 | $count = $topposters; |
- | |
200 | }
|
- | |
201 | printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
- | |
202 | print "=" x 76, "\n"; |
- | |
203 | $i = 0; |
- | |
204 | foreach
|
- | |
205 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
- | |
206 | {
|
- | |
207 | my $name = substr( $poster, 0, 65 ); |
- | |
208 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
- | |
209 | $data{$poster}{count}; |
- | |
210 | last if ( ++$i == $count ); |
- | |
211 | }
|
- | |
212 | print "\n", "=" x 76, "\n"; |
- | |
213 | }
|
- | |
214 | 177 | ||
215 | ######################################
|
- | |
216 | ## Show posters by size in KiB Sec 2;
|
- | |
217 | ######################################
|
- | |
218 | unless ( $skipSec{2} ) |
- | |
219 | {
|
- | |
220 | if ( keys %data < $topposters ) |
178 | open( my $FILE, '<', $filename ) |
221 | {
|
- | |
222 | $count = keys %data; |
- | |
223 | }
|
- | |
224 | else
|
179 | or
|
225 | {
|
- | |
226 | $count = $topposters; |
- | |
227 | }
|
- | |
228 | printf "%s\n", ¢red( "Top $count posters by article size in KiB", 76 ); |
180 | die __x( "Can't open {file}: {error}\n", file => $filename, error => $! ); |
229 | print "=" x 76, "\n"; |
- | |
230 | $i = 0; |
- | |
231 | foreach my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
- | |
232 | {
|
- | |
233 | my $name = substr( $poster, 0, 62 ); |
181 | my $msg = Mail::Message->read($FILE); |
234 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
- | |
235 | $data{$poster}{size} / 1024; #/ |
182 | my $timestamp = $msg->timestamp(); |
236 | last if ( ++$i == $count ); |
183 | my $date = $msg->study('Date'); |
237 | }
|
- | |
238 | print "\n", "=" x 76, "\n"; |
- | |
239 | }
|
- | |
240 | 184 | ||
241 | #####################################
|
- | |
242 | ## Show top posters for original text
|
185 | ## Disregard article if timestamp is not in range
|
243 | #####################################
|
- | |
244 | unless ( $skipSec{3} ) |
186 | dmsg($timestamp) if DEBUG; |
245 | {
|
- | |
246 | if ( keys %data < $topposters ) |
187 | if ( $timestamp < $start or $timestamp >= $end ) |
247 | {
|
188 | {
|
- | 189 | dmsg("Posting on $date ignored.") if DEBUG; |
|
248 | $count = keys %data; |
190 | return; |
249 | }
|
191 | }
|
250 | else
|
- | |
251 | {
|
- | |
252 | $count = $topposters; |
- | |
253 | }
|
- | |
254 | printf "%s\n", |
- | |
255 | ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
- | |
256 | print "=" x 76, "\n"; |
- | |
257 | $i = 0; |
- | |
258 | foreach my $poster ( |
- | |
259 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
- | |
260 | keys %data |
- | |
261 | )
|
- | |
262 | {
|
- | |
263 | next if $data{$poster}{quoted} == 0; |
- | |
264 | next if $data{$poster}{count} < 5; |
- | |
265 | my $name = substr( $poster, 0, 63 ); |
- | |
266 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
- | |
267 | $data{$poster}{percent}; |
- | |
268 | last if ( ++$i == $count ); |
- | |
269 | }
|
- | |
270 | print "\n", "=" x 76, "\n"; |
- | |
271 | }
|
- | |
272 | 192 | ||
273 | ########################################
|
- | |
274 | ## Show bottom posters for original text
|
- | |
275 | ########################################
|
- | |
276 | unless ( $skipSec{4} ) |
- | |
277 | {
|
- | |
278 | if ( keys %data < $topposters ) |
- | |
279 | {
|
- | |
280 | $count = keys %data; |
- | |
281 | }
|
- | |
282 | else
|
- | |
283 | {
|
- | |
284 | $count = $topposters; |
- | |
285 | }
|
- | |
286 | printf "%s\n", |
- | |
287 | ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
- | |
288 | print "=" x 76, "\n"; |
- | |
289 | $i = 0; |
- | |
290 | foreach my $poster ( |
- | |
291 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
193 | $totalposts++; # bump count of articles considered |
292 | keys %data |
- | |
293 | )
|
- | |
294 | {
|
- | |
295 | next if $data{$poster}{quoted} == 0; |
- | |
296 | next if $data{$poster}{count} < 5; |
- | |
297 | my $name = substr( $poster, 0, 63 ); |
- | |
298 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
- | |
299 | $data{$poster}{percent}; |
- | |
300 | last if ( ++$i == $count ); |
- | |
301 | }
|
- | |
302 | print "\n", "=" x 76, "\n"; |
- | |
303 | }
|
- | |
304 | 194 | ||
305 | #####################################
|
- | |
306 | ## Show threads by number of articles
|
- | |
307 | #####################################
|
- | |
308 | unless ( $skipSec{5} ) |
- | |
309 | {
|
- | |
310 | if ( keys %threads < $topthreads ) |
- | |
311 | {
|
- | |
312 | $count = keys %threads; |
- | |
313 | }
|
- | |
314 | else
|
- | |
315 | {
|
- | |
316 | $count = $topthreads; |
- | |
317 | }
|
- | |
318 | printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
- | |
319 | print "=" x 76, "\n"; |
- | |
320 | $i = 0; |
195 | ## DEBUG
|
321 | foreach my $thread ( |
196 | dmsg($date) if DEBUG; |
322 | sort { $threads{$b}{count} <=> $threads{$a}{count} } |
- | |
323 | keys %threads |
- | |
324 | )
|
- | |
325 | {
|
- | |
326 | my $name = substr( $thread, 0, 65 ); |
- | |
327 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
- | |
328 | $threads{$thread}{count}; |
- | |
329 | last if ( ++$i == $count ); |
- | |
330 | }
|
- | |
331 | print "\n", "=" x 76, "\n"; |
- | |
332 | }
|
- | |
333 | 197 | ||
334 | ##############################
|
- | |
335 | ## Show threads by size in KiB
|
198 | ## get stats about the file itself
|
336 | ##############################
|
- | |
337 | unless ( $skipSec{6} ) |
- | |
338 | {
|
- | |
339 | if ( keys %threads < $topthreads ) |
- | |
340 | {
|
- | |
341 | $count = keys %threads; |
- | |
342 | }
|
- | |
343 | else
|
- | |
344 | {
|
- | |
345 | $count = $topthreads; |
- | |
346 | }
|
- | |
347 | printf "%s\n", ¢red( "Top $count threads by size in KiB", 76 ); |
- | |
348 | print "=" x 76, "\n"; |
- | |
349 | $i = 0; |
- | |
350 | foreach my $thread ( |
- | |
351 | sort { $threads{$b}{size} <=> $threads{$a}{size} } |
199 | my $filesize = -s $filename; # get total size of file |
352 | keys %threads |
- | |
353 | )
|
- | |
354 | {
|
- | |
355 | my $name = substr( $thread, 0, 65 ); |
- | |
356 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
200 | $totsize += $filesize; # bump total sizes of all files |
357 | $threads{$thread}{size} / 1024; #/ |
- | |
358 | last if ( ++$i == $count ); |
- | |
359 | }
|
- | |
360 | print "\n", "=" x 76, "\n"; |
- | |
361 | }
|
- | |
362 | 201 | ||
363 | ##################################
|
- | |
364 | ## Show top 10 cross-posted groups
|
- | |
365 | ##################################
|
- | |
366 | unless ( $skipSec{7} ) |
- | |
367 | {
|
- | |
368 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
- | |
369 | if ( keys %crossposts < $topcrossposts ) |
202 | if ( ( not defined $earliest ) or $timestamp < $earliest ) |
370 | {
|
203 | {
|
371 | $count = keys %crossposts; |
204 | $earliest = $timestamp; |
372 | }
|
205 | }
|
373 | else
|
206 | elsif ( ( not defined $latest ) or $timestamp > $latest ) |
374 | {
|
207 | {
|
375 | $count = $topcrossposts; |
208 | $latest = $timestamp; |
376 | }
|
209 | }
|
377 | printf "%s\n", ¢red( "Top $count cross-posted groups", 76 ); |
- | |
378 | print "=" x 76, "\n"; |
- | |
379 | $i = 0; |
- | |
380 | foreach
|
- | |
381 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
- | |
382 | {
|
- | |
383 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
- | |
384 | $crossposts{$name}; |
- | |
385 | last if ( ++$i == $count ); |
- | |
386 | }
|
- | |
387 | print "\n", "=" x 76, "\n"; |
- | |
388 | }
|
- | |
389 | 210 | ||
390 | #########################
|
- | |
391 | ## Show agents and counts
|
- | |
392 | #########################
|
- | |
393 | unless ( $skipSec{8} ) |
- | |
394 | {
|
- | |
395 | if ( keys %agents < $topagents ) |
- | |
396 | {
|
- | |
397 | $count = keys %agents; |
- | |
398 | }
|
- | |
399 | else
|
- | |
400 | {
|
- | |
401 | $count = $topagents; |
- | |
402 | }
|
- | |
403 | printf "%s\n", ¢red( "Top $count User Agents by poster", 76 ); |
- | |
404 | print "=" x 76, "\n"; |
- | |
405 | $i = 0; |
- | |
406 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
- | |
407 | {
|
- | |
408 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
- | |
409 | $agents{$agent}; |
- | |
410 | last if ( ++$i == $count ); |
- | |
411 | }
|
- | |
412 | print "\n", "=" x 76, "\n"; |
211 | #print "timestamp: $timestamp\n";
|
413 | }
|
- | |
414 | 212 | ||
415 | #######################
|
- | |
416 | ## Show distinct agents
|
- | |
417 | #######################
|
- | |
418 | unless ( $skipSec{9} ) |
- | |
419 | {
|
- | |
420 | if ( keys %distinct_agent < $topagents ) |
- | |
421 | {
|
- | |
422 | $count = keys %distinct_agent; |
- | |
423 | }
|
- | |
424 | else
|
- | |
425 | {
|
- | |
426 | $count = $topagents; |
213 | ## count header size
|
427 | }
|
- | |
428 | printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
- | |
429 | print "=" x 76, "\n"; |
- | |
430 | $i = 0; |
- | |
431 | foreach my $agent ( |
- | |
432 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
- | |
433 | keys %distinct_agent |
- | |
434 | )
|
- | |
435 | {
|
- | |
436 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
- | |
437 | $distinct_agent{$agent}, |
- | |
438 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
- | |
439 | last if ( ++$i == $count ); |
214 | $totheader += $msg->head()->size(); |
440 | }
|
- | |
441 | print "\n", "=" x 76, "\n"; |
- | |
442 | }
|
- | |
443 | 215 | ||
444 | ############################
|
- | |
445 | ## Show timezones and counts
|
216 | ## get the poster's name (MIME-decoded, in UTF-8)
|
446 | ############################
|
- | |
447 | unless ( $skipSec{10} ) |
217 | my $poster = $msg->study('From'); |
448 | {
|
- | |
449 | if ( keys %tz < $toptz ) |
218 | if ( defined $poster ) |
450 | {
|
219 | {
|
451 | $count = keys %tz; |
220 | ## Convert old to new format
|
452 | }
|
- | |
453 | else
|
- | |
454 | {
|
- | |
455 | $count = $toptz; |
- | |
456 | }
|
- | |
457 | printf "%s\n", ¢red( "Top 10 time zones", 76 ); |
221 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
458 | print "=" x 76, "\n"; |
- | |
459 | $i = 0; |
- | |
460 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
- | |
461 | {
|
- | |
462 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
- | |
463 | last if ( ++$i == $count ); |
- | |
464 | }
|
- | |
465 | print "\n", "=" x 76, "\n"; |
- | |
466 | }
|
- | |
467 | 222 | ||
468 | ################################ SUBROUTINES ################################
|
- | |
469 | - | ||
470 | ########################################
|
- | |
471 | ## Get current article's header and body
|
223 | ## Collapse whitespace
|
472 | ########################################
|
- | |
473 | sub get_article
|
- | |
474 | {
|
- | |
475 | %headers = (); # dump old headers |
224 | $poster =~ s/\s+/ /g; |
476 | my $filename = shift; # get the name of the file |
- | |
477 | 225 | ||
478 | ## get stats about the file itself
|
- | |
479 | $filesize = -s $filename; # get total size of file |
226 | ## Remove outer quotes; TODO: observe RFC 5322 strictly
|
480 | $totsize += $filesize; # bump total sizes of all files |
227 | $poster =~ s/^ " (.+ ) " \s+ (.*)/$1 $2/x; |
481 | 228 | ||
482 | my $mtime = ( stat $filename )[9]; |
- | |
483 | if ( $mtime < $earliest ) |
- | |
484 | {
|
- | |
485 | $earliest = $mtime; |
229 | ## DEBUG
|
486 | }
|
- | |
487 | elsif ( $mtime > $latest ) |
- | |
488 | {
|
- | |
489 | $latest = $mtime; |
230 | dmsg($poster) if DEBUG; |
490 | }
|
- | |
491 | 231 | ||
492 | ## now read the file
|
232 | ## seen this one before?
|
493 | open( my $FILE, '<', $filename ) or die "Can't open $filename: $!\n"; |
- | |
494 | while (<$FILE>) |
- | |
495 | {
|
- | |
496 | $totheader += length($_); # bump total header size |
- | |
497 | last if (/^\s*$/); # end of header? |
- | |
498 | if (/^([^:\s]*):\s*(.*)/) |
233 | if ( !defined( $data{$poster} ) ) |
499 | {
|
234 | {
|
500 | my ( $key, $val ) = ( $1, $2 ); |
235 | $data{$poster}{'agent'} = __ 'unknown'; # comes after For: field |
501 | $headers{$key} = decode( 'MIME-Header', $val ); |
236 | $data{$poster}{'orig'} = 0; |
502 | $lcheader{ clean( lc($key) ) } = clean($val); |
237 | $data{$poster}{'quoted'} = 0; |
503 | }
|
238 | }
|
504 | }
|
- | |
505 | @body = <$FILE>; # slurp up body |
239 | $data{$poster}{'count'}++; # bump count for this poster |
506 | close($FILE); |
- | |
507 | } # get_article |
240 | $data{$poster}{'size'} += $filesize; # total size of file |
508 | 241 | ||
509 | ####################################
|
242 | ## The User-Agent and/or X-Newsreader fields
|
510 | ## Get data from the current article
|
243 | ## for User-Agent by poster
|
511 | ####################################
|
244 | my $ua = $msg->study('User-Agent') or $msg->study('X-Newsreader'); |
512 | sub get_data
|
245 | if ( defined $ua ) |
513 | {
|
246 | {
|
514 | #### First, analyse header fields ####
|
247 | $data{$poster}{'agent'} = $ua; |
515 | 248 | ||
516 | ## Set up this poster if not defined, get counts, sizes
|
- | |
517 | my $poster = $headers{From}; # get the poster's name |
- | |
518 | - | ||
519 | # Convert old to new format
|
- | |
520 | $poster =~ s/^\s*(.+?\@.+?)\s*\((.+?)\)\s*$/$2 <$1>/; |
- | |
521 | - | ||
522 | # Collapse whitespace
|
- | |
523 | $poster =~ s/\s+/ /; |
- | |
524 | - | ||
525 | # Remove outer quotes
|
249 | ## DEBUG
|
526 | $poster =~ s/^["'](.+?)["']\s+(.*)/$1 $2/; |
- | |
527 | - | ||
528 | if ( !defined( $data{$poster} ) ) |
- | |
529 | { # seen this one before? |
- | |
530 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
- | |
531 | $data{$poster}{orig} = 0; |
250 | dmsg($ua) if DEBUG; |
532 | $data{$poster}{quoted} = 0; |
- | |
533 | }
|
251 | }
|
534 | $data{$poster}{count}++; # bump count for this poster |
- | |
535 | $data{$poster}{size} += $filesize; # total size of file |
- | |
536 | 252 | ||
537 | ## The User-Agent and/or X-Newsreader fields
|
253 | ## The User Agent for User-Agent by number of posts
|
538 | ## for User-Agent by poster
|
254 | get_agent($msg); |
539 | if ( defined $lcheader{"user-agent"} ) |
- | |
540 | {
|
- | |
541 | $data{$poster}{agent} = $lcheader{"user-agent"}; |
- | |
542 | }
|
- | |
543 | if ( defined $lcheader{"x-newsreader"} ) |
- | |
544 | {
|
- | |
545 | $data{$poster}{agent} = $lcheader{"x-newsreader"}; |
- | |
546 | }
|
- | |
547 | 255 | ||
548 | ## The User Agent for User-Agent by number of posts
|
256 | ## Get all cross-posted newsgroups
|
549 | my $UA = "unknown"; |
- | |
550 | foreach my $keys ( keys %lcheader ) |
- | |
551 | {
|
- | |
552 | if ( defined $lcheader{'user-agent'} ) |
257 | for ( split( /,/, $msg->study('Newsgroups') ) ) |
553 | {
|
258 | {
|
554 | $UA = $lcheader{'user-agent'}; |
259 | $crossposts{$_}++; # bump count for each |
555 | }
|
260 | }
|
- | 261 | ||
- | 262 | ## Get threads
|
|
- | 263 | my $thread = $msg->study('Subject'); |
|
- | 264 | $thread =~ s/^re:\s+//i; # Remove Re: or re: at start |
|
- | 265 | $thread =~ s/\s+/ /g; # collapse whitespace |
|
- | 266 | $threads{$thread}{'count'}++; # bump count of this subject |
|
- | 267 | $threads{$thread}{'size'} += $filesize; # bump bytes for this thread |
|
- | 268 | ||
- | 269 | ## Is this an original post or a reply?
|
|
556 | elsif ( defined $lcheader{"x-newsreader"} ) |
270 | if ( defined $msg->study('References') ) |
557 | {
|
271 | {
|
558 | $UA = $lcheader{"x-newsreader"}; |
272 | $replies++; |
559 | }
|
273 | }
|
560 | elsif ( defined $lcheader{'x-mailer'} ) |
274 | else
|
561 | {
|
275 | {
|
562 | $UA = $lcheader{'x-mailer'}; |
276 | $origposts++; |
563 | }
|
277 | }
|
- | 278 | ||
- | 279 | ## Get the time zone
|
|
- | 280 | my $datetime = $datetime_parser->parse_datetime($date); |
|
- | 281 | my $tz = $datetime->strftime('%z'); |
|
- | 282 | $tz = "UTC" if $tz =~ m{^(?:GMT|0000)$}o; |
|
- | 283 | $tz{$tz}++; |
|
- | 284 | ||
- | 285 | ## DEBUG
|
|
- | 286 | dmsg($tz) if DEBUG; |
|
- | 287 | ||
- | 288 | #### Now analyse the body text ####
|
|
- | 289 | my $body = $msg->body(); |
|
- | 290 | ||
- | 291 | my $insig = 0; |
|
- | 292 | my @body = $body->lines; |
|
- | 293 | for (@body) |
|
- | 294 | {
|
|
- | 295 | $totbody += length($_); # bump total body size |
|
- | 296 | next if (m{^$>}o); # don't count blank lines in body |
|
- | 297 | if ( $insig == 1 ) |
|
- | 298 | {
|
|
- | 299 | ||
- | 300 | # bump total sig size
|
|
- | 301 | $totsig += length($_); |
|
564 | elsif ( |
302 | }
|
- | 303 | ## are we in a quote line?
|
|
- | 304 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
|
565 | ( defined $lcheader{'organization'} ) |
305 | elsif ( m{^\s*[>\]]}o or m{^\s*::}o ) |
- | 306 | {
|
|
566 | && ( $lcheader{'organization'} =~ |
307 | ## bump count of quoted chrs
|
567 | /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
308 | $data{$poster}{'quoted'} += length($_); |
- | 309 | $totquoted += length($_); |
|
- | 310 | }
|
|
- | 311 | elsif (/^-- $/) |
|
- | 312 | {
|
|
- | 313 | $insig = 1; |
|
568 | )
|
314 | }
|
- | 315 | else
|
|
569 | {
|
316 | {
|
- | 317 | ## We must be processing an original line
|
|
- | 318 | $data{$poster}{'orig'} += length($_); # bump count of original chrs |
|
570 | $UA = $lcheader{'organization'}; |
319 | $totorig += length($_); |
- | 320 | }
|
|
571 | }
|
321 | }
|
572 | elsif ( $lcheader{'message-id'} =~ /pine/i ) |
- | |
573 | {
|
322 | |
574 | $UA = "Pine"; |
323 | # end for (@body)
|
575 | } ## Hopefully found UA, else set to unknown |
- | |
576 | }
|
324 | }
|
577 | 325 | ||
578 | $UA = clean($UA); |
326 | close($FILE); |
579 | $UA = get_agent($UA); |
- | |
- | 327 | }
|
|
580 | 328 | ||
581 | sub get_agent
|
329 | sub get_agent
|
582 | {
|
330 | {
|
583 | my $raw = shift; |
331 | my $msg = shift; |
584 | my $agent = $raw; |
- | |
585 | 332 | ||
- | 333 | my $ua = |
|
- | 334 | $msg->study('User-Agent') |
|
- | 335 | or $msg->study('X-Newsreader') |
|
- | 336 | or $msg->study('X-Mailer'); |
|
586 | ## strip http
|
337 | if ( not defined $ua ) |
- | 338 | {
|
|
- | 339 | my $org = $msg->study('Organization'); |
|
587 | if ( $raw =~ /.*http.*/ ) |
340 | if ( defined $org |
- | 341 | and $org =~ /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
|
588 | {
|
342 | {
|
589 | $raw =~ s!posted via!!i; |
- | |
590 | $raw =~ s!http://!!g; |
- | |
591 | $raw =~ s!/!!g; |
- | |
592 | $raw =~ s! !!g; |
343 | $ua = $org; |
593 | }
|
344 | }
|
594 | - | ||
595 | ## Fix Outlook from Mac
|
- | |
596 | if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; } |
345 | elsif ( $msg->study('Message-ID') =~ /pine/i ) |
597 | - | ||
598 | ## Pick out the popular agents
|
- | |
599 | if ( |
- | |
600 | $raw =~ /(outlook express)/i |
- | |
601 | || $raw =~ /(windows mail)/i |
- | |
602 | || $raw =~ /(microplanet gravity)/i |
- | |
603 | || $raw =~ /(news rover)/i |
- | |
604 | || $raw =~ /(forte agent)/i |
- | |
605 | || $raw =~ /(forte free agent)/i |
- | |
606 | )
|
- | |
607 | {
|
346 | {
|
608 | $agent = $1; |
347 | $ua = "Pine"; |
609 | }
|
348 | }
|
- | 349 | }
|
|
- | 350 | ||
- | 351 | ## Hopefully found UA, else set to unknown
|
|
- | 352 | if ( not defined $ua ) |
|
- | 353 | {
|
|
- | 354 | $ua = __ "unknown"; |
|
- | 355 | }
|
|
- | 356 | ||
- | 357 | $ua = clean($ua); |
|
- | 358 | ||
- | 359 | my $raw = $ua; |
|
- | 360 | my $agent = $raw; |
|
- | 361 | ||
- | 362 | ## strip http
|
|
- | 363 | if ( $raw =~ /.*http.*/ ) |
|
- | 364 | {
|
|
- | 365 | $raw =~ s!posted via!!i; |
|
- | 366 | $raw =~ s!http://!!g; |
|
- | 367 | $raw =~ s!/!!g; |
|
- | 368 | $raw =~ s! !!g; |
|
- | 369 | }
|
|
- | 370 | ||
- | 371 | ## Fix Outlook from Mac
|
|
- | 372 | if ( $raw =~ /^microsoft/i ) |
|
- | 373 | {
|
|
- | 374 | $raw =~ s/-/ /g; |
|
- | 375 | }
|
|
- | 376 | ||
- | 377 | ## Pick out the popular agents
|
|
- | 378 | if ( $raw =~ /(outlook express)/i |
|
- | 379 | || $raw =~ /(windows mail)/i |
|
- | 380 | || $raw =~ /(microplanet gravity)/i |
|
- | 381 | || $raw =~ /(news rover)/i |
|
- | 382 | || $raw =~ /(forte agent)/i |
|
- | 383 | || $raw =~ /(forte free agent)/i ) |
|
- | 384 | {
|
|
- | 385 | $agent = $1; |
|
- | 386 | }
|
|
610 | elsif ( |
387 | elsif ( |
611 | $raw =~ /^( |
388 | $raw =~ /^( |
612 | pan |
389 | pan |
613 | |sylpheed
|
390 | |sylpheed
|
614 | |slrn
|
391 | |slrn
|
615 | |mozilla
|
392 | |mozilla
|
616 | |knode
|
393 | |knode
|
Line 635... | Line 412... | ||
635 | |rn
|
412 | |rn
|
636 | |007
|
413 | |007
|
637 | |webtv
|
414 | |webtv
|
638 | |compuserve
|
415 | |compuserve
|
639 | )/ix |
416 | )/ix |
640 | )
|
417 | )
|
- | 418 | {
|
|
- | 419 | $agent = $1; |
|
- | 420 | }
|
|
- | 421 | else
|
|
- | 422 | {
|
|
- | 423 | ## Clean up unknown agents
|
|
- | 424 | if ( $raw =~ m!^(.*?)/! ) |
|
641 | {
|
425 | {
|
642 | $agent = $1; |
426 | $agent = $1; |
643 | }
|
427 | }
|
644 | else
|
428 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
645 | {
|
429 | {
|
646 | ## Clean up unknown agents
|
430 | $agent = $1; |
647 | if ( $raw =~ m!^(.*?)/! ) |
- | |
648 | {
|
431 | }
|
- | 432 | }
|
|
- | 433 | ||
- | 434 | $distinct_agent{$agent}++; |
|
649 | $agent = $1; |
435 | return $agent; |
- | 436 | }
|
|
- | 437 | ## get_agent
|
|
- | 438 | ||
- | 439 | #########################################
|
|
- | 440 | ## Count the User-Agents used, collapsing
|
|
- | 441 | ## different versions into one per agent.
|
|
- | 442 | #########################################
|
|
- | 443 | sub count_agents
|
|
- | 444 | {
|
|
- | 445 | POSTER:
|
|
- | 446 | foreach my $poster ( keys %data ) |
|
650 | }
|
447 | {
|
- | 448 | foreach my $agent_name ( keys %distinct_agent ) |
|
651 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
449 | { # check against known ones |
- | 450 | if ( $data{$poster}{'agent'} =~ /\Q$agent_name\E/ ) |
|
652 | {
|
451 | {
|
- | 452 | $agents{$agent_name}++; |
|
653 | $agent = $1; |
453 | next POSTER; |
654 | }
|
454 | }
|
655 | }
|
455 | }
|
- | 456 | $agents{ $data{$poster}{'agent'} }++; |
|
- | 457 | }
|
|
- | 458 | } # count_agents |
|
656 | 459 | ||
- | 460 | #############################################
|
|
- | 461 | ## Set orig/total percentages for all posters
|
|
- | 462 | #############################################
|
|
- | 463 | sub fix_percent
|
|
- | 464 | {
|
|
- | 465 | foreach my $poster ( keys %data ) |
|
- | 466 | {
|
|
657 | $distinct_agent{$agent}++; |
467 | my $percent = 100; |
- | 468 | if ( ( $data{$poster}{'orig'} != 0 ) and ( $data{$poster}{'quoted'} != 0 ) ) |
|
- | 469 | {
|
|
658 | return $agent; |
470 | $percent = |
- | 471 | $data{$poster}{'orig'} * 100 / |
|
- | 472 | ( $data{$poster}{'quoted'} + $data{$poster}{'orig'} ); #/ |
|
- | 473 | }
|
|
- | 474 | elsif ( $data{$poster}{'orig'} == 0 ) |
|
- | 475 | {
|
|
- | 476 | $percent = 0; |
|
- | 477 | }
|
|
- | 478 | $data{$poster}{'percent'} = $percent; |
|
659 | }
|
479 | }
|
- | 480 | }
|
|
- | 481 | ## fix_percent
|
|
660 | 482 | ||
- | 483 | ##################################
|
|
- | 484 | ## Write data structures to a file
|
|
- | 485 | ##################################
|
|
- | 486 | sub write_data
|
|
- | 487 | {
|
|
- | 488 | open( my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" ) |
|
- | 489 | or die __x( "Can't create XDATA: {error}\n", error => $! ); |
|
- | 490 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
|
- | 491 | print $OUTF |
|
- | 492 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
|
- | 493 | foreach my $name ( keys %data ) |
|
- | 494 | {
|
|
- | 495 | print $OUTF |
|
- | 496 | "$name : $data{$name}{'agent'} : $data{$name}{'count'} : $data{$name}{'size'} : $data{$name}{'orig'} : $data{$name}{'quoted'} : $data{$name}{'percent'}\n"; |
|
- | 497 | }
|
|
- | 498 | print $OUTF |
|
- | 499 | "============================================================================\n"; |
|
- | 500 | print $OUTF "Thread subjects\n"; |
|
- | 501 | print $OUTF |
|
- | 502 | "----------------------------------------------------------------------------\n"; |
|
- | 503 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
|
- | 504 | {
|
|
- | 505 | print $OUTF |
|
- | 506 | "$thread : $threads{$thread}{'count'} : $threads{$thread}{'size'}\n"; |
|
- | 507 | }
|
|
- | 508 | print $OUTF |
|
- | 509 | "============================================================================\n"; |
|
661 | ## Get all cross-posted newsgroups
|
510 | print $OUTF "Cross-posts\n"; |
- | 511 | print $OUTF |
|
- | 512 | "----------------------------------------------------------------------------\n"; |
|
662 | for ( split /,/, $headers{"Newsgroups"} ) |
513 | foreach my $name ( sort keys %crossposts ) |
- | 514 | {
|
|
- | 515 | print $OUTF "$name : $crossposts{$name}\n"; |
|
- | 516 | }
|
|
- | 517 | print $OUTF |
|
- | 518 | "============================================================================\n"; |
|
- | 519 | print $OUTF "User agents\n"; |
|
- | 520 | print $OUTF |
|
- | 521 | "----------------------------------------------------------------------------\n"; |
|
- | 522 | foreach my $name ( sort keys %agents ) |
|
- | 523 | {
|
|
- | 524 | print $OUTF "$name : $agents{$name}\n"; |
|
- | 525 | }
|
|
- | 526 | print $OUTF |
|
- | 527 | "============================================================================\n"; |
|
- | 528 | print $OUTF "Time zones\n"; |
|
- | 529 | print $OUTF |
|
- | 530 | "----------------------------------------------------------------------------\n"; |
|
- | 531 | foreach my $name ( sort keys %tz ) |
|
663 | {
|
532 | {
|
664 | $crossposts{$_}++; # bump count for each |
533 | print $OUTF "$name : $tz{$name}\n"; |
665 | }
|
534 | }
|
- | 535 | close $OUTF; |
|
- | 536 | } # write_data |
|
666 | 537 | ||
- | 538 | sub display_results
|
|
- | 539 | {
|
|
- | 540 | #################### DISPLAY RESULTS #####################
|
|
- | 541 | print "=" x 76, "\n"; |
|
- | 542 | printf "%s\n", |
|
667 | ## Get threads
|
543 | centred(
|
- | 544 | __x( "Analysis of posts to {newsgroup}", newsgroup => $newsgroup_name ), |
|
- | 545 | 76 ); |
|
- | 546 | print "=" x 76, "\n"; |
|
- | 547 | printf "%s\n", |
|
- | 548 | centred(
|
|
- | 549 | __ |
|
- | 550 | "(compiled with a script by Thomas 'PointedEars' Lahn, based on work by\nGarry Knight et al.)", |
|
- | 551 | 76
|
|
- | 552 | ); |
|
- | 553 | print "\n\n"; |
|
- | 554 | printf __"Total posts considered: %s over %d days" . "\n", |
|
- | 555 | commify($totalposts), |
|
- | 556 | $numdays; |
|
668 | my $thread = $headers{"Subject"}; |
557 | my $time_locale = setlocale(LC_TIME); |
669 | $thread =~ s/^re: //i; # Remove Re: or re: at start |
558 | my $earliest_datetime = DateTime->from_epoch( |
- | 559 | epoch => $earliest, |
|
- | 560 | locale => $time_locale, |
|
- | 561 | time_zone => 'UTC', |
|
- | 562 | ); |
|
- | 563 | my $latest_datetime = DateTime->from_epoch( |
|
- | 564 | epoch => $latest, |
|
- | 565 | locale => $time_locale, |
|
- | 566 | time_zone => 'UTC', |
|
- | 567 | ); |
|
- | 568 | my $datetime_format = '%a, %Y-%m-%dT%H:%M:%S %Z'; |
|
- | 569 | printf __"Earliest article" . ": %s\n", $earliest_datetime->strftime($datetime_format); |
|
- | 570 | printf __"Latest article" . ": %s\n", $latest_datetime->strftime($datetime_format); |
|
- | 571 | printf __"Original articles: %s; replies" . ": %s\n", |
|
- | 572 | commify($origposts), |
|
- | 573 | commify($replies); |
|
- | 574 | printf __"Total size of posts: %s bytes (%s KiB) (%.2f MiB)" . "\n", |
|
- | 575 | commify($totsize), commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
|
- | 576 | printf __ |
|
- | 577 | "Average %s articles per day, %.2f MiB per day, %s bytes per article\n", |
|
- | 578 | commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
|
- | 579 | commify( int( $totsize / $totalposts ) ); |
|
- | 580 | my $count = keys %data; |
|
- | 581 | printf __"Total headers: %s KiB; bodies: %s KiB\n", |
|
- | 582 | commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
|
- | 583 | printf __ |
|
- | 584 | "Body text - quoted: %s KiB; original: %s KiB = %02.2f%%; sigs: %s KiB\n", |
|
- | 585 | commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
|
670 | $thread =~ s/\s+/ /g; # collapse whitespace |
586 | ( $totorig * 100 ) / ( $totorig + $totquoted ), |
- | 587 | commify( int( $totsig / 1024 ) ); |
|
- | 588 | printf __"Total number of posters: %s, average %s bytes per poster\n", |
|
671 | $threads{$thread}{count} += 1; # bump count of this subject |
589 | commify($count), commify( int( $totsize / $count ) ); #/ |
- | 590 | $count = keys %threads; |
|
672 | $threads{$thread}{size} += $filesize; # bump bytes for this thread |
591 | printf __"Total number of threads: %s, average %s bytes per thread\n", |
- | 592 | commify($count), commify( int( $totsize / $count ) ); #/ |
|
- | 593 | printf __"Total number of user agents: %d\n", scalar keys %agents; |
|
- | 594 | print "\n", "=" x 76, "\n"; |
|
673 | 595 | ||
- | 596 | ########################################
|
|
674 | ## Is this an original post or a reply?
|
597 | ## Show posters by article count Sec 1;
|
- | 598 | ########################################
|
|
675 | if ( defined $headers{"References"} ) |
599 | unless ( $skipSec{1} ) |
676 | {
|
600 | {
|
- | 601 | if ( keys %data < $topposters ) |
|
- | 602 | {
|
|
- | 603 | $count = keys %data; |
|
- | 604 | }
|
|
- | 605 | else
|
|
- | 606 | {
|
|
- | 607 | $count = $topposters; |
|
- | 608 | }
|
|
- | 609 | printf "%s\n", |
|
- | 610 | centred(
|
|
- | 611 | __x( "Top {count} posters by number of articles", count => $topposters ), 76 ); |
|
- | 612 | print "=" x 76, "\n"; |
|
677 | $replies++; |
613 | my $i = 0; |
- | 614 | foreach
|
|
- | 615 | my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
|
- | 616 | {
|
|
- | 617 | my $name = substr( $poster, 0, 65 ); |
|
- | 618 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
|
- | 619 | $data{$poster}{count}; |
|
- | 620 | last if ( ++$i == $count ); |
|
- | 621 | }
|
|
- | 622 | print "\n", "=" x 76, "\n"; |
|
678 | }
|
623 | }
|
- | 624 | ||
- | 625 | ######################################
|
|
- | 626 | ## Show posters by size in KiB Sec 2;
|
|
- | 627 | ######################################
|
|
679 | else
|
628 | unless ( $skipSec{2} ) |
680 | {
|
629 | {
|
- | 630 | if ( keys %data < $topposters ) |
|
- | 631 | {
|
|
- | 632 | $count = keys %data; |
|
- | 633 | }
|
|
- | 634 | else
|
|
- | 635 | {
|
|
- | 636 | $count = $topposters; |
|
- | 637 | }
|
|
681 | $origposts++; |
638 | printf "%s\n", |
- | 639 | centred(
|
|
- | 640 | __x( "Top {count} posters by article size in KiB", count => $topposters ), |
|
- | 641 | 76 ); |
|
- | 642 | print "=" x 76, "\n"; |
|
- | 643 | my $i = 0; |
|
- | 644 | foreach
|
|
- | 645 | my $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
|
- | 646 | {
|
|
- | 647 | my $name = substr( $poster, 0, 62 ); |
|
- | 648 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
|
- | 649 | $data{$poster}{size} / 1024; #/ |
|
- | 650 | last if ( ++$i == $count ); |
|
- | 651 | }
|
|
- | 652 | print "\n", "=" x 76, "\n"; |
|
682 | }
|
653 | }
|
683 | 654 | ||
684 | ## Get the time zone
|
655 | #####################################
|
685 | $_ = $headers{"Date"}; |
656 | ## Show top posters for original text
|
686 | my ($tz) = /\d\d:\d\d(?::\d\d)?\s+(.*)/; |
657 | #####################################
|
687 | if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
658 | unless ( $skipSec{3} ) |
688 | {
|
659 | {
|
- | 660 | if ( keys %data < $topposters ) |
|
- | 661 | {
|
|
- | 662 | $count = keys %data; |
|
- | 663 | }
|
|
- | 664 | else
|
|
- | 665 | {
|
|
- | 666 | $count = $topposters; |
|
- | 667 | }
|
|
689 | $tz = "UTC"; |
668 | printf "%s\n", |
- | 669 | centred(
|
|
- | 670 | __x(
|
|
- | 671 | "Top {count} responders by original text (> 5 posts)", |
|
- | 672 | count => $topposters |
|
- | 673 | ), |
|
- | 674 | 76
|
|
- | 675 | ); |
|
- | 676 | print "=" x 76, "\n"; |
|
- | 677 | my $i = 0; |
|
- | 678 | foreach my $poster ( |
|
- | 679 | sort { $data{$b}{percent} <=> $data{$a}{percent} } |
|
- | 680 | keys %data |
|
- | 681 | )
|
|
- | 682 | {
|
|
- | 683 | next if $data{$poster}{quoted} == 0; |
|
- | 684 | next if $data{$poster}{count} < 5; |
|
- | 685 | my $name = substr( $poster, 0, 63 ); |
|
- | 686 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
|
- | 687 | $data{$poster}{percent}; |
|
- | 688 | last if ( ++$i == $count ); |
|
- | 689 | }
|
|
- | 690 | print "\n", "=" x 76, "\n"; |
|
690 | }
|
691 | }
|
691 | $tz{$tz}++; |
- | |
692 | 692 | ||
693 | #### Now analyse the body text ####
|
693 | ########################################
|
694 | my $insig = 0; |
694 | ## Show bottom posters for original text
|
- | 695 | ########################################
|
|
695 | for (@body) |
696 | unless ( $skipSec{4} ) |
696 | {
|
697 | {
|
697 | $totbody += length($_); # bump total body size |
- | |
698 | next if (/^$>/); # don't count blank lines in body |
- | |
699 | if ( $insig == 1 ) |
698 | if ( keys %data < $topposters ) |
700 | {
|
699 | {
|
- | 700 | $count = keys %data; |
|
- | 701 | }
|
|
- | 702 | else
|
|
- | 703 | {
|
|
- | 704 | $count = $topposters; |
|
- | 705 | }
|
|
- | 706 | printf "%s\n", |
|
- | 707 | centred(
|
|
- | 708 | __x(
|
|
- | 709 | "Bottom {count} responders by original text (> 5 posts)", |
|
- | 710 | count => $topposters |
|
- | 711 | ), |
|
- | 712 | 76
|
|
- | 713 | ); |
|
- | 714 | print "=" x 76, "\n"; |
|
- | 715 | my $i = 0; |
|
- | 716 | foreach my $poster ( |
|
- | 717 | sort { $data{$a}{percent} <=> $data{$b}{percent} } |
|
- | 718 | keys %data |
|
- | 719 | )
|
|
- | 720 | {
|
|
701 | $totsig += length($_); # bump total sig size |
721 | next if $data{$poster}{quoted} == 0; |
- | 722 | next if $data{$poster}{count} < 5; |
|
- | 723 | my $name = substr( $poster, 0, 63 ); |
|
- | 724 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
|
- | 725 | $data{$poster}{percent}; |
|
- | 726 | last if ( ++$i == $count ); |
|
- | 727 | }
|
|
- | 728 | print "\n", "=" x 76, "\n"; |
|
- | 729 | }
|
|
702 | 730 | ||
- | 731 | #####################################
|
|
703 | ## Bill Unruh uses ] quotes, and another poster uses ::
|
732 | ## Show threads by number of articles
|
- | 733 | #####################################
|
|
- | 734 | unless ( $skipSec{5} ) |
|
- | 735 | {
|
|
- | 736 | if ( keys %threads < $topthreads ) |
|
- | 737 | {
|
|
- | 738 | $count = keys %threads; |
|
704 | }
|
739 | }
|
705 | elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
740 | else
|
706 | { # are we in a quote line? |
741 | {
|
707 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
- | |
708 | $totquoted += length($_); |
742 | $count = $topthreads; |
709 | }
|
743 | }
|
710 | elsif (/-- /) |
744 | printf "%s\n", |
- | 745 | centred( __x( "Top {count} threads by no. of articles", count => $topthreads ), |
|
- | 746 | 76 ); |
|
- | 747 | print "=" x 76, "\n"; |
|
- | 748 | my $i = 0; |
|
- | 749 | foreach my $thread ( |
|
- | 750 | sort { $threads{$b}{'count'} <=> $threads{$a}{'count'} } |
|
- | 751 | keys %threads |
|
- | 752 | )
|
|
711 | {
|
753 | {
|
- | 754 | my $name = substr( $thread, 0, 65 ); |
|
- | 755 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
|
- | 756 | $threads{$thread}{'count'}; |
|
- | 757 | last if ( ++$i == $count ); |
|
- | 758 | }
|
|
- | 759 | print "\n", "=" x 76, "\n"; |
|
- | 760 | }
|
|
- | 761 | ||
- | 762 | ##############################
|
|
- | 763 | ## Show threads by size in KiB
|
|
- | 764 | ##############################
|
|
- | 765 | unless ( $skipSec{6} ) |
|
- | 766 | {
|
|
- | 767 | if ( keys %threads < $topthreads ) |
|
- | 768 | {
|
|
712 | $insig = 1; |
769 | $count = keys %threads; |
713 | }
|
770 | }
|
714 | else
|
771 | else
|
715 | {
|
772 | {
|
- | 773 | $count = $topthreads; |
|
- | 774 | }
|
|
- | 775 | printf "%s\n", |
|
- | 776 | centred( __x( "Top {count} threads by size in KiB", count => $topthreads ), |
|
- | 777 | 76 ); |
|
- | 778 | print "=" x 76, "\n"; |
|
- | 779 | my $i = 0; |
|
- | 780 | foreach my $thread ( |
|
- | 781 | sort { $threads{$b}{'size'} <=> $threads{$a}{'size'} } |
|
- | 782 | keys %threads |
|
- | 783 | )
|
|
- | 784 | {
|
|
- | 785 | my $name = substr( $thread, 0, 65 ); |
|
- | 786 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
|
- | 787 | $threads{$thread}{'size'} / 1024; #/ |
|
- | 788 | last if ( ++$i == $count ); |
|
- | 789 | }
|
|
- | 790 | print "\n", "=" x 76, "\n"; |
|
- | 791 | }
|
|
716 | 792 | ||
- | 793 | ##################################
|
|
- | 794 | ## Show top 10 cross-posted groups
|
|
- | 795 | ##################################
|
|
- | 796 | unless ( $skipSec{7} ) |
|
- | 797 | {
|
|
- | 798 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
|
717 | ## We must be processing an original line
|
799 | if ( keys %crossposts < $topcrossposts ) |
- | 800 | {
|
|
- | 801 | $count = keys %crossposts; |
|
- | 802 | }
|
|
- | 803 | else
|
|
- | 804 | {
|
|
- | 805 | $count = $topcrossposts; |
|
- | 806 | }
|
|
- | 807 | printf "%s\n", |
|
718 | $data{$poster}{orig} += length($_); # bump count of original chrs |
808 | centred( __x( "Top {count} cross-posted groups", count => $topcrossposts ), 76 ); |
- | 809 | print "=" x 76, "\n"; |
|
- | 810 | my $i = 0; |
|
- | 811 | foreach
|
|
- | 812 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
|
- | 813 | {
|
|
- | 814 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
|
- | 815 | $crossposts{$name}; |
|
719 | $totorig += length($_); |
816 | last if ( ++$i == $count ); |
720 | }
|
817 | }
|
721 | } # end for (@body) |
818 | print "\n", "=" x 76, "\n"; |
- | 819 | }
|
|
722 | 820 | ||
- | 821 | #########################
|
|
- | 822 | ## Show agents and counts
|
|
- | 823 | #########################
|
|
- | 824 | unless ( $skipSec{8} ) |
|
- | 825 | {
|
|
- | 826 | if ( keys %agents < $topagents ) |
|
- | 827 | {
|
|
- | 828 | $count = keys %agents; |
|
- | 829 | }
|
|
- | 830 | else
|
|
- | 831 | {
|
|
- | 832 | $count = $topagents; |
|
- | 833 | }
|
|
- | 834 | printf "%s\n", |
|
- | 835 | centred( __x( "Top {count} user agents by poster", count => $topagents ), |
|
- | 836 | 76 ); |
|
- | 837 | print "=" x 76, "\n"; |
|
- | 838 | my $i = 0; |
|
- | 839 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
|
- | 840 | {
|
|
- | 841 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
|
723 | } # get_data |
842 | $agents{$agent}; |
- | 843 | last if ( ++$i == $count ); |
|
- | 844 | }
|
|
- | 845 | print "\n", "=" x 76, "\n"; |
|
- | 846 | }
|
|
724 | 847 | ||
725 | #########################################
|
848 | #######################
|
726 | ## Count the User-Agents used, collapsing
|
- | |
727 | ## different versions into one per agent.
|
849 | ## Show distinct agents
|
728 | #########################################
|
850 | #######################
|
729 | sub count_agents
|
- | |
730 | {
|
- | |
731 | POSTER:
|
- | |
732 | foreach my $poster ( keys %data ) |
851 | unless ( $skipSec{9} ) |
733 | {
|
852 | {
|
734 | foreach my $agent_name ( keys %distinct_agent ) |
- | |
735 | { # check against known ones |
- | |
736 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
853 | if ( keys %distinct_agent < $topagents ) |
737 | {
|
854 | {
|
738 | $agents{$agent_name}++; |
855 | $count = keys %distinct_agent; |
739 | next POSTER; |
- | |
740 | }
|
- | |
741 | }
|
856 | }
|
- | 857 | else
|
|
- | 858 | {
|
|
- | 859 | $count = $topagents; |
|
- | 860 | }
|
|
- | 861 | printf "%s\n", |
|
- | 862 | centred(
|
|
- | 863 | __x( "Top {count} user agents by number of posts", count => $topagents ), |
|
- | 864 | 76 ); |
|
- | 865 | print "=" x 76, "\n"; |
|
- | 866 | my $i = 0; |
|
- | 867 | foreach my $agent ( |
|
- | 868 | sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
|
- | 869 | keys %distinct_agent |
|
- | 870 | )
|
|
- | 871 | {
|
|
- | 872 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
|
742 | $agents{ $data{$poster}{agent} }++; |
873 | $distinct_agent{$agent}, |
- | 874 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
|
- | 875 | last if ( ++$i == $count ); |
|
- | 876 | }
|
|
- | 877 | print "\n", "=" x 76, "\n"; |
|
743 | }
|
878 | }
|
744 | } # count_agents |
- | |
745 | 879 | ||
746 | #############################################
|
880 | ############################
|
747 | ## Set orig/total percentages for all posters
|
881 | ## Show timezones and counts
|
748 | #############################################
|
882 | ############################
|
749 | sub fix_percent
|
- | |
750 | {
|
- | |
751 | foreach my $poster ( keys %data ) |
883 | unless ( $skipSec{10} ) |
752 | {
|
884 | {
|
753 | my $percent = 100; |
885 | if ( keys %tz < $toptz ) |
754 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
- | |
755 | {
|
886 | {
|
756 | $percent = |
887 | $count = keys %tz; |
757 | $data{$poster}{orig} * 100 / |
- | |
758 | ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
- | |
759 | }
|
888 | }
|
760 | elsif ( $data{$poster}{orig} == 0 ) |
889 | else
|
761 | {
|
890 | {
|
762 | $percent = 0; |
891 | $count = $toptz; |
763 | }
|
892 | }
|
- | 893 | printf "%s\n", centred( __x("Top {count} time zones", count => $toptz), 76 ); |
|
- | 894 | print "=" x 76, "\n"; |
|
- | 895 | my $i = 0; |
|
- | 896 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
|
- | 897 | {
|
|
- | 898 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
|
764 | $data{$poster}{percent} = $percent; |
899 | last if ( ++$i == $count ); |
- | 900 | }
|
|
- | 901 | print "\n", "=" x 76, "\n"; |
|
765 | }
|
902 | }
|
766 | }
|
903 | }
|
767 | 904 | ||
- | 905 | ## helper subs
|
|
- | 906 | ||
768 | ###############################
|
907 | ###############################
|
769 | ## Right pad a string with '.'s
|
908 | ## Right pad a string with '.'s
|
770 | ###############################
|
909 | ###############################
|
771 | sub rpad
|
910 | sub rpad
|
772 | {
|
911 | {
|
773 | ## Get text to pad, length to pad, pad chr
|
912 | ## Get text to pad, length to pad, pad chr
|
774 | my ( $text, $pad_len, $pad_chr ) = @_; |
913 | my ( $text, $pad_len, $pad_chr ) = @_; |
775 | 914 | ||
776 | ## DEBUG
|
915 | ## DEBUG
|
777 | #printf "|%s| = %d\n", $text, length($text);
|
916 | printf( "|%s| = %d\n", $text, length($text) ) if DEBUG > 1; |
778 | 917 | ||
779 | if ( length($text) > $pad_len ) |
918 | if ( length($text) > $pad_len ) |
780 | {
|
919 | {
|
781 | $text = substr( $text, 0, $pad_len ); |
920 | $text = substr( $text, 0, $pad_len ); |
782 | }
|
921 | }
|
Line 796... | Line 935... | ||
796 | }
|
935 | }
|
797 | 936 | ||
798 | ###########################
|
937 | ###########################
|
799 | ## Put commas into a number
|
938 | ## Put commas into a number
|
800 | ###########################
|
939 | ###########################
|
- | 940 | # Get some of locale's numeric formatting parameters
|
|
- | 941 | my ($thousands_sep, $grouping) = |
|
- | 942 | @{localeconv()}{'thousands_sep', 'grouping'}; |
|
- | 943 | # Apply defaults if values are missing
|
|
- | 944 | $thousands_sep = ',' unless $thousands_sep; |
|
- | 945 | # grouping and mon_grouping are packed lists
|
|
- | 946 | # of small integers (characters) telling the
|
|
- | 947 | # grouping (thousand_seps and mon_thousand_seps
|
|
- | 948 | # being the group dividers) of numbers and
|
|
- | 949 | # monetary quantities. The integers' meanings:
|
|
- | 950 | # 255 means no more grouping, 0 means repeat
|
|
- | 951 | # the previous grouping, 1-254 means use that
|
|
- | 952 | # as the current grouping. Grouping goes from
|
|
- | 953 | # right to left (low to high digits). In the
|
|
- | 954 | # below we cheat slightly by never using anything
|
|
- | 955 | # else than the first grouping (whatever that is).
|
|
- | 956 | my @grouping; |
|
- | 957 | if ($grouping) { |
|
- | 958 | @grouping = unpack("C*", $grouping); |
|
- | 959 | } else { |
|
- | 960 | @grouping = (3); |
|
- | 961 | }
|
|
- | 962 | ||
801 | sub commify
|
963 | sub commify
|
802 | {
|
964 | {
|
803 | local $_ = shift; |
965 | local $_ = shift; |
804 | 1 while s/^([-+]?\d+)(\d{3})/$1,$2/; |
966 | #1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
|
- | 967 | $_ = int; # Chop non-integer part |
|
- | 968 | 1 while |
|
- | 969 | s/(\d)(\d{$grouping[0]}($|$thousands_sep))/$1$thousands_sep$2/; |
|
805 | return $_; |
970 | return $_; |
806 | }
|
971 | }
|
807 | 972 | ||
808 | ################################################################
|
973 | ################################################################
|
809 | ## Returns a string with leading and trailing whitespace removed
|
974 | ## Returns a string with leading and trailing whitespace removed
|
Line 817... | Line 982... | ||
817 | return $clean; |
982 | return $clean; |
818 | }
|
983 | }
|
819 | 984 | ||
820 | sub usage
|
985 | sub usage
|
821 | {
|
986 | {
|
822 | print "usage: newstat.pl newsgroupname\n"; |
987 | print __"usage: newsstat.pl NEWS.GROUP\n"; |
823 | exit 1; |
988 | exit 1; |
824 | }
|
989 | }
|
825 | 990 | ||
826 | ##################################
|
- | |
827 | ## Write data structures to a file
|
- | |
828 | ##################################
|
- | |
829 | sub write_data
|
991 | sub dmsg
|
830 | {
|
992 | {
|
831 | open my $OUTF, ">:encoding(UTF-8)", "/tmp/XDATA" |
- | |
832 | or die "Can't create XDATA: $!\n"; |
- | |
833 | print $OUTF "Data collected from $newsgroup_name\n\n"; |
- | |
834 | print $OUTF |
- | |
835 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
- | |
836 | foreach my $name ( keys %data ) |
- | |
837 | {
|
- | |
838 | print $OUTF |
- | |
839 | "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
- | |
840 | }
|
- | |
841 | print $OUTF |
- | |
842 | "============================================================================\n"; |
- | |
843 | print $OUTF "Thread subjects\n"; |
993 | print STDERR @_, "\n"; |
844 | print $OUTF |
- | |
845 | "----------------------------------------------------------------------------\n"; |
- | |
846 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
- | |
847 | {
|
- | |
848 | print $OUTF |
- | |
849 | "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
- | |
850 | }
|
994 | }
|
851 | print $OUTF |
- | |
852 | "============================================================================\n"; |
- | |
853 | print $OUTF "Cross-posts\n"; |
- | |
854 | print $OUTF |
- | |
855 | "----------------------------------------------------------------------------\n"; |
- | |
856 | foreach my $name ( sort keys %crossposts ) |
- | |
857 | {
|
- | |
858 | print $OUTF "$name : $crossposts{$name}\n"; |
- | |
859 | }
|
995 | |
860 | print $OUTF |
- | |
861 | "============================================================================\n"; |
- | |
862 | print $OUTF "User agents\n"; |
- | |
863 | print $OUTF |
996 | sub dmsg2
|
864 | "----------------------------------------------------------------------------\n"; |
- | |
865 | foreach my $name ( sort keys %agents ) |
- | |
866 | {
|
997 | {
|
867 | print $OUTF "$name : $agents{$name}\n"; |
- | |
868 | }
|
- | |
869 | print $OUTF |
- | |
870 | "============================================================================\n"; |
- | |
871 | print $OUTF "Time zones\n"; |
998 | my ( $level, @msg ) = @_; |
872 | print $OUTF |
- | |
873 | "----------------------------------------------------------------------------\n"; |
- | |
874 | foreach my $name ( sort keys %tz ) |
- | |
875 | {
|
- | |
876 | print $OUTF "$name : $tz{$name}\n"; |
999 | print STDERR @msg, "\n" if $level >= DEBUG; |
877 | }
|
1000 | }
|
878 | close $OUTF; |
- | |
879 | } # write_data |
- |