Rev 6 | Rev 8 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
| Rev 6 | Rev 7 | ||
|---|---|---|---|
| Line 1... | Line 1... | ||
| 1 | #!/usr/bin/perl -w
|
1 | #!/usr/bin/perl
|
| 2 | use strict; |
2 | use strict; |
| 3 | use warnings; |
3 | use warnings; |
| 4 | use utf8; |
4 | use utf8; |
| 5 | use Encode; |
5 | use Encode; |
| 6 | 6 | ||
| 7 | #########################
|
7 | #########################
|
| 8 | # newsstat.pl version 0.4
|
8 | # newsstat.pl version 0.4
|
| 9 | 9 | ||
| 10 | ############################################################################
|
10 | ############################################################################
|
| 11 | # Collect statistics about a newsgroup (specified by first argument) in
|
11 | # Collect statistics about a newsgroup (specified by first argument) in
|
| 12 | # the local news spool. Check all articles in the last 30-day period.
|
12 | # the local news spool. Check all articles in the last 30-day period.
|
| 13 | # Rank posters by number of posts and by volume of posts, report on top and
|
13 | # Rank posters by number of posts and by volume of posts, report on top and
|
| 14 | # bottom 20 posters. Show their name, number of posts, size of posts,
|
14 | # bottom 20 posters. Show their name, number of posts, size of posts,
|
| 15 | # percentage of quoted lines. Rank user-agents used, by poster rather than
|
15 | # percentage of quoted lines. Rank user-agents used, by poster rather than
|
| 16 | # by post. Rank top 20 threads. Rank top 10 cross-posted groups.
|
16 | # by post. Rank top 20 threads. Rank top 10 cross-posted groups.
|
| 17 | #
|
17 | #
|
| 18 | # (Numbers and paths can be configured below. -- PE)
|
18 | # (Numbers and paths can be configured below. -- PE)
|
| 19 | ############################################################################
|
19 | ############################################################################
|
| 20 | 20 | ||
| 21 | ############################################################################
|
21 | ############################################################################
|
| 22 | # RECENT CHANGES #
|
22 | # RECENT CHANGES #
|
| 23 | # 2011-07-03 PE - Use Encode to decode/encode MIME encodings
|
23 | # 2011-07-03 PE - Use Encode to decode/encode MIME encodings
|
| 24 | # - Use warnings, utf8 (just in case)
|
24 | # - Use warnings, utf8 (just in case)
|
| 25 | # - Documentation update
|
25 | # - Documentation update
|
| 26 | # N/A NN - Take newsgroup name as argument
|
26 | # N/A NN - Take newsgroup name as argument
|
| 27 | # 2004-06-19 NN - newsgroup name is $ARGV[0]
|
27 | # 2004-06-19 NN - newsgroup name is $ARGV[0]
|
| 28 | # - Allow command line flags for subtracting
|
28 | # - Allow command line flags for subtracting
|
| 29 | # output if not pertinent for a group
|
29 | # output if not pertinent for a group
|
| Line 37... | Line 37... | ||
| 37 | # 2002-11-05 NN - moved user configurations to the top
|
37 | # 2002-11-05 NN - moved user configurations to the top
|
| 38 | # - fixed the cross-posting section
|
38 | # - fixed the cross-posting section
|
| 39 | # - introduced the $newsgroup_name variable which
|
39 | # - introduced the $newsgroup_name variable which
|
| 40 | # later becomes $news$group
|
40 | # later becomes $news$group
|
| 41 | # - changed $name to $agent_name in countagents()
|
41 | # - changed $name to $agent_name in countagents()
|
| 42 | #
|
42 | #
|
| 43 | # Contributors
|
43 | # Contributors
|
| 44 | # -------------
|
44 | # -------------
|
| 45 | # NN Nomen nominandum (name to be determined later)
|
45 | # NN Nomen nominandum (name to be determined later)
|
| 46 | # PE Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
|
46 | # PE Thomas 'PointedEars' Lahn <startrek@PointedEars.de>
|
| 47 | 47 | ||
| Line 73... | Line 73... | ||
| 73 | my $ix; |
73 | my $ix; |
| 74 | my $j; |
74 | my $j; |
| 75 | my %skipSec; |
75 | my %skipSec; |
| 76 | my @skiplist; |
76 | my @skiplist; |
| 77 | my $args = @ARGV; |
77 | my $args = @ARGV; |
| 78 | for ( $ix = 1 ; $ix < $args ; $ix++ ) { |
78 | for ( $ix = 1 ; $ix < $args ; $ix++ ) |
| - | 79 | {
|
|
| 79 | $j = $ix + 1; |
80 | $j = $ix + 1; |
| 80 | if ( $ARGV[$ix] eq "-x" ) { |
81 | if ( $ARGV[$ix] eq "-x" ) |
| - | 82 | {
|
|
| 81 | @skiplist = split(",",$ARGV[$j]); |
83 | @skiplist = split( ",", $ARGV[$j] ); |
| - | 84 | }
|
|
| 82 | } elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) { |
85 | elsif ( $ARGV[$ix] =~ /-x(\d.*)/ ) |
| - | 86 | {
|
|
| 83 | @skiplist = split(",",$1); |
87 | @skiplist = split( ",", $1 ); |
| 84 | }
|
88 | }
|
| 85 | }
|
89 | }
|
| 86 | foreach(@skiplist) { |
90 | foreach (@skiplist) |
| - | 91 | {
|
|
| 87 | $skipSec{$_} = 1; |
92 | $skipSec{$_} = 1; |
| 88 | }
|
93 | }
|
| 89 | 94 | ||
| 90 | # Leafnode users will want /var/spool/news for this variable.
|
95 | # Leafnode users will want /var/spool/news for this variable.
|
| 91 | my $news = "/var/spool/news/"; |
96 | my $news = "/var/spool/news/"; |
| Line 106... | Line 111... | ||
| 106 | my $topcrossposts = 10; |
111 | my $topcrossposts = 10; |
| 107 | 112 | ||
| 108 | # no. of time zones to show
|
113 | # no. of time zones to show
|
| 109 | my $toptz = 10; |
114 | my $toptz = 10; |
| 110 | 115 | ||
| 111 | - | ||
| 112 | - | ||
| 113 | ###################### DATA STRUCTURES ######################
|
116 | ###################### DATA STRUCTURES ######################
|
| 114 | my $group = $newsgroup_name; |
117 | my $group = $newsgroup_name; |
| 115 | $group =~ s!\.!/!g; |
118 | $group =~ s!\.!/!g; |
| 116 | my %data; # name, count, agent, total, orig, quoted |
119 | my %data; # name, count, agent, total, orig, quoted |
| 117 | my %threads; # subject, count |
120 | my %threads; # subject, count |
| 118 | my %crossposts; # group, count |
121 | my %crossposts; # group, count |
| 119 | my %tz; # timezones by count |
122 | my %tz; # timezones by count |
| 120 | my %headers; # holds header of current article |
123 | my %headers; # holds header of current article |
| 121 | my %lcheader; # holds lowercase headers |
124 | my %lcheader; # holds lowercase headers |
| 122 | my @body; # holds body of current article |
125 | my @body; # holds body of current article |
| 123 | my @sig; # holds sig text; |
126 | my @sig; # holds sig text; |
| 124 | my $totalposts; # total no. of posts considered |
127 | my $totalposts; # total no. of posts considered |
| 125 | my $filename; # name of current article file |
128 | my $filename; # name of current article file |
| 126 | my $filesize; # size of current article file |
129 | my $filesize; # size of current article file |
| 127 | my $earliest; # earliest article we have found |
130 | my $earliest; # earliest article we have found |
| 128 | my $latest; # latest article we have found |
131 | my $latest; # latest article we have found |
| 129 | my $poster; # poster we are dealing with |
132 | my $poster; # poster we are dealing with |
| 130 | my $totsize = 0; # holds total sizes of all files |
133 | my $totsize = 0; # holds total sizes of all files |
| 131 | my $totheader = 0; # total size of header material |
134 | my $totheader = 0; # total size of header material |
| 132 | my $totbody = 0; # total size of body material |
135 | my $totbody = 0; # total size of body material |
| 133 | my $totsig = 0; # total size of sig material |
136 | my $totsig = 0; # total size of sig material |
| 134 | my $totorig = 0; # total size of original material |
137 | my $totorig = 0; # total size of original material |
| 135 | my $totquoted = 0; # total size of quoted material |
138 | my $totquoted = 0; # total size of quoted material |
| 136 | my $origposts = 0; # total no. of original posts |
139 | my $origposts = 0; # total no. of original posts |
| 137 | my $replies = 0; # total no. of replies |
140 | my $replies = 0; # total no. of replies |
| 138 | my $i; # general purpose |
141 | my $i; # general purpose |
| 139 | my %distinct_agent; |
142 | my %distinct_agent; |
| 140 | my %agents = # used to hold counts of User Agents used |
143 | my %agents = # used to hold counts of User Agents used |
| - | 144 | (
|
|
| 141 | ( "KNode" => 0, |
145 | "KNode" => 0, |
| 142 | "Pan" => 0, |
146 | "Pan" => 0, |
| 143 | "Mozilla" => 0, |
147 | "Mozilla" => 0, |
| 144 | "Sylpheed" => 0, |
148 | "Sylpheed" => 0, |
| 145 | "Gnus" => 0, |
149 | "Gnus" => 0, |
| 146 | "Forte Agent" => 0, |
150 | "Forte Agent" => 0, |
| 147 | "Forte Free Agent" => 0, |
151 | "Forte Free Agent" => 0, |
| 148 | "MicroPlanet Gravity" => 0, |
152 | "MicroPlanet Gravity" => 0, |
| 149 | "Microsoft Outlook Express" => 0, |
153 | "Microsoft Outlook Express" => 0, |
| 150 | "Xnews" => 0, |
154 | "Xnews" => 0, |
| 151 | "slrn" => 0, |
155 | "slrn" => 0, |
| 152 | "tin" => 0, |
156 | "tin" => 0, |
| 153 | "rn" => 0, |
157 | "rn" => 0, |
| 154 | "NN" => 0, |
158 | "NN" => 0, |
| 155 | "MacSOUP" => 0, |
159 | "MacSOUP" => 0, |
| 156 | "Foorum" => 0, |
160 | "Foorum" => 0, |
| 157 | "MT-NewsWatcher" => 0, |
161 | "MT-NewsWatcher" => 0, |
| 158 | "News Rover" => 0, |
162 | "News Rover" => 0, |
| 159 | "WebTV" => 0, |
163 | "WebTV" => 0, |
| 160 | "Compuserver" => 0, |
164 | "Compuserver" => 0, |
| 161 | "VSoup" => 0); |
165 | "VSoup" => 0 |
| - | 166 | ); |
|
| 162 | 167 | ||
| 163 | ######################## MAIN CODE ########################
|
168 | ######################## MAIN CODE ########################
|
| 164 | $! = 1; |
169 | $! = 1; |
| 165 | 170 | ||
| 166 | chdir("$news$group") or die "Can't cd to $news$group: $!\n"; |
171 | chdir("$news$group") or die "Can't cd to $news$group: $!\n"; |
| 167 | opendir(DIR, ".") or die "Can't open $news$group directory: $!\n"; |
172 | opendir( DIR, "." ) or die "Can't open $news$group directory: $!\n"; |
| 168 | while (defined($filename = readdir(DIR))) { |
173 | while ( defined( $filename = readdir(DIR) ) ) |
| - | 174 | {
|
|
| 169 | %lcheader = (); |
175 | %lcheader = (); |
| 170 | next unless -f $filename; # only want real files |
176 | next unless -f $filename; # only want real files |
| 171 | next if ($filename eq ".overview"); # real articles only |
177 | next if ( $filename eq ".overview" ); # real articles only |
| 172 | next if (-M $filename > $numdays); # only want articles <= a certain age |
178 | next if ( -M $filename > $numdays ); # only want articles <= a certain age |
| 173 | $earliest = (stat $filename)[9] unless defined ($earliest); |
179 | $earliest = ( stat $filename )[9] unless defined($earliest); |
| 174 | $latest = (stat $filename)[9] unless defined ($latest); |
180 | $latest = ( stat $filename )[9] unless defined($latest); |
| 175 | &getarticle($filename); # read in the article |
181 | &getarticle($filename); # read in the article |
| 176 | &getdata; # grab the data from the article |
182 | &getdata; # grab the data from the article |
| 177 | $totalposts++; # bump count of articles considered |
183 | $totalposts++; # bump count of articles considered |
| 178 | }
|
184 | }
|
| 179 | closedir(DIR); # finished with the directory |
185 | closedir(DIR); # finished with the directory |
| - | 186 | ||
| 180 | # post-processing
|
187 | # post-processing
|
| 181 | &countagents; # count agents, collapsing versions |
188 | &countagents; # count agents, collapsing versions |
| 182 | &fixpercent; # check percentages orig/total for posters |
189 | &fixpercent; # check percentages orig/total for posters |
| 183 | 190 | ||
| 184 | &writedata; |
191 | &writedata; |
| 185 | 192 | ||
| 186 | #################### DISPLAY RESULTS #####################
|
193 | #################### DISPLAY RESULTS #####################
|
| 187 | print "=" x 76, "\n"; |
194 | print "=" x 76, "\n"; |
| 188 | printf "%s\n", ¢red("Analysis of posts to $newsgroup_name", 76); |
195 | printf "%s\n", ¢red( "Analysis of posts to $newsgroup_name", 76 ); |
| 189 | print "=" x 76, "\n"; |
196 | print "=" x 76, "\n"; |
| - | 197 | printf "%s\n", |
|
| 190 | printf "%s\n", ¢red("(stats compiled with a script by Garry Knight et al.)", 76); |
198 | ¢red( "(stats compiled with a script by Garry Knight et al.)", 76 ); |
| 191 | print "\n\n"; |
199 | print "\n\n"; |
| 192 | printf "Total posts considered: %s over %d days\n", |
200 | printf "Total posts considered: %s over %d days\n", commify($totalposts), |
| 193 | commify($totalposts), $numdays; |
201 | $numdays; |
| 194 | printf "Earliest article: %s\n", scalar localtime($earliest); |
202 | printf "Earliest article: %s\n", scalar localtime($earliest); |
| 195 | printf "Latest article: %s\n", scalar localtime($latest); |
203 | printf "Latest article: %s\n", scalar localtime($latest); |
| 196 | printf "Original articles: %s, replies: %s\n", commify($origposts), commify($replies); |
204 | printf "Original articles: %s, replies: %s\n", commify($origposts), |
| - | 205 | commify($replies); |
|
| 197 | printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize), |
206 | printf "Total size of posts: %s bytes (%sK) (%.2fM)\n", commify($totsize), |
| 198 | commify(int($totsize / 1024)), $totsize / 1048576; # |
207 | commify( int( $totsize / 1024 ) ), $totsize / 1048576; # |
| 199 | printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n", |
208 | printf "Average %s articles per day, %.2f MB per day, %s bytes per article\n", |
| 200 | commify(int($totalposts / $numdays)), |
209 | commify( int( $totalposts / $numdays ) ), $totsize / $numdays / 1048576, |
| 201 | $totsize / $numdays / 1048576, commify(int($totsize / $totalposts)); |
210 | commify( int( $totsize / $totalposts ) ); |
| 202 | my $count = keys %data; |
211 | my $count = keys %data; |
| 203 | printf "Total headers: %s KB bodies: %s KB\n", |
212 | printf "Total headers: %s KB bodies: %s KB\n", |
| 204 | commify(int($totheader / 1024)), commify(int($totbody / 1024)); |
213 | commify( int( $totheader / 1024 ) ), commify( int( $totbody / 1024 ) ); |
| 205 | printf "Body text - quoted: %s KB, original: %s KB = %02.2f%%, sigs: %s KB\n", |
214 | printf "Body text - quoted: %s KB, original: %s KB = %02.2f%%, sigs: %s KB\n", |
| 206 | commify(int($totquoted / 1024)), commify(int($totorig / 1024)), |
215 | commify( int( $totquoted / 1024 ) ), commify( int( $totorig / 1024 ) ), |
| 207 | ($totorig * 100) / ($totorig + $totquoted), commify(int($totsig / 1024)); |
216 | ( $totorig * 100 ) / ( $totorig + $totquoted ), |
| - | 217 | commify( int( $totsig / 1024 ) ); |
|
| 208 | printf "Total number of posters: %s, average %s bytes per poster\n", commify($count), |
218 | printf "Total number of posters: %s, average %s bytes per poster\n", |
| 209 | commify(int($totsize / $count)); #/ |
219 | commify($count), commify( int( $totsize / $count ) ); #/ |
| 210 | $count = keys %threads; |
220 | $count = keys %threads; |
| 211 | printf "Total number of threads: %s, average %s bytes per thread\n", commify($count), |
221 | printf "Total number of threads: %s, average %s bytes per thread\n", |
| 212 | commify(int($totsize / $count)); #/ |
222 | commify($count), commify( int( $totsize / $count ) ); #/ |
| 213 | printf "Total number of User-Agents: %d\n", scalar keys %agents; |
223 | printf "Total number of User-Agents: %d\n", scalar keys %agents; |
| 214 | print "\n", "=" x 76, "\n"; |
224 | print "\n", "=" x 76, "\n"; |
| 215 | 225 | ||
| 216 | ###############################
|
226 | ###############################
|
| 217 | # show posters by article count Sec 1;
|
227 | # show posters by article count Sec 1;
|
| 218 | ###############################
|
228 | ###############################
|
| 219 | unless ( $skipSec{1} ) { |
229 | unless ( $skipSec{1} ) |
| - | 230 | {
|
|
| 220 | if (keys %data < $topposters) { |
231 | if ( keys %data < $topposters ) |
| - | 232 | {
|
|
| 221 | $count = keys %data; |
233 | $count = keys %data; |
| - | 234 | }
|
|
| 222 | } else { |
235 | else
|
| - | 236 | {
|
|
| 223 | $count = $topposters; |
237 | $count = $topposters; |
| 224 | }
|
238 | }
|
| 225 | printf "%s\n", ¢red("Top $count posters by number of articles", 76); |
239 | printf "%s\n", ¢red( "Top $count posters by number of articles", 76 ); |
| 226 | print "=" x 76, "\n"; |
240 | print "=" x 76, "\n"; |
| 227 | $i = 0; |
241 | $i = 0; |
| 228 | foreach $poster (sort {$data{$b}{count} <=> $data{$a}{count}} keys %data) { |
242 | foreach $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data ) |
| - | 243 | {
|
|
| 229 | my $name = substr($poster, 0, 65); |
244 | my $name = substr( $poster, 0, 65 ); |
| 230 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{count}; |
245 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| - | 246 | $data{$poster}{count}; |
|
| 231 | last if (++$i == $count); |
247 | last if ( ++$i == $count ); |
| 232 | }
|
248 | }
|
| 233 | print "\n", "=" x 76, "\n"; |
249 | print "\n", "=" x 76, "\n"; |
| 234 | }
|
250 | }
|
| 235 | 251 | ||
| 236 | ################################
|
252 | ################################
|
| 237 | # show posters by size in Kbytes Sec 2;
|
253 | # show posters by size in Kbytes Sec 2;
|
| 238 | ################################
|
254 | ################################
|
| 239 | unless ( $skipSec{2} ) { |
255 | unless ( $skipSec{2} ) |
| - | 256 | {
|
|
| 240 | if (keys %data < $topposters) { |
257 | if ( keys %data < $topposters ) |
| - | 258 | {
|
|
| 241 | $count = keys %data; |
259 | $count = keys %data; |
| - | 260 | }
|
|
| 242 | } else { |
261 | else
|
| - | 262 | {
|
|
| 243 | $count = $topposters; |
263 | $count = $topposters; |
| 244 | }
|
264 | }
|
| 245 | printf "%s\n", ¢red("Top $count posters by article size in Kbytes", 76); |
265 | printf "%s\n", ¢red( "Top $count posters by article size in Kbytes", 76 ); |
| 246 | print "=" x 76, "\n"; |
266 | print "=" x 76, "\n"; |
| 247 | $i = 0; |
267 | $i = 0; |
| 248 | foreach $poster (sort {$data{$b}{size} <=> $data{$a}{size}} keys %data) { |
268 | foreach $poster ( sort { $data{$b}{size} <=> $data{$a}{size} } keys %data ) |
| - | 269 | {
|
|
| 249 | my $name = substr($poster, 0, 62); |
270 | my $name = substr( $poster, 0, 62 ); |
| 250 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{size} / 1024; #/ |
271 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ), |
| - | 272 | $data{$poster}{size} / 1024; #/ |
|
| 251 | last if (++$i == $count); |
273 | last if ( ++$i == $count ); |
| 252 | }
|
274 | }
|
| 253 | print "\n", "=" x 76, "\n"; |
275 | print "\n", "=" x 76, "\n"; |
| 254 | }
|
276 | }
|
| 255 | 277 | ||
| 256 | ####################################
|
278 | ####################################
|
| 257 | # show top posters for original text
|
279 | # show top posters for original text
|
| 258 | ####################################
|
280 | ####################################
|
| 259 | unless ( $skipSec{3} ) { |
281 | unless ( $skipSec{3} ) |
| - | 282 | {
|
|
| 260 | if (keys %data < $topposters) { |
283 | if ( keys %data < $topposters ) |
| - | 284 | {
|
|
| 261 | $count = keys %data; |
285 | $count = keys %data; |
| - | 286 | }
|
|
| 262 | } else { |
287 | else
|
| - | 288 | {
|
|
| 263 | $count = $topposters; |
289 | $count = $topposters; |
| 264 | }
|
290 | }
|
| - | 291 | printf "%s\n", |
|
| 265 | printf "%s\n", ¢red("Top $count responders by original text (> 5 posts)", 76); |
292 | ¢red( "Top $count responders by original text (> 5 posts)", 76 ); |
| 266 | print "=" x 76, "\n"; |
293 | print "=" x 76, "\n"; |
| 267 | $i = 0; |
294 | $i = 0; |
| 268 | foreach $poster (sort { $data{$b}{percent} <=> $data{$a}{percent} } keys %data) { |
295 | foreach $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} } |
| - | 296 | keys %data ) |
|
| - | 297 | {
|
|
| 269 | next if $data{$poster}{quoted} == 0; |
298 | next if $data{$poster}{quoted} == 0; |
| 270 | next if $data{$poster}{count} < 5; |
299 | next if $data{$poster}{count} < 5; |
| 271 | my $name = substr($poster, 0, 63); |
300 | my $name = substr( $poster, 0, 63 ); |
| 272 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent}; |
301 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| - | 302 | $data{$poster}{percent}; |
|
| 273 | last if (++$i == $count); |
303 | last if ( ++$i == $count ); |
| 274 | }
|
304 | }
|
| 275 | print "\n", "=" x 76, "\n"; |
305 | print "\n", "=" x 76, "\n"; |
| 276 | }
|
306 | }
|
| 277 | 307 | ||
| 278 | #######################################
|
308 | #######################################
|
| 279 | # show bottom posters for original text
|
309 | # show bottom posters for original text
|
| 280 | #######################################
|
310 | #######################################
|
| 281 | unless ( $skipSec{4} ) { |
311 | unless ( $skipSec{4} ) |
| - | 312 | {
|
|
| 282 | if (keys %data < $topposters) { |
313 | if ( keys %data < $topposters ) |
| - | 314 | {
|
|
| 283 | $count = keys %data; |
315 | $count = keys %data; |
| - | 316 | }
|
|
| 284 | } else { |
317 | else
|
| - | 318 | {
|
|
| 285 | $count = $topposters; |
319 | $count = $topposters; |
| 286 | }
|
320 | }
|
| - | 321 | printf "%s\n", |
|
| 287 | printf "%s\n", ¢red("Bottom $count responders by original text (> 5 posts)", 76); |
322 | ¢red( "Bottom $count responders by original text (> 5 posts)", 76 ); |
| 288 | print "=" x 76, "\n"; |
323 | print "=" x 76, "\n"; |
| 289 | $i = 0; |
324 | $i = 0; |
| 290 | foreach $poster (sort { $data{$a}{percent} <=> $data{$b}{percent} } keys %data) { |
325 | foreach $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} } |
| - | 326 | keys %data ) |
|
| - | 327 | {
|
|
| 291 | next if $data{$poster}{quoted} == 0; |
328 | next if $data{$poster}{quoted} == 0; |
| 292 | next if $data{$poster}{count} < 5; |
329 | next if $data{$poster}{count} < 5; |
| 293 | my $name = substr($poster, 0, 63); |
330 | my $name = substr( $poster, 0, 63 ); |
| 294 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad($poster, 63, "."), $data{$poster}{percent}; |
331 | printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ), |
| - | 332 | $data{$poster}{percent}; |
|
| 295 | last if (++$i == $count); |
333 | last if ( ++$i == $count ); |
| 296 | }
|
334 | }
|
| 297 | print "\n", "=" x 76, "\n"; |
335 | print "\n", "=" x 76, "\n"; |
| 298 | }
|
336 | }
|
| 299 | 337 | ||
| 300 | ####################################
|
338 | ####################################
|
| 301 | # show threads by number of articles
|
339 | # show threads by number of articles
|
| 302 | ####################################
|
340 | ####################################
|
| 303 | unless ( $skipSec{5} ) { |
341 | unless ( $skipSec{5} ) |
| - | 342 | {
|
|
| 304 | if (keys %threads < $topthreads) { |
343 | if ( keys %threads < $topthreads ) |
| - | 344 | {
|
|
| 305 | $count = keys %threads; |
345 | $count = keys %threads; |
| - | 346 | }
|
|
| 306 | } else { |
347 | else
|
| - | 348 | {
|
|
| 307 | $count = $topthreads; |
349 | $count = $topthreads; |
| 308 | }
|
350 | }
|
| 309 | printf "%s\n", ¢red("Top $count threads by no. of articles", 76); |
351 | printf "%s\n", ¢red( "Top $count threads by no. of articles", 76 ); |
| 310 | print "=" x 76, "\n"; |
352 | print "=" x 76, "\n"; |
| 311 | $i = 0; |
353 | $i = 0; |
| 312 | foreach my $thread (sort {$threads{$b}{count} <=> $threads{$a}{count}} keys %threads) { |
354 | foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} } |
| - | 355 | keys %threads ) |
|
| - | 356 | {
|
|
| 313 | my $name = substr($thread, 0, 65); |
357 | my $name = substr( $thread, 0, 65 ); |
| 314 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{count}; |
358 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| - | 359 | $threads{$thread}{count}; |
|
| 315 | last if (++$i == $count); |
360 | last if ( ++$i == $count ); |
| 316 | }
|
361 | }
|
| 317 | print "\n", "=" x 76, "\n"; |
362 | print "\n", "=" x 76, "\n"; |
| 318 | }
|
363 | }
|
| 319 | ################################
|
364 | ################################
|
| 320 | # show threads by size in Kbytes
|
365 | # show threads by size in Kbytes
|
| 321 | ################################
|
366 | ################################
|
| 322 | unless ( $skipSec{6} ) { |
367 | unless ( $skipSec{6} ) |
| - | 368 | {
|
|
| 323 | if (keys %threads < $topthreads) { |
369 | if ( keys %threads < $topthreads ) |
| - | 370 | {
|
|
| 324 | $count = keys %threads; |
371 | $count = keys %threads; |
| - | 372 | }
|
|
| 325 | } else { |
373 | else
|
| - | 374 | {
|
|
| 326 | $count = $topthreads; |
375 | $count = $topthreads; |
| 327 | }
|
376 | }
|
| 328 | printf "%s\n", ¢red("Top $count threads by size in KB", 76); |
377 | printf "%s\n", ¢red( "Top $count threads by size in KB", 76 ); |
| 329 | print "=" x 76, "\n"; |
378 | print "=" x 76, "\n"; |
| 330 | $i = 0; |
379 | $i = 0; |
| 331 | foreach my $thread (sort {$threads{$b}{size} <=> $threads{$a}{size}} keys %threads) { |
380 | foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} } |
| - | 381 | keys %threads ) |
|
| - | 382 | {
|
|
| 332 | my $name = substr($thread, 0, 65); |
383 | my $name = substr( $thread, 0, 65 ); |
| 333 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $threads{$thread}{size} / 1024; #/ |
384 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| - | 385 | $threads{$thread}{size} / 1024; #/ |
|
| 334 | last if (++$i == $count); |
386 | last if ( ++$i == $count ); |
| 335 | }
|
387 | }
|
| 336 | print "\n", "=" x 76, "\n"; |
388 | print "\n", "=" x 76, "\n"; |
| 337 | }
|
389 | }
|
| 338 | 390 | ||
| 339 | #################################
|
391 | #################################
|
| 340 | # show top 10 cross-posted groups
|
392 | # show top 10 cross-posted groups
|
| 341 | #################################
|
393 | #################################
|
| 342 | unless ( $skipSec{7} ) { |
394 | unless ( $skipSec{7} ) |
| - | 395 | {
|
|
| 343 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
396 | delete $crossposts{"$newsgroup_name"}; # don't include ours |
| 344 | if (keys %crossposts < $topcrossposts) { |
397 | if ( keys %crossposts < $topcrossposts ) |
| - | 398 | {
|
|
| 345 | $count = keys %crossposts; |
399 | $count = keys %crossposts; |
| - | 400 | }
|
|
| 346 | } else { |
401 | else
|
| - | 402 | {
|
|
| 347 | $count = $topcrossposts; |
403 | $count = $topcrossposts; |
| 348 | }
|
404 | }
|
| 349 | printf "%s\n", ¢red("Top $count cross-posted groups", 76); |
405 | printf "%s\n", ¢red( "Top $count cross-posted groups", 76 ); |
| 350 | print "=" x 76, "\n"; |
406 | print "=" x 76, "\n"; |
| 351 | $i = 0; |
407 | $i = 0; |
| - | 408 | foreach
|
|
| 352 | foreach my $name (sort {$crossposts{$b} <=> $crossposts{$a}} keys %crossposts) { |
409 | my $name ( sort { $crossposts{$b} <=> $crossposts{$a} } keys %crossposts ) |
| - | 410 | {
|
|
| 353 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($name, 63, "."), $crossposts{$name}; |
411 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ), |
| - | 412 | $crossposts{$name}; |
|
| 354 | last if (++$i == $count); |
413 | last if ( ++$i == $count ); |
| 355 | }
|
414 | }
|
| 356 | print "\n", "=" x 76, "\n"; |
415 | print "\n", "=" x 76, "\n"; |
| 357 | }
|
416 | }
|
| 358 | #######################
|
417 | #######################
|
| 359 | #show agents and counts
|
418 | #show agents and counts
|
| 360 | #######################
|
419 | #######################
|
| 361 | unless ( $skipSec{8} ) { |
420 | unless ( $skipSec{8} ) |
| - | 421 | {
|
|
| 362 | if (keys %agents < $topagents) { |
422 | if ( keys %agents < $topagents ) |
| - | 423 | {
|
|
| 363 | $count = keys %agents; |
424 | $count = keys %agents; |
| - | 425 | }
|
|
| 364 | } else { |
426 | else
|
| - | 427 | {
|
|
| 365 | $count = $topagents; |
428 | $count = $topagents; |
| 366 | }
|
429 | }
|
| 367 | printf "%s\n", ¢red("Top $count User Agents by poster", 76); |
430 | printf "%s\n", ¢red( "Top $count User Agents by poster", 76 ); |
| 368 | print "=" x 76, "\n"; |
431 | print "=" x 76, "\n"; |
| 369 | $i = 0; |
432 | $i = 0; |
| 370 | foreach my $agent (sort {$agents{$b} <=> $agents{$a}} keys %agents) { |
433 | foreach my $agent ( sort { $agents{$b} <=> $agents{$a} } keys %agents ) |
| - | 434 | {
|
|
| 371 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($agent, 63, "."), $agents{$agent}; |
435 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $agent, 63, "." ), |
| - | 436 | $agents{$agent}; |
|
| 372 | last if (++$i == $count); |
437 | last if ( ++$i == $count ); |
| 373 | }
|
438 | }
|
| 374 | print "\n", "=" x 76, "\n"; |
439 | print "\n", "=" x 76, "\n"; |
| 375 | }
|
440 | }
|
| 376 | 441 | ||
| 377 | #######################
|
442 | #######################
|
| 378 | #show distinct agents
|
443 | #show distinct agents
|
| 379 | #######################
|
444 | #######################
|
| 380 | unless ( $skipSec{9} ) { |
445 | unless ( $skipSec{9} ) |
| - | 446 | {
|
|
| 381 | if (keys %distinct_agent < $topagents) { |
447 | if ( keys %distinct_agent < $topagents ) |
| - | 448 | {
|
|
| 382 | $count = keys %distinct_agent; |
449 | $count = keys %distinct_agent; |
| - | 450 | }
|
|
| 383 | } else { |
451 | else
|
| - | 452 | {
|
|
| 384 | $count = $topagents; |
453 | $count = $topagents; |
| 385 | }
|
454 | }
|
| 386 | printf "%s\n", ¢red("Top $count User Agents by number of posts", 76); |
455 | printf "%s\n", ¢red( "Top $count User Agents by number of posts", 76 ); |
| 387 | print "=" x 76, "\n"; |
456 | print "=" x 76, "\n"; |
| 388 | $i = 0; |
457 | $i = 0; |
| 389 | foreach my $agent (sort {$distinct_agent{$b} <=> $distinct_agent{$a}} keys %distinct_agent) { |
458 | foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} } |
| - | 459 | keys %distinct_agent ) |
|
| - | 460 | {
|
|
| 390 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad($agent, 58, "."), $distinct_agent{$agent}, (( $distinct_agent{$agent} / $totalposts ) * 100); |
461 | printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ), |
| - | 462 | $distinct_agent{$agent}, |
|
| - | 463 | ( ( $distinct_agent{$agent} / $totalposts ) * 100 ); |
|
| 391 | last if (++$i == $count); |
464 | last if ( ++$i == $count ); |
| 392 | }
|
465 | }
|
| 393 | print "\n", "=" x 76, "\n"; |
466 | print "\n", "=" x 76, "\n"; |
| 394 | }
|
467 | }
|
| 395 | 468 | ||
| 396 | ##########################
|
469 | ##########################
|
| 397 | #show timezones and counts
|
470 | #show timezones and counts
|
| 398 | ##########################
|
471 | ##########################
|
| 399 | unless ( $skipSec{10} ) { |
472 | unless ( $skipSec{10} ) |
| - | 473 | {
|
|
| 400 | if (keys %tz < $toptz) { |
474 | if ( keys %tz < $toptz ) |
| - | 475 | {
|
|
| 401 | $count = keys %tz; |
476 | $count = keys %tz; |
| - | 477 | }
|
|
| 402 | } else { |
478 | else
|
| - | 479 | {
|
|
| 403 | $count = $toptz; |
480 | $count = $toptz; |
| 404 | }
|
481 | }
|
| 405 | printf "%s\n", ¢red("Top 10 time zones", 76); |
482 | printf "%s\n", ¢red( "Top 10 time zones", 76 ); |
| 406 | print "=" x 76, "\n"; |
483 | print "=" x 76, "\n"; |
| 407 | $i = 0; |
484 | $i = 0; |
| 408 | foreach my $zone (sort {$tz{$b} <=> $tz{$a}} keys %tz) { |
485 | foreach my $zone ( sort { $tz{$b} <=> $tz{$a} } keys %tz ) |
| - | 486 | {
|
|
| 409 | printf "%2d: %-63s : %6d\n", $i + 1, rpad($zone, 63, "."), $tz{$zone}; |
487 | printf "%2d: %-63s : %6d\n", $i + 1, rpad( $zone, 63, "." ), $tz{$zone}; |
| 410 | last if (++$i == $count); |
488 | last if ( ++$i == $count ); |
| 411 | }
|
489 | }
|
| 412 | print "\n", "=" x 76, "\n"; |
490 | print "\n", "=" x 76, "\n"; |
| 413 | }
|
491 | }
|
| 414 | 492 | ||
| 415 | - | ||
| 416 | ################################ SUBROUTINES ################################
|
493 | ################################ SUBROUTINES ################################
|
| 417 | 494 | ||
| 418 | - | ||
| 419 | #######################################
|
495 | #######################################
|
| 420 | # get current article's header and body
|
496 | # get current article's header and body
|
| 421 | #######################################
|
497 | #######################################
|
| 422 | sub getarticle { |
498 | sub getarticle
|
| - | 499 | {
|
|
| 423 | %headers = (); # dump old headers |
500 | %headers = (); # dump old headers |
| 424 | my $filename = shift; # get the name of the file |
501 | my $filename = shift; # get the name of the file |
| - | 502 | ||
| 425 | # get stats about the file itself
|
503 | # get stats about the file itself
|
| 426 | $filesize = -s $filename; # get total size of file |
504 | $filesize = -s $filename; # get total size of file |
| 427 | $totsize += $filesize; # bump total sizes of all files |
505 | $totsize += $filesize; # bump total sizes of all files |
| 428 | 506 | ||
| 429 | my $mtime = (stat $filename)[9]; |
507 | my $mtime = ( stat $filename )[9]; |
| 430 | if ( $mtime < $earliest ) { |
508 | if ( $mtime < $earliest ) |
| - | 509 | {
|
|
| 431 | $earliest = $mtime; |
510 | $earliest = $mtime; |
| - | 511 | }
|
|
| 432 | } elsif ( $mtime > $latest ) { |
512 | elsif ( $mtime > $latest ) |
| - | 513 | {
|
|
| 433 | $latest = $mtime; |
514 | $latest = $mtime; |
| 434 | }
|
515 | }
|
| 435 | 516 | ||
| 436 | # now read the file
|
517 | # now read the file
|
| 437 | open(FILE, $filename) or die "Can't open $filename: $!\n"; |
518 | open( FILE, $filename ) or die "Can't open $filename: $!\n"; |
| 438 | while (<FILE>) { |
519 | while (<FILE>) |
| - | 520 | {
|
|
| 439 | $totheader += length($_); # bump total header size |
521 | $totheader += length($_); # bump total header size |
| 440 | last if (/^\s*$/); # end of header? |
522 | last if (/^\s*$/); # end of header? |
| 441 | if (/^([^:\s]*):\s+(.*)/) { |
523 | if (/^([^:\s]*):\s+(.*)/) |
| - | 524 | {
|
|
| 442 | my($key,$val) = ($1,$2); |
525 | my ( $key, $val ) = ( $1, $2 ); |
| 443 | $headers{$key} = decode('MIME-Header', $val); |
526 | $headers{$key} = decode( 'MIME-Header', $val ); |
| 444 | $lcheader{clean(lc($key))} = clean($val); |
527 | $lcheader{ clean( lc($key) ) } = clean($val); |
| 445 | }
|
528 | }
|
| 446 | }
|
529 | }
|
| 447 | @body = <FILE>; # slurp up body |
530 | @body = <FILE>; # slurp up body |
| 448 | close(FILE); |
531 | close(FILE); |
| 449 | } # getarticle |
532 | } # getarticle |
| 450 | 533 | ||
| 451 | ###################################
|
534 | ###################################
|
| 452 | # get data from the current article
|
535 | # get data from the current article
|
| 453 | ###################################
|
536 | ###################################
|
| 454 | sub getdata { |
537 | sub getdata
|
| - | 538 | {
|
|
| 455 | #### First, analyse header fields ####
|
539 | #### First, analyse header fields ####
|
| 456 | 540 | ||
| 457 | # Set up this poster if not defined, get counts, sizes
|
541 | # Set up this poster if not defined, get counts, sizes
|
| 458 | $poster = encode('UTF-8', $headers{From}); # get the poster's name |
542 | $poster = encode( 'UTF-8', $headers{From} ); # get the poster's name |
| - | 543 | if ( !defined( $data{$poster} ) ) |
|
| 459 | if (!defined($data{$poster})) { # seen this one before? |
544 | { # seen this one before? |
| 460 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
545 | $data{$poster}{agent} = 'Unknown'; # comes after For: field |
| 461 | $data{$poster}{orig} = 0; |
546 | $data{$poster}{orig} = 0; |
| 462 | $data{$poster}{quoted} = 0; |
547 | $data{$poster}{quoted} = 0; |
| 463 | }
|
548 | }
|
| 464 | $data{$poster}{count}++; # bump count for this poster |
549 | $data{$poster}{count}++; # bump count for this poster |
| 465 | $data{$poster}{size} += $filesize; # total size of file |
550 | $data{$poster}{size} += $filesize; # total size of file |
| 466 | 551 | ||
| 467 | # The User-Agent and/or X-Newsreader fields
|
552 | # The User-Agent and/or X-Newsreader fields
|
| 468 | # for User-Agent by poster
|
553 | # for User-Agent by poster
|
| 469 | if (defined $lcheader{"user-agent"}) { |
554 | if ( defined $lcheader{"user-agent"} ) |
| - | 555 | {
|
|
| 470 | $data{$poster}{agent} = $lcheader{"user-agent"}; |
556 | $data{$poster}{agent} = $lcheader{"user-agent"}; |
| 471 | }
|
557 | }
|
| 472 | if (defined $lcheader{"x-newsreader"}) { |
558 | if ( defined $lcheader{"x-newsreader"} ) |
| - | 559 | {
|
|
| 473 | $data{$poster}{agent} = $lcheader{"x-newsreader"}; |
560 | $data{$poster}{agent} = $lcheader{"x-newsreader"}; |
| 474 | }
|
561 | }
|
| 475 | 562 | ||
| 476 | # The User Agent for User-Agent by number of posts
|
563 | # The User Agent for User-Agent by number of posts
|
| 477 | my $UA = "unknown"; |
564 | my $UA = "unknown"; |
| 478 | foreach my $keys ( keys %lcheader ) |
565 | foreach my $keys ( keys %lcheader ) |
| 479 | {
|
566 | {
|
| 480 | if (defined $lcheader{'user-agent'}) |
567 | if ( defined $lcheader{'user-agent'} ) |
| 481 | {
|
568 | {
|
| 482 | $UA = $lcheader{'user-agent'}; |
569 | $UA = $lcheader{'user-agent'}; |
| 483 | }
|
570 | }
|
| 484 | elsif (defined $lcheader{"x-newsreader"}) |
571 | elsif ( defined $lcheader{"x-newsreader"} ) |
| 485 | {
|
572 | {
|
| 486 | $UA = $lcheader{"x-newsreader"}; |
573 | $UA = $lcheader{"x-newsreader"}; |
| 487 | }
|
574 | }
|
| 488 | elsif (defined $lcheader{'x-mailer'}) |
575 | elsif ( defined $lcheader{'x-mailer'} ) |
| 489 | {
|
576 | {
|
| 490 | $UA = $lcheader{'x-mailer'}; |
577 | $UA = $lcheader{'x-mailer'}; |
| 491 | }
|
578 | }
|
| - | 579 | elsif ( |
|
| 492 | elsif ((defined $lcheader{'organization'}) && |
580 | ( defined $lcheader{'organization'} ) |
| - | 581 | && ( $lcheader{'organization'} =~ |
|
| 493 | ($lcheader{'organization'} =~ /groups\.google|AOL|Supernews|WebTV|compuserve/)) |
582 | /groups\.google|AOL|Supernews|WebTV|compuserve/ ) |
| - | 583 | )
|
|
| 494 | {
|
584 | {
|
| 495 | $UA = $lcheader{'organization'}; |
585 | $UA = $lcheader{'organization'}; |
| 496 | }
|
586 | }
|
| 497 | elsif ( $lcheader{'message-id'} =~ /pine/i ) |
587 | elsif ( $lcheader{'message-id'} =~ /pine/i ) |
| 498 | {
|
588 | {
|
| 499 | $UA = "Pine"; |
589 | $UA = "Pine"; |
| 500 | } ## Hopefully found UA, else set to unknown |
590 | } ## Hopefully found UA, else set to unknown |
| 501 | }
|
591 | }
|
| 502 | - | ||
| 503 | 592 | ||
| 504 | $UA = clean($UA); |
593 | $UA = clean($UA); |
| 505 | $UA = get_agent($UA); |
594 | $UA = get_agent($UA); |
| 506 | - | ||
| 507 | 595 | ||
| 508 | sub get_agent { |
596 | sub get_agent
|
| - | 597 | {
|
|
| 509 | my $raw = shift; |
598 | my $raw = shift; |
| 510 | my $agent = $raw; |
599 | my $agent = $raw; |
| 511 | 600 | ||
| 512 | ## strip http
|
601 | ## strip http
|
| 513 | if ( $raw =~ /.*http.*/ ) { |
602 | if ( $raw =~ /.*http.*/ ) |
| - | 603 | {
|
|
| 514 | $raw =~ s!posted via!!i; |
604 | $raw =~ s!posted via!!i; |
| 515 | $raw =~ s!http://!!g; |
605 | $raw =~ s!http://!!g; |
| 516 | $raw =~ s!/!!g; |
606 | $raw =~ s!/!!g; |
| 517 | $raw =~ s! !!g; |
607 | $raw =~ s! !!g; |
| 518 | }
|
608 | }
|
| 519 | 609 | ||
| 520 | ## Fix Outlook from Mac
|
610 | ## Fix Outlook from Mac
|
| 521 | if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g;} |
611 | if ( $raw =~ /^microsoft/i ) { $raw =~ s/-/ /g; } |
| 522 | 612 | ||
| 523 | ## Pick out the popular agents
|
613 | ## Pick out the popular agents
|
| 524 | if ( $raw =~ /(outlook express)/i || |
614 | if ( $raw =~ /(outlook express)/i |
| 525 | $raw =~ /(microplanet gravity)/i || |
615 | || $raw =~ /(microplanet gravity)/i |
| 526 | $raw =~ /(news rover)/i || |
616 | || $raw =~ /(news rover)/i |
| 527 | $raw =~ /(forte agent)/i || |
617 | || $raw =~ /(forte agent)/i |
| 528 | $raw =~ /(forte free agent)/i |
618 | || $raw =~ /(forte free agent)/i ) |
| 529 | )
|
619 | {
|
| 530 | {
|
- | |
| 531 | $agent = $1; |
620 | $agent = $1; |
| 532 | }
|
621 | }
|
| - | 622 | elsif ( |
|
| 533 | elsif ( $raw =~ /^( |
623 | $raw =~ /^( |
| 534 | pan |
624 | pan |
| 535 | |sylpheed
|
625 | |sylpheed
|
| 536 | |slrn
|
626 | |slrn
|
| 537 | |mozilla
|
627 | |mozilla
|
| 538 | |knode
|
628 | |knode
|
| Line 556... | Line 646... | ||
| 556 | |nn
|
646 | |nn
|
| 557 | |rn
|
647 | |rn
|
| 558 | |007
|
648 | |007
|
| 559 | |webtv
|
649 | |webtv
|
| 560 | |compuserve
|
650 | |compuserve
|
| 561 | )/ix ) |
651 | )/ix |
| - | 652 | )
|
|
| 562 | {
|
653 | {
|
| 563 | $agent = $1; |
654 | $agent = $1; |
| 564 | }
|
655 | }
|
| 565 | else
|
656 | else
|
| 566 | {
|
657 | {
|
| 567 | ## Clean up unknown agents
|
658 | ## Clean up unknown agents
|
| 568 | if ( $raw =~ m!^(.*?)/! ) { |
659 | if ( $raw =~ m!^(.*?)/! ) |
| - | 660 | {
|
|
| 569 | $agent = $1; |
661 | $agent = $1; |
| 570 | }
|
662 | }
|
| 571 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
663 | elsif ( $raw =~ /^(\w*)\d.*/ ) |
| 572 | {
|
664 | {
|
| 573 | $agent = $1; |
665 | $agent = $1; |
| 574 | }
|
666 | }
|
| 575 | }
|
667 | }
|
| 576 | 668 | ||
| 577 | $distinct_agent{$agent}++; |
669 | $distinct_agent{$agent}++; |
| 578 | return $agent; |
670 | return $agent; |
| 579 | }
|
671 | }
|
| 580 | 672 | ||
| - | 673 | # Get all cross-posted newsgroups
|
|
| - | 674 | for ( split /,/, $headers{"Newsgroups"} ) |
|
| - | 675 | {
|
|
| - | 676 | $crossposts{$_}++; # bump count for each |
|
| - | 677 | }
|
|
| 581 | 678 | ||
| 582 | # Get all cross-posted newsgroups
|
- | |
| 583 | for (split /,/, $headers{"Newsgroups"}) { |
- | |
| 584 | $crossposts{$_}++; # bump count for each |
- | |
| 585 | }
|
- | |
| 586 | - | ||
| 587 | # Get threads
|
679 | # Get threads
|
| 588 | my $thread = encode('UTF-8', $headers{"Subject"}); |
680 | my $thread = encode( 'UTF-8', $headers{"Subject"} ); |
| 589 | $thread =~ s/^re: //i; # Remove Re: or re: at start |
681 | $thread =~ s/^re: //i; # Remove Re: or re: at start |
| 590 | $thread =~ s/\s+/ /g; # collapse whitespace |
682 | $thread =~ s/\s+/ /g; # collapse whitespace |
| 591 | $threads{$thread}{count} += 1; # bump count of this subject |
683 | $threads{$thread}{count} += 1; # bump count of this subject |
| 592 | $threads{$thread}{size} += $filesize; # bump bytes for this thread |
684 | $threads{$thread}{size} += $filesize; # bump bytes for this thread |
| 593 | 685 | ||
| 594 | # Is this an original post or a reply?
|
686 | # Is this an original post or a reply?
|
| 595 | if (defined $headers{"References"}) { |
687 | if ( defined $headers{"References"} ) |
| - | 688 | {
|
|
| 596 | $replies++; |
689 | $replies++; |
| - | 690 | }
|
|
| 597 | } else { |
691 | else
|
| - | 692 | {
|
|
| 598 | $origposts++; |
693 | $origposts++; |
| 599 | }
|
694 | }
|
| 600 | 695 | ||
| 601 | # Get the time zone
|
696 | # Get the time zone
|
| 602 | $_ = $headers{"Date"}; |
697 | $_ = $headers{"Date"}; |
| 603 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
698 | my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/; |
| 604 | if (($tz =~ /UTC/) or ($tz =~ /GMT/) or ($tz =~ /0000/)) { |
699 | if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) ) |
| - | 700 | {
|
|
| 605 | $tz = "UTC"; |
701 | $tz = "UTC"; |
| 606 | }
|
702 | }
|
| 607 | $tz{$tz}++; |
703 | $tz{$tz}++; |
| 608 | 704 | ||
| 609 | #### Now analyse the body text ####
|
705 | #### Now analyse the body text ####
|
| 610 | my $insig = 0; |
706 | my $insig = 0; |
| 611 | for (@body) { |
707 | for (@body) |
| - | 708 | {
|
|
| 612 | $totbody += length($_); # bump total body size |
709 | $totbody += length($_); # bump total body size |
| 613 | next if (/^$>/); # don't count blank lines in body |
710 | next if (/^$>/); # don't count blank lines in body |
| 614 | if ($insig == 1) { |
711 | if ( $insig == 1 ) |
| 615 | $totsig += length($_); # bump total sig size |
- | |
| 616 | # Bill Unruh uses ] quotes, and another poster uses ::
|
- | |
| 617 | } elsif (/^\s*[>\]]/ or /^\s*::/) { # are we in a quote line? |
- | |
| 618 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
- | |
| 619 | $totquoted += length($_); |
- | |
| 620 | } elsif (/-- /) { |
- | |
| 621 | $insig = 1; |
- | |
| 622 | } else { |
712 | {
|
| 623 | # we must be processing an original line
|
- | |
| 624 | $data{$poster}{orig} += length($_); # bump count of original chrs |
713 | $totsig += length($_); # bump total sig size |
| 625 | $totorig += length($_); |
- | |
| 626 | }
|
- | |
| 627 | } # end for (@body) |
- | |
| 628 | 714 | ||
| - | 715 | # Bill Unruh uses ] quotes, and another poster uses ::
|
|
| - | 716 | }
|
|
| - | 717 | elsif ( /^\s*[>\]]/ or /^\s*::/ ) |
|
| - | 718 | { # are we in a quote line? |
|
| - | 719 | $data{$poster}{quoted} += length($_); # bump count of quoted chrs |
|
| - | 720 | $totquoted += length($_); |
|
| - | 721 | }
|
|
| - | 722 | elsif (/-- /) |
|
| - | 723 | {
|
|
| - | 724 | $insig = 1; |
|
| - | 725 | }
|
|
| - | 726 | else
|
|
| - | 727 | {
|
|
| - | 728 | ||
| - | 729 | # we must be processing an original line
|
|
| - | 730 | $data{$poster}{orig} += length($_); # bump count of original chrs |
|
| - | 731 | $totorig += length($_); |
|
| - | 732 | }
|
|
| - | 733 | } # end for (@body) |
|
| - | 734 | ||
| 629 | } # getdata |
735 | } # getdata |
| 630 | 736 | ||
| 631 | ########################################
|
737 | ########################################
|
| 632 | # Count the User-Agents used, collapsing
|
738 | # Count the User-Agents used, collapsing
|
| 633 | # different versions into one per agent.
|
739 | # different versions into one per agent.
|
| 634 | ########################################
|
740 | ########################################
|
| 635 | sub countagents { |
741 | sub countagents
|
| - | 742 | {
|
|
| 636 | POSTER:
|
743 | POSTER:
|
| 637 | foreach $poster (keys %data) { |
744 | foreach $poster ( keys %data ) |
| - | 745 | {
|
|
| 638 | foreach my $agent_name (keys %distinct_agent) { # check against known ones |
746 | foreach my $agent_name ( keys %distinct_agent ) |
| - | 747 | { # check against known ones |
|
| 639 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) { |
748 | if ( $data{$poster}{agent} =~ /\Q$agent_name\E/ ) |
| - | 749 | {
|
|
| 640 | $agents{$agent_name}++; |
750 | $agents{$agent_name}++; |
| 641 | next POSTER; |
751 | next POSTER; |
| 642 | }
|
752 | }
|
| 643 | }
|
753 | }
|
| 644 | $agents{$data{$poster}{agent}}++; |
754 | $agents{ $data{$poster}{agent} }++; |
| 645 | }
|
755 | }
|
| 646 | } # countagents |
756 | } # countagents |
| 647 | 757 | ||
| 648 | ############################################
|
758 | ############################################
|
| 649 | # set orig/total percentages for all posters
|
759 | # set orig/total percentages for all posters
|
| 650 | ############################################
|
760 | ############################################
|
| 651 | sub fixpercent { |
761 | sub fixpercent
|
| - | 762 | {
|
|
| 652 | foreach $poster (keys %data) { |
763 | foreach $poster ( keys %data ) |
| - | 764 | {
|
|
| 653 | my $percent = 100; |
765 | my $percent = 100; |
| 654 | if (($data{$poster}{orig} != 0) and ($data{$poster}{quoted} != 0)) { |
766 | if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) ) |
| - | 767 | {
|
|
| - | 768 | $percent = $data{$poster}{orig} * 100 / |
|
| 655 | $percent = $data{$poster}{orig} * 100 / ($data{$poster}{quoted} + $data{$poster}{orig}); #/ |
769 | ( $data{$poster}{quoted} + $data{$poster}{orig} ); #/ |
| - | 770 | }
|
|
| 656 | } elsif ($data{$poster}{orig} == 0) { |
771 | elsif ( $data{$poster}{orig} == 0 ) |
| - | 772 | {
|
|
| 657 | $percent = 0; |
773 | $percent = 0; |
| 658 | }
|
774 | }
|
| 659 | $data{$poster}{percent} = $percent; |
775 | $data{$poster}{percent} = $percent; |
| 660 | }
|
776 | }
|
| 661 | }
|
777 | }
|
| 662 | 778 | ||
| 663 | ##############################
|
779 | ##############################
|
| 664 | # right pad a string with '.'s
|
780 | # right pad a string with '.'s
|
| 665 | ##############################
|
781 | ##############################
|
| 666 | sub rpad { |
782 | sub rpad
|
| - | 783 | {
|
|
| - | 784 | ||
| 667 | # get text to pad, length to pad, pad chr
|
785 | # get text to pad, length to pad, pad chr
|
| 668 | my ($text, $pad_len, $pad_chr) = @_; |
786 | my ( $text, $pad_len, $pad_chr ) = @_; |
| 669 | if (length($text) > $pad_len) { |
787 | if ( length($text) > $pad_len ) |
| - | 788 | {
|
|
| 670 | $text = substr($text, 0, $pad_len); |
789 | $text = substr( $text, 0, $pad_len ); |
| 671 | }
|
790 | }
|
| 672 | my $padded = $text . $pad_chr x ( $pad_len - length( $text ) ); |
791 | my $padded = $text . $pad_chr x ( $pad_len - length($text) ); |
| 673 | return $padded; |
792 | return $padded; |
| 674 | }
|
793 | }
|
| 675 | 794 | ||
| 676 | #################
|
795 | #################
|
| 677 | # centre a string
|
796 | # centre a string
|
| 678 | #################
|
797 | #################
|
| 679 | sub centred { |
798 | sub centred
|
| - | 799 | {
|
|
| 680 | my ($text, $width) = @_; # text to centre, size of field to centre in |
800 | my ( $text, $width ) = @_; # text to centre, size of field to centre in |
| 681 | my $pad_len = ($width - length($text)) / 2; #/ |
801 | my $pad_len = ( $width - length($text) ) / 2; #/ |
| 682 | my $centred = " " x $pad_len . $text; |
802 | my $centred = " " x $pad_len . $text; |
| 683 | return $centred; |
803 | return $centred; |
| 684 | }
|
804 | }
|
| 685 | 805 | ||
| 686 | ##########################
|
806 | ##########################
|
| 687 | # put commas into a number
|
807 | # put commas into a number
|
| 688 | ##########################
|
808 | ##########################
|
| 689 | sub commify { |
809 | sub commify
|
| - | 810 | {
|
|
| 690 | $_ = shift; |
811 | $_ = shift; |
| 691 | 1 while s/^(-?\d+)(\d{3})/$1,$2/; |
812 | 1 while s/^(-?\d+)(\d{3})/$1,$2/; |
| 692 | return $_; |
813 | return $_; |
| 693 | }
|
814 | }
|
| 694 | 815 | ||
| 695 | #########################
|
816 | #########################
|
| 696 | # clean
|
817 | # clean
|
| 697 | #########################
|
818 | #########################
|
| 698 | sub clean { |
819 | sub clean
|
| - | 820 | {
|
|
| 699 | my $dirty = shift; |
821 | my $dirty = shift; |
| 700 | my $clean = $dirty; |
822 | my $clean = $dirty; |
| 701 | $clean =~ s/^\s*//; |
823 | $clean =~ s/^\s*//; |
| 702 | $clean =~ s/\s*$//; |
824 | $clean =~ s/\s*$//; |
| 703 | 825 | ||
| 704 | return $clean; |
826 | return $clean; |
| 705 | }
|
827 | }
|
| 706 | 828 | ||
| - | 829 | sub usage
|
|
| - | 830 | {
|
|
| 707 | 831 | ||
| 708 | sub usage { |
- | |
| 709 | - | ||
| 710 | print "usage: newstat.pl newsgroupname\n"; |
832 | print "usage: newstat.pl newsgroupname\n"; |
| 711 | exit 1; |
833 | exit 1; |
| 712 | }
|
834 | }
|
| 713 | 835 | ||
| 714 | ###################################
|
836 | ###################################
|
| 715 | # Write data structures to a file #
|
837 | # Write data structures to a file #
|
| 716 | ###################################
|
838 | ###################################
|
| 717 | sub writedata { |
839 | sub writedata
|
| - | 840 | {
|
|
| 718 | open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
841 | open OUTF, ">/tmp/XDATA" or die "Can't create XDATA: $!\n"; |
| 719 | print OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
842 | print OUTF "Data collected from alt.os.linux.mandrake\n\n"; |
| - | 843 | print OUTF |
|
| 720 | print OUTF "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
844 | "Poster Data\nname : agent : count : size: orig : quoted : per cent\n"; |
| 721 | foreach my $name (keys %data) { |
845 | foreach my $name ( keys %data ) |
| - | 846 | {
|
|
| - | 847 | print OUTF |
|
| 722 | print OUTF "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
848 | "$name : $data{$name}{agent} : $data{$name}{count} : $data{$name}{size} : $data{$name}{orig} : $data{$name}{quoted} : $data{$name}{percent}\n"; |
| 723 | }
|
849 | }
|
| - | 850 | print OUTF |
|
| 724 | print OUTF "============================================================================\n"; |
851 | "============================================================================\n"; |
| 725 | print OUTF "Thread subjects\n"; |
852 | print OUTF "Thread subjects\n"; |
| - | 853 | print OUTF |
|
| 726 | print OUTF "----------------------------------------------------------------------------\n"; |
854 | "----------------------------------------------------------------------------\n"; |
| 727 | foreach my $thread (sort {"\L$a" cmp "\L$b"} keys %threads) { |
855 | foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads ) |
| - | 856 | {
|
|
| 728 | print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
857 | print OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n"; |
| 729 | }
|
858 | }
|
| - | 859 | print OUTF |
|
| 730 | print OUTF "============================================================================\n"; |
860 | "============================================================================\n"; |
| 731 | print OUTF "Cross-posts\n"; |
861 | print OUTF "Cross-posts\n"; |
| - | 862 | print OUTF |
|
| 732 | print OUTF "----------------------------------------------------------------------------\n"; |
863 | "----------------------------------------------------------------------------\n"; |
| 733 | foreach my $name (sort keys %crossposts) { |
864 | foreach my $name ( sort keys %crossposts ) |
| - | 865 | {
|
|
| 734 | print OUTF "$name : $crossposts{$name}\n"; |
866 | print OUTF "$name : $crossposts{$name}\n"; |
| 735 | }
|
867 | }
|
| 736 | print OUTF |
868 | print OUTF print OUTF |
| 737 | print OUTF "============================================================================\n"; |
869 | "============================================================================\n"; |
| 738 | print OUTF "User agents\n"; |
870 | print OUTF "User agents\n"; |
| - | 871 | print OUTF |
|
| 739 | print OUTF "----------------------------------------------------------------------------\n"; |
872 | "----------------------------------------------------------------------------\n"; |
| 740 | foreach my $name (sort keys %agents) { |
873 | foreach my $name ( sort keys %agents ) |
| - | 874 | {
|
|
| 741 | print OUTF "$name : $agents{$name}\n"; |
875 | print OUTF "$name : $agents{$name}\n"; |
| 742 | }
|
876 | }
|
| - | 877 | print OUTF |
|
| 743 | print OUTF "============================================================================\n"; |
878 | "============================================================================\n"; |
| 744 | print OUTF "Time zones\n"; |
879 | print OUTF "Time zones\n"; |
| - | 880 | print OUTF |
|
| 745 | print OUTF "----------------------------------------------------------------------------\n"; |
881 | "----------------------------------------------------------------------------\n"; |
| 746 | foreach my $name (sort keys %tz) { |
882 | foreach my $name ( sort keys %tz ) |
| - | 883 | {
|
|
| 747 | print OUTF "$name : $tz{$name}\n"; |
884 | print OUTF "$name : $tz{$name}\n"; |
| 748 | }
|
885 | }
|
| 749 | close OUTF; |
886 | close OUTF; |
| 750 | } # writedata |
887 | } # writedata |