Subversion Repositories LCARS

Rev

Rev 8 | Rev 10 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 8 Rev 9
Line 1... Line 1...
1
#!/usr/bin/env perl
1
#!/usr/bin/env perl
2
use strict;
2
use strict;
3
use warnings;
3
use warnings;
4
use utf8;
4
use utf8;
-
 
5
use encoding 'utf-8';
5
use Encode;
6
use Encode;
6
7
7
###########################
8
###########################
8
# newsstat.pl version 0.4.1
9
# newsstat.pl version 0.4.1
9
10
Line 18... Line 19...
18
# (Numbers and paths can be configured below.  -- PE)
19
# (Numbers and paths can be configured below.  -- PE)
19
############################################################################
20
############################################################################
20
21
21
############################################################################
22
############################################################################
22
#                       RECENT CHANGES                                     #
23
#                       RECENT CHANGES                                     #
-
 
24
# 2011-10-03  PE  - Use more compatible shebang
-
 
25
#                 - Fixed some Perl::Critic-ized code
-
 
26
#                 - Fixed wrong indent for non-ASCII names (TODO: proper dot count)
-
 
27
#                 - Formatted source code
23
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
28
# 2011-07-03  PE  - Use Encode to decode/encode MIME encodings
24
#                 - Use warnings, utf8 (just in case)
29
#                 - Use warnings, utf8 (just in case)
25
#                 - Documentation update
30
#                 - Documentation update
26
# N/A         NN  - Take newsgroup name as argument
31
# N/A         NN  - Take newsgroup name as argument
27
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
32
# 2004-06-19  NN  - newsgroup name is $ARGV[0]
Line 237... Line 242...
237
    $count = $topposters;
242
    $count = $topposters;
238
  }
243
  }
239
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
244
  printf "%s\n", &centred( "Top $count posters by number of articles", 76 );
240
  print "=" x 76, "\n";
245
  print "=" x 76, "\n";
241
  $i = 0;
246
  $i = 0;
-
 
247
  foreach
242
  foreach my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
248
    my $poster ( sort { $data{$b}{count} <=> $data{$a}{count} } keys %data )
243
  {
249
  {
244
    my $name = substr( $poster, 0, 65 );
250
    my $name = substr( $poster, 0, 65 );
245
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
251
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $poster, 63, "." ),
246
      $data{$poster}{count};
252
      $data{$poster}{count};
247
    last if ( ++$i == $count );
253
    last if ( ++$i == $count );
Line 290... Line 296...
290
  }
296
  }
291
  printf "%s\n",
297
  printf "%s\n",
292
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
298
    &centred( "Top $count responders by original text (> 5 posts)", 76 );
293
  print "=" x 76, "\n";
299
  print "=" x 76, "\n";
294
  $i = 0;
300
  $i = 0;
-
 
301
  foreach my $poster (
295
  foreach my $poster ( sort { $data{$b}{percent} <=> $data{$a}{percent} }
302
    sort { $data{$b}{percent} <=> $data{$a}{percent} }
296
    keys %data )
303
    keys %data
-
 
304
    )
297
  {
305
  {
298
    next if $data{$poster}{quoted} == 0;
306
    next if $data{$poster}{quoted} == 0;
299
    next if $data{$poster}{count} < 5;
307
    next if $data{$poster}{count} < 5;
300
    my $name = substr( $poster, 0, 63 );
308
    my $name = substr( $poster, 0, 63 );
301
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
309
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
Line 320... Line 328...
320
  }
328
  }
321
  printf "%s\n",
329
  printf "%s\n",
322
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
330
    &centred( "Bottom $count responders by original text  (> 5 posts)", 76 );
323
  print "=" x 76, "\n";
331
  print "=" x 76, "\n";
324
  $i = 0;
332
  $i = 0;
-
 
333
  foreach my $poster (
325
  foreach my $poster ( sort { $data{$a}{percent} <=> $data{$b}{percent} }
334
    sort { $data{$a}{percent} <=> $data{$b}{percent} }
326
    keys %data )
335
    keys %data
-
 
336
    )
327
  {
337
  {
328
    next if $data{$poster}{quoted} == 0;
338
    next if $data{$poster}{quoted} == 0;
329
    next if $data{$poster}{count} < 5;
339
    next if $data{$poster}{count} < 5;
330
    my $name = substr( $poster, 0, 63 );
340
    my $name = substr( $poster, 0, 63 );
331
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
341
    printf "%2d: %-63s : %02.2f%%\n", $i + 1, rpad( $poster, 63, "." ),
Line 349... Line 359...
349
    $count = $topthreads;
359
    $count = $topthreads;
350
  }
360
  }
351
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
361
  printf "%s\n", &centred( "Top $count threads by no. of articles", 76 );
352
  print "=" x 76, "\n";
362
  print "=" x 76, "\n";
353
  $i = 0;
363
  $i = 0;
-
 
364
  foreach my $thread (
354
  foreach my $thread ( sort { $threads{$b}{count} <=> $threads{$a}{count} }
365
    sort { $threads{$b}{count} <=> $threads{$a}{count} }
355
    keys %threads )
366
    keys %threads
-
 
367
    )
356
  {
368
  {
357
    my $name = substr( $thread, 0, 65 );
369
    my $name = substr( $thread, 0, 65 );
358
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
370
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
359
      $threads{$thread}{count};
371
      $threads{$thread}{count};
360
    last if ( ++$i == $count );
372
    last if ( ++$i == $count );
Line 375... Line 387...
375
    $count = $topthreads;
387
    $count = $topthreads;
376
  }
388
  }
377
  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
389
  printf "%s\n", &centred( "Top $count threads by size in KB", 76 );
378
  print "=" x 76, "\n";
390
  print "=" x 76, "\n";
379
  $i = 0;
391
  $i = 0;
-
 
392
  foreach my $thread (
380
  foreach my $thread ( sort { $threads{$b}{size} <=> $threads{$a}{size} }
393
    sort { $threads{$b}{size} <=> $threads{$a}{size} }
381
    keys %threads )
394
    keys %threads
-
 
395
    )
382
  {
396
  {
383
    my $name = substr( $thread, 0, 65 );
397
    my $name = substr( $thread, 0, 65 );
384
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
398
    printf "%2d: %-63s : %6d\n", $i + 1, rpad( $name, 63, "." ),
385
      $threads{$thread}{size} / 1024;    #/
399
      $threads{$thread}{size} / 1024;    #/
386
    last if ( ++$i == $count );
400
    last if ( ++$i == $count );
Line 453... Line 467...
453
    $count = $topagents;
467
    $count = $topagents;
454
  }
468
  }
455
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
469
  printf "%s\n", &centred( "Top $count User Agents by number of posts", 76 );
456
  print "=" x 76, "\n";
470
  print "=" x 76, "\n";
457
  $i = 0;
471
  $i = 0;
-
 
472
  foreach my $agent (
458
  foreach my $agent ( sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
473
    sort { $distinct_agent{$b} <=> $distinct_agent{$a} }
459
    keys %distinct_agent )
474
    keys %distinct_agent
-
 
475
    )
460
  {
476
  {
461
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
477
    printf "%2d: %-58s : %5d (%2.f%%)\n", $i + 1, rpad( $agent, 58, "." ),
462
      $distinct_agent{$agent},
478
      $distinct_agent{$agent},
463
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
479
      ( ( $distinct_agent{$agent} / $totalposts ) * 100 );
464
    last if ( ++$i == $count );
480
    last if ( ++$i == $count );
Line 513... Line 529...
513
  {
529
  {
514
    $latest = $mtime;
530
    $latest = $mtime;
515
  }
531
  }
516
532
517
  # now read the file
533
  # now read the file
518
  open(my $FILE, $filename) or die "Can't open $filename: $!\n";
534
  open( my $FILE, $filename ) or die "Can't open $filename: $!\n";
519
  while (<$FILE>)
535
  while (<$FILE>)
520
  {
536
  {
521
    $totheader += length($_);    # bump total header size
537
    $totheader += length($_);    # bump total header size
522
    last if (/^\s*$/);           # end of header?
538
    last if (/^\s*$/);           # end of header?
523
    if (/^([^:\s]*):\s+(.*)/)
539
    if (/^([^:\s]*):\s+(.*)/)
Line 525... Line 541...
525
      my ( $key, $val ) = ( $1, $2 );
541
      my ( $key, $val ) = ( $1, $2 );
526
      $headers{$key} = decode( 'MIME-Header', $val );
542
      $headers{$key} = decode( 'MIME-Header', $val );
527
      $lcheader{ clean( lc($key) ) } = clean($val);
543
      $lcheader{ clean( lc($key) ) } = clean($val);
528
    }
544
    }
529
  }
545
  }
530
  @body = <$FILE>;                # slurp up body
546
  @body = <$FILE>;               # slurp up body
531
  close($FILE);
547
  close($FILE);
532
}    # getarticle
548
}    # getarticle
533
549
534
###################################
550
###################################
535
# get data from the current article
551
# get data from the current article
Line 537... Line 553...
537
sub getdata
553
sub getdata
538
{
554
{
539
#### First, analyse header fields ####
555
#### First, analyse header fields ####
540
556
541
  # Set up this poster if not defined, get counts, sizes
557
  # Set up this poster if not defined, get counts, sizes
542
  $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
558
  my $poster = encode( 'UTF-8', $headers{From} );    # get the poster's name
543
  if ( !defined( $data{$poster} ) )
559
  if ( !defined( $data{$poster} ) )
544
  {                                               # seen this one before?
560
  {                                                  # seen this one before?
545
    $data{$poster}{agent}  = 'Unknown';           # comes after For: field
561
    $data{$poster}{agent}  = 'Unknown';              # comes after For: field
546
    $data{$poster}{orig}   = 0;
562
    $data{$poster}{orig}   = 0;
547
    $data{$poster}{quoted} = 0;
563
    $data{$poster}{quoted} = 0;
548
  }
564
  }
549
  $data{$poster}{count}++;                        # bump count for this poster
565
  $data{$poster}{count}++;    # bump count for this poster
550
  $data{$poster}{size} += $filesize;              # total size of file
566
  $data{$poster}{size} += $filesize;    # total size of file
551
567
552
  # The User-Agent and/or X-Newsreader fields
568
  # The User-Agent and/or X-Newsreader fields
553
  # for User-Agent by poster
569
  # for User-Agent by poster
554
  if ( defined $lcheader{"user-agent"} )
570
  if ( defined $lcheader{"user-agent"} )
555
  {
571
  {
Line 694... Line 710...
694
  }
710
  }
695
711
696
  # Get the time zone
712
  # Get the time zone
697
  $_ = $headers{"Date"};
713
  $_ = $headers{"Date"};
698
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
714
  my ($tz) = /\d\d:\d\d:\d\d\s+(.*)/;
699
  if ( ($tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
715
  if ( ( $tz =~ /UTC/ ) or ( $tz =~ /GMT/ ) or ( $tz =~ /0000/ ) )
700
  {
716
  {
701
    $tz = "UTC";
717
    $tz = "UTC";
702
  }
718
  }
703
  $tz{$tz}++;
719
  $tz{$tz}++;
704
720
Line 715... Line 731...
715
      # Bill Unruh uses ] quotes, and another poster uses ::
731
      # Bill Unruh uses ] quotes, and another poster uses ::
716
    }
732
    }
717
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
733
    elsif ( /^\s*[>\]]/ or /^\s*::/ )
718
    {                           # are we in a quote line?
734
    {                           # are we in a quote line?
719
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
735
      $data{$poster}{quoted} += length($_);    # bump count of quoted chrs
720
      $totquoted             += length($_);
736
      $totquoted += length($_);
721
    }
737
    }
722
    elsif (/-- /)
738
    elsif (/-- /)
723
    {
739
    {
724
      $insig = 1;
740
      $insig = 1;
725
    }
741
    }
726
    else
742
    else
727
    {
743
    {
728
744
729
      # we must be processing an original line
745
      # we must be processing an original line
730
      $data{$poster}{orig} += length($_);      # bump count of original chrs
746
      $data{$poster}{orig} += length($_);      # bump count of original chrs
731
      $totorig             += length($_);
747
      $totorig += length($_);
732
    }
748
    }
733
  }    # end for (@body)
749
  }    # end for (@body)
734
750
735
}    # getdata
751
}    # getdata
736
752
Line 763... Line 779...
763
  foreach my $poster ( keys %data )
779
  foreach my $poster ( keys %data )
764
  {
780
  {
765
    my $percent = 100;
781
    my $percent = 100;
766
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
782
    if ( ( $data{$poster}{orig} != 0 ) and ( $data{$poster}{quoted} != 0 ) )
767
    {
783
    {
-
 
784
      $percent =
768
      $percent = $data{$poster}{orig} * 100 /
785
        $data{$poster}{orig} * 100 /
769
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
786
        ( $data{$poster}{quoted} + $data{$poster}{orig} );    #/
770
    }
787
    }
771
    elsif ( $data{$poster}{orig} == 0 )
788
    elsif ( $data{$poster}{orig} == 0 )
772
    {
789
    {
773
      $percent = 0;
790
      $percent = 0;
Line 852... Line 869...
852
  print $OUTF "Thread subjects\n";
869
  print $OUTF "Thread subjects\n";
853
  print $OUTF
870
  print $OUTF
854
"----------------------------------------------------------------------------\n";
871
"----------------------------------------------------------------------------\n";
855
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
872
  foreach my $thread ( sort { "\L$a" cmp "\L$b" } keys %threads )
856
  {
873
  {
-
 
874
    print $OUTF
857
    print $OUTF "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
875
      "$thread : $threads{$thread}{count} : $threads{$thread}{size}\n";
858
  }
876
  }
859
  print $OUTF
877
  print $OUTF
860
"============================================================================\n";
878
"============================================================================\n";
861
  print $OUTF "Cross-posts\n";
879
  print $OUTF "Cross-posts\n";
862
  print $OUTF
880
  print $OUTF