#!/usr/bin/perl
# ---------------------------------------------------------------------------------------------
#
# snapshot_stats.pl --- Take a SnapShot of Popfile's Classification Stats
#
# This program authored by Scott W Leighton (helphand@pacbell.net)
# based upon the Popfile project, which is Copyrighted
# by John Graham-Cumming. The author hereby contributes this code
# to the Popfile project under the terms of the Popfile License
# Agreement.    /Scott W Leighton/  May 25, 2003
#
# History - May 25, 2003 - original release
#           May 26, 2003 - Minor cleanup, added some comments
#           May 27, 2003 - Added options for overriding separator and quoting
#           May 30, 2003 - Enhanced version check
#           June 22, 2003 - Major re-work for v 0.20.0 POPFile changes
#           June 29, 2003 - Made backward compatible with v 0.19.0
#
# Popfile and Components
# Copyright (c) 2001-2003 John Graham-Cumming
#
# ---------------------------------------------------------------------------------------------

use strict;
use warnings;

# Loaded POPFile module objects, indexed as $components{$type}{$name}
my %components;

# ---------------------------------------------------------------------------------------------
#
# load_modules
#
# Called to load specific POPFile loadable modules (implemented as .pm files with special
# comment on first line) in a specific subdirectory
#
# $directory   The directory to search for loadable modules
# $type        The 'type' of module being loaded (e.g. proxy, core, ui) which is used
#              below when fixing up references between modules (e.g. proxy modules all
#              need access to the classifier module)
# $module      The specific module name to be loaded.
#
# A file that cannot be opened, or whose first line is not the loadable-module marker,
# is silently skipped; nothing is added to %components in that case.
#
# ---------------------------------------------------------------------------------------------
sub load_modules
{
    my ( $directory, $type, $module ) = @_;

    $module = $directory . '/' . $module;

    # A missing or unreadable module is not an error here
    open my $fh, '<', $module or return;
    my $first = <$fh>;
    close $fh;

    return unless defined $first && $first =~ /^# POPFILE LOADABLE MODULE/;

    # require wants the file path form (Dir/Name.pm) ...
    require $module;

    # ... while the constructor call wants the package form (Dir::Name)
    ( my $class = $module ) =~ s{/}{::}g;
    $class =~ s/\.pm\z//;

    my $mod  = $class->new();
    my $name = $mod->name();

    $components{$type}{$name} = $mod;

    return;
}

#
# Main
#

# Both forms of the snapshot time are written to every CSV row:
# the raw epoch value and the human readable local time string.
my $snap_time = time;
my $time      = localtime;

#
# Load the modules we'll be using
#

load_modules( 'POPFile',    'core',       'Configuration.pm' );
load_modules( 'POPFile',    'core',       'Logger.pm' );
load_modules( 'POPFile',    'core',       'MQ.pm' );
load_modules( 'Classifier', 'classifier', 'Bayes.pm' );

my $config = $components{core}{config};

# Do not run if we are not on version 0.19.0 or higher.  The defined check
# ensures that a configuration module that failed to load produces the
# compatibility message below rather than a method-call-on-undef failure.

if (    defined $config
     && $config->isa( 'POPFile::Module' )
     && $config->can( 'parameter' ) ) {

    # link each of the objects with the configuration object and
    # the logger and the mq

    foreach my $type ( keys %components ) {
        foreach my $name ( keys %{ $components{$type} } ) {
            my $component = $components{$type}{$name};

            $component->configuration( $config );

            # the logger is not linked to itself
            $component->logger( $components{core}{logger} ) if ( $name ne 'logger' );

            # only modules that provide an mq method get the message queue
            $component->mq( $components{core}{mq} ) if ( $component->can( 'mq' ) );
        }
    }

    #
    # Tell each module to initialize itself
    #

    foreach my $type ( keys %components ) {
        foreach my $name ( keys %{ $components{$type} } ) {
            if ( $components{$type}{$name}->initialize() == 0 ) {
                die "Failed to start while initializing the $name module\n";
            }
        }
    }

    # Ensure that a snapshot subdirectory exists to hold any error log
    # that logger might generate so we don't interfere with a
    # running POPFile.  The result is deliberately ignored: mkdir
    # fails when the directory is already there.

    mkdir( 'snapshot' );

    # Set default quote and separator characters

    $config->parameter( 'csv_quote',     '' );
    $config->parameter( 'csv_separator', ',' );

    # Load in the Popfile configuration parameters, any configured
    # ones will override the initialized default values
    # NOTE: We are intentionally NOT saving this configuration
    #       back to disk since the parameters we are allowing
    #       for this program are NOT legal Popfile parameters.

    $config->load_configuration();

    # override the logdir and piddir so we don't mess with
    # the production ones

    $config->parameter( 'logger_logdir', 'snapshot/' );
    $config->parameter( 'config_piddir', 'snapshot/' );

    # Now grab any commandline parameters, they will override
    # the defaults and those in popfile.cfg. As a byproduct,
    # if the user overrides our csv_quote or csv_separator
    # parameter, this will pick it up for us.

    $config->parse_command_line();

    # force logger to recognize the new logdir before we startup
    # the modules. That way we will not inadvertently log to the
    # production POPFile log.

    $components{core}{logger}->service();

    # now that the configuration is established, tell each module
    # to start

    foreach my $type ( keys %components ) {
        foreach my $name ( keys %{ $components{$type} } ) {
            if ( $components{$type}{$name}->start() == 0 ) {
                die "Failed to start while starting the $name module\n";
            }
        }
    }

    # The separator and quote are fixed from here on, so fetch them once
    # instead of once per row.

    my $separator = $config->parameter( 'csv_separator' );
    my $quote     = $config->parameter( 'csv_quote' );

    # Check for existing CSV file. Rows are always appended; the header
    # row is written first when the file is missing or empty.

    my $fn          = 'snapshot_stats.csv';
    my $need_header = !-s $fn;

    open my $csv, '>>', $fn or die "Unable to open ${fn} :$!\n";

    if ( $need_header ) {
        print {$csv} join( $separator,
                           wrap_in_quotes( $quote,
                                           qw ( BucketName
                                                BucketColor
                                                UnixTimestamp
                                                Timestamp
                                                BucketUniqueWords
                                                BucketWordCount
                                                BucketMailsClassified
                                                BucketFalsePositives
                                                BucketFalseNegatives
                                                GlobalWordCount
                                                GlobalDownloads
                                                GlobalMessages
                                                GlobalErrors
                                                LastResetDate ) ) ),
                     "\n";
    }

    # Get the buckets, then iterate thru them and output the stats
    # for each bucket by appending to CSV file

    my $bayes   = $components{classifier}{bayes};
    my @buckets = $bayes->get_buckets();

    foreach my $bucket ( @buckets ) {
        print {$csv} join( $separator,
                           wrap_in_quotes( $quote,
                                           $bucket,
                                           $bayes->get_bucket_color( $bucket ),
                                           $snap_time,
                                           $time,
                                           $bayes->get_bucket_unique_count( $bucket ),
                                           $bayes->get_bucket_word_count( $bucket ),
                                           $bayes->get_bucket_parameter( $bucket, 'count' ),
                                           $bayes->get_bucket_parameter( $bucket, 'fpcount' ),
                                           $bayes->get_bucket_parameter( $bucket, 'fncount' ),
                                           $bayes->get_word_count(),
                                           $config->parameter( 'GLOBAL_download_count' ),
                                           $config->parameter( 'GLOBAL_mcount' ),
                                           $config->parameter( 'GLOBAL_ecount' ),
                                           $config->parameter( 'html_last_reset' ) ) ),
                     "\n";
    }

    # Buffered write errors only surface at close. Warn rather than die so
    # the pid file cleanup below still runs.

    close $csv or warn "Unable to close ${fn} :$!\n";

    #
    # Cleanup - Get rid of the popfile.pid file created by the configuration
    #           module.
    #

    unlink( $config->parameter( 'config_piddir' ) . 'popfile.pid' );

    # All Done

} else {
    print "$0 is compatible only with Popfile version 0.19.0 or above\n";
}

#
# Routine to wrap array values in quotes
#
# $default_quote   The quote string placed before and after every value
# @list            The values to be wrapped
#
# Returns a new list with one wrapped string per input value. An undefined
# quote or value is treated as the empty string.
#
sub wrap_in_quotes
{
    my ( $default_quote, @list ) = @_;

    $default_quote = '' unless defined $default_quote;

    return map { $default_quote . ( defined $_ ? $_ : '' ) . $default_quote } @list;
}