#!/usr/bin/perl -w

## $Id: tkl-stat,v 1.9 2005/06/30 14:08:40 sondberg Exp $
## ------------------------------------------------------
## Keystone statistical tool
##
## Copyright (c) 2002-2005, Index Data.

use TKL;
use TKL::Settings qw(:tkl_config);
use TKL::Log qw(:log_levels);
use strict;

# Name of this script without leading directories; used in the usage text
# and in the welcome log line.
my ($my_self) = ($0 =~ m{([^/]*)$});

# Register the usage callback, the local configuration file name and the
# option specifications with TKL::Settings.
tkl_set_config(
    usage  => \&usage,
    local  => "config_stat.tkl",
    assign => [
        "root=s",
        "path=s",
        "which_agent=s",
        "help",
        "output=s",
        "base_url=s",
        "from=s",
        "log=s",
        "nomail",
        "message=s",
        "email_subject=s",
        "debug",
    ],
);

# Effective settings: each value falls back to a built-in default when
# tkl_read_config returns a false value.
chomp( my $current_dir = `pwd` );
my $version = '1.1';
my $root = tkl_read_config('root') || $current_dir;
my $subdir = tkl_read_config('path') || '';
my $smtp_agent = tkl_read_config('which_agent') || undef;
my $debug = tkl_read_config('debug') || 0;
my $help = tkl_read_config('help') || 0;
my $stat_spec = tkl_read_config('output') || "$root/stat.tkl";
my $no_mail = tkl_read_config('nomail') || 0;
my $logfile = tkl_read_config('log') || "/var/log/tkl/stat.log";
my $baseurl = tkl_read_config('base_url') || "http://no.base.url/specified";
# NOTE(review): $email_from is read here but is not referenced anywhere else
# in this file - confirm whether the from-address should reach the mailer.
my $email_from = tkl_read_config('from') || "tkl-stat <urlcheck\@localhost>";
my $mail_subject = tkl_read_config('email_subject') || "No header";
my $mail_message = tkl_read_config('message') || "No message";

# Direct method invocation instead of the ambiguous indirect object syntax.
my $tkl = TKL->new( root => $root );
my $logger = TKL::Log->new( logfile => $logfile,
                            level   => $debug ? tkl_log_level_all
                                              : tkl_log_level_norm );

# Counters updated by check_file while the portal is scanned.
my $confirm_count = 0;
my $total_size = 0;

# Creation time-stamp written into the report header (local time).
my ($day, $month, $year, $sec, $min, $hour) = (localtime)[3, 4, 5, 0, 1, 2];
my $tm = sprintf( "%04d-%02d-%02d, %02d:%02d:%02d", $year + 1900, $month + 1,
                  $day, $hour, $min, $sec);


usage() if $help;

$logger->log(tkl_log_log, "Welcome to $my_self/$version");
$logger->log(tkl_log_log, "Scanning TKL portal: $root");

# $docs collects one record per document; $confirm maps a user name to the
# list of documents whose confirmation period has expired.
my $docs = [];
my $confirm = {};

scan_dir( $subdir, $docs, $confirm );

$logger->log( tkl_log_log, 'Found ', scalar @$docs, ' documents' );
$logger->log( tkl_log_log, 'Updating report: ' . $stat_spec );

update_report( $docs );
emit_emails( $confirm );


# Collect statistics for one document and, when its confirmation period has
# expired, queue it for a reminder to the responsible user.
#
# Parameters:
#   $file     - document object; the methods document_element, get_timestamp,
#               extract_schema, filename and portal_filename are called on it
#   $doc_list - array reference; one record hash (xsd, file, size, user) is
#               appended per document
#   $href     - hash reference mapping a user name to an array reference of
#               expired document objects
#
# Side effects: adds the file size to $total_size and increments
# $confirm_count for every expired document.
sub check_file {
    my ($file, $doc_list, $href) = @_;
    my $doc = $file->document_element( );
    my $creator = $doc->getAttribute( 'creator' );
    my $modifier = $doc->getAttribute( 'modifier' );
    my $modified = $file->get_timestamp('modified');
    my $confirm = $doc->getAttribute( 'confirm' );
    my $schema = $file->extract_schema;
    my $path = $file->filename;

    # -s yields undef for a missing file and an empty string for an empty
    # one; normalise both to 0 so the total and the report stay numeric.
    my $size = ( -s $path ) || 0;
    my $record = {      'xsd'   => $schema,
                        'file'  => $file->portal_filename,
                        'size'  => $size };

    $creator = defined( $creator ) && length( $creator ) ? $creator : 'admin';
    $total_size += $size;

    # The latest modifier is responsible, unless that is missing, empty or
    # the admin user; in those cases the creator is contacted instead.
    my $responsible = $creator;

    if ( defined( $modifier ) && length( $modifier )
         && $modifier ne 'admin' ) {
        $responsible = $modifier;
    }

    $record->{'user'} = $responsible;

    if ( !defined( $modified ) ) {
        # Fall back to the file's modification time. stat gives epoch
        # seconds directly, whereas -M is measured from script start ($^T).
        my $mtime = ( stat $path )[9];

        $modified = defined( $mtime ) ? $mtime : time( );
    }

    # A true confirm value is a period in days; the document is due once
    # that period has passed since the last modification.
    if ( $confirm && ( $modified + $confirm * 86400 ) <= time( ) ) {
        push @{ $href->{$responsible} }, $file;
        $confirm_count ++;
    }

    push @$doc_list, $record;
}


# Recursively walk a portal directory.
#
# Every file reported by $tkl->browse for $dir is handed to check_file, and
# every sub-directory is scanned in turn. $doc_list and $href are passed
# through unchanged to check_file and to the recursive calls.
sub scan_dir {
    my ($dir, $doc_list, $href) = @_;
    my $listing = $tkl->browse($dir);
    my $entries = $listing->{files};
    my $children = $listing->{dirs};

    $logger->log(tkl_log_log, "Scanning directory: $dir");

    for my $entry (@$entries) {
        check_file($entry, $doc_list, $href);
    }

    for my $child (@$children) {
        my $child_path = "$dir/$child";

        scan_dir($child_path, $doc_list, $href);
    }
}


# Send one reminder mail per responsible user.
#
# Parameters:
#   $verify - hash reference mapping a user name to an array reference of
#             document objects whose confirmation period has expired
#
# The mail body is $mail_message followed by one edit link per document.
# When a user cannot be looked up, the mail goes to the default user
# ('admin'). If that lookup fails as well, the user is skipped with a
# warning instead of calling a method on an undefined value. Nothing is
# sent when $no_mail is set.
sub emit_emails {
    my ( $verify ) = @_;
    my $default_user = 'admin';

    # Sorted so that mails and log lines come out in a stable order.
    foreach my $user ( sort keys %$verify ) {
        my @body = ( $mail_message, "\n" );

        foreach my $file ( @{ $verify->{$user} } ) {
            my $portal_path = $file->portal_filename;

            push @body, '  ' . $baseurl . $portal_path . "?edit=1\n";
            $logger->log( tkl_log_debug, 'Found document: ', $portal_path );
        }

        my $tkl_user = $tkl->get_user_info( $user );

        if ( $tkl_user ) {
            $logger->log( tkl_log_log, 'Sending mail to ' , $user );
        } else {
            $logger->log( tkl_log_warn, 'Unable to find user: ', $user );
            $logger->log( tkl_log_warn, 'Using default user: ', $default_user );
            $tkl_user = $tkl->get_user_info( $default_user );
        }

        if ( !$tkl_user ) {
            $logger->log( tkl_log_warn, 'Unable to find default user: ',
                          $default_user, ', skipping mail for: ', $user );
            next;
        }

        next if $no_mail;

        # Override the mailer only when the configured agent is executable.
        if ( defined( $smtp_agent ) ) {
            if ( -x $smtp_agent ) {
                $tkl_user->{mailer} = $smtp_agent;
            } else {
                $logger->log( tkl_log_warn, 'No such mailer: ' .
                              $smtp_agent );
            }
        }

        $tkl_user->send_mail( $mail_subject, @body );
    }
}


# Escape the characters that are special in XML character data.
# An undefined value becomes the empty string.
sub _xml_escape {
    my ( $text ) = @_;

    return '' if !defined( $text );

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    return $text;
}


# Write the statistics report to $stat_spec.
#
# Parameters:
#   $docs - array reference of record hashes; every key/value pair of a
#           record becomes one child element of a <record> element
#
# An existing report is first copied to "$stat_spec.old". Failures to back
# up, open or close the report are logged as warnings; the sub never dies.
sub update_report {
    my ( $docs ) = @_;

    if ( -f $stat_spec ) {
        # List form of system() bypasses the shell, so paths containing
        # spaces or shell metacharacters are passed to cp unchanged.
        if ( system( 'cp', $stat_spec, "$stat_spec.old" ) != 0 ) {
            $logger->log( tkl_log_warn, 'Unable to back up ', $stat_spec,
                                        ' to ', "$stat_spec.old" );
        }
    }

    my $fh;

    if ( !open( $fh, '>', $stat_spec ) ) {
        $logger->log( tkl_log_warn, 'Unable to open file ', $stat_spec,
                                    ' for writing: ', $! );
        return;
    }

    print $fh $TKL::xml_header, "\n<stat created=\"$tm\" ",
              "size=\"$total_size\">\n";

    foreach my $rec ( @$docs ) {
        print $fh "  <record>\n";

        # Sorted keys give every record the same element order.
        foreach my $key ( sort keys %$rec ) {
            my $content = _xml_escape( $rec->{$key} );

            print $fh "    <$key>$content</$key>\n";
        }

        print $fh "  </record>\n";
    }

    print $fh "</stat>\n";

    # Buffered write errors only surface when the handle is closed.
    if ( !close( $fh ) ) {
        $logger->log( tkl_log_warn, 'Unable to close file ', $stat_spec,
                                    ': ', $! );
    }

    return;
}


# Print the command line synopsis and option summary to STDERR, then
# terminate the program with exit status 0.
sub usage {
    print STDERR <<"END_OF_USAGE";
Usage: $my_self -r tkl-portal-root [options]

Options:
  -r path             TKL portal root
  -d                  Debugging mode
  -h                  Shows this information
  -o file             Use alternative stat.tkl
  -b base-URL         What is the portal base URL?
  -n                  Don't attempt to send any mails
  -w /path/to/agent   Use alternative SMTP agent
  -l logfile          Specify an alternative logfile
  -p sub-directory    Restrict scanning to sub-directory of portal
  -m email_message    Verify email message
  -e email_subject    Verify email subject
  -f email_from_addr  What from-address should be used in email?

END_OF_USAGE
    exit(0);
}


__END__

=head1 NAME

tkl-stat - Statistical tool for the TKL portal framework.

=head1 SYNOPSIS

  % tkl-stat -r /my/tkl/portal/root

=head1 DESCRIPTION

This tool runs through a TKL/Keystone portal and looks for tkl documents with
the confirm attribute set to a non-zero value. The value is interpreted as
a time period in days. For each such document, the time-stamp is checked, and
the authors of documents older than the confirm period are asked by email to
check the document content and press save.

The author of the document is the person who did the latest editing of the
document (modifier). If the modifier user happens to be admin, the creator
user is contacted instead.

=head2 Options

  -r path             TKL portal root (root)
  -d                  Debugging mode (debug)
  -h                  Shows this information
  -o file             Use alternative output file, default: stat.tkl
                      (output)
  -b base-URL         What is the portal base URL? (base_url)
  -n                  Don't attempt to send any mails (nomail)
  -w /path/to/agent   Use alternative SMTP agent (which_agent)
  -l logfile          Specify an alternative logfile (log)
  -p sub-directory    Restrict scanning to sub-directory of portal (path)
  -m email_message    Verify email message (message)
  -e email_subject    Verify email subject (email_subject)
  -f email_from_addr  What from-address should be used in email? (from)

Keywords in parentheses correspond to settings in the config_stat.tkl
configuration file.

=head1 CONFIGURATION

Apart from the command line switches listed above, you can specify all the
settings in the tkl-stat configuration file called config_stat.tkl which
should be placed in the TKL/Keystone portal root.

The structure of this configuration file is very similar to the structure
of tkl.config. In general, settings specified in config_stat.tkl will
override settings in tkl.config for tkl-stat, where there is an overlap.
You can specify these settings in config_stat.tkl:

  <?xml version="1.0"?>
  <config>
    <!-- path restricts tkl-stat to scan this sub-directory of the
         Keystone portal root
    -->
    <setting name="path"
      value="/subdir/of/keystone/portal/root"/>

    <!-- which_agent tells tkl-stat to use a different SMTP mailer
         than the /usr/sbin/sendmail, for instance on Solaris
    -->
    <setting name="which_agent"
      value="/my/own/smtp_mailer"/>

    <!-- output is an absolute path to the place to store stat.tkl,
         the statistical data collected by tkl-stat
    -->
    <setting name="output"
      value="/somewhere/some_file.xml"/>

    <!-- What is the base URL needed to reach all the tkl files
         in your Keystone portal?
    -->
    <setting name="base_url"
      value="http://my.keystone.portal.org/xxx/yyy"/>

    <!-- The email address occurring in the Reply-to field in the
         verification emails
    -->
    <setting name="from"
      value="keystone@my.keystone.portal.org"/>

    <!-- Put log information in a customized file using this setting
    -->
    <setting name="log"
      value="/log_partition/keystone/stat.log"/>

    <!-- nomail is specified if you don't want tkl-stat to emit any
         emails at all, but only want it to update statistics
    -->
    <setting name="nomail"
      value="1"/>

    <!-- message should contain a text which will be put in the body
         of the verification email in front of the list of the hyperlinks
    -->
    <setting name="message"
      value="Some of your records have exceeded the verification period"/>

    <!-- What should be the subject of the verification emails?
    -->
    <setting name="email_subject"
      value="Time to check some Keystone records"/>

    <!-- Specifying the debug setting will make tkl-stat log extra
         information into its logfile
    -->
    <setting name="debug"
      value="1"/>
  </config>
                      

=head1 FILES

The main configuration file of this tool is config_stat.tkl, placed in the
portal root. Settings not specified on the command line or in config_stat.tkl
will be looked for in tkl.config.

=head1 AUTHOR

Anders SE<oslash>nderberg Mortensen <sondberg@indexdata.dk>
Indexdata, Copenhagen, Denmark
2005/05/20

=head1 SEE ALSO

Man-pages for the various TKL:: packages.

=cut
