package TKL;

## $Id: TKL.pm,v 1.22 2006/05/22 09:13:45 sondberg Exp $

use XML::LibXML;
use TKL::File;
use TKL::User;
use Carp;
use Cwd;
use strict;

our $xml_header = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" standalone=\"yes\"?>";
our $tkl_suffix = "tkl";
our $schema_suffix = "xsd";
our $userlist = "users.tkl";
our $schema_NS_URI = "http://www.w3.org/2001/XMLSchema";
our $uri_element_type = "xs:anyURI";
our $VERSION = "1.5.5";
our $ABSTRACT = "Base class for the TKL perl packages.";

sub new {
    my ($class, @rest) = @_;
    my %args = (tkl_config	=> "tkl.config",
    		oai_dir		=> "oai",
		schema_dir	=> "schemas",
		@rest);
    my $self = \%args;

    bless $self, $class;
    $self->find_portal_root unless defined($self->{root});
    $self->{libxml} = new XML::LibXML;

    return $self;
}


sub dom {
    return shift->{libxml};
}


sub document_element {
    my ($self, $xml_file) = @_;
    my $doc;
    
    eval {
        $doc = $self->dom->parse_file($xml_file);
    };

    if ( $@ ) {
        return 0;
    } else {
        return $doc->documentElement;
    }
}
    

sub check_path {
    my ($self, $path) = @_;

    if ($ENV{MOD_PERL}) {	## If running in MOD_PERL environment, use Apache docroot
	$path = $ENV{DOCUMENT_ROOT} . "/$path";
    }
    $path = $self->normalize_file($path);
    my @steps = split /\//, $path;
    while (@steps) {
	my $root_cand = join "/", @steps;
	my $tkl_config = "$root_cand/tkl.config";
	if (-f $tkl_config) {
	    return $root_cand;
	}
	pop @steps;
    }
    return undef;
}


sub tkl_die {
    my ($self, @msg) = @_;

    print STDERR "Fatal error: $0\n\n";
    foreach (@msg) {
	print STDERR "$_\n";
    }
    exit(1);
}


sub find_portal_root {
    my $self = shift;
    
    return $self->{root} if defined($self->{root});
    if (defined($self->{path})) {
	my $root = $self->check_path($self->{path});
	if (defined($root)) {
	    return $self->{root} = $root;
	} else {
	    $self->tkl_die("Out of portal scope: '" . $self->{path} . "'\n\n");
	}
    }
    my $pwd = cwd;
    my @path = split /\//, $pwd;
    while (@path) {
	my $super_dir = join "/", @path;
	my $file = "$super_dir/" . $self->{tkl_config};
	if (-f $file) {
	    return $self->{root} = $super_dir;
	}
	pop(@path);
    }
    $self->tkl_die("Unable to find portal root");
}
    
sub get_user_info {
    my ($self, $user) = @_;
    my $root = $self->find_portal_root;
    my $user_file = "$root/$userlist";
    my $users = $self->{parsed_users};

    unless (defined($users)) {
	unless (-f $user_file) {
	    $self->tkl_die("Unable to find user file '$user_file'");
	}
	$users = $self->{parsed_users} = new TKL::File(file => $user_file, root => $root);
    }

    my $user_doc_elem = $users->document_element;
    my ($user_node) = $user_doc_elem->findnodes("/users/user[login='$user']");
    if (defined($user_node)) {
	return new TKL::User(user_node => $user_node, root => $self->find_portal_root);
    } else {
	return undef;
    }
}


sub xml_trim {
    my ($self, $cdata) = @_;

    $cdata =~ s/^\s+//g;
    $cdata =~ s/\s+$//g;
    $cdata =~ s/&amp;/&/g;
    $cdata =~ s/&/&amp;/g;
    $cdata =~ s/</&lt;/g;
    $cdata =~ s/>/&gt;/g;

    return $cdata;
}


sub string2xmlstring {
    my ($self, $str) = @_;

    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    $str =~ s/"/&quot;/g;
    $str =~ s/'/&apos;/g;

    return $str;
}


sub dom2hash {
    my ($self, $dom, $href) = @_;
    my $name = $dom->nodeName();
    my $cdata = "";
    my $offset;
    
    if (ref($href->{$name}) eq 'ARRAY') {
	$offset = $#{$href->{$name}} + 1;
    } else {
	$href->{$name} = [];
	$offset = 0;
    }
    foreach my $kid ($dom->childNodes()) {
	my $type = $kid->getType();
	if ($type == XML_ELEMENT_NODE) {
	    my $subtree = $href->{$name}->[$offset]->{nodeset};
	    if (!defined($subtree)) {
		$subtree = $href->{$name}->[$offset]->{nodeset} = {};
	    }
	    $self->dom2hash($kid, $subtree);
	} elsif ($type == XML_TEXT_NODE) {
	    $cdata .= $kid->nodeValue();
	} else {
	    warn "$0 - dom2hash: Uknown node type '$type'";
	}
    }
    $href->{$name}->[$offset]->{content} = $self->xml_trim($cdata);
    if (my @attr = $dom->attributes) {
	foreach my $entry (@attr) {
	    $href->{$name}->[$offset]->{attr}->{$entry->nodeName()} = $entry->nodeValue();
	}
    }
}


sub parse {
    my ($self, $xml_file) = @_;
    my $root = $self->document_element($xml_file);

    $self->dom2hash($root, my $href = {});

    return $href;
}


sub normalize_file {
    my ($self, $file) = @_;
    
    $file =~ s/\/+/\//g;
    return $file;
}


sub get_file {
    my ($self, $file) = @_;
    my $root = $self->find_portal_root;
    my $abs_file = $self->normalize_file("$root/$file");

    return new TKL::File(root => $root, file => $abs_file);
}


sub locate_file {
    my ($self, $file) = @_;
    my $root = $self->find_portal_root;
    my $inside_portal = 1;

    $file = $self->normalize_file("$root/$file");

    my ($base, $f) = ($file =~ /(.*)\/([^\/]*)$/);

    do {
	my $tmp_file = "$base/$f";
	return $tmp_file if -e $tmp_file;
	if (-e "$base/tkl.config") {
	    $inside_portal = 0;
	}
	$base =~ s/\/[^\/]*$//;
    } while (length($base) && $inside_portal);

    return undef;
}
    

sub browse {
    my ($self, $dir) = @_;
    my $files = [];
    my $dirs = [];
    my $root = $self->{root};
    local *DIR;

    $dir = "" unless defined($dir);
    $dir = "$root/$dir/";
    
    opendir DIR, $dir or die "$0 - browse: Unable to open directory '$dir': $!";
    
    while (defined(my $entry = readdir DIR)) {
	next if $entry =~ /^\.\.?/;
        
	my $file = "$dir/$entry";

        next unless -r $file;
        
	if (-d $file) {
	    push @$dirs, $entry;
	} elsif ($entry =~ /\.$tkl_suffix$/) {
	    push @$files, new TKL::File(root => $root, file => $self->normalize_file($file));
	}
    }
    
    closedir DIR or warn "$0 - browse: Unable to close directory '$dir' properly: $!";
    
    return {files => $files, dirs => $dirs};
}



package XML::LibXML::Node;
use XML::LibXML;

## Here goes TKL extensions to the XML::LibXML package
## Methods should be appended with tkl_ to indicate their origin!

sub tkl_cdata {
    my $self = shift;
    my $cdata = "";
    my @children = $self->getChildnodes();

    foreach my $node (@children) {
	my $type = $node->getType();
	if ($type == XML_TEXT_NODE) {
	    $cdata .= $node->getData();
	} elsif ($type == XML_ELEMENT_NODE) {
	    $cdata .= $node->tkl_cdata();
	}
    }

    return $cdata;
}


1;

__END__

=head1 NAME

TKL - Base class of the TKL perl project.

=head1 SYNOPSIS

  use TKL;

  my $tkl = new TKL;
  my $portal_root = $tkl->find_portal_root();

=head1 DESCRIPTION

This is a collection of basic tools for manipulating TKL portals from Perl.
This package is unlikely to be of great interest on its own. You would
propably want to have a look at the more specialized packages, i.e.

 - TKL::File
 - TKL::OAI
 - TKL::User
 - TKL::Apache
 - TKL::Log
 - TKL::Settings
 - TKL::Task
 - TKL::URLcheck

before starting to work directly with TKL.pm. Anyway, all the packages
TKL::* (except TKL::Settings) inherits from TKL.pm and the methods
of this class is definitely worth reading.

=head2 Methods

This is a complete list of the methods provided by the TKL package:

=head3 new

The main constructor of the TKL/Perl framework. It should be called
this way:

  my $tkl = new TKL (setting1 => "xxxx", setting2 => "yyyy");
  
When instantiating a TKL object, the portal root must be specified in
one of the following ways:

 - explicitly by using the setting root => "/path/to/portal/root", or
 - implicitly by specifying the setting path => "/path/to/some/portal/dir", or
 - even more indirectly by invoking the Perl interpreter from within
   the particular TKL portal.

=head3 dom

Returns an instance of the class XML::LibXML.

=head3 document_element

This method returns the document element, i.e. the root XML element, of
the XML document specified. The method should be called this way:

  my $doc_element = $tkl->document_element("my_tkl_file.tkl");

The returned object is of the type XML::LibXML::Element.

=head3 check_path

Check if we're inside a TKL portal area and return the path to the
portal root. The method should be called this way:

  my $portal_root = $tkl->check_path("some/path/to/check");

If no portal root is found, i.e. we were not able to locate tkl.config,
undef is returned. In theory, this method should not be called directly,
since it is taken care of once by the constructor, if the portal
root was not explicitly specified in the "new TKL bla bla bla" line.

=head3 tkl_die

A TKL exception handler. This method is likely to be overwritten by
the subclasses, since the error handling is highly context sensitive.
Call the method this way:

  $tkl->tkl_die("Oups, something went terribly wrong!");

=head3 find_portal_root

Based upon the given context, attempt to find the portal root. If
the root instance attribute is specified, this is returned. Otherwise,
if the path instance attribute is given, try to deduce the portal
root from this information. And finally if nothing is specified,
use the current-working-directory of the Perl interpreter process to
deduce the portal root. The method is called simply by

  my $root = $tkl->find_portal_root();

=head3 get_user_info

Query the TKL admin user database, i.e. the content of the users.tkl
document. The method is called this way:

  my $user = $tkl->get_user_info("some_login_name");

The returned object is of the type TKL::User.

=head3 xml_trim

Convert some completely unknown text-string into something an XML parser
won't reject. The encoding scheme is defensive in the sense that everything
that could possibly give problems is escaped unintelligently. Call the
method like this:

  my $xml_string = $tkl->xml_trim("Unknown &<> &amp;&lt; string buffer");

=head3 string2xmlstring

Converts a non-XML string into XML:

  my $xml_string = $tkl->string2xmlstring("Buffer");

=head3 dom2hash

This method browses through a dom tree and constructs a nested hash. The
method is called this way:

  $tkl->dom2hash($node, my $href = {});

where the structure of $href is the following (FIXME: write example).

=head3 parse

This method essentially does the same as dom2hash except that the syntax
is simplified. It should be called this way:

  my $href = $tkl->parse("file_name.tkl");

=head3 normalize_file

Takes a file or path as argument and removes multiple /'es. The syntax
is this:

  my $normalized_version = $tkl->normalize_file("/tmp//subdir///test");

=head3 get_file

Open a portal TKL file and return a TKL::File object representing it:

  my $tkl_file = $tkl->get_file("/subdir1/subdir2/my_file.tkl");

The path to the file should be given relative to the portal root.

=head3 locate_file

Look for a file, and if it does not exists, look for it one directory
level up, and etc. This process continues until (a) the file was found,
in which case the absolute path to the file is returned, or (b) the
TKL portal root was reached, where undef is returned. The method is
called this way:

  my $filename = $tkl->locate_file("/subdir1/subdir2/candidate_file");

=head3 browse

Browse a directory for files and subdirectories. The method should be
called like this:

  my $result = $tkl->browse("subdir");

where subdir is the directory to browse relative to the TKL portal root.
The method returns a referenced hash with the following structure:

  $result = {
      		'files'	=> [$file1, $file2, ... ],
		'dirs'	=> ['dir1', 'dir2', ... ]
	    };
	    
where $file1 and $file2 ... are objects of the type TKL::File.

=head1 AUTHOR

Anders Snderberg Mortensen <sondberg@indexdata.dk>
Indexdata
2004/01/05

=head1 SEE-ALSO

Man-pages for the classes XML::LibXML::* and TKL::*.

=cut
