<?php
/*
   Copyright (C) 2002-2003 Index Data Aps, www.indexdata.dk

   This file is part of TKLITE.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 dated June, 1991.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   A copy of the GNU General Public License is also available at
   <URL:http://www.gnu.org/copyleft/gpl.html>.  You may also obtain
   it by writing to the Free Software Foundation, Inc., 59 Temple
   Place - Suite 330, Boston, MA 02111-1307, USA.

   $Id: oai.php,v 1.16 2006/03/23 14:26:30 sondberg Exp $
*/

// Todo:
//  - Support for resumptionToken
//  - OAI sets

// Authors: Anders Sønderberg Mortensen and Sebastian Hammer

// Requested metadata format. Verbs such as Identify or ListSets carry no
// metadataPrefix, so fall back to the empty string instead of reading an
// undefined index.
$metadataPrefix = isset($_REQUEST['metadataPrefix']) ? $_REQUEST['metadataPrefix'] : '';

// The valid verbs together with their additional parameters:
// mandatory parameters are marked 1, optional ones 2.
$verbs = array(
    "ListRecords"         => array("metadataPrefix"=>1, "from"=>2, "until"=>2, "set"=>2),
    "Identify"            => array(),
    "GetRecord"           => array("metadataPrefix"=>1, "identifier"=>1),
    "ListIdentifiers"     => array("metadataPrefix"=>1, "from"=>2, "until"=>2, "set"=>2),
    "ListMetadataFormats" => array("identifier"=>2),
    "ListSets"            => array(),
);

// Generate list of supported metadata formats. get_metadata_formats()
// returns nothing when the schemas directory cannot be opened; in that case
// both lists stay null (ListMetadataFormats reports that condition itself).
$repository_info = get_metadata_formats();
$metadataformats = is_array($repository_info) ? $repository_info['formats'] : null;
$doctypes = is_array($repository_info) ? $repository_info['doctypes'] : null;

// XML declaration written at the very top of every response
$xml_header = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";

// The OAI-PMH root tag should look like this according to OAI/v2
$oai_pmh = <<<END
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
       	 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	 xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
         http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
END;

// Placeholder identity; do_oai_verb() overwrites the name and e-mail with
// values read from index.tkl when that file can be opened.
$adminEmail = 'Unknown@unknown.dk';
$repositoryName = 'Unknown';

// Regular-expression fragment (dots escaped) matching the identifier prefix
$id_prefix = 'oai:' . $repositoryName . '\.tkl\.indexdata\.com';


// Entry point for an OAI-PMH request.
//
// Builds the base URL from the server name/port and $dir, refuses requests
// that are not made from the portal root (no tkl.config in the current
// directory), reads the repository name and admin e-mail from index.tkl,
// writes the response envelope, runs sanity_check() and - if it passes -
// calls the function named by $verb. Always terminates the script.
//
//   $verb  name of the OAI verb; invoked as a function after validation
//   $dir   path appended to the host part to form the base URL
function do_oai_verb($verb, $dir) {
    global $oai_pmh;
    global $xml_header;
    global $oai_base_url;
    global $repositoryName;
    global $adminEmail;
    global $id_prefix;

    $oai_base_url = "http://" .
        $_SERVER['SERVER_NAME'] .
        ($_SERVER['SERVER_PORT'] != 80 ? (":" . $_SERVER['SERVER_PORT']) : '') .
        $dir;

    if (!file_exists('tkl.config')) {
        header("HTTP/1.0 400 OAI Requests must be directed at portal root.");
        echo("HTTP/1.0 400 OAI Requests must be directed at portal root.");
        die;
    }

    if ($d = domxml_open_file('index.tkl')) {
        $doc = $d->document_element();
        if (list($title) = dom_select_ext($doc, array('name'))) {
            // An empty <name/> element has no text child to read from.
            $text = $title->first_child();
            if ($text) {
                $repositoryName = $text->node_value();
            }
        }
        if (list($creator) = dom_select_ext($doc, array('creator'))) {
            $adminEmail = $creator->get_attribute('email');
        }
    }

    // The identifier prefix was built at load time from the placeholder
    // repository name. Rebuild it from the name actually in use, so that the
    // identifiers emitted by this script are accepted by sanity_check() and
    // stripped correctly by get_filename(). The name is quoted because the
    // prefix is embedded in "/"-delimited regular expressions.
    $id_prefix = "oai:" . preg_quote($repositoryName, "/") . "\\.tkl\\.indexdata\\.com";

    header("Content-Type: text/xml");

    echo $xml_header, "\n", $oai_pmh, "\n";
    echo "<responseDate>", get_utc(), "</responseDate>\n";
    // The URL is assembled from request-derived server variables; escape it
    // so it cannot break the XML document.
    echo "<request>", htmlspecialchars($oai_base_url, ENT_QUOTES), "</request>\n";

    if (sanity_check()) {
        $verb();
    }

    echo "</OAI-PMH>\n";
    die;
}


// Check if we have a valid OAI request.
//
// Validates the request held in the globals $verb, $from, $until,
// $metadataPrefix and $identifier plus the raw query string and the
// GET/POST parameters. On the first problem found, an OAI <error> element is
// echoed and 0 is returned; 1 is returned when everything checks out.
//
// NOTE(review): $verb, $from, $until and $identifier are not assigned
// anywhere in this file - presumably the including script (or
// register_globals) provides them; confirm.
function sanity_check () {
    global $verbs;
    global $verb;
    global $from;
    global $until;
    global $metadataPrefix, $metadataformats;
    global $identifier;
    global $id_prefix;

    // Check for multiple arguments in URI (this must be a joke - but it isn't!)
    $query = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
    $args = array();
    foreach (explode("&", $query) as $field) {
        if (preg_match("/(.*?)=/", $field, $match)) {
            $arg_name = $match[1];
            if (isset($args[$arg_name])) {
                // The name comes straight from the query string: escape it
                // before placing it inside the XML response.
                echo "<error code=\"badArgument\">Repeated " .
                    htmlspecialchars($arg_name, ENT_QUOTES) . " argument</error>\n";
                return 0;
            }
            $args[$arg_name] = 1;
        }
    }

    // The verb must be listed in $verbs and have an implementing function.
    if (!(is_string($verb) && isset($verbs[$verb]) && is_array($verbs[$verb])
          && function_exists($verb))) {
        echo "<error code=\"badVerb\">Unrecognized OAI verb</error>\n";
        return 0;
    }

    // Only look the parameter table up once the verb is known to be valid.
    $allowed = $verbs[$verb];

    foreach ($allowed as $p => $type) {		// Check that required arguments are there
        if (($type == 1) && empty($_REQUEST[$p])) {
            echo "<error code=\"badArgument\">$p must be specified</error>\n";
            return 0;
        }
    }

    // Check if we have unexpected arguments...
    foreach (array_merge($_GET, $_POST) as $req_name => $req_val) {
        if (empty($allowed[$req_name]) && strtolower($req_name) != "verb") {
            echo "<error code=\"badArgument\">Illegal argument</error>\n";
            return 0;
        }
    }

    // Check date pattern...
    if (strlen((string) $from) && !check_date_format($from)) {
        echo "<error code=\"badArgument\">Bad from argument</error>\n";
        return 0;
    }
    if (strlen((string) $until) && !check_date_format($until)) {
        echo "<error code=\"badArgument\">Bad until argument</error>\n";
        return 0;
    }

    // Check if we can provide the data in the specified metadata format...
    if (strlen((string) $metadataPrefix)) {
        if (!(isset($metadataformats[$metadataPrefix])
              && is_array($metadataformats[$metadataPrefix]))) {
            echo "<error code=\"cannotDisseminateFormat\">Unknown format.</error>\n";
            return 0;
        }
    }

    // Verify format of Identifier (if specified) ...
    // The pattern is anchored at the end as well, so nothing may follow the
    // ".tkl" suffix; the identifier is later turned into a file path.
    if (strlen((string) $identifier)) {
        if (!preg_match("/^$id_prefix:[A-Za-z0-9_\/-]*\.tkl$/D", $identifier)) {
            echo "<error code=\"idDoesNotExist\">Identifier not recognized.</error>\n";
            return 0;
        }
    }

    // Default is OKAY:
    return 1;
}



// Checks that $str is a UTC datestamp of the form YYYY-MM-DDThh:mm:ssZ.
//
// The whole string must match (nothing may follow the "Z"), the date must
// exist in the calendar (so February 30 is rejected) and the time must lie
// within 00:00:00 - 23:59:59.
//
// Returns 1 when the value is acceptable, 0 otherwise.
function check_date_format ($str) {
    if (!is_string($str)) {
        return 0;
    }

    // The D modifier makes "$" match only at the very end of the string.
    if (!preg_match("/^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)Z$/D", $str, $match)) {
        return 0;
    }

    $year = (int) $match[1];
    $month = (int) $match[2];
    $day = (int) $match[3];

    if (!checkdate($month, $day, $year)) {
        return 0;
    }

    if (((int) $match[4] > 23) or ((int) $match[5] > 59) or ((int) $match[6] > 59)) {
        return 0;
    }

    return 1;
}

// Renders the record stored in the file $path as an OAI <record> element
// in the format named by the global $metadataPrefix.
//
// The record's schema name is obtained through XML_getschema and the record
// is transformed with the stylesheet schemas/<schema>2<metadataPrefix>.xsl.
//
// Returns the <record> markup; an empty string when no such stylesheet
// exists; nothing (null) when the file cannot be read or is empty, when the
// transformation fails, or when the transformed output contains "<nothing".
function dump_record($path) {
    global $metadataPrefix;

    $ret = "";

    if (!($f = @file($path)))
        return;
    $rec = join('', $f);
    $t = new XML_getschema($rec);
    $schema = $t->schema();
    $confiltername = "$schema"."2"."$metadataPrefix.xsl";
    if (!file_exists("schemas/$confiltername")) {
        return $ret;
    }

    $xsl = xslt_create();
    $args = array('/_xml' => $rec);
    $result = xslt_process($xsl, 'arg:/_xml', "schemas/$confiltername",
        NULL, $args);
    // Release the processor right away; this function runs once per record.
    xslt_free($xsl);

    // A failed transformation must not produce a record with empty metadata.
    if (!is_string($result))
        return;

    // Drop the XML declaration / processing instructions from the output.
    $result = preg_replace("/<\?.*?\?>\s*/", "", $result);

    // "<nothing" in the output means this record is not to be exported.
    if (preg_match("/<nothing/", $result))
        return;

    $ret .= "<record>\n";
    $ret .= make_record_identifier($path);
    $ret .= "<metadata>\n";
    $ret .= "$result";
    $ret .= "</metadata>\n";
    $ret .= "</record>\n";

    return $ret;
}

// Walks the directory tree below $dir and concatenates the output of
// $handler for every exportable .tkl file.
//
//   $dir      directory to scan; empty means the current directory
//   $handler  name of a function taking a file path and returning markup
//   $active   non-zero when an enclosing directory was already marked
//             searchable; a directory.tkl with a non-empty <searchable>
//             element switches it on for this directory and everything below
//
// Files are filtered by the globals $from / $until (compared as strings
// against the file's UTC timestamp), $metadataPrefix and $set. Files whose
// document type cannot be determined skip the type-based filters and are
// passed to the handler as they are.
//
// Returns the collected markup; an empty string when nothing matched or the
// directory could not be opened.
function dump_records($dir, $handler, $active=0) {
    global $doctypes, $from, $until, $set, $metadataPrefix;
    $path = $dir ? $dir : ".";
    $ret = "";

    if ($dir_arr = @file("$path/directory.tkl")) {
        $dir_r = join('', $dir_arr);
        if (preg_match("/<searchable>\s*(.*?)\s*<\/searchable>/", $dir_r, $match)
            && $match[1])
            $active = 1;
    }

    // An unreadable directory simply contributes nothing.
    if (!($d = @opendir($path))) {
        return $ret;
    }

    while (false !== ($file = readdir($d))) {
        if (preg_match("/^\./", $file))
            continue;

        $item = "$path/$file";
        if (is_dir($item)) {
            $ret .= dump_records($item, $handler, $active);
            continue;
        }

        if (!$active || !preg_match("/.*.\.tkl$/", $file))
            continue;

        $time_stamp = get_file_timestamp($item);

        // Enforce date selection, if we have "from" specified...
        if (strlen((string) $from) && strcmp($time_stamp, $from) < 0)
            continue;

        // ditto if we have until specified...
        if (strlen((string) $until) && strcmp($time_stamp, $until) > 0)
            continue;

        // Identify the document type and look it up in
        // the list of OAI exported types...
        if ($doctype = get_doctype($item)) {

            // Check if this type is supported at all...
            if (!(isset($doctypes[$doctype]) && is_array($doctypes[$doctype])))
                continue;

            // Check if it is supported in the specified format...
            if (strlen((string) $metadataPrefix)
                && empty($doctypes[$doctype][$metadataPrefix]))
                continue;

            // If a set is specified, check that this record belongs to that set...
            if (strlen((string) $set) && $set != $doctype)
                continue;
        }

        $ret .= $handler($item);
    }

    // Release the handle; the recursion would otherwise keep one open per
    // directory until the script ends.
    closedir($d);

    return $ret;
}

// OAI verb ListRecords: prints every record collected by dump_records()
// with dump_record() as the per-file handler, or the "no records match"
// error when the collection is empty.
function ListRecords () {
    global $metadataPrefix;

    $records = dump_records("", "dump_record");

    if (!$records) {
        empty_result_set();
        return;
    }

    echo "<ListRecords>\n", $records, "</ListRecords>\n";
}


// Prints the OAI "noRecordsMatch" error element.
function empty_result_set () {
    $error = "<error code=\"noRecordsMatch\">Nothing satisfies the specification.</error>\n";
    print $error;
}


// Returns the name of the first element found in the XML file $item, which
// the callers use as the record's document type.
//
// The XML declaration is removed first; comments, processing instructions
// and a DOCTYPE in front of the root element are skipped. The name ends at
// the first whitespace character, "/" or ">".
//
// Returns null when the file cannot be read or contains no element.
function get_doctype ($item) {
    $lines = @file($item);
    if (!is_array($lines)) {
        return null;
    }

    $content = join('', $lines);
    $content = preg_replace("/.*?<\?xml.*?\?>/", "", $content);

    // First "<" that is not followed by "!" or "?" starts the root element.
    if (preg_match("/<([^>\s\/!?][^>\s\/]*)/", $content, $match)) {
        return $match[1];
    }

    return null;
}


// OAI verb Identify: prints the repository description built from the
// globals $repositoryName, $oai_base_url and $adminEmail, followed by an
// oai-identifier description block.
//
// The three values are XML-escaped before output, so a name or URL that
// contains "&" or "<" cannot break the response document.
function Identify () {
    global $oai_base_url;
    global $repositoryName;
    global $adminEmail;

    $name = htmlspecialchars((string) $repositoryName, ENT_QUOTES);
    $base_url = htmlspecialchars((string) $oai_base_url, ENT_QUOTES);
    $email = htmlspecialchars((string) $adminEmail, ENT_QUOTES);

    echo "<Identify>\n";
    echo "<repositoryName>$name</repositoryName>\n";
    echo "<baseURL>$base_url</baseURL>\n";
    echo "<protocolVersion>2.0</protocolVersion>\n";
    echo "<adminEmail>$email</adminEmail>\n";
    echo "<earliestDatestamp>1978-01-01T12:00:00Z</earliestDatestamp>\n";
    echo "<deletedRecord>no</deletedRecord>\n";
    echo "<granularity>YYYY-MM-DDThh:mm:ssZ</granularity>\n";
    echo <<<END
<description>
  <oai-identifier xmlns="http://www.openarchives.org/OAI/2.0/oai-identifier"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd">
    <scheme>oai</scheme> 
END;
    echo "<repositoryIdentifier>$name.tkl.indexdata.com</repositoryIdentifier>";
    echo "<delimiter>:</delimiter>";
    echo "<sampleIdentifier>oai:$name.tkl.indexdata.com:links/stuff.tkl</sampleIdentifier>";
    echo "</oai-identifier>";
    echo "</description>";

    echo "</Identify>\n";
}


// Returns the tkl filename for an OAI identifier by removing the leading
// "<id_prefix>:" part; $id_prefix (a global) is used as a regular-expression
// fragment. An identifier without that prefix is returned as it is.
function get_filename ($id) {
    global $id_prefix;

    $prefix_pattern = "/^$id_prefix:/";
    $filename = preg_replace($prefix_pattern, "", $id);

    return $filename;
}

// OAI verb GetRecord: prints the record named by the global $identifier.
//
// An identifier that does not map to an existing file is answered with
// idDoesNotExist. When the file exists but dump_record() produces no
// output for the requested format, cannotDisseminateFormat is reported.
// An empty <GetRecord> element is never written.
function GetRecord () {
    global $metadataPrefix, $identifier;

    $path = get_filename($identifier);

    if (!is_file("./$path")) {
        echo "<error code=\"idDoesNotExist\">Identifier not recognized.</error>\n";
        return;
    }

    $record = dump_record("./$path");
    if (!$record) {
        echo "<error code=\"cannotDisseminateFormat\">Unknown format.</error>\n";
        return;
    }

    echo "<GetRecord>\n";
    echo $record;
    echo "</GetRecord>\n";
}

// OAI verb ListIdentifiers: prints the headers collected by dump_records()
// with make_record_identifier() as the per-file handler, or the "no records
// match" error when the collection is empty.
function ListIdentifiers () {
    $headers = dump_records("", "make_record_identifier");

    if (!$headers) {
        empty_result_set();
        return;
    }

    echo "<ListIdentifiers>\n", $headers, "</ListIdentifiers>\n";
}


// Returns the UTC datestamp for the file $path, derived from the ctime
// field of stat() (element 10 of the stat array).
function get_file_timestamp ($path) {
    $info = stat($path);
    $changed = $info['ctime'];

    return get_utc($changed);
}


// Builds the OAI <header> element for the file $path: the identifier
// (repository prefix plus the path without a leading "./"), the file's
// datestamp and, when a document type is found, its setSpec.
function make_record_identifier ($path) {
    global $repositoryName;

    $relative = preg_replace("'^\./'", "", $path);
    $datestamp = get_file_timestamp($path);

    return "<header>\n" .
        "<identifier>oai:$repositoryName.tkl.indexdata.com:" . $relative . "</identifier>\n" .
        "<datestamp>$datestamp</datestamp>\n" .
        insert_setSpec($path) .
        "</header>\n";
}


// Returns a <setSpec> element carrying the document type of the file
// $path, or an empty string when get_doctype() yields nothing.
function insert_setSpec ($path) {
    $doctype = get_doctype($path);

    return strlen($doctype) ? "<setSpec>$doctype</setSpec>\n" : "";
}

// OAI verb ListSets: prints one <set> per document type in the global
// $doctypes. The set name is read from schemas/<type>.xsd through
// setName_class, which reports "N/A" when it collected no name.
function ListSets () {
    global $doctypes, $root;

    echo "<ListSets>\n";

    foreach ($doctypes as $type => $unused) {
        echo "<set>\n";
        echo "<setSpec>$type</setSpec>\n";

        // Element path whose content is handed to set_name_handler
        $handlers = array('/xs:schema/xs:element/xs:annotation/xs:documentation/helptext' => 'set_name_handler');

        $reader = new setName_class(array('xmlfile' => "schemas/$type.xsd"));
        $reader->set_option('callbacks', $handlers);
        $reader->parse();
        $set_name = $reader->get_name();
        $reader->close();
        unset($reader);

        echo "<setName>$set_name</setName>\n", "</set>\n";
    }

    echo "</ListSets>\n";
}


// OAI verb ListMetadataFormats.
//
// Without an identifier every known format (global $metadataformats) is
// listed. With an identifier only the formats registered in $doctypes for
// that record's document type are listed; idDoesNotExist is reported when
// the identifier does not map to an existing file and noMetadataFormats
// when no format is registered for its document type.
//
// Terminates the script when $metadataformats was never initialised.
function ListMetadataFormats () {
    global $metadataformats, $identifier, $doctypes;

    if (!is_array($metadataformats)) {
        die("<b>Fatal:</b> List of metadataformats not initialized");
    }

    // If $identifier is set, we want to list the formats available
    // for this particular identifier, otherwise list all we got...
    if (strlen((string) $identifier)) {
        $filename = get_filename($identifier);
        if (!is_file($filename)) {
            echo "<error code=\"idDoesNotExist\">Identifier not recognized.</error>\n";
            return;
        }

        $doctype = get_doctype($filename);
        $available_formats = array();
        if (strlen((string) $doctype) && isset($doctypes[$doctype])
            && is_array($doctypes[$doctype])) {
            $available_formats = $doctypes[$doctype];
        }

        if (!$available_formats) {
            echo "<error code=\"noMetadataFormats\">No metadata formats available for this item.</error>\n";
            return;
        }
    } else {
        $available_formats = $metadataformats;
    }

    echo "<ListMetadataFormats>\n";

    foreach ($available_formats as $prefix => $dummy) {
        echo "<metadataFormat>\n";
        echo "<metadataPrefix>$prefix</metadataPrefix>\n";

        // A format may have no schema details when none were found for it.
        if (isset($metadataformats[$prefix]) && is_array($metadataformats[$prefix])) {
            foreach ($metadataformats[$prefix] as $key => $value) {
                echo "<$key>$value</$key>\n";
            }
        }

        echo "</metadataFormat>\n";
    }

    echo "</ListMetadataFormats>\n";
}

// Looks for the presence of xxx2yyy.xsl stylesheets
// in the schemas portal directory. xxx is the document
// type and yyy is the metadataPrefix.
//
// The directory scanned is <DOCUMENT_ROOT>/<$root>/schemas, $root being a
// global. For each stylesheet the first tag starting with the prefix and
// carrying an xsi:schemaLocation attribute supplies the format's namespace
// (text before the first whitespace) and schema (the remainder).
//
// Returns array('formats' => prefix => array('schema', 'metadataNamespace'),
//               'doctypes' => doctype => prefix => 1),
// or nothing (null) when the directory cannot be opened.
function get_metadata_formats () {
    global $root;
    $formats = array();
    $doctypes = array();
    $format_dir = "schemas";
    $abs_dir = $_SERVER['DOCUMENT_ROOT'] . "/$root/" . $format_dir;

    if (!$d = @opendir($abs_dir)) {
        return;
    }

    // Strict comparison: an entry named "0" must not end the loop.
    while (false !== ($f = readdir($d))) {
        if (!preg_match("/^(.*?)2(.*?)\.xsl$/", $f, $match)) {
            continue;
        }

        $doctype = $match[1];
        $prefix = $match[2];
        $doctypes[$doctype][$prefix] = 1;

        // An unreadable stylesheet still registers the doctype/prefix pair
        // but contributes no schema details.
        $lines = @file("$abs_dir/$f");
        if (!is_array($lines)) {
            continue;
        }
        $ss = join('', $lines);

        // Hack to acquire schema definition: Look for first tag
        // with the right prefix. The prefix comes from a file name, so it is
        // quoted before being placed in the pattern.
        $tag_pattern = "/<" . preg_quote($prefix, "/") .
            "[^>]*xsi:schemaLocation=\"(.*?)\"/s";

        if (preg_match($tag_pattern, $ss, $location)) {
            if (preg_match("/(.*?)\s+(.*)/s", $location[1], $smatch)) {
                $formats[$prefix]['schema'] = $smatch[2];
                $formats[$prefix]['metadataNamespace'] = $smatch[1];
            }
        }
    }

    closedir($d);

    return array('formats' => $formats, 'doctypes' => $doctypes);
}


// Formats a Unix timestamp as a UTC datestamp, YYYY-MM-DDThh:mm:ssZ.
//
//   $time  Unix timestamp; when empty or 0 the current time is used
//
// gmdate() formats directly in UTC, so the result does not depend on the
// server's time zone or on whether daylight saving time was in effect at
// $time.
function get_utc ($time='') {
    if (!$time)
        $time = time();

    return gmdate('Y-m-d\TH:i:s\Z', (int) $time);
}


// Turns a name => value array into a string of XML attributes, each one
// preceded by a space. Entries with an empty (falsy) value are left out.
function make_attr ($attr) {
    $pieces = array();

    foreach ($attr as $name => $value) {
        if (!$value) {
            continue;
        }
        $pieces[] = " $name=\"$value\"";
    }

    return join('', $pieces);
}


// Collects the requested verb and the value of each parameter that the
// $verbs table lists for that verb, all read from $_REQUEST, into a
// name => value array with 'verb' as its first entry.
function make_attr_list () {
    global $verbs;

    $requested = $_REQUEST['verb'];

    $attrs = array();
    $attrs['verb'] = $requested;

    foreach ($verbs[$requested] as $param => $kind) {
        $attrs[$param] = $_REQUEST[$param];
    }

    return $attrs;
}


// Used by the ListSets function!
// XMLstream subclass that remembers the character data seen by its
// set_name_handler callback and hands it out as the set name.
class setName_class extends XMLstream {
    // Name captured by set_name_handler; unset until the callback has run
    var $name;

    // Callback: stores the parser's current character data as the name.
    function set_name_handler ($tag, $attr) {
        $this->name = $this->cdata;
    }

    // Returns the captured name, or "N/A" when none was captured.
    function get_name () {
        if (strlen($this->name)) {
            return $this->name;
        }

        return "N/A";
    }
}

?>
