4 # FILE: Scout--OAIClient.php
5 # Provides a client for pulling data from OAI-PMH providers
6 # For protocol documentation, see:
7 # http://www.openarchives.org/OAI/openarchivesprotocol.html
10 # OAIClient(ServerUrl, Cache)
12 # ServerUrl($NewValue)
13 # - Change the base url of the remote repository
14 # MetadataPrefix($pfx)
15 # - Set the schema we will request from remote
16 # SetSpec($NewValue)
17 # - Restrict queries to a single set
19 # http://www.openarchives.org/OAI/openarchivesprotocol.html#Set
20 # GetIdentification()
21 # - Fetch identifying information about the remote repository
22 # GetFormats()
23 # - Fetch information about what schemas remote can serve
24 # GetRecords($start,$end)
25 # - Pull records in batches, optionally with date restrictions
26 # GetRecord($Id)
27 # - Pull a single record using a unique identifier
28 # MoreRecordsAvailable()
29 # - Determine if a batch pull is complete or not
30 # ResetRecordPointer()
31 # - Restart a batch pull from the beginning
32 # SetDebugLevel($NewLevel)
33 # - Determine verbosity
35 # Copyright 2008 Edward Almasy and Internet Scout
36 # http://scout.wisc.edu
39 require_once(
"XMLParser.php");
44 # ---- PUBLIC INTERFACE --------------------------------------------------
54 # set default debug level
55 $this->DebugLevel = 0;
60 # set default metadata prefix
63 # set default set specification for queries
66 $this->CacheSequenceNumber = 0;
69 $this->Cache = $Cache;
70 $this->UsingCache = is_dir($Cache);
71 if ($this->UsingCache == FALSE )
86 if ($NewValue != NULL)
90 return $this->ServerUrl;
101 if ($NewValue != NULL)
105 return $this->MetadataPrefix;
114 function SetSpec($NewValue =
"X-NOSETSPECVALUE-X")
116 if ($NewValue !=
"X-NOSETSPECVALUE-X")
120 return $this->SetSpec;
132 # query server for XML text
133 $XmlText = $this->PerformQuery(
"Identify");
134 $this->DebugOutVar(8,__METHOD__,
"XmlText",htmlspecialchars($XmlText));
136 # convert XML text into object
137 $Xml = simplexml_load_string($XmlText);
138 $this->DebugOutVar(9, __METHOD__,
"Xml", $Xml);
140 # if identification info was found
142 if (isset($Xml->Identify))
145 $Ident = $Xml->Identify;
146 $this->GetValFromXml($Ident,
"repositoryName",
"Name", $Info);
147 $this->GetValFromXml($Ident,
"adminEmail",
"Email", $Info);
148 $this->GetValFromXml($Ident,
"baseURL",
"URL", $Info);
151 # return info to caller
162 # query server for XML text
163 $XmlText = $this->PerformQuery(
"ListMetadataFormats");
164 $this->DebugOutVar(8,__METHOD__,
"XmlText",htmlspecialchars($XmlText));
166 # convert XML text into object
167 $Xml = simplexml_load_string($XmlText);
168 $this->DebugOutVar(9, __METHOD__,
"Xml", $Xml);
170 # if format info was found
172 if (isset($Xml->ListMetadataFormats->metadataFormat))
176 foreach ($Xml->ListMetadataFormats->metadataFormat as $Format)
178 $this->GetValFromXml(
179 $Format,
"metadataPrefix",
"Name", $Formats[$Index]);
180 $this->GetValFromXml(
181 $Format,
"schema",
"Schema", $Formats[$Index]);
182 $this->GetValFromXml(
183 $Format,
"metadataNamespace",
"Namespace",
189 # return info to caller
202 if( $this->Cache != NULL )
204 $cache_fname = sprintf(
"%s/%010x",
206 $this->CacheSequenceNumber);
207 $this->CacheSequenceNumber++;
210 if( $this->Cache == NULL or $this->UsingCache == FALSE )
212 # if we have resumption token from prior query
213 if (isset($this->ResumptionToken))
215 # use resumption token as sole argument
216 $Args[
"resumptionToken"] = $this->ResumptionToken;
220 # set up arguments for query
221 $Args[
"metadataPrefix"] = $this->MetadataPrefix;
222 if ($StartDate) { $Args[
"from"] = $StartDate; }
223 if ($EndDate) { $Args[
"until"] = $EndDate; }
224 if ($this->
SetSpec) { $Args[
"set"] = $this->SetSpec; }
227 # query server for XML text
228 $XmlText = $this->PerformQuery(
"ListRecords", $Args);
230 if( $this->Cache != NULL )
232 file_put_contents( $cache_fname, $XmlText );
237 # Get XML text from the cache
238 $XmlText = file_get_contents( $cache_fname );
241 $this->DebugOutVar(8, __METHOD__,
"XmlText",htmlspecialchars($XmlText));
243 return $this->GetRecordsFromXML($XmlText,
"listrecords" );
262 $Args[
"metadataPrefix"] = $this->MetadataPrefix;
263 $Args[
"identifier"] = $Id;
265 # query server for XML text
266 $XmlText = $this->PerformQuery(
"GetRecord", $Args);
267 $this->DebugOutVar(8, __METHOD__,
"XmlText",htmlspecialchars($XmlText));
269 return $this->GetRecordsFromXML($XmlText,
"getrecord" );
279 return isset($this->ResumptionToken) ? TRUE : FALSE;
287 unset($this->ResumptionToken);
288 $this->CacheSequenceNumber = 0;
298 $this->DebugLevel = $NewLevel;
302 # ---- PRIVATE INTERFACE -------------------------------------------------
305 private $MetadataPrefix;
308 private $ResumptionToken;
311 private $CacheSequenceNumber;
313 # perform OAI query and return resulting data to caller
314 private function PerformQuery($QueryVerb, $Args = NULL)
316 # open stream to OAI server
318 if (strpos($this->
ServerUrl,
"?") === FALSE)
320 $QueryUrl = $this->
ServerUrl.
"?verb=".$QueryVerb;
324 $QueryUrl = $this->
ServerUrl.
"&verb=".$QueryVerb;
329 foreach ($Args as $ArgName => $ArgValue)
331 $QueryUrl .=
"&".urlencode($ArgName).
"=".urlencode($ArgValue);
334 $FHndl = fopen($QueryUrl,
"r");
336 # if stream was successfully opened
338 if ($FHndl !== FALSE)
340 # while lines left in response
341 while (!feof($FHndl))
343 # read line from server and add it to text to be parsed
344 $Text .= fread($FHndl, 10000000);
348 # close OAI server stream
351 # return query result data to caller
355 # set array value if available in simplexml object
356 private function GetValFromXml($Xml, $SrcName, $DstName, &$Results)
358 if (isset($Xml->$SrcName))
360 $Results[$DstName] = trim($Xml->$SrcName);
364 # print variable contents if debug is above specified level
365 private function DebugOutVar($Level, $MethodName, $VarName, $VarValue)
367 if ($this->DebugLevel >= $Level)
369 print(
"\n<pre>".$MethodName.
"() ".$VarName.
" = \n");
375 # Recursively dump tags inside a metadata section, flattening them
377 private function DumpTagsRecursive(&$Records, $Index, $Parser, $ParentTagName=NULL)
379 $TagName = $Parser->GetTagName();
382 $StorageTagName = ($ParentTagName!==NULL) ?
383 $ParentTagName.
"/".$TagName : $TagName;
385 if ($Parser->SeekToChild() ){
386 $this->DumpTagsRecursive( $Records, $Index, $Parser, $StorageTagName );
387 $Parser->SeekToParent();
391 $Records[$Index][
"metadata"][$StorageTagName][] = $Parser->GetData();
393 }
while ($TagName = $Parser->NextTag());
396 # Query has been sent, we need to retrieve records that came from it.
397 private function GetRecordsFromXML($XmlText, $ParseTo ){
398 # create XML parser and pass it text
400 $Parser->ParseText($XmlText);
402 $this->DebugOutVar(9, __METHOD__,
"Parser", $Parser);
404 # if records were found
406 $ItemCount = $Parser->SeekTo(
"oai-pmh", $ParseTo,
"record");
413 # grab record identifier and date
414 $Records[$Index][
"identifier"]=$Parser->GetData(
"header",
416 $Records[$Index][
"datestamp"]=$Parser->GetData(
"header",
420 $SeekResult = $Parser->SeekTo(
"metadata");
423 $SeekResult = $Parser->SeekToChild();
426 $Records[$Index][
"format"] = $Parser->GetTagName();
427 $SeekResult = $Parser->SeekToChild();
430 $this->DumpTagsRecursive($Records, $Index, $Parser);
431 $Parser->SeekToParent();
433 $Parser->SeekToParent();
435 $Parser->SeekToParent();
438 # grab search info (if any)
439 $SeekResult = $Parser->SeekTo(
"about");
442 $SeekResult = $Parser->SeekTo(
"searchInfo");
445 $SeekResult = $Parser->SeekToChild();
448 $TagName = $Parser->GetTagName();
451 $Records[$Index][
"about"][
"SEARCHINFO"][$TagName][] =
453 }
while ($TagName = $Parser->NextTag());
454 $Parser->SeekToParent();
456 $Parser->SeekToParent();
458 $Parser->SeekToParent();
463 while ($Parser->NextItem());
466 # look for resumption token and save if found
467 $Parser->SeekToRoot();
468 $SeekResult = $Parser->SeekTo(
469 "oai-pmh",
"listrecords",
"resumptiontoken");
470 if ($SeekResult !== NULL)
472 $this->ResumptionToken = $Parser->GetData();
476 unset($this->ResumptionToken);
479 # return records to caller
ResetRecordPointer()
Clear any additional records available after last GetRecords().
ServerUrl($NewValue=NULL)
Get or set URL of target OAI repository server.
OAIClient($ServerUrl, $Cache=NULL)
Class constructor.
GetRecord($Id)
Get a single record from a repository server.
MoreRecordsAvailable()
Check whether more records are available after last GetRecords().
GetRecords($StartDate=NULL, $EndDate=NULL)
Retrieve records from repository server.
MetadataPrefix($NewValue=NULL)
Get or set metadata schema for records being retrieved.
SetSpec($NewValue="X-NOSETSPECVALUE-X")
Get or set specification of subset of records to be retrieved.
GetIdentification()
Retrieve identification information from repository server.
SetDebugLevel($NewLevel)
Set current debug output level.
GetFormats()
Retrieve list of available metadata formats from repository server.