CWIS Developer Documentation
OAIClient.php
Go to the documentation of this file.
1 <?PHP
2 
3 #
4 # FILE: Scout--OAIClient.php
5 # Provides a client for pulling data from OAI-PMH providers
6 # For protocol documentation, see:
7 # http://www.openarchives.org/OAI/openarchivesprotocol.html
8 #
9 # METHODS PROVIDED:
10 # OAIClient(ServerUrl, Cache)
11 # - constructor
12 # ServerUrl(NewValue)
13 # - Change the base url of the remote repository
14 # MetadataPrefix($pfx)
15 # - Set the schema we will request from remote
16 # SetSpec($set)
17 # - Restrict queries to a single set
18 # for details, see
19 # http://www.openarchives.org/OAI/openarchivesprotocol.html#Set
20 # GetIdentification()
21 # - Fetch identifying information about the remote repository
22 # GetFormats()
23 # - Fetch information about what schemas remote can serve
24 # GetRecords($start,$end)
25 # - Pull records in batches, optionally with date restrictions
26 # GetRecord($id)
27 # - Pull a single record using a unique identifier
28 # MoreRecordsAvailable()
29 # - Determine if a batch pull is complete or not
30 # ResetRecordPointer()
31 # - Restart a batch pull from the beginning
32 # SetDebugLevel()
33 # - Determine verbosity
34 #
35 # Copyright 2008 Edward Almasy and Internet Scout
36 # http://scout.wisc.edu
37 #
38 
39 require_once("XMLParser.php");
40 
41 
/**
* Client for pulling records from OAI-PMH repository servers.
* For protocol documentation, see:
*     http://www.openarchives.org/OAI/openarchivesprotocol.html
*/
class OAIClient
{

    # ---- PUBLIC INTERFACE --------------------------------------------------

    /**
    * Class constructor.
    * @param string $ServerUrl Base URL of the target OAI repository server.
    * @param string $Cache Name of directory used to cache ListRecords
    *       responses.  If the directory already exists, GetRecords() replays
    *       the responses stored in it instead of querying the server;
    *       otherwise the directory is created and each response retrieved
    *       from the server is written into it.  (OPTIONAL)
    */
    public function __construct($ServerUrl, $Cache = NULL)
    {
        # set default debug level
        $this->DebugLevel = 0;

        # save OAI server URL
        $this->ServerUrl = $ServerUrl;

        # set default metadata prefix
        $this->MetadataPrefix = "oai_dc";

        # set default set specification for queries
        $this->SetSpec = NULL;

        # start with no batch in progress
        $this->ResumptionToken = NULL;

        # set up response cache (if requested)
        $this->Cache = NULL;
        $this->UsingCache = FALSE;
        $this->CacheSequenceNumber = 0;
        if ($Cache !== NULL)
        {
            $this->Cache = $Cache;

            # an existing directory is read from, a new one is written to
            $this->UsingCache = is_dir($Cache);
            if ($this->UsingCache == FALSE)
            {
                mkdir($Cache);
            }
        }
    }

    /**
    * PHP 4-style constructor, retained only so that code which calls it
    * by name keeps working.  PHP 8 no longer treats a method named after
    * the class as a constructor, so the real work lives in __construct().
    * @param string $ServerUrl Base URL of the target OAI repository server.
    * @param string $Cache Name of cache directory.  (OPTIONAL)
    * @deprecated Use "new OAIClient(...)" instead.
    */
    public function OAIClient($ServerUrl, $Cache = NULL)
    {
        $this->__construct($ServerUrl, $Cache);
    }

    /**
    * Get or set URL of target OAI repository server.
    * @param string $NewValue New base URL.  (OPTIONAL)
    * @return string Current base URL.
    */
    public function ServerUrl($NewValue = NULL)
    {
        if ($NewValue != NULL)
        {
            $this->ServerUrl = $NewValue;
        }
        return $this->ServerUrl;
    }

    /**
    * Get or set metadata schema for records being retrieved.
    * @param string $NewValue New metadata prefix.  (OPTIONAL)
    * @return string Current metadata prefix (defaults to "oai_dc").
    */
    public function MetadataPrefix($NewValue = NULL)
    {
        if ($NewValue != NULL)
        {
            $this->MetadataPrefix = $NewValue;
        }
        return $this->MetadataPrefix;
    }

    /**
    * Get or set specification of subset of records to be retrieved.
    * Pass NULL to clear a previously-set specification.
    * @param string $NewValue New set specification.  (OPTIONAL)
    * @return string Current set specification, or NULL if none is set.
    */
    public function SetSpec($NewValue = "X-NOSETSPECVALUE-X")
    {
        # (strict comparison, so that a value such as 0 cannot be mistaken
        #       for the "no argument supplied" placeholder)
        if ($NewValue !== "X-NOSETSPECVALUE-X")
        {
            $this->SetSpec = $NewValue;
        }
        return $this->SetSpec;
    }

    /**
    * Retrieve identification information from repository server.
    * @return array Values found in the Identify response, with the index
    *       "Name" (repositoryName), "Email" (adminEmail), and "URL"
    *       (baseURL).  Values missing from the response are omitted, so
    *       the array is empty if the query failed.
    */
    public function GetIdentification()
    {
        # query server for XML text
        $XmlText = $this->PerformQuery("Identify");
        $this->DebugOutVar(8, __METHOD__, "XmlText", htmlspecialchars($XmlText));

        # convert XML text into object
        $Xml = simplexml_load_string($XmlText);
        $this->DebugOutVar(9, __METHOD__, "Xml", $Xml);

        # if identification info was found
        $Info = array();
        if (isset($Xml->Identify))
        {
            # extract info
            $Ident = $Xml->Identify;
            $this->GetValFromXml($Ident, "repositoryName", "Name", $Info);
            $this->GetValFromXml($Ident, "adminEmail", "Email", $Info);
            $this->GetValFromXml($Ident, "baseURL", "URL", $Info);
        }

        # return info to caller
        return $Info;
    }

    /**
    * Retrieve list of available metadata formats from repository server.
    * @return array One entry per format, in the order the server listed
    *       them, each an array with the index "Name" (metadataPrefix),
    *       "Schema" (schema), and "Namespace" (metadataNamespace).  Values
    *       missing from the response are omitted.
    */
    public function GetFormats()
    {
        # query server for XML text
        $XmlText = $this->PerformQuery("ListMetadataFormats");
        $this->DebugOutVar(8, __METHOD__, "XmlText", htmlspecialchars($XmlText));

        # convert XML text into object
        $Xml = simplexml_load_string($XmlText);
        $this->DebugOutVar(9, __METHOD__, "Xml", $Xml);

        # if format info was found
        $Formats = array();
        if (isset($Xml->ListMetadataFormats->metadataFormat))
        {
            # extract info for each format
            foreach ($Xml->ListMetadataFormats->metadataFormat as $FormatXml)
            {
                # (built separately so that every entry is an array, even
                #       when none of the expected values are present)
                $Format = array();
                $this->GetValFromXml(
                        $FormatXml, "metadataPrefix", "Name", $Format);
                $this->GetValFromXml(
                        $FormatXml, "schema", "Schema", $Format);
                $this->GetValFromXml(
                        $FormatXml, "metadataNamespace", "Namespace", $Format);
                $Formats[] = $Format;
            }
        }

        # return info to caller
        return $Formats;
    }

    /**
    * Retrieve records from repository server.  Records arrive in batches:
    * call this repeatedly for as long as MoreRecordsAvailable() returns
    * TRUE.  The date arguments are only used on the first call of a
    * batch pull, because follow-up calls send just the resumption token.
    * @param string $StartDate Earliest datestamp to retrieve.  (OPTIONAL)
    * @param string $EndDate Latest datestamp to retrieve.  (OPTIONAL)
    * @return array Records, each an array with the index "identifier" and
    *       "datestamp", plus (when present in the response) "format",
    *       "metadata", and "about".
    */
    public function GetRecords($StartDate = NULL, $EndDate = NULL)
    {
        # if caching is enabled, work out which cache file this batch maps to
        $CacheFileName = NULL;
        if ($this->Cache != NULL)
        {
            $CacheFileName = sprintf("%s/%010x",
                    $this->Cache,
                    $this->CacheSequenceNumber);
            $this->CacheSequenceNumber++;
        }

        # if we are not replaying responses from an existing cache
        if (($CacheFileName === NULL) || ($this->UsingCache == FALSE))
        {
            # if we have resumption token from prior query
            $Args = array();
            if (isset($this->ResumptionToken))
            {
                # use resumption token as sole argument
                $Args["resumptionToken"] = $this->ResumptionToken;
            }
            else
            {
                # set up arguments for query
                $Args["metadataPrefix"] = $this->MetadataPrefix;
                if ($StartDate) {  $Args["from"] = $StartDate;  }
                if ($EndDate) {  $Args["until"] = $EndDate;  }
                if ($this->SetSpec) {  $Args["set"] = $this->SetSpec;  }
            }

            # query server for XML text
            $XmlText = $this->PerformQuery("ListRecords", $Args);

            # save response in newly-created cache (if any)
            if ($CacheFileName !== NULL)
            {
                file_put_contents($CacheFileName, $XmlText);
            }
        }
        else
        {
            # get XML text from the cache (a missing file means that we
            #       have run past the last cached response, which is
            #       treated as an empty response)
            $XmlText = is_readable($CacheFileName)
                    ? file_get_contents($CacheFileName) : FALSE;
            if ($XmlText === FALSE)
            {
                $XmlText = "";
            }
        }

        $this->DebugOutVar(8, __METHOD__, "XmlText", htmlspecialchars($XmlText));

        return $this->GetRecordsFromXML($XmlText, "listrecords");
    }

    /**
    * Get a single record from a repository server.
    * NOTE: the stored resumption token is refreshed from every response,
    * so calling this in the middle of a batch pull ends that batch.
    * @param string $Id Unique identifier of record to retrieve.
    * @return array Records found (normally zero or one), in the same
    *       format as returned by GetRecords().
    */
    public function GetRecord($Id)
    {
        $Args = array(
                "metadataPrefix" => $this->MetadataPrefix,
                "identifier" => $Id,
                );

        # query server for XML text
        $XmlText = $this->PerformQuery("GetRecord", $Args);
        $this->DebugOutVar(8, __METHOD__, "XmlText", htmlspecialchars($XmlText));

        return $this->GetRecordsFromXML($XmlText, "getrecord");
    }

    /**
    * Check whether more records are available after last GetRecords().
    * @return bool TRUE if the last response supplied a resumption token,
    *       otherwise FALSE.
    */
    public function MoreRecordsAvailable()
    {
        return isset($this->ResumptionToken);
    }

    /**
    * Clear any additional records available after last GetRecords(), so
    * that the next call starts a batch pull (and any cache) from the
    * beginning.
    */
    public function ResetRecordPointer()
    {
        $this->ResumptionToken = NULL;
        $this->CacheSequenceNumber = 0;
    }

    /**
    * Set current debug output level.  Raw XML responses are printed at
    * level 8 and above, and parsed structures at level 9 and above.
    * @param int $NewLevel New debug output level.
    */
    public function SetDebugLevel($NewLevel)
    {
        $this->DebugLevel = $NewLevel;
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    # base URL of OAI repository server
    private $ServerUrl;
    # metadata schema requested from server
    private $MetadataPrefix;
    # set that queries are restricted to (NULL for no restriction)
    private $SetSpec;
    # threshold for debug output (0 for no output)
    private $DebugLevel = 0;
    # token for continuing batch pull (NULL if no more records available)
    private $ResumptionToken = NULL;
    # name of cache directory (NULL if caching is not in use)
    private $Cache = NULL;
    # TRUE if responses are being read from (rather than written to) cache
    private $UsingCache = FALSE;
    # index of next cached response to read or write
    private $CacheSequenceNumber = 0;

    /**
    * Perform OAI query and return resulting data to caller.
    * @param string $QueryVerb OAI-PMH verb to send.
    * @param array $Args Query arguments, with argument names for the
    *       index.  (OPTIONAL)
    * @return string Response text, or an empty string if the server could
    *       not be reached.
    */
    private function PerformQuery($QueryVerb, $Args = NULL)
    {
        # assemble query URL (base URL may already include a query string)
        $Separator = (strpos($this->ServerUrl, "?") === FALSE) ? "?" : "&";
        $QueryUrl = $this->ServerUrl.$Separator."verb=".$QueryVerb;
        if ($Args)
        {
            foreach ($Args as $ArgName => $ArgValue)
            {
                $QueryUrl .= "&".urlencode($ArgName)."=".urlencode($ArgValue);
            }
        }

        # open stream to OAI server
        $FHndl = fopen($QueryUrl, "r");

        # if stream was successfully opened
        $Text = "";
        if ($FHndl !== FALSE)
        {
            # while data left in response
            while (!feof($FHndl))
            {
                # read chunk from server and add it to text to be parsed
                $Chunk = fread($FHndl, 10000000);

                # (bail out on a read error, which would otherwise leave
                #       us looping forever waiting for end of file)
                if ($Chunk === FALSE) {  break;  }
                $Text .= $Chunk;
            }

            # close OAI server stream (only valid if the open succeeded)
            fclose($FHndl);
        }

        # return query result data to caller
        return $Text;
    }

    /**
    * Set array value if available in simplexml object.
    * @param object $Xml SimpleXML element to look in.
    * @param string $SrcName Name of child element to look for.
    * @param string $DstName Index to store the value under.
    * @param array $Results Array to add the value to.  (REFERENCE)
    */
    private function GetValFromXml($Xml, $SrcName, $DstName, &$Results)
    {
        if (isset($Xml->$SrcName))
        {
            $Results[$DstName] = trim($Xml->$SrcName);
        }
    }

    /**
    * Print variable contents if debug level is at or above specified level.
    * @param int $Level Minimum debug level at which to print.
    * @param string $MethodName Name of calling method.
    * @param string $VarName Name of variable being printed.
    * @param mixed $VarValue Value to print.
    */
    private function DebugOutVar($Level, $MethodName, $VarName, $VarValue)
    {
        if ($this->DebugLevel >= $Level)
        {
            print("\n<pre>".$MethodName."() ".$VarName." = \n");
            print_r($VarValue);
            print("</pre>\n");
        }
    }

    /**
    * Recursively dump tags inside a metadata section, flattening them
    * as we go.  Values are stored under the tag name, prefixed with the
    * names of any enclosing tags (separated by "/").
    * @param array $Records Records being assembled.  (REFERENCE)
    * @param int $Index Index of record to add values to.
    * @param object $Parser XML parser, positioned on first tag to dump.
    * @param string $ParentTagName Flattened name of enclosing tag.  (OPTIONAL)
    */
    private function DumpTagsRecursive(
            &$Records, $Index, $Parser, $ParentTagName = NULL)
    {
        $TagName = $Parser->GetTagName();
        do
        {
            $StorageTagName = ($ParentTagName !== NULL)
                    ? $ParentTagName."/".$TagName : $TagName;

            # if tag has children
            if ($Parser->SeekToChild())
            {
                # descend into tag and then return to our own level
                $this->DumpTagsRecursive(
                        $Records, $Index, $Parser, $StorageTagName);
                $Parser->SeekToParent();
            }
            else
            {
                # save value of tag
                $Records[$Index]["metadata"][$StorageTagName][] =
                        $Parser->GetData();
            }
        } while ($TagName = $Parser->NextTag());
    }

    /**
    * Extract records and resumption token from the response to a query.
    * @param string $XmlText Response text.
    * @param string $ParseTo Lower-case name of the tag that encloses the
    *       records ("listrecords" or "getrecord").
    * @return array Records found, numerically indexed.
    */
    private function GetRecordsFromXML($XmlText, $ParseTo)
    {
        # create XML parser and pass it text
        $Parser = new XMLParser();
        $Parser->ParseText($XmlText);

        $this->DebugOutVar(9, __METHOD__, "Parser", $Parser);

        # if records were found
        $Records = array();
        $ItemCount = $Parser->SeekTo("oai-pmh", $ParseTo, "record");
        if ($ItemCount)
        {
            # for each record
            $Index = 0;
            do
            {
                # grab record identifier and date
                $Records[$Index]["identifier"] =
                        $Parser->GetData("header", "identifier");
                $Records[$Index]["datestamp"] =
                        $Parser->GetData("header", "datestamp");

                # grab metadata
                $SeekResult = $Parser->SeekTo("metadata");
                if ($SeekResult)
                {
                    $SeekResult = $Parser->SeekToChild();
                    if ($SeekResult)
                    {
                        # (first tag inside metadata names the format)
                        $Records[$Index]["format"] = $Parser->GetTagName();
                        $SeekResult = $Parser->SeekToChild();
                        if ($SeekResult)
                        {
                            $this->DumpTagsRecursive($Records, $Index, $Parser);
                            $Parser->SeekToParent();
                        }
                        $Parser->SeekToParent();
                    }
                    $Parser->SeekToParent();
                }

                # grab search info (if any)
                $SeekResult = $Parser->SeekTo("about");
                if ($SeekResult)
                {
                    $SeekResult = $Parser->SeekTo("searchInfo");
                    if ($SeekResult)
                    {
                        $SeekResult = $Parser->SeekToChild();
                        if ($SeekResult)
                        {
                            $TagName = $Parser->GetTagName();
                            do
                            {
                                $Records[$Index]["about"]["SEARCHINFO"][$TagName][] =
                                        $Parser->GetData();
                            } while ($TagName = $Parser->NextTag());
                            $Parser->SeekToParent();
                        }
                        $Parser->SeekToParent();
                    }
                    $Parser->SeekToParent();
                }

                $Index++;
            }
            while ($Parser->NextItem());
        }

        # look for resumption token
        $Parser->SeekToRoot();
        $SeekResult = $Parser->SeekTo(
                "oai-pmh", "listrecords", "resumptiontoken");
        $Token = NULL;
        if ($SeekResult !== NULL)
        {
            # (the final response of a batch carries an EMPTY token element
            #       to signal that the list is complete, so a blank value
            #       must not be mistaken for a token)
            $Data = $Parser->GetData();
            if (is_scalar($Data))
            {
                $Data = trim((string)$Data);
                if ($Data !== "") {  $Token = $Data;  }
            }
        }

        # save token if found, otherwise clear any previous token
        $this->ResumptionToken = $Token;

        # return records to caller
        return $Records;
    }

}
484 
485 ?>
ResetRecordPointer()
Clear any additional records available after last GetRecords().
Definition: OAIClient.php:285
ServerUrl($NewValue=NULL)
Get or set URL of target OAI repository server.
Definition: OAIClient.php:84
OAIClient($ServerUrl, $Cache=NULL)
Class constructor.
Definition: OAIClient.php:52
GetRecord($Id)
Get a single record from a repository server.
Definition: OAIClient.php:260
MoreRecordsAvailable()
Check whether more records are available after last GetRecords().
Definition: OAIClient.php:277
GetRecords($StartDate=NULL, $EndDate=NULL)
Retrieve records from repository server.
Definition: OAIClient.php:200
PHP
Definition: OAIClient.php:39
MetadataPrefix($NewValue=NULL)
Get or set metadata schema for records being retrieved.
Definition: OAIClient.php:99
SetSpec($NewValue="X-NOSETSPECVALUE-X")
Get or set specification of subset of records to be retrieved.
Definition: OAIClient.php:114
GetIdentification()
Retrieve identification information from repository server.
Definition: OAIClient.php:130
SetDebugLevel($NewLevel)
Set current debug output level.
Definition: OAIClient.php:296
GetFormats()
Retrieve list of available metadata formats from repository server.
Definition: OAIClient.php:160