5 # Part of the ScoutLib application support library
6 # Copyright 2002-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/
15 # ---- PUBLIC INTERFACE --------------------------------------------------
31 # set default debug level
34 # set default encoding
41 # query server (or cache) for XML text
45 # create XML parser and parse text
47 if ($this->DebugLevel > 3) {
$Parser->SetDebugLevel($this->DebugLevel - 3); }
48 $this->Parser->ParseText($this->XmlText);
50 if ($this->DebugLevel) { print(
"RSSClient->RSSClient() returned ".strlen($this->XmlText).
" characters from server query<br>\n"); }
60 # if new RSS server URL supplied
61 if (($NewValue != NULL) && ($NewValue != $this->
ServerUrl))
66 # re-read XML from server at new URL
72 # create new XML parser and parse text
74 if ($this->DebugLevel > 3) {
$Parser->SetDebugLevel($this->DebugLevel - 3); }
75 $this->Parser->ParseText($this->XmlText);
78 # return RSS server URL to caller
90 # if new encoding supplied
91 if (($NewValue != NULL) && ($NewValue != $this->
Encoding))
96 # re-read XML from server
102 # create new XML parser and parse text
104 if ($this->DebugLevel > 3) {
$Parser->SetDebugLevel($this->DebugLevel - 3); }
105 $this->Parser->ParseText($this->XmlText);
108 # return encoding to caller
119 # if neither the XML file nor the HTTP response headers specify an
120 # encoding, there is an overwhelming chance that it's ISO-8859-1, so
121 # use it as the default
124 # only get up to the encoding portion of the XML declaration
125 # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
129 $EncName =
'[A-Za-z]([A-Za-z0-9._]|-)*';
130 $VersionInfo =
"{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
131 $EncodingDecl =
"{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
132 $XMLDecl =
"<\?xml{$VersionInfo}({$EncodingDecl})?";
133 $RegEx =
"/{$XMLDecl}/";
135 # try to find the encoding, index 3 will be set if encoding is declared
136 preg_match($RegEx, $this->XmlText, $Matches);
138 # give precedence to the encoding specified within the XML file since
139 # an RSS feed publisher might not have access to HTTP response headers
140 if (count($Matches) >= 4)
142 # also need to strip off the quotes
146 # then give precedence to the charset parameter in the Content-Type
148 else if ($this->CacheDB)
150 # create cache table if it doesn't exist
154 # get the cache value
156 SELECT * FROM RSSClientCache
158 $Exists = ($DB->NumRowsSelected() > 0);
159 $Cache = $DB->FetchRow();
161 # if cached and charset parameter was given in the response headers
162 if ($Exists && strlen($Cache[
"Charset"]))
179 function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
181 # start by assuming no items will be found
184 # move parser to area in XML with items
187 $Result =
$Parser->SeekTo(
"rss");
188 if ($Result === NULL)
190 $Result =
$Parser->SeekTo(
"rdf:RDF");
198 $ItemCount =
$Parser->SeekTo(
"item");
207 $Items[$Index][
"description"] =
$Parser->GetData(
"description");
209 $Items[$Index][
"enclosure"] =
$Parser->GetAttributes(
"enclosure");
213 while (
$Parser->NextItem() && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
216 # return records to caller
259 # ---- PRIVATE INTERFACE -------------------------------------------------
282 $this->DebugLevel = $NewLevel;
296 $Text = @file_get_contents($Url);
300 # get the type and charset if the fetch was successful
303 # this must come after file_get_contents() and before any other remote
305 $Headers = $http_response_header;
307 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
308 $LWS =
'([ \t]*|\r\n[ \t]+)';
309 $Token =
'[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
310 $QuotedPair =
'\\[\x00-\x7F]';
311 $QdText =
"([^\\x00-\\x1F\\x7F\"]|{$LWS})";
312 $QuotedString =
"\"({$QdText}|{$QuotedPair})*\"";
313 $Value =
"({$Token}|{$QuotedString})";
314 $Parameter =
"{$Token}{$LWS}={$LWS}{$Value}";
316 # these make the Content-Type regex specific to Content-Type
317 # values with charset parameters in them, but make capturing
318 # the charset much easier
319 $BasicParameter =
"(;{$LWS}{$Parameter})*";
320 $CharsetParameter =
"(;{$LWS}charset{$LWS}={$LWS}{$Value})";
321 $ModParameter =
"{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
322 $MediaType =
"({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";
325 $ContentType =
"Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
326 $RegEx =
"/^{$ContentType}$/i";
328 foreach ($Headers as $Header)
330 preg_match($RegEx, $Header, $Matches);
332 if (isset($Matches[3]) && isset($Matches[19]))
335 $Charset = $Matches[19];
341 return array($Text, $Type, $Charset);
356 # save RSS server URL
359 # save caching info (if any)
365 # if caching info was supplied
370 # look up cached information for this server
371 $QueryTimeCutoff = date(
"Y-m-d H:i:s", (time() -
$RefreshTime));
373 SELECT * FROM RSSClientCache
374 WHERE ServerUrl = '".addslashes(
$ServerUrl).
"'
375 AND LastQueryTime > '".$QueryTimeCutoff.
"'");
377 # if we have cached info that has not expired
378 if ($CachedXml = $DB->FetchField(
"CachedXml"))
381 $QueryResult = $CachedXml;
382 $this->CachedDataWasUsed = TRUE;
386 $this->CachedDataWasUsed = FALSE;
388 # query server for XML text
392 # if query was successful
395 $QueryResult = $Text;
397 # clear out any old cache entries
399 DELETE FROM RSSClientCache
400 WHERE ServerUrl = '".addslashes(
$ServerUrl).
"'");
404 INSERT INTO RSSClientCache
405 (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
408 '".addslashes($Text).
"',
409 '".addslashes($Type).
"',
410 '".addslashes($Charset).
"',
416 # return query result to caller
428 $Result =
$Parser->SeekTo(
"rss");
429 if ($Result === NULL)
431 $Result =
$Parser->SeekTo(
"rdf:RDF");
434 $this->ChannelTitle =
$Parser->GetData(
"title");
435 $this->ChannelLink =
$Parser->GetData(
"link");
436 $this->ChannelDescription =
$Parser->GetData(
"description");