SPTSearchEngine.php
Go to the documentation of this file.
<?PHP
#
#   FILE:  SPTSearchEngine.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2011 Edward Almasy and Internet Scout Project
#   http://scout.wisc.edu/
#

# Search engine specialization that maps metadata schema fields onto the
# generic SearchEngine field types and supplies the resource-specific
# content retrieval, phrase search, and comparison search queries.
class SPTSearchEngine extends SearchEngine {

    # Object constructor.  Hands a new database handle plus the "Resources"
    # table / "ResourceId" column names to the parent search engine, then
    # registers every metadata schema field that has a search field type.
    function SPTSearchEngine()
    {
        # create a database handle
        $DB = new Database();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = self::FIELDTYPE_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = self::FIELDTYPE_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = self::FIELDTYPE_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = self::FIELDTYPE_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (point fields are not added to the search engine)
                    $FieldType = NULL;
                    break;

                default:
                    exit("ERROR: unknown field type "
                            .$Field->Type()." in SPTSearchEngine.php");
                    break;
            }

            if ($FieldType !== NULL)
            {
                # add field to search engine
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                        $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    # overloaded version of method to retrieve text from DB
    #   $ItemId    - ID of resource to load
    #   $FieldName - name of metadata field to retrieve
    # returns whatever Resource::Get() returns for the field
    function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    # overloaded version of method to retrieve resource/phrase match list
    #   $FieldName - name of metadata field to search
    #   $Phrase    - phrase to look for (matched case-insensitively)
    # returns array of IDs of resources whose field value contains the phrase
    #   (empty array if the field is unknown or its type is not phrase-searchable)
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # normalize and escape search phrase for use in SQL query
        $SearchPhrase = strtolower(addslashes($Phrase));

        # bail out with no matches if field is not in schema
        # (mirrors the NULL check in SearchFieldsForComparisonMatches())
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) {  return array();  }

        # query DB for matching list based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # first query matches controlled names themselves,
                #       second query matches their variant names
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up a user whose login or real name contains the phrase
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                        ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$UserId;
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                if ($SearchPhrase > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".(int)$SearchPhrase;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # build match list based on results returned from DB
        if (isset($QueryString))
        {
            $this->DMsg(7, "Performing phrase search query (<i>".$QueryString."</i>)");
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($QueryString);
            if ($this->DebugLevel > 9)
            {
                $EndTime = microtime(TRUE);

                # report slow queries (elapsed time is end minus start)
                if (($EndTime - $StartTime) > 0.1)
                {
                    printf("SE:  Query took %.2f seconds<br>\n",
                            ($EndTime - $StartTime));
                }
            }
            $MatchList = $this->DB->FetchColumn("ResourceId");
            if (isset($SecondQueryString))
            {
                $this->DMsg(7, "Performing second phrase search query"
                        ." (<i>".$SecondQueryString."</i>)");
                if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
                $this->DB->Query($SecondQueryString);
                if ($this->DebugLevel > 9)
                {
                    $EndTime = microtime(TRUE);

                    # report slow queries (elapsed time is end minus start)
                    if (($EndTime - $StartTime) > 0.1)
                    {
                        printf("SE:  query took %.2f seconds<br>\n",
                                ($EndTime - $StartTime));
                    }
                }

                # combine both ID lists (merge rather than "+", because "+"
                #       discards second-list entries whose array index is
                #       already present in the first list)
                $MatchList = array_unique(array_merge(
                        $MatchList, $this->DB->FetchColumn("ResourceId")));
            }
        }
        else
        {
            $MatchList = array();
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    # search field for records that meet comparison
    #   $FieldNames - array of metadata field names
    #   $Operators  - array of SQL comparison operators, indexed like $FieldNames
    #   $Values     - array of values to compare against, indexed like $FieldNames
    # returns array of IDs of resources that satisfy the comparisons, combined
    #   according to the current default search logic (AND or OR)
    # NOTE(review): operators are placed into the SQL text unescaped, so they
    #   must come from a trusted source -- confirm against callers
    function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == self::LOGIC_AND) ? " AND " : " OR ";

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # determine query based on field type
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field != NULL)
            {
                switch ($Field->Type())
                {
                    case MetadataSchema::MDFTYPE_TEXT:
                    case MetadataSchema::MDFTYPE_PARAGRAPH:
                    case MetadataSchema::MDFTYPE_NUMBER:
                    case MetadataSchema::MDFTYPE_FLAG:
                    case MetadataSchema::MDFTYPE_USER:
                    case MetadataSchema::MDFTYPE_URL:
                        if (isset($Queries["Resources"]))
                        {
                            $Queries["Resources"] .= $CombineWord;
                        }
                        else
                        {
                            $Queries["Resources"] = "SELECT DISTINCT ResourceId FROM Resources WHERE ";
                        }
                        if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                        {
                            # compare against ID of specified user
                            $User = new SPTUser($Value);
                            $Value = $User->Id();
                        }
                        $Queries["Resources"] .= "`".$Field->DBFieldName()."` ".$Operator
                                ." '".addslashes($Value)."' ";
                        break;

                    case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                        # part "A" compares against controlled names,
                        #       part "B" compares against variant names
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]["A"]))
                        {
                            $Queries[$QueryIndex]["A"] =
                                    "SELECT DISTINCT ResourceId"
                                    ." FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["A"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["A"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["A"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ControlledName "
                                        .$Operator." '".addslashes($Value)."'))";
                        if (!isset($Queries[$QueryIndex]["B"]))
                        {
                            $Queries[$QueryIndex]["B"] =
                                    "SELECT DISTINCT ResourceId"
                                    . " FROM ResourceNameInts, ControlledNames,"
                                            ." VariantNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["B"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["B"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["B"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ResourceNameInts.ControlledNameId"
                                        ." = VariantNames.ControlledNameId"
                                ." AND VariantName "
                                        .$Operator." '".addslashes($Value)."'))";
                        break;

                    case MetadataSchema::MDFTYPE_OPTION:
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] =
                                    "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                                ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                        break;

                    case MetadataSchema::MDFTYPE_TREE:
                        $QueryIndex = "ResourceClassInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                    ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                    ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                        break;

                    case MetadataSchema::MDFTYPE_TIMESTAMP:
                        # if value appears to have time component or text description
                        if (strpos($Value, ":")
                                || strstr($Value, "day")
                                || strstr($Value, "week")
                                || strstr($Value, "month")
                                || strstr($Value, "year")
                                || strstr($Value, "hour")
                                || strstr($Value, "minute"))
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }

                            # flip operator if necessary
                            if (strstr($Value, "ago"))
                            {
                                $OperatorFlipMap = array(
                                        "<" => ">=",
                                        ">" => "<=",
                                        "<=" => ">",
                                        ">=" => "<",
                                        );
                                $Operator = isset($OperatorFlipMap[$Operator])
                                        ? $OperatorFlipMap[$Operator] : $Operator;
                            }

                            # use strtotime method to build condition
                            $TimestampValue = strtotime($Value);
                            if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                            {
                                # if value resolved to midnight without a time
                                #       being given, make "<=" cover the whole day
                                if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                        && (strpos($Value, "00:00") === FALSE)
                                        && ($Operator == "<="))
                                {
                                    $NormalizedValue =
                                            date("Y-m-d", $TimestampValue)." 23:59:59";
                                }
                                else
                                {
                                    $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                                }
                            }
                            else
                            {
                                # value could not be parsed, so pass it through escaped
                                $NormalizedValue = addslashes($Value);
                            }
                            $Queries["Resources"] .=
                                    " ( `".$Field->DBFieldName()."` "
                                    .$Operator
                                    ." '".$NormalizedValue."' ) ";
                        }
                        else
                        {
                            # use Date object method to build condition
                            $Date = new Date($Value);
                            if ($Date->Precision())
                            {
                                if (isset($Queries["Resources"]))
                                {
                                    $Queries["Resources"] .= $CombineWord;
                                }
                                else
                                {
                                    $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                            ." FROM Resources WHERE ";
                                }
                                $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                        $Field->DBFieldName(), NULL, $Operator)." ) ";
                            }
                        }
                        break;

                    case MetadataSchema::MDFTYPE_DATE:
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }
                            $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                    $Field->DBFieldName()."Begin",
                                    $Field->DBFieldName()."End", $Operator)." ) ";
                        }
                        break;

                    case MetadataSchema::MDFTYPE_IMAGE:
                    case MetadataSchema::MDFTYPE_FILE:
                        # (these types not yet handled by search engine for comparisons)
                        break;
                }
            }
        }

        # if queries found
        if (isset($Queries))
        {
            # for each assembled query
            foreach ($Queries as $QueryIndex => $Query)
            {
                # if query has multiple parts
                if (is_array($Query))
                {
                    # for each part of query
                    $ResourceIds = array();
                    foreach ($Query as $PartIndex => $PartQuery)
                    {
                        # add closing paren if query was flagged to be closed
                        if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                        # perform query and retrieve IDs
                        $this->DMsg(5, "Performing comparison query (<i>"
                                .$PartQuery."</i>)");
                        $this->DB->Query($PartQuery);

                        # combine ID lists (merge rather than "+", because "+"
                        #       discards new entries whose array index is
                        #       already present in the existing list)
                        $ResourceIds = array_unique(array_merge(
                                $ResourceIds, $this->DB->FetchColumn("ResourceId")));
                        $this->DMsg(5, "Comparison query produced <i>"
                                .count($ResourceIds)."</i> results");
                    }
                }
                else
                {
                    # add closing paren if query was flagged to be closed
                    if (isset($CloseQuery[$QueryIndex])) {  $Query .= " ) ";  }

                    # perform query and retrieve IDs
                    $this->DMsg(5, "Performing comparison query (<i>".$Query."</i>)");
                    $this->DB->Query($Query);
                    $ResourceIds = $this->DB->FetchColumn("ResourceId");
                    $this->DMsg(5, "Comparison query produced <i>"
                            .count($ResourceIds)."</i> results");
                }

                # if we already have some results
                if (isset($Results))
                {
                    # if search logic is set to AND
                    if ($this->DefaultSearchLogic == self::LOGIC_AND)
                    {
                        # remove anything from results that was not returned from query
                        $Results = array_intersect($Results, $ResourceIds);
                    }
                    else
                    {
                        # add values returned from query to results
                        $Results = array_unique(array_merge($Results, $ResourceIds));
                    }
                }
                else
                {
                    # set results to values returned from query
                    $Results = $ResourceIds;
                }
            }
        }
        else
        {
            # initialize results to empty list
            $Results = array();
        }

        # return results to caller
        return $Results;
    }

    # retrieve resource IDs sorted by specified field
    #   $FieldName      - name of field to sort by
    #   $SortDescending - TRUE to sort in descending order
    #       (negated before being passed to GetResourceIdsSortedBy())
    static function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    # queue a background task to update search data for specified item
    #   $ItemId   - ID of item to update (converted to integer)
    #   $Priority - task priority (defaults to low priority)
    static function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    # update search data for specified item (task callback queued by
    #       QueueUpdateForItem()); does nothing if item no longer exists
    #   $ItemId - ID of item to update
    static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # metadata schema used to look up field types and DB column names
    private $Schema;

    # functions for backward compatibility w/ old SPT code
    function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }
}