Search:

CWIS Developers Documentation

  • Main Page
  • Classes
  • Files
  • File List
  • File Members

SPTSearchEngine.php

Go to the documentation of this file.
00001 <?PHP
00002 #
00003 #   FILE:  SPTSearchEngine.php
00004 #
00005 #   Part of the Collection Workflow Integration System (CWIS)
00006 #   Copyright 2011 Edward Almasy and Internet Scout Project
00007 #   http://scout.wisc.edu/
00008 #
00009 
# Search engine specialization that binds the generic SearchEngine to the
# "Resources" table and the metadata schema.  It registers schema fields for
# searching and supplies the field-type-specific SQL used for phrase and
# comparison searches.
class SPTSearchEngine extends SearchEngine {

    # Create the search engine and register every searchable schema field.
    # NOTE(review): this is a PHP 4-style (class-named) constructor; PHP 8 no
    #   longer treats such methods as constructors -- confirm target PHP version
    #   before renaming, since the parent is initialized the same way below.
    function SPTSearchEngine()
    {
        # create a database handle
        $DB = new Database();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = self::FIELDTYPE_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = self::FIELDTYPE_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = self::FIELDTYPE_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = self::FIELDTYPE_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (point fields are not registered with the search engine)
                    $FieldType = NULL;
                    break;

                default:
                    exit("ERROR: unknown field type "
                            .$Field->Type()." in SPTSearchEngine.php");
                    break;
            }

            if ($FieldType !== NULL)
            {
                # add field to search engine
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                                $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    # Overloaded version of method to retrieve text from DB.
    #   $ItemId    - ID of resource to load
    #   $FieldName - name of metadata field to retrieve
    # Returns whatever Resource::Get() yields for the field when called with
    #   (FALSE, TRUE) as its second and third arguments.
    function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    # Overloaded version of method to retrieve resource/phrase match list.
    #   $FieldName - name of metadata field to search
    #   $Phrase    - phrase to look for (matched case-insensitively as a substring)
    # Returns array of IDs of matching resources (empty array if the field is
    #   unknown, the field type is not handled, or nothing qualifies).
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # normalize and escape search phrase for use in SQL query
        $SearchPhrase = strtolower(addslashes($Phrase));

        # report no matches if field is not known to schema
        # (same guard that SearchFieldsForComparisonMatches() applies)
        $Field = $this->Schema->GetFieldByName($FieldName);
        if ($Field == NULL) {  return array();  }

        # query DB for matching list based on field type
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                # (image fields are searched via their alt text column)
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # first query matches controlled names, second matches variants
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up a user whose login or real name contains the phrase
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                                           ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                                           ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                                     ."WHERE `".$Field->DBFieldName()."` = ".$UserId;
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                # (only positive numeric phrases are searched for)
                if ($SearchPhrase > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                                     ."WHERE `".$Field->DBFieldName()."` = ".(int)$SearchPhrase;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # build match list based on results returned from DB
        if (isset($QueryString))
        {
            $this->DMsg(7, "Performing phrase search query (<i>".$QueryString."</i>)");
            if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
            $this->DB->Query($QueryString);
            if ($this->DebugLevel > 9)
            {
                # report query if it was slow (elapsed = end minus start)
                $EndTime = microtime(TRUE);
                if (($EndTime - $StartTime) > 0.1)
                {
                    printf("SE:  Query took %.2f seconds<br>\n",
                            ($EndTime - $StartTime));
                }
            }
            $MatchList = $this->DB->FetchColumn("ResourceId");
            if (isset($SecondQueryString))
            {
                $this->DMsg(7, "Performing second phrase search query"
                        ." (<i>".$SecondQueryString."</i>)");
                if ($this->DebugLevel > 9) {  $StartTime = microtime(TRUE);  }
                $this->DB->Query($SecondQueryString);
                if ($this->DebugLevel > 9)
                {
                    $EndTime = microtime(TRUE);
                    if (($EndTime - $StartTime) > 0.1)
                    {
                        printf("SE:  query took %.2f seconds<br>\n",
                                ($EndTime - $StartTime));
                    }
                }

                # combine both ID lists by value -- the array union operator (+)
                #   combines by key, which would drop second-query IDs whenever
                #   FetchColumn() returns numerically-indexed lists (presumed,
                #   as no index field is passed -- TODO confirm)
                $MatchList = array_unique(array_merge(
                        $MatchList, $this->DB->FetchColumn("ResourceId")));
            }
        }
        else
        {
            $MatchList = array();
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    # Search fields for records that meet comparisons.
    #   $FieldNames - array of metadata field names
    #   $Operators  - array of SQL comparison operators, with indexes
    #                   corresponding to $FieldNames
    #   $Values     - array of values to compare against, with indexes
    #                   corresponding to $FieldNames
    # Conditions (and the per-table query results) are combined with AND or OR
    #   according to the current default search logic.
    # Returns array of IDs of matching resources (empty array if no
    #   comparison produced a query).
    # NOTE(review): operators are spliced into the SQL unescaped -- callers are
    #   presumed to pass only operators parsed by the search engine; confirm.
    function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == self::LOGIC_AND) ? " AND " : " OR ";

        # start with no queries assembled and none flagged for closing paren
        $Queries = array();
        $CloseQuery = array();

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # determine query based on field type
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field != NULL)
            {
                switch ($Field->Type())
                {
                    case MetadataSchema::MDFTYPE_TEXT:
                    case MetadataSchema::MDFTYPE_PARAGRAPH:
                    case MetadataSchema::MDFTYPE_NUMBER:
                    case MetadataSchema::MDFTYPE_FLAG:
                    case MetadataSchema::MDFTYPE_USER:
                    case MetadataSchema::MDFTYPE_URL:
                        if (isset($Queries["Resources"]))
                        {
                            $Queries["Resources"] .= $CombineWord;
                        }
                        else
                        {
                            $Queries["Resources"] = "SELECT DISTINCT ResourceId FROM Resources WHERE ";
                        }
                        if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                        {
                            # user fields are compared by user ID rather than name
                            $User = new SPTUser($Value);
                            $Value = $User->Id();
                        }
                        $Queries["Resources"] .= "`".$Field->DBFieldName()."` ".$Operator." '".addslashes($Value)."' ";
                        break;

                    case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                        # (two-part query:  "A" compares against controlled
                        #   names, "B" compares against variant names)
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]["A"]))
                        {
                            $Queries[$QueryIndex]["A"] =
                                    "SELECT DISTINCT ResourceId"
                                    ." FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["A"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["A"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["A"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ControlledName "
                                        .$Operator." '".addslashes($Value)."'))";
                        if (!isset($Queries[$QueryIndex]["B"]))
                        {
                            $Queries[$QueryIndex]["B"] =
                                    "SELECT DISTINCT ResourceId"
                                    . " FROM ResourceNameInts, ControlledNames,"
                                            ." VariantNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex]["B"] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex]["B"] .= $CombineWord;
                        }
                        $Queries[$QueryIndex]["B"] .=
                                "((ResourceNameInts.ControlledNameId"
                                        ." = ControlledNames.ControlledNameId"
                                ." AND ResourceNameInts.ControlledNameId"
                                        ." = VariantNames.ControlledNameId"
                                ." AND VariantName "
                                        .$Operator." '".addslashes($Value)."'))";
                        break;

                    case MetadataSchema::MDFTYPE_OPTION:
                        $QueryIndex = "ResourceNameInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] =
                                    "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                    ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                    ." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                                                       ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                        break;

                    case MetadataSchema::MDFTYPE_TREE:
                        $QueryIndex = "ResourceClassInts".$Field->Id();
                        if (!isset($Queries[$QueryIndex]))
                        {
                            $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                                 ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                                 ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                            $CloseQuery[$QueryIndex] = TRUE;
                        }
                        else
                        {
                            $Queries[$QueryIndex] .= $CombineWord;
                        }
                        $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                        break;

                    case MetadataSchema::MDFTYPE_TIMESTAMP:
                        # if value appears to have time component or text description
                        # (note that a colon at the very start of the value makes
                        #   strpos() return 0 and so does not count here)
                        if (strpos($Value, ":")
                                || strstr($Value, "day")
                                || strstr($Value, "week")
                                || strstr($Value, "month")
                                || strstr($Value, "year")
                                || strstr($Value, "hour")
                                || strstr($Value, "minute"))
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }

                            # flip operator if necessary
                            # ("N units ago" grows into the past, so ordering
                            #   comparisons must be reversed)
                            if (strstr($Value, "ago"))
                            {
                                $OperatorFlipMap = array(
                                        "<" => ">=",
                                        ">" => "<=",
                                        "<=" => ">",
                                        ">=" => "<",
                                        );
                                $Operator = isset($OperatorFlipMap[$Operator])
                                        ? $OperatorFlipMap[$Operator] : $Operator;
                            }

                            # use strtotime method to build condition
                            $TimestampValue = strtotime($Value);
                            if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                            {
                                # if value resolved to midnight without an explicit
                                #   midnight time and operator is "<=", extend
                                #   comparison to end of that day
                                if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                        && (strpos($Value, "00:00") === FALSE)
                                        && ($Operator == "<="))
                                {
                                    $NormalizedValue =
                                            date("Y-m-d", $TimestampValue)." 23:59:59";
                                }
                                else
                                {
                                    $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                                }
                            }
                            else
                            {
                                # (value could not be parsed, so compare against it as given)
                                $NormalizedValue = addslashes($Value);
                            }
                            $Queries["Resources"] .=
                                    " ( `".$Field->DBFieldName()."` "
                                    .$Operator
                                    ." '".$NormalizedValue."' ) ";
                        }
                        else
                        {
                            # use Date object method to build condition
                            $Date = new Date($Value);
                            if ($Date->Precision())
                            {
                                if (isset($Queries["Resources"]))
                                {
                                    $Queries["Resources"] .= $CombineWord;
                                }
                                else
                                {
                                    $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                            ." FROM Resources WHERE ";
                                }
                                $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                        $Field->DBFieldName(), NULL, $Operator)." ) ";
                            }
                        }
                        break;

                    case MetadataSchema::MDFTYPE_DATE:
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            if (isset($Queries["Resources"]))
                            {
                                $Queries["Resources"] .= $CombineWord;
                            }
                            else
                            {
                                $Queries["Resources"] = "SELECT DISTINCT ResourceId"
                                        ." FROM Resources WHERE ";
                            }
                            $Queries["Resources"] .= " ( ".$Date->SqlCondition(
                                    $Field->DBFieldName()."Begin",
                                    $Field->DBFieldName()."End", $Operator)." ) ";
                        }
                        break;

                    case MetadataSchema::MDFTYPE_IMAGE:
                    case MetadataSchema::MDFTYPE_FILE:
                        # (these types not yet handled by search engine for comparisons)
                        break;
                }
            }
        }

        # for each assembled query (if any)
        $Results = NULL;
        foreach ($Queries as $QueryIndex => $Query)
        {
            # if query has multiple parts
            if (is_array($Query))
            {
                # for each part of query
                $ResourceIds = array();
                foreach ($Query as $PartIndex => $PartQuery)
                {
                    # add closing paren if query was flagged to be closed
                    if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                    # perform query and retrieve IDs
                    $this->DMsg(5, "Performing comparison query (<i>"
                            .$PartQuery."</i>)");
                    $this->DB->Query($PartQuery);

                    # combine IDs from all parts by value -- the array union
                    #   operator (+) combines by key, which would discard IDs
                    #   from later parts whenever FetchColumn() returns
                    #   numerically-indexed lists (presumed -- TODO confirm)
                    $ResourceIds = array_unique(array_merge(
                            $ResourceIds, $this->DB->FetchColumn("ResourceId")));
                    $this->DMsg(5, "Comparison query produced <i>"
                            .count($ResourceIds)."</i> results");
                }
            }
            else
            {
                # add closing paren if query was flagged to be closed
                if (isset($CloseQuery[$QueryIndex])) {  $Query .= " ) ";  }

                # perform query and retrieve IDs
                $this->DMsg(5, "Performing comparison query (<i>".$Query."</i>)");
                $this->DB->Query($Query);
                $ResourceIds = $this->DB->FetchColumn("ResourceId");
                $this->DMsg(5, "Comparison query produced <i>"
                        .count($ResourceIds)."</i> results");
            }

            # if we already have some results
            if ($Results !== NULL)
            {
                # if search logic is set to AND
                if ($this->DefaultSearchLogic == self::LOGIC_AND)
                {
                    # remove anything from results that was not returned from query
                    $Results = array_intersect($Results, $ResourceIds);
                }
                else
                {
                    # add values returned from query to results
                    $Results = array_unique(array_merge($Results, $ResourceIds));
                }
            }
            else
            {
                # set results to values returned from query
                $Results = $ResourceIds;
            }
        }

        # return results to caller (empty list if no queries were run)
        return ($Results === NULL) ? array() : $Results;
    }

    # Retrieve resource IDs sorted by specified field.
    #   $FieldName      - name of field to sort by
    #   $SortDescending - TRUE to sort in descending order
    # Returns result of ResourceFactory::GetResourceIdsSortedBy(), which is
    #   passed the inverse of $SortDescending as its second argument.
    static function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    # Queue a background task to update search data for specified resource.
    #   $ItemId   - ID of resource (converted to integer before queueing)
    #   $Priority - task priority (defaults to low priority)
    static function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    # Update search data for specified resource (task callback queued by
    # QueueUpdateForItem()).  Does nothing if resource no longer exists.
    #   $ItemId - ID of resource
    static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # metadata schema used to look up field definitions
    private $Schema;

    # functions for backward compatibility w/ old SPT code
    function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }
}

CWIS logo doxygen
Copyright 2010 Internet Scout