3 # FILE: SPTSearchEngine.php
5 # Part of the Collection Workflow Integration System (CWIS)
6 # Copyright 2011-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/cwis/
14 # create a database handle
17 # pass database handle and config values to real search engine object
26 # for each field defined in schema
28 $Fields = $this->Schema->GetFields();
29 foreach ($Fields as $Field)
31 # determine field type for searching
32 switch ($Field->Type())
44 $FieldType = self::FIELDTYPE_TEXT;
49 $FieldType = self::FIELDTYPE_NUMERIC;
53 $FieldType = self::FIELDTYPE_DATERANGE;
57 $FieldType = self::FIELDTYPE_DATE;
65 exit(
"ERROR: unknown field type "
66 .$Field->Type().
" in SPTSearchEngine.php");
70 if ($FieldType !== NULL)
72 # add field to search engine
73 $this->
AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
74 $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
79 # overloaded version of method to retrieve text from DB
85 # retrieve text (including variants) from resource object and return to caller
86 return $Resource->Get($FieldName, FALSE, TRUE);
89 # overloaded version of method to retrieve resource/phrase match list
92 # normalize and escape search phrase for use in SQL query
93 $SearchPhrase = strtolower(addslashes($Phrase));
95 # query DB for matching list based on field type
96 $Field = $this->Schema->GetFieldByName($FieldName);
97 switch ($Field->Type())
103 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
104 .
"WHERE POSITION('".$SearchPhrase.
"'"
105 .
" IN LOWER(`".$Field->DBFieldName().
"`)) ";
109 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
110 .
"WHERE POSITION('".$SearchPhrase.
"'"
111 .
" IN LOWER(`".$Field->DBFieldName().
"AltText`)) ";
115 $NameTableSize = $this->DB->Query(
"SELECT COUNT(*) AS NameCount"
116 .
" FROM ControlledNames",
"NameCount");
117 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
118 .
"FROM ResourceNameInts, ControlledNames "
119 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
120 .
"AND ControlledNames.ControlledNameId"
121 .
" = ResourceNameInts.ControlledNameId "
122 .
"AND ControlledNames.FieldId = ".$Field->Id();
123 $SecondQueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
124 .
"FROM ResourceNameInts, ControlledNames, VariantNames "
125 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(VariantName)) "
126 .
"AND VariantNames.ControlledNameId"
127 .
" = ResourceNameInts.ControlledNameId "
128 .
"AND ControlledNames.ControlledNameId"
129 .
" = ResourceNameInts.ControlledNameId "
130 .
"AND ControlledNames.FieldId = ".$Field->Id();
134 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
135 .
"FROM ResourceNameInts, ControlledNames "
136 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
137 .
"AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
138 .
"AND ControlledNames.FieldId = ".$Field->Id();
142 $QueryString =
"SELECT DISTINCT ResourceClassInts.ResourceId "
143 .
"FROM ResourceClassInts, Classifications "
144 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ClassificationName)) "
145 .
"AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
146 .
"AND Classifications.FieldId = ".$Field->Id();
150 $UserId = $this->DB->Query(
"SELECT UserId FROM APUsers "
151 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(UserName)) "
152 .
"OR POSITION('".$SearchPhrase.
"' IN LOWER(RealName))",
"UserId");
155 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
156 .
"WHERE `".$Field->DBFieldName().
"` = ".$UserId;
161 if ($SearchPhrase > 0)
163 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
164 .
"WHERE `".$Field->DBFieldName().
"` = ".(int)$SearchPhrase;
172 # (these types not yet handled by search engine for phrases)
176 # build match list based on results returned from DB
177 if (isset($QueryString))
179 $this->
DMsg(7,
"Performing phrase search query (<i>".$QueryString.
"</i>)");
180 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
181 $this->DB->Query($QueryString);
184 $EndTime = microtime(TRUE);
185 if (($StartTime - $EndTime) > 0.1)
187 printf(
"SE: Query took %.2f seconds<br>\n",
188 ($EndTime - $StartTime));
191 $MatchList = $this->DB->FetchColumn(
"ResourceId");
192 if (isset($SecondQueryString))
194 $this->
DMsg(7,
"Performing second phrase search query"
195 .
" (<i>".$SecondQueryString.
"</i>)");
196 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
197 $this->DB->Query($SecondQueryString);
200 $EndTime = microtime(TRUE);
201 if (($StartTime - $EndTime) > 0.1)
203 printf(
"SE: query took %.2f seconds<br>\n",
204 ($EndTime - $StartTime));
207 $MatchList = $MatchList + $this->DB->FetchColumn(
"ResourceId");
212 $MatchList = array();
215 # return list of matching resources to caller
219 # search field for records that meet comparison
222 # use SQL keyword appropriate to current search logic for combining operations
225 # for each comparison
226 foreach ($FieldNames as $Index => $FieldName)
228 $Operator = $Operators[$Index];
229 $Value = $Values[$Index];
231 # determine query based on field type
232 $Field = $this->Schema->GetFieldByName($FieldName);
235 switch ($Field->Type())
243 if (isset($Queries[
"Resources"]))
245 $Queries[
"Resources"] .= $CombineWord;
249 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId FROM Resources WHERE ";
253 $User =
new CWUser($Value);
254 $Value = $User->Id();
256 $Queries[
"Resources"] .=
"`".$Field->DBFieldName().
"` ".$Operator.
" '".addslashes($Value).
"' ";
260 $QueryIndex =
"ResourceNameInts".$Field->Id();
261 if (!isset($Queries[$QueryIndex][
"A"]))
263 $Queries[$QueryIndex][
"A"] =
264 "SELECT DISTINCT ResourceId"
265 .
" FROM ResourceNameInts, ControlledNames "
266 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
268 $CloseQuery[$QueryIndex][
"A"] = TRUE;
269 $ComparisonCount[$QueryIndex][
"A"] = 1;
270 $ComparisonCountField[$QueryIndex][
"A"] =
"ControlledName";
274 $Queries[$QueryIndex][
"A"] .=
" OR ";
275 $ComparisonCount[$QueryIndex][
"A"]++;
277 $Queries[$QueryIndex][
"A"] .=
278 "((ResourceNameInts.ControlledNameId"
279 .
" = ControlledNames.ControlledNameId"
280 .
" AND ControlledName "
281 .$Operator.
" '".addslashes($Value).
"'))";
282 if (!isset($Queries[$QueryIndex][
"B"]))
284 $Queries[$QueryIndex][
"B"] =
285 "SELECT DISTINCT ResourceId"
286 .
" FROM ResourceNameInts, ControlledNames,"
288 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
290 $CloseQuery[$QueryIndex][
"B"] = TRUE;
291 $ComparisonCount[$QueryIndex][
"B"] = 1;
292 $ComparisonCountField[$QueryIndex][
"B"] =
"ControlledName";
296 $Queries[$QueryIndex][
"B"] .=
" OR ";
297 $ComparisonCount[$QueryIndex][
"B"]++;
299 $Queries[$QueryIndex][
"B"] .=
300 "((ResourceNameInts.ControlledNameId"
301 .
" = ControlledNames.ControlledNameId"
302 .
" AND ResourceNameInts.ControlledNameId"
303 .
" = VariantNames.ControlledNameId"
305 .$Operator.
" '".addslashes($Value).
"'))";
309 $QueryIndex =
"ResourceNameInts".$Field->Id();
310 if (!isset($Queries[$QueryIndex]))
312 $Queries[$QueryIndex] =
313 "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
314 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
316 $CloseQuery[$QueryIndex] = TRUE;
317 $ComparisonCount[$QueryIndex] = 1;
318 $ComparisonCountField[$QueryIndex] =
"ControlledName";
322 $Queries[$QueryIndex] .=
" OR ";
323 $ComparisonCount[$QueryIndex]++;
325 $Queries[$QueryIndex] .=
"(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
326 .
" AND ControlledName ".$Operator.
" '".addslashes($Value).
"')";
330 $QueryIndex =
"ResourceClassInts".$Field->Id();
331 if (!isset($Queries[$QueryIndex]))
333 $Queries[$QueryIndex] =
"SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
334 .
" WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
335 .
" AND Classifications.FieldId = ".$Field->Id().
" AND ( ";
336 $CloseQuery[$QueryIndex] = TRUE;
337 $ComparisonCount[$QueryIndex] = 1;
338 $ComparisonCountField[$QueryIndex] =
"ClassificationName";
342 $Queries[$QueryIndex] .=
" OR ";
343 $ComparisonCount[$QueryIndex]++;
345 $Queries[$QueryIndex] .=
" ClassificationName ".$Operator.
" '".addslashes($Value).
"'";
349 # if value appears to have time component or text description
350 if (strpos($Value,
":")
351 || strstr($Value,
"day")
352 || strstr($Value,
"week")
353 || strstr($Value,
"month")
354 || strstr($Value,
"year")
355 || strstr($Value,
"hour")
356 || strstr($Value,
"minute"))
358 if (isset($Queries[
"Resources"]))
360 $Queries[
"Resources"] .= $CombineWord;
364 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
365 .
" FROM Resources WHERE ";
368 # flip operator if necessary
369 if (strstr($Value,
"ago"))
371 $OperatorFlipMap = array(
377 $Operator = isset($OperatorFlipMap[$Operator])
378 ? $OperatorFlipMap[$Operator] : $Operator;
381 # use strtotime method to build condition
382 $TimestampValue = strtotime($Value);
383 if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
385 if ((date(
"H:i:s", $TimestampValue) ==
"00:00:00")
386 && (strpos($Value,
"00:00") === FALSE)
387 && ($Operator ==
"<="))
390 date(
"Y-m-d", $TimestampValue).
" 23:59:59";
394 $NormalizedValue = date(
"Y-m-d H:i:s", $TimestampValue);
399 $NormalizedValue = addslashes($Value);
401 $Queries[
"Resources"] .=
402 " ( `".$Field->DBFieldName().
"` "
404 .
" '".$NormalizedValue.
"' ) ";
408 # use Date object method to build condition
409 $Date =
new Date($Value);
410 if ($Date->Precision())
412 if (isset($Queries[
"Resources"]))
414 $Queries[
"Resources"] .= $CombineWord;
418 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
419 .
" FROM Resources WHERE ";
421 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
422 $Field->DBFieldName(), NULL, $Operator).
" ) ";
428 $Date =
new Date($Value);
429 if ($Date->Precision())
431 if (isset($Queries[
"Resources"]))
433 $Queries[
"Resources"] .= $CombineWord;
437 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
438 .
" FROM Resources WHERE ";
440 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
441 $Field->DBFieldName().
"Begin",
442 $Field->DBFieldName().
"End", $Operator).
" ) ";
447 $QueryIndex =
"ReferenceInts".$Field->Id();
448 if (!isset($Queries[$QueryIndex]))
450 if (!isset($NameField))
453 $this->Schema->GetFieldByMappedName(
456 $Queries[$QueryIndex] =
457 "SELECT DISTINCT RI.SrcResourceId AS ResourceId"
458 .
" FROM ReferenceInts AS RI, Resources AS R "
459 .
" WHERE RI.FieldId = ".$Field->Id()
461 $CloseQuery[$QueryIndex] = TRUE;
462 $ComparisonCount[$QueryIndex] = 1;
463 $ComparisonCountField[$QueryIndex] =
464 "R.`".$NameField->DBFieldName().
"`";
468 $Queries[$QueryIndex] .= $CombineWord;
469 $ComparisonCount[$QueryIndex]++;
471 $Queries[$QueryIndex] .=
"(R.`".$NameField->DBFieldName().
"` "
472 .$Operator.
" '".addslashes($Value).
"'"
473 .
" AND R.ResourceId = RI.DstResourceId)";
478 # (these types not yet handled by search engine for comparisons)
487 # for each assembled query
488 foreach ($Queries as $QueryIndex => $Query)
490 # if query has multiple parts
491 if (is_array($Query))
493 # for each part of query
494 $ResourceIds = array();
495 foreach ($Query as $PartIndex => $PartQuery)
497 # add closing paren if query was flagged to be closed
498 if (isset($CloseQuery[$QueryIndex][$PartIndex]))
502 && ($ComparisonCount[$QueryIndex][$PartIndex] > 1))
504 $PartQuery .=
"GROUP BY ResourceId"
505 .
" HAVING COUNT(DISTINCT "
506 .$ComparisonCountField[$QueryIndex][$PartIndex]
508 .$ComparisonCount[$QueryIndex][$PartIndex];
512 # perform query and retrieve IDs
513 $this->
DMsg(5,
"Performing comparison query <i>"
515 $this->DB->Query($PartQuery);
516 $ResourceIds = $ResourceIds
517 + $this->DB->FetchColumn(
"ResourceId");
518 $this->
DMsg(5,
"Comparison query produced <i>"
519 .count($ResourceIds).
"</i> results");
524 # add closing paren if query was flagged to be closed
525 if (isset($CloseQuery[$QueryIndex]))
529 && ($ComparisonCount[$QueryIndex] > 1))
531 $Query .=
"GROUP BY ResourceId"
532 .
" HAVING COUNT(DISTINCT "
533 .$ComparisonCountField[$QueryIndex]
535 .$ComparisonCount[$QueryIndex];
539 # perform query and retrieve IDs
540 $this->
DMsg(5,
"Performing comparison query <i>".$Query.
"</i>");
541 $this->DB->Query($Query);
542 $ResourceIds = $this->DB->FetchColumn(
"ResourceId");
543 $this->
DMsg(5,
"Comparison query produced <i>"
544 .count($ResourceIds).
"</i> results");
547 # if we already have some results
550 # if search logic is set to AND
553 # remove anything from results that was not returned from query
554 $Results = array_intersect($Results, $ResourceIds);
558 # add values returned from query to results
559 $Results = array_unique(array_merge($Results, $ResourceIds));
564 # set results to values returned from query
565 $Results = $ResourceIds;
571 # initialize results to empty list
575 # return results to caller
582 return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
589 $TaskDescription =
"Update search data for"
590 .
" <a href=\"r".$ItemId.
"\"><i>"
591 .$Item->GetMapped(
"Title").
"</i></a>";
592 $GLOBALS[
"AF"]->QueueUniqueTask(array(__CLASS__,
"RunUpdateForItem"),
593 array(intval($ItemId)), $TaskPriority, $TaskDescription);
598 # check that resource still exists
600 if (!$RFactory->ItemExists($ItemId)) {
return; }
602 # update search data for resource
604 $SearchEngine->UpdateForItem($ItemId);
606 # And updated the cached Controlled Name and Classification mappings
607 # which are used to suggest search facets:
610 # Get all the classifications that are associated with this resource:
611 $DB->Query(
"SELECT DISTINCT ClassificationId FROM ResourceClassInts "
612 .
"WHERE ResourceId=".intval($ItemId));
613 $MyClasses = array();
614 foreach (
$DB->FetchRows() as $Row)
615 $MyClasses []= $Row[
"ClassificationId"];
617 # Update the class map cache for this resource:
618 $DB->Query(
"DELETE FROM ClassificationFacetCache WHERE ResourceId=".intval($ItemId));
620 if (count($MyClasses)>0)
621 $DB->Query(
"INSERT INTO ClassificationFacetCache (ResourceId, Classes) VALUES ".
622 "(".intval($ItemId).
",'".implode(
",",$MyClasses).
"')");
624 # Get all of the names that are associated with this resource:
625 $DB->Query(
"SELECT DISTINCT ControlledNameId FROM ResourceNameInts "
626 .
"WHERE ResourceId=".intval($ItemId));
629 foreach (
$DB->FetchRows() as $Row)
630 $MyNames []= $Row[
"ControlledNameId"];
632 # Update the name map cache for this resource:
633 $DB->Query(
"DELETE FROM ControlledNameFacetCache WHERE ResourceId=".intval($ItemId));
635 if (count($MyNames)>0)
636 $DB->Query(
"INSERT INTO ControlledNameFacetCache (ResourceId, Names) VALUES ".
637 "(".intval($ItemId).
",'".implode(
",",$MyNames).
"')");
648 # Classifications and names associated with these search results:
649 $SearchClasses = array();
650 $SearchNames = array();
652 # Disable DB cache for the search suggestions process,
653 # This avoids memory exhaustion.
657 if (count($SearchResults)>0)
659 # Pull out all the Classifications that were associated with our search results:
660 $DB->Query(
"SELECT * FROM ClassificationFacetCache "
661 .
"WHERE ResourceId IN "
662 .
"(".implode(
",",array_keys($SearchResults)).
")");
663 while ($Row=
$DB->FetchRow())
665 foreach (explode(
',',$Row[
"Classes"]) as $ClassId)
667 $SearchClasses[$ClassId] []= $Row[
"ResourceId"];
671 # Similarly with controlled names
672 $DB->Query(
"SELECT * from ControlledNameFacetCache "
673 .
"WHERE ResourceId in "
674 .
"(".implode(
",",array_keys($SearchResults)).
")");
675 while ($Row=
$DB->FetchRow())
677 foreach (explode(
',',$Row[
"Names"]) as $NameId)
679 $SearchNames[$NameId] []= $Row[
"ResourceId"];
684 # Generate a map of FieldId -> Field Names for all of the generated facets:
685 $SuggestionsById = array();
687 # Pull relevant Classification names out of the DB
688 if ( count($SearchClasses) > 0)
690 $DB->Query(
"SELECT FieldId,ClassificationId,ClassificationName FROM Classifications "
691 .
"WHERE ClassificationId IN (".implode(
",",array_keys($SearchClasses)).
")");
692 while ($Row =
$DB->FetchRow())
693 $SuggestionsById[$Row[
"FieldId"]] []=
694 array(
"Id" => $Row[
"ClassificationId"],
695 "Name" => $Row[
"ClassificationName"],
696 "Count" => count( $SearchClasses[ $Row[
"ClassificationId"] ] ) );
699 if (count($SearchNames)>0)
701 # Pull relevant ControlledNames out of the DB
702 $DB->Query(
"SELECT FieldId,ControlledNameId,ControlledName FROM ControlledNames "
703 .
"WHERE ControlledNameId IN (".implode(
",",array_keys($SearchNames)).
")");
704 while ($Row =
$DB->FetchRow())
705 $SuggestionsById[$Row[
"FieldId"]] []=
706 array(
"Id" => $Row[
"ControlledNameId"],
707 "Name" => $Row[
"ControlledName"],
708 "Count" => count( $SearchNames[ $Row[
"ControlledNameId"] ] ) );
711 # Now, translate the suggestions that we have in terms of the
712 # FieldIds to suggestions in terms of the Field Names.
713 $SuggestionsByFieldName = array();
715 # If we have *any* suggestions to offer:
716 if (count($SuggestionsById)>0)
718 # We'll need G_User for permissions checks:
722 # Fill in an array that maps FieldNames to search links
723 # which would be appropriate for that field
724 foreach ($SuggestionsById as $FieldId => $FieldValues)
726 $ThisField = $Schema->GetField($FieldId);
728 # Bail on fields taht didn't exist (possibly as a result of stale cache data)
729 # and on fields that the current user cannot view, and on fields that are disabled
730 # for advanced searching:
731 if (is_object($ThisField) &&
733 $ThisField->IncludeInFacetedSearch() &&
734 $ThisField->Enabled() &&
735 $G_User->HasPriv($ThisField->ViewingPrivileges() ) )
737 $SuggestionsByFieldName[ $ThisField->Name() ] = array();
739 foreach ($FieldValues as $Value)
740 $SuggestionsByFieldName [ $ThisField->Name() ] [$Value[
"Id"]] =
741 array(
"Name" => $Value[
"Name"],
"Count" => $Value[
"Count"] );
746 ksort($SuggestionsByFieldName);
748 return $SuggestionsByFieldName;
753 # functions for backward compatability w/ old SPT code
SearchFieldForPhrases($FieldName, $Phrase)
const PRIORITY_LOW
Lower priority.
SQL database abstraction object with smart query caching.
UpdateForResource($ItemId)
AddField($FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
GetFieldContent($ItemId, $FieldName)
SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName, $ReferenceTableName, $ReferenceSourceIdFieldName, $ReferenceDestinationIdFieldName)
static RunUpdateForItem($ItemId)
static GetResultFacets($SearchResults)
Generate a list of suggested additional search terms that can be used for faceted searching...
SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
Represents a "resource" in CWIS.
static GetItemIdsSortedByField($FieldName, $SortDescending)
DefaultSearchLogic($NewSetting=NULL)
Factory for Resource objects.
CWIS-specific user class.
static QueueUpdateForItem($ItemId, $TaskPriority=ApplicationFramework::PRIORITY_LOW)