3 # FILE: SPTSearchEngine.php
5 # Part of the Collection Workflow Integration System (CWIS)
6 # Copyright 2011-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/cwis/
14 # pass database handle and config values to real search engine object
17 # for each field defined in schema
19 $Fields = $this->Schema->GetFields();
20 foreach ($Fields as $Field)
22 # determine field type for searching
23 switch ($Field->Type())
35 $FieldType = self::FIELDTYPE_TEXT;
40 $FieldType = self::FIELDTYPE_NUMERIC;
44 $FieldType = self::FIELDTYPE_DATERANGE;
48 $FieldType = self::FIELDTYPE_DATE;
56 exit(
"ERROR: unknown field type "
57 .$Field->Type().
" in SPTSearchEngine.php");
61 if ($FieldType !== NULL)
63 # add field to search engine
64 $this->
AddField($Field->Name(), $FieldType,
65 $Field->SearchWeight(),
66 $Field->IncludeInKeywordSearch());
83 # if this is a reference field
84 $Field = $this->Schema->GetFieldByName($FieldName);
87 # retrieve IDs of referenced items
88 $ReferredItemIds = $Resource->Get($FieldName);
90 # for each referred item
91 $ReturnValue = array();
92 foreach ($ReferredItemIds as $RefId)
94 # retrieve title value for item and add to returned values
96 $ReturnValue[] = $RefResource->GetMapped(
"Title");
99 # return referred item titles to caller
104 # retrieve text (including variants) from resource object and return to caller
105 return $Resource->Get($FieldName, FALSE, TRUE);
109 # overloaded version of method to retrieve resource/phrase match list
112 # normalize and escape search phrase for use in SQL query
113 $SearchPhrase = strtolower(addslashes($Phrase));
115 # query DB for matching list based on field type
116 $Field = $this->Schema->GetFieldByName($FieldName);
117 switch ($Field->Type())
123 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
124 .
"WHERE POSITION('".$SearchPhrase.
"'"
125 .
" IN LOWER(`".$Field->DBFieldName().
"`)) ";
129 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
130 .
"WHERE POSITION('".$SearchPhrase.
"'"
131 .
" IN LOWER(`".$Field->DBFieldName().
"AltText`)) ";
135 $NameTableSize = $this->DB->Query(
"SELECT COUNT(*) AS NameCount"
136 .
" FROM ControlledNames",
"NameCount");
137 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
138 .
"FROM ResourceNameInts, ControlledNames "
139 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
140 .
"AND ControlledNames.ControlledNameId"
141 .
" = ResourceNameInts.ControlledNameId "
142 .
"AND ControlledNames.FieldId = ".$Field->Id();
143 $SecondQueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
144 .
"FROM ResourceNameInts, ControlledNames, VariantNames "
145 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(VariantName)) "
146 .
"AND VariantNames.ControlledNameId"
147 .
" = ResourceNameInts.ControlledNameId "
148 .
"AND ControlledNames.ControlledNameId"
149 .
" = ResourceNameInts.ControlledNameId "
150 .
"AND ControlledNames.FieldId = ".$Field->Id();
154 $QueryString =
"SELECT DISTINCT ResourceNameInts.ResourceId "
155 .
"FROM ResourceNameInts, ControlledNames "
156 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ControlledName)) "
157 .
"AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
158 .
"AND ControlledNames.FieldId = ".$Field->Id();
162 $QueryString =
"SELECT DISTINCT ResourceClassInts.ResourceId "
163 .
"FROM ResourceClassInts, Classifications "
164 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(ClassificationName)) "
165 .
"AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
166 .
"AND Classifications.FieldId = ".$Field->Id();
170 $UserId = $this->DB->Query(
"SELECT UserId FROM APUsers "
171 .
"WHERE POSITION('".$SearchPhrase.
"' IN LOWER(UserName)) "
172 .
"OR POSITION('".$SearchPhrase.
"' IN LOWER(RealName))",
"UserId");
175 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
176 .
"WHERE `".$Field->DBFieldName().
"` = ".$UserId;
181 if ($SearchPhrase > 0)
183 $QueryString =
"SELECT DISTINCT ResourceId FROM Resources "
184 .
"WHERE `".$Field->DBFieldName().
"` = ".(int)$SearchPhrase;
192 # (these types not yet handled by search engine for phrases)
196 # build match list based on results returned from DB
197 if (isset($QueryString))
199 $this->
DMsg(7,
"Performing phrase search query (<i>".$QueryString.
"</i>)");
200 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
201 $this->DB->Query($QueryString);
204 $EndTime = microtime(TRUE);
205 if (($StartTime - $EndTime) > 0.1)
207 printf(
"SE: Query took %.2f seconds<br>\n",
208 ($EndTime - $StartTime));
211 $MatchList = $this->DB->FetchColumn(
"ResourceId");
212 if (isset($SecondQueryString))
214 $this->
DMsg(7,
"Performing second phrase search query"
215 .
" (<i>".$SecondQueryString.
"</i>)");
216 if ($this->
DebugLevel > 9) { $StartTime = microtime(TRUE); }
217 $this->DB->Query($SecondQueryString);
220 $EndTime = microtime(TRUE);
221 if (($StartTime - $EndTime) > 0.1)
223 printf(
"SE: query took %.2f seconds<br>\n",
224 ($EndTime - $StartTime));
227 $MatchList = $MatchList + $this->DB->FetchColumn(
"ResourceId");
232 $MatchList = array();
235 # return list of matching resources to caller
239 # search field for records that meet comparison
242 # use SQL keyword appropriate to current search logic for combining operations
245 # for each comparison
246 foreach ($FieldNames as $Index => $FieldName)
248 $Operator = $Operators[$Index];
249 $Value = $Values[$Index];
251 # determine query based on field type
252 $Field = $this->Schema->GetFieldByName($FieldName);
255 switch ($Field->Type())
263 if (isset($Queries[
"Resources"]))
265 $Queries[
"Resources"] .= $CombineWord;
269 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId FROM Resources WHERE ";
273 $User =
new CWUser($Value);
274 $Value = $User->Id();
276 $Queries[
"Resources"] .=
"`".$Field->DBFieldName().
"` ".$Operator.
" '".addslashes($Value).
"' ";
280 $QueryIndex =
"ResourceNameInts".$Field->Id();
281 if (!isset($Queries[$QueryIndex][
"A"]))
283 $Queries[$QueryIndex][
"A"] =
284 "SELECT DISTINCT ResourceId"
285 .
" FROM ResourceNameInts, ControlledNames "
286 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
288 $CloseQuery[$QueryIndex][
"A"] = TRUE;
289 $ComparisonCount[$QueryIndex][
"A"] = 1;
290 $ComparisonCountField[$QueryIndex][
"A"] =
"ControlledName";
294 $Queries[$QueryIndex][
"A"] .=
" OR ";
295 $ComparisonCount[$QueryIndex][
"A"]++;
297 $Queries[$QueryIndex][
"A"] .=
298 "((ResourceNameInts.ControlledNameId"
299 .
" = ControlledNames.ControlledNameId"
300 .
" AND ControlledName "
301 .$Operator.
" '".addslashes($Value).
"'))";
302 if (!isset($Queries[$QueryIndex][
"B"]))
304 $Queries[$QueryIndex][
"B"] =
305 "SELECT DISTINCT ResourceId"
306 .
" FROM ResourceNameInts, ControlledNames,"
308 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
310 $CloseQuery[$QueryIndex][
"B"] = TRUE;
311 $ComparisonCount[$QueryIndex][
"B"] = 1;
312 $ComparisonCountField[$QueryIndex][
"B"] =
"ControlledName";
316 $Queries[$QueryIndex][
"B"] .=
" OR ";
317 $ComparisonCount[$QueryIndex][
"B"]++;
319 $Queries[$QueryIndex][
"B"] .=
320 "((ResourceNameInts.ControlledNameId"
321 .
" = ControlledNames.ControlledNameId"
322 .
" AND ResourceNameInts.ControlledNameId"
323 .
" = VariantNames.ControlledNameId"
325 .$Operator.
" '".addslashes($Value).
"'))";
329 $QueryIndex =
"ResourceNameInts".$Field->Id();
330 if (!isset($Queries[$QueryIndex]))
332 $Queries[$QueryIndex] =
333 "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
334 .
" WHERE ControlledNames.FieldId = ".$Field->Id()
336 $CloseQuery[$QueryIndex] = TRUE;
337 $ComparisonCount[$QueryIndex] = 1;
338 $ComparisonCountField[$QueryIndex] =
"ControlledName";
342 $Queries[$QueryIndex] .=
" OR ";
343 $ComparisonCount[$QueryIndex]++;
345 $Queries[$QueryIndex] .=
"(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
346 .
" AND ControlledName ".$Operator.
" '".addslashes($Value).
"')";
350 $QueryIndex =
"ResourceClassInts".$Field->Id();
351 if (!isset($Queries[$QueryIndex]))
353 $Queries[$QueryIndex] =
"SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
354 .
" WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
355 .
" AND Classifications.FieldId = ".$Field->Id().
" AND ( ";
356 $CloseQuery[$QueryIndex] = TRUE;
357 $ComparisonCount[$QueryIndex] = 1;
358 $ComparisonCountField[$QueryIndex] =
"ClassificationName";
362 $Queries[$QueryIndex] .=
" OR ";
363 $ComparisonCount[$QueryIndex]++;
365 $Queries[$QueryIndex] .=
" ClassificationName ".$Operator.
" '".addslashes($Value).
"'";
369 # if value appears to have time component or text description
370 if (strpos($Value,
":")
371 || strstr($Value,
"day")
372 || strstr($Value,
"week")
373 || strstr($Value,
"month")
374 || strstr($Value,
"year")
375 || strstr($Value,
"hour")
376 || strstr($Value,
"minute"))
378 if (isset($Queries[
"Resources"]))
380 $Queries[
"Resources"] .= $CombineWord;
384 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
385 .
" FROM Resources WHERE ";
388 # flip operator if necessary
389 if (strstr($Value,
"ago"))
391 $OperatorFlipMap = array(
397 $Operator = isset($OperatorFlipMap[$Operator])
398 ? $OperatorFlipMap[$Operator] : $Operator;
401 # use strtotime method to build condition
402 $TimestampValue = strtotime($Value);
403 if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
405 if ((date(
"H:i:s", $TimestampValue) ==
"00:00:00")
406 && (strpos($Value,
"00:00") === FALSE)
407 && ($Operator ==
"<="))
410 date(
"Y-m-d", $TimestampValue).
" 23:59:59";
414 $NormalizedValue = date(
"Y-m-d H:i:s", $TimestampValue);
419 $NormalizedValue = addslashes($Value);
421 $Queries[
"Resources"] .=
422 " ( `".$Field->DBFieldName().
"` "
424 .
" '".$NormalizedValue.
"' ) ";
428 # use Date object method to build condition
429 $Date =
new Date($Value);
430 if ($Date->Precision())
432 if (isset($Queries[
"Resources"]))
434 $Queries[
"Resources"] .= $CombineWord;
438 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
439 .
" FROM Resources WHERE ";
441 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
442 $Field->DBFieldName(), NULL, $Operator).
" ) ";
448 $Date =
new Date($Value);
449 if ($Date->Precision())
451 if (isset($Queries[
"Resources"]))
453 $Queries[
"Resources"] .= $CombineWord;
457 $Queries[
"Resources"] =
"SELECT DISTINCT ResourceId"
458 .
" FROM Resources WHERE ";
460 $Queries[
"Resources"] .=
" ( ".$Date->SqlCondition(
461 $Field->DBFieldName().
"Begin",
462 $Field->DBFieldName().
"End", $Operator).
" ) ";
467 $QueryIndex =
"ReferenceInts".$Field->Id();
468 if (!isset($Queries[$QueryIndex]))
470 if (!isset($NameField))
473 $this->Schema->GetFieldByMappedName(
476 $Queries[$QueryIndex] =
477 "SELECT DISTINCT RI.SrcResourceId AS ResourceId"
478 .
" FROM ReferenceInts AS RI, Resources AS R "
479 .
" WHERE RI.FieldId = ".$Field->Id()
481 $CloseQuery[$QueryIndex] = TRUE;
482 $ComparisonCount[$QueryIndex] = 1;
483 $ComparisonCountField[$QueryIndex] =
484 "R.`".$NameField->DBFieldName().
"`";
488 $Queries[$QueryIndex] .= $CombineWord;
489 $ComparisonCount[$QueryIndex]++;
491 $Queries[$QueryIndex] .=
"(R.`".$NameField->DBFieldName().
"` "
492 .$Operator.
" '".addslashes($Value).
"'"
493 .
" AND R.ResourceId = RI.DstResourceId)";
498 # (these types not yet handled by search engine for comparisons)
507 # for each assembled query
508 foreach ($Queries as $QueryIndex => $Query)
510 # if query has multiple parts
511 if (is_array($Query))
513 # for each part of query
514 $ResourceIds = array();
515 foreach ($Query as $PartIndex => $PartQuery)
517 # add closing paren if query was flagged to be closed
518 if (isset($CloseQuery[$QueryIndex][$PartIndex]))
522 && ($ComparisonCount[$QueryIndex][$PartIndex] > 1))
524 $PartQuery .=
"GROUP BY ResourceId"
525 .
" HAVING COUNT(DISTINCT "
526 .$ComparisonCountField[$QueryIndex][$PartIndex]
528 .$ComparisonCount[$QueryIndex][$PartIndex];
532 # perform query and retrieve IDs
533 $this->
DMsg(5,
"Performing comparison query <i>"
535 $this->DB->Query($PartQuery);
536 $ResourceIds = $ResourceIds
537 + $this->DB->FetchColumn(
"ResourceId");
538 $this->
DMsg(5,
"Comparison query produced <i>"
539 .count($ResourceIds).
"</i> results");
544 # add closing paren if query was flagged to be closed
545 if (isset($CloseQuery[$QueryIndex]))
549 && ($ComparisonCount[$QueryIndex] > 1))
551 $Query .=
"GROUP BY ResourceId"
552 .
" HAVING COUNT(DISTINCT "
553 .$ComparisonCountField[$QueryIndex]
555 .$ComparisonCount[$QueryIndex];
559 # perform query and retrieve IDs
560 $this->
DMsg(5,
"Performing comparison query <i>".$Query.
"</i>");
561 $this->DB->Query($Query);
562 $ResourceIds = $this->DB->FetchColumn(
"ResourceId");
563 $this->
DMsg(5,
"Comparison query produced <i>"
564 .count($ResourceIds).
"</i> results");
567 # if we already have some results
570 # if search logic is set to AND
573 # remove anything from results that was not returned from query
574 $Results = array_intersect($Results, $ResourceIds);
578 # add values returned from query to results
579 $Results = array_unique(array_merge($Results, $ResourceIds));
584 # set results to values returned from query
585 $Results = $ResourceIds;
591 # initialize results to empty list
595 # return results to caller
602 return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
609 $TaskDescription =
"Update search data for"
610 .
" <a href=\"r".$ItemId.
"\"><i>"
611 .$Item->GetMapped(
"Title").
"</i></a>";
612 $GLOBALS[
"AF"]->QueueUniqueTask(array(__CLASS__,
"RunUpdateForItem"),
613 array(intval($ItemId)), $TaskPriority, $TaskDescription);
618 # check that resource still exists
620 if (!$RFactory->ItemExists($ItemId)) {
return; }
622 # update search data for resource
624 $SearchEngine->UpdateForItem($ItemId);
636 # classifications and names associated with these search results
637 $SearchClasses = array();
638 $SearchNames = array();
640 # disable DB cache for the search suggestions process,
641 # this avoids memory exhaustion.
645 if (count($SearchResults)>0)
647 foreach (array_chunk($SearchResults, 1000, TRUE) as $Chunk)
649 # pull out all the Classifications that were associated with our search results
650 $DB->Query(
"SELECT ResourceId,ClassificationId FROM ResourceClassInts "
651 .
"WHERE ResourceId IN "
652 .
"(".implode(
",", array_keys($Chunk)).
")");
653 foreach (
$DB->FetchRows() as $Row)
655 $SearchClasses[ $Row[
"ClassificationId"] ] []= $Row[
"ResourceId"] ;
658 # similarly with controlled names
659 $DB->Query(
"SELECT ResourceId,ControlledNameId FROM ResourceNameInts "
660 .
"WHERE ResourceId in "
661 .
"(".implode(
",", array_keys($Chunk)).
")");
662 foreach (
$DB->FetchRows() as $Row)
664 $SearchNames[ $Row[
"ControlledNameId"] ] []= $Row[
"ResourceId"];
669 # generate a map of FieldId -> Field Names for all of the generated facets:
670 $SuggestionsById = array();
672 # pull relevant Classification names out of the DB
673 if (count($SearchClasses)>0)
675 foreach (array_chunk($SearchClasses, 1000, TRUE) as $Chunk)
677 $DB->Query(
"SELECT FieldId,ClassificationId,ClassificationName FROM Classifications "
678 .
"WHERE ClassificationId IN (".implode(
",", array_keys($Chunk)).
")");
679 foreach (
$DB->FetchRows() as $Row)
681 $SuggestionsById[$Row[
"FieldId"]] []=
682 array(
"Id" => $Row[
"ClassificationId"],
683 "Name" => $Row[
"ClassificationName"],
684 "Count" => count( $SearchClasses[ $Row[
"ClassificationId"] ] ) );
689 # pull relevant ControlledNames out of the DB
690 if (count($SearchNames)>0)
692 foreach (array_chunk($SearchNames, 1000, TRUE) as $Chunk)
694 $DB->Query(
"SELECT FieldId,ControlledNameId,ControlledName FROM ControlledNames "
695 .
"WHERE ControlledNameId IN (".implode(
",", array_keys($SearchNames)).
")");
696 foreach (
$DB->FetchRows() as $Row)
698 $SuggestionsById[$Row[
"FieldId"]] []=
699 array(
"Id" => $Row[
"ControlledNameId"],
700 "Name" => $Row[
"ControlledName"],
701 "Count" => count( $SearchNames[ $Row[
"ControlledNameId"] ] ) );
706 # translate the suggestions that we have in terms of the
707 # FieldIds to suggestions in terms of the field names
708 $SuggestionsByFieldName = array();
710 # if we have suggestions to offer
711 if (count($SuggestionsById)>0)
715 # gill in an array that maps FieldNames to search links
716 # which would be appropriate for that field
717 foreach ($SuggestionsById as $FieldId => $FieldValues)
719 $ThisField = $Schema->GetField($FieldId);
721 # bail on fields that didn't exist
722 # and on fields that the current user cannot view, and on fields that are disabled
723 # for advanced searching:
724 if (is_object($ThisField) &&
726 $ThisField->IncludeInFacetedSearch() &&
727 $ThisField->Enabled() &&
728 $User->HasPriv($ThisField->ViewingPrivileges()))
730 $SuggestionsByFieldName[ $ThisField->Name() ] = array();
732 foreach ($FieldValues as $Value)
733 $SuggestionsByFieldName [ $ThisField->Name() ] [$Value[
"Id"]] =
734 array(
"Name" => $Value[
"Name"],
"Count" => $Value[
"Count"] );
739 ksort($SuggestionsByFieldName);
741 return $SuggestionsByFieldName;
746 # functions for backward compatability w/ old SPT code
SearchFieldForPhrases($FieldName, $Phrase)
const PRIORITY_LOW
Lower priority.
SQL database abstraction object with smart query caching.
AddField($FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
UpdateForResource($ItemId)
SearchEngine($ItemTableName, $ItemIdFieldName)
Object constructor.
GetFieldContent($ItemId, $FieldName)
Overloaded version of method to retrieve text from DB.
static RunUpdateForItem($ItemId)
SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
Represents a "resource" in CWIS.
static GetResultFacets($SearchResults, $User)
Generate a list of suggested additional search terms that can be used for faceted searching...
static GetItemIdsSortedByField($FieldName, $SortDescending)
Core metadata archive search engine class.
DefaultSearchLogic($NewSetting=NULL)
Get/set default search logic (LOGIC_AND or LOGIC_OR).
DebugLevel($NewValue)
Set debug output level.
Factory for Resource objects.
CWIS-specific user class.
UpdateForItem($ItemId)
Update search database for the specified item.
static QueueUpdateForItem($ItemId, $TaskPriority=ApplicationFramework::PRIORITY_LOW)