3 # FILE: SearchEngine.php
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2014 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
15 # ---- PUBLIC INTERFACE --------------------------------------------------
17 # possible types of logical operators
21 # flags used for indicating field types
27 # flags used for indicating word states
40 # create database object for our use
43 # save item access parameters
47 # set default debug state
60 $FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
63 $this->FieldInfo[$FieldName][
"FieldType"] = $FieldType;
64 $this->FieldInfo[$FieldName][
"Weight"] = $Weight;
65 $this->FieldInfo[$FieldName][
"InKeywordSearch"] =
66 $UsedInKeywordSearch ? TRUE : FALSE;
75 {
return $this->FieldInfo[$FieldName][
"FieldType"]; }
83 {
return $this->FieldInfo[$FieldName][
"Weight"]; }
91 {
return $this->FieldInfo[$FieldName][
"InKeywordSearch"]; }
103 # ---- search functions
120 function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
121 $SortByField = NULL, $SortDescending = TRUE)
123 # interpret and filter out magic debugging keyword (if any)
124 $SearchString = $this->SetDebugLevel($SearchString);
125 $this->
DMsg(0,
"In Search() with search string \"".$SearchString.
"\"");
127 # save start time to use in calculating search time
128 $StartTime = microtime(TRUE);
131 $this->InclusiveTermCount = 0;
132 $this->RequiredTermCount = 0;
133 $this->ExcludedTermCount = 0;
135 # parse search string into terms
136 $Words = $this->ParseSearchStringForWords($SearchString);
137 $this->
DMsg(1,
"Found ".count($Words).
" words");
139 # parse search string for phrases
140 $Phrases = $this->ParseSearchStringForPhrases($SearchString);
141 $this->
DMsg(1,
"Found ".count($Phrases).
" phrases");
143 # if only excluded terms specified
144 if ($this->ExcludedTermCount && !$this->InclusiveTermCount)
147 $this->
DMsg(1,
"Loading all records");
148 $Scores = $this->LoadScoresForAllRecords();
153 $Scores = $this->SearchForWords($Words);
154 $this->
DMsg(1,
"Found ".count($Scores).
" results after word search");
155 $Scores = $this->SearchForPhrases($Phrases, $Scores);
156 $this->
DMsg(1,
"Found ".count($Scores).
" results after phrase search");
159 # if search results found
160 if (count($Scores) > 0)
162 # handle any excluded words
163 $Scores = $this->FilterOnExcludedWords($Words, $Scores);
165 # strip off any results that don't contain required words
166 $Scores = $this->FilterOnRequiredWords($Scores);
169 # count, sort, and trim search result scores list
170 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
171 $SortByField, $SortDescending);
174 $this->LastSearchTime = microtime(TRUE) - $StartTime;
176 # return list of items to caller
177 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
198 function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
199 $SortByField = NULL, $SortDescending = TRUE)
201 # interpret and filter out magic debugging keyword (if any)
202 $SearchStrings = $this->SetDebugLevel($SearchStrings);
203 $this->
DMsg(0,
"In FieldedSearch() with "
204 .count($SearchStrings).
" search strings");
206 # save start time to use in calculating search time
207 $StartTime = microtime(TRUE);
210 $Scores = $this->SearchAcrossFields($SearchStrings);
211 $Scores = ($Scores === NULL) ? array() : $Scores;
213 # count, sort, and trim search result scores list
214 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
215 $SortByField, $SortDescending);
218 $this->LastSearchTime = microtime(TRUE) - $StartTime;
220 # return list of items to caller
221 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
240 function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
241 $SortByField = NULL, $SortDescending = TRUE)
243 # interpret and filter out magic debugging keyword (if any)
244 foreach ($SearchGroups as $Index => $Groups)
246 if (isset($SearchGroups[$Index][
"SearchStrings"]))
248 $SearchGroups[$Index][
"SearchStrings"] =
249 $this->SetDebugLevel($SearchGroups[$Index][
"SearchStrings"]);
252 $this->
DMsg(0,
"In GroupedSearch() with "
253 .count($SearchGroups).
" search groups");
255 # save start time to use in calculating search time
256 $StartTime = microtime(TRUE);
258 # start with no results
261 # save AND/OR search setting
264 # for each search group
266 foreach ($SearchGroups as $Group)
268 $this->
DMsg(0,
"----- GROUP ---------------------------");
270 # if group has AND/OR setting specified
271 if (isset($Group[
"Logic"]))
273 # use specified AND/OR setting
278 # use saved AND/OR setting
281 $this->
DMsg(2,
"Logic is "
284 # if we have search strings for this group
285 if (isset($Group[
"SearchStrings"]))
288 $GroupScores = $this->SearchAcrossFields($Group[
"SearchStrings"]);
290 # if search was conducted
291 if ($GroupScores !== NULL)
293 # if saved AND/OR setting is OR or this is first search
294 if (($SavedSearchLogic == self::LOGIC_OR) || $FirstSearch)
296 # add search results to result list
297 foreach ($GroupScores as $ItemId => $Score)
299 if (isset($Scores[$ItemId]))
301 $Scores[$ItemId] += $Score;
305 $Scores[$ItemId] = $Score;
309 # (reset flag indicating first search)
310 $FirstSearch = FALSE;
314 # AND search results with previous results
315 $OldScores = $Scores;
317 foreach ($GroupScores as $ItemId => $Score)
319 if (isset($OldScores[$ItemId]))
321 $Scores[$ItemId] = $OldScores[$ItemId] + $Score;
329 # restore AND/OR search setting
332 # count, sort, and trim search result scores list
333 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
334 $SortByField, $SortDescending);
337 $this->LastSearchTime = microtime(TRUE) - $StartTime;
339 # return search results to caller
340 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
350 # save filter function name
351 $this->FilterFuncs[] = $FunctionName;
361 if ($NewSetting != NULL)
393 return $this->SearchTermList;
415 $IncludedKeywordSearch = FALSE;
416 foreach ($SearchStrings as $FieldName => $SearchStringArray)
418 if ($FieldName ==
"XXXKeywordXXX")
420 $IncludedKeywordSearch = TRUE;
424 if (array_key_exists($FieldName, $this->FieldInfo))
426 $Weight += $this->FieldInfo[$FieldName][
"Weight"];
430 if ($IncludedKeywordSearch)
432 foreach ($this->FieldInfo as $FieldName => $Info)
434 if ($Info[
"InKeywordSearch"])
436 $Weight += $Info[
"Weight"];
444 # ---- search database update functions
452 # bail out if item ID is negative (indicating a temporary record)
453 if ($ItemId < 0) {
return; }
455 # clear word count added flags for this item
456 unset($this->WordCountAdded);
458 # delete any existing info for this item
459 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
461 # for each metadata field
462 foreach ($this->FieldInfo as $FieldName => $Info)
464 # if search weight for field is positive
465 if ($Info[
"Weight"] > 0)
467 # retrieve text for field
473 # for each text string in array
474 foreach ($Text as $String)
476 # record search info for text
477 $this->RecordSearchInfoForText($ItemId, $FieldName,
478 $Info[
"Weight"], $String,
479 $Info[
"InKeywordSearch"]);
484 # record search info for text
485 $this->RecordSearchInfoForText($ItemId, $FieldName,
486 $Info[
"Weight"], $Text,
487 $Info[
"InKeywordSearch"]);
501 # retrieve IDs for specified number of items starting at specified ID
502 $this->DB->Query(
"SELECT ".$this->ItemIdFieldName.
" FROM ".$this->ItemTableName
503 .
" WHERE ".$this->ItemIdFieldName.
" >= ".$StartingItemId
504 .
" ORDER BY ".$this->ItemIdFieldName.
" LIMIT ".$NumberOfItems);
505 $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);
507 # for each retrieved item ID
508 foreach ($ItemIds as $ItemId)
510 # update search info for item
514 # return ID of last item updated to caller
524 # drop all entries pertaining to item from word count table
525 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
534 # retrieve our ID for field
535 $FieldId = $this->DB->Query(
"SELECT FieldId FROM SearchFields "
536 .
"WHERE FieldName = '".addslashes($FieldName).
"'",
"FieldId");
538 # drop all entries pertaining to field from word counts table
# (plain single quotes are used here: inside a double-quoted PHP string
#  "\'" keeps the backslash, which would put a stray "\" into the SQL)
539 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE FieldId = '".$FieldId.
"'");
541 # drop field from our fields table
542 $this->DB->Query(
"DELETE FROM SearchFields WHERE FieldId = '".$FieldId.
"'");
551 return $this->DB->Query(
"SELECT COUNT(*) AS TermCount"
552 .
" FROM SearchWords",
"TermCount");
561 return $this->DB->Query(
"SELECT COUNT(DISTINCT ItemId) AS ItemCount"
562 .
" FROM SearchWordCounts",
"ItemCount");
574 # assume no synonyms will be added
578 $WordId = $this->GetWordId($Word, TRUE);
580 # for each synonym passed in
581 foreach ($Synonyms as $Synonym)
584 $SynonymId = $this->GetWordId($Synonym, TRUE);
586 # if synonym is not already in database
587 $this->DB->Query(
"SELECT * FROM SearchWordSynonyms"
588 .
" WHERE (WordIdA = ".$WordId
589 .
" AND WordIdB = ".$SynonymId.
")"
590 .
" OR (WordIdB = ".$WordId
591 .
" AND WordIdA = ".$SynonymId.
")");
592 if ($this->DB->NumRowsSelected() == 0)
594 # add synonym entry to database
595 $this->DB->Query(
"INSERT INTO SearchWordSynonyms"
596 .
" (WordIdA, WordIdB)"
597 .
" VALUES (".$WordId.
", ".$SynonymId.
")");
602 # report to caller number of new synonyms added
615 $WordId = $this->GetWordId($Word);
618 if ($WordId !== NULL)
620 # if no specific synonyms provided
621 if ($Synonyms === NULL)
623 # remove all synonyms for word
624 $this->DB->Query(
"DELETE FROM SearchWordSynonyms"
625 .
" WHERE WordIdA = '".$WordId.
"'"
626 .
" OR WordIdB = '".$WordId.
"'");
630 # for each specified synonym
631 foreach ($Synonyms as $Synonym)
633 # look up ID for synonym
634 $SynonymId = $this->GetWordId($Synonym);
636 # if synonym ID was found
637 if ($SynonymId !== NULL)
639 # delete synonym entry
640 $this->DB->Query(
"DELETE FROM SearchWordSynonyms"
641 .
" WHERE (WordIdA = '".$WordId.
"'"
642 .
" AND WordIdB = '".$SynonymId.
"')"
643 .
" OR (WordIdB = '".$WordId.
"'"
644 .
" AND WordIdA = '".$SynonymId.
"')");
656 $this->DB->Query(
"DELETE FROM SearchWordSynonyms");
666 # assume no synonyms will be found
669 # look up ID for word
670 $WordId = $this->GetWordId($Word);
672 # if word ID was found
673 if ($WordId !== NULL)
675 # look up IDs of all synonyms for this word
676 $this->DB->Query(
"SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
677 .
" WHERE WordIdA = ".$WordId
678 .
" OR WordIdB = ".$WordId);
679 $SynonymIds = array();
# walk each synonym pair row and keep the ID on the opposite side from $WordId
# (FetchRow must be invoked as a method; without the parentheses it is read
#  as a property and the loop body never runs)
680 while ($Record = $this->DB->FetchRow())
682 $SynonymIds[] = ($Record[
"WordIdA"] == $WordId)
683 ? $Record[
"WordIdB"] : $Record[
"WordIdA"];
686 # for each synonym ID
687 foreach ($SynonymIds as $SynonymId)
689 # look up synonym word and add to synonym list
690 $Synonyms[] = $this->GetWord($SynonymId);
694 # return synonyms to caller
704 # assume no synonyms will be found
705 $SynonymList = array();
707 # for each synonym ID pair
709 $OurDB->Query(
"SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
710 while ($Record = $OurDB->FetchRow())
713 $Word = $this->GetWord($Record[
"WordIdA"]);
714 $Synonym = $this->GetWord($Record[
"WordIdB"]);
716 # if we do not already have an entry for the word
717 # or synonym is not listed for this word
718 if (!isset($SynonymList[$Word])
719 || !in_array($Synonym, $SynonymList[$Word]))
721 # add entry for synonym
722 $SynonymList[$Word][] = $Synonym;
725 # if we do not already have an entry for the synonym
726 # or word is not listed for this synonym
727 if (!isset($SynonymList[$Synonym])
728 || !in_array($Word, $SynonymList[$Synonym]))
731 $SynonymList[$Synonym][] = $Word;
736 # (this loop removes reciprocal duplicates)
737 foreach ($SynonymList as $Word => $Synonyms)
739 # for each synonym for that word
740 foreach ($Synonyms as $Synonym)
742 # if synonym has synonyms and word is one of them
743 if (isset($SynonymList[$Synonym])
744 && isset($SynonymList[$Word])
745 && in_array($Word, $SynonymList[$Synonym])
746 && in_array($Synonym, $SynonymList[$Word]))
748 # if word has fewer synonyms than synonym
749 if (count($SynonymList[$Word])
750 < count($SynonymList[$Synonym]))
752 # remove synonym from synonym list for word
753 $SynonymList[$Word] = array_diff(
754 $SynonymList[$Word], array($Synonym));
756 # if no synonyms left for word
757 if (!count($SynonymList[$Word]))
759 # remove empty synonym list for word
760 unset($SynonymList[$Word]);
765 # remove word from synonym list for synonym
766 $SynonymList[$Synonym] = array_diff(
767 $SynonymList[$Synonym], array($Word));
769 # if no synonyms left for synonym
770 if (!count($SynonymList[$Synonym]))
772 # remove empty synonym list for synonym
773 unset($SynonymList[$Synonym]);
780 # sort array alphabetically (just for convenience)
781 foreach ($SynonymList as $Word => $Synonyms)
783 asort($SynonymList[$Word]);
787 # return 2D array of synonyms to caller
798 # remove all existing synonyms
801 # for each synonym entry passed in
802 foreach ($SynonymList as $Word => $Synonyms)
804 # add synonyms for word
819 # assume no synonyms will be added
822 # read in contents of file
823 $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
825 # if file contained lines
828 # for each line of file
829 foreach ($Lines as $Line)
831 # if line is not a comment
832 if (!preg_match(
"/[\s]*#/", $Line))
834 # split line into words
835 $Words = preg_split(
"/[\s,]+/", $Line);
838 if (count($Words) > 1)
840 # separate out word and synonyms
841 $Word = array_shift($Words);
850 # return count of synonyms added to caller
855 # ---- PRIVATE INTERFACE -------------------------------------------------
868 private $WordCountAdded;
871 private $RequiredTermCount;
872 private $RequiredTermCounts;
873 private $InclusiveTermCount;
874 private $ExcludedTermCount;
875 private $SearchTermList;
880 # ---- common private functions (used in both searching and DB build)
891 private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
893 # strip off any surrounding whitespace
894 $Text = trim($SearchString);
896 # set up normalization replacement strings
898 "/'s[^a-z0-9\\-+~]+/i", #
get rid of possessive plurals
899 "/'/", #
get rid of single quotes / apostrophes
900 "/\"[^\"]*\"/", #
get rid of phrases (NOTE: HARD-CODED INDEX BELOW!!!)
"
901 "/\\([^)]*\\)/
", # get rid of groups (NOTE: HARD-CODED INDEX BELOW!!!)
902 "/[^a-z0-9\\-+~]+/i
", # convert non-alphanumerics / non-minus/plus to a space
903 "/([^\\s])-+/i
", # convert minus preceded by anything but whitespace to a space
904 "/([^\\s])\\++/i
", # convert plus preceded by anything but whitespace to a space
905 "/-\\s/i
", # convert minus followed by whitespace to a space
906 "/\\+\\s/i
", # convert plus followed by whitespace to a space
907 "/~\\s/i
", # convert tilde followed by whitespace to a space
908 "/[ ]+/
" # convert multiple spaces to one space
910 $Replacements = array(
924 # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens)
927 # switch phrase removal to double quote removal (HARD-CODED INDEX INTO PATTERN LIST!!)
928 $Patterns[2] = "/\
"/";
930 # switch group removal to paren removal (HARD-CODED INDEX INTO PATTERN LIST!!)
931 $Patterns[3] =
"/[\(\)]+/";
934 # remove punctuation from text and normalize whitespace
935 $Text = preg_replace($Patterns, $Replacements, $Text);
936 $this->
DMsg(2,
"Normalized search string is '".$Text.
"'");
938 # convert text to lower case
939 $Text = strtolower($Text);
941 # strip off any extraneous whitespace
944 # start with an empty array
947 # if we have words left after parsing
948 if (strlen($Text) != 0)
951 foreach (explode(
" ", $Text) as $Word)
953 # grab first character of word
954 $FirstChar = substr($Word, 0, 1);
956 # strip off option characters and set flags appropriately
957 $Flags = self::WORD_PRESENT;
958 if ($FirstChar ==
"-")
960 $Word = substr($Word, 1);
961 $Flags |= self::WORD_EXCLUDED;
962 if (!isset($Words[$Word]))
964 $this->ExcludedTermCount++;
969 if ($FirstChar ==
"~")
971 $Word = substr($Word, 1);
974 || ($FirstChar ==
"+"))
976 if ($FirstChar ==
"+")
978 $Word = substr($Word, 1);
980 $Flags |= self::WORD_REQUIRED;
981 if (!isset($Words[$Word]))
983 $this->RequiredTermCount++;
986 if (!isset($Words[$Word]))
988 $this->InclusiveTermCount++;
989 $this->SearchTermList[] = $Word;
993 # store flags to indicate word found
994 $Words[$Word] = $Flags;
995 $this->
DMsg(3,
"Word identified (".$Word.
")");
999 # return normalized words to caller
1009 private function GetFieldId($FieldName)
1011 # if field ID is not in cache
1012 if (!isset($this->FieldIds[$FieldName]))
1014 # look up field info in database
1015 $this->DB->Query(
"SELECT FieldId FROM SearchFields "
1016 .
"WHERE FieldName = '".addslashes($FieldName).
"'");
1018 # if field was found
1019 if ($Record = $this->DB->FetchRow())
1021 # load info from DB record
1022 $FieldId = $Record[
"FieldId"];
1026 # add field to database
1027 $this->DB->Query(
"INSERT INTO SearchFields (FieldName) "
1028 .
"VALUES ('".addslashes($FieldName).
"')");
1030 # retrieve ID for newly added field
1031 $FieldId = $this->DB->LastInsertId();
1035 $this->FieldIds[$FieldName] = $FieldId;
1038 # return cached ID to caller
1039 return $this->FieldIds[$FieldName];
1049 private function GetWordId($Word, $AddIfNotFound = FALSE)
1051 static $WordIdCache;
1053 # if word was in ID cache
1054 if (isset($WordIdCache[$Word]))
1057 $WordId = $WordIdCache[$Word];
1061 # look up ID in database
1062 $WordId = $this->DB->Query(
"SELECT WordId"
1063 .
" FROM SearchWords"
1064 .
" WHERE WordText='".addslashes($Word).
"'",
1067 # if ID was not found and caller requested it be added
1068 if (($WordId === NULL) && $AddIfNotFound)
1070 # add word to database
1071 $this->DB->Query(
"INSERT INTO SearchWords (WordText)"
1072 .
" VALUES ('".addslashes(strtolower($Word)).
"')");
1074 # get ID for newly added word
1075 $WordId = $this->DB->LastInsertId();
1079 $WordIdCache[$Word] = $WordId;
1082 # return ID to caller
1093 private function GetStemId($Stem, $AddIfNotFound = FALSE)
1095 static $StemIdCache;
1097 # if stem was in ID cache
1098 if (isset($StemIdCache[$Stem]))
1101 $StemId = $StemIdCache[$Stem];
1105 # look up ID in database
1106 $StemId = $this->DB->Query(
"SELECT WordId"
1107 .
" FROM SearchStems"
1108 .
" WHERE WordText='".addslashes($Stem).
"'",
1111 # if ID was not found and caller requested it be added
1112 if (($StemId === NULL) && $AddIfNotFound)
1114 # add stem to database
1115 $this->DB->Query(
"INSERT INTO SearchStems (WordText)"
1116 .
" VALUES ('".addslashes(strtolower($Stem)).
"')");
1118 # get ID for newly added stem
1119 $StemId = $this->DB->LastInsertId();
1122 # adjust from DB ID value to stem ID value
1123 $StemId += self::STEM_ID_OFFSET;
1126 $StemIdCache[$Stem] = $StemId;
1129 # return ID to caller
1138 private function GetWord($WordId)
1142 # if word was in cache
1143 if (isset($WordCache[$WordId]))
1145 # use word from cache
1146 $Word = $WordCache[$WordId];
1150 # adjust search location and word ID if word is stem
1151 $TableName =
"SearchWords";
1152 if ($WordId >= self::STEM_ID_OFFSET)
1154 $TableName =
"SearchStems";
1155 $WordId -= self::STEM_ID_OFFSET;
1158 # look up word in database
1159 $Word = $this->DB->Query(
"SELECT WordText"
1160 .
" FROM ".$TableName
1161 .
" WHERE WordId='".$WordId.
"'",
1164 # save word to cache
1165 $WordCache[$WordId] = $Word;
1168 # return word to caller
1173 # ---- private functions used in searching
1183 private function SearchAcrossFields($SearchStrings)
1185 # start by assuming no search will be done
1189 $this->InclusiveTermCount = 0;
1190 $this->RequiredTermCount = 0;
1191 $this->ExcludedTermCount = 0;
1194 $NeedComparisonSearch = FALSE;
1195 foreach ($SearchStrings as $FieldName => $SearchStringArray)
1197 # convert search string to array if needed
1198 if (!is_array($SearchStringArray))
1200 $SearchStringArray = array($SearchStringArray);
1203 # for each search string for this field
1204 foreach ($SearchStringArray as $SearchString)
1206 # if field is keyword or field is text and does not look like comparison match
1207 $NotComparisonSearch = !preg_match(
"/^[><!]=./", $SearchString)
1208 && !preg_match(
"/^[><=]./", $SearchString);
1209 if (($FieldName ==
"XXXKeywordXXX")
1210 || (isset($this->FieldInfo[$FieldName])
1211 && ($this->FieldInfo[$FieldName][
"FieldType"]
1212 == self::FIELDTYPE_TEXT)
1213 && $NotComparisonSearch))
1215 $this->DMsg(0,
"Searching text field \""
1216 .$FieldName.
"\" for string \"$SearchString\"");
1218 # normalize text and split into words
1219 $Words[$FieldName] =
1220 $this->ParseSearchStringForWords($SearchString);
1222 # calculate scores for matching items
1223 if (count($Words[$FieldName]))
1225 $Scores = $this->SearchForWords(
1226 $Words[$FieldName], $FieldName, $Scores);
1227 $this->DMsg(3,
"Have "
1228 .count($Scores).
" results after word search");
1231 # split into phrases
1232 $Phrases[$FieldName] =
1233 $this->ParseSearchStringForPhrases($SearchString);
1235 # handle any phrases
1236 if (count($Phrases[$FieldName]))
1238 $Scores = $this->SearchForPhrases(
1239 $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
1240 $this->DMsg(3,
"Have "
1241 .count($Scores).
" results after phrase search");
1246 # set flag to indicate possible comparison search candidate found
1247 $NeedComparisonSearch = TRUE;
1252 # perform comparison searches
1253 if ($NeedComparisonSearch)
1255 $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
1256 $this->DMsg(3,
"Have ".count($Scores).
" results after comparison search");
1259 # if no results found and exclusions specified
1260 if (!count($Scores) && $this->ExcludedTermCount)
1263 $Scores = $this->LoadScoresForAllRecords();
1266 # if search results found
1269 # for each search text string
1270 foreach ($SearchStrings as $FieldName => $SearchStringArray)
1272 # convert search string to array if needed
1273 if (!is_array($SearchStringArray))
1275 $SearchStringArray = array($SearchStringArray);
1278 # for each search string for this field
1279 foreach ($SearchStringArray as $SearchString)
1282 if (($FieldName ==
"XXXKeywordXXX")
1283 || (isset($this->FieldInfo[$FieldName])
1284 && ($this->FieldInfo[$FieldName][
"FieldType"]
1285 == self::FIELDTYPE_TEXT)))
1287 # if there are words in search text
1288 if (isset($Words[$FieldName]))
1290 # handle any excluded words
1291 $Scores = $this->FilterOnExcludedWords($Words[$FieldName], $Scores, $FieldName);
1294 # handle any excluded phrases
1295 if (isset($Phrases[$FieldName]))
1297 $Scores = $this->SearchForPhrases(
1298 $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
1304 # strip off any results that don't contain required words
1305 $Scores = $this->FilterOnRequiredWords($Scores);
1308 # return search result scores to caller
1322 private function SearchForWords(
1323 $Words, $FieldName =
"XXXKeywordXXX", $Scores = NULL)
1327 # start with empty search result scores list if none passed in
1328 if ($Scores == NULL)
1334 $FieldId = $this->GetFieldId($FieldName);
1337 foreach ($Words as $Word => $Flags)
1340 $this->DMsg(2,
"Searching for word '${Word}' in field ".$FieldName);
1342 # if word is not excluded
1343 if (!($Flags & self::WORD_EXCLUDED))
1345 # look up record ID for word
1346 $this->DMsg(2,
"Looking up word \"".$Word.
"\"");
1347 $WordId = $this->GetWordId($Word);
1350 if ($WordId !== NULL)
1352 # look up counts for word
1353 $DB->Query(
"SELECT ItemId,Count FROM SearchWordCounts "
1354 .
"WHERE WordId = ".$WordId
1355 .
" AND FieldId = ".$FieldId);
1356 $Counts = $DB->FetchColumn(
"Count",
"ItemId");
1358 # if synonym support is enabled
1359 if ($this->SynonymsEnabled)
1361 # look for any synonyms
1362 $DB->Query(
"SELECT WordIdA, WordIdB"
1363 .
" FROM SearchWordSynonyms"
1364 .
" WHERE WordIdA = ".$WordId
1365 .
" OR WordIdB = ".$WordId);
1367 # if synonyms were found
1368 if ($DB->NumRowsSelected())
1370 # retrieve synonym IDs
1371 $SynonymIds = array();
1372 while ($Record = $DB->FetchRow())
1374 $SynonymIds[] = ($Record[
"WordIdA"] == $WordId)
1375 ? $Record[
"WordIdB"]
1376 : $Record[
"WordIdA"];
1380 foreach ($SynonymIds as $SynonymId)
1382 # retrieve counts for synonym
1383 $DB->Query(
"SELECT ItemId,Count"
1384 .
" FROM SearchWordCounts"
1385 .
" WHERE WordId = ".$SynonymId
1386 .
" AND FieldId = ".$FieldId);
1387 $SynonymCounts = $DB->FetchColumn(
"Count",
"ItemId");
1390 foreach ($SynonymCounts as $ItemId => $Count)
1392 # adjust count because it's a synonym
1393 $AdjustedCount = ceil($Count / 2);
1395 # add count to existing counts
1396 if (isset($Counts[$ItemId]))
1398 $Counts[$ItemId] += $AdjustedCount;
1402 $Counts[$ItemId] = $AdjustedCount;
1410 # if stemming is enabled
1411 if ($this->StemmingEnabled)
1414 $Stem = PorterStemmer::Stem($Word);
1415 $this->DMsg(2,
"Looking up stem \"".$Stem.
"\"");
1416 $StemId = $this->GetStemId($Stem);
1418 # if ID found for stem
1419 if ($StemId !== NULL)
1421 # retrieve counts for stem
1422 $DB->Query(
"SELECT ItemId,Count"
1423 .
" FROM SearchWordCounts"
1424 .
" WHERE WordId = ".$StemId
1425 .
" AND FieldId = ".$FieldId);
1426 $StemCounts = $DB->FetchColumn(
"Count",
"ItemId");
1429 foreach ($StemCounts as $ItemId => $Count)
1431 # adjust count because it's a stem
1432 $AdjustedCount = ceil($Count / 2);
1434 # add count to existing counts
1435 if (isset($Counts[$ItemId]))
1437 $Counts[$ItemId] += $AdjustedCount;
1441 $Counts[$ItemId] = $AdjustedCount;
1447 # if counts were found
1451 foreach ($Counts as $ItemId => $Count)
1453 # if word flagged as required
1454 if ($Flags & self::WORD_REQUIRED)
1456 # increment required word count for record
1457 if (isset($this->RequiredTermCounts[$ItemId]))
1459 $this->RequiredTermCounts[$ItemId]++;
1463 $this->RequiredTermCounts[$ItemId] = 1;
1467 # add to item record score
1468 if (isset($Scores[$ItemId]))
1470 $Scores[$ItemId] += $Count;
1474 $Scores[$ItemId] = $Count;
1481 # return basic scores to caller
1491 private function ParseSearchStringForPhrases($SearchString)
1493 # split into chunks delimited by double quote marks
1494 $Pieces = explode(
"\"", $SearchString); #
"
1496 # for each pair of chunks
1499 while ($Index < count($Pieces))
1501 # grab phrase from chunk
1502 $Phrase = trim(addslashes($Pieces[$Index - 1]));
1503 $Flags = self::WORD_PRESENT;
1505 # grab character immediately preceding phrase (option character, if any)
1506 $FirstChar = substr($Pieces[$Index - 2], -1);
1508 # set flags to reflect any option characters
1509 if ($FirstChar == "-
")
1511 $Flags |= self::WORD_EXCLUDED;
1512 if (!isset($Phrases[$Phrase]))
1514 $this->ExcludedTermCount++;
1519 if ((($this->DefaultSearchLogic == self::LOGIC_AND) && ($FirstChar != "~
"))
1520 || ($FirstChar == "+
"))
1522 $Flags |= self::WORD_REQUIRED;
1523 if (!isset($Phrases[$Phrase]))
1525 $this->RequiredTermCount++;
1528 if (!isset($Phrases[$Phrase]))
1530 $this->InclusiveTermCount++;
1531 $this->SearchTermList[] = $Phrase;
1534 $Phrases[$Phrase] = $Flags;
1536 # move to next pair of chunks
1540 # return phrases to caller
1544 protected function SearchFieldForPhrases($FieldName, $Phrase)
1547 exit("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n");
1550 private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
1551 $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
1553 # if phrases are found
1554 if (count($Phrases) > 0)
1556 # if this is a keyword search
1557 if ($FieldName ==
"XXXKeywordXXX")
1560 foreach ($this->FieldInfo as $KFieldName => $Info)
1562 # if field is marked to be included in keyword searches
1563 if ($Info[
"InKeywordSearch"])
1565 # call ourself with that field
1566 $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
1567 $ProcessNonExcluded, $ProcessExcluded);
1574 foreach ($Phrases as $Phrase => $Flags)
1576 $this->DMsg(2,
"Searching for phrase '".$Phrase
1577 .
"' in field ".$FieldName);
1579 # if phrase flagged as excluded and we are doing excluded phrases
1580 # or phrase flagged as non-excluded and we are doing non-excluded phrases
1581 if (($ProcessExcluded && ($Flags & self::WORD_EXCLUDED))
1582 || ($ProcessNonExcluded && !($Flags & self::WORD_EXCLUDED)))
1584 # initialize score list if necessary
1585 if ($Scores === NULL) { $Scores = array(); }
1587 # retrieve list of items that contain phrase
1588 $ItemIds = $this->SearchFieldForPhrases(
1589 $FieldName, $Phrase);
1591 # for each item that contains phrase
1592 foreach ($ItemIds as $ItemId)
1594 # if we are doing excluded phrases and phrase flagged as excluded
1595 if ($ProcessExcluded && ($Flags & self::WORD_EXCLUDED))
1597 # knock item off of list
1598 unset($Scores[$ItemId]);
1600 elseif ($ProcessNonExcluded)
1602 # calculate phrase value based on number of words and field weight
1603 $PhraseScore = count(preg_split(
"/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
1604 * $this->FieldInfo[$FieldName][
"Weight"];
1605 $this->DMsg(2,
"Phrase score is ".$PhraseScore);
1607 # bump up item record score
1608 if (isset($Scores[$ItemId]))
1610 $Scores[$ItemId] += $PhraseScore;
1614 $Scores[$ItemId] = $PhraseScore;
1617 # if phrase flagged as required
1618 if ($Flags & self::WORD_REQUIRED)
1620 # increment required word count for record
1621 if (isset($this->RequiredTermCounts[$ItemId]))
1623 $this->RequiredTermCounts[$ItemId]++;
1627 $this->RequiredTermCounts[$ItemId] = 1;
1637 # return updated scores to caller
1641 private function FilterOnExcludedWords($Words, $Scores, $FieldName =
"XXXKeywordXXX")
1646 $FieldId = $this->GetFieldId($FieldName);
1649 foreach ($Words as $Word => $Flags)
1651 # if word flagged as excluded
1652 if ($Flags & self::WORD_EXCLUDED)
1654 # look up record ID for word
1655 $WordId = $this->GetWordId($Word);
1658 if ($WordId !== NULL)
1660 # look up counts for word
1661 $DB->Query(
"SELECT ItemId FROM SearchWordCounts "
1662 .
"WHERE WordId=${WordId} AND FieldId=${FieldId}");
1665 while ($Record = $DB->FetchRow())
1667 # if item record is in score list
1668 $ItemId = $Record[
"ItemId"];
1669 if (isset($Scores[$ItemId]))
1671 # remove item record from score list
1672 $this->DMsg(3,
"Filtering out item ".$ItemId
1673 .
" because it contained word \"".$Word.
"\"");
1674 unset($Scores[$ItemId]);
1681 # return filtered score list to caller
1685 private function FilterOnRequiredWords($Scores)
1687 # if there were required words
1688 if ($this->RequiredTermCount > 0)
1691 foreach ($Scores as $ItemId => $Score)
1693 # if item does not meet required word count
1694 if (!isset($this->RequiredTermCounts[$ItemId])
1695 || ($this->RequiredTermCounts[$ItemId] < $this->RequiredTermCount))
1698 $this->DMsg(4,
"Filtering out item ".$ItemId
1699 .
" because it didn't have required word count of "
1700 .$this->RequiredTermCount
1701 .(isset($this->RequiredTermCounts[$ItemId])
1703 .$this->RequiredTermCounts[$ItemId]
1706 unset($Scores[$ItemId]);
1711 # return filtered list to caller
/**
 * Filter, count, sort, and trim a search result scores list.
 * As a side effect, the number of results left after the filter callbacks
 * have run (and before trimming) is saved in $this->NumberOfResultsAvailable.
 * @param array $Scores Result scores, with item IDs for the index.
 * @param int $StartingResult Offset of first result to return (0 is the
 *       first result).
 * @param int $NumberOfResults Maximum number of results to return.
 * @param mixed $SortByField Field to sort results by (handed on to
 *       GetItemIdsSortedByField()), or NULL to sort by score.
 * @param bool $SortDescending If TRUE, sort in descending order.
 * @return array Requested range of scores, with item IDs for the index.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (start from an empty list so
            #       that an empty intersection yields an empty result rather
            #       than an undefined variable)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # sort result list by score
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller (keys must be
    #       preserved because they are the item IDs)
    return array_slice($Scores, $StartingResult, $NumberOfResults, TRUE);
}
# Body of the result filter step: when $this->FilterFuncs is set, every
# function in it is called (via call_user_func()) with the ID of each item in
# $Scores, and a true return value removes that item from $Scores, with a
# level 2 debug message naming the callback and the item.
# NOTE(review): the method header and the statement that leaves the inner loop
#       after a rejection are not visible in this listing.
1773 # if filter functions have been set
1774 if (isset($this->FilterFuncs))
1777 foreach ($Scores as $ItemId => $Score)
1779 # for each filter function
1780 foreach ($this->FilterFuncs as $FuncName)
1782 # if filter function return TRUE for item
1783 if (call_user_func($FuncName, $ItemId))
1786 $this->DMsg(2,
"Filter callback <i>".$FuncName
1787 .
"</i> rejected item ".$ItemId);
1788 unset($Scores[$ItemId]);
1790 # bail out of filter func loop
1797 # return filtered list to caller
/**
 * Find comparison terms in a set of fielded search strings, run the
 * comparisons, and fold the matching items into a scores list.
 * A search string is treated as a comparison when it begins with one of the
 * operators ">=", "<=", "!=", ">", "<" or "=" followed by at least one more
 * character, or when the field it applies to has a registered type other
 * than FIELDTYPE_TEXT (in which case the operator defaults to "=").
 * Strings for the keyword pseudo-field ("XXXKeywordXXX") are ignored.
 * With LOGIC_AND as the default search logic, matches either seed the scores
 * (when there are no prior scores and no inclusive keyword terms) or narrow
 * them to items that also matched, and no matches at all clears the scores.
 * With any other logic, each match adds 1 to the score of its item.
 * @param array $SearchStrings Search strings, with field names for the
 *       index.  Each value may be a single string or an array of strings.
 * @param array $Scores Existing scores, with item IDs for the index (may
 *       be NULL when there are no scores yet).
 * @return array Updated scores, with item IDs for the index.  The incoming
 *       value is returned untouched when no comparisons were found.
 */
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # comparisons found so far (parallel arrays that share an index)
    $FieldNames = array();
    $Operators = array();
    $Values = array();
    $Index = 1;

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # skip keyword field
        if ($SearchFieldName == "XXXKeywordXXX")
        {
            continue;
        }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # values for non-text fields are compared even without an operator
        $IsNonTextField =
                isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                        != self::FIELDTYPE_TEXT);

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # skip search string if it does not look like comparison search
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            if (!$FoundOperator && !$IsNonTextField)
            {
                continue;
            }

            # determine value (search string with any leading operator removed)
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

            # determine operator (two-character operators take precedence)
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            else
            {
                # substr() is used rather than string offsets so that a term
                #       that trims down to a single character is handled
                $Term = trim($SearchString);
                $FirstChar = substr($Term, 0, 1);
                $FirstTwoChars = substr($Term, 0, 2);
                if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                {
                    $Operator = $FirstTwoChars;
                }
                elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                {
                    $Operator = $FirstChar;
                }
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save operator, value, and field name
                $Operators[$Index] = $Operator;
                $Values[$Index] = $Value;
                $FieldNames[$Index] = $SearchFieldName;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$FieldNames[$Index]."</i> op = <i>"
                        .$Operators[$Index]."</i> val = <i>"
                        .$Values[$Index]."</i>)");

                # move to next comparison array entry
                $Index++;
            }
        }
    }

    # if comparisons found
    if (count($Operators))
    {
        # perform comparisons on fields and gather results
        $Results = $this->SearchFieldsForComparisonMatches(
                $FieldNames, $Operators, $Values);

        # make sure we have a list to work with for either search logic
        if ($Scores === NULL) {  $Scores = array();  }

        # if search logic is set to AND
        if ($this->DefaultSearchLogic == self::LOGIC_AND)
        {
            # if results were found
            if (count($Results))
            {
                # if there were no prior results and no terms for keyword search
                if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
                {
                    # add all results to scores
                    foreach ($Results as $ItemId)
                    {
                        $Scores[$ItemId] = 1;
                    }
                }
                else
                {
                    # remove anything from scores that is not part of results
                    foreach ($Scores as $ItemId => $Score)
                    {
                        if (in_array($ItemId, $Results) == FALSE)
                        {
                            unset($Scores[$ItemId]);
                        }
                    }
                }
            }
            else
            {
                # nothing matched, so nothing can satisfy all criteria
                $Scores = array();
            }
        }
        else
        {
            # add result items to scores
            foreach ($Results as $ItemId)
            {
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += 1;
                }
                else
                {
                    $Scores[$ItemId] = 1;
                }
            }
        }
    }

    # return results to caller
    return $Scores;
}
/**
 * Strip any debug level indicator out of search info, setting the debug
 * level along the way (see ExtractDebugLevel()).
 * @param mixed $SearchStrings Single search string, or array of search
 *       strings in which each element may itself be an array of strings.
 * @return mixed Search info in the same shape as was passed in, with every
 *       string replaced by the result of ExtractDebugLevel().
 */
private function SetDebugLevel($SearchStrings)
{
    # single search string needs no traversal
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # process each entry, descending one level where entry is a list
    $Extractor = array($this, "ExtractDebugLevel");
    foreach ($SearchStrings as $FieldName => $Entry)
    {
        $SearchStrings[$FieldName] = is_array($Entry)
                ? array_map($Extractor, $Entry)
                : $this->ExtractDebugLevel($Entry);
    }

    # return new search info to caller
    return $SearchStrings;
}
/**
 * Look for a debug level indicator ("DBUGLVL=" followed by a one or two
 * digit level with no leading zero) at the start of a search string.  When
 * one is found, the debug level is set accordingly and the indicator is
 * removed from the string.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # if search string starts with debug level indicator
    #       (the level is captured directly, so that text which does not
    #       match can never end up being used as the level or as part of
    #       a regular expression)
    if (is_string($SearchString)
            && preg_match("/^\\s*DBUGLVL=([1-9][0-9]?)/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator from search string
        $SearchString = str_replace("DBUGLVL=".$Level, "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
/**
 * Build a scores list that contains every item in the item table, each with
 * a score of 1.
 * @return array Scores, with item IDs for the index.
 */
private function LoadScoresForAllRecords()
{
    $IdColumn = $this->ItemIdFieldName;
    $AllScores = array();

    # retrieve ID of every item and give each one the same score
    $this->DB->Query("SELECT ".$IdColumn." FROM ".$this->ItemTableName);
    while ($Row = $this->DB->FetchRow())
    {
        $AllScores[$Row[$IdColumn]] = 1;
    }

    return $AllScores;
}
2006 # ---- private functions used in building search database
/**
 * Add to the stored count for a word (and for its stem, when stemming is
 * enabled) for the specified item and field.  The amount added is halved
 * (rounding up) after each ID is processed, so a stem receives half the
 * weight of the word itself.
 * @param string $Word Word to record.
 * @param mixed $ItemId ID of item that contained the word.
 * @param mixed $FieldId ID of field that contained the word.
 * @param int $Weight Amount to add to count.  (OPTIONAL, defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # gather IDs to update (word first, then stem if stemming is enabled)
    $IdsToUpdate = array($this->GetWordId($Word, TRUE));
    if ($this->StemmingEnabled)
    {
        $IdsToUpdate[] = $this->GetStemId(
                PorterStemmer::Stem($Word, TRUE), TRUE);
    }

    foreach ($IdsToUpdate as $Id)
    {
        # NOTE(review): the "already added" record is indexed by word and
        #       field but not by item -- presumably it is reset for each
        #       item elsewhere; verify
        if (!isset($this->WordCountAdded[$Id][$FieldId]))
        {
            # no count yet, so add a new one and remember that we did so
            $this->DB->Query("INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$Id.", ".$ItemId.", ".$FieldId.", ".$Weight.")");
            $this->WordCountAdded[$Id][$FieldId] = TRUE;
        }
        else
        {
            # count already exists, so add to it
            $this->DB->Query("UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$Id
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId);
        }

        # whatever comes next (the stem) counts for half as much
        $Weight = ceil($Weight / 2);
    }
}
# Halts the script with an error message saying that GetFieldContent() is not
# implemented.
# NOTE(review): presumably this is the body of a GetFieldContent() method that
#       is meant to be overridden -- its header is not visible here; confirm.
2059 exit(
"<br>SE - ERROR: GetFieldContent() not implemented<br>\n");
/**
 * Record word counts for a piece of text from one field of an item.  Every
 * word parsed out of the text is counted against the field itself (with the
 * default weight) and, if requested, against the keyword pseudo-field
 * ("XXXKeywordXXX") with the supplied weight.
 * @param mixed $ItemId ID of item that the text came from.
 * @param string $FieldName Name of field that the text came from.
 * @param int $Weight Weight to use for keyword field counts.
 * @param string $Text Text to record.
 * @param bool $IncludeInKeyword If TRUE, words are also counted against
 *       the keyword field.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # nothing to record if parsing leaves no words
    $ParsedWords = $this->ParseSearchStringForWords($Text, TRUE);
    if (count($ParsedWords) <= 0)
    {
        return;
    }

    # look up IDs of fields to record counts against
    $FieldId = $this->GetFieldId($FieldName);
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    # record each word (word flags are not needed here)
    foreach (array_keys($ParsedWords) as $Word)
    {
        $this->UpdateWordCount($Word, $ItemId, $FieldId);
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
/**
 * Print a debug message if the current debug level is high enough.
 * @param int $Level Message is printed only when the current debug level
 *       is greater than this value.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is above level of message
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    echo "SE: ".$Msg."<br>\n";
}
2107 # ---- BACKWARD COMPATIBILITY --------------------------------------------
2109 # possible types of logical operators
# NOTE(review): presumably older names for the LOGIC_AND / LOGIC_OR constants
#       referenced elsewhere in this class -- confirm that the values match
2110 const SEARCHLOGIC_AND = 1;
2111 const SEARCHLOGIC_OR = 2;
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
DropField($FieldName)
Drop all data pertaining to field from search database.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
NumberOfResults()
Get number of results found by most recent search.
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
SQL database abstraction object with smart query caching.
AddField($FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
Search($SearchString, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform keyword search.
GetAllSynonyms()
Get all synonyms.
SearchTermsRequiredByDefault($NewSetting=TRUE)
Set default search logic.
FilterOnSuppliedFunctions($Scores)
SearchEngine($ItemTableName, $ItemIdFieldName)
Object constructor.
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
SearchTerms()
Get normalized list of search terms.
GroupedSearch($SearchGroups, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search with logical groups of fielded searches.
ItemCount()
Get total number of items indexed by search engine.
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search across multiple fields, with different values or comparisons specified for each field...
FieldWeight($FieldName)
Get search weight for specified field.
FieldInKeywordSearch($FieldName)
Get whether specified field is included in keyword searches.
RemoveAllSynonyms()
Remove all synonyms.
Core metadata archive search engine class.
FieldedSearchWeightScale($SearchStrings)
Get total of weights for all fields involved in search, useful for assessing scale of scores in searc...
$NumberOfResultsAvailable
DefaultSearchLogic($NewSetting=NULL)
Get/set default search logic (LOGIC_AND or LOGIC_OR).
FieldType($FieldName)
Get type of specified field (text/numeric/date/daterange).
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
GetFieldContent($ItemId, $FieldName)
UpdateForItem($ItemId)
Update search database for the specified item.
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSynonyms($Word)
Get synonyms for word.