CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: SearchEngine.php
4 #
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2016 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
8 #
9 
14 {
15 
16  # ---- PUBLIC INTERFACE --------------------------------------------------
17 
18  # possible types of logical operators
19  const LOGIC_AND = 1;
20  const LOGIC_OR = 2;
21 
22  # flags used for indicating field types
23  const FIELDTYPE_TEXT = 1;
24  const FIELDTYPE_NUMERIC = 2;
25  const FIELDTYPE_DATE = 3;
27 
28  # flags used for indicating word states
29  const WORD_PRESENT = 1;
30  const WORD_EXCLUDED = 2;
31  const WORD_REQUIRED = 4;
32 
41  public function __construct(
43  {
44  # create database object for our use
45  $this->DB = new Database();
46 
47  # save item access parameters
48  $this->ItemTableName = $ItemTableName;
49  $this->ItemIdFieldName = $ItemIdFieldName;
50  $this->ItemTypeFieldName = $ItemTypeFieldName;
51 
52  # set default debug state
53  $this->DebugLevel = 0;
54  }
55 
66  public function AddField($FieldId, $FieldType, $ItemTypes,
67  $Weight, $UsedInKeywordSearch)
68  {
69  # save values
70  $this->FieldInfo[$FieldId]["FieldType"] = $FieldType;
71  $this->FieldInfo[$FieldId]["Weight"] = $Weight;
72  $this->FieldInfo[$FieldId]["InKeywordSearch"] =
73  $UsedInKeywordSearch ? TRUE : FALSE;
74  $this->FieldInfo[$FieldId]["ItemTypes"] = is_array($ItemTypes)
75  ? $ItemTypes : array($ItemTypes);
76  }
77 
83  public function FieldType($FieldId)
84  {
85  return $this->FieldInfo[$FieldId]["FieldType"];
86  }
87 
93  public function FieldWeight($FieldId)
94  {
95  return $this->FieldInfo[$FieldId]["Weight"];
96  }
97 
103  public function FieldInKeywordSearch($FieldId)
104  {
105  return $this->FieldInfo[$FieldId]["InKeywordSearch"];
106  }
107 
112  public function DebugLevel($NewValue)
113  {
114  $this->DebugLevel = $NewValue;
115  }
116 
117 
118  # ---- search functions
119 
138  public function Search(
139  $SearchParams, $StartingResult = 0, $NumberOfResults = PHP_INT_MAX,
140  $SortByField = NULL, $SortDescending = TRUE)
141  {
142  # if keyword search string was passed in
143  if (is_string($SearchParams))
144  {
145  # convert string to search parameter set
146  $SearchString = $SearchParams;
147  $SearchParams = new SearchParameterSet();
148  $SearchParams->AddParameter($SearchString);
149  }
150 
151  # interpret and filter out magic debugging keyword (if any)
152  $KeywordStrings = $SearchParams->GetKeywordSearchStrings();
153  foreach ($KeywordStrings as $String)
154  {
155  $FilteredString = $this->ExtractDebugLevel($String);
156  if ($FilteredString != $String)
157  {
158  $SearchParams->RemoveParameter($String);
159  $SearchParams->AddParameter($FilteredString);
160  }
161  }
162 
163  # save start time to use in calculating search time
164  $StartTime = microtime(TRUE);
165 
166  # clear parsed search term list
167  $this->SearchTermList = array();
168 
169  # perform search
170  $Scores = $this->RawSearch($SearchParams);
171 
172  # count, sort, and trim search result scores list
173  $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
174  $SortByField, $SortDescending);
175 
176  # record search time
177  $this->LastSearchTime = microtime(TRUE) - $StartTime;
178 
179  # return search results to caller
180  $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
181  return $Scores;
182  }
183 
203  public function FieldedSearch(
204  $SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
205  $SortByField = NULL, $SortDescending = TRUE)
206  {
207  # pass off the request to grouped search (for now) if appropriate
208  if ($SearchStrings instanceof SearchParameterSet)
209  {
210  return $this->GroupedSearch($SearchStrings, $StartingResult,
211  $NumberOfResults, $SortByField, $SortDescending);
212  }
213 
214  # interpret and filter out magic debugging keyword (if any)
215  $SearchStrings = $this->SetDebugLevel($SearchStrings);
216  $this->DMsg(0, "In FieldedSearch() with "
217  .count($SearchStrings)." search strings");
218 
219  # save start time to use in calculating search time
220  $StartTime = microtime(TRUE);
221 
222  # perform search
223  $Scores = $this->SearchAcrossFields($SearchStrings);
224  $Scores = ($Scores === NULL) ? array() : $Scores;
225 
226  # count, sort, and trim search result scores list
227  $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
228  $SortByField, $SortDescending);
229 
230  # record search time
231  $this->LastSearchTime = microtime(TRUE) - $StartTime;
232 
233  # return list of items to caller
234  $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
235  return $Scores;
236  }
237 
242  public function AddResultFilterFunction($FunctionName)
243  {
244  # save filter function name
245  $this->FilterFuncs[] = $FunctionName;
246  }
247 
254  public function NumberOfResults($ItemType = NULL)
255  {
256  return ($ItemType === NULL) ? $this->NumberOfResultsAvailable
257  : (isset($this->NumberOfResultsPerItemType[$ItemType])
258  ? $this->NumberOfResultsPerItemType[$ItemType] : 0);
259  }
260 
265  public function SearchTerms()
266  {
267  return $this->SearchTermList;
268  }
269 
274  public function SearchTime()
275  {
276  return $this->LastSearchTime;
277  }
278 
285  public function FieldedSearchWeightScale($SearchParams)
286  {
287  $Weight = 0;
288  $FieldIds = $SearchParams->GetFields();
289  foreach ($FieldIds as $FieldId)
290  {
291  if (array_key_exists($FieldId, $this->FieldInfo))
292  {
293  $Weight += $this->FieldInfo[$FieldId]["Weight"];
294  }
295  }
296  if (count($SearchParams->GetKeywordSearchStrings()))
297  {
298  foreach ($this->FieldInfo as $FieldId => $Info)
299  {
300  if ($Info["InKeywordSearch"])
301  {
302  $Weight += $Info["Weight"];
303  }
304  }
305  }
306  return $Weight;
307  }
308 
309 
310  # ---- search database update functions
311 
317  public function UpdateForItem($ItemId, $ItemType)
318  {
319  # clear word count added flags for this item
320  unset($this->WordCountAdded);
321 
322  # delete any existing info for this item
323  $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
324  $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$ItemId);
325 
326  # save item type
327  $this->DB->Query("INSERT INTO SearchItemTypes (ItemId, ItemType)"
328  ." VALUES (".intval($ItemId).", ".intval($ItemType).")");
329 
330  # for each metadata field
331  foreach ($this->FieldInfo as $FieldId => $Info)
332  {
333  # if valid search weight for field and field applies to this item
334  if (($Info["Weight"] > 0)
335  && in_array($ItemType, $Info["ItemTypes"]))
336  {
337  # retrieve text for field
338  $Text = $this->GetFieldContent($ItemId, $FieldId);
339 
340  # if text is array
341  if (is_array($Text))
342  {
343  # for each text string in array
344  foreach ($Text as $String)
345  {
346  # record search info for text
347  $this->RecordSearchInfoForText($ItemId, $FieldId,
348  $Info["Weight"], $String,
349  $Info["InKeywordSearch"]);
350  }
351  }
352  else
353  {
354  # record search info for text
355  $this->RecordSearchInfoForText($ItemId, $FieldId,
356  $Info["Weight"], $Text,
357  $Info["InKeywordSearch"]);
358  }
359  }
360  }
361  }
362 
369  public function UpdateForItems($StartingItemId, $NumberOfItems)
370  {
371  # retrieve IDs for specified number of items starting at specified ID
372  $this->DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->ItemTypeFieldName
373  ." FROM ".$this->ItemTableName
374  ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
375  ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
376  $ItemIds = $this->DB->FetchColumn(
377  $this->ItemTypeFieldName, $this->ItemIdFieldName);
378 
379  # for each retrieved item ID
380  foreach ($ItemIds as $ItemId => $ItemType)
381  {
382  # update search info for item
383  $this->UpdateForItem($ItemId, $ItemType);
384  }
385 
386  # return ID of last item updated to caller
387  return $ItemId;
388  }
389 
394  public function DropItem($ItemId)
395  {
396  # drop all entries pertaining to item from word count table
397  $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
398  $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$ItemId);
399  }
400 
405  public function DropField($FieldId)
406  {
407  # drop all entries pertaining to field from word counts table
408  $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = \'".$FieldId."\'");
409  }
410 
415  public function SearchTermCount()
416  {
417  return $this->DB->Query("SELECT COUNT(*) AS TermCount"
418  ." FROM SearchWords", "TermCount");
419  }
420 
425  public function ItemCount()
426  {
427  return $this->DB->Query("SELECT COUNT(DISTINCT ItemId) AS ItemCount"
428  ." FROM SearchWordCounts", "ItemCount");
429  }
430 
438  public function AddSynonyms($Word, $Synonyms)
439  {
440  # asssume no synonyms will be added
441  $AddCount = 0;
442 
443  # get ID for word
444  $WordId = $this->GetWordId($Word, TRUE);
445 
446  # for each synonym passed in
447  foreach ($Synonyms as $Synonym)
448  {
449  # get ID for synonym
450  $SynonymId = $this->GetWordId($Synonym, TRUE);
451 
452  # if synonym is not already in database
453  $this->DB->Query("SELECT * FROM SearchWordSynonyms"
454  ." WHERE (WordIdA = ".$WordId
455  ." AND WordIdB = ".$SynonymId.")"
456  ." OR (WordIdB = ".$WordId
457  ." AND WordIdA = ".$SynonymId.")");
458  if ($this->DB->NumRowsSelected() == 0)
459  {
460  # add synonym entry to database
461  $this->DB->Query("INSERT INTO SearchWordSynonyms"
462  ." (WordIdA, WordIdB)"
463  ." VALUES (".$WordId.", ".$SynonymId.")");
464  $AddCount++;
465  }
466  }
467 
468  # report to caller number of new synonyms added
469  return $AddCount;
470  }
471 
478  public function RemoveSynonyms($Word, $Synonyms = NULL)
479  {
480  # find ID for word
481  $WordId = $this->GetWordId($Word);
482 
483  # if ID found
484  if ($WordId !== NULL)
485  {
486  # if no specific synonyms provided
487  if ($Synonyms === NULL)
488  {
489  # remove all synonyms for word
490  $this->DB->Query("DELETE FROM SearchWordSynonyms"
491  ." WHERE WordIdA = '".$WordId."'"
492  ." OR WordIdB = '".$WordId."'");
493  }
494  else
495  {
496  # for each specified synonym
497  foreach ($Synonyms as $Synonym)
498  {
499  # look up ID for synonym
500  $SynonymId = $this->GetWordId($Synonym);
501 
502  # if synonym ID was found
503  if ($SynonymId !== NULL)
504  {
505  # delete synonym entry
506  $this->DB->Query("DELETE FROM SearchWordSynonyms"
507  ." WHERE (WordIdA = '".$WordId."'"
508  ." AND WordIdB = '".$SynonymId."')"
509  ." OR (WordIdB = '".$WordId."'"
510  ." AND WordIdA = '".$SynonymId."')");
511  }
512  }
513  }
514  }
515  }
516 
520  public function RemoveAllSynonyms()
521  {
522  $this->DB->Query("DELETE FROM SearchWordSynonyms");
523  }
524 
530  public function GetSynonyms($Word)
531  {
532  # assume no synonyms will be found
533  $Synonyms = array();
534 
535  # look up ID for word
536  $WordId = $this->GetWordId($Word);
537 
538  # if word ID was found
539  if ($WordId !== NULL)
540  {
541  # look up IDs of all synonyms for this word
542  $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
543  ." WHERE WordIdA = ".$WordId
544  ." OR WordIdB = ".$WordId);
545  $SynonymIds = array();
546  while ($Record = $this->DB->FetchRow)
547  {
548  $SynonymIds[] = ($Record["WordIdA"] == $WordId)
549  ? $Record["WordIdB"] : $Record["WordIdA"];
550  }
551 
552  # for each synonym ID
553  foreach ($SynonymIds as $SynonymId)
554  {
555  # look up synonym word and add to synonym list
556  $Synonyms[] = $this->GetWord($SynonymId);
557  }
558  }
559 
560  # return synonyms to caller
561  return $Synonyms;
562  }
563 
568  public function GetAllSynonyms()
569  {
570  # assume no synonyms will be found
571  $SynonymList = array();
572 
573  # for each synonym ID pair
574  $OurDB = new Database();
575  $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
576  while ($Record = $OurDB->FetchRow())
577  {
578  # look up words
579  $Word = $this->GetWord($Record["WordIdA"]);
580  $Synonym = $this->GetWord($Record["WordIdB"]);
581 
582  # if we do not already have an entry for the word
583  # or synonym is not listed for this word
584  if (!isset($SynonymList[$Word])
585  || !in_array($Synonym, $SynonymList[$Word]))
586  {
587  # add entry for synonym
588  $SynonymList[$Word][] = $Synonym;
589  }
590 
591  # if we do not already have an entry for the synonym
592  # or word is not listed for this synonym
593  if (!isset($SynonymList[$Synonym])
594  || !in_array($Word, $SynonymList[$Synonym]))
595  {
596  # add entry for word
597  $SynonymList[$Synonym][] = $Word;
598  }
599  }
600 
601  # for each word
602  # (this loop removes reciprocal duplicates)
603  foreach ($SynonymList as $Word => $Synonyms)
604  {
605  # for each synonym for that word
606  foreach ($Synonyms as $Synonym)
607  {
608  # if synonym has synonyms and word is one of them
609  if (isset($SynonymList[$Synonym])
610  && isset($SynonymList[$Word])
611  && in_array($Word, $SynonymList[$Synonym])
612  && in_array($Synonym, $SynonymList[$Word]))
613  {
614  # if word has less synonyms than synonym
615  if (count($SynonymList[$Word])
616  < count($SynonymList[$Synonym]))
617  {
618  # remove synonym from synonym list for word
619  $SynonymList[$Word] = array_diff(
620  $SynonymList[$Word], array($Synonym));
621 
622  # if no synonyms left for word
623  if (!count($SynonymList[$Word]))
624  {
625  # remove empty synonym list for word
626  unset($SynonymList[$Word]);
627  }
628  }
629  else
630  {
631  # remove word from synonym list for synonym
632  $SynonymList[$Synonym] = array_diff(
633  $SynonymList[$Synonym], array($Word));
634 
635  # if no synonyms left for word
636  if (!count($SynonymList[$Synonym]))
637  {
638  # remove empty synonym list for word
639  unset($SynonymList[$Synonym]);
640  }
641  }
642  }
643  }
644  }
645 
646  # sort array alphabetically (just for convenience)
647  foreach ($SynonymList as $Word => $Synonyms)
648  {
649  asort($SynonymList[$Word]);
650  }
651  ksort($SynonymList);
652 
653  # return 2D array of synonyms to caller
654  return $SynonymList;
655  }
656 
662  public function SetAllSynonyms($SynonymList)
663  {
664  # remove all existing synonyms
665  $this->RemoveAllSynonyms();
666 
667  # for each synonym entry passed in
668  foreach ($SynonymList as $Word => $Synonyms)
669  {
670  # add synonyms for word
671  $this->AddSynonyms($Word, $Synonyms);
672  }
673  }
674 
683  public function LoadSynonymsFromFile($FileName)
684  {
685  # asssume no synonyms will be added
686  $AddCount = 0;
687 
688  # read in contents of file
689  $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
690 
691  # if file contained lines
692  if (count($Lines))
693  {
694  # for each line of file
695  foreach ($Lines as $Line)
696  {
697  # if line is not a comment
698  if (!preg_match("/[\s]*#/", $Line))
699  {
700  # split line into words
701  $Words = preg_split("/[\s,]+/", $Line);
702 
703  # if synonyms found
704  if (count($Words) > 1)
705  {
706  # separate out word and synonyms
707  $Word = array_shift($Words);
708 
709  # add synonyms
710  $AddCount += $this->AddSynonyms($Word, $Words);
711  }
712  }
713  }
714  }
715 
716  # return count of synonyms added to caller
717  return $AddCount;
718  }
719 
720 
721  # ---- PRIVATE INTERFACE -------------------------------------------------
722 
723  protected $DB;
724  protected $DebugLevel;
725  protected $FilterFuncs;
726  protected $ItemIdFieldName;
727  protected $ItemTableName;
729  protected $LastSearchTime;
731  protected $StemmingEnabled = TRUE;
732  protected $SynonymsEnabled = TRUE;
733 
734  private $ExcludedTermCount;
735  private $FieldIds;
736  private $FieldInfo;
737  private $InclusiveTermCount;
738  private $RequiredTermCount;
739  private $RequiredTermCounts;
740  private $SearchTermList;
741  private $WordCountAdded;
742 
743  const KEYWORD_FIELD_ID = -100;
744  const STEM_ID_OFFSET = 1000000;
745 
746 
747  # ---- private methods (searching)
748 
756  private function RawSearch($SearchParams)
757  {
758  # retrieve search strings
759  $SearchStrings = $SearchParams->GetSearchStrings();
760  $KeywordSearchStrings = $SearchParams->GetKeywordSearchStrings();
761 
762  # add keyword searches (if any) to fielded searches
763  if (count($KeywordSearchStrings))
764  {
765  $SearchStrings[self::KEYWORD_FIELD_ID] = $KeywordSearchStrings;
766  }
767 
768  # normalize search strings
769  $NormalizedSearchStrings = array();
770  foreach ($SearchStrings as $FieldId => $SearchStringArray)
771  {
772  if (!is_array($SearchStringArray))
773  {
774  $SearchStringArray = array($SearchStringArray);
775  }
776  foreach ($SearchStringArray as $String)
777  {
778  $String = trim($String);
779  if (strlen($String))
780  {
781  $NormalizedSearchStrings[$FieldId][] = $String;
782  }
783  }
784  }
785  $SearchStrings = $NormalizedSearchStrings;
786 
787  # if we have strings to search for
788  if (count($SearchStrings))
789  {
790  # perform search
791  $Scores = $this->SearchAcrossFields(
792  $SearchStrings, $SearchParams->Logic());
793  }
794 
795  # for each subgroup
796  foreach ($SearchParams->GetSubgroups() as $Subgroup)
797  {
798  # perform subgroup search
799  $NewScores = $this->RawSearch($Subgroup);
800 
801  # added subgroup search scores to previous scores as appropriate
802  if (isset($Scores))
803  {
804  $Scores = $this->CombineScores(
805  $Scores, $NewScores, $SearchParams->Logic());
806  }
807  else
808  {
809  $Scores = $NewScores;
810  }
811  }
812  if (isset($NewScores))
813  {
814  $this->DMsg(2, "Have ".count($Scores)
815  ." results after subgroup processing");
816  }
817 
818  # pare down results to just allowed item types (if specified)
819  if ($SearchParams->ItemTypes())
820  {
821  $AllowedItemTypes = $SearchParams->ItemTypes();
822  foreach ($Scores as $ItemId => $Score)
823  {
824  if (!in_array($this->GetItemType($ItemId), $AllowedItemTypes))
825  {
826  unset($Scores[$ItemId]);
827  }
828  }
829  $this->DMsg(3, "Have ".count($Scores)
830  ." results after paring to allowed item types");
831  }
832 
833  # return search results to caller
834  return isset($Scores) ? $Scores : array();
835  }
836 
844  private function CombineScores($ScoresA, $ScoresB, $Logic)
845  {
846  if ($Logic == "OR")
847  {
848  $Scores = $ScoresA;
849  foreach ($ScoresB as $ItemId => $Score)
850  {
851  if (isset($Scores[$ItemId]))
852  {
853  $Scores[$ItemId] += $Score;
854  }
855  else
856  {
857  $Scores[$ItemId] = $Score;
858  }
859  }
860  }
861  else
862  {
863  $Scores = array();
864  foreach ($ScoresA as $ItemId => $Score)
865  {
866  if (isset($ScoresB[$ItemId]))
867  {
868  $Scores[$ItemId] = $Score + $ScoresB[$ItemId];
869  }
870  }
871  }
872  return $Scores;
873  }
874 
884  private function SearchAcrossFields($SearchStrings, $Logic)
885  {
886  # start by assuming no search will be done
887  $Scores = array();
888 
889  # clear word counts
890  $this->ExcludedTermCount = 0;
891  $this->InclusiveTermCount = 0;
892  $this->RequiredTermCount = 0;
893  $this->RequiredTermCounts = array();
894 
895  # for each field
896  $NeedComparisonSearch = FALSE;
897  foreach ($SearchStrings as $FieldId => $SearchStringArray)
898  {
899  # for each search string for this field
900  foreach ($SearchStringArray as $SearchString)
901  {
902  # if field is keyword or field is text and does not look
903  # like comparison match
904  $NotComparisonSearch = !preg_match(
905  self::COMPARISON_OPERATOR_PATTERN, $SearchString);
906  if (($FieldId == self::KEYWORD_FIELD_ID)
907  || (isset($this->FieldInfo[$FieldId])
908  && ($this->FieldInfo[$FieldId]["FieldType"]
909  == self::FIELDTYPE_TEXT)
910  && $NotComparisonSearch))
911  {
912  $this->DMsg(0, "Searching text field \""
913  .$FieldId."\" for string \"$SearchString\"");
914 
915  # normalize text and split into words
916  $Words[$FieldId] =
917  $this->ParseSearchStringForWords($SearchString, $Logic);
918 
919  # calculate scores for matching items
920  if (count($Words[$FieldId]))
921  {
922  $Scores = $this->SearchForWords(
923  $Words[$FieldId], $FieldId, $Scores);
924  $this->DMsg(3, "Have "
925  .count($Scores)." results after word search");
926  }
927 
928  # split into phrases
929  $Phrases[$FieldId] = $this->ParseSearchStringForPhrases(
930  $SearchString, $Logic);
931 
932  # handle any phrases
933  if (count($Phrases[$FieldId]))
934  {
935  $Scores = $this->SearchForPhrases(
936  $Phrases[$FieldId], $Scores, $FieldId, TRUE, FALSE);
937  $this->DMsg(3, "Have "
938  .count($Scores)." results after phrase search");
939  }
940  }
941  else
942  {
943  # set flag to indicate possible comparison search candidate found
944  $NeedComparisonSearch = TRUE;
945  }
946  }
947  }
948 
949  # perform comparison searches
950  if ($NeedComparisonSearch)
951  {
952  $Scores = $this->SearchForComparisonMatches(
953  $SearchStrings, $Logic, $Scores);
954  $this->DMsg(3, "Have ".count($Scores)." results after comparison search");
955  }
956 
957  # if no results found and exclusions specified
958  if (!count($Scores) && $this->ExcludedTermCount)
959  {
960  # load all records
961  $Scores = $this->LoadScoresForAllRecords();
962  }
963 
964  # if search results found
965  if (count($Scores))
966  {
967  # for each search text string
968  foreach ($SearchStrings as $FieldId => $SearchStringArray)
969  {
970  # for each search string for this field
971  foreach ($SearchStringArray as $SearchString)
972  {
973  # if field is text
974  if (($FieldId == self::KEYWORD_FIELD_ID)
975  || (isset($this->FieldInfo[$FieldId])
976  && ($this->FieldInfo[$FieldId]["FieldType"]
977  == self::FIELDTYPE_TEXT)))
978  {
979  # if there are words in search text
980  if (isset($Words[$FieldId]))
981  {
982  # handle any excluded words
983  $Scores = $this->FilterOnExcludedWords(
984  $Words[$FieldId], $Scores, $FieldId);
985  }
986 
987  # handle any excluded phrases
988  if (isset($Phrases[$FieldId]))
989  {
990  $Scores = $this->SearchForPhrases(
991  $Phrases[$FieldId], $Scores,
992  $FieldId, FALSE, TRUE);
993  }
994  }
995  }
996  $this->DMsg(3, "Have ".count($Scores)
997  ." results after processing exclusions");
998  }
999 
1000  # strip off any results that don't contain required words
1001  $Scores = $this->FilterOnRequiredWords($Scores);
1002  }
1003 
1004  # return search result scores to caller
1005  return $Scores;
1006  }
1007 
1017  private function SearchForWords($Words, $FieldId, $Scores = NULL)
1018  {
1019  $DB = $this->DB;
1020 
1021  # start with empty search result scores list if none passed in
1022  if ($Scores == NULL)
1023  {
1024  $Scores = array();
1025  }
1026 
1027  # for each word
1028  foreach ($Words as $Word => $Flags)
1029  {
1030  unset($Counts);
1031  $this->DMsg(2, "Searching for word '${Word}' in field ".$FieldId);
1032 
1033  # if word is not excluded
1034  if (!($Flags & self::WORD_EXCLUDED))
1035  {
1036  # look up record ID for word
1037  $this->DMsg(2, "Looking up word \"".$Word."\"");
1038  $WordId = $this->GetWordId($Word);
1039 
1040  # if word is in DB
1041  if ($WordId !== NULL)
1042  {
1043  # look up counts for word
1044  $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
1045  ."WHERE WordId = ".$WordId
1046  ." AND FieldId = ".$FieldId);
1047  $Counts = $DB->FetchColumn("Count", "ItemId");
1048 
1049  # if synonym support is enabled
1050  if ($this->SynonymsEnabled)
1051  {
1052  # look for any synonyms
1053  $DB->Query("SELECT WordIdA, WordIdB"
1054  ." FROM SearchWordSynonyms"
1055  ." WHERE WordIdA = ".$WordId
1056  ." OR WordIdB = ".$WordId);
1057 
1058  # if synonyms were found
1059  if ($DB->NumRowsSelected())
1060  {
1061  # retrieve synonym IDs
1062  $SynonymIds = array();
1063  while ($Record = $DB->FetchRow())
1064  {
1065  $SynonymIds[] = ($Record["WordIdA"] == $WordId)
1066  ? $Record["WordIdB"]
1067  : $Record["WordIdA"];
1068  }
1069 
1070  # for each synonym
1071  foreach ($SynonymIds as $SynonymId)
1072  {
1073  # retrieve counts for synonym
1074  $DB->Query("SELECT ItemId,Count"
1075  ." FROM SearchWordCounts"
1076  ." WHERE WordId = ".$SynonymId
1077  ." AND FieldId = ".$FieldId);
1078  $SynonymCounts = $DB->FetchColumn("Count", "ItemId");
1079 
1080  # for each count
1081  foreach ($SynonymCounts as $ItemId => $Count)
1082  {
1083  # adjust count because it's a synonym
1084  $AdjustedCount = ceil($Count / 2);
1085 
1086  # add count to existing counts
1087  if (isset($Counts[$ItemId]))
1088  {
1089  $Counts[$ItemId] += $AdjustedCount;
1090  }
1091  else
1092  {
1093  $Counts[$ItemId] = $AdjustedCount;
1094  }
1095  }
1096  }
1097  }
1098  }
1099  }
1100 
1101  # if stemming is enabled
1102  if ($this->StemmingEnabled)
1103  {
1104  # retrieve stem ID
1105  $Stem = PorterStemmer::Stem($Word);
1106  $this->DMsg(2, "Looking up stem \"".$Stem."\"");
1107  $StemId = $this->GetStemId($Stem);
1108 
1109  # if ID found for stem
1110  if ($StemId !== NULL)
1111  {
1112  # retrieve counts for stem
1113  $DB->Query("SELECT ItemId,Count"
1114  ." FROM SearchWordCounts"
1115  ." WHERE WordId = ".$StemId
1116  ." AND FieldId = ".$FieldId);
1117  $StemCounts = $DB->FetchColumn("Count", "ItemId");
1118 
1119  # for each count
1120  foreach ($StemCounts as $ItemId => $Count)
1121  {
1122  # adjust count because it's a stem
1123  $AdjustedCount = ceil($Count / 2);
1124 
1125  # add count to existing counts
1126  if (isset($Counts[$ItemId]))
1127  {
1128  $Counts[$ItemId] += $AdjustedCount;
1129  }
1130  else
1131  {
1132  $Counts[$ItemId] = $AdjustedCount;
1133  }
1134  }
1135  }
1136  }
1137 
1138  # if counts were found
1139  if (isset($Counts))
1140  {
1141  # for each count
1142  foreach ($Counts as $ItemId => $Count)
1143  {
1144  # if word flagged as required
1145  if ($Flags & self::WORD_REQUIRED)
1146  {
1147  # increment required word count for record
1148  if (isset($this->RequiredTermCounts[$ItemId]))
1149  {
1150  $this->RequiredTermCounts[$ItemId]++;
1151  }
1152  else
1153  {
1154  $this->RequiredTermCounts[$ItemId] = 1;
1155  }
1156  }
1157 
1158  # add to item record score
1159  if (isset($Scores[$ItemId]))
1160  {
1161  $Scores[$ItemId] += $Count;
1162  }
1163  else
1164  {
1165  $Scores[$ItemId] = $Count;
1166  }
1167  }
1168  }
1169  }
1170  }
1171 
1172  # return basic scores to caller
1173  return $Scores;
1174  }
1175 
1183  private function ParseSearchStringForPhrases($SearchString, $Logic)
1184  {
1185  # split into chunks delimited by double quote marks
1186  $Pieces = explode("\"", $SearchString); # "
1187 
1188  # for each pair of chunks
1189  $Index = 2;
1190  $Phrases = array();
1191  while ($Index < count($Pieces))
1192  {
1193  # grab phrase from chunk
1194  $Phrase = trim(addslashes($Pieces[$Index - 1]));
1195  $Flags = self::WORD_PRESENT;
1196 
1197  # grab first character of phrase
1198  $FirstChar = substr($Pieces[$Index - 2], -1);
1199 
1200  # set flags to reflect any option characters
1201  if ($FirstChar == "-")
1202  {
1203  $Flags |= self::WORD_EXCLUDED;
1204  if (!isset($Phrases[$Phrase]))
1205  {
1206  $this->ExcludedTermCount++;
1207  }
1208  }
1209  else
1210  {
1211  if ((($Logic == "AND")
1212  && ($FirstChar != "~"))
1213  || ($FirstChar == "+"))
1214  {
1215  $Flags |= self::WORD_REQUIRED;
1216  if (!isset($Phrases[$Phrase]))
1217  {
1218  $this->RequiredTermCount++;
1219  }
1220  }
1221  if (!isset($Phrases[$Phrase]))
1222  {
1223  $this->InclusiveTermCount++;
1224  $this->SearchTermList[] = $Phrase;
1225  }
1226  }
1227  $Phrases[$Phrase] = $Flags;
1228 
1229  # move to next pair of chunks
1230  $Index += 2;
1231  }
1232 
1233  # return phrases to caller
1234  return $Phrases;
1235  }
1236 
1242  protected function SearchFieldForPhrases($FieldId, $Phrase)
1243  {
1244  # error out
1245  exit("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n");
1246  }
1247 
1259  private function SearchForPhrases($Phrases, $Scores, $FieldId,
1260  $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
1261  {
1262  # if phrases are found
1263  if (count($Phrases) > 0)
1264  {
1265  # if this is a keyword search
1266  if ($FieldId == self::KEYWORD_FIELD_ID)
1267  {
1268  # for each field
1269  foreach ($this->FieldInfo as $KFieldId => $Info)
1270  {
1271  # if field is marked to be included in keyword searches
1272  if ($Info["InKeywordSearch"])
1273  {
1274  # call ourself with that field
1275  $Scores = $this->SearchForPhrases(
1276  $Phrases, $Scores, $KFieldId,
1277  $ProcessNonExcluded, $ProcessExcluded);
1278  }
1279  }
1280  }
1281  else
1282  {
1283  # for each phrase
1284  foreach ($Phrases as $Phrase => $Flags)
1285  {
1286  $this->DMsg(2, "Searching for phrase '".$Phrase
1287  ."' in field ".$FieldId);
1288 
1289  # if phrase flagged as excluded and we are doing excluded
1290  # phrases or phrase flagged as non-excluded and we
1291  # are doing non-excluded phrases
1292  if (($ProcessExcluded && ($Flags & self::WORD_EXCLUDED))
1293  || ($ProcessNonExcluded && !($Flags & self::WORD_EXCLUDED)))
1294  {
1295  # initialize score list if necessary
1296  if ($Scores === NULL) { $Scores = array(); }
1297 
1298  # retrieve list of items that contain phrase
1299  $ItemIds = $this->SearchFieldForPhrases(
1300  $FieldId, $Phrase);
1301 
1302  # for each item that contains phrase
1303  foreach ($ItemIds as $ItemId)
1304  {
1305  # if we are doing excluded phrases and phrase
1306  # is flagged as excluded
1307  if ($ProcessExcluded && ($Flags & self::WORD_EXCLUDED))
1308  {
1309  # knock item off of list
1310  unset($Scores[$ItemId]);
1311  }
1312  elseif ($ProcessNonExcluded)
1313  {
1314  # calculate phrase value based on number of
1315  # words and field weight
1316  $PhraseScore = count(preg_split("/[\s]+/",
1317  $Phrase, -1, PREG_SPLIT_NO_EMPTY))
1318  * $this->FieldInfo[$FieldId]["Weight"];
1319  $this->DMsg(2, "Phrase score is ".$PhraseScore);
1320 
1321  # bump up item record score
1322  if (isset($Scores[$ItemId]))
1323  {
1324  $Scores[$ItemId] += $PhraseScore;
1325  }
1326  else
1327  {
1328  $Scores[$ItemId] = $PhraseScore;
1329  }
1330 
1331  # if phrase flagged as required
1332  if ($Flags & self::WORD_REQUIRED)
1333  {
1334  # increment required word count for record
1335  if (isset($this->RequiredTermCounts[$ItemId]))
1336  {
1337  $this->RequiredTermCounts[$ItemId]++;
1338  }
1339  else
1340  {
1341  $this->RequiredTermCounts[$ItemId] = 1;
1342  }
1343  }
1344  }
1345  }
1346  }
1347  }
1348  }
1349  }
1350 
1351  # return updated scores to caller
1352  return $Scores;
1353  }
1354 
1363  private function FilterOnExcludedWords($Words, $Scores, $FieldId)
1364  {
1365  $DB = $this->DB;
1366 
1367  # for each word
1368  foreach ($Words as $Word => $Flags)
1369  {
1370  # if word flagged as excluded
1371  if ($Flags & self::WORD_EXCLUDED)
1372  {
1373  # look up record ID for word
1374  $WordId = $this->GetWordId($Word);
1375 
1376  # if word is in DB
1377  if ($WordId !== NULL)
1378  {
1379  # look up counts for word
1380  $DB->Query("SELECT ItemId FROM SearchWordCounts "
1381  ."WHERE WordId=${WordId} AND FieldId=${FieldId}");
1382 
1383  # for each count
1384  while ($Record = $DB->FetchRow())
1385  {
1386  # if item record is in score list
1387  $ItemId = $Record["ItemId"];
1388  if (isset($Scores[$ItemId]))
1389  {
1390  # remove item record from score list
1391  $this->DMsg(3, "Filtering out item ".$ItemId
1392  ." because it contained word \"".$Word."\"");
1393  unset($Scores[$ItemId]);
1394  }
1395  }
1396  }
1397  }
1398  }
1399 
1400  # returned filtered score list to caller
1401  return $Scores;
1402  }
1403 
/**
 * Drop from the score list every item that did not match all of the
 * required search terms.  Does nothing when the search had no
 * required terms.
 * @param array $Scores Current scores, as item ID => score.
 * @return array Score list with insufficiently-matching items removed.
 */
private function FilterOnRequiredWords($Scores)
{
    # nothing to filter on if no terms were required
    if (!($this->RequiredTermCount > 0))
    {
        return $Scores;
    }

    foreach (array_keys($Scores) as $ItemId)
    {
        # keep item if it matched every required term
        $HasCount = isset($this->RequiredTermCounts[$ItemId]);
        if ($HasCount && !($this->RequiredTermCounts[$ItemId]
                < $this->RequiredTermCount))
        {
            continue;
        }

        # otherwise report why and discard it
        $Found = $HasCount
                ? " (only had ".$this->RequiredTermCounts[$ItemId]
                : " (had none";
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$this->RequiredTermCount
                .$Found
                .")");
        unset($Scores[$ItemId]);
    }

    # hand filtered list back to caller
    return $Scores;
}
1439 
/**
 * Filter, group, sort, and trim raw search scores.  Scores are run
 * through the supplied filter callbacks, grouped by item type, sorted
 * within each type, and cut down to the requested window.  As a side
 * effect this records the overall result count and the per-type result
 * counts on the object.
 * @param array $Scores Raw scores, as item ID => score.
 * @param int $StartingResult Offset of first result to return (per type).
 * @param int $NumberOfResults Maximum number of results to return (per type).
 * @param mixed $SortByField Field to sort by, or array of such values
 *       indexed by item type.  NULL means sort by score.
 * @param mixed $SortDescending Whether to sort in descending order, or
 *       array of such values indexed by item type (types missing from
 *       the array default to TRUE).
 * @return array Two-dimensional array of scores, indexed by item type
 *       and then by item ID.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # run results through any registered filter callbacks
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # total is recorded before items with no known type are dropped
    $this->NumberOfResultsAvailable = count($Scores);

    # group scores by item type (items with unknown type are discarded)
    $Binned = array();
    foreach ($Scores as $ItemId => $ItemScore)
    {
        $Type = $this->GetItemType($ItemId);
        if ($Type === NULL)
        {
            continue;
        }
        $Binned[$Type][$ItemId] = $ItemScore;
    }

    # a window is only applied if one was actually requested
    $WantSubset = ($StartingResult > 0) || ($NumberOfResults < PHP_INT_MAX);

    $Cleaned = array();
    foreach ($Binned as $Type => $TypeScores)
    {
        # resolve sort field for this item type
        if (is_array($SortByField))
        {
            $Field = isset($SortByField[$Type]) ? $SortByField[$Type] : NULL;
        }
        else
        {
            $Field = $SortByField;
        }

        # resolve sort direction for this item type
        if (is_array($SortDescending))
        {
            $Descending = isset($SortDescending[$Type])
                    ? $SortDescending[$Type] : TRUE;
        }
        else
        {
            $Descending = $SortDescending;
        }

        # remember how many results this item type had before trimming
        $this->NumberOfResultsPerItemType[$Type] = count($TypeScores);

        $Ordered = $TypeScores;
        if ($Field === NULL)
        {
            # no sort field, so order by score in requested direction
            if ($Descending)
            {
                arsort($Ordered, SORT_NUMERIC);
            }
            else
            {
                asort($Ordered, SORT_NUMERIC);
            }
        }
        else
        {
            # ask for all item IDs of this type in field-sorted order
            $SortedIds = $this->GetItemIdsSortedByField(
                    $Type, $Field, $Descending);

            if (count($SortedIds) && count($TypeScores))
            {
                # rebuild scores in sorted order, keeping only our results
                $Ordered = array();
                $Kept = array_intersect($SortedIds, array_keys($TypeScores));
                foreach ($Kept as $ItemId)
                {
                    $Ordered[$ItemId] = $TypeScores[$ItemId];
                }
            }
            else
            {
                # no sorted IDs available, so fall back to
                #       highest-score-first ordering
                arsort($Ordered, SORT_NUMERIC);
            }
        }

        # trim back to requested window, keeping item IDs as keys
        if ($WantSubset)
        {
            $Ordered = array_slice(
                    $Ordered, $StartingResult, $NumberOfResults, TRUE);
        }

        $Cleaned[$Type] = $Ordered;
    }

    # hand cleaned scores back to caller
    return $Cleaned;
}
1557 
/**
 * Filter search scores through any supplied filter functions.  An item
 * is discarded as soon as one filter function returns a true value
 * for its ID.
 * @param array $Scores Current scores, as item ID => score.
 * @return array Score list with rejected items removed.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # nothing to do if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # a true return value from the callback means "reject"
            if (!call_user_func($FuncName, $ItemId))
            {
                continue;
            }

            # build printable name for callback (callbacks that are not
            #       plain function names cannot be safely concatenated
            #       into a string)
            if (is_string($FuncName))
            {
                $FuncLabel = $FuncName;
            }
            else
            {
                is_callable($FuncName, TRUE, $FuncLabel);
            }

            # discard result
            $this->DMsg(2, "Filter callback <i>".$FuncLabel
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);

            # no need to consult remaining callbacks for this item
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1592 
/**
 * Find comparison searches (explicit operators, or any search against
 * a non-text field) among the search strings, run them, and merge the
 * matching items into the score list.  With AND logic the comparison
 * results restrict (or, when there are no prior scores and no keyword
 * terms, seed) the score list; otherwise each match adds 1 to the
 * item's score.
 * @param array $SearchStrings Arrays of search strings, indexed by field ID.
 * @param string $Logic Search logic ("AND" selects restricting behavior).
 * @param array|null $Scores Current scores, as item ID => score.
 * @return array|null Updated scores.  The incoming value is returned
 *       untouched when no comparisons were found.
 */
private function SearchForComparisonMatches($SearchStrings, $Logic, $Scores)
{
    # parallel lists describing each comparison found
    $FieldIds = array();
    $Operators = array();
    $Values = array();

    foreach ($SearchStrings as $SearchFieldId => $SearchStringArray)
    {
        # keyword searches are never comparisons
        if ($SearchFieldId == self::KEYWORD_FIELD_ID)
        {
            continue;
        }

        foreach ($SearchStringArray as $SearchString)
        {
            # look for comparison operators
            $FoundOperator = preg_match(
                    self::COMPARISON_OPERATOR_PATTERN,
                    $SearchString, $Matches);

            # text fields are only compared when an operator is present
            #       (all other field types are always comparison searches)
            if (!$FoundOperator
                    && ($this->FieldInfo[$SearchFieldId]["FieldType"]
                            == self::FIELDTYPE_TEXT))
            {
                continue;
            }

            # value is search string with any leading operator stripped
            $Value = trim(preg_replace(
                    self::COMPARISON_OPERATOR_PATTERN, '\2',
                    $SearchString));

            # assume equality if no operator was given
            $Operator = $FoundOperator ? $Matches[1] : "=";

            # save comparison
            $FieldIds[] = $SearchFieldId;
            $Operators[] = $Operator;
            $Values[] = $Value;
            $this->DMsg(3, "Added comparison (field = <i>"
                    .$SearchFieldId."</i> op = <i>"
                    .$Operator."</i> val = <i>"
                    .$Value."</i>)");
        }
    }

    # leave scores untouched if there was nothing to compare
    if (!count($Operators))
    {
        return $Scores;
    }

    # treat missing score list as empty for both logic modes (count()
    #       on NULL is an error in PHP 8)
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # perform comparisons on fields and gather results
    $Results = $this->SearchFieldsForComparisonMatches(
            $FieldIds, $Operators, $Values, $Logic);

    if ($Logic == "AND")
    {
        # no comparison matches means nothing can satisfy the search
        if (!count($Results))
        {
            return array();
        }

        if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
        {
            # no prior results and no keyword terms, so comparison
            #       results become the result set
            foreach ($Results as $ItemId)
            {
                $Scores[$ItemId] = 1;
            }
        }
        else
        {
            # remove anything from scores that is not part of results
            foreach ($Scores as $ItemId => $Score)
            {
                if (in_array($ItemId, $Results) == FALSE)
                {
                    unset($Scores[$ItemId]);
                }
            }
        }
    }
    else
    {
        # add result items to scores
        foreach ($Results as $ItemId)
        {
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += 1;
            }
            else
            {
                $Scores[$ItemId] = 1;
            }
        }
    }

    # return results to caller
    return $Scores;
}
1724 
/**
 * Look for a debug level indicator in the search string(s), apply it,
 * and strip it out.  Accepts a single string, an array of strings, or
 * an array of arrays of strings.
 * @param mixed $SearchStrings Search string or (nested) array of
 *       search strings.
 * @return mixed Search string(s) in the same shape, with any debug
 *       level indicator removed.
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string is handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    foreach ($SearchStrings as $FieldId => $Entry)
    {
        if (is_array($Entry))
        {
            # list of search strings for field (keys are preserved)
            $SearchStrings[$FieldId] = array_map(
                    array($this, "ExtractDebugLevel"), $Entry);
        }
        else
        {
            # single search string for field
            $SearchStrings[$FieldId] = $this->ExtractDebugLevel($Entry);
        }
    }

    # hand cleaned-up search info back to caller
    return $SearchStrings;
}
1768 
/**
 * Apply and strip a debug level indicator ("DBUGLVL=N", where N is one
 * or two digits in the range 1-9) if one appears at the start of the
 * search string.  Strings without a leading indicator are returned
 * unchanged and leave the debug level alone.
 * @param string $SearchString Search string to examine.
 * @return string Search string with indicator(s) for the extracted
 *       level removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # capture level explicitly, so that a string with the indicator
    #       somewhere other than the start (or with trailing lines) is
    #       never mistaken for a level value
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator (wherever it appears) from search string
        $SearchString = preg_replace("/\\s*DBUGLVL=".$Level."\\s*/", "",
                $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1794 
/**
 * Build a score list that contains every item in the item table, each
 * with a score of 1.
 * @return array Scores, as item ID => 1.
 */
private function LoadScoresForAllRecords()
{
    $IdColumn = $this->ItemIdFieldName;

    # retrieve IDs of all items
    $this->DB->Query("SELECT ".$IdColumn
            ." FROM ".$this->ItemTableName);

    # give every item the same baseline score
    $AllScores = array();
    while ($Row = $this->DB->FetchRow())
    {
        $AllScores[$Row[$IdColumn]] = 1;
    }

    return $AllScores;
}
1816 
1817 
1818  # ---- private methods (search DB building)
1819 
/**
 * Add to the stored count for a word (and, when stemming is enabled,
 * for the word's stem) for the specified item and field.  The stem
 * receives half the weight of the word, rounded up.
 * @param string $Word Word to record.
 * @param mixed $ItemId ID of item the word appeared in.
 * @param mixed $FieldId ID of field the word appeared in.
 * @param int $Weight Amount to add to the word's count.  (OPTIONAL,
 *       defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # always record the word itself (adding it to the DB if needed)
    $TargetIds = array($this->GetWordId($Word, TRUE));

    # also record stem, if stemming is on, the word is not a number,
    #       and the stem actually differs from the word
    if ($this->StemmingEnabled && !is_numeric($Word))
    {
        $Stem = PorterStemmer::Stem($Word, TRUE);
        if ($Stem != $Word)
        {
            $TargetIds[] = $this->GetStemId($Stem, TRUE);
        }
    }

    foreach ($TargetIds as $TargetId)
    {
        # a row exists if we already added a count for this word/field
        $AlreadyCounted = isset($this->WordCountAdded[$TargetId][$FieldId]);

        if ($AlreadyCounted)
        {
            $Query = "UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$TargetId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId;
        }
        else
        {
            $Query = "INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$TargetId.", ".$ItemId.", ".$FieldId.", ".$Weight.")";
        }
        $this->DB->Query($Query);

        # remember that a row now exists for this word/field
        if (!$AlreadyCounted)
        {
            $this->WordCountAdded[$TargetId][$FieldId] = TRUE;
        }

        # whatever comes next (the stem) counts for half as much
        $Weight = ceil($Weight / 2);
    }
}
1873 
/**
 * Retrieve content for specified field for specified item.  This base
 * implementation always throws (presumably child classes are expected
 * to override it).
 * @param mixed $ItemId ID of item.
 * @param mixed $FieldId ID of field.
 * @throws Exception Always, as this version provides no content.
 */
protected function GetFieldContent($ItemId, $FieldId)
{
    # error out ("new" is required -- without it PHP looks for a
    #       function named Exception() and dies with a fatal error
    #       rather than raising a catchable exception)
    throw new Exception("GetFieldContent() not implemented.");
}
1884 
/**
 * Break text into normalized words and record word counts for them
 * against the specified item and field, and optionally against the
 * keyword pseudo-field as well.
 * @param mixed $ItemId ID of item the text belongs to.
 * @param mixed $FieldId ID of field the text came from.
 * @param int $Weight Weight used for the keyword field counts (counts
 *       for the field itself use the default weight).
 * @param string $Text Text to index.
 * @param bool $IncludeInKeyword TRUE to also record the words for
 *       keyword searches.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldId, $Weight, $Text, $IncludeInKeyword)
{
    # split text into normalized words (quotes and parentheses are
    #       stripped rather than treated as phrases or groups)
    $Words = $this->ParseSearchStringForWords($Text, "OR", TRUE);

    foreach (array_keys($Words) as $Word)
    {
        # count word against the field it came from
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # count word against keyword field if requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, self::KEYWORD_FIELD_ID, $Weight);
        }
    }
}
1919 
1920  # ---- common private methods (used in both searching and DB build)
1921 
/**
 * Normalize a search string and split it into individual words, noting
 * for each word whether it is required ("+" prefix, or any unprefixed
 * word under AND logic), excluded ("-" prefix), or optional ("~"
 * prefix).  As a side effect, the object's excluded / required /
 * inclusive term counts and search term list are updated for each word
 * seen for the first time in this string.
 * @param string $SearchString Text to parse.
 * @param string $Logic Search logic ("AND" makes unprefixed words required).
 * @param bool $IgnorePhrases If TRUE, quotes and parentheses are simply
 *       stripped, rather than the quoted phrases and parenthesized
 *       groups being removed along with their contents.  (OPTIONAL,
 *       defaults to FALSE)
 * @return array Flag bitmasks (WORD_* values), indexed by normalized word.
 */
private function ParseSearchStringForWords(
        $SearchString, $Logic, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # phrases and groups are either removed whole or just have their
    #       delimiters stripped
    $PhrasePattern = $IgnorePhrases ? "/\"/" : "/\"[^\"]*\"/";
    $GroupPattern = $IgnorePhrases ? "/[\(\)]+/" : "/\\([^)]*\\)/";

    # set up normalization rules (kept as pattern => replacement pairs
    #       so that the two can never drift out of alignment)
    $Normalizations = array(
            # get rid of possessive plurals
            "/'s[^a-z0-9\\-+~]+/i" => " ",
            # get rid of single quotes / apostrophes
            "/'/" => "",
            # get rid of phrases (or just double quotes)
            $PhrasePattern => " ",
            # get rid of groups (or just parentheses)
            $GroupPattern => " ",
            # convert non-alphanumerics / non-minus/plus/tilde to a space
            "/[^a-z0-9\\-+~]+/i" => " ",
            # convert minus preceded by anything but whitespace to a space
            "/([^\\s])-+/i" => "\\1 ",
            # convert plus preceded by anything but whitespace to a space
            "/([^\\s])\\++/i" => "\\1 ",
            # convert minus followed by whitespace to a space
            "/-\\s/i" => " ",
            # convert plus followed by whitespace to a space
            "/\\+\\s/i" => " ",
            # convert tilde followed by whitespace to a space
            "/~\\s/i" => " ",
            # convert multiple spaces to one space
            "/[ ]+/" => " ",
            );

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace(array_keys($Normalizations),
            array_values($Normalizations), $Text);
    $this->DMsg(2, "Normalized search string is '".$Text."'");

    # convert text to lower case and strip off any extraneous whitespace
    $Text = trim(strtolower($Text));

    # start with an empty array
    $Words = array();

    # if there is nothing left after parsing, there are no words
    if (strlen($Text) == 0)
    {
        return $Words;
    }

    foreach (explode(" ", $Text) as $Word)
    {
        # work out what the option character (if any) asks for
        $FirstChar = substr($Word, 0, 1);
        $IsExcluded = ($FirstChar == "-");
        $IsOptional = ($FirstChar == "~");
        $IsRequired = !$IsExcluded && !$IsOptional
                && (($Logic == "AND") || ($FirstChar == "+"));

        # strip off option character
        if ($IsExcluded || $IsOptional || ($FirstChar == "+"))
        {
            $Word = (string)substr($Word, 1);
        }

        # skip option characters that had no word attached (counting
        #       them would, for example, add a required term that no
        #       item can ever match)
        if (strlen($Word) == 0)
        {
            continue;
        }

        # tally word if we have not already seen it in this string
        $IsNewWord = !isset($Words[$Word]);
        $Flags = self::WORD_PRESENT;
        if ($IsExcluded)
        {
            $Flags |= self::WORD_EXCLUDED;
            if ($IsNewWord)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            if ($IsRequired)
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNewWord)
                {
                    $this->RequiredTermCount++;
                }
            }
            if ($IsNewWord)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Word;
            }
        }

        # store flags to indicate word found
        $Words[$Word] = $Flags;
        $this->DMsg(3, "Word identified (".$Word.")");
    }

    # return normalized words to caller
    return $Words;
}
2052 
/**
 * Get the ID for a word, optionally adding the word to the database if
 * it is not already there.  IDs that were found or added are cached in
 * a static variable, so later calls for the same word skip the lookup.
 * @param string $Word Word to look up.
 * @param bool $AddIfNotFound If TRUE, word is added to database when
 *       no existing entry is found.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID for word, or NULL if word was not found and was
 *       not added.
 */
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $KnownIds;

    # answer from cache when possible
    if (isset($KnownIds[$Word]))
    {
        return $KnownIds[$Word];
    }

    # look up ID in database
    $Id = $this->DB->Query("SELECT WordId FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'",
            "WordId");

    # add word (in lower case) if it is unknown and caller wants it added
    if ($AddIfNotFound && ($Id === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $Id = $this->DB->LastInsertId();
    }

    # remember result for next time
    $KnownIds[$Word] = $Id;

    return $Id;
}
2096 
/**
 * Get the ID for a word stem, optionally adding the stem to the
 * database if it is not already there.  Stem IDs are the database IDs
 * shifted up by STEM_ID_OFFSET.  IDs that were found or added are
 * cached in a static variable.
 * @param string $Stem Word stem to look up.
 * @param bool $AddIfNotFound If TRUE, stem is added to database when
 *       no existing entry is found.  (OPTIONAL, defaults to FALSE)
 * @return int|null ID for stem, or NULL if stem was not found and was
 *       not added.
 */
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        return $StemIdCache[$Stem];
    }

    # look up ID in database
    $StemId = $this->DB->Query("SELECT WordId"
            ." FROM SearchStems"
            ." WHERE WordText='".addslashes($Stem)."'",
            "WordId");

    # if ID was not found and caller requested it be added
    if (($StemId === NULL) && $AddIfNotFound)
    {
        # add stem to database
        $this->DB->Query("INSERT INTO SearchStems (WordText)"
                ." VALUES ('".addslashes(strtolower($Stem))."')");

        # get ID for newly added stem
        $StemId = $this->DB->LastInsertId();
    }

    # report unknown stems as NULL (applying the offset to NULL would
    #       yield a bogus ID, and caching that ID would stop a later
    #       call from ever adding the stem)
    if ($StemId === NULL)
    {
        return NULL;
    }

    # adjust from DB ID value to stem ID value
    $StemId += self::STEM_ID_OFFSET;

    # save ID to cache
    $StemIdCache[$Stem] = $StemId;

    # return ID to caller
    return $StemId;
}
2143 
/**
 * Get the text of the word or stem with the specified ID.  IDs at or
 * above STEM_ID_OFFSET are looked up in the stem table; all others are
 * looked up in the word table.  Results are cached in a static variable.
 * @param int $WordId ID of word or stem.
 * @return mixed Text of word or stem, as returned by the database lookup.
 */
private function GetWord($WordId)
{
    static $WordCache;

    # if word was in cache
    if (isset($WordCache[$WordId]))
    {
        # use word from cache
        return $WordCache[$WordId];
    }

    # adjust search location and lookup ID if word is stem (the ID we
    #       were given is left alone, so that it can be used as the
    #       cache key -- caching under the adjusted ID would make stem N
    #       and word N overwrite each other)
    $TableName = "SearchWords";
    $LookupId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $LookupId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$LookupId."'",
            "WordText");

    # save word to cache
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
2182 
/**
 * Get the type of the specified item.  Item types are loaded from the
 * SearchItemTypes table on the first call and kept in a static
 * variable for later calls.
 * @param mixed $ItemId ID of item.
 * @return int|null Item type, or NULL if no type is on record for item.
 */
private function GetItemType($ItemId)
{
    static $TypesById;

    # load all item types on first use
    #       (presumably FetchColumn() yields ItemType values indexed
    #       by ItemId -- confirm against Database class)
    if (!isset($TypesById))
    {
        $this->DB->Query("SELECT * FROM SearchItemTypes");
        $TypesById = $this->DB->FetchColumn("ItemType", "ItemId");
    }

    # items with no recorded type are reported as NULL
    if (!isset($TypesById[$ItemId]))
    {
        return NULL;
    }

    return (int)$TypesById[$ItemId];
}
2199 
/**
 * Print debug message if the current debug level is higher than the
 * level of the message.
 * @param int $Level Level of message.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level exceeds level of message
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    echo "SE: ".$Msg."<br>\n";
}
2212 
2213  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2214 
2215  # possible types of logical operators
2216  const SEARCHLOGIC_AND = 1;
2217  const SEARCHLOGIC_OR = 2;
2218 
2219  # pattern to detect search strings that are explicit comparisons
2220  const COMPARISON_OPERATOR_PATTERN = '/^([><=^$@]+|!=)([^><=^$@])/';
2221 }
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
AddField($FieldId, $FieldType, $ItemTypes, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
const KEYWORD_FIELD_ID
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
Set of parameters used to perform a search.
SQL database abstraction object with smart query caching.
Definition: Database.php:22
SearchFieldForPhrases($FieldId, $Phrase)
Search for phrase in specified field.
GetAllSynonyms()
Get all synonyms.
const SEARCHLOGIC_OR
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
Filter search scores through any supplied functions.
UpdateForItem($ItemId, $ItemType)
Update search database for the specified item.
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
const FIELDTYPE_DATE
const SEARCHLOGIC_AND
SearchTerms()
Get normalized list of search terms.
const WORD_EXCLUDED
NumberOfResults($ItemType=NULL)
Get number of results found by most recent search.
FieldWeight($FieldId)
Get search weight for specified field.
FieldType($FieldId)
Get type of specified field (text/numeric/date/daterange).
ItemCount()
Get total number of items indexed by search engine.
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search across multiple fields, with different values or comparisons specified for each field...
__construct($ItemTableName, $ItemIdFieldName, $ItemTypeFieldName)
Object constructor.
const STEM_ID_OFFSET
Search($SearchParams, $StartingResult=0, $NumberOfResults=PHP_INT_MAX, $SortByField=NULL, $SortDescending=TRUE)
Perform search with specified parameters.
RemoveAllSynonyms()
Remove all synonyms.
DMsg($Level, $Msg)
Print debug message if level set high enough.
const WORD_PRESENT
DropField($FieldId)
Drop all data pertaining to field from search database.
GetFieldContent($ItemId, $FieldId)
Retrieve content for specified field for specified item.
Core metadata archive search engine class.
const COMPARISON_OPERATOR_PATTERN
const FIELDTYPE_TEXT
const WORD_REQUIRED
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
FieldedSearchWeightScale($SearchParams)
Get total of weights for all fields involved in search, useful for assessing scale of scores in searc...
FieldInKeywordSearch($FieldId)
Get whether specified field is included in keyword searches.
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSynonyms($Word)
Get synonyms for word.