CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: SearchEngine.php
4 #
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2014 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
8 #
9 
13 class SearchEngine {
14 
15  # ---- PUBLIC INTERFACE --------------------------------------------------
16 
17  # possible types of logical operators
18  const LOGIC_AND = 1;
19  const LOGIC_OR = 2;
20 
21  # flags used for indicating field types
22  const FIELDTYPE_TEXT = 1;
23  const FIELDTYPE_NUMERIC = 2;
24  const FIELDTYPE_DATE = 3;
26 
27  # flags used for indicating word states
28  const WORD_PRESENT = 1;
29  const WORD_EXCLUDED = 2;
30  const WORD_REQUIRED = 4;
31 
39  {
40  # create database object for our use
41  $this->DB = new Database();
42 
43  # save item access parameters
44  $this->ItemTableName = $ItemTableName;
45  $this->ItemIdFieldName = $ItemIdFieldName;
46 
47  # set default debug state
48  $this->DebugLevel = 0;
49  }
50 
/**
 * Register a field with the search engine, or overwrite the settings of a
 * field that was registered earlier under the same name.
 * @param string $FieldName Name of field.
 * @param int $FieldType Type of field (presumably one of the FIELDTYPE_
 *       constants -- the value is stored without validation).
 * @param int $Weight Weight stored for the field.
 * @param bool $UsedInKeywordSearch Whether the field takes part in keyword
 *       searches (stored as a strict boolean).
 */
function AddField(
        $FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    # gather the settings to be recorded for this field
    $Settings = array(
            "FieldType" => $FieldType,
            "Weight" => $Weight,
            "InKeywordSearch" => (bool)$UsedInKeywordSearch,
            );

    # record each setting (any other info held for the field is left alone)
    foreach ($Settings as $Key => $Value)
    {
        $this->FieldInfo[$FieldName][$Key] = $Value;
    }
}
68 
/**
 * Retrieve the type recorded for a field.
 * @param string $FieldName Name of field (as passed to AddField()).
 * @return mixed Field type value stored by AddField().
 */
function FieldType($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["FieldType"];
}
76 
/**
 * Retrieve the search weight recorded for a field.
 * @param string $FieldName Name of field (as passed to AddField()).
 * @return mixed Weight value stored by AddField().
 */
function FieldWeight($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["Weight"];
}
84 
/**
 * Report whether a field was registered as taking part in keyword searches.
 * @param string $FieldName Name of field (as passed to AddField()).
 * @return bool Flag stored by AddField().
 */
function FieldInKeywordSearch($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["InKeywordSearch"];
}
92 
/**
 * Set the debug output level.
 * @param int $NewValue New debug level (stored as given; the constructor
 *       defaults it to 0).
 */
function DebugLevel($NewValue)
{
    # remember requested level for later debug message filtering
    $this->DebugLevel = $NewValue;
}
101 
102 
103  # ---- search functions
104 
/**
 * Perform a keyword search using a single search string.  Term counts are
 * reset, the string is parsed into words and phrases, matching items are
 * scored, excluded and required terms are applied, and the scores are then
 * passed through CleanScores() together with the paging/sorting arguments.
 * The elapsed time is made available via SearchTime().
 * @param string $SearchString Search string (may contain the magic
 *       debugging keyword, which is stripped before parsing).
 * @param int $StartingResult Passed through to CleanScores().
 * @param int $NumberOfResults Passed through to CleanScores().
 * @param string $SortByField Passed through to CleanScores().
 * @param bool $SortDescending Passed through to CleanScores().
 * @return array Value returned by CleanScores() (presumably scores indexed
 *       by item ID -- confirm against CleanScores()).
 */
function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # strip out the magic debugging keyword (if present) before anything else
    $SearchString = $this->SetDebugLevel($SearchString);
    $this->DMsg(0, "In Search() with search string \"".$SearchString."\"");

    # note when we started so that the elapsed time can be reported
    $StartTime = microtime(TRUE);

    # reset per-search term tallies
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # break search string up into individual words
    $Words = $this->ParseSearchStringForWords($SearchString);
    $this->DMsg(1, "Found ".count($Words)." words");

    # pull any quoted phrases out of search string
    $Phrases = $this->ParseSearchStringForPhrases($SearchString);
    $this->DMsg(1, "Found ".count($Phrases)." phrases");

    # a search made up solely of exclusions starts from every record
    $OnlyExclusions = $this->ExcludedTermCount && !$this->InclusiveTermCount;
    if (!$OnlyExclusions)
    {
        # score items on words first, then refine using phrases
        $Scores = $this->SearchForWords($Words);
        $this->DMsg(1, "Found ".count($Scores)." results after word search");
        $Scores = $this->SearchForPhrases($Phrases, $Scores);
        $this->DMsg(1, "Found ".count($Scores)." results after phrase search");
    }
    else
    {
        $this->DMsg(1, "Loading all records");
        $Scores = $this->LoadScoresForAllRecords();
    }

    # filtering is only worthwhile if there is something to filter
    if (count($Scores) > 0)
    {
        # drop items containing excluded words, then items missing required words
        $Scores = $this->FilterOnExcludedWords($Words, $Scores);
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
180 
/**
 * Perform a search across one or more named fields.
 * @param array $SearchStrings Search strings indexed by field name; handed
 *       to SearchAcrossFields() after the magic debugging keyword (if any)
 *       has been stripped.
 * @param int $StartingResult Passed through to CleanScores().
 * @param int $NumberOfResults Passed through to CleanScores().
 * @param string $SortByField Passed through to CleanScores().
 * @param bool $SortDescending Passed through to CleanScores().
 * @return array Value returned by CleanScores().
 */
function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # strip out the magic debugging keyword (if present)
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    $this->DMsg(0, "In FieldedSearch() with "
            .count($SearchStrings)." search strings");

    # note when we started so that the elapsed time can be reported
    $StartTime = microtime(TRUE);

    # run the search (NULL comes back when no search was conducted)
    $Scores = $this->SearchAcrossFields($SearchStrings);
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
224 
/**
 * Perform a search made up of several groups of fielded search strings.
 * Each group may carry its own "Logic" setting, which applies within that
 * group; the default search logic in force when the method is called
 * decides how the results of the groups are combined (OR: scores are
 * merged, AND: only items found by every conducted group search survive).
 * @param array $SearchGroups List of groups, each an array that may contain
 *       "SearchStrings" (search strings indexed by field name) and "Logic"
 *       (LOGIC_AND or LOGIC_OR).
 * @param int $StartingResult Passed through to CleanScores().
 * @param int $NumberOfResults Passed through to CleanScores().
 * @param string $SortByField Passed through to CleanScores().
 * @param bool $SortDescending Passed through to CleanScores().
 * @return array Value returned by CleanScores().
 */
function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # strip out the magic debugging keyword (if present) from every group
    foreach ($SearchGroups as $Index => $Ignored)
    {
        if (isset($SearchGroups[$Index]["SearchStrings"]))
        {
            $SearchGroups[$Index]["SearchStrings"] =
                    $this->SetDebugLevel($SearchGroups[$Index]["SearchStrings"]);
        }
    }
    $this->DMsg(0, "In GroupedSearch() with "
            .count($SearchGroups)." search groups");

    # note when we started so that the elapsed time can be reported
    $StartTime = microtime(TRUE);

    # remember AND/OR setting so it can be restored when we are done
    $SavedSearchLogic = $this->DefaultSearchLogic;

    # nothing found and no group search conducted yet
    $Scores = array();
    $HaveSearched = FALSE;

    foreach ($SearchGroups as $Group)
    {
        $this->DMsg(0, "----- GROUP ---------------------------");

        # group-specific logic wins over the saved setting
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $SavedSearchLogic;
        $this->DMsg(2, "Logic is "
                .(($this->DefaultSearchLogic == self::LOGIC_AND) ? "AND" : "OR"));

        # groups without search strings contribute nothing
        if (!isset($Group["SearchStrings"]))
        {
            continue;
        }

        # run search for group (NULL means no search was conducted)
        $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);
        if ($GroupScores === NULL)
        {
            continue;
        }

        if (!$HaveSearched || ($SavedSearchLogic == self::LOGIC_OR))
        {
            # merge group results into overall results, summing scores
            foreach ($GroupScores as $ItemId => $Score)
            {
                $Scores[$ItemId] = isset($Scores[$ItemId])
                        ? ($Scores[$ItemId] + $Score) : $Score;
            }

            # (later groups are no longer the first search)
            $HaveSearched = TRUE;
        }
        else
        {
            # keep only items present in both result sets, summing scores
            $Combined = array();
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($Scores[$ItemId]))
                {
                    $Combined[$ItemId] = $Scores[$ItemId] + $Score;
                }
            }
            $Scores = $Combined;
        }
    }

    # put AND/OR setting back the way we found it
    $this->DefaultSearchLogic = $SavedSearchLogic;

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $StartTime;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
343 
/**
 * Add a function to the list of result filter functions.
 * @param callable $FunctionName Name of filter function (appended to the
 *       list as given; how the list is used is not visible in this part of
 *       the file).
 */
function AddResultFilterFunction($FunctionName)
{
    # append to list of filter functions
    $this->FilterFuncs[] = $FunctionName;
}
353 
/**
 * Get/set the default search logic (LOGIC_AND or LOGIC_OR).
 * @param int $NewSetting New setting (OPTIONAL).  The comparison against
 *       NULL is intentionally loose, so NULL and other empty values leave
 *       the current setting untouched.
 * @return int Default search logic setting now in effect.
 */
function DefaultSearchLogic($NewSetting = NULL)
{
    # change setting only if a (non-empty) value was supplied
    if ($NewSetting != NULL)
    {
        $this->DefaultSearchLogic = $NewSetting;
    }

    # report current setting to caller
    return $this->DefaultSearchLogic;
}
367 
/**
 * Choose whether search terms are required by default.  This is a
 * convenience wrapper around DefaultSearchLogic().
 * @param bool $NewSetting TRUE selects LOGIC_AND, FALSE selects LOGIC_OR.
 */
function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    # translate flag into corresponding logic constant
    $Logic = $NewSetting ? self::LOGIC_AND : self::LOGIC_OR;
    $this->DefaultSearchLogic($Logic);
}
377 
/**
 * Get the number of results available from the most recent search -- the
 * same value the search methods report in their closing debug message.
 * @return int Number of results available (presumably the total before
 *       trimming by CleanScores() -- confirm there), or NULL if no value
 *       has been recorded yet.
 */
function NumberOfResults()
{
    return $this->NumberOfResultsAvailable;
}
386 
/**
 * Get the list of normalized, non-excluded search terms collected while
 * parsing search strings.
 * @return array List of search terms (NULL if none have been collected).
 */
function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}
395 
/**
 * Get the time the most recent search took.
 * @return float Elapsed time in seconds (difference between two
 *       microtime(TRUE) readings).
 */
function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
404 
/**
 * Sum the weights of all fields involved in a fielded search.  Named
 * fields contribute their own weight (unknown fields contribute nothing);
 * if the keyword pseudo-field is among the fields searched, the weight of
 * every field flagged for keyword searching is added as well.
 * @param array $SearchStrings Search strings indexed by field name (only
 *       the field names are looked at).
 * @return int Sum of the applicable field weights.
 */
function FieldedSearchWeightScale($SearchStrings)
{
    $TotalWeight = 0;
    $KeywordSearchSeen = FALSE;

    # tally weights for the explicitly named fields
    foreach ($SearchStrings as $FieldName => $Unused)
    {
        # keyword pseudo-field is dealt with after the loop
        if ($FieldName == "XXXKeywordXXX")
        {
            $KeywordSearchSeen = TRUE;
            continue;
        }

        if (array_key_exists($FieldName, $this->FieldInfo))
        {
            $TotalWeight += $this->FieldInfo[$FieldName]["Weight"];
        }
    }

    # without a keyword search there is nothing more to add
    if (!$KeywordSearchSeen)
    {
        return $TotalWeight;
    }

    # add weight of every field that takes part in keyword searches
    foreach ($this->FieldInfo as $Info)
    {
        if ($Info["InKeywordSearch"])
        {
            $TotalWeight += $Info["Weight"];
        }
    }
    return $TotalWeight;
}
442 
443 
444  # ---- search database update functions
445 
/**
 * Rebuild the search database entries for one item.  All existing word
 * counts for the item are deleted, then the content of every field with a
 * positive weight is fetched via GetFieldContent() and recorded via
 * RecordSearchInfoForText().
 * @param int $ItemId ID of item.  Negative IDs (temporary records) are
 *       ignored.
 */
function UpdateForItem($ItemId)
{
    # temporary records are flagged by a negative ID and are never indexed
    if ($ItemId < 0)
    {
        return;
    }

    # clear word count added flags left over from any previous item
    unset($this->WordCountAdded);

    # throw away whatever is currently recorded for this item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

    foreach ($this->FieldInfo as $FieldName => $Info)
    {
        # only fields with a positive search weight are indexed
        if (!($Info["Weight"] > 0))
        {
            continue;
        }

        # fetch field content, treating a single value as a one-entry list
        $Content = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Content) ? $Content : array($Content);

        # record search info for each piece of text
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
492 
/**
 * Rebuild the search database entries for a range of items.
 * @param int $StartingItemId ID of first item to update (items with this
 *       ID or higher are selected, in ascending ID order).
 * @param int $NumberOfItems Maximum number of items to update.
 * @return int ID of last item updated, or NULL if no items were found at
 *       or above the starting ID.
 */
function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
    $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

    # assume nothing will be updated (avoids returning an undefined
    #       variable when the query selects no items)
    $LastItemId = NULL;

    # for each retrieved item ID
    foreach ($ItemIds as $ItemId)
    {
        # update search info for item
        $this->UpdateForItem($ItemId);
        $LastItemId = $ItemId;
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
517 
/**
 * Remove all search database entries for an item.
 * @param int $ItemId ID of item (inserted into the query as given).
 */
function DropItem($ItemId)
{
    # word count table is the only place where items are referenced
    $Query = "DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId;
    $this->DB->Query($Query);
}
527 
/**
 * Remove all search database entries for a field, along with the entry
 * for the field itself.  Unknown fields are silently ignored.
 * @param string $FieldName Name of field.
 */
function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # nothing to do if field is not in our fields table
    if ($FieldId === NULL)
    {
        return;
    }

    # (a backslash before a single quote is NOT an escape sequence inside
    #       a double-quoted PHP string, so the ID is inserted unquoted and
    #       forced to an integer instead)
    $FieldId = intval($FieldId);

    # drop all entries pertaining to field from word counts table
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = ".$FieldId);

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = ".$FieldId);

    # forget cached ID so that GetFieldId() does not hand out a stale value
    unset($this->FieldIds[$FieldName]);
}
544 
/**
 * Get the number of rows in the search words table.
 * @return int Number of search terms.
 */
function SearchTermCount()
{
    $Query = "SELECT COUNT(*) AS TermCount"
            ." FROM SearchWords";
    $TermCount = $this->DB->Query($Query, "TermCount");
    return $TermCount;
}
554 
/**
 * Get the number of distinct items that have entries in the word counts
 * table.
 * @return int Number of items.
 */
function ItemCount()
{
    $Query = "SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts";
    $ItemCount = $this->DB->Query($Query, "ItemCount");
    return $ItemCount;
}
564 
/**
 * Add synonyms for a word.  The word and each synonym are added to the
 * words table if not already there.  A synonym pair is only stored if it
 * is not already present in either order.
 * @param string $Word Word to add synonyms for.
 * @param array $Synonyms List of synonyms for the word.
 * @return int Number of new synonym pairs stored.
 */
function AddSynonyms($Word, $Synonyms)
{
    # look up ID for word (adding word if necessary)
    $WordId = $this->GetWordId($Word, TRUE);

    # assume no synonyms will be added
    $NumAdded = 0;

    foreach ($Synonyms as $Synonym)
    {
        # look up ID for synonym (adding synonym if necessary)
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # check whether pair is already on file (in either order)
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                    ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                    ." AND WordIdA = ".$SynonymId.")");
        if ($this->DB->NumRowsSelected() != 0)
        {
            continue;
        }

        # store new synonym pair
        $this->DB->Query("INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$SynonymId.")");
        $NumAdded++;
    }

    # report to caller number of new synonyms added
    return $NumAdded;
}
605 
/**
 * Remove synonyms for a word.  Nothing happens if the word is not known.
 * @param string $Word Word to remove synonyms for.
 * @param array $Synonyms Synonyms to remove.  If NULL (the default), all
 *       synonyms for the word are removed.  Synonyms that are not known
 *       words are skipped.
 */
function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # unknown words cannot have synonyms
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, every pair involving the word goes
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    foreach ($Synonyms as $Synonym)
    {
        # skip synonyms that are not known words
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL)
        {
            continue;
        }

        # delete pair, whichever way around it was stored
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
650 
/**
 * Remove every synonym pair from the synonyms table.
 */
function RemoveAllSynonyms()
{
    $Query = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Query);
}
658 
/**
 * Get the synonyms on file for a word.
 * @param string $Word Word to look up synonyms for.
 * @return array List of synonyms (empty if the word is not known or has
 *       no synonyms).
 */
function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        # (all IDs are collected before any word is looked up, because
        #       GetWord() issues queries on the same database object)
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # synonym is whichever side of the pair is not our word
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # for each synonym ID
        foreach ($SynonymIds as $SynonymId)
        {
            # look up synonym word and add to synonym list
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
697 
/**
 * Get all synonyms on file.  Each stored pair is first listed in both
 * directions, then reciprocal duplicates are removed again (the entry is
 * dropped from whichever of the two words currently has fewer synonyms,
 * or from the synonym's list on a tie), and finally the lists are sorted.
 * @return array Lists of synonyms, indexed by word.
 */
function GetAllSynonyms()
{
    # assume no synonyms will be found
    $SynonymList = array();

    # (a separate database object is used for the pair query because
    #       GetWord() issues its own queries while we are still fetching)
    $PairDB = new Database();
    $PairDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Record = $PairDB->FetchRow())
    {
        # look up both words of the pair
        $WordA = $this->GetWord($Record["WordIdA"]);
        $WordB = $this->GetWord($Record["WordIdB"]);

        # list pair in both directions, skipping entries already present
        $Directions = array(
                array($WordA, $WordB),
                array($WordB, $WordA),
                );
        foreach ($Directions as $Direction)
        {
            list($Key, $Value) = $Direction;
            $AlreadyListed = isset($SynonymList[$Key])
                    && in_array($Value, $SynonymList[$Key]);
            if (!$AlreadyListed)
            {
                $SynonymList[$Key][] = $Value;
            }
        }
    }

    # remove reciprocal duplicates
    foreach ($SynonymList as $Word => $Synonyms)
    {
        foreach ($Synonyms as $Synonym)
        {
            # only pairs that are (still) listed in both directions matter
            $IsReciprocal = isset($SynonymList[$Synonym])
                    && isset($SynonymList[$Word])
                    && in_array($Word, $SynonymList[$Synonym])
                    && in_array($Synonym, $SynonymList[$Word]);
            if (!$IsReciprocal)
            {
                continue;
            }

            # entry is removed from word's list if word has fewer synonyms,
            #       otherwise from synonym's list
            if (count($SynonymList[$Word]) < count($SynonymList[$Synonym]))
            {
                $Owner = $Word;
                $Removed = $Synonym;
            }
            else
            {
                $Owner = $Synonym;
                $Removed = $Word;
            }
            $SynonymList[$Owner] = array_diff(
                    $SynonymList[$Owner], array($Removed));

            # discard list entirely if nothing is left on it
            if (!count($SynonymList[$Owner]))
            {
                unset($SynonymList[$Owner]);
            }
        }
    }

    # sort alphabetically (just for convenience)
    foreach (array_keys($SynonymList) as $Word)
    {
        asort($SynonymList[$Word]);
    }
    ksort($SynonymList);

    # return 2D array of synonyms to caller
    return $SynonymList;
}
790 
/**
 * Replace all synonyms on file with a new set.
 * @param array $SynonymList Lists of synonyms, indexed by word (the same
 *       layout GetAllSynonyms() builds).
 */
function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # store synonyms word by word
    foreach ($SynonymList as $Word => $SynonymsForWord)
    {
        $this->AddSynonyms($Word, $SynonymsForWord);
    }
}
808 
/**
 * Load synonyms from a text file.  Each line holds a word followed by its
 * synonyms, separated by whitespace and/or commas.  Empty lines and lines
 * containing a "#" anywhere are skipped, as are lines with only one word.
 * @param string $FileName Name of file to load from.
 * @return int Number of new synonym pairs added (0 if the file could not
 *       be read).
 */
function LoadSynonymsFromFile($FileName)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # read in contents of file (file() returns FALSE on failure, which
    #       must not be passed on to count() or foreach)
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
    if ($Lines === FALSE)
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comments
        # (NOTE: pattern is not anchored, so any line containing "#" is
        #       treated as a comment)
        if (preg_match("/[\s]*#/", $Line))
        {
            continue;
        }

        # split line into words, discarding the empty strings that leading
        #       whitespace or trailing separators would otherwise produce
        $Words = preg_split("/[\s,]+/", $Line, -1, PREG_SPLIT_NO_EMPTY);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
853 
854 
855  # ---- PRIVATE INTERFACE -------------------------------------------------
856 
857  protected $DB;
858  protected $DebugLevel;
859  protected $ItemTableName;
860  protected $ItemIdFieldName;
862  protected $LastSearchTime;
863  protected $FilterFuncs;
864  protected $DefaultSearchLogic = self::LOGIC_AND;
865  protected $StemmingEnabled = TRUE;
866  protected $SynonymsEnabled = TRUE;
867 
868  private $WordCountAdded;
869  private $FieldIds;
870  private $FieldInfo;
871  private $RequiredTermCount;
872  private $RequiredTermCounts;
873  private $InclusiveTermCount;
874  private $ExcludedTermCount;
875  private $SearchTermList;
876 
877  const STEM_ID_OFFSET = 1000000;
878 
879 
880  # ---- common private functions (used in both searching and DB build)
881 
/**
 * Normalize a search string and split it into words.  As a side effect
 * the excluded/required/inclusive term counts are incremented and each new
 * non-excluded word is appended to the search term list.
 * A word prefixed with "-" is excluded, a word prefixed with "+" is
 * required, and a word prefixed with "~" is never required.  Unprefixed
 * words are required only when the default search logic is LOGIC_AND.
 * @param string $SearchString Search string to parse.
 * @param bool $IgnorePhrases If TRUE, only the quote and parenthesis
 *       characters are removed, so that words inside phrases and groups
 *       are kept; if FALSE, phrases and groups are removed entirely.
 * @return array Word flags (WORD_ constants OR'd together), indexed by
 *       normalized (lower-case) word.
 */
private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # choose between removing whole phrases/groups and removing just the
    #       quote/parenthesis characters around them
    $PhrasePattern = $IgnorePhrases ? "/\"/" : "/\"[^\"]*\"/";
    $GroupPattern = $IgnorePhrases ? "/[\(\)]+/" : "/\\([^)]*\\)/";

    # set up normalization patterns and their replacements
    # (the two lists are parallel -- keep them aligned when editing)
    $Patterns = array(
            "/'s[^a-z0-9\\-+~]+/i", # get rid of possessive plurals
            "/'/",                  # get rid of single quotes / apostrophes
            $PhrasePattern,         # get rid of phrases (or just quotes)
            $GroupPattern,          # get rid of groups (or just parens)
            "/[^a-z0-9\\-+~]+/i",   # convert non-alphanumerics / non-minus/plus to a space
            "/([^\\s])-+/i",        # convert minus preceded by anything but whitespace to a space
            "/([^\\s])\\++/i",      # convert plus preceded by anything but whitespace to a space
            "/-\\s/i",              # convert minus followed by whitespace to a space
            "/\\+\\s/i",            # convert plus followed by whitespace to a space
            "/~\\s/i",              # convert tilde followed by whitespace to a space
            "/[ ]+/"                # convert multiple spaces to one space
            );
    $Replacements = array(
            " ",
            "",
            " ",
            " ",
            " ",
            "\\1 ",     # (keep the character that preceded the minus)
            "\\1 ",     # (keep the character that preceded the plus)
            " ",
            " ",
            " ",
            " "
            );

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace($Patterns, $Replacements, $Text);
    $this->DMsg(2, "Normalized search string is '".$Text."'");

    # convert text to lower case
    $Text = strtolower($Text);

    # strip off any extraneous whitespace
    $Text = trim($Text);

    # start with an empty array
    $Words = array();

    # if we have words left after parsing
    if (strlen($Text) != 0)
    {
        # for each word
        foreach (explode(" ", $Text) as $Word)
        {
            # grab first character of word and strip it off if it is an
            #       option character
            $FirstChar = substr($Word, 0, 1);
            if (in_array($FirstChar, array("-", "~", "+"), TRUE))
            {
                $Word = (string)substr($Word, 1);
            }

            # skip option characters that had no word attached
            if ($Word === "")
            {
                continue;
            }

            # set flags and update term counts (each distinct word is
            #       only counted once)
            $IsNewWord = !isset($Words[$Word]);
            $Flags = self::WORD_PRESENT;
            if ($FirstChar == "-")
            {
                $Flags |= self::WORD_EXCLUDED;
                if ($IsNewWord)
                {
                    $this->ExcludedTermCount++;
                }
            }
            else
            {
                # tilde overrides AND logic, plus overrides OR logic
                if (($FirstChar != "~")
                        && (($this->DefaultSearchLogic == self::LOGIC_AND)
                                || ($FirstChar == "+")))
                {
                    $Flags |= self::WORD_REQUIRED;
                    if ($IsNewWord)
                    {
                        $this->RequiredTermCount++;
                    }
                }
                if ($IsNewWord)
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Word;
                }
            }

            # store flags to indicate word found
            $Words[$Word] = $Flags;
            $this->DMsg(3, "Word identified (".$Word.")");
        }
    }

    # return normalized words to caller
    return $Words;
}
1002 
/**
 * Get the search database ID for a field, adding the field to the fields
 * table if it is not there yet.  IDs are cached per object.
 * @param string $FieldName Name of field.
 * @return int ID of field.
 */
private function GetFieldId($FieldName)
{
    # use cached ID when we have one
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # look field up in database
    $EscapedName = addslashes($FieldName);
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".$EscapedName."'");
    $Record = $this->DB->FetchRow();

    if ($Record)
    {
        # field is already on file
        $FieldId = $Record["FieldId"];
    }
    else
    {
        # add field to database and pick up its newly assigned ID
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".$EscapedName."')");
        $FieldId = $this->DB->LastInsertId();
    }

    # remember ID for next time and hand it to caller
    $this->FieldIds[$FieldName] = $FieldId;
    return $FieldId;
}
1041 
/**
 * Get the search database ID for a word.  IDs that were found (or newly
 * assigned) are kept in a static cache.
 * @param string $Word Word to look up.
 * @param bool $AddIfNotFound If TRUE, a word that is not on file is added
 *       (in lower case) to the words table.
 * @return int ID of word, or NULL if the word was not found and was not
 *       added.
 */
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # use cached ID when we have one
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # look word up in database
    $Query = "SELECT WordId"
            ." FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'";
    $WordId = $this->DB->Query($Query, "WordId");

    # add word if it is not on file and caller asked for that
    if ($AddIfNotFound && ($WordId === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $WordId = $this->DB->LastInsertId();
    }

    # remember ID for next time and hand it to caller
    $WordIdCache[$Word] = $WordId;
    return $WordId;
}
1085 
/**
 * Get the search database ID for a word stem.  Stem IDs are the ID from
 * the stems table plus STEM_ID_OFFSET, which keeps them distinct from
 * word IDs.  IDs that were found (or newly assigned) are kept in a static
 * cache.
 * @param string $Stem Word stem to look up.
 * @param bool $AddIfNotFound If TRUE, a stem that is not on file is added
 *       (in lower case) to the stems table.
 * @return int ID of stem (offset already applied), or NULL if the stem
 *       was not found and was not added.
 */
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        $StemId = $StemIdCache[$Stem];
    }
    else
    {
        # look up ID in database
        $StemId = $this->DB->Query("SELECT WordId"
                ." FROM SearchStems"
                ." WHERE WordText='".addslashes($Stem)."'",
                "WordId");

        # if ID was not found and caller requested it be added
        if (($StemId === NULL) && $AddIfNotFound)
        {
            # add stem to database
            $this->DB->Query("INSERT INTO SearchStems (WordText)"
                    ." VALUES ('".addslashes(strtolower($Stem))."')");

            # get ID for newly added stem
            $StemId = $this->DB->LastInsertId();
        }

        # if we ended up with an ID
        # (the offset must not be applied to NULL, because that would
        #       turn "not found" into the bogus ID STEM_ID_OFFSET)
        if ($StemId !== NULL)
        {
            # adjust from DB ID value to stem ID value
            $StemId += self::STEM_ID_OFFSET;

            # save ID to cache
            $StemIdCache[$Stem] = $StemId;
        }
    }

    # return ID to caller
    return $StemId;
}
1132 
/**
 * Get the word (or stem) text for an ID.  IDs at or above STEM_ID_OFFSET
 * are looked up in the stems table (after subtracting the offset), all
 * other IDs in the words table.  Results are kept in a static cache.
 * @param int $WordId ID of word or stem.
 * @return string Text for word or stem, as returned by the database
 *       lookup.
 */
private function GetWord($WordId)
{
    static $WordCache;

    # if word was in cache
    if (isset($WordCache[$WordId]))
    {
        # use word from cache
        return $WordCache[$WordId];
    }

    # adjust search location and database ID if word is stem
    # (the ID passed in is left untouched so that it can serve as the
    #       cache key -- otherwise stems and words would share cache slots)
    $TableName = "SearchWords";
    $DatabaseId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $DatabaseId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$DatabaseId."'",
            "WordText");

    # save word to cache
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
1171 
1172 
1173  # ---- private functions used in searching
1174 
/**
 * Perform a search across the specified fields.  Text fields (and the
 * keyword pseudo-field) are searched for words and phrases; every other
 * search string is left to SearchForComparisonMatches().  Excluded words
 * and phrases and required words are applied to the results afterwards.
 * The term counts are reset at the start.
 * @param array $SearchStrings Search strings indexed by field name, where
 *       each entry is either a single string or an array of strings.
 * @return array Search result scores, or NULL if no search was conducted
 *       (callers rely on NULL to tell "no search" from "no results").
 */
private function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;

    # start with no parsed words or phrases for any field
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look like comparison match
            $NotComparisonSearch = !preg_match("/^[><!]=./", $SearchString)
                    && !preg_match("/^[><=]./", $SearchString);
            if (($FieldName == "XXXKeywordXXX")
                    || (isset($this->FieldInfo[$FieldName])
                            && ($this->FieldInfo[$FieldName]["FieldType"]
                                    == self::FIELDTYPE_TEXT)
                            && $NotComparisonSearch))
            {
                $this->DMsg(0, "Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"");

                # normalize text and split into words
                # NOTE(review): when a field has several search strings,
                #       only the words and phrases of the last one are
                #       retained for the exclusion pass below
                $Words[$FieldName] =
                        $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words[$FieldName]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldName], $FieldName, $Scores);
                    $this->DMsg(3, "Have "
                            .count($Scores)." results after word search");
                }

                # split into phrases
                $Phrases[$FieldName] =
                        $this->ParseSearchStringForPhrases($SearchString);

                # handle any phrases
                if (count($Phrases[$FieldName]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        $this->DMsg(3, "Have ".(is_array($Scores) ? count($Scores) : 0)
                ." results after comparison search");
    }

    # if no results found and exclusions specified
    # (scores may still be NULL at this point, so count() cannot be used)
    if (empty($Scores) && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (!empty($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldName == "XXXKeywordXXX")
                        || (isset($this->FieldInfo[$FieldName])
                                && ($this->FieldInfo[$FieldName]["FieldType"]
                                        == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldName]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords(
                                $Words[$FieldName], $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldName]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
                    }
                }
            }
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1311 
/**
 * Score items according to how often the supplied words occur in a field.
 * For every word not flagged as excluded, per-item counts are read from
 * the SearchWordCounts table for the word itself and, when enabled, for
 * its synonyms and for its stem.  Synonym and stem counts are halved
 * (rounded up) before being combined with the counts for the word.  The
 * combined count is added to the score of each matching item, and each
 * item matching a word flagged as required has its entry in
 * $this->RequiredTermCounts incremented.
 * @param array $Words Word flags (WORD_* bit values), indexed by word.
 * @param string $FieldName Name of field to search.  (OPTIONAL, defaults
 *       to the keyword search field name "XXXKeywordXXX")
 * @param array $Scores Existing scores, indexed by item ID, to add to.
 *       (OPTIONAL, defaults to starting with an empty list)
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForWords(
        $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
{
    # start with empty search result scores list if none passed in
    if ($Scores == NULL)
    {
        $Scores = array();
    }

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        $this->DMsg(2, "Searching for word '".$Word."' in field ".$FieldName);

        # excluded words contribute nothing to the scores
        if ($Flags & self::WORD_EXCLUDED)
        {
            continue;
        }

        # look up record ID for word
        $Counts = array();
        $this->DMsg(2, "Looking up word \"".$Word."\"");
        $WordId = $this->GetWordId($Word);

        # if word is in DB
        if ($WordId !== NULL)
        {
            # look up counts for word
            $Counts = $this->LoadCountsForWordId($WordId, $FieldId);

            # if synonym support is enabled
            if ($this->SynonymsEnabled)
            {
                # fold in (halved) counts for each synonym
                # (all synonym IDs are fetched before any count query is
                #       issued, because queries share one DB object)
                foreach ($this->LoadSynonymIdsForWordId($WordId) as $SynonymId)
                {
                    $Counts = $this->AddHalvedCounts($Counts,
                            $this->LoadCountsForWordId($SynonymId, $FieldId));
                }
            }
        }

        # if stemming is enabled
        if ($this->StemmingEnabled)
        {
            # retrieve stem ID
            $Stem = PorterStemmer::Stem($Word);
            $this->DMsg(2, "Looking up stem \"".$Stem."\"");
            $StemId = $this->GetStemId($Stem);

            # if ID found for stem
            if ($StemId !== NULL)
            {
                # fold in (halved) counts for stem
                $Counts = $this->AddHalvedCounts($Counts,
                        $this->LoadCountsForWordId($StemId, $FieldId));
            }
        }

        # for each item in which word (or synonym or stem) was found
        $WordIsRequired = ($Flags & self::WORD_REQUIRED) ? TRUE : FALSE;
        foreach ($Counts as $ItemId => $Count)
        {
            # if word flagged as required
            if ($WordIsRequired)
            {
                # increment required word count for record
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }

            # add to item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $Count;
            }
            else
            {
                $Scores[$ItemId] = $Count;
            }
        }
    }

    # return basic scores to caller
    return $Scores;
}

/**
 * Retrieve per-item counts from SearchWordCounts for a word or stem ID.
 * @param int $WordId ID of word or stem.
 * @param int $FieldId ID of field.
 * @return array Counts, indexed by item ID.
 */
private function LoadCountsForWordId($WordId, $FieldId)
{
    $this->DB->Query("SELECT ItemId,Count FROM SearchWordCounts"
            ." WHERE WordId = ".$WordId
            ." AND FieldId = ".$FieldId);
    return $this->DB->FetchColumn("Count", "ItemId");
}

/**
 * Retrieve IDs of all words recorded as synonyms of the specified word.
 * @param int $WordId ID of word.
 * @return array IDs of synonyms (empty if none found).
 */
private function LoadSynonymIdsForWordId($WordId)
{
    $this->DB->Query("SELECT WordIdA, WordIdB"
            ." FROM SearchWordSynonyms"
            ." WHERE WordIdA = ".$WordId
            ." OR WordIdB = ".$WordId);

    # the synonym is whichever side of the pair is not our word
    $SynonymIds = array();
    while ($Record = $this->DB->FetchRow())
    {
        $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                ? $Record["WordIdB"]
                : $Record["WordIdA"];
    }
    return $SynonymIds;
}

/**
 * Add secondary (synonym or stem) counts to existing counts at half
 * value, rounded up.
 * @param array $Counts Existing counts, indexed by item ID.
 * @param array $ExtraCounts Counts to halve and add, indexed by item ID.
 * @return array Combined counts, indexed by item ID.
 */
private function AddHalvedCounts($Counts, $ExtraCounts)
{
    foreach ($ExtraCounts as $ItemId => $Count)
    {
        $AdjustedCount = ceil($Count / 2);
        if (isset($Counts[$ItemId]))
        {
            $Counts[$ItemId] += $AdjustedCount;
        }
        else
        {
            $Counts[$ItemId] = $AdjustedCount;
        }
    }
    return $Counts;
}
1484 
/**
 * Extract double-quoted phrases from a search string.
 * Each phrase is flagged according to the character that immediately
 * precedes its opening quote ("-" excludes the phrase, "+" requires it,
 * and "~" makes it optional when the default search logic is AND).  The
 * excluded, required, and inclusive term counts and the search term list
 * are updated the first time each distinct phrase is seen.
 * @param string $SearchString Search string to parse.
 * @return array Phrase flags (WORD_* bit values), indexed by phrase.
 */
private function ParseSearchStringForPhrases($SearchString)
{
    # split on double quotes, so that odd-numbered chunks are the quoted text
    $Chunks = explode("\"", $SearchString);
    $ChunkCount = count($Chunks);

    # for each quoted chunk that has a closing quote
    $Phrases = array();
    for ($Pos = 1;  ($Pos + 1) < $ChunkCount;  $Pos += 2)
    {
        # grab phrase, plus the character just before its opening quote
        $Phrase = trim(addslashes($Chunks[$Pos]));
        $Prefix = substr($Chunks[$Pos - 1], -1);
        $IsNewPhrase = !isset($Phrases[$Phrase]);

        if ($Prefix == "-")
        {
            # phrase is excluded
            $Flags = self::WORD_PRESENT | self::WORD_EXCLUDED;
            if ($IsNewPhrase)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            $Flags = self::WORD_PRESENT;

            # phrase is required if explicitly marked as such, or if
            #       terms are required by default and it was not
            #       explicitly marked as optional
            $IsRequired = ($Prefix == "+")
                    || (($this->DefaultSearchLogic == self::LOGIC_AND)
                            && ($Prefix != "~"));
            if ($IsRequired)
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNewPhrase)
                {
                    $this->RequiredTermCount++;
                }
            }

            # note phrase as a search term if not already seen
            if ($IsNewPhrase)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # save (or overwrite) flags for phrase
        $Phrases[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Phrases;
}
1543 
/**
 * Find items that contain a phrase in a field.  This version is only a
 * placeholder that halts the script with an error message (presumably
 * a subclass is expected to supply the real implementation -- confirm).
 * SearchForPhrases() iterates over the value returned by this method
 * as a list of item IDs.
 * @param string $FieldName Name of field to search.  (unused here)
 * @param string $Phrase Phrase to look for.  (unused here)
 */
protected function SearchFieldForPhrases($FieldName, $Phrase)
{
    # no implementation available at this level, so bail out
    $ErrorMsg = "<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n";
    exit($ErrorMsg);
}
1549 
/**
 * Adjust scores for items according to the phrases they contain.
 * For a keyword search, the search is repeated for every field that is
 * marked as included in keyword searches.  For any other field, items
 * containing an excluded phrase are removed from the scores, while items
 * containing a non-excluded phrase have the phrase value (number of words
 * in phrase times field weight) added to their score, and their entry in
 * $this->RequiredTermCounts incremented if the phrase is required.
 * @param array $Phrases Phrase flags (WORD_* bit values), indexed by phrase.
 * @param array $Scores Existing scores, indexed by item ID (may be NULL).
 * @param string $FieldName Name of field to search.  (OPTIONAL, defaults
 *       to the keyword search field name "XXXKeywordXXX")
 * @param bool $ProcessNonExcluded If TRUE, phrases not flagged as excluded
 *       are processed.  (OPTIONAL, defaults to TRUE)
 * @param bool $ProcessExcluded If TRUE, phrases flagged as excluded are
 *       processed.  (OPTIONAL, defaults to TRUE)
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do if no phrases were found
    if (count($Phrases) == 0)
    {
        return $Scores;
    }

    # if this is a keyword search
    if ($FieldName == "XXXKeywordXXX")
    {
        # call ourself for each field included in keyword searches
        foreach ($this->FieldInfo as $KFieldName => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
                        $ProcessNonExcluded, $ProcessExcluded);
            }
        }
        return $Scores;
    }

    # for each phrase
    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldName);

        # skip phrase if we were not asked to process phrases of its kind
        $IsExcluded = ($Flags & self::WORD_EXCLUDED) ? TRUE : FALSE;
        if ($IsExcluded ? !$ProcessExcluded : !$ProcessNonExcluded)
        {
            continue;
        }

        # initialize score list if necessary
        if ($Scores === NULL) {  $Scores = array();  }

        # retrieve list of items that contain phrase
        $ItemIds = $this->SearchFieldForPhrases($FieldName, $Phrase);

        # if phrase is excluded
        if ($IsExcluded)
        {
            # knock items containing phrase off of list
            foreach ($ItemIds as $ItemId)
            {
                unset($Scores[$ItemId]);
            }
            continue;
        }

        # number of words depends only on the phrase, so work it out once
        #       rather than once per matching item
        $PhraseWordCount = count(
                preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY));
        $PhraseIsRequired = ($Flags & self::WORD_REQUIRED) ? TRUE : FALSE;

        # for each item that contains phrase
        foreach ($ItemIds as $ItemId)
        {
            # calculate phrase value based on number of words and field weight
            $PhraseScore = $PhraseWordCount
                    * $this->FieldInfo[$FieldName]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # bump up item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $PhraseScore;
            }
            else
            {
                $Scores[$ItemId] = $PhraseScore;
            }

            # if phrase flagged as required
            if ($PhraseIsRequired)
            {
                # increment required word count for record
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1640 
/**
 * Remove from the scores any item that contains an excluded word.
 * @param array $Words Word flags (WORD_* bit values), indexed by word.
 * @param array $Scores Existing scores, indexed by item ID.
 * @param string $FieldName Name of field in which to look for excluded
 *       words.  (OPTIONAL, defaults to the keyword search field name
 *       "XXXKeywordXXX")
 * @return array Filtered scores, indexed by item ID.
 */
private function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
{
    $DB = $this->DB;

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # only words flagged as excluded are of interest
        if (!($Flags & self::WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word, skipping words not in DB
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items that contain word
        # (query is built by concatenation rather than "${...}"
        #       interpolation, which is deprecated as of PHP 8.2)
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        # for each item that contains word
        while ($Record = $DB->FetchRow())
        {
            # if item record is in score list
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                # remove item record from score list
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # returned filtered score list to caller
    return $Scores;
}
1684 
/**
 * Remove from the scores any item that did not match all required terms,
 * as judged by comparing the item's entry in $this->RequiredTermCounts
 * against $this->RequiredTermCount.
 * @param array $Scores Existing scores, indexed by item ID.
 * @return array Filtered scores, indexed by item ID.
 */
private function FilterOnRequiredWords($Scores)
{
    # nothing to filter on if there were no required terms
    $NeededCount = $this->RequiredTermCount;
    if (!($NeededCount > 0))
    {
        return $Scores;
    }

    # for each item
    foreach (array_keys($Scores) as $ItemId)
    {
        # leave item alone if it matched enough required terms
        $HasCount = isset($this->RequiredTermCounts[$ItemId]);
        if ($HasCount
                && !($this->RequiredTermCounts[$ItemId] < $NeededCount))
        {
            continue;
        }

        # describe how far short the item fell
        if ($HasCount)
        {
            $Shortfall = " (only had ".$this->RequiredTermCounts[$ItemId];
        }
        else
        {
            $Shortfall = " (had none";
        }

        # filter out item
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$NeededCount.$Shortfall.")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1714 
/**
 * Count, sort, and trim search result scores list.
 * Results are first run through any supplied filter functions, with the
 * number of results remaining saved in $this->NumberOfResultsAvailable.
 * They are then ordered either by score or by the order of the item IDs
 * returned by GetItemIdsSortedByField(), and finally cut down to the
 * requested range.
 * @param array $Scores Scores, indexed by item ID.
 * @param int $StartingResult Zero-based index of first result to return.
 * @param int $NumberOfResults Maximum number of results to return.
 * @param string $SortByField Name of field to sort results by, or NULL to
 *       sort by score.
 * @param bool $SortDescending If TRUE, results are sorted in descending
 *       order, otherwise ascending.
 * @return array Sorted and trimmed scores, indexed by item ID.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order
            # (list must start out as an empty array, so that we still
            #       have a valid (empty) score list if none of the sorted
            #       IDs appear in the search results)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # sort result list by score
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    # (keys must be preserved because they are the item IDs)
    return array_slice($Scores, $StartingResult, $NumberOfResults, TRUE);
}
1770 
/**
 * Run results through the filter functions in $this->FilterFuncs (if
 * set), discarding any item for which a filter function returns a value
 * that evaluates to TRUE.  Each function is called with the item ID as
 * its only argument, and no further functions are called for an item
 * once one of them has rejected it.
 * @param array $Scores Existing scores, indexed by item ID.
 * @return array Filtered scores, indexed by item ID.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # nothing to do if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    # for each result
    foreach (array_keys($Scores) as $ItemId)
    {
        # for each filter function
        foreach ($this->FilterFuncs as $FuncName)
        {
            # if filter function rejects item
            $Rejected = call_user_func($FuncName, $ItemId);
            if ($Rejected)
            {
                # discard result and move on to next item
                $this->DMsg(2, "Filter callback <i>".$FuncName
                        ."</i> rejected item ".$ItemId);
                unset($Scores[$ItemId]);
                break;
            }
        }
    }

    # return filtered list to caller
    return $Scores;
}
1800 
/**
 * Apply any comparison searches (e.g. ">=2005") found in the search
 * strings to the scores.  A search string for a non-keyword field is
 * treated as a comparison if it begins with a comparison operator
 * (>=, <=, !=, >, <, or =) or if the field is known and is not a text
 * field (in which case "=" is assumed when no operator is present).
 * With AND logic, scores are restricted to items matching the
 * comparisons (or seeded with them, if there were no prior results and
 * no inclusive terms); otherwise each matching item has 1 added to its
 * score.
 * @param array $SearchStrings Search string or array of search strings
 *       for each field, indexed by field name.
 * @param array $Scores Existing scores, indexed by item ID (may be NULL).
 * @return array Updated scores, indexed by item ID.
 */
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # comparisons found (parallel lists)
    $FieldNames = array();
    $Operators = array();
    $Values = array();

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # keyword searches do not involve comparisons
        if ($SearchFieldName == "XXXKeywordXXX")
        {
            continue;
        }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # skip string if it does not look like a comparison search
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != self::FIELDTYPE_TEXT);
            if (!$FoundOperator && !$IsNonTextField)
            {
                continue;
            }

            # determine value
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

            # determine operator
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            else
            {
                # (offsets use square brackets because the curly brace
                #       form is no longer accepted as of PHP 8, and the
                #       second character is checked for because trimming
                #       may leave only the operator character)
                $Term = trim($SearchString);
                $FirstChar = $Term[0];
                $FirstTwoChars = $FirstChar.(isset($Term[1]) ? $Term[1] : "");
                if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                {
                    $Operator = $FirstTwoChars;
                }
                elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                {
                    $Operator = $FirstChar;
                }
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save field name, operator, and value
                $FieldNames[] = $SearchFieldName;
                $Operators[] = $Operator;
                $Values[] = $Value;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$SearchFieldName."</i> op = <i>"
                        .$Operator."</i> val = <i>"
                        .$Value."</i>)");
            }
        }
    }

    # if no comparisons found then scores are unchanged
    if (count($Operators) == 0)
    {
        return $Scores;
    }

    # perform comparisons on fields and gather results
    $Results = $this->SearchFieldsForComparisonMatches(
            $FieldNames, $Operators, $Values);

    # make sure we have a score list to work with
    if ($Scores === NULL) {  $Scores = array();  }

    # if search logic is set to AND
    if ($this->DefaultSearchLogic == self::LOGIC_AND)
    {
        if (!count($Results))
        {
            # nothing matched the comparisons, so clear scores
            $Scores = array();
        }
        elseif ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
        {
            # there were no prior results and no terms for keyword
            #       search, so add all results to scores
            foreach ($Results as $ItemId)
            {
                $Scores[$ItemId] = 1;
            }
        }
        else
        {
            # remove anything from scores that is not part of results
            foreach (array_keys($Scores) as $ItemId)
            {
                if (!in_array($ItemId, $Results))
                {
                    unset($Scores[$ItemId]);
                }
            }
        }
    }
    else
    {
        # add result items to scores
        foreach ($Results as $ItemId)
        {
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += 1;
            }
            else
            {
                $Scores[$ItemId] = 1;
            }
        }
    }

    # return results to caller
    return $Scores;
}
1931 
/**
 * Run every search string supplied through ExtractDebugLevel(), which
 * sets the debug level and strips the debug level indicator out of the
 * string if one is present.
 * @param mixed $SearchStrings Single search string, or array (indexed by
 *       field name) of search strings or arrays of search strings.
 * @return mixed Search info in the same form as supplied, with any debug
 *       level indicators handled by ExtractDebugLevel() removed.
 */
private function SetDebugLevel($SearchStrings)
{
    # if search info is just a single string
    if (!is_array($SearchStrings))
    {
        # process string and hand it back
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # for each field
    $Processed = array();
    foreach ($SearchStrings as $FieldName => $FieldStrings)
    {
        if (is_array($FieldStrings))
        {
            # process each search string for field
            $Processed[$FieldName] = array();
            foreach ($FieldStrings as $Index => $SearchString)
            {
                $Processed[$FieldName][$Index] =
                        $this->ExtractDebugLevel($SearchString);
            }
        }
        else
        {
            # process lone search string for field
            $Processed[$FieldName] =
                    $this->ExtractDebugLevel($FieldStrings);
        }
    }

    # return new search info to caller
    return $Processed;
}
1966 
/**
 * Look for a debug level indicator ("DBUGLVL=" followed by one or two
 * digits in the range 1-9) at the start of a search string (leading
 * whitespace is allowed).  If one is found, the debug level is set
 * accordingly and the indicator is removed from the string.  Strings
 * that do not begin with an indicator are returned unchanged.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # if search string begins with debug level indicator
    # (level is captured directly, so that a string that merely contains
    #       "DBUGLVL=" somewhere can never be mistaken for a level)
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator from search string
        $SearchString = preg_replace(
                "/DBUGLVL=".$Level."/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1985 
/**
 * Load and return search result scores array containing all possible
 * records, by reading the ID of every row in the item table.
 * @return array Scores (all set to 1), indexed by item ID.
 */
private function LoadScoresForAllRecords()
{
    # query for the IDs of all items
    $IdFieldName = $this->ItemIdFieldName;
    $this->DB->Query("SELECT ".$IdFieldName
            ." FROM ".$this->ItemTableName);

    # give every item found a score of 1
    $AllScores = array();
    while ($Row = $this->DB->FetchRow())
    {
        $AllScores[$Row[$IdFieldName]] = 1;
    }

    # return array with all scores to caller
    return $AllScores;
}
2004 
2005 
2006  # ---- private functions used in building search database
2007 
/**
 * Add to the count stored in SearchWordCounts for a word and (when
 * stemming is enabled) for the stem of that word, with the stem receiving
 * half the weight (rounded up).  A new row is inserted the first time a
 * word/field combination is seen according to $this->WordCountAdded, and
 * the existing row is updated thereafter.
 * @param string $Word Word to update count for.
 * @param int $ItemId ID of item in which word appears.
 * @param int $FieldId ID of field in which word appears.
 * @param int $Weight Amount to add to count.  (OPTIONAL, defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # start with ID and full weight for word
    # (second argument to ID lookups presumably requests that an ID be
    #       added if none exists -- confirm against GetWordId())
    $Updates = array(array($this->GetWordId($Word, TRUE), $Weight));

    # if stemming is enabled
    if ($this->StemmingEnabled)
    {
        # add ID and decreased weight for stem of word
        $Stem = PorterStemmer::Stem($Word, TRUE);
        $Updates[] = array($this->GetStemId($Stem, TRUE), ceil($Weight / 2));
    }

    # for word and stem of word
    foreach ($Updates as $Update)
    {
        list($WordId, $Increment) = $Update;

        if (!isset($this->WordCountAdded[$WordId][$FieldId]))
        {
            # add word count to DB
            $this->DB->Query("INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$WordId.", ".$ItemId.", ".$FieldId.", ".$Increment.")");

            # remember that we added count for this word
            $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        }
        else
        {
            # update existing word count
            $this->DB->Query("UPDATE SearchWordCounts SET Count=Count+".$Increment
                    ." WHERE WordId=".$WordId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId);
        }
    }
}
2055 
/**
 * Retrieve content of a field for an item.  This version is only a
 * placeholder that halts the script with an error message (presumably
 * a subclass is expected to supply the real implementation -- confirm).
 * @param int $ItemId ID of item.  (unused here)
 * @param string $FieldName Name of field.  (unused here)
 */
protected function GetFieldContent($ItemId, $FieldName)
{
    # no implementation available at this level, so bail out
    $ErrorMsg = "<br>SE - ERROR: GetFieldContent() not implemented<br>\n";
    exit($ErrorMsg);
}
2061 
/**
 * Record word counts for a piece of text belonging to an item.  The text
 * is broken into words with ParseSearchStringForWords(), and a count is
 * recorded for each word against the specified field (at the default
 * weight) and, if requested, against the keyword search field (at the
 * supplied weight).
 * @param int $ItemId ID of item that text belongs to.
 * @param string $FieldName Name of field that text came from.
 * @param int $Weight Weight to use for keyword search field counts.
 * @param string $Text Text to record.
 * @param bool $IncludeInKeyword If TRUE, counts are also recorded for the
 *       keyword search field.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # normalize text
    $Words = $this->ParseSearchStringForWords($Text, TRUE);

    # nothing to record if there was no text left after parsing
    if (!(count($Words) > 0))
    {
        return;
    }

    # get ID for field (and for keyword field if it will be needed)
    $FieldId = $this->GetFieldId($FieldName);
    $KeywordFieldId = $IncludeInKeyword
            ? $this->GetFieldId("XXXKeywordXXX") : NULL;

    # for each word
    foreach (array_keys($Words) as $Word)
    {
        # update count for word
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # update keyword field count for word if requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
2097 
/**
 * Print debug message if the current debug level is set high enough.
 * @param int $Level Message is printed only when the current debug level
 *       is greater than this value.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level exceeds level of message
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    print("SE: ".$Msg."<br>\n");
}
2106 
2107  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2108 
2109  # possible types of logical operators
2110  const SEARCHLOGIC_AND = 1;
2111  const SEARCHLOGIC_OR = 2;
2112 }
2113 
2114 ?>
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
DropField($FieldName)
Drop all data pertaining to field from search database.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
NumberOfResults()
Get number of results found by most recent search.
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
SQL database abstraction object with smart query caching.
AddField($FieldName, $FieldType, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
Search($SearchString, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform keyword search.
GetAllSynonyms()
Get all synonyms.
SearchTermsRequiredByDefault($NewSetting=TRUE)
Set default search logic.
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
SearchEngine($ItemTableName, $ItemIdFieldName)
Object constructor.
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
const FIELDTYPE_DATE
SearchTerms()
Get normalized list of search terms.
const WORD_EXCLUDED
GroupedSearch($SearchGroups, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search with logical groups of fielded searches.
PHP
Definition: OAIClient.php:39
ItemCount()
Get total number of items indexed by search engine.
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search across multiple fields, with different values or comparisons specified for each field...
const STEM_ID_OFFSET
FieldWeight($FieldName)
Get search weight for specified field.
FieldInKeywordSearch($FieldName)
Get whether specified field is included in keyword searches.
RemoveAllSynonyms()
Remove all synonyms.
DMsg($Level, $Msg)
const WORD_PRESENT
Core metadata archive search engine class.
FieldedSearchWeightScale($SearchStrings)
Get total of weights for all fields involved in search, useful for assessing scale of scores in searc...
DefaultSearchLogic($NewSetting=NULL)
Get/set default search logic (LOGIC_AND or LOGIC_OR).
FieldType($FieldName)
Get type of specified field (text/numeric/date/daterange).
const FIELDTYPE_TEXT
const WORD_REQUIRED
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
GetFieldContent($ItemId, $FieldName)
UpdateForItem($ItemId)
Update search database for the specified item.
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSynonyms($Word)
Get synonyms for word.