CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 
3 #
4 # FILE: SearchEngine.php
5 #
6 # Open Source Metadata Archive Search Engine (OSMASE)
7 # Copyright 2002-2011 Edward Almasy and Internet Scout
8 # http://scout.wisc.edu
9 #
10 
11 class SearchEngine {
12 
13  # ---- PUBLIC INTERFACE --------------------------------------------------
14 
# Logical operators: used as the value of the DefaultSearchLogic property
# and as the optional "Logic" entry of a search group in GroupedSearch().
15  # possible types of logical operators
16  const LOGIC_AND = 1;
17  const LOGIC_OR = 2;
18 
# Field types: stored per field by AddField() as the "FieldType" entry;
# SearchAcrossFields() only runs word/phrase searches on FIELDTYPE_TEXT fields.
19  # flags used for indicating field types
20  const FIELDTYPE_TEXT = 1;
21  const FIELDTYPE_NUMERIC = 2;
22  const FIELDTYPE_DATE = 3;
24 
25  # object constructor
# Object constructor.
#   $DB - database object used for all queries
#   $ItemTableName - name of table holding the items being searched
#   $ItemIdFieldName - name of the ID column in that table
#   $ReferenceTableName - name of table holding item-to-item references
#   $ReferenceSourceIdFieldName - column holding the referring item ID
#   $ReferenceDestinationIdFieldName - column holding the referenced item ID
# NOTE(review): the parameters after $DB and the two item-name assignments
# were missing from this listing; they are reconstructed from the
# "new SearchEngine(...)" call in SearchAcrossFields() and from the
# ItemTableName() / ItemIdFieldName() accessors -- confirm against the
# canonical source.
function __construct(
        &$DB, $ItemTableName, $ItemIdFieldName,
        $ReferenceTableName, $ReferenceSourceIdFieldName,
        $ReferenceDestinationIdFieldName)
{
    # save database object for our use
    $this->DB = $DB;

    # save item access parameters
    $this->ItemTableName = $ItemTableName;
    $this->ItemIdFieldName = $ItemIdFieldName;
    $this->ReferenceTableName = $ReferenceTableName;
    $this->ReferenceSourceIdFieldName = $ReferenceSourceIdFieldName;
    $this->ReferenceDestinationIdFieldName = $ReferenceDestinationIdFieldName;

    # define flags used for indicating word states
    # (global constants, so only define them once per request)
    if (!defined("WORD_PRESENT")) {  define("WORD_PRESENT", 1);  }
    if (!defined("WORD_EXCLUDED")) {  define("WORD_EXCLUDED", 2);  }
    if (!defined("WORD_REQUIRED")) {  define("WORD_REQUIRED", 4);  }

    # set default debug state
    $this->DebugLevel = 0;
}

# Old-style (class-named) constructor, kept so that existing code calling
# SearchEngine(...) explicitly (e.g. from a child class) continues to work.
# PHP 8 no longer treats a class-named method as the constructor, which is
# why the initialization itself lives in __construct().
function SearchEngine(
        &$DB, $ItemTableName, $ItemIdFieldName,
        $ReferenceTableName, $ReferenceSourceIdFieldName,
        $ReferenceDestinationIdFieldName)
{
    $this->__construct($DB, $ItemTableName, $ItemIdFieldName,
            $ReferenceTableName, $ReferenceSourceIdFieldName,
            $ReferenceDestinationIdFieldName);
}
52 
53  # add field to be searched
# Register a field with the search engine.
#   $FieldName - name the field is known by in search strings
#   $DBFieldName - value later returned by DBFieldName()
#   $FieldType - one of the FIELDTYPE_ constants
#   $Weight - search weight for the field
#   $UsedInKeywordSearch - whether the field takes part in keyword searches
function AddField(
        $FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    # gather the settings under the keys the rest of the class reads
    $Settings = array(
            "DBFieldName" => $DBFieldName,
            "FieldType" => $FieldType,
            "Weight" => $Weight,
            "InKeywordSearch" => $UsedInKeywordSearch,
            );

    # store each setting individually so that any other entries already
    # recorded for this field are left untouched
    foreach ($Settings as $Key => $Value)
    {
        $this->FieldInfo[$FieldName][$Key] = $Value;
    }
}
63 
64  # retrieve info about tables and fields (useful for child objects)
# name of the table the indexed items are read from
function ItemTableName()
{
    return $this->ItemTableName;
}

# name of the ID column in the item table
function ItemIdFieldName()
{
    return $this->ItemIdFieldName;
}

# database field name registered for the specified field via AddField()
function DBFieldName($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["DBFieldName"];
}

# field type (FIELDTYPE_ constant) registered for the specified field
function FieldType($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["FieldType"];
}

# search weight registered for the specified field
function FieldWeight($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["Weight"];
}

# whether the specified field was registered as part of keyword searches
function FieldInKeywordSearch($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["InKeywordSearch"];
}
75 
76  # set debug level
# Set the debug level (the constructor starts it at 0).
#   $Setting - new debug level
function DebugLevel($Setting)
{
    $this->DebugLevel = $Setting;
}
81 
82 
83  # ---- search functions
84 
85  # perform keyword search
# Perform a keyword search and return the trimmed list of result scores.
#   $SearchString - search string as entered by the user
#   $StartingResult, $NumberOfResults - window of results to return
#   $SortByField, $SortDescending - passed through to CleanScores()
# Also records the elapsed time, available afterwards via SearchTime().
function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # run the string through SetDebugLevel() and search with what it returns
    $SearchString = $this->SetDebugLevel($SearchString);
    $this->DMsg(0, "In Search() with search string \"".$SearchString."\"");

    # remember when we started so the search can be timed
    $TimeStarted = microtime(TRUE);

    # reset the per-search term counters (updated while parsing)
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # break search string into individual words
    $Words = $this->ParseSearchStringForWords($SearchString);
    $this->DMsg(1, "Found ".count($Words)." words");

    # break search string into quoted phrases
    $Phrases = $this->ParseSearchStringForPhrases($SearchString);
    $this->DMsg(1, "Found ".count($Phrases)." phrases");

    # a search made up solely of exclusions starts from every record
    $OnlyExclusions = $this->ExcludedTermCount && !$this->InclusiveTermCount;
    if (!$OnlyExclusions)
    {
        # score items on words first, then on phrases
        $Scores = $this->SearchForWords($Words);
        $this->DMsg(1, "Found ".count($Scores)." results after word search");
        $Scores = $this->SearchForPhrases($Phrases, $Scores);
        $this->DMsg(1, "Found ".count($Scores)." results after phrase search");
    }
    else
    {
        $this->DMsg(1, "Loading all records");
        $Scores = $this->LoadScoresForAllRecords();
    }

    # when anything was found, drop excluded items and then items
    # lacking required words
    if (count($Scores))
    {
        $Scores = $this->FilterOnRequiredWords(
                $this->FilterOnExcludedWords($Words, $Scores));
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record how long the search took
    $this->LastSearchTime = microtime(TRUE) - $TimeStarted;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
145 
146  # perform search across multiple fields and return trimmed results to caller
# Perform a search across multiple fields and return the trimmed results.
#   $SearchStrings - search strings indexed by field name
#   $StartingResult, $NumberOfResults - window of results to return
#   $SortByField, $SortDescending - passed through to CleanScores()
function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # run the strings through SetDebugLevel() and search with what it returns
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    $this->DMsg(0, "In FieldedSearch() with "
            .count($SearchStrings)." search strings");

    # remember when we started so the search can be timed
    $TimeStarted = microtime(TRUE);

    # run the search (NULL comes back when no search was conducted)
    $Scores = $this->SearchAcrossFields($SearchStrings);
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record how long the search took
    $this->LastSearchTime = microtime(TRUE) - $TimeStarted;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
172 
173  # perform search with logical groups of fielded searches
# Perform a search made up of logical groups of fielded searches.
#   $SearchGroups - array of groups; each group may carry a "SearchStrings"
#       entry (searched via SearchAcrossFields()) and a "Logic" entry
#       (LOGIC_AND / LOGIC_OR used within that group)
#   $StartingResult, $NumberOfResults - window of results to return
#   $SortByField, $SortDescending - passed through to CleanScores()
# Groups are combined with each other using the search logic that was in
# effect when this method was called.
function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # run each group's search strings through SetDebugLevel()
    foreach (array_keys($SearchGroups) as $Index)
    {
        if (!isset($SearchGroups[$Index]["SearchStrings"])) {  continue;  }
        $SearchGroups[$Index]["SearchStrings"] =
                $this->SetDebugLevel($SearchGroups[$Index]["SearchStrings"]);
    }
    $this->DMsg(0, "In GroupedSearch() with "
            .count($SearchGroups)." search groups");

    # remember when we started so the search can be timed
    $TimeStarted = microtime(TRUE);

    # remember the logic in effect on entry (restored before returning)
    $SavedSearchLogic = $this->DefaultSearchLogic;

    # accumulate results group by group
    $Scores = array();
    $FirstSearch = TRUE;
    foreach ($SearchGroups as $Group)
    {
        $this->DMsg(0, "----- GROUP ---------------------------");

        # within the group, use its own logic if given, else the saved one
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $SavedSearchLogic;
        $this->DMsg(2, "Logic is "
                .(($this->DefaultSearchLogic == self::LOGIC_AND) ? "AND" : "OR"));

        # nothing to do for groups without search strings
        if (!isset($Group["SearchStrings"])) {  continue;  }

        # run the group's search (NULL means no search was conducted)
        $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);
        if ($GroupScores === NULL) {  continue;  }

        if ($FirstSearch || ($SavedSearchLogic == self::LOGIC_OR))
        {
            # OR (or very first group): fold group scores into the totals
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (!isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] = $Score;
                }
                else
                {
                    $Scores[$ItemId] += $Score;
                }
            }
            $FirstSearch = FALSE;
        }
        else
        {
            # AND: keep only items present in both lists, summing scores
            $Previous = $Scores;
            $Scores = array();
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($Previous[$ItemId]))
                {
                    $Scores[$ItemId] = $Previous[$ItemId] + $Score;
                }
            }
        }
    }

    # put back the logic setting that was in effect on entry
    $this->DefaultSearchLogic = $SavedSearchLogic;

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # record how long the search took
    $this->LastSearchTime = microtime(TRUE) - $TimeStarted;

    # hand results back to caller
    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
276 
277  # add function that will be called to filter search results
# Register a function to be used for filtering search results.
#   $FunctionName - name of the filter function (appended to the list of
#       filter functions already registered)
function AddResultFilterFunction($FunctionName)
{
    $this->FilterFuncs[] = $FunctionName;
}
283 
284  # get or set default search logic (AND or OR)
# Get or set the default search logic.
#   $NewSetting - LOGIC_AND or LOGIC_OR to change the setting, or omitted
#       (NULL) to leave it unchanged
# Returns the setting in effect after any change.
# NOTE(review): the return statement was missing from this listing (the
# method is documented as "get or set"); restored here -- confirm against
# the canonical source.
function DefaultSearchLogic($NewSetting = NULL)
{
    # loose comparison kept on purpose: 0 / FALSE / "" are not valid logic
    # values and have always been treated the same as "no new setting"
    if ($NewSetting != NULL)
    {
        $this->DefaultSearchLogic = $NewSetting;
    }
    return $this->DefaultSearchLogic;
}
293 
# Choose whether search terms are required by default.
#   $NewSetting - TRUE selects AND logic (terms required), FALSE selects
#       OR logic (terms optional)
function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    $this->DefaultSearchLogic = $NewSetting
            ? self::LOGIC_AND : self::LOGIC_OR;
}
305 
# Report the number of results available from the most recent search.
# NOTE(review): the body of this method was missing from this listing; the
# property returned is the one Search(), FieldedSearch(), and GroupedSearch()
# report as the final result count -- confirm against the canonical source.
function NumberOfResults()
{
    return $this->NumberOfResultsAvailable;
}
310 
# Report the list of (non-excluded) search terms collected while parsing
# search strings.
function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}

# Report how long the most recent search took, in seconds (difference of
# two microtime(TRUE) readings).
function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
320 
321  # report total weight for all fields involved in search
# Report total weight for all fields involved in a fielded search.
#   $SearchStrings - search strings indexed by field name (the special
#       name "XXXKeywordXXX" stands for a keyword search)
# Returns the sum of the weights of the named fields that are registered,
# plus (if a keyword search is included) the weights of every field that
# takes part in keyword searches.
function FieldedSearchWeightScale($SearchStrings)
{
    $Weight = 0;
    $IncludedKeywordSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # strict comparison so that an integer key (e.g. 0) is never
        # mistaken for the keyword marker
        if ($FieldName === "XXXKeywordXXX")
        {
            $IncludedKeywordSearch = TRUE;
        }
        elseif (isset($this->FieldInfo[$FieldName]))
        {
            # (isset also copes with no fields having been added yet)
            $Weight += $this->FieldInfo[$FieldName]["Weight"];
        }
    }

    # keyword searches span every field flagged for keyword searching
    if ($IncludedKeywordSearch && is_array($this->FieldInfo))
    {
        foreach ($this->FieldInfo as $FieldName => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Weight += $Info["Weight"];
            }
        }
    }
    return $Weight;
}
352 
353 
354  # ---- search database update functions
355 
356  # update search DB for the specified item
# Rebuild the search database entries for one item.
#   $ItemId - ID of item (negative IDs are ignored)
function UpdateForItem($ItemId)
{
    # negative IDs indicate temporary records, which are not indexed
    if ($ItemId < 0)
    {
        return;
    }

    # forget which word counts were already added
    unset($this->WordCountAdded);

    # throw away whatever is currently stored for the item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

    # index every field that carries a positive search weight
    foreach ($this->FieldInfo as $FieldName => $Info)
    {
        if (!($Info["Weight"] > 0)) {  continue;  }

        # field content may be a single string or an array of strings
        $Text = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Text) ? $Text : array($Text);

        # record search info for each string
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
399 
400  # update search DB for the specified range of items
# Update the search database for a range of items.
#   $StartingItemId - lowest item ID to include
#   $NumberOfItems - maximum number of items to update
# Returns the ID of the last item updated, or NULL if no items were found
# at or above the starting ID.
function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
    $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

    # start with no item updated, so the return value is defined even
    # when the query selects nothing
    $ItemId = NULL;

    # for each retrieved item ID
    foreach ($ItemIds as $ItemId)
    {
        # update search info for item
        $this->UpdateForItem($ItemId);
    }

    # return ID of last item updated to caller
    return $ItemId;
}
419 
420  # drop all data pertaining to item from search DB
# Remove everything stored about an item from the search database.
#   $ItemId - ID of item to drop
function DropItem($ItemId)
{
    # the word count table is the only place item data is deleted from here
    $Query = "DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId;
    $this->DB->Query($Query);
}
426 
427  # drop all data pertaining to field from search DB
# Drop all data pertaining to a field from the search database.
#   $FieldName - name of field to drop
function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # nothing to drop if the field is unknown to the search database
    if ($FieldId === NULL)
    {
        return;
    }

    # drop all entries pertaining to field from word counts table
    # (plain quotes: \' inside a double-quoted PHP string would put a
    # literal backslash into the SQL)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = '".$FieldId."'");

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = '".$FieldId."'");

    # forget any cached ID so GetFieldId() does not hand out the dropped one
    unset($this->FieldIds[$FieldName]);
}
440 
441  # return total number of terms indexed by search engine
# Report the total number of terms indexed (rows in SearchWords).
function SearchTermCount()
{
    $Query = "SELECT COUNT(*) AS TermCount"
            ." FROM SearchWords";
    return $this->DB->Query($Query, "TermCount");
}

# Report the total number of items indexed (distinct item IDs in
# SearchWordCounts).
function ItemCount()
{
    $Query = "SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts";
    return $this->DB->Query($Query, "ItemCount");
}
454 
# Add synonyms for a word.
#   $Word - word the synonyms belong to (added to the word list if new)
#   $Synonyms - array of synonyms (each added to the word list if new)
# Returns the number of synonym pairs that were not already stored.
function AddSynonyms($Word, $Synonyms)
{
    # look up (or create) the ID for the word
    $WordId = $this->GetWordId($Word, TRUE);

    # nothing added so far
    $NumberAdded = 0;

    foreach ($Synonyms as $Synonym)
    {
        # look up (or create) the ID for the synonym
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # skip pairs already stored in either direction
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                    ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                    ." AND WordIdA = ".$SynonymId.")");
        if ($this->DB->NumRowsSelected() != 0) {  continue;  }

        # store the new pair
        $this->DB->Query("INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$SynonymId.")");
        $NumberAdded++;
    }

    # tell caller how many pairs were added
    return $NumberAdded;
}
494 
495  # remove synonym(s)
# Remove synonyms for a word.
#   $Word - word whose synonyms should be removed
#   $Synonyms - array of specific synonyms to remove, or NULL to remove
#       every synonym stored for the word
function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # unknown words have no synonyms to remove
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, remove every pair involving the word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove only the listed synonyms
    foreach ($Synonyms as $Synonym)
    {
        # synonyms that are not known words cannot be stored as pairs
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL) {  continue;  }

        # remove the pair, whichever way round it was stored
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
534 
535  # remove all synonyms
# Remove every stored synonym pair (empties the SearchWordSynonyms table).
function RemoveAllSynonyms()
{
    $Query = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Query);
}
540 
541  # get synonyms for word (returns array of synonyms)
# Get the synonyms stored for a word.
#   $Word - word to look up
# Returns an array of synonyms (empty if the word is unknown or has none).
function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);

        # gather all the IDs before resolving any of them, because GetWord()
        # issues its own queries on the same database object
        # (FetchRow must be called as a method; without the parentheses
        # it is a property read and the loop never runs)
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # the synonym is whichever side of the pair is not our word
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # for each synonym ID
        foreach ($SynonymIds as $SynonymId)
        {
            # look up synonym word and add to synonym list
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
575 
576  # get all synonyms (returns 2D array w/ words as first index)
# Build a list of every stored synonym pair.
# Returns a 2D array indexed by word, each entry holding that word's
# synonyms; each pair is listed under only one of its two words (see the
# pruning loop below), and both levels are sorted alphabetically.
577  function GetAllSynonyms()
578  {
579  # assume no synonyms will be found
580  $SynonymList = array();
581 
582  # for each synonym ID pair
# (a separate Database object is used for the outer query -- presumably so
# that the lookups GetWord() makes through $this->DB do not disturb the
# rows still being fetched here; confirm against the Database class)
583  $OurDB = new Database();
584  $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
585  while ($Record = $OurDB->FetchRow())
586  {
587  # look up words
588  $Word = $this->GetWord($Record["WordIdA"]);
589  $Synonym = $this->GetWord($Record["WordIdB"]);
590 
591  # if we do not already have an entry for the word
592  # or synonym is not listed for this word
593  if (!isset($SynonymList[$Word])
594  || !in_array($Synonym, $SynonymList[$Word]))
595  {
596  # add entry for synonym
597  $SynonymList[$Word][] = $Synonym;
598  }
599 
600  # if we do not already have an entry for the synonym
601  # or word is not listed for this synonym
602  if (!isset($SynonymList[$Synonym])
603  || !in_array($Word, $SynonymList[$Synonym]))
604  {
605  # add entry for word
606  $SynonymList[$Synonym][] = $Word;
607  }
608  }
609 
# at this point every pair is listed in both directions; the loop below
# keeps each pair only under the word with the longer synonym list
# (ties keep it under the word being visited)
610  # for each word
611  # (this loop removes reciprocal duplicates)
612  foreach ($SynonymList as $Word => $Synonyms)
613  {
614  # for each synonym for that word
615  foreach ($Synonyms as $Synonym)
616  {
617  # if synonym has synonyms and word is one of them
# (the isset() checks matter because entries may already have been
# unset by earlier iterations, while $Synonyms still holds the list as it
# was when the outer loop reached this word)
618  if (isset($SynonymList[$Synonym])
619  && isset($SynonymList[$Word])
620  && in_array($Word, $SynonymList[$Synonym])
621  && in_array($Synonym, $SynonymList[$Word]))
622  {
623  # if word has less synonyms than synonym
624  if (count($SynonymList[$Word])
625  < count($SynonymList[$Synonym]))
626  {
627  # remove synonym from synonym list for word
628  $SynonymList[$Word] = array_diff(
629  $SynonymList[$Word], array($Synonym));
630 
631  # if no synonyms left for word
632  if (!count($SynonymList[$Word]))
633  {
634  # remove empty synonym list for word
635  unset($SynonymList[$Word]);
636  }
637  }
638  else
639  {
640  # remove word from synonym list for synonym
641  $SynonymList[$Synonym] = array_diff(
642  $SynonymList[$Synonym], array($Word));
643 
644  # if no synonyms left for synonym
645  if (!count($SynonymList[$Synonym]))
646  {
647  # remove empty synonym list for synonym
648  unset($SynonymList[$Synonym]);
649  }
650  }
651  }
652  }
653  }
654 
655  # sort array alphabetically (just for convenience)
# (asort() keeps the existing keys of each synonym list)
656  foreach ($SynonymList as $Word => $Synonyms)
657  {
658  asort($SynonymList[$Word]);
659  }
660  ksort($SynonymList);
661 
662  # return 2D array of synonyms to caller
663  return $SynonymList;
664  }
665 
666  # set all synonyms (accepts 2D array w/ words as first index)
# Replace all stored synonyms.
#   $SynonymList - 2D array of synonyms, indexed by word (the format
#       returned by GetAllSynonyms())
function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # store the synonyms listed for each word
    foreach ($SynonymList as $Word => $SynonymsForWord)
    {
        $this->AddSynonyms($Word, $SynonymsForWord);
    }
}
679 
# Load synonyms from a text file.
# Each usable line holds a word followed by one or more synonyms, separated
# by whitespace and/or commas.  Lines containing a "#" are skipped.
#   $FileName - name of file to read
# Returns the number of synonym pairs added (0 if the file could not be
# read or contained no usable lines).
function LoadSynonymsFromFile($FileName)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # read in contents of file
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

    # file() returns FALSE on failure, which must not reach the loop below
    if (!is_array($Lines))
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comment lines
        # NOTE(review): the pattern is unanchored, so any line containing
        # "#" anywhere is skipped, not just lines starting with it --
        # confirm that is intended
        if (preg_match("/[\s]*#/", $Line)) {  continue;  }

        # split line into words, discarding the empty pieces produced by
        # leading/trailing separators so they are never stored as words
        $Words = preg_split("/[\s,]+/", $Line, -1, PREG_SPLIT_NO_EMPTY);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
724 
725  # suggest alternatives
# Placeholder for suggesting alternate searches: the body is empty, so the
# method currently does nothing and returns NULL for any $SearchString.
726  function SuggestAlternateSearches($SearchString)
727  {
728  # (not implemented)
729  }
730 
731 
732  # ---- PRIVATE INTERFACE -------------------------------------------------
733 
# database object used for all queries (saved by the constructor)
734  protected $DB;
# debug level (set to 0 by the constructor, changed via DebugLevel())
735  protected $DebugLevel;
# item table and item ID column (used by UpdateForItems() to select items)
736  protected $ItemTableName;
737  protected $ItemIdFieldName;
# NOTE(review): the listing jumps from 737 to 742 here; declarations for
# other properties used in this class (e.g. the Reference* names and
# NumberOfResultsAvailable) are presumably in the dropped lines -- confirm
# duration of the most recent search, as a microtime(TRUE) difference
742  protected $LastSearchTime;
# names of result filter functions added via AddResultFilterFunction()
743  protected $FilterFuncs;
# LOGIC_AND or LOGIC_OR; terms are required by default
744  protected $DefaultSearchLogic = self::LOGIC_AND;
745  protected $StemmingEnabled = TRUE;
746  protected $SynonymsEnabled = TRUE;
747 
# cleared by UpdateForItem() at the start of each item update
748  private $WordCountAdded;
# cache of field IDs indexed by field name (see GetFieldId())
749  private $FieldIds;
# per-field settings indexed by field name (see AddField())
750  private $FieldInfo;
# term counters, reset at the start of each search and updated while
# search strings are parsed
751  private $RequiredTermCount;
752  private $RequiredTermCounts;
753  private $InclusiveTermCount;
754  private $ExcludedTermCount;
# non-excluded terms collected while parsing (returned by SearchTerms())
755  private $SearchTermList;
756 
# added to SearchStems row IDs by GetStemId() and recognized by GetWord(),
# so that stem IDs and word IDs can share one ID space
757  const STEM_ID_OFFSET = 1000000;
758 
759 
760  # ---- common private functions (used in both searching and DB build)
761 
762  # normalize and parse search string into list of search terms
# Normalize a search string and split it into individual search terms.
#   $SearchString - raw search string
#   $IgnorePhrases - when TRUE, quoted phrases and parenthesized groups are
#       kept (only the quote / paren characters are removed); when FALSE
#       they are stripped out entirely
# Returns an array indexed by lower-cased word, with each value a bit mask
# of WORD_PRESENT, WORD_EXCLUDED ("-" prefix), and WORD_REQUIRED ("+" prefix
# or AND default logic; a "~" prefix marks the word as not required).
# Side effects: increments the Excluded/Required/Inclusive term counters and
# appends each new non-excluded word to the search term list.
763  private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
764  {
765  # strip off any surrounding whitespace
766  $Text = trim($SearchString);
767 
768  # set up normalization replacement strings
# (patterns are applied in order, each paired with the replacement at the
# same position in $Replacements)
769  $Patterns = array(
770  "/'s[^a-z0-9\\-+~]+/i", # get rid of possessive plurals
771  "/'/", # get rid of single quotes / apostrophes
772  "/\"[^\"]*\"/", # get rid of phrases (NOTE: HARD-CODED INDEX BELOW!!!) "
773  "/\\([^)]*\\)/", # get rid of groups (NOTE: HARD-CODED INDEX BELOW!!!)
774  "/[^a-z0-9\\-+~]+/i", # convert non-alphanumerics / non-minus/plus to a space
775  "/([^\\s])-+/i", # convert minus preceded by anything but whitespace to a space
776  "/([^\\s])\\++/i", # convert plus preceded by anything but whitespace to a space
777  "/-\\s/i", # convert minus followed by whitespace to a space
778  "/\\+\\s/i", # convert plus followed by whitespace to a space
779  "/~\\s/i", # convert tilde followed by whitespace to a space
780  "/[ ]+/" # convert multiple spaces to one space
781  );
782  $Replacements = array(
783  " ",
784  "",
785  " ",
786  " ",
787  "\\1 ",
788  "\\1 ",
789  " ",
790  " ",
791  " ",
792  " ",
793  " "
794  );
795 
796  # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens)
797  if ($IgnorePhrases)
798  {
799  # switch phrase removal to double quote removal (HARD-CODED INDEX INTO PATTERN LIST!!)
800  $Patterns[2] = "/\"/";
801 
802  # switch group removal to paren removal (HARD-CODED INDEX INTO PATTERN LIST!!)
803  $Patterns[3] = "/[\(\)]+/";
804  }
805 
806  # remove punctuation from text and normalize whitespace
807  $Text = preg_replace($Patterns, $Replacements, $Text);
808  $this->DMsg(2, "Normalized search string is '".$Text."'");
809 
810  # convert text to lower case
811  $Text = strtolower($Text);
812 
813  # strip off any extraneous whitespace
814  $Text = trim($Text);
815 
816  # start with an empty array
817  $Words = array();
818 
819  # if there is any text left after normalization
820  if (strlen($Text) != 0)
821  {
822  # for each word
823  foreach (explode(" ", $Text) as $Word)
824  {
825  # grab first character of word
826  $FirstChar = substr($Word, 0, 1);
827 
828  # strip off option characters and set flags appropriately
# (counters are only incremented the first time a word is seen)
829  $Flags = WORD_PRESENT;
830  if ($FirstChar == "-")
831  {
832  $Word = substr($Word, 1);
833  $Flags |= WORD_EXCLUDED;
834  if (!isset($Words[$Word]))
835  {
836  $this->ExcludedTermCount++;
837  }
838  }
839  else
840  {
# "~" explicitly marks the word as optional, even under AND logic
841  if ($FirstChar == "~")
842  {
843  $Word = substr($Word, 1);
844  }
845  elseif (($this->DefaultSearchLogic == self::LOGIC_AND)
846  || ($FirstChar == "+"))
847  {
848  if ($FirstChar == "+")
849  {
850  $Word = substr($Word, 1);
851  }
852  $Flags |= WORD_REQUIRED;
853  if (!isset($Words[$Word]))
854  {
855  $this->RequiredTermCount++;
856  }
857  }
858  if (!isset($Words[$Word]))
859  {
860  $this->InclusiveTermCount++;
861  $this->SearchTermList[] = $Word;
862  }
863  }
864 
865  # store flags to indicate word found
# (a repeated word overwrites the flags stored for its earlier occurrence)
866  $Words[$Word] = $Flags;
867  $this->DMsg(3, "Word identified (".$Word.")");
868  }
869  }
870 
871  # return normalized words to caller
872  return $Words;
873  }
874 
# Retrieve the search database ID for a field, adding the field to the
# SearchFields table if it is not there yet.
#   $FieldName - name of field
# Returns the field ID (cached per object after the first lookup).
protected function GetFieldId($FieldName)
{
    # answer from the cache whenever possible
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # otherwise ask the database
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'");
    $Record = $this->DB->FetchRow();

    if ($Record)
    {
        # field is already known
        $FieldId = $Record["FieldId"];
    }
    else
    {
        # field is new, so add it and use the ID it was given
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".addslashes($FieldName)."')");
        $FieldId = $this->DB->LastInsertId();
    }

    # remember the ID for next time
    $this->FieldIds[$FieldName] = $FieldId;
    return $FieldId;
}
907 
908  # retrieve ID for specified word (returns NULL if no ID found)
# Retrieve the ID for a word.
#   $Word - word to look up
#   $AddIfNotFound - when TRUE, a word not yet in SearchWords is added
#       (lower-cased) and its new ID returned
# Returns the word ID, or NULL if the word was not found (and not added).
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    # lookups are remembered for the rest of the request
    static $WordIdCache;

    # answer from the cache whenever possible
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # otherwise ask the database
    $WordId = $this->DB->Query("SELECT WordId"
            ." FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'",
            "WordId");

    # add the word when it is unknown and the caller asked for that
    if ($AddIfNotFound && ($WordId === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $WordId = $this->DB->LastInsertId();
    }

    # remember the outcome and hand it back
    # (a NULL entry never satisfies isset(), so misses are looked up again)
    $WordIdCache[$Word] = $WordId;
    return $WordId;
}
945 
946  # retrieve ID for specified word stem (returns NULL if no ID found)
# Retrieve the ID for a word stem.
#   $Stem - stem to look up
#   $AddIfNotFound - when TRUE, a stem not yet in SearchStems is added
#       (lower-cased) and its new ID returned
# Returns the stem ID (database ID plus STEM_ID_OFFSET), or NULL if the
# stem was not found (and not added).
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        return $StemIdCache[$Stem];
    }

    # look up ID in database
    $StemId = $this->DB->Query("SELECT WordId"
            ." FROM SearchStems"
            ." WHERE WordText='".addslashes($Stem)."'",
            "WordId");

    # if ID was not found and caller requested it be added
    if (($StemId === NULL) && $AddIfNotFound)
    {
        # add stem to database
        $this->DB->Query("INSERT INTO SearchStems (WordText)"
                ." VALUES ('".addslashes(strtolower($Stem))."')");

        # get ID for newly added stem
        $StemId = $this->DB->LastInsertId();
    }

    # only a real ID is offset and cached: adding the offset to NULL would
    # turn "not found" into the bogus ID STEM_ID_OFFSET, and caching that
    # would also block a later request to add the stem
    if ($StemId !== NULL)
    {
        # adjust from DB ID value to stem ID value
        $StemId += self::STEM_ID_OFFSET;

        # save ID to cache
        $StemIdCache[$Stem] = $StemId;
    }

    # return ID to caller
    return $StemId;
}
986 
987  # retrieve word for specified word ID (returns FALSE if no word found)
# Retrieve the text for a word ID or stem ID.
#   $WordId - word ID, or stem ID (STEM_ID_OFFSET or above)
# Returns the word text, or whatever the database layer reports for a
# missing value when no matching word is found (GetWordId() treats that
# as NULL).
private function GetWord($WordId)
{
    static $WordCache;

    # if word was in cache
    if (isset($WordCache[$WordId]))
    {
        # use word from cache
        return $WordCache[$WordId];
    }

    # pick search location and row ID, leaving $WordId itself untouched so
    # the cache below is keyed by the ID the caller passed in (otherwise a
    # stem would be cached under the plain word ID with the same row number)
    $TableName = "SearchWords";
    $RowId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $RowId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$RowId."'",
            "WordText");

    # save word to cache
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
1021 
1022 
1023  # ---- private functions used in searching
1024 
1025  # perform search across multiple fields and return raw results to caller
# Perform a search across multiple fields and return raw (unsorted,
# untrimmed) result scores.
#   $SearchStrings - search strings indexed by field name; each entry is a
#       string or an array of strings, and the special field name
#       "XXXKeywordXXX" stands for a keyword search
# Returns an array of scores indexed by item ID, or NULL if no search was
# conducted (callers test for NULL explicitly).
private function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;
    $ReferentScores = NULL;

    # start with no parsed words or phrases for any field
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # construct a search engine object used to isolate reference-based
    # searches from normal search parameters
    # NOTE(review): neither this object nor $ReferentScores is used again
    # in this method as listed, so the reference merge below never runs --
    # confirm whether reference searching was removed or is incomplete
    $ReferenceSearchEngine = new SearchEngine(
            $this->DB,
            $this->ItemTableName,
            $this->ItemIdFieldName,
            $this->ReferenceTableName,
            $this->ReferenceSourceIdFieldName,
            $this->ReferenceDestinationIdFieldName);

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look like comparison match
            # (strict comparison so an integer key is never mistaken for
            # the keyword marker)
            $NotComparisonSearch = !preg_match("/^[><!]=./", $SearchString)
                    && !preg_match("/^[><=]./", $SearchString);
            if (($FieldName === "XXXKeywordXXX")
                    || (isset($this->FieldInfo[$FieldName])
                            && ($this->FieldInfo[$FieldName]["FieldType"]
                                    == self::FIELDTYPE_TEXT)
                            && $NotComparisonSearch))
            {
                $this->DMsg(0, "Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"");

                # normalize text and split into words
                $Words[$FieldName] =
                        $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words[$FieldName]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldName], $FieldName, $Scores);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after word search");
                }

                # split into phrases
                $Phrases[$FieldName] =
                        $this->ParseSearchStringForPhrases($SearchString);

                # handle any phrases
                if (count($Phrases[$FieldName]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        $this->DMsg(3, "Have ".(is_array($Scores) ? count($Scores) : 0)
                ." results after comparison search");
    }

    # if reference search results were found
    # (empty() rather than count(), which rejects NULL on newer PHP versions)
    if (!empty($ReferentScores))
    {
        # where merged scores (reference ANDed with normal scores) are
        # stored for this block
        $MergedScores = array();

        # get reference items and scores from the referent item
        foreach ($ReferentScores as $ItemId => $Score)
        {
            # query for source references for the referent
            $this->DB->Query("
                SELECT * FROM ".$this->ReferenceTableName."
                WHERE ".$this->ReferenceDestinationIdFieldName." = '".addslashes($ItemId)."'");

            # loop through each found item
            while (FALSE !== ($Row = $this->DB->FetchRow()))
            {
                $ReferenceId = $Row[$this->ReferenceSourceIdFieldName];

                # add to an existing score
                if (isset($Scores[$ReferenceId]))
                {
                    $MergedScores[$ReferenceId] =
                            $Scores[$ReferenceId] + $Score;
                }

                # create a score for the reference item
                else
                {
                    $MergedScores[$ReferenceId] = $Score;
                }
            }
        }

        # replace the existing scores with the merged ones
        $Scores = $MergedScores;
    }

    # if no results found and exclusions specified
    # ($Scores may still be NULL here, hence empty() instead of count())
    if (empty($Scores) && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (!empty($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldName === "XXXKeywordXXX")
                        || (isset($this->FieldInfo[$FieldName])
                                && ($this->FieldInfo[$FieldName]["FieldType"]
                                        == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldName]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords($Words[$FieldName], $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldName]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
                    }
                }
            }
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1204 
1205  # search for words in specified field
# Score items against a list of words for one field.
#   $Words     - array of (word => flag bits); words flagged WORD_EXCLUDED
#                are skipped entirely here
#   $FieldName - field whose ID is looked up via GetFieldId()
#   $Scores    - existing (item ID => score) list to add to, or NULL to
#                start a fresh one
# For each word the exact word counts are loaded first, then (when enabled)
# synonym counts and stem counts are merged in at half value (rounded up).
# Every item that ends up with a count has that count added to its score,
# and, for words flagged WORD_REQUIRED, has its entry in
# $this->RequiredTermCounts incremented.
# Returns the updated (item ID => score) array.
private function SearchForWords(
        $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
{
    $DB = $this->DB;

    # start with empty search result scores list if none passed in
    if ($Scores == NULL)
    {
        $Scores = array();
    }

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # counts stay NULL until some lookup for this word produces a list
        $Counts = NULL;
        $this->DMsg(2, "Searching for word '".$Word."' in field ".$FieldName);

        # excluded words contribute nothing to the scores
        if ($Flags & WORD_EXCLUDED)
        {
            continue;
        }

        # look up record ID for word
        $this->DMsg(2, "Looking up word \"".$Word."\"");
        $WordId = $this->GetWordId($Word);

        # if word is in DB
        if ($WordId !== NULL)
        {
            # look up counts for word
            $Counts = $this->FetchCountsForWordId($WordId, $FieldId);

            # if synonym support is enabled
            if ($this->SynonymsEnabled)
            {
                # look for any synonyms (word may be on either side of a pair)
                $DB->Query("SELECT WordIdA, WordIdB"
                        ." FROM SearchWordSynonyms"
                        ." WHERE WordIdA = ".$WordId
                        ." OR WordIdB = ".$WordId);

                # if synonyms were found
                if ($DB->NumRowsSelected())
                {
                    # collect all synonym IDs before issuing further queries
                    $SynonymIds = array();
                    while ($Record = $DB->FetchRow())
                    {
                        $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                                ? $Record["WordIdB"]
                                : $Record["WordIdA"];
                    }

                    # merge in counts for each synonym at half value
                    foreach ($SynonymIds as $SynonymId)
                    {
                        $Counts = $this->AddHalvedCounts($Counts,
                                $this->FetchCountsForWordId($SynonymId, $FieldId));
                    }
                }
            }
        }

        # if stemming is enabled
        if ($this->StemmingEnabled)
        {
            # retrieve stem ID
            $Stem = PorterStemmer::Stem($Word);
            $this->DMsg(2, "Looking up stem \"".$Stem."\"");
            $StemId = $this->GetStemId($Stem);

            # if ID found for stem
            if ($StemId !== NULL)
            {
                # merge in counts for stem at half value
                $Counts = $this->AddHalvedCounts($Counts,
                        $this->FetchCountsForWordId($StemId, $FieldId));
            }
        }

        # nothing more to do if no lookup produced counts
        if ($Counts === NULL)
        {
            continue;
        }

        # for each count
        foreach ($Counts as $ItemId => $Count)
        {
            # if word flagged as required
            if ($Flags & WORD_REQUIRED)
            {
                # increment required word count for record
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }

            # add to item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $Count;
            }
            else
            {
                $Scores[$ItemId] = $Count;
            }
        }
    }

    # return basic scores to caller
    return $Scores;
}

# Load the SearchWordCounts rows for one word (or stem) ID within one field.
# Returns whatever FetchColumn("Count", "ItemId") yields for that query.
private function FetchCountsForWordId($WordId, $FieldId)
{
    $this->DB->Query("SELECT ItemId,Count"
            ." FROM SearchWordCounts"
            ." WHERE WordId = ".$WordId
            ." AND FieldId = ".$FieldId);
    return $this->DB->FetchColumn("Count", "ItemId");
}

# Add secondary (synonym or stem) counts to a count list at half value,
# rounded up.  $Counts may be NULL, in which case a list is only created
# if there is at least one secondary count to add.
private function AddHalvedCounts($Counts, $ExtraCounts)
{
    foreach ($ExtraCounts as $ItemId => $Count)
    {
        # adjust count because it is not an exact match
        $AdjustedCount = ceil($Count / 2);

        # add count to existing counts
        if (isset($Counts[$ItemId]))
        {
            $Counts[$ItemId] += $AdjustedCount;
        }
        else
        {
            $Counts[$ItemId] = $AdjustedCount;
        }
    }
    return $Counts;
}
1368 
1369  # extract phrases (terms surrounded by quotes) from search string
# Pull double-quoted phrases out of a search string.
# The string is split on double quotes; every odd-numbered chunk that has a
# closing quote after it is a phrase, and the last character of the chunk
# before it is treated as the option character ("-" exclude, "+" require,
# "~" not required even when the default logic is AND).
# Updates the excluded/required/inclusive term counters and the search term
# list the first time each phrase is seen.
# Returns array of (escaped, trimmed phrase => flag bits).
private function ParseSearchStringForPhrases($SearchString)
{
    $Chunks = explode("\"", $SearchString);
    $ChunkCount = count($Chunks);
    $Phrases = array();

    # walk the quoted chunks (1, 3, 5, ...) that are followed by another chunk
    for ($Pos = 1; ($Pos + 1) < $ChunkCount; $Pos += 2)
    {
        $Phrase = trim(addslashes($Chunks[$Pos]));
        $OptionChar = substr($Chunks[$Pos - 1], -1);
        $FirstSighting = !isset($Phrases[$Phrase]);
        $Flags = WORD_PRESENT;

        if ($OptionChar == "-")
        {
            # phrase is to be excluded from results
            $Flags |= WORD_EXCLUDED;
            if ($FirstSighting)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            # phrase is required if AND logic is in effect (and not
            # overridden with "~") or if explicitly marked with "+"
            $Required = (($this->DefaultSearchLogic == self::LOGIC_AND)
                            && ($OptionChar != "~"))
                    || ($OptionChar == "+");
            if ($Required)
            {
                $Flags |= WORD_REQUIRED;
                if ($FirstSighting)
                {
                    $this->RequiredTermCount++;
                }
            }
            if ($FirstSighting)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # record (or overwrite) flags for phrase
        $Phrases[$Phrase] = $Flags;
    }

    return $Phrases;
}
1422 
1423  # extract groups (terms surrounded by parens) from search string
1424  # (NOTE: NOT YET IMPLEMENTED!!!)
# Extract groups from a search string (placeholder implementation).
# The string is split on "(" and every second chunk (1, 3, 5, ...) that has
# another chunk after it is escaped, trimmed, and collected.
# Returns a list of group strings; an empty array when none are found.
private function ParseSearchStringForGroups($SearchString)
{
    # split into chunks delimited by open paren
    $Pieces = explode("(", $SearchString);

    # start with an empty list so callers always get an array back
    $Groups = array();

    # for each pair of chunks
    $Index = 2;
    while ($Index < count($Pieces))
    {
        # grab group from chunk
        $Groups[] = trim(addslashes($Pieces[$Index - 1]));

        # move to next pair of chunks
        $Index += 2;
    }

    # return groups to caller
    return $Groups;
}
1445 
# Stub: this base implementation terminates the script with an error
# message.  SearchForPhrases() iterates over the return value as a list of
# item IDs, so a working version presumably comes from a child class.
protected function SearchFieldForPhrases($FieldName, $Phrase)
{
    $ErrorMessage = "<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n";
    exit($ErrorMessage);
}
1451 
# Apply phrase matches to a score list.
#   $Phrases            - array of (phrase => flag bits)
#   $Scores             - (item ID => score) list to update (may be NULL)
#   $FieldName          - field to search; the keyword pseudo-field fans
#                         out to every field marked for keyword searching
#   $ProcessNonExcluded - whether to score phrases not flagged WORD_EXCLUDED
#   $ProcessExcluded    - whether to remove items matching phrases flagged
#                         WORD_EXCLUDED
# Returns the updated score list.
private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do without phrases
    if (!(count($Phrases) > 0))
    {
        return $Scores;
    }

    # a keyword search is run as one search per keyword-enabled field
    if ($FieldName == "XXXKeywordXXX")
    {
        foreach ($this->FieldInfo as $KeywordFieldName => $KeywordFieldInfo)
        {
            if ($KeywordFieldInfo["InKeywordSearch"])
            {
                $Scores = $this->SearchForPhrases($Phrases, $Scores,
                        $KeywordFieldName, $ProcessNonExcluded, $ProcessExcluded);
            }
        }
        return $Scores;
    }

    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldName);

        # work out which (if either) kind of handling applies to this phrase
        $RemoveMatches = ($ProcessExcluded && ($Flags & WORD_EXCLUDED));
        $ScoreMatches = ($ProcessNonExcluded && !($Flags & WORD_EXCLUDED));
        if (!$RemoveMatches && !$ScoreMatches)
        {
            continue;
        }

        # initialize score list if necessary
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # retrieve list of items that contain phrase
        $MatchingIds = $this->SearchFieldForPhrases($FieldName, $Phrase);

        foreach ($MatchingIds as $ItemId)
        {
            if ($RemoveMatches)
            {
                # knock item off of list
                unset($Scores[$ItemId]);
                continue;
            }

            # (getting here means the phrase is being scored, since one of
            # the two kinds of handling must apply)

            # phrase value is number of words times field weight
            $PhraseWords = preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY);
            $PhraseScore = count($PhraseWords)
                    * $this->FieldInfo[$FieldName]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # bump up item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $PhraseScore;
            }
            else
            {
                $Scores[$ItemId] = $PhraseScore;
            }

            # track required phrases found for item
            if ($Flags & WORD_REQUIRED)
            {
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1542 
# Remove from the score list every item indexed under an excluded word.
#   $Words     - array of (word => flag bits); only words flagged
#                WORD_EXCLUDED are acted on
#   $Scores    - (item ID => score) list to filter
#   $FieldName - field whose word counts are consulted
# Returns the filtered score list.
private function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
{
    $DB = $this->DB;

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # only excluded words are of interest
        if (!($Flags & WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word (a word not in the DB cannot match anything)
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items that contain word in this field
        # (plain concatenation -- "${var}" interpolation is deprecated)
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        # for each item found
        while ($Record = $DB->FetchRow())
        {
            # if item record is in score list
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                # remove item record from score list
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # returned filtered score list to caller
    return $Scores;
}
1586 
# Drop items that did not match every required term.
# An item is kept only if its entry in $this->RequiredTermCounts is at
# least $this->RequiredTermCount; when no terms were required the list is
# returned untouched.
private function FilterOnRequiredWords($Scores)
{
    # nothing to filter on if no terms were required
    if (!($this->RequiredTermCount > 0))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        $HasCount = isset($this->RequiredTermCounts[$ItemId]);

        # keep items that reached the required count
        if ($HasCount
                && !($this->RequiredTermCounts[$ItemId] < $this->RequiredTermCount))
        {
            continue;
        }

        # describe what the item actually had
        if ($HasCount)
        {
            $Detail = " (only had ".$this->RequiredTermCounts[$ItemId];
        }
        else
        {
            $Detail = " (had none";
        }

        # filter out item
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$this->RequiredTermCount
                .$Detail
                .")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1616 
1617  # count, sort, and trim search result scores list
# Filter, count, sort, and trim a search result score list.
#   $Scores          - (item ID => score) list
#   $StartingResult  - offset of first result to return
#   $NumberOfResults - maximum number of results to return
#   $SortByField     - field to sort by, or NULL to sort by score
#   $SortDescending  - TRUE for descending order
# Saves the number of results left after the filter callbacks in
# $this->NumberOfResultsAvailable.
# Returns the requested slice of the sorted list, keyed by item ID.
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (starting from an empty
            # list so that no overlap yields an empty result rather than
            # an undefined variable)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # fall back to sorting result list by score, highest first
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    # (slice the keys rather than the list so item IDs are preserved)
    $ScoresKeys = array_slice(
            array_keys($Scores), $StartingResult, $NumberOfResults);
    $TrimmedScores = array();
    foreach ($ScoresKeys as $Key)
    {
        $TrimmedScores[$Key] = $Scores[$Key];
    }

    # returned cleaned search result scores list to caller
    return $TrimmedScores;
}
1672 
# Run each result past the registered filter callbacks.
# Each callback in $this->FilterFuncs is called with the item ID; the first
# one to return a true value causes the item to be removed, and no further
# callbacks are tried for that item.
# Returns the filtered (item ID => score) list.
protected function FilterOnSuppliedFunctions($Scores)
{
    # pass list through unchanged if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # move on to next callback unless this one rejects the item
            if (!call_user_func($FuncName, $ItemId))
            {
                continue;
            }

            # discard result
            $this->DMsg(2, "Filter callback <i>".$FuncName
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);

            # no need to consult remaining callbacks for this item
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1702 
# Find comparison-style search terms and apply their matches to the scores.
#   $SearchStrings - array of (field name => search string or list of
#                    search strings)
#   $Scores        - existing (item ID => score) list (may be NULL)
# A search string on a non-keyword field is treated as a comparison when it
# starts with one of the operators >= <= != > < = followed by at least one
# more character, or when the field is known and is not a text field (in
# which case "=" is assumed).  The comparisons are handed to
# SearchFieldsForComparisonMatches() as three parallel lists, and its
# result is iterated as a list of matching item IDs.
# With AND logic the matches narrow the existing scores (or seed them when
# there are no prior results and no inclusive terms); otherwise each match
# adds 1 to the item's score.
# Returns the updated score list.
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # parallel lists describing each comparison (same index in all three)
    $FieldNames = array();
    $Operators = array();
    $Values = array();
    $Index = 0;

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # keyword searches are never comparisons
        if ($SearchFieldName == "XXXKeywordXXX")
        {
            continue;
        }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # skip search string unless it looks like a comparison search
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            if (!$FoundOperator
                    && !(isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                            && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                                    != self::FIELDTYPE_TEXT)))
            {
                continue;
            }

            # determine value (search string with leading operator removed)
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

            # determine operator, preferring the two-character forms
            # (captured with a pattern rather than "{}" string offsets,
            # which are not supported by PHP 8 and could read past the end
            # of a one-character term)
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            elseif (preg_match("/^([><!]=|[><=])/", trim($SearchString), $Matches))
            {
                $Operator = $Matches[1];
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save operator, value, and field name
                $Operators[$Index] = $Operator;
                $Values[$Index] = $Value;
                $FieldNames[$Index] = $SearchFieldName;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$FieldNames[$Index]."</i> op = <i>"
                        .$Operators[$Index]."</i> val = <i>"
                        .$Values[$Index]."</i>)");

                # move to next comparison array entry
                $Index++;
            }
        }
    }

    # if comparisons found
    if (count($Operators))
    {
        # make sure we have a list to work with (count() and foreach
        # below do not accept NULL)
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # perform comparisons on fields and gather results
        $Results = $this->SearchFieldsForComparisonMatches(
                $FieldNames, $Operators, $Values);

        # if search logic is set to AND
        if ($this->DefaultSearchLogic == self::LOGIC_AND)
        {
            # if results were found
            if (count($Results))
            {
                # if there were no prior results and no terms for keyword search
                if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
                {
                    # add all results to scores
                    foreach ($Results as $ItemId)
                    {
                        $Scores[$ItemId] = 1;
                    }
                }
                else
                {
                    # remove anything from scores that is not part of results
                    foreach ($Scores as $ItemId => $Score)
                    {
                        if (in_array($ItemId, $Results) == FALSE)
                        {
                            unset($Scores[$ItemId]);
                        }
                    }
                }
            }
            else
            {
                # nothing satisfied the comparisons, so nothing can match
                $Scores = array();
            }
        }
        else
        {
            # add result items to scores
            foreach ($Results as $ItemId)
            {
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += 1;
                }
                else
                {
                    $Scores[$ItemId] = 1;
                }
            }
        }
    }

    # return results to caller
    return $Scores;
}
1833 
# Run every search string through ExtractDebugLevel().
# Accepts a single search string, an array of (field name => search
# string), or an array of (field name => list of search strings), and
# returns the same structure with each string replaced by the value
# ExtractDebugLevel() returned for it.
private function SetDebugLevel($SearchStrings)
{
    # a lone search string is handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    foreach ($SearchStrings as $FieldName => $Entry)
    {
        # single search string for this field
        if (!is_array($Entry))
        {
            $SearchStrings[$FieldName] = $this->ExtractDebugLevel($Entry);
            continue;
        }

        # list of search strings for this field
        foreach ($Entry as $Index => $SearchString)
        {
            $SearchStrings[$FieldName][$Index] =
                    $this->ExtractDebugLevel($SearchString);
        }
    }

    # return new search info to caller
    return $SearchStrings;
}
1868 
# Look for a debug level indicator at the start of a search string.
# If the string begins (after optional whitespace) with "DBUGLVL=" followed
# by a one- or two-digit level with no leading zero, the level is stored in
# $this->DebugLevel and every occurrence of that indicator is removed from
# the string.  Strings without an indicator in that position are returned
# unchanged.
# Returns the (possibly) modified search string.
private function ExtractDebugLevel($SearchString)
{
    # capture the level explicitly so that a string with no leading
    # indicator can never be mistaken for a level value
    if (preg_match("/^\\s*DBUGLVL=([1-9][0-9]?)/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator from search string
        $SearchString = preg_replace("/DBUGLVL=".$Level."/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1887 
1888  # load and return search result scores array containing all possible records
# Build a score list that contains every item in the item table.
# Returns array of (item ID => 1).
private function LoadScoresForAllRecords()
{
    $IdFieldName = $this->ItemIdFieldName;

    # retrieve the ID of every item
    $this->DB->Query("SELECT ".$IdFieldName
            ." FROM ".$this->ItemTableName);

    # give each item a score of 1
    $AllScores = array();
    while ($Row = $this->DB->FetchRow())
    {
        $AllScores[$Row[$IdFieldName]] = 1;
    }

    return $AllScores;
}
1906 
1907 
1908  # ---- private functions used in building search database
1909 
# Add to the stored count for a word (and, when stemming is enabled, its
# stem) for one item and field.
#   $Word    - word to record
#   $ItemId  - ID of item the word was found in
#   $FieldId - ID of field the word was found in
#   $Weight  - amount to add for the word; the stem gets half of that,
#              rounded up
# The first time a word ID/field ID pair is seen (per
# $this->WordCountAdded) a row is inserted; after that the existing row is
# incremented.
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # word ID always comes first, optionally followed by stem ID
    $IdsToUpdate = array($this->GetWordId($Word, TRUE));
    if ($this->StemmingEnabled)
    {
        $IdsToUpdate[] = $this->GetStemId(
                PorterStemmer::Stem($Word, TRUE), TRUE);
    }

    foreach ($IdsToUpdate as $WordId)
    {
        $AlreadyAdded = isset($this->WordCountAdded[$WordId][$FieldId]);

        # bump existing count or create a new one
        if ($AlreadyAdded)
        {
            $Statement = "UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$WordId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId;
        }
        else
        {
            $Statement = "INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$WordId.", ".$ItemId.", ".$FieldId.", ".$Weight.")";
        }
        $this->DB->Query($Statement);

        # remember that we added count for this word
        if (!$AlreadyAdded)
        {
            $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        }

        # whatever comes next (the stem) counts for half as much
        $Weight = ceil($Weight / 2);
    }
}
1957 
# Stub: this base implementation terminates the script with an error
# message (presumably a child class is expected to supply the real one).
protected function GetFieldContent($ItemId, $FieldName)
{
    $ErrorMessage = "<br>SE - ERROR: GetFieldContent() not implemented<br>\n";
    exit($ErrorMessage);
}
1963 
# Record word counts for a piece of text belonging to one item and field.
#   $ItemId           - ID of item the text belongs to
#   $FieldName        - name of field the text came from
#   $Weight           - weight passed along for the keyword field counts
#   $Text             - text to break into words
#   $IncludeInKeyword - whether to also record counts under the keyword
#                       pseudo-field
# Counts for the field itself are recorded with UpdateWordCount()'s default
# weight; only the keyword field counts use $Weight.
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # break text into normalized words
    $ParsedWords = $this->ParseSearchStringForWords($Text, TRUE);

    # nothing to record if no words came out of parsing
    if (!(count($ParsedWords) > 0))
    {
        return;
    }

    # get ID for field
    $FieldId = $this->GetFieldId($FieldName);

    # get ID for keyword field if it will be needed
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    foreach ($ParsedWords as $Word => $Flags)
    {
        # update count for word in its own field
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # update keyword field count for word when requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
1999 
2000  # print debug message if level set high enough
# Print a debug message, prefixed with "SE: " and followed by an HTML line
# break, but only when the current debug level is greater than $Level.
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is set high enough
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    print("SE: ".$Msg."<br>\n");
}
2008 
2009  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2010 
2011  # possible types of logical operators
2012  const SEARCHLOGIC_AND = 1;
2013  const SEARCHLOGIC_OR = 2;
2014 }
2015 
2016 ?>
SetAllSynonyms($SynonymList)
DropItem($ItemId)
DropField($FieldName)
RemoveSynonyms($Word, $Synonyms=NULL)
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
SQL database abstraction object with smart query caching.
Search($SearchString, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
$ReferenceDestinationIdFieldName
SearchTermsRequiredByDefault($NewSetting=TRUE)
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
AddField($FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
const FIELDTYPE_DATE
SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName, $ReferenceTableName, $ReferenceSourceIdFieldName, $ReferenceDestinationIdFieldName)
GroupedSearch($SearchGroups, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
PHP
Definition: OAIClient.php:39
DebugLevel($Setting)
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
const STEM_ID_OFFSET
FieldWeight($FieldName)
FieldInKeywordSearch($FieldName)
DMsg($Level, $Msg)
GetFieldId($FieldName)
DBFieldName($FieldName)
FieldedSearchWeightScale($SearchStrings)
DefaultSearchLogic($NewSetting=NULL)
FieldType($FieldName)
const FIELDTYPE_TEXT
UpdateForItems($StartingItemId, $NumberOfItems)
GetFieldContent($ItemId, $FieldName)
UpdateForItem($ItemId)
AddResultFilterFunction($FunctionName)
SuggestAlternateSearches($SearchString)
GetSynonyms($Word)