SearchEngine.php
Go to the documentation of this file.
<?PHP

#
#   FILE:  SearchEngine.php
#
#   Open Source Metadata Archive Search Engine (OSMASE)
#   Copyright 2002-2011 Edward Almasy and Internet Scout
#   http://scout.wisc.edu
#

class SearchEngine {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    # possible types of logical operators
    const LOGIC_AND = 1;
    const LOGIC_OR = 2;

    # flags used for indicating field types
    const FIELDTYPE_TEXT = 1;
    const FIELDTYPE_NUMERIC = 2;
    const FIELDTYPE_DATE = 3;
    const FIELDTYPE_DATERANGE = 4;

    /**
    * Object constructor.
    * @param object $DB Database object used for all queries.
    * @param string $ItemTableName Name of table containing searchable items.
    * @param string $ItemIdFieldName Name of item ID column in that table.
    */
    function __construct(&$DB, $ItemTableName, $ItemIdFieldName)
    {
        # save database object for our use
        $this->DB = $DB;

        # save item access parameters
        $this->ItemTableName = $ItemTableName;
        $this->ItemIdFieldName = $ItemIdFieldName;

        # define flags used for indicating word states
        if (!defined("WORD_PRESENT")) {  define("WORD_PRESENT", 1);  }
        if (!defined("WORD_EXCLUDED")) {  define("WORD_EXCLUDED", 2);  }
        if (!defined("WORD_REQUIRED")) {  define("WORD_REQUIRED", 4);  }

        # set default debug state
        $this->DebugLevel = 0;
    }

    # legacy (PHP 4 style) constructor name
    # (no longer treated as a constructor by current PHP versions, but
    #       retained so that code calling it by name continues to work)
    function SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName)
    {
        $this->__construct($DB, $ItemTableName, $ItemIdFieldName);
    }

    /**
    * Add field to be searched.
    * @param string $FieldName Name used to refer to field in searches.
    * @param string $DBFieldName Value stored as the field's "DBFieldName".
    * @param int $FieldType One of the FIELDTYPE_ constants.
    * @param int $Weight Search weight for field (fields with a weight of
    *       zero or less are skipped by UpdateForItem()).
    * @param bool $UsedInKeywordSearch Whether field is included in
    *       keyword searches.
    */
    function AddField(
            $FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
    {
        # save values
        $this->FieldInfo[$FieldName] = array(
                "DBFieldName" => $DBFieldName,
                "FieldType" => $FieldType,
                "Weight" => $Weight,
                "InKeywordSearch" => $UsedInKeywordSearch,
                );
    }

    # retrieve info about tables and fields (useful for child objects)
    function ItemTableName() {  return $this->ItemTableName;  }
    function ItemIdFieldName() {  return $this->ItemIdFieldName;  }
    function DBFieldName($FieldName)
            {  return $this->FieldInfo[$FieldName]["DBFieldName"];  }
    function FieldType($FieldName)
            {  return $this->FieldInfo[$FieldName]["FieldType"];  }
    function FieldWeight($FieldName)
            {  return $this->FieldInfo[$FieldName]["Weight"];  }
    function FieldInKeywordSearch($FieldName)
            {  return $this->FieldInfo[$FieldName]["InKeywordSearch"];  }

    # set debug level
    function DebugLevel($Setting)
    {
        $this->DebugLevel = $Setting;
    }


    # ---- search functions

    /**
    * Perform keyword search.
    * @param string $SearchString Search string as entered by user.
    * @param int $StartingResult Passed through to CleanScores().
    * @param int $NumberOfResults Passed through to CleanScores().
    * @param string $SortByField Passed through to CleanScores().
    * @param bool $SortDescending Passed through to CleanScores().
    * @return Search result scores, as returned by CleanScores().
    */
    function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
            $SortByField = NULL, $SortDescending = TRUE)
    {
        $SearchString = $this->SetDebugLevel($SearchString);
        $this->DMsg(0, "In Search() with search string \"".$SearchString."\"");

        # save start time to use in calculating search time
        $StartTime = microtime(TRUE);

        # clear word counts
        $this->InclusiveTermCount = 0;
        $this->RequiredTermCount = 0;
        $this->ExcludedTermCount = 0;

        # parse search string into terms
        $Words = $this->ParseSearchStringForWords($SearchString);
        $this->DMsg(1, "Found ".count($Words)." words");

        # parse search string for phrases
        $Phrases = $this->ParseSearchStringForPhrases($SearchString);
        $this->DMsg(1, "Found ".count($Phrases)." phrases");

        # if only excluded terms specified
        if ($this->ExcludedTermCount && !$this->InclusiveTermCount)
        {
            # load all records
            $this->DMsg(1, "Loading all records");
            $Scores = $this->LoadScoresForAllRecords();
        }
        else
        {
            # perform searches
            $Scores = $this->SearchForWords($Words);
            $this->DMsg(1, "Found ".count($Scores)." results after word search");
            $Scores = $this->SearchForPhrases($Phrases, $Scores);
            $this->DMsg(1, "Found ".count($Scores)." results after phrase search");
        }

        # if search results found
        if (count($Scores) > 0)
        {
            # handle any excluded words
            $Scores = $this->FilterOnExcludedWords($Words, $Scores);

            # strip off any results that don't contain required words
            $Scores = $this->FilterOnRequiredWords($Scores);
        }

        # count, sort, and trim search result scores list
        $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
                $SortByField, $SortDescending);

        # record search time
        $this->LastSearchTime = microtime(TRUE) - $StartTime;

        # return list of items to caller
        $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
        return $Scores;
    }

    /**
    * Perform search across multiple fields and return trimmed results
    * to caller.
    * @param array $SearchStrings Search strings (a single string or an
    *       array of strings per field), indexed by field name.
    * @param int $StartingResult Passed through to CleanScores().
    * @param int $NumberOfResults Passed through to CleanScores().
    * @param string $SortByField Passed through to CleanScores().
    * @param bool $SortDescending Passed through to CleanScores().
    * @return Search result scores, as returned by CleanScores().
    */
    function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
            $SortByField = NULL, $SortDescending = TRUE)
    {
        $SearchStrings = $this->SetDebugLevel($SearchStrings);
        $this->DMsg(0, "In FieldedSearch() with "
                .count($SearchStrings)." search strings");

        # save start time to use in calculating search time
        $StartTime = microtime(TRUE);

        # perform search
        # (NULL from SearchAcrossFields() means no search was conducted)
        $Scores = $this->SearchAcrossFields($SearchStrings);
        $Scores = ($Scores === NULL) ? array() : $Scores;

        # count, sort, and trim search result scores list
        $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
                $SortByField, $SortDescending);

        # record search time
        $this->LastSearchTime = microtime(TRUE) - $StartTime;

        # return list of items to caller
        $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
        return $Scores;
    }

    /**
    * Perform search with logical groups of fielded searches.
    * @param array $SearchGroups Array of groups, each of which may have a
    *       "SearchStrings" entry (in the form taken by FieldedSearch())
    *       and a "Logic" entry (LOGIC_AND or LOGIC_OR) that is used within
    *       that group in place of the default search logic.
    * @param int $StartingResult Passed through to CleanScores().
    * @param int $NumberOfResults Passed through to CleanScores().
    * @param string $SortByField Passed through to CleanScores().
    * @param bool $SortDescending Passed through to CleanScores().
    * @return Search result scores, as returned by CleanScores().
    */
    function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
            $SortByField = NULL, $SortDescending = TRUE)
    {
        foreach ($SearchGroups as $Index => $Groups)
        {
            if (isset($SearchGroups[$Index]["SearchStrings"]))
            {
                $SearchGroups[$Index]["SearchStrings"] =
                        $this->SetDebugLevel($SearchGroups[$Index]["SearchStrings"]);
            }
        }
        $this->DMsg(0, "In GroupedSearch() with "
                .count($SearchGroups)." search groups");

        # save start time to use in calculating search time
        $StartTime = microtime(TRUE);

        # start with no results
        $Scores = array();

        # save AND/OR search setting
        $SavedSearchLogic = $this->DefaultSearchLogic;

        # for each search group
        $FirstSearch = TRUE;
        foreach ($SearchGroups as $Group)
        {
            $this->DMsg(0, "----- GROUP ---------------------------");

            # if group has AND/OR setting specified
            if (isset($Group["Logic"]))
            {
                # use specified AND/OR setting
                $this->DefaultSearchLogic = $Group["Logic"];
            }
            else
            {
                # use saved AND/OR setting
                $this->DefaultSearchLogic = $SavedSearchLogic;
            }
            $this->DMsg(2, "Logic is "
                    .(($this->DefaultSearchLogic == self::LOGIC_AND) ? "AND" : "OR"));

            # if we have search strings for this group
            if (isset($Group["SearchStrings"]))
            {
                # perform search
                $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);

                # if search was conducted
                if ($GroupScores !== NULL)
                {
                    # if saved AND/OR setting is OR or this is first search
                    if (($SavedSearchLogic == self::LOGIC_OR) || $FirstSearch)
                    {
                        # add search results to result list
                        foreach ($GroupScores as $ItemId => $Score)
                        {
                            if (isset($Scores[$ItemId]))
                            {
                                $Scores[$ItemId] += $Score;
                            }
                            else
                            {
                                $Scores[$ItemId] = $Score;
                            }
                        }

                        # (reset flag indicating first search)
                        $FirstSearch = FALSE;
                    }
                    else
                    {
                        # AND search results with previous results
                        $OldScores = $Scores;
                        $Scores = array();
                        foreach ($GroupScores as $ItemId => $Score)
                        {
                            if (isset($OldScores[$ItemId]))
                            {
                                $Scores[$ItemId] = $OldScores[$ItemId] + $Score;
                            }
                        }
                    }
                }
            }
        }

        # restore AND/OR search setting
        $this->DefaultSearchLogic = $SavedSearchLogic;

        # count, sort, and trim search result scores list
        $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
                $SortByField, $SortDescending);

        # record search time
        $this->LastSearchTime = microtime(TRUE) - $StartTime;

        # return search results to caller
        $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
        return $Scores;
    }

    # add function that will be called to filter search results
    function AddResultFilterFunction($FunctionName)
    {
        # save filter function name
        $this->FilterFuncs[] = $FunctionName;
    }

    # get or set default search logic (AND or OR)
    function DefaultSearchLogic($NewSetting = NULL)
    {
        if ($NewSetting != NULL)
        {
            $this->DefaultSearchLogic = $NewSetting;
        }
        return $this->DefaultSearchLogic;
    }

    # set whether search terms are required by default
    #       (TRUE selects AND logic, FALSE selects OR logic)
    function SearchTermsRequiredByDefault($NewSetting = TRUE)
    {
        $this->DefaultSearchLogic = $NewSetting
                ? self::LOGIC_AND : self::LOGIC_OR;
    }

    # get number of results available, as recorded for most recent search
    function NumberOfResults()
    {
        return $this->NumberOfResultsAvailable;
    }

    # get list of search terms accumulated while parsing search strings
    function SearchTerms()
    {
        return $this->SearchTermList;
    }

    # get time taken by most recent search (in seconds, from microtime())
    function SearchTime()
    {
        return $this->LastSearchTime;
    }

    # report total weight for all fields involved in search
    function FieldedSearchWeightScale($SearchStrings)
    {
        $Weight = 0;
        $IncludedKeywordSearch = FALSE;
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            if ($FieldName == "XXXKeywordXXX")
            {
                $IncludedKeywordSearch = TRUE;
            }
            else
            {
                $Weight += $this->FieldInfo[$FieldName]["Weight"];
            }
        }

        # if keyword search was included add in weights of all keyword fields
        if ($IncludedKeywordSearch)
        {
            foreach ($this->FieldInfo as $FieldName => $Info)
            {
                if ($Info["InKeywordSearch"])
                {
                    $Weight += $Info["Weight"];
                }
            }
        }
        return $Weight;
    }


    # ---- search database update functions

    # update search DB for the specified item
    function UpdateForItem($ItemId)
    {
        # bail out if item ID is negative (indicating a temporary record)
        if ($ItemId < 0) {  return;  }

        # clear word count added flags for this item
        unset($this->WordCountAdded);

        # delete any existing info for this item
        $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

        # for each metadata field
        foreach ($this->FieldInfo as $FieldName => $Info)
        {
            # if search weight for field is positive
            if ($Info["Weight"] > 0)
            {
                # retrieve text for field
                $Text = $this->GetFieldContent($ItemId, $FieldName);

                # if text is array
                if (is_array($Text))
                {
                    # for each text string in array
                    foreach ($Text as $String)
                    {
                        # record search info for text
                        $this->RecordSearchInfoForText($ItemId, $FieldName,
                                $Info["Weight"], $String,
                                $Info["InKeywordSearch"]);
                    }
                }
                else
                {
                    # record search info for text
                    $this->RecordSearchInfoForText($ItemId, $FieldName,
                            $Info["Weight"], $Text,
                            $Info["InKeywordSearch"]);
                }
            }
        }
    }

    /**
    * Update search DB for the specified range of items.
    * @param int $StartingItemId ID of item at which to start.
    * @param int $NumberOfItems Maximum number of items to update.
    * @return ID of last item updated, or NULL if no items were found at
    *       or after the starting ID.
    */
    function UpdateForItems($StartingItemId, $NumberOfItems)
    {
        # retrieve IDs for specified number of items starting at specified ID
        $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
                ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
                ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
        $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

        # for each retrieved item ID
        # (last ID starts out as NULL so that we have a defined value to
        #       return when no items were retrieved)
        $LastItemId = NULL;
        foreach ($ItemIds as $ItemId)
        {
            # update search info for item
            $this->UpdateForItem($ItemId);
            $LastItemId = $ItemId;
        }

        # return ID of last item updated to caller
        return $LastItemId;
    }

    # drop all data pertaining to item from search DB
    function DropItem($ItemId)
    {
        # drop all entries pertaining to item from word count table
        $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
    }

    # drop all data pertaining to field from search DB
    function DropField($FieldName)
    {
        # retrieve our ID for field
        $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
                ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

        # bail out if field is not known to us (nothing to drop)
        if ($FieldId === NULL) {  return;  }

        # drop all entries pertaining to field from word counts table
        # (quotes must not be backslash-escaped here, because within a
        #       double-quoted string the backslash would end up in the SQL)
        $this->DB->Query("DELETE FROM SearchWordCounts"
                ." WHERE FieldId = '".$FieldId."'");

        # drop field from our fields table
        $this->DB->Query("DELETE FROM SearchFields"
                ." WHERE FieldId = '".$FieldId."'");
    }

    # return total number of terms indexed by search engine
    function SearchTermCount()
    {
        return $this->DB->Query("SELECT COUNT(*) AS TermCount"
                ." FROM SearchWords", "TermCount");
    }

    # return total number of items indexed by search engine
    function ItemCount()
    {
        return $this->DB->Query("SELECT COUNT(DISTINCT ItemId) AS ItemCount"
                ." FROM SearchWordCounts", "ItemCount");
    }

    /**
    * Add synonyms for a word.  (The word and any synonyms that are not
    * already in the word table are added to it.)
    * @param string $Word Word for which synonyms are being added.
    * @param array $Synonyms Array of synonyms for word.
    * @return Count of synonym pairs that were not already in the database
    *       and were added.
    */
    function AddSynonyms($Word, $Synonyms)
    {
        # assume no synonyms will be added
        $AddCount = 0;

        # get ID for word
        $WordId = $this->GetWordId($Word, TRUE);

        # for each synonym passed in
        foreach ($Synonyms as $Synonym)
        {
            # get ID for synonym
            $SynonymId = $this->GetWordId($Synonym, TRUE);

            # if synonym is not already in database
            # (pair may have been stored in either order)
            $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                    ." WHERE (WordIdA = ".$WordId
                        ." AND WordIdB = ".$SynonymId.")"
                    ." OR (WordIdB = ".$WordId
                        ." AND WordIdA = ".$SynonymId.")");
            if ($this->DB->NumRowsSelected() == 0)
            {
                # add synonym entry to database
                $this->DB->Query("INSERT INTO SearchWordSynonyms"
                        ." (WordIdA, WordIdB)"
                        ." VALUES (".$WordId.", ".$SynonymId.")");
                $AddCount++;
            }
        }

        # report to caller number of new synonyms added
        return $AddCount;
    }

    # remove synonym(s)
    # (removes all synonyms for word if no specific synonyms are supplied)
    function RemoveSynonyms($Word, $Synonyms = NULL)
    {
        # find ID for word
        $WordId = $this->GetWordId($Word);

        # if ID found
        if ($WordId !== NULL)
        {
            # if no specific synonyms provided
            if ($Synonyms === NULL)
            {
                # remove all synonyms for word
                $this->DB->Query("DELETE FROM SearchWordSynonyms"
                        ." WHERE WordIdA = '".$WordId."'"
                        ." OR WordIdB = '".$WordId."'");
            }
            else
            {
                # for each specified synonym
                foreach ($Synonyms as $Synonym)
                {
                    # look up ID for synonym
                    $SynonymId = $this->GetWordId($Synonym);

                    # if synonym ID was found
                    if ($SynonymId !== NULL)
                    {
                        # delete synonym entry
                        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                                ." WHERE (WordIdA = '".$WordId."'"
                                    ." AND WordIdB = '".$SynonymId."')"
                                ." OR (WordIdB = '".$WordId."'"
                                    ." AND WordIdA = '".$SynonymId."')");
                    }
                }
            }
        }
    }

    # remove all synonyms
    function RemoveAllSynonyms()
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms");
    }

    # get synonyms for word (returns array of synonyms)
    function GetSynonyms($Word)
    {
        # assume no synonyms will be found
        $Synonyms = array();

        # look up ID for word
        $WordId = $this->GetWordId($Word);

        # if word ID was found
        if ($WordId !== NULL)
        {
            # look up IDs of all synonyms for this word
            $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                    ." WHERE WordIdA = ".$WordId
                    ." OR WordIdB = ".$WordId);
            $SynonymIds = array();
            while ($Record = $this->DB->FetchRow())
            {
                # (synonym is whichever member of pair is not our word)
                $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                        ? $Record["WordIdB"] : $Record["WordIdA"];
            }

            # for each synonym ID
            foreach ($SynonymIds as $SynonymId)
            {
                # look up synonym word and add to synonym list
                $Synonyms[] = $this->GetWord($SynonymId);
            }
        }

        # return synonyms to caller
        return $Synonyms;
    }

    # get all synonyms (returns 2D array w/ words as first index)
    function GetAllSynonyms()
    {
        # assume no synonyms will be found
        $SynonymList = array();

        # for each synonym ID pair
        # (separate database object is used for this query, while the
        #       GetWord() calls inside the loop go through $this->DB)
        $OurDB = new SPTDatabase();
        $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
        while ($Record = $OurDB->FetchRow())
        {
            # look up words
            $Word = $this->GetWord($Record["WordIdA"]);
            $Synonym = $this->GetWord($Record["WordIdB"]);

            # if we do not already have an entry for the word
            #       or synonym is not listed for this word
            if (!isset($SynonymList[$Word])
                    || !in_array($Synonym, $SynonymList[$Word]))
            {
                # add entry for synonym
                $SynonymList[$Word][] = $Synonym;
            }

            # if we do not already have an entry for the synonym
            #       or word is not listed for this synonym
            if (!isset($SynonymList[$Synonym])
                    || !in_array($Word, $SynonymList[$Synonym]))
            {
                # add entry for word
                $SynonymList[$Synonym][] = $Word;
            }
        }

        # for each word
        # (this loop removes reciprocal duplicates)
        foreach ($SynonymList as $Word => $Synonyms)
        {
            # for each synonym for that word
            foreach ($Synonyms as $Synonym)
            {
                # if synonym has synonyms and word is one of them
                if (isset($SynonymList[$Synonym])
                        && isset($SynonymList[$Word])
                        && in_array($Word, $SynonymList[$Synonym])
                        && in_array($Synonym, $SynonymList[$Word]))
                {
                    # if word has less synonyms than synonym
                    if (count($SynonymList[$Word])
                            < count($SynonymList[$Synonym]))
                    {
                        # remove synonym from synonym list for word
                        $SynonymList[$Word] = array_diff(
                                $SynonymList[$Word], array($Synonym));

                        # if no synonyms left for word
                        if (!count($SynonymList[$Word]))
                        {
                            # remove empty synonym list for word
                            unset($SynonymList[$Word]);
                        }
                    }
                    else
                    {
                        # remove word from synonym list for synonym
                        $SynonymList[$Synonym] = array_diff(
                                $SynonymList[$Synonym], array($Word));

                        # if no synonyms left for synonym
                        if (!count($SynonymList[$Synonym]))
                        {
                            # remove empty synonym list for synonym
                            unset($SynonymList[$Synonym]);
                        }
                    }
                }
            }
        }

        # sort array alphabetically (just for convenience)
        foreach ($SynonymList as $Word => $Synonyms)
        {
            asort($SynonymList[$Word]);
        }
        ksort($SynonymList);

        # return 2D array of synonyms to caller
        return $SynonymList;
    }

    # set all synonyms (accepts 2D array w/ words as first index)
    function SetAllSynonyms($SynonymList)
    {
        # remove all existing synonyms
        $this->RemoveAllSynonyms();

        # for each synonym entry passed in
        foreach ($SynonymList as $Word => $Synonyms)
        {
            # add synonyms for word
            $this->AddSynonyms($Word, $Synonyms);
        }
    }

    /**
    * Load synonyms from a file.  Each line is split on whitespace and
    * commas, with the first entry on the line taken as the word and the
    * remaining entries taken as its synonyms.  Lines containing a "#"
    * are treated as comments and skipped.
    * @param string $FileName Name of file containing synonyms.
    * @return Count of synonym pairs added, as reported by AddSynonyms()
    *       (zero if the file could not be read).
    */
    function LoadSynonymsFromFile($FileName)
    {
        # assume no synonyms will be added
        $AddCount = 0;

        # read in contents of file
        $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

        # if file was read and contained lines
        # (file() returns FALSE when the file cannot be read)
        if (($Lines !== FALSE) && count($Lines))
        {
            # for each line of file
            foreach ($Lines as $Line)
            {
                # if line is not a comment
                if (!preg_match("/[\s]*#/", $Line))
                {
                    # split line into words
                    # (empty entries produced by leading or trailing
                    #       separators are discarded)
                    $Words = preg_split("/[\s,]+/", $Line,
                            -1, PREG_SPLIT_NO_EMPTY);

                    # if synonyms found
                    if (count($Words) > 1)
                    {
                        # separate out word and synonyms
                        $Word = array_shift($Words);

                        # add synonyms
                        $AddCount += $this->AddSynonyms($Word, $Words);
                    }
                }
            }
        }

        # return count of synonyms added to caller
        return $AddCount;
    }

    # suggest alternatives
    # (placeholder -- currently does nothing and returns no value)
    function SuggestAlternateSearches($SearchString)
    {
        #
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    protected $DB;
    protected $DebugLevel;
    protected $ItemTableName;
    protected $ItemIdFieldName;
    protected $NumberOfResultsAvailable;
    protected $LastSearchTime;
    protected $FilterFuncs;
    protected $DefaultSearchLogic = self::LOGIC_AND;
    protected $StemmingEnabled = TRUE;
    protected $SynonymsEnabled = TRUE;

    private $WordCountAdded;
    private $FieldIds;
    private $FieldInfo;
    private $RequiredTermCount;
    private $RequiredTermCounts;
    private $InclusiveTermCount;
    private $ExcludedTermCount;
    private $SearchTermList;

    # value added to IDs from the stem table to tell them apart from
    #       IDs from the word table (see GetStemId() and GetWord())
    const STEM_ID_OFFSET = 1000000;
00743 00744 00745 # ---- common private functions (used in both searching and DB build) 00746 00747 # normalize and parse search string into list of search terms 00748 private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE) 00749 { 00750 # strip off any surrounding whitespace 00751 $Text = trim($SearchString); 00752 00753 # set up normalization replacement strings 00754 $Patterns = array( 00755 "/'s[^a-z0-9\\-+~]+/i", # get rid of possessive plurals 00756 "/'/", # get rid of single quotes / apostrophes 00757 "/\"[^\"]*\"/", # get rid of phrases (NOTE: HARD-CODED INDEX BELOW!!!) " 00758 "/\\([^)]*\\)/", # get rid of groups (NOTE: HARD-CODED INDEX BELOW!!!) 00759 "/[^a-z0-9\\-+~]+/i", # convert non-alphanumerics / non-minus/plus to a space 00760 "/([^\\s])-+/i", # convert minus preceded by anything but whitespace to a space 00761 "/([^\\s])\\++/i", # convert plus preceded by anything but whitespace to a space 00762 "/-\\s/i", # convert minus followed by whitespace to a space 00763 "/\\+\\s/i", # convert plus followed by whitespace to a space 00764 "/~\\s/i", # convert tilde followed by whitespace to a space 00765 "/[ ]+/" # convert multiple spaces to one space 00766 ); 00767 $Replacements = array( 00768 " ", 00769 "", 00770 " ", 00771 " ", 00772 "\\1 ", 00773 "\\1 ", 00774 " ", 00775 " ", 00776 " ", 00777 " ", 00778 " " 00779 ); 00780 00781 # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens) 00782 if ($IgnorePhrases) 00783 { 00784 # switch phrase removal to double quote removal (HARD-CODED INDEX INTO PATTERN LIST!!) 00785 $Patterns[2] = "/\"/"; 00786 00787 # switch group removal to paren removal (HARD-CODED INDEX INTO PATTERN LIST!!) 
00788 $Patterns[3] = "/[\(\)]+/"; 00789 } 00790 00791 # remove punctuation from text and normalize whitespace 00792 $Text = preg_replace($Patterns, $Replacements, $Text); 00793 $this->DMsg(2, "Normalized search string is '".$Text."'"); 00794 00795 # convert text to lower case 00796 $Text = strtolower($Text); 00797 00798 # strip off any extraneous whitespace 00799 $Text = trim($Text); 00800 00801 # start with an empty array 00802 $Words = array(); 00803 00804 # if we have no words left after parsing 00805 if (strlen($Text) != 0) 00806 { 00807 # for each word 00808 foreach (explode(" ", $Text) as $Word) 00809 { 00810 # grab first character of word 00811 $FirstChar = substr($Word, 0, 1); 00812 00813 # strip off option characters and set flags appropriately 00814 $Flags = WORD_PRESENT; 00815 if ($FirstChar == "-") 00816 { 00817 $Word = substr($Word, 1); 00818 $Flags |= WORD_EXCLUDED; 00819 if (!isset($Words[$Word])) 00820 { 00821 $this->ExcludedTermCount++; 00822 } 00823 } 00824 else 00825 { 00826 if ($FirstChar == "~") 00827 { 00828 $Word = substr($Word, 1); 00829 } 00830 elseif (($this->DefaultSearchLogic == self::LOGIC_AND) 00831 || ($FirstChar == "+")) 00832 { 00833 if ($FirstChar == "+") 00834 { 00835 $Word = substr($Word, 1); 00836 } 00837 $Flags |= WORD_REQUIRED; 00838 if (!isset($Words[$Word])) 00839 { 00840 $this->RequiredTermCount++; 00841 } 00842 } 00843 if (!isset($Words[$Word])) 00844 { 00845 $this->InclusiveTermCount++; 00846 $this->SearchTermList[] = $Word; 00847 } 00848 } 00849 00850 # store flags to indicate word found 00851 $Words[$Word] = $Flags; 00852 $this->DMsg(3, "Word identified (".$Word.")"); 00853 } 00854 } 00855 00856 # return normalized words to caller 00857 return $Words; 00858 } 00859 00860 protected function GetFieldId($FieldName) 00861 { 00862 # if field ID is not in cache 00863 if (!isset($this->FieldIds[$FieldName])) 00864 { 00865 # look up field info in database 00866 $this->DB->Query("SELECT FieldId FROM SearchFields " 00867 ."WHERE 
FieldName = '".addslashes($FieldName)."'"); 00868 00869 # if field was found 00870 if ($Record = $this->DB->FetchRow()) 00871 { 00872 # load info from DB record 00873 $FieldId = $Record["FieldId"]; 00874 } 00875 else 00876 { 00877 # add field to database 00878 $this->DB->Query("INSERT INTO SearchFields (FieldName) " 00879 ."VALUES ('".addslashes($FieldName)."')"); 00880 00881 # retrieve ID for newly added field 00882 $FieldId = $this->DB->LastInsertId("SearchFields"); 00883 } 00884 00885 # cache field info 00886 $this->FieldIds[$FieldName] = $FieldId; 00887 } 00888 00889 # return cached ID to caller 00890 return $this->FieldIds[$FieldName]; 00891 } 00892 00893 # retrieve ID for specified word (returns NULL if no ID found) 00894 private function GetWordId($Word, $AddIfNotFound = FALSE) 00895 { 00896 static $WordIdCache; 00897 00898 # if word was in ID cache 00899 if (isset($WordIdCache[$Word])) 00900 { 00901 # use ID from cache 00902 $WordId = $WordIdCache[$Word]; 00903 } 00904 else 00905 { 00906 # look up ID in database 00907 $WordId = $this->DB->Query("SELECT WordId" 00908 ." FROM SearchWords" 00909 ." WHERE WordText='".addslashes($Word)."'", 00910 "WordId"); 00911 00912 # if ID was not found and caller requested it be added 00913 if (($WordId === NULL) && $AddIfNotFound) 00914 { 00915 # add word to database 00916 $this->DB->Query("INSERT INTO SearchWords (WordText)" 00917 ." 
VALUES ('".addslashes(strtolower($Word))."')"); 00918 00919 # get ID for newly added word 00920 $WordId = $this->DB->LastInsertId("SearchWords"); 00921 } 00922 00923 # save ID to cache 00924 $WordIdCache[$Word] = $WordId; 00925 } 00926 00927 # return ID to caller 00928 return $WordId; 00929 } 00930 00931 # retrieve ID for specified word stem (returns NULL if no ID found) 00932 private function GetStemId($Stem, $AddIfNotFound = FALSE) 00933 { 00934 static $StemIdCache; 00935 00936 # if stem was in ID cache 00937 if (isset($StemIdCache[$Stem])) 00938 { 00939 # use ID from cache 00940 $StemId = $StemIdCache[$Stem]; 00941 } 00942 else 00943 { 00944 # look up ID in database 00945 $StemId = $this->DB->Query("SELECT WordId" 00946 ." FROM SearchStems" 00947 ." WHERE WordText='".addslashes($Stem)."'", 00948 "WordId"); 00949 00950 # if ID was not found and caller requested it be added 00951 if (($StemId === NULL) && $AddIfNotFound) 00952 { 00953 # add stem to database 00954 $this->DB->Query("INSERT INTO SearchStems (WordText)" 00955 ." 
VALUES ('".addslashes(strtolower($Stem))."')"); 00956 00957 # get ID for newly added stem 00958 $StemId = $this->DB->LastInsertId("SearchStems"); 00959 } 00960 00961 # adjust from DB ID value to stem ID value 00962 $StemId += self::STEM_ID_OFFSET; 00963 00964 # save ID to cache 00965 $StemIdCache[$Stem] = $StemId; 00966 } 00967 00968 # return ID to caller 00969 return $StemId; 00970 } 00971 00972 # retrieve word for specified word ID (returns FALSE if no word found) 00973 private function GetWord($WordId) 00974 { 00975 static $WordCache; 00976 00977 # if word was in cache 00978 if (isset($WordCache[$WordId])) 00979 { 00980 # use word from cache 00981 $Word = $WordCache[$WordId]; 00982 } 00983 else 00984 { 00985 # adjust search location and word ID if word is stem 00986 $TableName = "SearchWords"; 00987 if ($WordId >= self::STEM_ID_OFFSET) 00988 { 00989 $TableName = "SearchStems"; 00990 $WordId -= self::STEM_ID_OFFSET; 00991 } 00992 00993 # look up word in database 00994 $Word = $this->DB->Query("SELECT WordText" 00995 ." FROM ".$TableName 00996 ." 
WHERE WordId='".$WordId."'",
                    "WordText");

            # save word to cache
            $WordCache[$WordId] = $Word;
        }

        # return word to caller
        return $Word;
    }


    # ---- private functions used in searching

    # perform search across multiple fields and return raw results to caller
    #   $SearchStrings - search strings (or arrays of search strings) indexed
    #       by field name, with "XXXKeywordXXX" as the name for keyword searches
    #   returns array of scores indexed by item ID, or NULL if nothing in
    #       the search strings resulted in a search being run
    private function SearchAcrossFields($SearchStrings)
    {
        # start by assuming no search will be done
        $Scores = NULL;

        # clear word counts
        $this->InclusiveTermCount = 0;
        $this->RequiredTermCount = 0;
        $this->ExcludedTermCount = 0;

        # words and phrases parsed from each text search string
        #       (kept per search string rather than per field, so that the
        #       exclusions from every string for a field are still available
        #       to the filtering pass below, not just those from the last one)
        $ParsedTerms = array();

        # for each field
        $NeedComparisonSearch = FALSE;
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # determine what kind of field this is
            $IsKeywordField = ($FieldName == "XXXKeywordXXX");
            $IsTextField = isset($this->FieldInfo[$FieldName])
                    && ($this->FieldInfo[$FieldName]["FieldType"]
                            == self::FIELDTYPE_TEXT);

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # check whether string starts with a comparison operator
                $LooksLikeComparison = preg_match("/^[><!]=./", $SearchString)
                        || preg_match("/^[><=]./", $SearchString);

                # if field is keyword or field is text and does not look like comparison match
                if ($IsKeywordField || ($IsTextField && !$LooksLikeComparison))
                {
                    # (search string is user input, so escape it for the HTML debug output)
                    $this->DMsg(0, "Searching text field \""
                            .$FieldName."\" for string \""
                            .htmlspecialchars($SearchString)."\"");

                    # normalize text and split into words
                    $Words = $this->ParseSearchStringForWords($SearchString);

                    # calculate scores for matching items
                    if (count($Words))
                    {
                        $Scores = $this->SearchForWords(
                                $Words, $FieldName, $Scores);
                        $this->DMsg(3, "Have "
                                .count((array)$Scores)." results after word search");
                    }

                    # split into phrases
                    $Phrases = $this->ParseSearchStringForPhrases($SearchString);

                    # handle any phrases
                    if (count($Phrases))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases, $Scores, $FieldName, TRUE, FALSE);
                        $this->DMsg(3, "Have "
                                .count((array)$Scores)." results after phrase search");
                    }

                    # save parsed terms for use when filtering on exclusions
                    $ParsedTerms[$FieldName][] = array(
                            "Words" => $Words,
                            "Phrases" => $Phrases);
                }
                else
                {
                    # set flag to indicate possible comparison search candidate found
                    $NeedComparisonSearch = TRUE;
                }
            }
        }

        # perform comparison searches
        if ($NeedComparisonSearch)
        {
            $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
            $this->DMsg(3, "Have ".count((array)$Scores)
                    ." results after comparison search");
        }

        # if no results found and exclusions specified
        #       (empty() is used because $Scores may still be NULL here)
        if (empty($Scores) && $this->ExcludedTermCount)
        {
            # load all records
            $Scores = $this->LoadScoresForAllRecords();
        }

        # if search results found
        if (!empty($Scores))
        {
            # for each field that had text searches run on it
            foreach ($ParsedTerms as $FieldName => $TermSets)
            {
                # for each search string that was parsed for this field
                foreach ($TermSets as $Terms)
                {
                    # handle any excluded words
                    $Scores = $this->FilterOnExcludedWords(
                            $Terms["Words"], $Scores, $FieldName);

                    # handle any excluded phrases
                    $Scores = $this->SearchForPhrases(
                            $Terms["Phrases"], $Scores, $FieldName, FALSE, TRUE);
                }
            }

            # strip off any results that don't contain required words
            $Scores = $this->FilterOnRequiredWords($Scores);
        }

        # return search result scores to caller
        return $Scores;
    }

    # search for words in specified field
    #   $Words - array of word flags (WORD_* values) indexed by word
    #   $FieldName - name of field to search (defaults to keyword field)
    #   $Scores - existing scores indexed by item ID (or NULL if none yet)
    #   returns scores with the counts for matching words (plus half-weight
    #       counts for synonyms and stems, when enabled) added in
    private function SearchForWords(
            $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
    {
        $DB = $this->DB;

        # start with empty search result scores list if none passed in
        if (!is_array($Scores))
        {
            $Scores = array();
        }

        # grab field ID
        $FieldId = $this->GetFieldId($FieldName);

        # for each word
        foreach ($Words as $Word => $Flags)
        {
            $this->DMsg(2, "Searching for word '".htmlspecialchars($Word)
                    ."' in field ".$FieldName);

            # skip excluded words (SearchAcrossFields() filters on those
            #       separately via FilterOnExcludedWords())
            if ($Flags & WORD_EXCLUDED) { continue; }

            # start with no counts for this word, so that counts from the
            #       previous word are not added again (and wrongly credited
            #       as a required term) when this word is not in the database
            $Counts = array();

            # look up record ID for word
            $this->DMsg(2, "Looking up word \"".htmlspecialchars($Word)."\"");
            $WordId = $this->GetWordId($Word);

            # if word is in DB
            if ($WordId !== NULL)
            {
                # look up counts for word
                $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                        ."WHERE WordId = ".$WordId
                        ." AND FieldId = ".$FieldId);
                $Counts = $DB->FetchColumn("Count", "ItemId");
                if (!is_array($Counts)) { $Counts = array(); }

                # if synonym support is enabled
                if ($this->SynonymsEnabled)
                {
                    # look for any synonyms
                    $DB->Query("SELECT WordIdA, WordIdB"
                            ." FROM SearchWordSynonyms"
                            ." WHERE WordIdA = ".$WordId
                            ." OR WordIdB = ".$WordId);

                    # if synonyms were found
                    if ($DB->NumRowsSelected())
                    {
                        # retrieve synonym IDs (whichever side of the pair is not our word)
                        $SynonymIds = array();
                        while ($Record = $DB->FetchRow())
                        {
                            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                                    ? $Record["WordIdB"]
                                    : $Record["WordIdA"];
                        }

                        # for each synonym
                        foreach ($SynonymIds as $SynonymId)
                        {
                            # retrieve counts for synonym
                            $DB->Query("SELECT ItemId,Count"
                                    ." FROM SearchWordCounts"
                                    ." WHERE WordId = ".$SynonymId
                                    ." AND FieldId = ".$FieldId);
                            $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                            # add half of each count (rounded up) because it's a synonym
                            foreach ($SynonymCounts as $ItemId => $Count)
                            {
                                $Counts[$ItemId] = ceil($Count / 2)
                                        + (isset($Counts[$ItemId])
                                                ? $Counts[$ItemId] : 0);
                            }
                        }
                    }
                }
            }

            # if stemming is enabled
            if ($this->StemmingEnabled)
            {
                # retrieve stem ID
                $Stem = PorterStemmer::Stem($Word);
                $this->DMsg(2, "Looking up stem \"".htmlspecialchars($Stem)."\"");
                $StemId = $this->GetStemId($Stem);

                # if ID found for stem
                if ($StemId !== NULL)
                {
                    # retrieve counts for stem
                    $DB->Query("SELECT ItemId,Count"
                            ." FROM SearchWordCounts"
                            ." WHERE WordId = ".$StemId
                            ." AND FieldId = ".$FieldId);
                    $StemCounts = $DB->FetchColumn("Count", "ItemId");

                    # add half of each count (rounded up) because it's a stem
                    foreach ($StemCounts as $ItemId => $Count)
                    {
                        $Counts[$ItemId] = ceil($Count / 2)
                                + (isset($Counts[$ItemId])
                                        ? $Counts[$ItemId] : 0);
                    }
                }
            }

            # for each count that was found
            foreach ($Counts as $ItemId => $Count)
            {
                # if word flagged as required
                if ($Flags & WORD_REQUIRED)
                {
                    # increment required word count for record
                    if (isset($this->RequiredTermCounts[$ItemId]))
                    {
                        $this->RequiredTermCounts[$ItemId]++;
                    }
                    else
                    {
                        $this->RequiredTermCounts[$ItemId] = 1;
                    }
                }

                # add to item record score
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += $Count;
                }
                else
                {
                    $Scores[$ItemId] = $Count;
                }
            }
        }

        # return basic scores to caller
        return $Scores;
    }

    # extract phrases (terms surrounded by quotes) from search string
    #   $SearchString - search string to parse
    #   returns array of phrase flags (WORD_* values) indexed by phrase
    # (also updates the inclusive/required/excluded term counts and appends
    #       each newly-seen non-excluded phrase to the search term list)
    private function ParseSearchStringForPhrases($SearchString)
    {
        # split into chunks delimited by double quote marks
        #       (odd-numbered chunks are the text inside pairs of quotes)
        $Pieces = explode("\"", $SearchString);
        $PieceCount = count($Pieces);

        # for each pair of chunks
        #       (a trailing phrase with no closing quote is never reached)
        $Phrases = array();
        for ($Index = 2;  $Index < $PieceCount;  $Index += 2)
        {
            # grab phrase from chunk
            $Phrase = trim(addslashes($Pieces[$Index - 1]));
            $Flags = WORD_PRESENT;
            $IsNewPhrase = !isset($Phrases[$Phrase]);

            # grab option character (last character before opening quote)
            $OptionChar = substr($Pieces[$Index - 2], -1);

            # set flags to reflect any option characters
            if ($OptionChar == "-")
            {
                $Flags |= WORD_EXCLUDED;
                if ($IsNewPhrase)
                {
                    $this->ExcludedTermCount++;
                }
            }
            else
            {
                # phrase is required if explicitly marked with "+" or if
                #       default logic is AND and phrase is not marked with "~"
                if ((($this->DefaultSearchLogic == self::LOGIC_AND)
                                && ($OptionChar != "~"))
                        || ($OptionChar == "+"))
                {
                    $Flags |= WORD_REQUIRED;
                    if ($IsNewPhrase)
                    {
                        $this->RequiredTermCount++;
                    }
                }
                if ($IsNewPhrase)
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Phrase;
                }
            }
            $Phrases[$Phrase] = $Flags;
        }

        # return phrases to caller
        return $Phrases;
    }

    # extract groups (terms surrounded by parens) from search string
    # (NOTE: NOT YET IMPLEMENTED!!!)
    #   $SearchString - search string to parse
    #   returns array of group strings (empty array if none found)
    private function ParseSearchStringForGroups($SearchString)
    {
        # split into chunks delimited by open paren
        $Pieces = explode("(", $SearchString);
        $PieceCount = count($Pieces);

        # for each pair of chunks
        #       (list is initialized so that a string with no groups yields
        #       an empty array rather than an undefined variable)
        $Groups = array();
        for ($Index = 2;  $Index < $PieceCount;  $Index += 2)
        {
            # grab group from chunk
            $Groups[] = trim(addslashes($Pieces[$Index - 1]));
        }

        # return groups to caller
        return $Groups;
    }

    # search field for phrase and return IDs of matching items
    # (placeholder that exits with an error message -- presumably meant to
    #       be overridden by child classes; SearchForPhrases() iterates over
    #       the returned value as a list of item IDs)
    protected function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # error out
        exit("<br>SE - ERROR:  SearchFieldForPhrases() not implemented<br>\n");
    }

    # search for phrases in field and adjust scores accordingly
    #   $Phrases - array of phrase flags (WORD_* values) indexed by phrase
    #   $Scores - existing scores indexed by item ID (or NULL if none yet)
    #   $FieldName - name of field to search (keyword field name causes
    #       search of all fields flagged for inclusion in keyword searches)
    #   $ProcessNonExcluded - TRUE to add to scores for non-excluded phrases
    #   $ProcessExcluded - TRUE to remove items containing excluded phrases
    #   returns updated scores
    private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
            $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
    {
        # nothing to do if no phrases were found
        if (count($Phrases) == 0) { return $Scores; }

        # if this is a keyword search
        if ($FieldName == "XXXKeywordXXX")
        {
            # for each field
            foreach ($this->FieldInfo as $KFieldName => $Info)
            {
                # if field is marked to be included in keyword searches
                if ($Info["InKeywordSearch"])
                {
                    # call ourself with that field
                    $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
                            $ProcessNonExcluded, $ProcessExcluded);
                }
            }
            return $Scores;
        }

        # for each phrase
        foreach ($Phrases as $Phrase => $Flags)
        {
            $this->DMsg(2, "Searching for phrase '".htmlspecialchars($Phrase)
                    ."' in field ".$FieldName);

            # skip phrase unless we are processing phrases of its type
            $IsExcluded = ($Flags & WORD_EXCLUDED) ? TRUE : FALSE;
            $ShouldProcess = $IsExcluded ? $ProcessExcluded : $ProcessNonExcluded;
            if (!$ShouldProcess) { continue; }

            # initialize score list if necessary
            if ($Scores === NULL) { $Scores = array(); }

            # retrieve list of items that contain phrase
            $ItemIds = $this->SearchFieldForPhrases(
                    $FieldName, $Phrase);

            # calculate phrase value based on number of words and field weight
            #       (same for every item, so only calculated when needed and only once)
            $PhraseScore = NULL;
            if (!$IsExcluded)
            {
                $PhraseScore = count(preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
                        * $this->FieldInfo[$FieldName]["Weight"];
            }

            # for each item that contains phrase
            foreach ($ItemIds as $ItemId)
            {
                # if phrase flagged as excluded
                if ($IsExcluded)
                {
                    # knock item off of list
                    unset($Scores[$ItemId]);
                    continue;
                }

                $this->DMsg(2, "Phrase score is ".$PhraseScore);

                # bump up item record score
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += $PhraseScore;
                }
                else
                {
                    $Scores[$ItemId] = $PhraseScore;
                }

                # if phrase flagged as required
                if ($Flags & WORD_REQUIRED)
                {
                    # increment required word count for record
                    if (isset($this->RequiredTermCounts[$ItemId]))
                    {
                        $this->RequiredTermCounts[$ItemId]++;
                    }
                    else
                    {
                        $this->RequiredTermCounts[$ItemId] = 1;
                    }
                }
            }
        }

        # return updated scores to caller
        return $Scores;
    }

    # remove items from scores that contain any excluded word in field
    #   $Words - array of word flags (WORD_* values) indexed by word
    #   $Scores - scores indexed by item ID
    #   $FieldName - name of field to check (defaults to keyword field)
    #   returns scores with items containing excluded words removed
    private function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
    {
        $DB = $this->DB;

        # grab field ID
        $FieldId = $this->GetFieldId($FieldName);

        # for each word
        foreach ($Words as $Word => $Flags)
        {
            # skip word if it is not flagged as excluded
            if (!($Flags & WORD_EXCLUDED)) { continue; }

            # look up record ID for word
            $WordId = $this->GetWordId($Word);

            # skip word if it is not in DB (no item can contain it)
            if ($WordId === NULL) { continue; }

            # look up items that contain word
            $DB->Query("SELECT ItemId FROM SearchWordCounts "
                    ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

            # for each item
            while ($Record = $DB->FetchRow())
            {
                # if item record is in score list
                $ItemId = $Record["ItemId"];
                if (isset($Scores[$ItemId]))
                {
                    # remove item record from score list
                    $this->DMsg(3, "Filtering out item ".$ItemId
                            ." because it contained word \""
                            .htmlspecialchars($Word)."\"");
                    unset($Scores[$ItemId]);
                }
            }
        }

        # returned filtered score list to caller
        return $Scores;
    }

    # remove items from scores that did not match every required term
    #   $Scores - scores indexed by item ID
    #   returns scores with only those items whose tally of matched required
    #       terms is at least the number of required terms in the search
    private function FilterOnRequiredWords($Scores)
    {
        # if there were required words
        if ($this->RequiredTermCount > 0)
        {
            # for each item
            foreach ($Scores as $ItemId => $Score)
            {
                # if item does not meet required word count
                if (!isset($this->RequiredTermCounts[$ItemId])
                        || ($this->RequiredTermCounts[$ItemId] < $this->RequiredTermCount))
                {
                    # filter out item
                    $this->DMsg(4, "Filtering out item ".$ItemId
                            ." because it didn't have required word count of "
                            .$this->RequiredTermCount
                            .(isset($this->RequiredTermCounts[$ItemId])
                                    ? " (only had "
                                    .$this->RequiredTermCounts[$ItemId]
                                    : " (had none")
                            .")");
                    unset($Scores[$ItemId]);
                }
            }
        }

        # return filtered list to caller
        return $Scores;
    }

    # count, sort, and trim search result scores list
    #   $Scores - scores indexed by item ID (or NULL if no search was done)
    #   $StartingResult - offset of first result to return
    #   $NumberOfResults - maximum number of results to return
    #   $SortByField - name of field to sort by, or NULL to sort by score
    #   $SortDescending - TRUE to sort in descending order
    #   returns requested range of scores, indexed by item ID
    # (also saves the number of results available before trimming)
    private function CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending)
    {
        # treat a missing score list as an empty one, so that the counting,
        #       sorting, and trimming below always operate on an array
        if (!is_array($Scores)) { $Scores = array(); }

        # perform any requested filtering
        $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
        $Scores = $this->FilterOnSuppliedFunctions($Scores);

        # save total number of results available
        $this->NumberOfResultsAvailable = count($Scores);

        # if no sorting field specified
        if ($SortByField === NULL)
        {
            # sort result list by score
            if ($SortDescending)
            {
                arsort($Scores, SORT_NUMERIC);
            }
            else
            {
                asort($Scores, SORT_NUMERIC);
            }
        }
        else
        {
            # get list of item IDs in sorted order
            $SortedIds = $this->GetItemIdsSortedByField(
                    $SortByField, $SortDescending);

            # if we have sorted item IDs
            if (!empty($SortedIds) && !empty($Scores))
            {
                # strip sorted ID list down to those that appear in search results
                $SortedIds = array_intersect($SortedIds, array_keys($Scores));

                # rebuild score list in sorted order
                #       (new list is initialized so that an empty intersection
                #       produces an empty result rather than an undefined variable)
                $NewScores = array();
                foreach ($SortedIds as $Id)
                {
                    $NewScores[$Id] = $Scores[$Id];
                }
                $Scores = $NewScores;
            }
            else
            {
                # sort result list by score
                #       (NOTE: always descending, regardless of $SortDescending)
                arsort($Scores, SORT_NUMERIC);
            }
        }

        # trim result list to match range requested by caller
        #       (final argument preserves the item ID keys)
        $TrimmedScores = array_slice(
                $Scores, $StartingResult, $NumberOfResults, TRUE);

        # returned cleaned search result scores list to caller
        return $TrimmedScores;
    }

    # remove any items from scores that are rejected by filter functions
    #   $Scores - scores indexed by item ID
    #   returns scores with items removed for which any of the registered
    #       filter functions returned TRUE
    protected function FilterOnSuppliedFunctions($Scores)
    {
        # if filter functions have been set
        if (isset($this->FilterFuncs))
        {
            # for each result
            foreach ($Scores as $ItemId => $Score)
            {
                # for each filter function
                foreach ($this->FilterFuncs as $FuncName)
                {
                    # if filter function return TRUE for item
                    if ($FuncName($ItemId))
                    {
                        # discard result
                        $this->DMsg(2, "Filter callback <i>".$FuncName
                                ."</i> rejected item ".$ItemId);
                        unset($Scores[$ItemId]);

                        # skip remaining filter functions and move on to next result
                        continue 2;
                    }
                }
            }
        }

        # return filtered list to caller
        return $Scores;
    }

    # search for items that match comparisons (e.g. ">=10") and adjust scores
    #   $SearchStrings - search strings (or arrays of search strings) indexed
    #       by field name
    #   $Scores - existing scores indexed by item ID (or NULL if none yet)
    #   returns updated scores
    # (with AND logic, comparison matches narrow down any existing results;
    #       otherwise each match adds 1 to the score for the item)
    private function SearchForComparisonMatches($SearchStrings, $Scores)
    {
        # comparisons to perform (entries at same index belong together)
        $FieldNames = array();
        $Operators = array();
        $Values = array();

        # for each field
        foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
        {
            # skip keyword field
            if ($SearchFieldName == "XXXKeywordXXX") { continue; }

            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # determine if field is known to be something other than text
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != self::FIELDTYPE_TEXT);

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # skip string if it does not look like comparison search
                #       and is not for a non-text field
                $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                        || preg_match("/^[><=]./", $SearchString);
                if (!$FoundOperator && !$IsNonTextField) { continue; }

                # determine value (string with any leading operator removed)
                $Patterns = array("/^[><!]=/", "/^[><=]/");
                $Replacements = array("", "");
                $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

                # determine operator (default to equality if none supplied)
                $Operator = "=";
                if ($FoundOperator)
                {
                    # (a found operator guarantees at least two characters)
                    $Term = trim($SearchString);
                    $FirstTwoChars = substr($Term, 0, 2);
                    $FirstChar = substr($Term, 0, 1);
                    if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                    {
                        $Operator = $FirstTwoChars;
                    }
                    elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                    {
                        $Operator = $FirstChar;
                    }
                    else
                    {
                        # no recognizable operator, so skip this string
                        continue;
                    }
                }

                # save comparison
                $FieldNames[] = $SearchFieldName;
                $Operators[] = $Operator;
                $Values[] = $Value;

                # (operator and value are escaped because they may contain
                #       characters like "<" that are significant in HTML)
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$SearchFieldName."</i> op = <i>"
                        .htmlspecialchars($Operator)."</i> val = <i>"
                        .htmlspecialchars($Value)."</i>)");
            }
        }

        # if comparisons found
        if (count($Operators))
        {
            # perform comparisons on fields and gather results
            $Results = $this->SearchFieldsForComparisonMatches(
                    $FieldNames, $Operators, $Values);

            # if search logic is set to AND
            if ($this->DefaultSearchLogic == self::LOGIC_AND)
            {
                # if no results were found
                if (empty($Results))
                {
                    # clear scores
                    $Scores = array();
                }
                # else if there were no prior results and no terms for keyword search
                elseif (empty($Scores) && ($this->InclusiveTermCount == 0))
                {
                    # add all results to scores
                    if (!is_array($Scores)) { $Scores = array(); }
                    foreach ($Results as $ItemId)
                    {
                        $Scores[$ItemId] = 1;
                    }
                }
                # else if there are prior results to narrow down
                elseif (!empty($Scores))
                {
                    # remove anything from scores that is not part of results
                    #       (results are flipped into a lookup table so that
                    #       each check is a key lookup rather than a list scan;
                    #       assumes item IDs are integers or strings)
                    $IsInResults = array_flip($Results);
                    foreach ($Scores as $ItemId => $Score)
                    {
                        if (!isset($IsInResults[$ItemId]))
                        {
                            unset($Scores[$ItemId]);
                        }
                    }
                }
            }
            else
            {
                # add result items to scores
                if ($Scores === NULL) { $Scores = array(); }
                foreach ($Results as $ItemId)
                {
                    if (isset($Scores[$ItemId]))
                    {
                        $Scores[$ItemId] += 1;
                    }
                    else
                    {
                        $Scores[$ItemId] = 1;
                    }
                }
            }
        }

        # return results to caller
        return $Scores;
    }

    # set debug level from any indicator found in search strings
    #   $SearchStrings - single search string or array of search strings
    #       (or arrays of search strings) indexed by field name
    #   returns search strings with any debug level indicator removed
    private function SetDebugLevel($SearchStrings)
    {
        # if search info is not an array
        if (!is_array($SearchStrings))
        {
            # pull out debug level if present and return string to caller
            return $this->ExtractDebugLevel($SearchStrings);
        }

        # for each array element
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # if element is an array
            if (is_array($SearchStringArray))
            {
                # for each array element
                foreach ($SearchStringArray as $Index => $SearchString)
                {
                    # pull out debug level if present
                    $SearchStrings[$FieldName][$Index] =
                            $this->ExtractDebugLevel($SearchString);
                }
            }
            else
            {
                # pull out debug level if present
                $SearchStrings[$FieldName] =
                        $this->ExtractDebugLevel($SearchStringArray);
            }
        }

        # return new search info to caller
        return $SearchStrings;
    }

    # set debug level from indicator at start of search string (if any)
    #   $SearchString - search string to check
    #   returns search string with any "DBUGLVL=N" indicator removed
    # (indicator is only honored at the start of the string, optionally
    #       preceded by whitespace, with N being one or two digits from 1-9)
    private function ExtractDebugLevel($SearchString)
    {
        # if search string starts with debug level indicator
        #       (level is captured directly, so that a string without a
        #       well-formed indicator can never be mistaken for a level)
        if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
        {
            # set debug level
            $Level = intval($Matches[1]);
            $this->DebugLevel = $Level;
            $this->DMsg(0, "Setting debug level to ".$Level);

            # remove indicator from search string
            $SearchString = preg_replace("/DBUGLVL=".$Level."/", "", $SearchString);
        }

        # return (possibly) modified search string to caller
        return $SearchString;
    }

    # load and return search result scores array containing all possible records
    #   returns array indexed by item ID, with a score of 1 for every item
    #       in the item table
    private function LoadScoresForAllRecords()
    {
        # start with empty list
        $Scores = array();

        # for every item
        $this->DB->Query("SELECT ".$this->ItemIdFieldName
                ." FROM ".$this->ItemTableName);
        while ($Record = $this->DB->FetchRow())
        {
            # set score for item to 1
            $Scores[$Record[$this->ItemIdFieldName]] = 1;
        }

        # return array with all scores to caller
        return $Scores;
    }


    # ---- private functions used in building search database

    # update count in search database for word (and for stem of word, if
    #       stemming is enabled)
    #   $Word - word to update count for
    #   $ItemId - ID of item that contains word
    #   $FieldId - ID of field that contains word
    #   $Weight - amount to add to count for word (half of this amount,
    #       rounded up, is added for the stem)
    # (NOTE: the record of which counts have already been added is indexed
    #       by word and field but not by item -- presumably it is cleared
    #       elsewhere for each new item; verify against caller)
    private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
    {
        # retrieve ID for word
        $WordIds = array($this->GetWordId($Word, TRUE));

        # if stemming is enabled
        if ($this->StemmingEnabled)
        {
            # retrieve ID for stem of word
            $Stem = PorterStemmer::Stem($Word, TRUE);
            $WordIds[] = $this->GetStemId($Stem, TRUE);
        }

        # for word and stem of word
        foreach ($WordIds as $WordId)
        {
            # if word count already added to database
            if (isset($this->WordCountAdded[$WordId][$FieldId]))
            {
                # update word count
                $this->DB->Query("UPDATE SearchWordCounts SET Count=Count+".$Weight
                        ." WHERE WordId=".$WordId
                        ." AND ItemId=".$ItemId
                        ." AND FieldId=".$FieldId);
            }
            else
            {
                # add word count to DB
                $this->DB->Query("INSERT INTO SearchWordCounts"
                        ." (WordId, ItemId, FieldId, Count) VALUES"
                        ." (".$WordId.", ".$ItemId.", ".$FieldId.", ".$Weight.")");

                # remember that we added count for this word
                $this->WordCountAdded[$WordId][$FieldId] = TRUE;
            }

            # decrease weight for stem
            $Weight = ceil($Weight / 2);
        }
    }

    # retrieve content of field for item
    # (placeholder that exits with an error message -- presumably meant to
    #       be overridden by child classes)
    protected function GetFieldContent($ItemId, $FieldName)
    {
        # error out
        exit("<br>SE - ERROR:  GetFieldContent() not implemented<br>\n");
    }

    # record words from text in search database
    #   $ItemId - ID of item that text belongs to
    #   $FieldName - name of field that text came from
    #   $Weight - weight to use for word counts for keyword field
    #   $Text - text to record words from
    #   $IncludeInKeyword - TRUE to also record words for keyword field
    # (NOTE: counts for the field itself are recorded with the default
    #       weight, with $Weight only applied to the keyword field counts)
    private function RecordSearchInfoForText(
            $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
    {
        # normalize text
        $Words = $this->ParseSearchStringForWords($Text, TRUE);

        # bail out if there was no text left after parsing
        if (count($Words) == 0) { return; }

        # get ID for field
        $FieldId = $this->GetFieldId($FieldName);

        # if text should be included in keyword searches
        if ($IncludeInKeyword)
        {
            # get ID for keyword field
            $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
        }

        # for each word
        foreach ($Words as $Word => $Flags)
        {
            # update count for word
            $this->UpdateWordCount($Word, $ItemId, $FieldId);

            # if text should be included in keyword searches
            if ($IncludeInKeyword)
            {
                # update keyword field count for word
                $this->UpdateWordCount(
                        $Word, $ItemId, $KeywordFieldId, $Weight);
            }
        }
    }

    # print debug message if level set high enough
    #   $Level - message is printed only if current debug level is above this
    #   $Msg - message to print (printed as HTML, so any text that did not
    #       come from this class should already be escaped)
    protected function DMsg($Level, $Msg)
    {
        if ($this->DebugLevel > $Level)
        {
            print("SE:  ".$Msg."<br>\n");
        }
    }

    # ---- BACKWARD COMPATIBILITY --------------------------------------------

    # possible types of logical operators
    const SEARCHLOGIC_AND = 1;
    const SEARCHLOGIC_OR = 2;
}

?>