00001 <?PHP 00002 00003 # 00004 # FILE: Scout--SearchEngine.php 00005 # 00006 # FUNCTIONS PROVIDED: 00007 # SearchEngine->SearchEngine() 00008 # - constructor 00009 # SearchEngine->Search($SearchString, 00010 # $StartingResult = 0, $NumberOfResults = 10) 00011 # - search for text and return list of matching item IDs 00012 # SearchEngine->FieldedSearch($SearchStrings, 00013 # $StartingResult = 0, $NumberOfResults = 10) 00014 # - search for text in specific fields and return item ID list 00015 # SearchEngine->NumberOfResults() 00016 # - return number of results found in last search 00017 # SearchEngine->SearchTime() 00018 # - return time in seconds that last search took 00019 # SearchEngine->AddResultFilterFunction($FunctionName) 00020 # - add function that will be used to filter search results 00021 # SearchEngine->UpdateForItem($ItemId) 00022 # - update search info for specified item 00023 # SearchEngine->UpdateForItems($StartingItemId, $NumberOfItems) 00024 # - update search info for all items in specified range (returns 00025 # ID of last item updated) 00026 # 00027 # AUTHOR: Edward Almasy 00028 # 00029 # Open Source Metadata Archive Search Engine (OSMASE) 00030 # Copyright 2002-2006 Internet Scout Project 00031 # http://scout.wisc.edu 00032 # 00033 00034 class SearchEngine { 00035 00036 # ---- PUBLIC INTERFACE -------------------------------------------------- 00037 00038 # possible types of logical operators 00039 const SEARCHLOGIC_AND = 1; 00040 const SEARCHLOGIC_OR = 2; 00041 00042 # flags used for indicating field types 00043 const SEARCHFIELD_TEXT = 1; 00044 const SEARCHFIELD_NUMERIC = 2; 00045 const SEARCHFIELD_DATE = 3; 00046 const SEARCHFIELD_DATERANGE = 4; 00047 00048 # object constructor 00049 function SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName) 00050 { 00051 # save database object for our use 00052 $this->DB = $DB; 00053 00054 # save item access parameters 00055 $this->ItemTableName = $ItemTableName; 00056 $this->ItemIdFieldName = 
$ItemIdFieldName; 00057 00058 # initialize internal values 00059 $this->DefaultSearchLogic = SEARCHLOGIC_AND; 00060 00061 # define flags used for indicating word states 00062 if (!defined("WORD_PRESENT")) { define("WORD_PRESENT", 1); } 00063 if (!defined("WORD_EXCLUDED")) { define("WORD_EXCLUDED", 2); } 00064 if (!defined("WORD_REQUIRED")) { define("WORD_REQUIRED", 4); } 00065 00066 # set default debug state 00067 $this->DebugLevel = 0; 00068 } 00069 00070 # add field to be searched 00071 function AddField($FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch) 00072 { 00073 # save values 00074 $this->FieldInfo[$FieldName]["DBFieldName"] = $DBFieldName; 00075 $this->FieldInfo[$FieldName]["FieldType"] = $FieldType; 00076 $this->FieldInfo[$FieldName]["Weight"] = $Weight; 00077 $this->FieldInfo[$FieldName]["InKeywordSearch"] = $UsedInKeywordSearch; 00078 } 00079 00080 # retrieve info about tables and fields (useful for child objects) 00081 function ItemTableName() { return $this->ItemTableName; } 00082 function ItemIdFieldName() { return $this->ItemIdFieldName; } 00083 function DBFieldName($FieldName) { return $this->FieldInfo[$FieldName]["DBFieldName"]; } 00084 function FieldType($FieldName) { return $this->FieldInfo[$FieldName]["FieldType"]; } 00085 function FieldWeight($FieldName) { return $this->FieldInfo[$FieldName]["Weight"]; } 00086 function FieldInKeywordSearch($FieldName) { return $this->FieldInfo[$FieldName]["InKeywordSearch"]; } 00087 00088 # set debug level 00089 function DebugLevel($Setting) 00090 { 00091 $this->DebugLevel = $Setting; 00092 } 00093 00094 00095 # ---- search functions 00096 00097 # perform keyword search 00098 function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10, 00099 $SortByField = NULL, $SortDescending = TRUE) 00100 { 00101 $SearchString = $this->SetDebugLevel($SearchString); 00102 if ($this->DebugLevel > 0) { print("SE: In Search() with search string \"$SearchString\"<br>\n"); } 00103 00104 # save 
start time to use in calculating search time 00105 $StartTime = $this->GetMicrotime(); 00106 00107 # clear word counts 00108 $this->InclusiveTermCount = 0; 00109 $this->RequiredTermCount = 0; 00110 $this->ExcludedTermCount = 0; 00111 00112 # parse search string into terms 00113 $Words = $this->ParseSearchStringForWords($SearchString); 00114 if ($this->DebugLevel > 1) { print("SE: Found ".count($Words)." words<br>\n"); } 00115 00116 # parse search string for phrases 00117 $Phrases = $this->ParseSearchStringForPhrases($SearchString); 00118 if ($this->DebugLevel > 1) { print("SE: Found ".count($Phrases)." phrases<br>\n"); } 00119 00120 # if only excluded terms specified 00121 if ($this->ExcludedTermCount && !$this->InclusiveTermCount) 00122 { 00123 # load all records 00124 if ($this->DebugLevel > 1) { print("SE: Loading all records<br>\n"); } 00125 $Scores = $this->LoadScoresForAllRecords(); 00126 } 00127 else 00128 { 00129 # perform searches 00130 $Scores = $this->SearchForWords($Words); 00131 if ($this->DebugLevel > 1) { print("SE: Found ".count($Scores)." results after word search<br>\n"); } 00132 $Scores = $this->SearchForPhrases($Phrases, $Scores); 00133 if ($this->DebugLevel > 1) { print("SE: Found ".count($Scores)." 
results after phrase search<br>\n"); } 00134 } 00135 00136 # if search results found 00137 if (count($Scores) > 0) 00138 { 00139 # handle any excluded words 00140 $Scores = $this->FilterOnExcludedWords($Words, $Scores); 00141 00142 # strip off any results that don't contain required words 00143 $Scores = $this->FilterOnRequiredWords($Scores); 00144 } 00145 00146 # count, sort, and trim search result scores list 00147 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults, 00148 $SortByField, $SortDescending); 00149 00150 # record search time 00151 $this->LastSearchTime = $this->GetMicrotime() - $StartTime; 00152 00153 # return list of items to caller 00154 if ($this->DebugLevel > 0) { print("SE: Ended up with ".$this->NumberOfResultsAvailable." results<br>\n"); } 00155 return $Scores; 00156 } 00157 00158 # perform search across multiple fields and return trimmed results to caller 00159 function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10, 00160 $SortByField = NULL, $SortDescending = TRUE) 00161 { 00162 $SearchStrings = $this->SetDebugLevel($SearchStrings); 00163 if ($this->DebugLevel > 0) { print("SE: In FieldedSearch() with " 00164 .count($SearchStrings)." search strings<br>\n"); } 00165 00166 # save start time to use in calculating search time 00167 $StartTime = $this->GetMicrotime(); 00168 00169 # perform search 00170 $Scores = $this->SearchAcrossFields($SearchStrings); 00171 $Scores = ($Scores === NULL) ? array() : $Scores; 00172 00173 # count, sort, and trim search result scores list 00174 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults, 00175 $SortByField, $SortDescending); 00176 00177 # record search time 00178 $this->LastSearchTime = $this->GetMicrotime() - $StartTime; 00179 00180 # return list of items to caller 00181 if ($this->DebugLevel > 0) { print("SE: Ended up with " 00182 .$this->NumberOfResultsAvailable." 
results<br>\n"); } 00183 return $Scores; 00184 } 00185 00186 # perform search with logical groups of fielded searches 00187 function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10, 00188 $SortByField = NULL, $SortDescending = TRUE) 00189 { 00190 foreach ($SearchGroups as $Index => $Groups) 00191 { 00192 if (isset($SearchGroups[$Index]["SearchStrings"])) 00193 { 00194 $SearchGroups[$Index]["SearchStrings"] = 00195 $this->SetDebugLevel($SearchGroups[$Index]["SearchStrings"]); 00196 } 00197 } 00198 if ($this->DebugLevel > 0) { print("SE: In GroupedSearch() with " 00199 .count($SearchGroups)." search groups<br>\n"); } 00200 00201 # save start time to use in calculating search time 00202 $StartTime = $this->GetMicrotime(); 00203 00204 # start with no results 00205 $Scores = array(); 00206 00207 # save AND/OR search setting 00208 $SavedSearchLogic = $this->DefaultSearchLogic; 00209 00210 # for each search group 00211 $FirstSearch = TRUE; 00212 foreach ($SearchGroups as $Group) 00213 { 00214 if ($this->DebugLevel > 0) { print("SE: ----- GROUP " 00215 ."---------------------------<br>\n"); } 00216 00217 # if group has AND/OR setting specified 00218 if (isset($Group["Logic"])) 00219 { 00220 # use specified AND/OR setting 00221 $this->DefaultSearchLogic = $Group["Logic"]; 00222 } 00223 else 00224 { 00225 # use saved AND/OR setting 00226 $this->DefaultSearchLogic = $SavedSearchLogic; 00227 } 00228 if ($this->DebugLevel > 2) { print("SE: Logic is " 00229 .(($this->DefaultSearchLogic == SEARCHLOGIC_AND) ? 
"AND" : "OR") 00230 ."<br>\n"); } 00231 00232 # if we have search strings for this group 00233 if (isset($Group["SearchStrings"])) 00234 { 00235 # perform search 00236 $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]); 00237 00238 # if search was conducted 00239 if ($GroupScores !== NULL) 00240 { 00241 # if saved AND/OR setting is OR or this is first search 00242 if (($SavedSearchLogic == SEARCHLOGIC_OR) || $FirstSearch) 00243 { 00244 # add search results to result list 00245 foreach ($GroupScores as $ItemId => $Score) 00246 { 00247 if (isset($Scores[$ItemId])) 00248 { 00249 $Scores[$ItemId] += $Score; 00250 } 00251 else 00252 { 00253 $Scores[$ItemId] = $Score; 00254 } 00255 } 00256 00257 # (reset flag indicating first search) 00258 $FirstSearch = FALSE; 00259 } 00260 else 00261 { 00262 # AND search results with previous results 00263 $OldScores = $Scores; 00264 $Scores = array(); 00265 foreach ($GroupScores as $ItemId => $Score) 00266 { 00267 if (isset($OldScores[$ItemId])) 00268 { 00269 $Scores[$ItemId] = $OldScores[$ItemId] + $Score; 00270 } 00271 } 00272 } 00273 } 00274 } 00275 } 00276 00277 # restore AND/OR search setting 00278 $this->DefaultSearchLogic = $SavedSearchLogic; 00279 00280 # count, sort, and trim search result scores list 00281 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults, 00282 $SortByField, $SortDescending); 00283 00284 # record search time 00285 $this->LastSearchTime = $this->GetMicrotime() - $StartTime; 00286 00287 # return search results to caller 00288 if ($this->DebugLevel > 0) { print("SE: Ended up with " 00289 .$this->NumberOfResultsAvailable." 
results<br>\n"); } 00290 return $Scores; 00291 } 00292 00293 # add function that will be called to filter search results 00294 function AddResultFilterFunction($FunctionName) 00295 { 00296 # save filter function name 00297 $this->FilterFuncs[] = $FunctionName; 00298 } 00299 00300 # get or set default search logic (AND or OR) 00301 function DefaultSearchLogic($NewSetting = NULL) 00302 { 00303 if ($NewSetting != NULL) 00304 { 00305 $this->DefaultSearchLogic = $NewSetting; 00306 } 00307 return $this->DefaultSearchLogic; 00308 } 00309 00310 function SearchTermsRequiredByDefault($NewSetting = TRUE) 00311 { 00312 if ($NewSetting) 00313 { 00314 $this->DefaultSearchLogic = SEARCHLOGIC_AND; 00315 } 00316 else 00317 { 00318 $this->DefaultSearchLogic = SEARCHLOGIC_OR; 00319 } 00320 } 00321 00322 function NumberOfResults() 00323 { 00324 return $this->NumberOfResultsAvailable; 00325 } 00326 00327 function SearchTerms() 00328 { 00329 return $this->SearchTermList; 00330 } 00331 00332 function SearchTime() 00333 { 00334 return $this->LastSearchTime; 00335 } 00336 00337 # report total weight for all fields involved in search 00338 function FieldedSearchWeightScale($SearchStrings) 00339 { 00340 $Weight = 0; 00341 $IncludedKeywordSearch = FALSE; 00342 foreach ($SearchStrings as $FieldName => $SearchStringArray) 00343 { 00344 if ($FieldName == "XXXKeywordXXX") 00345 { 00346 $IncludedKeywordSearch = TRUE; 00347 } 00348 else 00349 { 00350 $Weight += $this->FieldInfo[$FieldName]["Weight"]; 00351 } 00352 } 00353 if ($IncludedKeywordSearch) 00354 { 00355 foreach ($this->FieldInfo as $FieldName => $Info) 00356 { 00357 if ($Info["InKeywordSearch"]) 00358 { 00359 $Weight += $Info["Weight"]; 00360 } 00361 } 00362 } 00363 return $Weight; 00364 } 00365 00366 00367 # ---- search database update functions 00368 00369 # update search DB for the specified item 00370 function UpdateForItem($ItemId) 00371 { 00372 # bail out if item ID is negative (indicating a temporary record) 00373 if ($ItemId < 0) 
{ return; } 00374 00375 # clear word count added flags for this item 00376 unset($this->WordCountAdded); 00377 00378 # delete any existing info for this item 00379 $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId); 00380 00381 # for each metadata field 00382 foreach ($this->FieldInfo as $FieldName => $Info) 00383 { 00384 # if search weight for field is positive 00385 if ($Info["Weight"] > 0) 00386 { 00387 # retrieve text for field 00388 $Text = $this->GetFieldContent($ItemId, $FieldName); 00389 00390 # if text is array 00391 if (is_array($Text)) 00392 { 00393 # for each text string in array 00394 foreach ($Text as $String) 00395 { 00396 # record search info for text 00397 $this->RecordSearchInfoForText($ItemId, $FieldName, 00398 $Info["Weight"], $String, 00399 $Info["InKeywordSearch"]); 00400 } 00401 } 00402 else 00403 { 00404 # record search info for text 00405 $this->RecordSearchInfoForText($ItemId, $FieldName, 00406 $Info["Weight"], $Text, 00407 $Info["InKeywordSearch"]); 00408 } 00409 } 00410 } 00411 } 00412 00413 # update search DB for the specified range of items 00414 function UpdateForItems($StartingItemId, $NumberOfItems) 00415 { 00416 # retrieve IDs for specified number of items starting at specified ID 00417 $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName 00418 ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId 00419 ." ORDER BY ".$this->ItemIdFieldName." 
LIMIT ".$NumberOfItems); 00420 $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName); 00421 00422 # for each retrieved item ID 00423 foreach ($ItemIds as $ItemId) 00424 { 00425 # update search info for item 00426 $this->UpdateForItem($ItemId); 00427 } 00428 00429 # return ID of last item updated to caller 00430 return $ItemId; 00431 } 00432 00433 # drop all data pertaining to item from search DB 00434 function DropItem($ItemId) 00435 { 00436 # drop all entries pertaining to item from word count table 00437 $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId); 00438 } 00439 00440 # drop all data pertaining to field from search DB 00441 function DropField($FieldName) 00442 { 00443 # retrieve our ID for field 00444 $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields " 00445 ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId"); 00446 00447 # drop all entries pertaining to field from word counts table 00448 $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = \'".$FieldId."\'"); 00449 00450 # drop field from our fields table 00451 $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = \'".$FieldId."\'"); 00452 } 00453 00454 # return total number of terms indexed by search engine 00455 function SearchTermCount() 00456 { 00457 return $this->DB->Query("SELECT COUNT(*) AS TermCount" 00458 ." FROM SearchWords", "TermCount"); 00459 } 00460 00461 # return total number of items indexed by search engine 00462 function ItemCount() 00463 { 00464 return $this->DB->Query("SELECT COUNT(DISTINCT ItemId) AS ItemCount" 00465 ." 
FROM SearchWordCounts", "ItemCount"); 00466 } 00467 00468 # add synonym(s) 00469 function AddSynonyms($Word, $Synonyms) 00470 { 00471 # get ID for word 00472 $WordId = $this->GetWordId($Word, TRUE); 00473 00474 # for each synonym passed in 00475 foreach ($Synonyms as $Synonym) 00476 { 00477 # get ID for synonym 00478 $SynonymId = $this->GetWordId($Synonym, TRUE); 00479 00480 # if synonym is not already in database 00481 $this->DB->Query("SELECT * FROM SearchWordSynonyms" 00482 ." WHERE (WordIdA = ".$WordId 00483 ." AND WordIdB = ".$SynonymId.")" 00484 ." OR (WordIdB = ".$WordId 00485 ." AND WordIdA = ".$SynonymId.")"); 00486 if ($this->DB->NumRowsSelected() == 0) 00487 { 00488 # add synonym entry to database 00489 $this->DB->Query("INSERT INTO SearchWordSynonyms" 00490 ." (WordIdA, WordIdB)" 00491 ." VALUES (".$WordId.", ".$SynonymId.")"); 00492 } 00493 } 00494 } 00495 00496 # remove synonym(s) 00497 function RemoveSynonyms($Word, $Synonyms = NULL) 00498 { 00499 # find ID for word 00500 $WordId = $this->GetWordId($Word); 00501 00502 # if ID found 00503 if ($WordId !== NULL) 00504 { 00505 # if no specific synonyms provided 00506 if ($Synonyms === NULL) 00507 { 00508 # remove all synonyms for word 00509 $this->DB->Query("DELETE FROM SearchWordSynonyms" 00510 ." WHERE WordIdA = '".$WordId."'" 00511 ." OR WordIdB = '".$WordId."'"); 00512 } 00513 else 00514 { 00515 # for each specified synonym 00516 foreach ($Synonyms as $Synonym) 00517 { 00518 # look up ID for synonym 00519 $SynonymId = $this->GetWordId($Synonym); 00520 00521 # if synonym ID was found 00522 if ($SynonymId !== NULL) 00523 { 00524 # delete synonym entry 00525 $this->DB->Query("DELETE FROM SearchWordSynonyms" 00526 ." WHERE (WordIdA = '".$WordId."'" 00527 ." AND WordIdB = '".$SynonymId."')" 00528 ." OR (WordIdB = '".$WordId."'" 00529 ." 
AND WordIdA = '".$SynonymId."')"); 00530 } 00531 } 00532 } 00533 } 00534 } 00535 00536 # remove all synonyms 00537 function RemoveAllSynonyms() 00538 { 00539 $this->DB->Query("DELETE FROM SearchWordSynonyms"); 00540 } 00541 00542 # get synonyms for word (returns array of synonyms) 00543 function GetSynonyms($Word) 00544 { 00545 # assume no synonyms will be found 00546 $Synonyms = array(); 00547 00548 # look up ID for word 00549 $WordId = $this->GetWordId($Word); 00550 00551 # if word ID was found 00552 if ($WordId !== NULL) 00553 { 00554 # look up IDs of all synonyms for this word 00555 $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms" 00556 ." WHERE WordIdA = ".$WordId 00557 ." OR WordIdB = ".$WordId); 00558 $SynonymIds = array(); 00559 while ($Record = $this->DB->FetchRow) 00560 { 00561 $SynonymIds[] = ($Record["WordIdA"] == $WordId) 00562 ? $Record["WordIdB"] : $Record["WordIdA"]; 00563 } 00564 00565 # for each synonym ID 00566 foreach ($SynonymIds as $SynonymId) 00567 { 00568 # look up synonym word and add to synonym list 00569 $Synonyms[] = $this->GetWord($SynonymId); 00570 } 00571 } 00572 00573 # return synonyms to caller 00574 return $Synonyms; 00575 } 00576 00577 # get all synonyms (returns 2D array w/ words as first index) 00578 function GetAllSynonyms() 00579 { 00580 # assume no synonyms will be found 00581 $SynonymList = array(); 00582 00583 # for each synonym ID pair 00584 $OurDB = new SPTDatabase(); 00585 $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"); 00586 while ($Record = $OurDB->FetchRow()) 00587 { 00588 # look up words 00589 $Word = $this->GetWord($Record["WordIdA"]); 00590 $Synonym = $this->GetWord($Record["WordIdB"]); 00591 00592 # if we do not already have an entry for the word 00593 # or synonym is not listed for this word 00594 if (!isset($SynonymList[$Word]) 00595 || !in_array($Synonym, $SynonymList[$Word])) 00596 { 00597 # add entry for synonym 00598 $SynonymList[$Word][] = $Synonym; 00599 } 00600 00601 # 
if we do not already have an entry for the synonym 00602 # or word is not listed for this synonym 00603 if (!isset($SynonymList[$Synonym]) 00604 || !in_array($Word, $SynonymList[$Synonym])) 00605 { 00606 # add entry for word 00607 $SynonymList[$Synonym][] = $Word; 00608 } 00609 } 00610 00611 # for each word 00612 # (this loop removes reciprocal duplicates) 00613 foreach ($SynonymList as $Word => $Synonyms) 00614 { 00615 # for each synonym for that word 00616 foreach ($Synonyms as $Synonym) 00617 { 00618 # if synonym has synonyms and word is one of them 00619 if (isset($SynonymList[$Synonym]) 00620 && isset($SynonymList[$Word]) 00621 && in_array($Word, $SynonymList[$Synonym]) 00622 && in_array($Synonym, $SynonymList[$Word])) 00623 { 00624 # if word has less synonyms than synonym 00625 if (count($SynonymList[$Word]) 00626 < count($SynonymList[$Synonym])) 00627 { 00628 # remove synonym from synonym list for word 00629 $SynonymList[$Word] = array_diff( 00630 $SynonymList[$Word], array($Synonym)); 00631 00632 # if no synonyms left for word 00633 if (!count($SynonymList[$Word])) 00634 { 00635 # remove empty synonym list for word 00636 unset($SynonymList[$Word]); 00637 } 00638 } 00639 else 00640 { 00641 # remove word from synonym list for synonym 00642 $SynonymList[$Synonym] = array_diff( 00643 $SynonymList[$Synonym], array($Word)); 00644 00645 # if no synonyms left for word 00646 if (!count($SynonymList[$Synonym])) 00647 { 00648 # remove empty synonym list for word 00649 unset($SynonymList[$Synonym]); 00650 } 00651 } 00652 } 00653 } 00654 } 00655 00656 # sort array alphabetically (just for convenience) 00657 foreach ($SynonymList as $Word => $Synonyms) 00658 { 00659 asort($SynonymList[$Word]); 00660 } 00661 ksort($SynonymList); 00662 00663 # return 2D array of synonyms to caller 00664 return $SynonymList; 00665 } 00666 00667 # set all synonyms (accepts 2D array w/ words as first index) 00668 function SetAllSynonyms($SynonymList) 00669 { 00670 # remove all existing 
synonyms 00671 $this->RemoveAllSynonyms(); 00672 00673 # for each synonym entry passed in 00674 foreach ($SynonymList as $Word => $Synonyms) 00675 { 00676 # add synonyms for word 00677 $this->AddSynonyms($Word, $Synonyms); 00678 } 00679 } 00680 00681 # suggest alternatives 00682 function SuggestAlternateSearches($SearchString) 00683 { 00684 # 00685 } 00686 00687 00688 # ---- PRIVATE INTERFACE ------------------------------------------------- 00689 00690 var $DB; 00691 var $DebugLevel; 00692 var $WordCountAdded; 00693 var $NumberOfResultsAvailable; 00694 var $LastSearchTime; 00695 var $FilterFuncs; 00696 var $FieldIds; 00697 var $DefaultSearchLogic; 00698 var $FieldInfo; 00699 var $RequiredTermCount; 00700 var $RequiredTermCounts; 00701 var $InclusiveTermCount; 00702 var $ExcludedTermCount; 00703 var $ItemTableName; 00704 var $ItemIdFieldName; 00705 var $SearchTermList; 00706 00707 00708 # ---- common private functions (used in both searching and DB build) 00709 00710 # normalize and parse search string into list of search terms 00711 function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE) 00712 { 00713 # strip off any surrounding whitespace 00714 $Text = trim($SearchString); 00715 00716 # set up normalization replacement strings 00717 $Patterns = array( 00718 "/'s[^a-z0-9\-+~]+/i", # get rid of possessive plurals 00719 "/'/", # get rid of single quotes / apostrophes 00720 "/\"[^\"]*\"/", # get rid of phrases (NOTE: HARD-CODED INDEX BELOW!!!) " 00721 "/\\([^)]*\\)/", # get rid of groups (NOTE: HARD-CODED INDEX BELOW!!!) 
00722 "/[^a-z0-9\-+~]+/i", # convert non-alphanumerics / non-minus/plus to a space 00723 "/([^\\s])-/i", # convert minus preceded by anything but whitespace to a space 00724 "/([^\\s])\\+/i", # convert plus preceded by anything but whitespace to a space 00725 "/-\\s/i", # convert minus followed by whitespace to a space 00726 "/\\+\\s/i", # convert plus followed by whitespace to a space 00727 "/~\\s/i", # convert tilde followed by whitespace to a space 00728 "/[ ]+/" # convert multiple spaces to one space 00729 ); 00730 $Replacements = array( 00731 " ", 00732 "", 00733 " ", 00734 " ", 00735 "\\1 ", 00736 "\\1 ", 00737 " ", 00738 " ", 00739 " ", 00740 " ", 00741 " " 00742 ); 00743 00744 # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens) 00745 if ($IgnorePhrases) 00746 { 00747 # switch phrase removal to double quote removal (HARD-CODED INDEX INTO PATTERN LIST!!) 00748 $Patterns[2] = "/\"/"; 00749 00750 # switch group removal to paren removal (HARD-CODED INDEX INTO PATTERN LIST!!) 
00751 $Patterns[3] = "/[\(\)]+/"; 00752 } 00753 00754 # remove punctuation from text and normalize whitespace 00755 $Text = preg_replace($Patterns, $Replacements, $Text); 00756 if ($this->DebugLevel > 2) { print("SE: Normalized search string is '${Text}'<br>\n"); } 00757 00758 # convert text to lower case 00759 $Text = strtolower($Text); 00760 00761 # strip off any extraneous whitespace 00762 $Text = trim($Text); 00763 00764 # start with an empty array 00765 $Words = array(); 00766 00767 # if we have no words left after parsing 00768 if (strlen($Text) != 0) 00769 { 00770 # for each word 00771 foreach (explode(" ", $Text) as $Word) 00772 { 00773 # grab first character of word 00774 $FirstChar = substr($Word, 0, 1); 00775 00776 # strip off option characters and set flags appropriately 00777 $Flags = WORD_PRESENT; 00778 if ($FirstChar == "-") 00779 { 00780 $Word = substr($Word, 1); 00781 $Flags |= WORD_EXCLUDED; 00782 if (!isset($Words[$Word])) 00783 { 00784 $this->ExcludedTermCount++; 00785 } 00786 } 00787 else 00788 { 00789 if ($FirstChar == "~") 00790 { 00791 $Word = substr($Word, 1); 00792 } 00793 elseif (($this->DefaultSearchLogic == SEARCHLOGIC_AND) 00794 || ($FirstChar == "+")) 00795 { 00796 if ($FirstChar == "+") 00797 { 00798 $Word = substr($Word, 1); 00799 } 00800 $Flags |= WORD_REQUIRED; 00801 if (!isset($Words[$Word])) 00802 { 00803 $this->RequiredTermCount++; 00804 } 00805 } 00806 if (!isset($Words[$Word])) 00807 { 00808 $this->InclusiveTermCount++; 00809 $this->SearchTermList[] = $Word; 00810 } 00811 } 00812 00813 # store flags to indicate word found 00814 $Words[$Word] = $Flags; 00815 if ($this->DebugLevel > 3) { print("SE: Word identified (${Word})<br>\n"); } 00816 } 00817 } 00818 00819 # return normalized words to caller 00820 return $Words; 00821 } 00822 00823 function GetFieldId($FieldName) 00824 { 00825 # if field ID is not in cache 00826 if (!isset($this->FieldIds[$FieldName])) 00827 { 00828 # look up field info in database 00829 
$this->DB->Query("SELECT FieldId FROM SearchFields " 00830 ."WHERE FieldName = '".addslashes($FieldName)."'"); 00831 00832 # if field was found 00833 if ($Record = $this->DB->FetchRow()) 00834 { 00835 # load info from DB record 00836 $FieldId = $Record["FieldId"]; 00837 } 00838 else 00839 { 00840 # add field to database 00841 $this->DB->Query("INSERT INTO SearchFields (FieldName) " 00842 ."VALUES ('".addslashes($FieldName)."')"); 00843 00844 # retrieve ID for newly added field 00845 $FieldId = $this->DB->LastInsertId("SearchFields"); 00846 } 00847 00848 # cache field info 00849 $this->FieldIds[$FieldName] = $FieldId; 00850 } 00851 00852 # return cached ID to caller 00853 return $this->FieldIds[$FieldName]; 00854 } 00855 00856 # retrieve ID for specified word (returns NULL if no ID found) 00857 function GetWordId($Word, $AddIfNotFound = FALSE) 00858 { 00859 static $WordIdCache; 00860 00861 # if word was in ID cache 00862 if (isset($WordIdCache[$Word])) 00863 { 00864 # use ID from cache 00865 $WordId = $WordIdCache[$Word]; 00866 } 00867 else 00868 { 00869 # look up ID in database 00870 $WordId = $this->DB->Query("SELECT WordId FROM SearchWords " 00871 ."WHERE WordText='".addslashes($Word)."'", "WordId"); 00872 00873 # if ID was not found and caller requested it be added 00874 if (($WordId === NULL) && $AddIfNotFound) 00875 { 00876 # add word to database 00877 $this->DB->Query("INSERT INTO SearchWords (WordText)" 00878 ." 
VALUES ('".addslashes(strtolower($Word))."')"); 00879 00880 # get ID for newly added word 00881 $WordId = $this->DB->LastInsertId("SearchWords"); 00882 } 00883 00884 # save ID to cache 00885 $WordIdCache[$Word] = $WordId; 00886 } 00887 00888 # return ID to caller 00889 return $WordId; 00890 } 00891 00892 # retrieve word for specified word ID (returns FALSE if no word found) 00893 function GetWord($WordId) 00894 { 00895 static $WordCache; 00896 00897 # if word was in cache 00898 if (isset($WordCache[$WordId])) 00899 { 00900 # use word from cache 00901 $Word = $WordCache[$WordId]; 00902 } 00903 else 00904 { 00905 # look up word in database 00906 $Word = $this->DB->Query("SELECT WordText FROM SearchWords " 00907 ."WHERE WordId='".$WordId."'", "WordText"); 00908 00909 # save word to cache 00910 $WordCache[$WordId] = $Word; 00911 } 00912 00913 # return word to caller 00914 return $Word; 00915 } 00916 00917 00918 # ---- private functions used in searching 00919 00920 # perform search across multiple fields and return raw results to caller 00921 function SearchAcrossFields($SearchStrings) 00922 { 00923 # start by assuming no search will be done 00924 $Scores = NULL; 00925 00926 # clear word counts 00927 $this->InclusiveTermCount = 0; 00928 $this->RequiredTermCount = 0; 00929 $this->ExcludedTermCount = 0; 00930 00931 # for each field 00932 $NeedComparisonSearch = FALSE; 00933 foreach ($SearchStrings as $FieldName => $SearchStringArray) 00934 { 00935 # convert search string to array if needed 00936 if (!is_array($SearchStringArray)) 00937 { 00938 $SearchStringArray = array($SearchStringArray); 00939 } 00940 00941 # for each search string for this field 00942 foreach ($SearchStringArray as $SearchString) 00943 { 00944 # if field is keyword or field is text and does not look like comparison match 00945 if (($FieldName == "XXXKeywordXXX") 00946 || (isset($this->FieldInfo[$FieldName]) 00947 && ($this->FieldInfo[$FieldName]["FieldType"] == SEARCHFIELD_TEXT) 00948 && 
!preg_match("/^[><!]=./", $SearchString) 00949 && !preg_match("/^[><=]./", $SearchString))) 00950 { 00951 if ($this->DebugLevel > 0) { print("SE: Searching text field \"" 00952 .$FieldName."\" for string \"$SearchString\"<br>\n"); } 00953 00954 # normalize text and split into words 00955 $Words[$FieldName] = 00956 $this->ParseSearchStringForWords($SearchString); 00957 00958 # calculate scores for matching items 00959 if (count($Words[$FieldName])) 00960 { 00961 $Scores = $this->SearchForWords( 00962 $Words[$FieldName], $FieldName, $Scores); 00963 if ($this->DebugLevel > 3) { print("SE: Have " 00964 .count($Scores)." results after word search<br>\n"); } 00965 } 00966 00967 # split into phrases 00968 $Phrases[$FieldName] = 00969 $this->ParseSearchStringForPhrases($SearchString); 00970 00971 # handle any phrases 00972 if (count($Phrases[$FieldName])) 00973 { 00974 $Scores = $this->SearchForPhrases( 00975 $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE); 00976 if ($this->DebugLevel > 3) { print("SE: Have ".count($Scores) 00977 ." results after phrase search<br>\n"); } 00978 } 00979 } 00980 else 00981 { 00982 # set flag to indicate possible comparison search candidate found 00983 $NeedComparisonSearch = TRUE; 00984 } 00985 } 00986 } 00987 00988 # perform comparison searches 00989 if ($NeedComparisonSearch) 00990 { 00991 $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores); 00992 if ($this->DebugLevel > 3) { print("SE: Have ".count($Scores)." 
results after comparison search<br>\n"); }
        }

        # if no results found and exclusions specified
        if (!count($Scores) && $this->ExcludedTermCount)
        {
            # load all records
            $Scores = $this->LoadScoresForAllRecords();
        }

        # if search results found
        if (count($Scores))
        {
            # for each search text string
            foreach ($SearchStrings as $FieldName => $SearchStringArray)
            {
                # convert search string to array if needed
                if (!is_array($SearchStringArray))
                {
                    $SearchStringArray = array($SearchStringArray);
                }

                # for each search string for this field
                foreach ($SearchStringArray as $SearchString)
                {
                    # if field is text
                    if (($FieldName == "XXXKeywordXXX")
                            || (isset($this->FieldInfo[$FieldName])
                                    && ($this->FieldInfo[$FieldName]["FieldType"] == SEARCHFIELD_TEXT)))
                    {
                        # if there are words in search text
                        if (isset($Words[$FieldName]))
                        {
                            # handle any excluded words
                            $Scores = $this->FilterOnExcludedWords($Words[$FieldName], $Scores, $FieldName);
                        }

                        # handle any excluded phrases
                        if (isset($Phrases[$FieldName]))
                        {
                            $Scores = $this->SearchForPhrases(
                                    $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
                        }
                    }
                }
            }

            # strip off any results that don't contain required words
            $Scores = $this->FilterOnRequiredWords($Scores);
        }

        # return search result scores to caller
        return $Scores;
    }

    # search for words in specified field
    #   $Words     - WORD_* flag bit masks indexed by word
    #   $FieldName - name of field to search (defaults to the keyword pseudo-field)
    #   $Scores    - existing scores indexed by item ID (optional)
    # returns the scores array with the stored count of every non-excluded
    #   word (plus half of each synonym's count, rounded up) added to the
    #   score of each matching item
    # side effect: increments $this->RequiredTermCounts[ItemId] once for each
    #   required word that the item (or one of the word's synonyms) matched
    function SearchForWords(
            $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
    {
        $DB = $this->DB;

        # start with empty search result scores list if none passed in
        if ($Scores == NULL)
        {
            $Scores = array();
        }

        # grab field ID
        $FieldId = $this->GetFieldId($FieldName);

        # for each word
        foreach ($Words as $Word => $Flags)
        {
            if ($this->DebugLevel > 2) { print("SE: Searching for word '{$Word}' in field {$FieldName}<br>\n"); }

            # excluded words are handled later by FilterOnExcludedWords()
            if ($Flags & WORD_EXCLUDED)
            {
                continue;
            }

            # look up record ID for word
            if ($this->DebugLevel > 2) { print("SE: Looking up word \"{$Word}\"<br>\n"); }
            $WordId = $this->GetWordId($Word);

            # nothing to score if word is not in DB
            if ($WordId === NULL)
            {
                continue;
            }

            # look up counts for word
            $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                    ."WHERE WordId = ".$WordId
                    ." AND FieldId = ".$FieldId);
            $Counts = $DB->FetchColumn("Count", "ItemId");

            # if synonym support is enabled (currently always on)
            if (TRUE)
            {
                # look for any synonyms (pairs are stored in either order)
                $DB->Query("SELECT WordIdA, WordIdB"
                        ." FROM SearchWordSynonyms"
                        ." WHERE WordIdA = ".$WordId
                        ." OR WordIdB = ".$WordId);

                # if synonyms were found
                if ($DB->NumRowsSelected())
                {
                    # retrieve synonym IDs (whichever side of the pair is not our word)
                    $SynonymIds = array();
                    while ($Record = $DB->FetchRow())
                    {
                        $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                                ? $Record["WordIdB"]
                                : $Record["WordIdA"];
                    }

                    # for each synonym
                    foreach ($SynonymIds as $SynonymId)
                    {
                        # retrieve counts for synonym
                        $DB->Query("SELECT ItemId,Count"
                                ." FROM SearchWordCounts"
                                ." WHERE WordId = ".$SynonymId
                                ." AND FieldId = ".$FieldId);
                        $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                        # for each count
                        foreach ($SynonymCounts as $ItemId => $Count)
                        {
                            # synonym matches are worth half of a direct match
                            $AdjustedCount = ceil($Count / 2);

                            # add count to existing counts
                            if (isset($Counts[$ItemId]))
                            {
                                $Counts[$ItemId] += $AdjustedCount;
                            }
                            else
                            {
                                $Counts[$ItemId] = $AdjustedCount;
                            }
                        }
                    }
                }
            }

            # for each count
            foreach ($Counts as $ItemId => $Count)
            {
                # if word flagged as required
                if ($Flags & WORD_REQUIRED)
                {
                    # increment required word count for record
                    if (isset($this->RequiredTermCounts[$ItemId]))
                    {
                        $this->RequiredTermCounts[$ItemId]++;
                    }
                    else
                    {
                        $this->RequiredTermCounts[$ItemId] = 1;
                    }
                }

                # add to item record score
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += $Count;
                }
                else
                {
                    $Scores[$ItemId] = $Count;
                }
            }
        }

        # return basic scores to caller
        return $Scores;
    }

    # extract phrases (terms surrounded by quotes) from search string
    #   $SearchString - raw search text
    # returns WORD_* flag bit masks indexed by (trimmed, slash-escaped) phrase
    # side effects: updates $this->ExcludedTermCount, $this->RequiredTermCount,
    #   $this->InclusiveTermCount and $this->SearchTermList the first time each
    #   distinct phrase is seen in this string
    # (a phrase is only recognized when its closing quote is present)
    function ParseSearchStringForPhrases($SearchString)
    {
        # split into chunks delimited by double quote marks
        # (odd-numbered chunks are the quoted phrases)
        $Pieces = explode("\"", $SearchString);

        # for each pair of chunks
        $Index = 2;
        $Phrases = array();
        while ($Index < count($Pieces))
        {
            # grab phrase from chunk
            $Phrase = trim(addslashes($Pieces[$Index - 1]));
            $Flags = WORD_PRESENT;

            # grab the option character, i.e. the last character of the
            # chunk that precedes the phrase's opening quote
            $FirstChar = substr($Pieces[$Index - 2], -1);

            # set flags to reflect any option characters
            if ($FirstChar == "-")
            {
                # "-" prefix excludes items containing the phrase
                $Flags |= WORD_EXCLUDED;
                if (!isset($Phrases[$Phrase]))
                {
                    $this->ExcludedTermCount++;
                }
            }
            else
            {
                # phrase is required when prefixed with "+", or when the
                # default logic is AND and it is not prefixed with "~"
                if ((($this->DefaultSearchLogic == SEARCHLOGIC_AND) && ($FirstChar != "~"))
                        || ($FirstChar == "+"))
                {
                    $Flags |= WORD_REQUIRED;
                    if (!isset($Phrases[$Phrase]))
                    {
                        $this->RequiredTermCount++;
                    }
                }
                if (!isset($Phrases[$Phrase]))
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Phrase;
                }
            }
            $Phrases[$Phrase] = $Flags;

            # move to next pair of chunks
            $Index += 2;
        }

        # return phrases to caller
        return $Phrases;
    }

    # extract groups (terms surrounded by parens) from search string
    # (NOTE:  NOT YET IMPLEMENTED!!!)
    #   $SearchString - raw search text
    # returns list of the (trimmed, slash-escaped) chunks picked out of the
    #   string, or an empty array if there were none
    function ParseSearchStringForGroups($SearchString)
    {
        # split into chunks delimited by open paren
        $Pieces = explode("(", $SearchString);

        # start with empty list so that caller always gets an array back
        $Groups = array();

        # for each chunk
        $Index = 2;
        while ($Index < count($Pieces))
        {
            # grab group from chunk
            $Group = trim(addslashes($Pieces[$Index - 1]));
            $Groups[] = $Group;

            # move to next pair of chunks
            $Index += 2;
        }

        # return groups to caller
        return $Groups;
    }

    # retrieve IDs of items that contain phrase in specified field
    # (this version just terminates the script with an error message;
    #   presumably child classes are expected to override it -- SearchForPhrases()
    #   iterates over the return value as a list of item IDs)
    function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # error out
        exit("<br>SE - ERROR:  SearchFieldForPhrases() not implemented<br>\n");
    }

    # add phrase matches to scores and/or remove items matching excluded phrases
    #   $Phrases            - WORD_* flag bit masks indexed by phrase
    #   $Scores             - existing scores indexed by item ID
    #   $FieldName          - field to search (keyword pseudo-field means every
    #                           field marked for inclusion in keyword searches)
    #   $ProcessNonExcluded - TRUE to score phrases not flagged as excluded
    #   $ProcessExcluded    - TRUE to drop items that contain excluded phrases
    # returns updated scores array
    # side effect: increments $this->RequiredTermCounts[ItemId] for each
    #   required phrase found in an item
    function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
            $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
    {
        # if phrases are found
        if (count($Phrases) > 0)
        {
            # if this is a keyword search
            if ($FieldName == "XXXKeywordXXX")
            {
                # for each field
                foreach ($this->FieldInfo as $KFieldName => $Info)
                {
                    # if field is marked to be included in keyword searches
                    if ($Info["InKeywordSearch"])
                    {
                        # call ourself with that field
                        $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
                                $ProcessNonExcluded, $ProcessExcluded);
                    }
                }
            }
            else
            {
                # for each phrase
                foreach ($Phrases as $Phrase => $Flags)
                {
                    if ($this->DebugLevel > 2) { print("SE: searching for phrase '{$Phrase}' in field {$FieldName}<br>\n"); }

                    # if phrase flagged as excluded and we are doing excluded phrases
                    #   or phrase flagged as non-excluded and we are doing non-excluded phrases
                    if (($ProcessExcluded && ($Flags & WORD_EXCLUDED))
                            || ($ProcessNonExcluded && !($Flags & WORD_EXCLUDED)))
                    {
                        # retrieve list of items that contain phrase
                        $ItemIds = $this->SearchFieldForPhrases(
                                $FieldName, $Phrase);

                        # for each item that contains phrase
                        foreach ($ItemIds as $ItemId)
                        {
                            # if we are doing excluded phrases and phrase flagged as excluded
                            if ($ProcessExcluded && ($Flags & WORD_EXCLUDED))
                            {
                                # knock item off of list
                                unset($Scores[$ItemId]);
                            }
                            elseif ($ProcessNonExcluded)
                            {
                                # calculate phrase value based on number of words and field weight
                                $PhraseScore = count(preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
                                        * $this->FieldInfo[$FieldName]["Weight"];
                                if ($this->DebugLevel > 2) { print("SE: phrase score is {$PhraseScore}<br>\n"); }

                                # bump up item record score
                                if (isset($Scores[$ItemId]))
                                {
                                    $Scores[$ItemId] += $PhraseScore;
                                }
                                else
                                {
                                    $Scores[$ItemId] = $PhraseScore;
                                }

                                # if phrase flagged as required
                                if ($Flags & WORD_REQUIRED)
                                {
                                    # increment required word count for record
                                    if (isset($this->RequiredTermCounts[$ItemId]))
                                    {
                                        $this->RequiredTermCounts[$ItemId]++;
                                    }
                                    else
                                    {
                                        $this->RequiredTermCounts[$ItemId] = 1;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        # return updated scores to caller
        return $Scores;
    }

    # remove from scores any items that contain a word flagged as excluded
    #   $Words     - WORD_* flag bit masks indexed by word
    #   $Scores    - existing scores indexed by item ID
    #   $FieldName - field in which to look for the excluded words
    # returns filtered scores array
    function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
    {
        $DB = $this->DB;

        # grab field ID
        $FieldId = $this->GetFieldId($FieldName);

        # for each word
        foreach ($Words as $Word => $Flags)
        {
            # if word flagged as excluded
            if ($Flags & WORD_EXCLUDED)
            {
                # look up record ID for word
                $WordId = $this->GetWordId($Word);

                # if word is in DB
                if ($WordId !== NULL)
                {
                    # look up items that contain word in this field
                    $DB->Query("SELECT ItemId FROM SearchWordCounts "
                            ."WHERE WordId={$WordId} AND FieldId={$FieldId}");

                    # for each item found
                    while ($Record = $DB->FetchRow())
                    {
                        # if item record is in score list
                        $ItemId = $Record["ItemId"];
                        if (isset($Scores[$ItemId]))
                        {
                            # remove item record from score list
                            if ($this->DebugLevel > 3) { print("SE: filtering out item $ItemId because it contained word \"".$Word."\"<br>\n"); }
                            unset($Scores[$ItemId]);
                        }
                    }
                }
            }
        }

        # returned filtered score list to caller
        return $Scores;
    }

    # remove from scores any items that did not match every required term
    #   $Scores - existing scores indexed by item ID
    # returns filtered scores array (unchanged if no terms were required)
    function FilterOnRequiredWords($Scores)
    {
        # if there were required words
        if ($this->RequiredTermCount > 0)
        {
            # for each item
            foreach ($Scores as $ItemId => $Score)
            {
                # items with no recorded required-term matches count as zero
                $MatchCount = isset($this->RequiredTermCounts[$ItemId])
                        ? $this->RequiredTermCounts[$ItemId] : 0;

                # if item does not meet required word count
                if ($MatchCount < $this->RequiredTermCount)
                {
                    # filter out item
                    if ($this->DebugLevel > 4) { print("SE: filtering out item $ItemId because it didn't have required word count of ".$this->RequiredTermCount." (only had ".$MatchCount.")<br>\n"); }
                    unset($Scores[$ItemId]);
                }
            }
        }

        # return filtered list to caller
        return $Scores;
    }

    # count, sort, and trim search result scores list
    #   $Scores          - scores indexed by item ID
    #   $StartingResult  - offset of first result to return
    #   $NumberOfResults - maximum number of results to return
    #   $SortByField     - field to sort on, or NULL to sort by score
    #   $SortDescending  - TRUE to sort in descending order
    # returns the requested slice of the filtered, sorted scores (indexed by item ID)
    # side effect: saves the post-filter result count in $this->NumberOfResultsAvailable
    function CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending)
    {
        # perform any requested filtering
        if ($this->DebugLevel > 0) { print("SE: Have "
                .count($Scores)." results before filter callbacks<br>\n"); }
        $Scores = $this->FilterOnSuppliedFunctions($Scores);

        # save total number of results available
        $this->NumberOfResultsAvailable = count($Scores);

        # if no sorting field specified
        if ($SortByField === NULL)
        {
            # sort result list by score
            if ($SortDescending)
                arsort($Scores, SORT_NUMERIC);
            else
                asort($Scores, SORT_NUMERIC);
        }
        else
        {
            # get list of item IDs in sorted order
            $SortedIds = $this->GetItemIdsSortedByField(
                    $SortByField, $SortDescending);

            # if we have sorted item IDs
            if (count($SortedIds) && count($Scores))
            {
                # strip sorted ID list down to those that appear in search results
                $SortedIds = array_intersect($SortedIds, array_keys($Scores));

                # rebuild score list in sorted order (starting from an empty
                #   list so that we still have an array if nothing intersected)
                $NewScores = array();
                foreach ($SortedIds as $Id)
                {
                    $NewScores[$Id] = $Scores[$Id];
                }
                $Scores = $NewScores;
            }
            else
            {
                # sort result list by score
                arsort($Scores, SORT_NUMERIC);
            }
        }

        # trim result list to match range requested by caller
        #   (keys must be preserved because they are the item IDs)
        $TrimmedScores = array_slice(
                $Scores, $StartingResult, $NumberOfResults, TRUE);

        # returned cleaned search result scores list to caller
        return $TrimmedScores;
    }

function FilterOnSuppliedFunctions($Scores)
    {
        # (doc) run each result through the registered filter functions
        #   $Scores - scores indexed by item ID
        # returns scores with every item removed for which some function
        #   listed in $this->FilterFuncs returned a true value

        # if filter functions have been set
        if (isset($this->FilterFuncs))
        {
            # for each result
            foreach ($Scores as $ItemId => $Score)
            {
                # for each filter function
                foreach ($this->FilterFuncs as $FuncName)
                {
                    # if filter function return TRUE for item
                    if ($FuncName($ItemId))
                    {
                        # discard result
                        if ($this->DebugLevel > 2) { print("SE: filter callback <i>$FuncName</i> rejected item {$ItemId}<br>\n"); }
                        unset($Scores[$ItemId]);

                        # bail out of filter func loop (move on to next result)
                        continue 2;
                    }
                }
            }
        }

        # return filtered list to caller
        return $Scores;
    }

    # apply comparison searches (=, !=, <, <=, >, >=) to search results
    #   $SearchStrings - search string or array of search strings, indexed by field name
    #   $Scores        - existing scores indexed by item ID
    # returns updated scores array:
    #   with AND logic, items that fail the comparisons are removed (or, if
    #       there were no prior results and no inclusive terms, every
    #       comparison match is added with a score of 1)
    #   otherwise each comparison match has 1 added to its score
    # a search string is treated as a comparison when it starts with an
    #   operator or when its field is known and is not a text field (in
    #   which case "=" is assumed)
    function SearchForComparisonMatches($SearchStrings, $Scores)
    {
        # start with no comparisons
        $FieldNames = array();
        $Operators = array();
        $Values = array();

        # for each field
        $Index = 0;
        foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
        {
            # if field is not keyword
            if ($SearchFieldName != "XXXKeywordXXX")
            {
                # convert search string to array if needed
                if (!is_array($SearchStringArray))
                {
                    $SearchStringArray = array($SearchStringArray);
                }

                # for each search string for this field
                foreach ($SearchStringArray as $SearchString)
                {
                    # if search string looks like comparison search
                    $FoundOperator = preg_match("/^[><!]=./", $SearchString) || preg_match("/^[><=]./", $SearchString);
                    if ($FoundOperator || (isset($this->FieldInfo[$SearchFieldName]["FieldType"]) && ($this->FieldInfo[$SearchFieldName]["FieldType"] != SEARCHFIELD_TEXT)))
                    {
                        # determine value (search string minus any leading operator)
                        $Patterns = array("/^[><!]=/", "/^[><=]/");
                        $Replacements = array("", "");
                        $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

                        # determine and save operator
                        if (!$FoundOperator)
                        {
                            $Operators[$Index] = "=";
                        }
                        else
                        {
                            # (substr() is used rather than string offsets so that
                            #   a term consisting of a lone operator character
                            #   does not trigger an out-of-range offset)
                            $Term = trim($SearchString);
                            $FirstChar = substr($Term, 0, 1);
                            $FirstTwoChars = substr($Term, 0, 2);
                            if ($FirstTwoChars == ">=")     {  $Operators[$Index] = ">=";  }
                            elseif ($FirstTwoChars == "<=") {  $Operators[$Index] = "<=";  }
                            elseif ($FirstTwoChars == "!=") {  $Operators[$Index] = "!=";  }
                            elseif ($FirstChar == ">")      {  $Operators[$Index] = ">";  }
                            elseif ($FirstChar == "<")      {  $Operators[$Index] = "<";  }
                            elseif ($FirstChar == "=")      {  $Operators[$Index] = "=";  }
                        }

                        # if operator was found
                        if (isset($Operators[$Index]))
                        {
                            # save value
                            $Values[$Index] = $Value;

                            # save field name
                            $FieldNames[$Index] = $SearchFieldName;
                            if ($this->DebugLevel > 3) { print("SE: added comparison (field = <i>".$FieldNames[$Index]."</i>  op = <i>".$Operators[$Index]."</i>  val = <i>".$Values[$Index]."</i>)<br>\n"); }

                            # move to next comparison array entry
                            $Index++;
                        }
                    }
                }
            }
        }

        # if comparisons found
        if (count($Operators))
        {
            # perform comparisons on fields and gather results
            $Results = $this->SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values);

            # if search logic is set to AND
            if ($this->DefaultSearchLogic == SEARCHLOGIC_AND)
            {
                # if results were found
                if (count($Results))
                {
                    # if there were no prior results and no terms for keyword search
                    if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
                    {
                        # add all results to scores
                        foreach ($Results as $ItemId)
                        {
                            $Scores[$ItemId] = 1;
                        }
                    }
                    else
                    {
                        # remove anything from scores that is not part of results
                        foreach ($Scores as $ItemId => $Score)
                        {
                            if (!in_array($ItemId, $Results))
                            {
                                unset($Scores[$ItemId]);
                            }
                        }
                    }
                }
                else
                {
                    # clear scores
                    $Scores = array();
                }
            }
            else
            {
                # add result items to scores
                foreach ($Results as $ItemId)
                {
                    if (isset($Scores[$ItemId]))
                    {
                        $Scores[$ItemId] += 1;
                    }
                    else
                    {
                        $Scores[$ItemId] = 1;
                    }
                }
            }
        }

        # return results to caller
        return $Scores;
    }

    # pull any debug level directive out of search string(s)
    #   $SearchStrings - a search string, or an array (indexed by field name)
    #       of search strings and/or arrays of search strings
    # returns the search string(s), in the same structure, with any
    #   directive handled by ExtractDebugLevel() removed
    function SetDebugLevel($SearchStrings)
    {
        # if search info is an array
        if (is_array($SearchStrings))
        {
            # for each array element
            foreach ($SearchStrings as $FieldName => $SearchStringArray)
            {
                # if element is an array
                if (is_array($SearchStringArray))
                {
                    # for each array element
                    foreach ($SearchStringArray as $Index => $SearchString)
                    {
                        # pull out search string if present
                        $SearchStrings[$FieldName][$Index] = $this->ExtractDebugLevel($SearchString);
                    }
                }
                else
                {
                    # pull out search string if present
                    $SearchStrings[$FieldName] = $this->ExtractDebugLevel($SearchStringArray);
                }
            }
        }
        else
        {
            # pull out search string if present
            $SearchStrings = $this->ExtractDebugLevel($SearchStrings);
        }

        # return new search info to caller
        return $SearchStrings;
    }

    # look for "DBUGLVL=N" directive at start of search string
    #   $SearchString - search string to check
    # returns search string with the directive removed (unchanged if the
    #   string does not begin, after optional whitespace, with the directive)
    # side effect: sets $this->DebugLevel to N and prints a notice when a
    #   directive is found
    # (N is one or two digits in the range 1-9; the level is picked out with
    #   a capture group so that nothing but those digits can ever end up in
    #   the debug level or in the text being stripped)
    function ExtractDebugLevel($SearchString)
    {
        # if search string starts with debug level indicator
        if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
        {
            # set debug level
            $Level = (int)$Matches[1];
            print("SE: setting debug level to $Level<br>\n");
            $this->DebugLevel = $Level;

            # remove indicator from search string
            $SearchString = str_replace("DBUGLVL=".$Level, "", $SearchString);
        }

        # return (possibly) modified search string to caller
        return $SearchString;
    }

    # load and return search result scores array containing all possible records
    # (every ID found in the item table is given a score of 1)
    function LoadScoresForAllRecords()
    {
        # start with empty list
        $Scores = array();

        # for every item
        $this->DB->Query("SELECT ".$this->ItemIdFieldName
                ." FROM ".$this->ItemTableName);
        while ($Record = $this->DB->FetchRow())
        {
            # set score for item to 1
            $Scores[$Record[$this->ItemIdFieldName]] = 1;
        }

        # return array with all scores to caller
        return $Scores;
    }


    # ---- private functions used in building search database

    # add to the stored count for a word within a field of an item
    #   $WordId  - ID of word
    #   $ItemId  - ID of item
    #   $FieldId - ID of field
    #   $Weight  - amount to add to the count
    # NOTE(review): $this->WordCountAdded is indexed by word and field only
    #   (not by item), so it presumably has to be cleared by the caller
    #   before each new item is processed -- confirm against UpdateForItem()
    function UpdateWordCount($WordId, $ItemId, $FieldId, $Weight)
    {
        $DB = $this->DB;

        # if word count already added to database
        if (isset($this->WordCountAdded[$WordId][$FieldId]))
        {
            # update word count
            $DB->Query("UPDATE SearchWordCounts SET Count=Count+{$Weight} "
                    ."WHERE WordId={$WordId} "
                            ."AND ItemId={$ItemId} "
                            ."AND FieldId={$FieldId}");
        }
        else
        {
            # add word count to DB
            $DB->Query("INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." ({$WordId}, {$ItemId}, {$FieldId}, {$Weight})");

            # remember that we added count for this word
            $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        }
    }

    # retrieve content of specified field for specified item
    # (this version just terminates the script with an error message;
    #   presumably child classes are expected to override it)
    function GetFieldContent($ItemId, $FieldName)
    {
        # error out
        exit("<br>SE - ERROR:  GetFieldContent() not implemented<br>\n");
    }

    # record word counts for a piece of text belonging to an item
    #   $ItemId           - ID of item the text belongs to
    #   $FieldName        - name of field the text came from
    #   $Weight           - amount added to the keyword pseudo-field count
    #                           for each distinct word
    #   $Text             - text to be parsed into words
    #   $IncludeInKeyword - TRUE if words should also be counted for keyword searches
    # (the count for the field itself is increased by 1 per distinct word)
    function RecordSearchInfoForText($ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
    {
        # normalize text
        $Words = $this->ParseSearchStringForWords($Text, TRUE);

        # if there was text left after parsing
        if (count($Words) > 0)
        {
            # get ID for field
            $FieldId = $this->GetFieldId($FieldName);

            # if text should be included in keyword searches
            if ($IncludeInKeyword)
            {
                # get ID for keyword field
                $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
            }

            # for each word
            foreach ($Words as $Word => $Flags)
            {
                # look up ID for word
                $WordId = $this->GetWordId($Word, TRUE);

                # update count for word
                $this->UpdateWordCount($WordId, $ItemId, $FieldId, 1);

                # if text should be included in keyword searches
                if ($IncludeInKeyword)
                {
                    # update keyword field count for word
                    $this->UpdateWordCount(
                            $WordId, $ItemId, $KeywordFieldId, $Weight);
                }
            }
        }
    }

    # convenience function for getting current time
    # returns Unix timestamp as a float, in seconds with microsecond resolution
    function GetMicrotime()
    {
        return microtime(TRUE);
    }
}

# define search logic modes
define("SEARCHLOGIC_AND", 1);
define("SEARCHLOGIC_OR", 2);

# define flags used for indicating field types
define("SEARCHFIELD_TEXT", 1);
define("SEARCHFIELD_NUMERIC", 2);
define("SEARCHFIELD_DATE", 3);
define("SEARCHFIELD_DATERANGE", 4);


?>