Search:

CWIS Developers Documentation

  • Main Page
  • Classes
  • Files
  • File List
  • File Members

SearchEngine.php

Go to the documentation of this file.
00001 <?PHP
00002 
00003 #
00004 #   FILE:  Scout--SearchEngine.php
00005 #
00006 #   FUNCTIONS PROVIDED:
00007 #       SearchEngine->SearchEngine()
00008 #           - constructor
00009 #       SearchEngine->Search($SearchString, 
00010 #               $StartingResult = 0, $NumberOfResults = 10)
00011 #           - search for text and return list of matching item IDs
00012 #       SearchEngine->FieldedSearch($SearchStrings, 
00013 #               $StartingResult = 0, $NumberOfResults = 10)
00014 #           - search for text in specific fields and return item ID list
00015 #       SearchEngine->NumberOfResults()
00016 #           - return number of results found in last search
00017 #       SearchEngine->SearchTime()
00018 #           - return time in seconds that last search took
00019 #       SearchEngine->AddResultFilterFunction($FunctionName)
00020 #           - add function that will be used to filter search results
00021 #       SearchEngine->UpdateForItem($ItemId)
00022 #           - update search info for specified item
00023 #       SearchEngine->UpdateForItems($StartingItemId, $NumberOfItems)
00024 #           - update search info for all items in specified range (returns
00025 #                   ID of last item updated)
00026 #
00027 #   AUTHOR:  Edward Almasy
00028 #
00029 #   Open Source Metadata Archive Search Engine (OSMASE)
00030 #   Copyright 2002-2006 Internet Scout Project
00031 #   http://scout.wisc.edu
00032 #
00033 
00034 class SearchEngine {
00035 
00036     # ---- PUBLIC INTERFACE --------------------------------------------------
00037 
00038     # possible types of logical operators
00039     const SEARCHLOGIC_AND = 1;
00040     const SEARCHLOGIC_OR = 2;
00041 
00042     # flags used for indicating field types
00043     const SEARCHFIELD_TEXT = 1;
00044     const SEARCHFIELD_NUMERIC = 2;
00045     const SEARCHFIELD_DATE = 3;
00046     const SEARCHFIELD_DATERANGE = 4;
00047 
00048     # object constructor
# Object constructor.
#   $DB - database object, saved in $this->DB and used for every query
#       issued by this class
#   $ItemTableName - name of the database table holding the searchable items
#   $ItemIdFieldName - name of the ID column in that table
function SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName)
{
    # save database object for our use
    $this->DB = $DB;

    # save item access parameters
    $this->ItemTableName = $ItemTableName;
    $this->ItemIdFieldName = $ItemIdFieldName;

    # methods of this class refer to the search logic settings by their
    #       unqualified names, but the values are only declared as class
    #       constants, so make sure global constants with those names exist
    #       (an unqualified undefined constant is an error in current PHP
    #       and silently became a string in older versions)
    if (!defined("SEARCHLOGIC_AND"))
    {
        define("SEARCHLOGIC_AND", self::SEARCHLOGIC_AND);
    }
    if (!defined("SEARCHLOGIC_OR"))
    {
        define("SEARCHLOGIC_OR", self::SEARCHLOGIC_OR);
    }

    # initialize internal values
    # (the unqualified name is used deliberately so that the default always
    #       matches what the comparisons elsewhere in the class test against)
    $this->DefaultSearchLogic = SEARCHLOGIC_AND;

    # define flags used for indicating word states
    if (!defined("WORD_PRESENT"))  {  define("WORD_PRESENT", 1);  }
    if (!defined("WORD_EXCLUDED")) {  define("WORD_EXCLUDED", 2);  }
    if (!defined("WORD_REQUIRED")) {  define("WORD_REQUIRED", 4);  }

    # set default debug state
    $this->DebugLevel = 0;
}
00069     
00070     # add field to be searched
# Register a field with the search engine.
#   $FieldName - name the field is known by within the search engine
#   $DBFieldName - value stored as the field's "DBFieldName"
#   $FieldType - value stored as the field's "FieldType" (see the
#       SEARCHFIELD_ constants)
#   $Weight - value stored as the field's "Weight"
#   $UsedInKeywordSearch - value stored as the field's "InKeywordSearch" flag
function AddField($FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    # gather the settings under the keys used by the rest of the class
    $Settings = array(
            "DBFieldName" => $DBFieldName,
            "FieldType" => $FieldType,
            "Weight" => $Weight,
            "InKeywordSearch" => $UsedInKeywordSearch,
            );

    # store each setting individually (leaves any other keys already
    #       recorded for the field untouched)
    foreach ($Settings as $Key => $Value)
    {
        $this->FieldInfo[$FieldName][$Key] = $Value;
    }
}
00079 
00080     # retrieve info about tables and fields (useful for child objects)
# name of the item table supplied to the constructor
function ItemTableName()
{
    return $this->ItemTableName;
}

# name of the item ID field supplied to the constructor
function ItemIdFieldName()
{
    return $this->ItemIdFieldName;
}

# "DBFieldName" setting recorded for the specified field
function DBFieldName($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["DBFieldName"];
}

# "FieldType" setting recorded for the specified field
function FieldType($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["FieldType"];
}

# "Weight" setting recorded for the specified field
function FieldWeight($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["Weight"];
}

# "InKeywordSearch" setting recorded for the specified field
function FieldInKeywordSearch($FieldName)
{
    $Info = $this->FieldInfo[$FieldName];
    return $Info["InKeywordSearch"];
}
00087 
00088     # set debug level
# Set the debug level.
#   $Setting - new debug level (the search methods print progressively more
#       diagnostic output as the level rises above 0)
function DebugLevel($Setting)
{
    # remember level for use by the debug output checks
    $this->DebugLevel = $Setting;
}
00093 
00094 
00095     # ---- search functions
00096 
00097     # perform keyword search
# Perform a keyword search.
#   $SearchString - text to search for
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending -
#       handed unchanged to CleanScores(), which counts, sorts, and trims
#       the result list
# Returns the result list produced by CleanScores().  Also records the
#       elapsed time for SearchTime().
function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # pass the search string through SetDebugLevel() and carry on with
    #       whatever it hands back
    $SearchString = $this->SetDebugLevel($SearchString);
    if ($this->DebugLevel > 0)
    {
        print("SE:  In Search() with search string \"$SearchString\"<br>\n");
    }

    # note when we started so that search time can be calculated
    $Began = $this->GetMicrotime();

    # reset term tallies for this search
    $this->ExcludedTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->InclusiveTermCount = 0;

    # break search string into individual words
    $Words = $this->ParseSearchStringForWords($SearchString);
    if ($this->DebugLevel > 1)
    {
        print("SE:  Found ".count($Words)." words<br>\n");
    }

    # pull out any phrases in search string
    $Phrases = $this->ParseSearchStringForPhrases($SearchString);
    if ($this->DebugLevel > 1)
    {
        print("SE:  Found ".count($Phrases)." phrases<br>\n");
    }

    # work out whether the search consists solely of exclusions
    $OnlyExclusions = ($this->ExcludedTermCount && !$this->InclusiveTermCount);

    if (!$OnlyExclusions)
    {
        # look for words first, then narrow/extend with phrases
        $Results = $this->SearchForWords($Words);
        if ($this->DebugLevel > 1)
        {
            print("SE:  Found ".count($Results)." results after word search<br>\n");
        }
        $Results = $this->SearchForPhrases($Phrases, $Results);
        if ($this->DebugLevel > 1)
        {
            print("SE:  Found ".count($Results)." results after phrase search<br>\n");
        }
    }
    else
    {
        # nothing to match on, so begin with every record
        if ($this->DebugLevel > 1)
        {
            print("SE:  Loading all records<br>\n");
        }
        $Results = $this->LoadScoresForAllRecords();
    }

    # if there is anything to filter
    if (count($Results) > 0)
    {
        # throw out results containing excluded words, then results that
        #       lack required words
        $Results = $this->FilterOnExcludedWords($Words, $Results);
        $Results = $this->FilterOnRequiredWords($Results);
    }

    # count, sort, and trim the result list
    $Results = $this->CleanScores($Results, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = $this->GetMicrotime() - $Began;

    # hand results back to caller
    if ($this->DebugLevel > 0)
    {
        print("SE:  Ended up with ".$this->NumberOfResultsAvailable." results<br>\n");
    }
    return $Results;
}
00157 
00158     # perform search across multiple fields and return trimmed results to caller
# Perform a search across multiple fields and return the trimmed results.
#   $SearchStrings - search strings, passed through SetDebugLevel() and then
#       on to SearchAcrossFields()
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending -
#       handed unchanged to CleanScores()
# Returns the result list produced by CleanScores().  Also records the
#       elapsed time for SearchTime().
function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    if ($this->DebugLevel > 0)
    {
        print("SE:  In FieldedSearch() with "
                .count($SearchStrings)." search strings<br>\n");
    }

    # note when we started so that search time can be calculated
    $Began = $this->GetMicrotime();

    # run the search, treating a NULL outcome as an empty result list
    $Results = $this->SearchAcrossFields($SearchStrings);
    if ($Results === NULL)
    {
        $Results = array();
    }

    # count, sort, and trim the result list
    $Results = $this->CleanScores($Results, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = $this->GetMicrotime() - $Began;

    # hand results back to caller
    if ($this->DebugLevel > 0)
    {
        print("SE:  Ended up with "
                .$this->NumberOfResultsAvailable." results<br>\n");
    }
    return $Results;
}
00185     
00186     # perform search with logical groups of fielded searches
# Perform a search made up of logical groups of fielded searches.
#   $SearchGroups - array of groups; each group may carry a "SearchStrings"
#       entry (passed to SearchAcrossFields()) and a "Logic" entry (AND/OR
#       setting used while that group is searched)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending -
#       handed unchanged to CleanScores()
# Group results are combined using the default search logic in effect when
#       the method was called:  with OR, scores are summed over the union of
#       results; otherwise only items present in every conducted group
#       search survive.  The default search logic is restored before
#       returning.
# Returns the result list produced by CleanScores().
function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # pass the search strings of every group through SetDebugLevel()
    foreach ($SearchGroups as $Index => $Ignored)
    {
        if (!isset($SearchGroups[$Index]["SearchStrings"])) {  continue;  }
        $SearchGroups[$Index]["SearchStrings"] =
                $this->SetDebugLevel($SearchGroups[$Index]["SearchStrings"]);
    }
    if ($this->DebugLevel > 0)
    {
        print("SE:  In GroupedSearch() with "
                .count($SearchGroups)." search groups<br>\n");
    }

    # note when we started so that search time can be calculated
    $Began = $this->GetMicrotime();

    # nothing found yet
    $Combined = array();

    # remember AND/OR setting so it can be put back afterwards
    $OverallLogic = $this->DefaultSearchLogic;

    # work through the groups
    $NothingMergedYet = TRUE;
    foreach ($SearchGroups as $Group)
    {
        if ($this->DebugLevel > 0)
        {
            print("SE:  ----- GROUP "
                    ."---------------------------<br>\n");
        }

        # search within group using group's own AND/OR setting if it has
        #       one, otherwise the setting we started with
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $OverallLogic;
        if ($this->DebugLevel > 2)
        {
            print("SE:  Logic is "
                    .(($this->DefaultSearchLogic == SEARCHLOGIC_AND) ? "AND" : "OR")
                    ."<br>\n");
        }

        # skip groups with nothing to search for
        if (!isset($Group["SearchStrings"])) {  continue;  }

        # run search for group, skipping group if no search was conducted
        $GroupResults = $this->SearchAcrossFields($Group["SearchStrings"]);
        if ($GroupResults === NULL) {  continue;  }

        if (($OverallLogic == SEARCHLOGIC_OR) || $NothingMergedYet)
        {
            # fold group results into running totals
            foreach ($GroupResults as $ItemId => $Score)
            {
                $Combined[$ItemId] = isset($Combined[$ItemId])
                        ? ($Combined[$ItemId] + $Score) : $Score;
            }

            # (first set of results has now been merged)
            $NothingMergedYet = FALSE;
        }
        else
        {
            # keep only items found both previously and in this group
            $Previous = $Combined;
            $Combined = array();
            foreach ($GroupResults as $ItemId => $Score)
            {
                if (!isset($Previous[$ItemId])) {  continue;  }
                $Combined[$ItemId] = $Previous[$ItemId] + $Score;
            }
        }
    }

    # put AND/OR setting back the way we found it
    $this->DefaultSearchLogic = $OverallLogic;

    # count, sort, and trim the result list
    $Combined = $this->CleanScores($Combined, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = $this->GetMicrotime() - $Began;

    # hand results back to caller
    if ($this->DebugLevel > 0)
    {
        print("SE:  Ended up with "
                .$this->NumberOfResultsAvailable." results<br>\n");
    }
    return $Combined;
}
00292 
00293     # add function that will be called to filter search results
# Register a function to be used for filtering search results.
#   $FunctionName - name of the filter function; appended to the list kept
#       in $this->FilterFuncs
function AddResultFilterFunction($FunctionName)
{
    # tack name onto end of filter function list
    $this->FilterFuncs[] = $FunctionName;
}
00299 
00300     # get or set default search logic (AND or OR)
# Get or set the default search logic (AND or OR).
#   $NewSetting - new setting, or NULL (the default) to leave the current
#       setting alone
# Returns the setting in effect after the call.
function DefaultSearchLogic($NewSetting = NULL)
{
    # just report current setting if no new one was supplied
    # (loose comparison, so any value equal to NULL is treated as "not supplied")
    if ($NewSetting == NULL)
    {
        return $this->DefaultSearchLogic;
    }

    # otherwise adopt new setting and report it
    $this->DefaultSearchLogic = $NewSetting;
    return $this->DefaultSearchLogic;
}
00309 
# Choose whether search terms are required by default.
#   $NewSetting - TRUE (the default) selects AND as the default search
#       logic, FALSE selects OR
function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    # map flag onto corresponding search logic setting
    $this->DefaultSearchLogic = $NewSetting ? SEARCHLOGIC_AND : SEARCHLOGIC_OR;
}
00321 
# number of results found in last search
function NumberOfResults()
{
    $Count = $this->NumberOfResultsAvailable;
    return $Count;
}

# current search term list (as stored in $this->SearchTermList)
function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}

# time in seconds that last search took
function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
00336 
00337     # report total weight for all fields involved in search
# Report the total weight of all fields involved in a fielded search.
#   $SearchStrings - array indexed by field name; the special name
#       "XXXKeywordXXX" stands for a keyword search
# Returns the sum of the weights of the named fields, plus the weights of
#       all fields flagged for keyword searching if a keyword search was
#       included.
function FieldedSearchWeightScale($SearchStrings)
{
    $Total = 0;
    $KeywordSearchSeen = FALSE;

    # add up weights of explicitly named fields
    foreach ($SearchStrings as $FieldName => $Unused)
    {
        if ($FieldName != "XXXKeywordXXX")
        {
            $Total += $this->FieldInfo[$FieldName]["Weight"];
            continue;
        }

        # keyword search entry is handled separately below
        $KeywordSearchSeen = TRUE;
    }

    # done if there was no keyword search
    if (!$KeywordSearchSeen)
    {
        return $Total;
    }

    # add in weight of every field that takes part in keyword searches
    foreach ($this->FieldInfo as $Settings)
    {
        if ($Settings["InKeywordSearch"])
        {
            $Total += $Settings["Weight"];
        }
    }
    return $Total;
}
00365 
00366 
00367     # ---- search database update functions
00368 
00369     # update search DB for the specified item
# Rebuild the search DB entries for one item.
#   $ItemId - ID of item to update; negative IDs (temporary records) are
#       ignored
# Existing word counts for the item are deleted, then the content of every
#       field with a positive weight is re-recorded.
function UpdateForItem($ItemId)
{
    # temporary records (negative IDs) are never indexed
    if ($ItemId < 0)
    {
        return;
    }

    # forget which word counts have been added so far
    unset($this->WordCountAdded);

    # wipe out whatever is currently stored for item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

    # go through metadata fields
    foreach ($this->FieldInfo as $FieldName => $Settings)
    {
        # fields without a positive weight are not indexed
        if (!($Settings["Weight"] > 0)) {  continue;  }

        # fetch field content, treating a single value as a one-entry list
        $Content = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Content) ? $Content : array($Content);

        # record search info for each piece of text
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Settings["Weight"], $String,
                    $Settings["InKeywordSearch"]);
        }
    }
}
00412 
00413     # update search DB for the specified range of items
# Update the search DB for a range of items.
#   $StartingItemId - lowest item ID to update (coerced to an integer)
#   $NumberOfItems - maximum number of items to update (coerced to an integer)
# Returns the ID of the last item updated, or NULL if no items were found
#       at or above the starting ID.
function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    # (numeric arguments are forced to integers so that nothing but a
    #       number can end up in the query text)
    $IdField = $this->ItemIdFieldName;
    $this->DB->Query("SELECT ".$IdField." FROM ".$this->ItemTableName
            ." WHERE ".$IdField." >= ".intval($StartingItemId)
            ." ORDER BY ".$IdField." LIMIT ".intval($NumberOfItems));
    $ItemIds = $this->DB->FetchColumn($IdField);

    # start out with no item updated, so that the return value is well
    #       defined even when the query found nothing
    $LastItemId = NULL;

    # for each retrieved item ID
    foreach ($ItemIds as $ItemId)
    {
        # update search info for item
        $this->UpdateForItem($ItemId);
        $LastItemId = $ItemId;
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
00432 
00433     # drop all data pertaining to item from search DB
# Drop all data pertaining to an item from the search DB.
#   $ItemId - ID of item whose word counts are to be deleted
function DropItem($ItemId)
{
    # remove every word count entry recorded for item
    $Statement = "DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId;
    $this->DB->Query($Statement);
}
00439     
00440     # drop all data pertaining to field from search DB
# Drop all data pertaining to a field from the search DB.
#   $FieldName - name of field, as stored in the SearchFields table
# Deletes the word counts recorded for the field and then the field's own
#       entry in SearchFields.  Does nothing if no ID is found for the name.
function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # bail out if field is unknown to us
    # NOTE(review): assumes Query() hands back NULL or FALSE when no row
    #       matches -- confirm against the database class
    if (($FieldId === NULL) || ($FieldId === FALSE))
    {
        return;
    }

    # build quoted ID for use in queries
    # (plain single quotes -- a backslash before a single quote inside a
    #       double-quoted PHP string is kept literally and would end up in
    #       the SQL)
    $QuotedFieldId = "'".addslashes($FieldId)."'";

    # drop all entries pertaining to field from word counts table
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = ".$QuotedFieldId);

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = ".$QuotedFieldId);
}
00453     
00454     # return total number of terms indexed by search engine
# total number of terms indexed by search engine
# (row count of the SearchWords table)
function SearchTermCount()
{
    $Statement = "SELECT COUNT(*) AS TermCount"
            ." FROM SearchWords";
    $TermCount = $this->DB->Query($Statement, "TermCount");
    return $TermCount;
}

# total number of items indexed by search engine
# (number of distinct item IDs in the SearchWordCounts table)
function ItemCount()
{
    $Statement = "SELECT COUNT(DISTINCT ItemId) AS ItemCount"
            ." FROM SearchWordCounts";
    $ItemCount = $this->DB->Query($Statement, "ItemCount");
    return $ItemCount;
}
00467 
00468     # add synonym(s)
# Add synonym(s) for a word.
#   $Word - word to add synonyms for
#   $Synonyms - array of synonyms for the word
# A synonym pair is stored once, regardless of which of the two words is
#       given as $Word.
function AddSynonyms($Word, $Synonyms)
{
    # look up ID for word (second argument TRUE, as for the synonyms below)
    $WordId = $this->GetWordId($Word, TRUE);

    foreach ($Synonyms as $Synonym)
    {
        # look up ID for synonym
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # check for pair in database, in either order
        $Lookup = "SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                    ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                    ." AND WordIdA = ".$SynonymId.")";
        $this->DB->Query($Lookup);

        # nothing to do if pair is already there
        if ($this->DB->NumRowsSelected() != 0) {  continue;  }

        # store new pair
        $Insert = "INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$SynonymId.")";
        $this->DB->Query($Insert);
    }
}
00495 
00496     # remove synonym(s)
# Remove synonym(s) for a word.
#   $Word - word to remove synonyms for
#   $Synonyms - array of synonyms to remove, or NULL (the default) to remove
#       every synonym recorded for the word
# Words and synonyms for which GetWordId() returns NULL are skipped.
function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # nothing to remove if word has no ID
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, remove every pair involving word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove just the pairs involving the listed synonyms
    foreach ($Synonyms as $Synonym)
    {
        # skip synonyms that have no ID
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL) {  continue;  }

        # delete pair, whichever way round it was stored
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
00535 
00536     # remove all synonyms
# Remove every synonym pair from the search DB.
function RemoveAllSynonyms()
{
    # empty out synonym table
    $Statement = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Statement);
}
00541 
00542     # get synonyms for word (returns array of synonyms)
# Get synonyms for a word.
#   $Word - word to look up synonyms for
# Returns an array of synonyms (empty if GetWordId() finds no ID for the
#       word or no synonym pairs involve it).
function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);
        $SynonymIds = array();

        # (FetchRow must be called as a method -- reading it as a property
        #       yields nothing, so no synonym would ever be returned)
        while ($Record = $this->DB->FetchRow())
        {
            # the synonym is whichever half of the pair is not our word
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # translate IDs to words only after all rows have been fetched
        # NOTE(review): presumably because GetWord() queries through the
        #       same database object -- confirm
        foreach ($SynonymIds as $SynonymId)
        {
            # look up synonym word and add to synonym list
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
00576 
00577     # get all synonyms (returns 2D array w/ words as first index)
# Get all synonyms.
# Returns a 2D array with words as the first index and lists of synonyms as
#       the values.  Where two words list each other, one of the two
#       entries is removed, so each pair is reported once.  Synonym lists
#       and the outer array are sorted alphabetically.
function GetAllSynonyms()
{
    # nothing found yet
    $List = array();

    # read synonym ID pairs using a database object of our own
    $PairDB = new SPTDatabase();
    $PairDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Pair = $PairDB->FetchRow())
    {
        # translate IDs to words
        $WordA = $this->GetWord($Pair["WordIdA"]);
        $WordB = $this->GetWord($Pair["WordIdB"]);

        # list second word under first unless already listed
        if (!(isset($List[$WordA]) && in_array($WordB, $List[$WordA])))
        {
            $List[$WordA][] = $WordB;
        }

        # list first word under second unless already listed
        if (!(isset($List[$WordB]) && in_array($WordA, $List[$WordB])))
        {
            $List[$WordB][] = $WordA;
        }
    }

    # remove reciprocal duplicates
    # (the loops walk the lists as they were when the loop began, while the
    #       checks and removals work on the live array)
    foreach ($List as $Word => $Listed)
    {
        foreach ($Listed as $Other)
        {
            # only act if each word is still listed under the other
            $Reciprocal = isset($List[$Other])
                    && isset($List[$Word])
                    && in_array($Word, $List[$Other])
                    && in_array($Other, $List[$Word]);
            if (!$Reciprocal) {  continue;  }

            # trim the pair from whichever side has the shorter list
            #       (from the other word's list if lengths are equal)
            if (count($List[$Word]) < count($List[$Other]))
            {
                $Shrink = $Word;
                $Drop = $Other;
            }
            else
            {
                $Shrink = $Other;
                $Drop = $Word;
            }
            $List[$Shrink] = array_diff($List[$Shrink], array($Drop));

            # discard list entirely if that emptied it
            if (!count($List[$Shrink]))
            {
                unset($List[$Shrink]);
            }
        }
    }

    # sort alphabetically (just for convenience)
    foreach (array_keys($List) as $Word)
    {
        asort($List[$Word]);
    }
    ksort($List);

    # hand 2D array of synonyms back to caller
    return $List;
}
00666 
00667     # set all synonyms (accepts 2D array w/ words as first index)
# Replace all synonyms.
#   $SynonymList - 2D array with words as the first index and arrays of
#       synonyms as the values
# Every existing synonym is removed before the new ones are added.
function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # add synonyms word by word
    foreach ($SynonymList as $Word => $WordSynonyms)
    {
        $this->AddSynonyms($Word, $WordSynonyms);
    }
}
00680 
00681     # suggest alternatives
# Suggest alternatives for a search string.
#   $SearchString - search string to suggest alternatives for (currently
#       unused)
# (not implemented -- nothing is computed and NULL is returned)
function SuggestAlternateSearches($SearchString)
{
    return NULL;
}
00686 
00687 
00688     # ---- PRIVATE INTERFACE -------------------------------------------------
00689 
00690     var $DB;
00691     var $DebugLevel;
00692     var $WordCountAdded;
00693     var $NumberOfResultsAvailable;
00694     var $LastSearchTime;
00695     var $FilterFuncs;
00696     var $FieldIds;
00697     var $DefaultSearchLogic;
00698     var $FieldInfo;
00699     var $RequiredTermCount;
00700     var $RequiredTermCounts;
00701     var $InclusiveTermCount;
00702     var $ExcludedTermCount;
00703     var $ItemTableName;
00704     var $ItemIdFieldName;
00705     var $SearchTermList;
00706 
00707 
00708     # ---- common private functions (used in both searching and DB build)
00709 
00710     # normalize and parse search string into list of search terms
00711     function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
00712     {
00713         # strip off any surrounding whitespace
00714         $Text = trim($SearchString);
00715         
00716         # set up normalization replacement strings
00717         $Patterns = array(
00718                 "/'s[^a-z0-9\-+~]+/i", # get rid of possessive plurals
00719                 "/'/",                 # get rid of single quotes / apostrophes
00720                 "/\"[^\"]*\"/",        # get rid of phrases  (NOTE: HARD-CODED INDEX BELOW!!!)  "
00721                 "/\\([^)]*\\)/",       # get rid of groups  (NOTE: HARD-CODED INDEX BELOW!!!)
00722                 "/[^a-z0-9\-+~]+/i",   # convert non-alphanumerics / non-minus/plus to a space
00723                 "/([^\\s])-/i",        # convert minus preceded by anything but whitespace to a space
00724                 "/([^\\s])\\+/i",      # convert plus preceded by anything but whitespace to a space
00725                 "/-\\s/i",             # convert minus followed by whitespace to a space
00726                 "/\\+\\s/i",           # convert plus followed by whitespace to a space
00727                 "/~\\s/i",             # convert tilde followed by whitespace to a space
00728                 "/[ ]+/"               # convert multiple spaces to one space
00729                 );
00730         $Replacements = array(
00731                 " ",
00732                 "",
00733                 " ",
00734                 " ",
00735                 "\\1 ",
00736                 "\\1 ",
00737                 " ",
00738                 " ",
00739                 " ",
00740                 " ",
00741                 " "
00742                 );
00743         
00744         # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens)
00745         if ($IgnorePhrases)
00746         {
00747             # switch phrase removal to double quote removal  (HARD-CODED INDEX INTO PATTERN LIST!!)
00748             $Patterns[2] = "/\"/";
00749 
00750             # switch group removal to paren removal  (HARD-CODED INDEX INTO PATTERN LIST!!)
00751             $Patterns[3] = "/[\(\)]+/";
00752         }
00753 
00754         # remove punctuation from text and normalize whitespace
00755         $Text = preg_replace($Patterns, $Replacements, $Text);
00756         if ($this->DebugLevel > 2) {  print("SE: Normalized search string is '${Text}'<br>\n");  }
00757 
00758         # convert text to lower case
00759         $Text = strtolower($Text);
00760 
00761         # strip off any extraneous whitespace
00762         $Text = trim($Text);
00763 
00764         # start with an empty array
00765         $Words = array();
00766 
00767         # if we have any words left after parsing
00768         if (strlen($Text) != 0)
00769         {
00770             # for each word
00771             foreach (explode(" ", $Text) as $Word)
00772             {
00773                 # grab first character of word
00774                 $FirstChar = substr($Word, 0, 1);
00775                 
00776                 # strip off option characters and set flags appropriately
00777                 $Flags = WORD_PRESENT;
00778                 if ($FirstChar == "-")
00779                 {
00780                     $Word = substr($Word, 1);
00781                     $Flags |= WORD_EXCLUDED;
00782                     if (!isset($Words[$Word]))
00783                     {
00784                         $this->ExcludedTermCount++;
00785                     }
00786                 }
00787                 else
00788                 {
00789                     if ($FirstChar == "~")
00790                     {
00791                         $Word = substr($Word, 1);
00792                     }
00793                     elseif (($this->DefaultSearchLogic == SEARCHLOGIC_AND) 
00794                             || ($FirstChar == "+"))
00795                     {
00796                         if ($FirstChar == "+")
00797                         {
00798                             $Word = substr($Word, 1);
00799                         }
00800                         $Flags |= WORD_REQUIRED;
00801                         if (!isset($Words[$Word]))
00802                         {
00803                             $this->RequiredTermCount++;
00804                         }
00805                     }
00806                     if (!isset($Words[$Word]))
00807                     {
00808                         $this->InclusiveTermCount++;
00809                         $this->SearchTermList[] = $Word;
00810                     }
00811                 }
00812 
00813                 # store flags to indicate word found
00814                 $Words[$Word] = $Flags;
00815                 if ($this->DebugLevel > 3) {  print("SE: Word identified (${Word})<br>\n");  }
00816             }
00817         }
00818 
00819         # return normalized words to caller
00820         return $Words;
00821     }
00822 
# retrieve ID for specified search field, adding field to database if needed
#   $FieldName - name of field as stored in SearchFields.FieldName
#   returns: field ID (also remembered in $this->FieldIds for later calls)
function GetFieldId($FieldName)
{
    # answer straight from cache when this field has been seen before
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # escape name once for use in either query below
    $EscapedName = addslashes($FieldName);

    # look for existing entry for field
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".$EscapedName."'");
    $Row = $this->DB->FetchRow();

    if ($Row)
    {
        # field already known, so use its existing ID
        $Id = $Row["FieldId"];
    }
    else
    {
        # field not yet known, so add it and pick up ID it was given
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".$EscapedName."')");
        $Id = $this->DB->LastInsertId("SearchFields");
    }

    # remember ID for subsequent calls and hand it back
    $this->FieldIds[$FieldName] = $Id;
    return $this->FieldIds[$FieldName];
}
00855 
# retrieve ID for specified word (returns NULL if no ID found)
#   $Word - word to look up (compared in lower case, which is the form
#           in which this function stores new words)
#   $AddIfNotFound - if TRUE, word is added to SearchWords when it has no ID yet
#   returns: word ID, or NULL if word is unknown and was not added
function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # normalize case up front so that cache key, lookup, and insert all
    #       agree (previously only the inserted text was lower-cased, so a
    #       mixed-case word could fail to match the row added for it)
    $Word = strtolower($Word);

    # if word was in ID cache
    if (isset($WordIdCache[$Word]))
    {
        # use ID from cache
        return $WordIdCache[$Word];
    }

    # look up ID in database
    $EscapedWord = addslashes($Word);
    $WordId = $this->DB->Query("SELECT WordId FROM SearchWords "
            ."WHERE WordText='".$EscapedWord."'", "WordId");

    # if ID was not found and caller requested it be added
    if (($WordId === NULL) && $AddIfNotFound)
    {
        # add word to database
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".$EscapedWord."')");

        # get ID for newly added word
        $WordId = $this->DB->LastInsertId("SearchWords");
    }

    # save ID to cache (a NULL entry fails the isset() test above, so
    #       words that were not found are looked up again next time)
    $WordIdCache[$Word] = $WordId;

    # return ID to caller
    return $WordId;
}
00891 
# retrieve word for specified word ID
#   $WordId - numeric ID of word in SearchWords
#   returns: word text, or whatever DB->Query() yields when no row matches
#   NOTE(review): this header used to say FALSE is returned when no word is
#       found, but GetWordId() tests the same style of Query() call against
#       NULL -- confirm against the Database class
function GetWord($WordId)
{
    static $WordCache;

    # if word is not yet in cache
    if (!isset($WordCache[$WordId]))
    {
        # look up word in database (ID is forced to an integer so that
        #       arbitrary text can never be spliced into the query)
        $WordCache[$WordId] = $this->DB->Query(
                "SELECT WordText FROM SearchWords "
                ."WHERE WordId='".intval($WordId)."'", "WordText");
    }

    # return word to caller
    return $WordCache[$WordId];
}
00916 
00917 
00918     # ---- private functions used in searching
00919 
# perform search across multiple fields and return raw results to caller
#   $SearchStrings - search string, or array of search strings, for each
#           field, indexed by field name ("XXXKeywordXXX" for keyword search)
#   returns: scores indexed by item ID, or NULL if nothing was searched
#   side effects: resets and recalculates the inclusive/required/excluded
#           term counts (via the string parsing methods)
function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # terms parsed from each text search string, kept per string (rather
    #       than per field) so that a field with several search strings
    #       does not lose the exclusions from all but its last string
    $ParsedTerms = array();

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look like comparison match
            if (($FieldName == "XXXKeywordXXX")
                || (isset($this->FieldInfo[$FieldName])
                    && ($this->FieldInfo[$FieldName]["FieldType"] == SEARCHFIELD_TEXT)
                    && !preg_match("/^[><!]=./", $SearchString)
                    && !preg_match("/^[><=]./", $SearchString)))
            {
                if ($this->DebugLevel > 0) {  print("SE:    Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"<br>\n");  }

                # normalize text and split into words
                $Words = $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words))
                {
                    $Scores = $this->SearchForWords(
                            $Words, $FieldName, $Scores);
                    if ($this->DebugLevel > 3) {  print("SE:  Have "
                            .count((array)$Scores)." results after word search<br>\n");  }
                }

                # split into phrases
                $Phrases = $this->ParseSearchStringForPhrases($SearchString);

                # handle any (non-excluded) phrases
                if (count($Phrases))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases, $Scores, $FieldName, TRUE, FALSE);
                    if ($this->DebugLevel > 3) {  print("SE:  Have ".count((array)$Scores)
                            ." results after phrase search<br>\n");  }
                }

                # remember terms for exclusion filtering below
                $ParsedTerms[] = array(
                        "FieldName" => $FieldName,
                        "Words" => $Words,
                        "Phrases" => $Phrases);
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        if ($this->DebugLevel > 3) {  print("SE:  Have ".count((array)$Scores)." results after comparison search<br>\n");  }
    }

    # if no results found and exclusions specified
    #       (empty() rather than count() because $Scores may still be NULL,
    #       which count() rejects in current PHP versions)
    if (empty($Scores) && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (!empty($Scores))
    {
        # for each text search string that was parsed above
        foreach ($ParsedTerms as $Terms)
        {
            # handle any excluded words
            $Scores = $this->FilterOnExcludedWords(
                    $Terms["Words"], $Scores, $Terms["FieldName"]);

            # handle any excluded phrases
            $Scores = $this->SearchForPhrases(
                    $Terms["Phrases"], $Scores, $Terms["FieldName"], FALSE, TRUE);
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
01046     
# search for words in specified field
#   $Words - flags (WORD_* bit values) indexed by word
#   $FieldName - name of field to search
#   $Scores - existing scores indexed by item ID, to be added to (OPTIONAL)
#   returns: scores indexed by item ID, with the stored count of each
#           matching word (plus half, rounded up, of the count of each of
#           its synonyms) added in
#   side effect: bumps $this->RequiredTermCounts for every item that
#           matches a word flagged as required
function SearchForWords(
        $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
{
    $Database = $this->DB;

    # begin with an empty score list when caller did not supply one
    $Scores = ($Scores == NULL) ? array() : $Scores;

    # all count lookups below are restricted to this field
    $FieldId = $this->GetFieldId($FieldName);

    foreach ($Words as $Word => $Flags)
    {
        if ($this->DebugLevel > 2) {  print("SE: Searching for word '${Word}' in field ${FieldName}<br>\n");  }

        # excluded words contribute nothing to scores
        if ($Flags & WORD_EXCLUDED)
        {
            continue;
        }

        # words that have no ID cannot match anything
        if ($this->DebugLevel > 2) {  print("SE:  Looking up word \"${Word}\"<br>\n");  }
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # load per-item counts for word itself
        $Database->Query("SELECT ItemId,Count FROM SearchWordCounts "
                ."WHERE WordId = ".$WordId
                ." AND FieldId = ".$FieldId);
        $ItemCounts = $Database->FetchColumn("Count", "ItemId");

        # find synonym pairs that include word (on either side of pair)
        $Database->Query("SELECT WordIdA, WordIdB"
                ." FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);
        if ($Database->NumRowsSelected())
        {
            # gather ID from the other side of each pair (all rows are
            #       read before any further queries are issued)
            $RelatedIds = array();
            while ($Row = $Database->FetchRow())
            {
                if ($Row["WordIdA"] == $WordId)
                {
                    $RelatedIds[] = $Row["WordIdB"];
                }
                else
                {
                    $RelatedIds[] = $Row["WordIdA"];
                }
            }

            # fold counts for each synonym in at half weight (rounded up)
            foreach ($RelatedIds as $RelatedId)
            {
                $Database->Query("SELECT ItemId,Count"
                        ." FROM SearchWordCounts"
                        ." WHERE WordId = ".$RelatedId
                        ." AND FieldId = ".$FieldId);
                $RelatedCounts = $Database->FetchColumn("Count", "ItemId");

                foreach ($RelatedCounts as $ItemId => $Count)
                {
                    $HalfCount = ceil($Count / 2);
                    $ItemCounts[$ItemId] = isset($ItemCounts[$ItemId])
                            ? ($ItemCounts[$ItemId] + $HalfCount)
                            : $HalfCount;
                }
            }
        }

        # apply accumulated counts to scores
        $IsRequired = ($Flags & WORD_REQUIRED);
        foreach ($ItemCounts as $ItemId => $Count)
        {
            # note that item matched a required word
            if ($IsRequired)
            {
                $this->RequiredTermCounts[$ItemId] =
                        isset($this->RequiredTermCounts[$ItemId])
                        ? ($this->RequiredTermCounts[$ItemId] + 1)
                        : 1;
            }

            # add count to score for item
            $Scores[$ItemId] = isset($Scores[$ItemId])
                    ? ($Scores[$ItemId] + $Count)
                    : $Count;
        }
    }

    # return basic scores to caller
    return $Scores;
}
01168 
# extract phrases (terms surrounded by quotes) from search string
#   $SearchString - raw search string
#   returns: flags (WORD_* bit values) indexed by phrase, where each phrase
#           is trimmed and slash-escaped
#   side effects: updates excluded/required/inclusive term counts and adds
#           each new non-excluded phrase to $this->SearchTermList
function ParseSearchStringForPhrases($SearchString)
{
    # chop string at double quotes, so that (for balanced quotes) the
    #       odd-numbered chunks are the quoted phrases
    $Chunks = explode("\"", $SearchString);

    $Phrases = array();
    for ($Index = 2;  $Index < count($Chunks);  $Index += 2)
    {
        # phrase is the chunk between the pair of quotes
        $Phrase = trim(addslashes($Chunks[$Index - 1]));
        $Flags = WORD_PRESENT;

        # option character (if any) is whatever immediately preceded
        #       the opening quote
        $Prefix = substr($Chunks[$Index - 2], -1);

        # terms are only counted the first time a phrase is seen
        $IsNewPhrase = !isset($Phrases[$Phrase]);

        if ($Prefix == "-")
        {
            # phrase must not appear in results
            $Flags |= WORD_EXCLUDED;
            if ($IsNewPhrase)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            # phrase is required if explicitly marked with a plus, or if
            #       AND logic is the default and phrase is not marked with a tilde
            $IsRequired = ($Prefix == "+")
                    || (($this->DefaultSearchLogic == SEARCHLOGIC_AND)
                            && ($Prefix != "~"));
            if ($IsRequired)
            {
                $Flags |= WORD_REQUIRED;
                if ($IsNewPhrase)
                {
                    $this->RequiredTermCount++;
                }
            }

            if ($IsNewPhrase)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # record phrase (a repeated phrase takes the flags from its last occurrence)
        $Phrases[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Phrases;
}
01222 
# extract groups (terms surrounded by parens) from search string
# (NOTE: NOT YET IMPLEMENTED!!!)
#   As it stands this only splits the string at each open paren and returns
#   every second chunk (trimmed and slash-escaped); close parens are not
#   examined at all.
#   $SearchString - raw search string
#   returns: list of group strings (empty array if none were found)
function ParseSearchStringForGroups($SearchString)
{
    # split into chunks delimited by open paren
    $Pieces = explode("(", $SearchString);

    # start with no groups, so that an (empty) array is always returned
    #       (previously an undefined variable was returned when the
    #       string produced fewer than three chunks)
    $Groups = array();

    # for each pair of chunks
    for ($Index = 2;  $Index < count($Pieces);  $Index += 2)
    {
        # grab group from chunk
        $Groups[] = trim(addslashes($Pieces[$Index - 1]));
    }

    # return groups to caller
    return $Groups;
}
01245 
# placeholder for phrase search within a single field
#   This version does no searching: it prints an error message and ends
#   the script.  SearchForPhrases() iterates over the value returned here
#   as a list of item IDs, so a working version is presumably supplied
#   elsewhere (e.g. by a child class) -- TODO confirm.
#   $FieldName - name of field to search
#   $Phrase - phrase to look for
function SearchFieldForPhrases($FieldName, $Phrase)
{
    # report problem and halt
    $Message = "<br>SE - ERROR:  SearchFieldForPhrases() not implemented<br>\n";
    exit($Message);
}
01251 
# adjust scores based on phrases found in specified field
#   $Phrases - flags (WORD_* bit values) indexed by phrase
#   $Scores - existing scores indexed by item ID
#   $FieldName - field to search; for the keyword pseudo-field, every
#           field marked "InKeywordSearch" is searched in turn
#   $ProcessNonExcluded - if TRUE, items containing non-excluded phrases
#           have their scores increased
#   $ProcessExcluded - if TRUE, items containing excluded phrases are
#           removed from scores
#   returns: updated scores indexed by item ID
#   side effect: bumps $this->RequiredTermCounts for every item that
#           matches a phrase flagged as required
function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do if there are no phrases
    if (!(count($Phrases) > 0))
    {
        return $Scores;
    }

    # keyword searches fan out to each field included in keyword searching
    if ($FieldName == "XXXKeywordXXX")
    {
        foreach ($this->FieldInfo as $KFieldName => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
                        $ProcessNonExcluded, $ProcessExcluded);
            }
        }
        return $Scores;
    }

    foreach ($Phrases as $Phrase => $Flags)
    {
        if ($this->DebugLevel > 2) {  print("SE: searching for phrase '${Phrase}' in field ${FieldName}<br>\n");  }

        # decide what (if anything) this call should do with phrase
        $IsExcluded = ($Flags & WORD_EXCLUDED) ? TRUE : FALSE;
        $RemoveMatches = ($ProcessExcluded && $IsExcluded);
        $ScoreMatches = ($ProcessNonExcluded && !$IsExcluded);
        if (!$RemoveMatches && !$ScoreMatches)
        {
            continue;
        }

        # retrieve list of items that contain phrase
        $ItemIds = $this->SearchFieldForPhrases($FieldName, $Phrase);

        foreach ($ItemIds as $ItemId)
        {
            if ($RemoveMatches)
            {
                # excluded phrase was found, so knock item off of list
                unset($Scores[$ItemId]);
                continue;
            }

            # phrase is worth its number of words times the field weight
            $WordsInPhrase = preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY);
            $PhraseScore = count($WordsInPhrase)
                    * $this->FieldInfo[$FieldName]["Weight"];
            if ($this->DebugLevel > 2) {  print("SE: phrase score is ${PhraseScore}<br>\n");  }

            # bump up score for item
            $Scores[$ItemId] = isset($Scores[$ItemId])
                    ? ($Scores[$ItemId] + $PhraseScore)
                    : $PhraseScore;

            # note that item matched a required phrase
            if ($Flags & WORD_REQUIRED)
            {
                $this->RequiredTermCounts[$ItemId] =
                        isset($this->RequiredTermCounts[$ItemId])
                        ? ($this->RequiredTermCounts[$ItemId] + 1)
                        : 1;
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
01338 
# remove items that contain excluded words from scores
#   $Words - flags (WORD_* bit values) indexed by word
#   $Scores - scores indexed by item ID
#   $FieldName - name of field in which excluded words are looked for
#   returns: scores with every item dropped that has a stored count for
#           an excluded word in the specified field
function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
{
    $Database = $this->DB;

    # word count lookups below are restricted to this field
    $FieldId = $this->GetFieldId($FieldName);

    foreach ($Words as $Word => $Flags)
    {
        # only excluded words are of interest here
        if (!($Flags & WORD_EXCLUDED))
        {
            continue;
        }

        # words that have no ID cannot appear in any item
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # find every item that contains word in this field
        $Database->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        while ($Row = $Database->FetchRow())
        {
            # drop item if it is currently among the results
            $ItemId = $Row["ItemId"];
            if (!isset($Scores[$ItemId]))
            {
                continue;
            }
            if ($this->DebugLevel > 3) {  print("SE: filtering out item $ItemId because it contained word \"".$Word."\"<br>\n");  }
            unset($Scores[$ItemId]);
        }
    }

    # return filtered score list to caller
    return $Scores;
}
01381 
# remove items that did not match every required term from scores
#   $Scores - scores indexed by item ID
#   returns: scores with every item dropped whose entry in
#           $this->RequiredTermCounts is missing or is lower than
#           $this->RequiredTermCount (unchanged if no terms were required)
function FilterOnRequiredWords($Scores)
{
    # if there were required words
    if ($this->RequiredTermCount > 0)
    {
        # for each item
        foreach ($Scores as $ItemId => $Score)
        {
            # items with no entry matched no required terms at all
            #       (looked up once here so that the debug message below
            #       no longer reads an array entry that may not exist)
            $FoundCount = isset($this->RequiredTermCounts[$ItemId])
                    ? $this->RequiredTermCounts[$ItemId] : 0;

            # if item does not meet required word count
            if ($FoundCount < $this->RequiredTermCount)
            {
                # filter out item
                if ($this->DebugLevel > 4) {  print("SE: filtering out item $ItemId because it didn't have required word count of ".$this->RequiredTermCount." (only had ".$FoundCount.")<br>\n");  }
                unset($Scores[$ItemId]);
            }
        }
    }

    # return filtered list to caller
    return $Scores;
}
01404     
# count, sort, and trim search result scores list
#   $Scores - scores indexed by item ID (NULL is treated as no results)
#   $StartingResult - offset of first result to return
#   $NumberOfResults - maximum number of results to return
#   $SortByField - field to order results by, or NULL to order by score
#   $SortDescending - TRUE to sort in descending order
#   returns: requested slice of the sorted scores, still indexed by item ID
#   side effect: saves number of results left after the filter callbacks
#           in $this->NumberOfResultsAvailable
function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # treat "no search performed" the same as "nothing found", so that
    #       the counting and sorting below always have an array to work on
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # perform any requested filtering
    if ($this->DebugLevel > 0) {  print("SE:    Have "
            .count($Scores)." results before filter callbacks<br>\n");  }
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (starting from an empty
            #       list, so that the result is still an array when none
            #       of the sorted IDs appear in the search results)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # sort result list by score
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller (final argument
    #       keeps the item IDs as keys rather than renumbering from zero)
    return array_slice($Scores, $StartingResult, $NumberOfResults, TRUE);
}
01461 
# run results past any filter functions that have been registered
#   $Scores - scores indexed by item ID
#   returns: scores with every item dropped for which some function in
#           $this->FilterFuncs returned a true value (unchanged if no
#           filter functions have been set)
function FilterOnSuppliedFunctions($Scores)
{
    # nothing to do if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # keep asking until some function rejects item
            if (!$FuncName($ItemId))
            {
                continue;
            }

            # item was rejected, so discard it and move on to next item
            if ($this->DebugLevel > 2) {  print("SE:      filter callback <i>$FuncName</i> rejected item ${ItemId}<br>\n");  }
            unset($Scores[$ItemId]);
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
01490 
01491     function SearchForComparisonMatches($SearchStrings, $Scores)
01492     {
01493         # for each field
01494         $Index = 0;
01495         foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
01496         {
01497             # if field is not keyword
01498             if ($SearchFieldName != "XXXKeywordXXX")
01499             {
01500                 # convert search string to array if needed
01501                 if (!is_array($SearchStringArray))
01502                 {
01503                     $SearchStringArray = array($SearchStringArray);
01504                 }
01505 
01506                 # for each search string for this field
01507                 foreach ($SearchStringArray as $SearchString)
01508                 {
01509                     # if search string looks like comparison search
01510                     $FoundOperator = preg_match("/^[><!]=./", $SearchString) || preg_match("/^[><=]./", $SearchString);
01511                     if ($FoundOperator || (isset($this->FieldInfo[$SearchFieldName]["FieldType"]) && ($this->FieldInfo[$SearchFieldName]["FieldType"] != SEARCHFIELD_TEXT)))
01512                     {
01513                         # determine value
01514                         $Patterns = array("/^[><!]=/", "/^[><=]/");
01515                         $Replacements = array("", "");
01516                         $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));
01517 
01518                         # determine and save operator
01519                         if (!$FoundOperator)
01520                         {
01521                             $Operators[$Index] = "=";
01522                         }
01523                         else
01524                         {
01525                             $Term = trim($SearchString);
01526                             $FirstChar = $Term{0};
01527                             $FirstTwoChars = $FirstChar.$Term{1};
01528                             if ($FirstTwoChars == ">=")     {  $Operators[$Index] = ">=";  }
01529                             elseif ($FirstTwoChars == "<=") {  $Operators[$Index] = "<=";  }
01530                             elseif ($FirstTwoChars == "!=") {  $Operators[$Index] = "!=";  }
01531                             elseif ($FirstChar == ">")      {  $Operators[$Index] = ">";  }
01532                             elseif ($FirstChar == "<")      {  $Operators[$Index] = "<";  }
01533                             elseif ($FirstChar == "=")      {  $Operators[$Index] = "=";  }
01534                         }
01535                         
01536                         # if operator was found
01537                         if (isset($Operators[$Index]))
01538                         {
01539                             # save value
01540                             $Values[$Index] = $Value;
01541 
01542                             # save field name
01543                             $FieldNames[$Index] = $SearchFieldName;
01544                             if ($this->DebugLevel > 3) {  print("SE:  added comparison (field = <i>".$FieldNames[$Index]."</i>  op = <i>".$Operators[$Index]."</i>  val = <i>".$Values[$Index]."</i>)<br>\n");  }
01545 
01546                             # move to next comparison array entry
01547                             $Index++;
01548                         }
01549                     }
01550                 }
01551             }
01552         }
01553         
01554         # if comparisons found
01555         if (isset($Operators))
01556         {
01557             # perform comparisons on fields and gather results
01558             $Results = $this->SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values);
01559             
01560             # if search logic is set to AND
01561             if ($this->DefaultSearchLogic == SEARCHLOGIC_AND)
01562             {
01563                 # if results were found
01564                 if (count($Results))
01565                 {
01566                     # if there were no prior results and no terms for keyword search
01567                     if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
01568                     {
01569                         # add all results to scores
01570                         foreach ($Results as $ItemId)
01571                         {
01572                             $Scores[$ItemId] = 1;
01573                         }
01574                     }
01575                     else
01576                     {
01577                         # remove anything from scores that is not part of results
01578                         foreach ($Scores as $ItemId => $Score)
01579                         {
01580                             if (in_array($ItemId, $Results) == FALSE)
01581                             {
01582                                 unset($Scores[$ItemId]);
01583                             }
01584                         }
01585                     }
01586                 }
01587                 else
01588                 {
01589                     # clear scores
01590                     $Scores = array();
01591                 }
01592             }
01593             else
01594             {
01595                 # add result items to scores
01596                 foreach ($Results as $ItemId)
01597                 {
01598                     if (isset($Scores[$ItemId]))
01599                     {
01600                         $Scores[$ItemId] += 1;
01601                     }
01602                     else
01603                     {
01604                         $Scores[$ItemId] = 1;
01605                     }
01606                 }
01607             }
01608         }
01609 
01610         # return results to caller
01611         return $Scores;
01612     }
01613     
01614     function SetDebugLevel($SearchStrings)
01615     {
01616         # if search info is an array
01617         if (is_array($SearchStrings))
01618         {
01619             # for each array element
01620             foreach ($SearchStrings as $FieldName => $SearchStringArray)
01621             {
01622                 # if element is an array
01623                 if (is_array($SearchStringArray))
01624                 {
01625                     # for each array element
01626                     foreach ($SearchStringArray as $Index => $SearchString)
01627                     {
01628                         # pull out search string if present
01629                         $SearchStrings[$FieldName][$Index] = $this->ExtractDebugLevel($SearchString);
01630                     }
01631                 }
01632                 else
01633                 {
01634                     # pull out search string if present
01635                     $SearchStrings[$FieldName] = $this->ExtractDebugLevel($SearchStringArray);
01636                 }
01637             }
01638         }
01639         else
01640         {
01641             # pull out search string if present
01642             $SearchStrings = $this->ExtractDebugLevel($SearchStrings);
01643         }
01644 
01645         # return new search info to caller
01646         return $SearchStrings;
01647     }
01648     
01649     function ExtractDebugLevel($SearchString)
01650     {
01651         # if search string contains debug level indicator
01652         if (strstr($SearchString, "DBUGLVL="))
01653         {
01654             # remove indicator and set debug level
01655             $Level = preg_replace("/^\\s*DBUGLVL=([1-9]{1,2}).*/", "\\1", $SearchString);
01656             if ($Level > 0)
01657             {
01658                 print("SE: setting debug level to $Level<br>\n");
01659                 $this->DebugLevel = $Level;
01660                 $SearchString = preg_replace("/DBUGLVL=${Level}/", "", $SearchString);
01661             }
01662         }
01663         
01664         # return (possibly) modified search string to caller
01665         return $SearchString;
01666     }
01667 
01668     # load and return search result scores array containing all possible records
01669     function LoadScoresForAllRecords()
01670     {
01671         # start with empty list
01672         $Scores = array();
01673         
01674         # for every item
01675         $this->DB->Query("SELECT ".$this->ItemIdFieldName
01676                          ." FROM ".$this->ItemTableName);
01677         while ($Record = $this->DB->FetchRow())
01678         {
01679             # set score for item to 1
01680             $Scores[$Record[$this->ItemIdFieldName]] = 1;
01681         }
01682         
01683         # return array with all scores to caller
01684         return $Scores;
01685     }
01686 
01687 
01688     # ---- private functions used in building search database
01689 
01690     function UpdateWordCount($WordId, $ItemId, $FieldId, $Weight)
01691     {
01692         $DB = $this->DB;
01693 
01694         # if word count already added to database
01695         if (isset($this->WordCountAdded[$WordId][$FieldId]))
01696         {
01697             # update word count
01698             $DB->Query("UPDATE SearchWordCounts SET Count=Count+${Weight} "
01699                     ."WHERE WordId=${WordId} "
01700                             ."AND ItemId=${ItemId} "
01701                             ."AND FieldId=${FieldId}");
01702         }
01703         else
01704         {
01705             # add word count to DB
01706             $DB->Query("INSERT INTO SearchWordCounts"
01707                     ." (WordId, ItemId, FieldId, Count) VALUES"
01708                     ." (${WordId}, ${ItemId}, ${FieldId}, ${Weight})");
01709 
01710             # remember that we added count for this word
01711             $this->WordCountAdded[$WordId][$FieldId] = TRUE;
01712         }
01713     }
01714 
01715     function GetFieldContent($ItemId, $FieldName)
01716     {
01717         # error out
01718         exit("<br>SE - ERROR: GetFieldContent() not implemented<br>\n");
01719     }
01720 
01721     function RecordSearchInfoForText($ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
01722     {
01723         $DB = $this->DB;
01724 
01725         # normalize text
01726         $Words = $this->ParseSearchStringForWords($Text, TRUE);
01727 
01728         # if there was text left after parsing
01729         if (count($Words) > 0)
01730         {
01731             # get ID for field
01732             $FieldId = $this->GetFieldId($FieldName);
01733 
01734             # if text should be included in keyword searches
01735             if ($IncludeInKeyword)
01736             {
01737                 # get ID for keyword field
01738                 $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
01739             }
01740 
01741             # for each word
01742             foreach ($Words as $Word => $Flags)
01743             {
01744                 # look up ID for word
01745                 $WordId = $this->GetWordId($Word, TRUE);
01746 
01747                 # update count for word
01748                 $this->UpdateWordCount($WordId, $ItemId, $FieldId, 1);
01749 
01750                 # if text should be included in keyword searches
01751                 if ($IncludeInKeyword)
01752                 {
01753                     # update keyword field count for word
01754                     $this->UpdateWordCount(
01755                             $WordId, $ItemId, $KeywordFieldId, $Weight);
01756                 }
01757             }
01758         }
01759     }
01760 
01761     # convenience function for getting time in microseconds
01762     function GetMicrotime()
01763     {
01764         list($usec, $sec) = explode(" ", microtime());
01765         return ((float)$usec + (float)$sec);
01766     }
01767 }
01768 
# define global constants for search logic modes (SEARCHLOGIC_*) and for
#   flags used for indicating field types (SEARCHFIELD_*)
# (first list gives constant names, second gives value for the name in
#   the same position)
array_map("define",
        array(
                "SEARCHLOGIC_AND",
                "SEARCHLOGIC_OR",
                "SEARCHFIELD_TEXT",
                "SEARCHFIELD_NUMERIC",
                "SEARCHFIELD_DATE",
                "SEARCHFIELD_DATERANGE",
                ),
        array(
                1,      # SEARCHLOGIC_AND
                2,      # SEARCHLOGIC_OR
                1,      # SEARCHFIELD_TEXT
                2,      # SEARCHFIELD_NUMERIC
                3,      # SEARCHFIELD_DATE
                4,      # SEARCHFIELD_DATERANGE
                ));
01778 
01779 
01780 ?>
CWIS logo doxygen
Copyright 2009 Internet Scout