Search:

CWIS Developers Documentation

  • Main Page
  • Classes
  • Files
  • File List
  • File Members

Recommender.php

Go to the documentation of this file.
00001 <?PHP
00002 
00003 #
00004 #   FILE:  SPT--Recommender.php
00005 #
00006 #   METHODS PROVIDED:
00007 #       Recommender()
00008 #           - constructor
00009 #       Recommend($UserId, $StartingResult, $NumberOfResults)
00010 #           - return recommended items (item ID => score) for specified user
00011 #
00012 #   AUTHOR:  Edward Almasy
00013 #
00014 #   Part of the Scout Portal Toolkit
00015 #   Copyright 2002-2004 Internet Scout Project
00016 #   http://scout.wisc.edu
00017 #
00018 
#
#   Content-based recommender.  Correlations between pairs of items are
#   computed by counting words shared between the items' content fields
#   (weighted per field) and stored in the RecContentCorrelations table, with
#   the lower item ID always in ItemIdA.  Recommendations for a user are built
#   by combining those correlations with the ratings the user has given.
#
#   This class calls $this->GetFieldValue($ItemId, $FieldName) but does not
#   define it, so it must be supplied by a subclass.  The value returned may
#   be a string or an array of strings.
#
#   The database object passed to the constructor is used only through
#   Query($Sql), Query($Sql, $FieldName) (return value is used as the value
#   of the named field) and FetchRow() (return value is used as an
#   associative array, or a false value when no rows remain).
#
class Recommender {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    # content field types (compared against the "FieldType" attribute of
    #       each entry in the content field list)
    const CONTENTFIELDTYPE_TEXT =  1;
    const CONTENTFIELDTYPE_NUMERIC =  2;
    const CONTENTFIELDTYPE_CONTROLLEDNAME =  3;
    const CONTENTFIELDTYPE_DATE =  4;
    const CONTENTFIELDTYPE_DATERANGE =  5;
    # (misspelled name retained so that existing callers keep working)
    const CONTENTFIELDTYPE_DATERAMGE =  5;

    # object constructor
    #   DB - database object (see class header for methods used)
    #   ItemTableName - name of table containing items
    #   RatingTableName - name of table containing user ratings of items
    #   ItemIdFieldName - name of item ID column (used in both tables)
    #   UserIdFieldName - name of user ID column in rating table
    #   RatingFieldName - name of rating column in rating table
    #   ContentFields - array indexed by field name, with each entry an
    #           array containing "FieldType" and "Weight" values
    # (declared before the legacy Recommender() method so that PHP 5 does
    #       not complain about a redefined constructor)
    function __construct(&$DB, $ItemTableName, $RatingTableName,
            $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
            $ContentFields)
    {
        # set default parameters
        $this->ContentCorrelationThreshold = 1;

        # save database object
        $this->DB =& $DB;

        # save new configuration values
        $this->ItemTableName = $ItemTableName;
        $this->RatingTableName = $RatingTableName;
        $this->ItemIdFieldName = $ItemIdFieldName;
        $this->UserIdFieldName = $UserIdFieldName;
        $this->RatingFieldName = $RatingFieldName;
        $this->ContentFields = $ContentFields;

        # start with no result filters and empty caches
        $this->FilterFuncs = array();
        $this->ItemDataCache = array();
        $this->CorrelationCache = array();

        # set default debug state
        $this->DebugLevel = 0;
    }

    # legacy (PHP 4 style) constructor name
    # (PHP 8 no longer treats this as a constructor, so the real setup lives
    #       in __construct();  this method is kept for code that invokes it
    #       explicitly, such as subclass constructors)
    function Recommender(&$DB, $ItemTableName, $RatingTableName,
            $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
            $ContentFields)
    {
        $this->__construct($DB, $ItemTableName, $RatingTableName,
                $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
                $ContentFields);
    }

    # set level for debugging output (0 = no output, higher = more output)
    function DebugLevel($Setting)
    {
        $this->DebugLevel = $Setting;
    }


    # ---- recommendation methods

    # recommend items for specified user
    #   UserId - ID of user to generate recommendations for
    #   StartingResult - offset of first result to return
    #   NumberOfResults - maximum number of results to return
    # returns array of recommendation scores indexed by item ID, ordered
    #       from highest to lowest score
    function Recommend($UserId, $StartingResult = 0, $NumberOfResults = 10)
    {
        if ($this->DebugLevel > 0) {  print("REC:  Recommend({$UserId}, {$StartingResult}, {$NumberOfResults})<br>\n");  }

        # note starting time (reported via SearchTime())
        $StartTime = microtime(TRUE);

        # load in user ratings
        $Ratings = array();
        $DB =& $this->DB;
        $DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->RatingFieldName
                ." FROM ".$this->RatingTableName
                ." WHERE ".$this->UserIdFieldName." = ".intval($UserId));
        while ($Row = $DB->FetchRow())
        {
            $Ratings[$Row[$this->ItemIdFieldName]] =
                    $Row[$this->RatingFieldName];
        }
        if ($this->DebugLevel > 1) {  print("REC:  user has rated ".count($Ratings)." items<br>\n");  }

        # for each item that user has rated
        $RecVals = array();
        foreach ($Ratings as $ItemId => $ItemRating)
        {
            # for each content correlation available for that item
            # (each pair is stored only once, with the lower ID in ItemIdA,
            #       so the rated item may appear in either column)
            $DB->Query("SELECT Correlation, ItemIdA, ItemIdB "
                    ."FROM RecContentCorrelations "
                    ."WHERE ItemIdA = ".intval($ItemId)
                    ." OR ItemIdB = ".intval($ItemId));
            while ($Row = $DB->FetchRow())
            {
                # pick out ID of item at other end of correlation
                $OtherId = ($Row["ItemIdA"] == $ItemId)
                        ? $Row["ItemIdB"] : $Row["ItemIdA"];

                # multiply that correlation by normalized rating and add
                #       resulting value to recommendation value for that item
                if (!isset($RecVals[$OtherId]))
                {
                    $RecVals[$OtherId] = 0;
                }
                $RecVals[$OtherId] += $Row["Correlation"] * ($ItemRating - 50);
                if ($this->DebugLevel > 9) {  print("REC:  RecVal[".$OtherId."] = ".$RecVals[$OtherId]."<br>\n");  }
            }
        }
        if ($this->DebugLevel > 1) {  print("REC:  found ".count($RecVals)." total recommendations<br>\n");  }

        # use twice the (rounded) average correlation as result threshold
        $AverageCorrelation = $DB->Query("SELECT AVG(Correlation) "
                ."AS Average FROM RecContentCorrelations", "Average");
        $ResultThreshold = round(floatval($AverageCorrelation)) * 2;

        # for each recommended item
        $Recommendations = array();
        foreach ($RecVals as $ItemId => $RecVal)
        {
            # skip item if user already rated it
            if (isset($Ratings[$ItemId])) {  continue;  }

            # scale recommendation value back to match thresholds
            $ScaledVal = round($RecVal / 50);

            # keep item only if value is at or above threshold
            if ($ScaledVal >= $ResultThreshold)
            {
                $Recommendations[$ItemId] = $ScaledVal;
            }
        }
        if ($this->DebugLevel > 1) {  print("REC:  found ".count($Recommendations)." positive recommendations<br>\n");  }

        # sort recommendation list by value (highest first)
        arsort($Recommendations, SORT_NUMERIC);

        # save total number of results available
        $this->NumberOfResultsAvailable = count($Recommendations);

        # trim result list to match range requested by caller
        #       (final argument preserves the item ID keys)
        $RecValSegment = array_slice(
                $Recommendations, $StartingResult, $NumberOfResults, TRUE);

        # save time taken to generate recommendations (in seconds)
        $this->LastSearchTime = microtime(TRUE) - $StartTime;

        # return recommendation list to caller
        return $RecValSegment;
    }

    # add function to be called to filter list of similar items
    #   FunctionName - callback that is passed an item ID and returns TRUE
    #           if that item should be discarded
    function AddResultFilterFunction($FunctionName)
    {
        # save filter function name
        $this->FilterFuncs[] = $FunctionName;
    }

    # return number of recommendations found by last call to Recommend()
    #       (before trimming to the requested range)
    function NumberOfResults()
    {
        return $this->NumberOfResultsAvailable;
    }

    # return time taken (in seconds) by last call to Recommend()
    function SearchTime()
    {
        return $this->LastSearchTime;
    }

    # return list of items used to generate recommendation of specified item
    #   UserId - ID of user the recommendation was generated for
    #   RecommendedItemId - ID of item that was recommended
    # returns array of correlation values indexed by source item ID
    function GetSourceList($UserId, $RecommendedItemId)
    {
        $RecItemId = intval($RecommendedItemId);
        $RatingItemField = $this->RatingTableName.".".$this->ItemIdFieldName;

        # pull list of correlations from DB
        $this->DB->Query("SELECT * FROM RecContentCorrelations, ".$this->RatingTableName
                ." WHERE (ItemIdA = ".$RecItemId
                        ." OR ItemIdB = ".$RecItemId.")"
                        ." AND ".$this->UserIdFieldName." = ".intval($UserId)
                        ." AND (RecContentCorrelations.ItemIdA = ".$RatingItemField
                        ." OR RecContentCorrelations.ItemIdB = ".$RatingItemField.")"
                        ." AND ".$this->RatingFieldName." >= 50 "
                ." ORDER BY Correlation DESC");

        # for each correlation
        $SourceList = array();
        while ($Row = $this->DB->FetchRow())
        {
            # pick out ID of item at other end of correlation
            $ItemId = ($Row["ItemIdA"] == $RecommendedItemId)
                    ? $Row["ItemIdB"] : $Row["ItemIdA"];

            # add item to recommendation source list
            $SourceList[$ItemId] = $Row["Correlation"];
        }

        # return recommendation source list to caller
        return $SourceList;
    }

    # dynamically generate and return list of items similar to specified item
    #   ItemId - ID of item to find similar items for
    #   FieldList - names of content fields to compare (NULL to use all)
    # returns array of correlation values indexed by item ID, ordered from
    #       most to least similar
    function FindSimilarItems($ItemId, $FieldList = NULL)
    {
        if ($this->DebugLevel > 1) {  print("REC:  searching for items similar to item \"".$ItemId."\"<br>\n");  }

        # make sure we have item IDs available
        $this->LoadItemIds();

        # for every item other than specified item
        $SimilarItems = array();
        foreach ($this->ItemIds as $Id)
        {
            if ($Id == $ItemId) {  continue;  }

            # calculate correlation of item to specified item
            $Correlation = $this->CalculateContentCorrelation($ItemId, $Id, $FieldList);

            # if correlation is above threshold
            if ($Correlation > $this->ContentCorrelationThreshold)
            {
                # add item to list of similar items
                $SimilarItems[$Id] = $Correlation;
            }
        }
        if ($this->DebugLevel > 3) {  print("REC:  ".count($SimilarItems)." similar items to item \"".$ItemId."\" found<br>\n");  }

        # filter list of similar items (if any)
        if (count($SimilarItems) > 0)
        {
            $SimilarItems = $this->FilterOnSuppliedFunctions($SimilarItems);
            if ($this->DebugLevel > 4) {  print("REC:  ".count($SimilarItems)." similar items to item \"".$ItemId."\" left after filtering<br>\n");  }
        }

        # sort list of similar items in order of most to least similar
        if (count($SimilarItems) > 0)
        {
            arsort($SimilarItems, SORT_NUMERIC);
        }

        # return list of similar items to caller
        return $SimilarItems;
    }

    # dynamically generate and return list of recommended field values for item
    #   ItemId - ID of item to recommend field values for
    #   FieldList - names of content fields used when finding similar items
    #           (NULL to use all)
    # returns array indexed by field name, with each entry an array of
    #       occurrence counts indexed by field value
    function RecommendFieldValues($ItemId, $FieldList = NULL)
    {
        if ($this->DebugLevel > 1) {  print("REC:  generating field value recommendations for item \"".$ItemId."\"<br>\n");  }

        # start with empty array of values
        $RecVals = array();

        # generate list of similar items (ordered most similar first)
        $SimilarItems = $this->FindSimilarItems($ItemId, $FieldList);

        # nothing to recommend if no similar items found
        if (count($SimilarItems) == 0) {  return $RecVals;  }

        # prune list of similar items to only top third of better-than-average
        $AverageCorr = intval(array_sum($SimilarItems) / count($SimilarItems));
        reset($SimilarItems);
        $HighestCorr = current($SimilarItems);
        $CorrThreshold = intval($HighestCorr - (($HighestCorr - $AverageCorr) / 3));
        if ($this->DebugLevel > 8) {  print("REC:  <i>Average Correlation: $AverageCorr &nbsp;&nbsp;&nbsp;&nbsp; Highest Correlation: $HighestCorr &nbsp;&nbsp;&nbsp;&nbsp; Correlation Threshold: $CorrThreshold </i><br>\n");  }
        foreach ($SimilarItems as $SimItemId => $SimItemCorr)
        {
            if ($SimItemCorr < $CorrThreshold)
            {
                unset($SimilarItems[$SimItemId]);
            }
        }
        if ($this->DebugLevel > 6) {  print("REC:  ".count($SimilarItems)." similar items left after threshold pruning<br>\n");  }

        # for each field of each remaining similar item
        foreach ($SimilarItems as $SimItemId => $SimItemCorr)
        {
            foreach ($this->ContentFields as $FieldName => $FieldAttributes)
            {
                # load field data for this item (treating single value
                #       as a list of one)
                $FieldData = $this->GetFieldValue($SimItemId, $FieldName);
                if (!is_array($FieldData)) {  $FieldData = array($FieldData);  }

                # for each field data value
                foreach ($FieldData as $FieldDataVal)
                {
                    # skip data value if empty
                    $FieldDataVal = trim((string)$FieldDataVal);
                    if (strlen($FieldDataVal) == 0) {  continue;  }

                    # increment count for data value
                    if (!isset($RecVals[$FieldName][$FieldDataVal]))
                    {
                        $RecVals[$FieldName][$FieldDataVal] = 0;
                    }
                    $RecVals[$FieldName][$FieldDataVal]++;
                }
            }
        }

        # for each field
        $MatchingCountThreshold = 3;
        foreach ($RecVals as $FieldName => $FieldVals)
        {
            # determine cutoff threshold (halfway between average and
            #       highest count, but never less than the minimum)
            arsort($FieldVals, SORT_NUMERIC);
            reset($FieldVals);
            $HighestCount = current($FieldVals);
            $AverageCount = intval(array_sum($FieldVals) / count($FieldVals));
            $CountThreshold = intval($AverageCount + (($HighestCount - $AverageCount) / 2));
            if ($CountThreshold < $MatchingCountThreshold) {  $CountThreshold = $MatchingCountThreshold;  }
            if ($this->DebugLevel > 8) {  print("REC:  <i>Field: $FieldName &nbsp;&nbsp;&nbsp;&nbsp;  Average Count: $AverageCount &nbsp;&nbsp;&nbsp;&nbsp; Highest Count: $HighestCount &nbsp;&nbsp;&nbsp;&nbsp; Count Threshold: $CountThreshold </i><br>\n");  }

            # discard each field data value with count below threshold
            foreach ($FieldVals as $FieldVal => $FieldValCount)
            {
                if ($FieldValCount < $CountThreshold)
                {
                    unset($RecVals[$FieldName][$FieldVal]);
                }
            }

            if ($this->DebugLevel > 3) {  print("REC:  found ".count($RecVals[$FieldName])." recommended values for field \"".$FieldName."\" after threshold pruning<br>\n");  }
        }

        # return recommended values to caller
        return $RecVals;
    }


    # ---- database update methods

    # update stored correlations for a range of items
    #   StartingItemId - lowest item ID to update
    #   NumberOfItems - maximum number of items to update
    # returns ID of last item examined (NULL if there are no items)
    function UpdateForItems($StartingItemId, $NumberOfItems)
    {
        if ($this->DebugLevel > 0) {  print("REC:  UpdateForItems({$StartingItemId}, {$NumberOfItems})<br>\n");  }

        # make sure we have item IDs available
        $this->LoadItemIds();

        # for every item
        $ItemsUpdated = 0;
        $ItemId = NULL;
        foreach ($this->ItemIds as $ItemId)
        {
            # skip item if ID is below requested range
            if ($ItemId < $StartingItemId) {  continue;  }

            # update recommender info for item
            if ($this->DebugLevel > 1) {  print("REC:  doing item {$ItemId}<br>\n");  }
            $this->UpdateForItem($ItemId, TRUE);
            $ItemsUpdated++;

            # if we have done requested number of items
            if ($ItemsUpdated >= $NumberOfItems)
            {
                # bail out
                if ($this->DebugLevel > 1) {  print("REC:  bailing out with item {$ItemId}<br>\n");  }
                return $ItemId;
            }
        }

        # return ID of last item examined to caller
        return $ItemId;
    }

    # update stored correlations for specified item
    #   ItemId - ID of item to update
    #   FullPass - TRUE if every item is being updated in ascending ID
    #           order, in which case only correlations with later items
    #           are regenerated
    function UpdateForItem($ItemId, $FullPass = FALSE)
    {
        if ($this->DebugLevel > 1) {  print("REC:  updating for item \"".$ItemId."\"<br>\n");  }
        $DB =& $this->DB;

        # make sure we have item IDs available
        $this->LoadItemIds();

        # clear existing correlations for this item
        # (when not doing a full pass, correlations with lower-numbered
        #       items are regenerated below as well, so entries that have
        #       this item in the second column must also be cleared)
        $Condition = "WHERE ItemIdA = ".intval($ItemId);
        if (!$FullPass)
        {
            $Condition .= " OR ItemIdB = ".intval($ItemId);
        }
        $DB->Query("DELETE FROM RecContentCorrelations ".$Condition);

        # for every item
        foreach ($this->ItemIds as $Id)
        {
            # if not full pass or item is later in list than current item
            if (!$FullPass || ($Id > $ItemId))
            {
                # update correlation value for item and target item
                $this->UpdateContentCorrelation($ItemId, $Id);
            }
        }
    }

    # remove all stored correlations that involve specified item
    function DropItem($ItemId)
    {
        # drop all correlation entries referring to item
        $this->DB->Query("DELETE FROM RecContentCorrelations "
                         ."WHERE ItemIdA = ".intval($ItemId)." "
                            ."OR ItemIdB = ".intval($ItemId));
    }

    # remove stored correlations that are at or below the average value
    function PruneCorrelations()
    {
        # get average correlation
        $AverageCorrelation = $this->DB->Query("SELECT AVG(Correlation) "
                ."AS Average FROM RecContentCorrelations", "Average");

        # dump all correlations at or below average
        if ($AverageCorrelation > 0)
        {
            $this->DB->Query("DELETE FROM RecContentCorrelations "
                    ."WHERE Correlation <= {$AverageCorrelation}");
        }
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    var $ContentCorrelationThreshold;
    var $ContentFields;
    var $ItemTableName;
    var $RatingTableName;
    var $ItemIdFieldName;
    var $UserIdFieldName;
    var $RatingFieldName;
    var $ItemIds;
    var $DB;
    var $FilterFuncs = array();
    var $LastSearchTime;
    var $NumberOfResultsAvailable;
    var $DebugLevel;
    # parsed field data, indexed by item ID and then field name
    var $ItemDataCache = array();
    # calculated correlations, indexed by field set, lower item ID, higher item ID
    var $CorrelationCache = array();


    # load list of all item IDs (in ascending order) if not already loaded
    function LoadItemIds()
    {
        # if item IDs not already loaded
        if (!isset($this->ItemIds))
        {
            # load item IDs from DB
            $this->DB->Query("SELECT ".$this->ItemIdFieldName." AS Id FROM "
                    .$this->ItemTableName." ORDER BY ".$this->ItemIdFieldName);
            $this->ItemIds = array();
            while ($Item = $this->DB->FetchRow())
            {
                $this->ItemIds[] = $Item["Id"];
            }
        }
    }

    # return normalized list of words for specified field of specified item
    #       (results are cached for up to 1000 items)
    function GetFieldData($ItemId, $FieldName)
    {
        $CacheLimit = 1000;

        # if data not already loaded
        if (!isset($this->ItemDataCache[$ItemId][$FieldName]))
        {
            # load field value
            $FieldValue = $this->GetFieldValue($ItemId, $FieldName);

            # if field value is array
            if (is_array($FieldValue))
            {
                # concatenate together text from array elements
                $FieldValue = implode(" ", $FieldValue);
            }

            # normalize text and break into word array
            $this->ItemDataCache[$ItemId][$FieldName] =
                    $this->NormalizeAndParseText($FieldValue);

            # if more items than cache limit
            if (count($this->ItemDataCache) > $CacheLimit)
            {
                # dump oldest item (first entry in array)
                reset($this->ItemDataCache);
                unset($this->ItemDataCache[key($this->ItemDataCache)]);
            }
        }

        # return cached data to caller
        return $this->ItemDataCache[$ItemId][$FieldName];
    }

    # calculate content correlation between two items and return value to caller
    #   ItemIdA, ItemIdB - IDs of items to compare (in either order)
    #   FieldList - names of content fields to compare (NULL to use all)
    # returns sum over compared fields of (shared word count * field weight)
    function CalculateContentCorrelation($ItemIdA, $ItemIdB, $FieldList = NULL)
    {
        if ($this->DebugLevel > 10) {  print("REC:  calculating correlation between items $ItemIdA and $ItemIdB<br>\n");  }

        # order item ID numbers
        if ($ItemIdA > $ItemIdB)
        {
            $Temp = $ItemIdA;
            $ItemIdA = $ItemIdB;
            $ItemIdB = $Temp;
        }

        # if list of fields to correlate specified
        if ($FieldList != NULL)
        {
            # create list with only specified fields (ignoring unknown ones)
            $ContentFields = array();
            foreach ($FieldList as $FieldName)
            {
                if (isset($this->ContentFields[$FieldName]))
                {
                    $ContentFields[$FieldName] = $this->ContentFields[$FieldName];
                }
            }
        }
        else
        {
            # use all fields
            $ContentFields = $this->ContentFields;
        }

        # cache entries are specific to the set of fields compared, so that
        #       a value calculated for a subset of fields is never returned
        #       for a different set
        $CacheKey = implode("\n", array_keys($ContentFields));

        # if we already have the correlation
        if (isset($this->CorrelationCache[$CacheKey][$ItemIdA][$ItemIdB]))
        {
            # retrieve correlation from cache
            $TotalCorrelation = $this->CorrelationCache[$CacheKey][$ItemIdA][$ItemIdB];
        }
        else
        {
            # for each content field
            $TotalCorrelation = 0;
            foreach ($ContentFields as $FieldName => $FieldAttributes)
            {
                # skip field if not of a type that we use for correlation
                $FieldType = intval($FieldAttributes["FieldType"]);
                if (($FieldType != Recommender::CONTENTFIELDTYPE_TEXT)
                        && ($FieldType != Recommender::CONTENTFIELDTYPE_CONTROLLEDNAME))
                {
                    continue;
                }

                # load data
                $ItemAData = $this->GetFieldData($ItemIdA, $FieldName);
                $ItemBData = $this->GetFieldData($ItemIdB, $FieldName);
                if ($this->DebugLevel > 15) {  print("REC:  loaded ".count($ItemAData)." terms for item #".$ItemIdA." and ".count($ItemBData)." terms for item #".$ItemIdB." for field \"".$FieldName."\"<br>\n");  }

                # add correlation multiplied by weight to total
                $Correlation = $this->CalcTextCorrelation($ItemAData, $ItemBData);
                $TotalCorrelation += $Correlation * $FieldAttributes["Weight"];
            }

            # store correlation to cache
            $this->CorrelationCache[$CacheKey][$ItemIdA][$ItemIdB] = $TotalCorrelation;
        }

        # return correlation value to caller
        if ($this->DebugLevel > 9) {  print("REC:  correlation between items $ItemIdA and $ItemIdB found to be $TotalCorrelation<br>\n");  }
        return $TotalCorrelation;
    }

    # calculate content correlation between two items and update in DB
    function UpdateContentCorrelation($ItemIdA, $ItemIdB)
    {
        if ($this->DebugLevel > 6) {  print("REC:  updating correlation between items $ItemIdA and $ItemIdB<br>\n");  }

        # bail out if two items are the same
        if ($ItemIdA == $ItemIdB) {  return;  }

        # calculate correlation
        $Correlation = $this->CalculateContentCorrelation($ItemIdA, $ItemIdB);

        # save new correlation
        $this->ContentCorrelation($ItemIdA, $ItemIdB, $Correlation);
    }

    # strip markup and punctuation from text, convert it to lower case, and
    #       return array of the words remaining after stop words are removed
    # (array keys are word positions and are not renumbered after stop
    #       words are removed)
    function NormalizeAndParseText($Text)
    {
        # (empty string is included so that empty text yields no words)
        $StopWords = array(
                "a", "about", "also", "an", "and", "are", "as", "at",
                "be", "but", "by", "can", "each", "either", "for", "from",
                "has", "he", "her", "here", "hers", "him", "his", "how",
                "i", "if", "in", "include", "into", "is", "it", "its",
                "me", "neither", "no", "nor", "not", "of", "on", "or",
                "so", "she", "than", "that", "the", "their", "them", "then",
                "there", "these", "they", "this", "those", "through", "to",
                "too", "very", "what", "when", "where", "while", "who",
                "why", "will", "you",
                "");

        # strip any HTML tags
        $Text = strip_tags((string)$Text);

        # replace any punctuation with spaces
        # (pattern must be a character class so that each punctuation
        #       character is matched individually)
        $Text = preg_replace('/[,.?\-()\[\]"]/', " ", $Text);

        # normalize whitespace
        $Text = trim(preg_replace("/[\\s]+/", " ", $Text));

        # convert to all lower case
        $Text = strtolower($Text);

        # split text into array of words
        $Words = explode(" ", $Text);

        # filter out all stop words
        $Words = array_diff($Words, $StopWords);

        # return word array to caller
        return $Words;
    }

    # return number of entries in first word list that also appear in second
    function CalcTextCorrelation($WordsA, $WordsB)
    {
        # get array containing intersection of two word arrays
        $IntersectWords = array_intersect($WordsA, $WordsB);

        # return number of words remaining as score
        return count($IntersectWords);
    }

    # get or set stored correlation between two items
    #   ItemIdA, ItemIdB - IDs of items (in either order)
    #   NewCorrelation - new value to store (omit or pass -1 to retrieve
    #           the stored value instead)
    # returns stored correlation, or 0 if none is stored or if the new
    #       value was below the correlation threshold (and so not saved)
    function ContentCorrelation($ItemIdA, $ItemIdB, $NewCorrelation = -1)
    {
        # if item ID A is greater than item ID B
        if ($ItemIdA > $ItemIdB)
        {
            # swap item IDs
            $Temp = $ItemIdA;
            $ItemIdA = $ItemIdB;
            $ItemIdB = $Temp;
        }
        $SqlItemIdA = intval($ItemIdA);
        $SqlItemIdB = intval($ItemIdB);

        # if new correlation value provided
        if ($NewCorrelation != -1)
        {
            # value below threshold is not stored and is reported as zero
            if ($NewCorrelation < $this->ContentCorrelationThreshold)
            {
                return 0;
            }

            # insert new correlation value in DB
            $this->DB->Query("INSERT INTO RecContentCorrelations "
                    ."(ItemIdA, ItemIdB, Correlation) "
                    ."VALUES ({$SqlItemIdA}, {$SqlItemIdB}, {$NewCorrelation})");

            # return value is new value
            return $NewCorrelation;
        }

        # retrieve correlation value from DB
        $Correlation = $this->DB->Query(
                "SELECT Correlation FROM RecContentCorrelations "
                        ."WHERE ItemIdA = {$SqlItemIdA} AND ItemIdB = {$SqlItemIdB}",
                "Correlation");

        # if no value found in DB
        if ($Correlation == FALSE)
        {
            # return value is zero
            $Correlation = 0;
        }

        # return correlation value to caller
        return $Correlation;
    }

    # remove any results rejected by functions registered via
    #       AddResultFilterFunction()
    #   Results - array indexed by item ID
    # returns array with entries removed for which any filter function
    #       returned TRUE
    function FilterOnSuppliedFunctions($Results)
    {
        # nothing to do if no filter functions have been set
        if (empty($this->FilterFuncs)) {  return $Results;  }

        # for each result
        foreach ($Results as $ResourceId => $Result)
        {
            # for each filter function
            foreach ($this->FilterFuncs as $FuncName)
            {
                # if filter function returns TRUE for result resource
                if (call_user_func($FuncName, $ResourceId))
                {
                    # discard result
                    if ($this->DebugLevel > 2) {  print("REC:      filter callback rejected resource {$ResourceId}<br>\n");  }
                    unset($Results[$ResourceId]);

                    # no need to try remaining filter functions
                    break;
                }
            }
        }

        # return filtered list to caller
        return $Results;
    }
}
00776 
00777 ?>
CWIS logo doxygen
Copyright 2009 Internet Scout