Recommender.php
Go to the documentation of this file.
<?php

#
#   FILE:  SPT--Recommender.php
#
#   METHODS PROVIDED:
#       Recommender() / __construct()
#           - constructor
#       DebugLevel($Setting)
#           - set verbosity of debugging output
#       Recommend($UserId, $StartingResult, $NumberOfResults)
#           - recommend items for a user based on that user's ratings
#       AddResultFilterFunction($FunctionName)
#           - register callback used to reject items from similar-item lists
#       NumberOfResults() / SearchTime()
#           - statistics about the most recent Recommend() call
#       GetSourceList($UserId, $RecommendedItemId)
#           - list rated items that led to a recommendation
#       FindSimilarItems($ItemId, $FieldList)
#           - dynamically compute items similar to specified item
#       RecommendFieldValues($ItemId, $FieldList)
#           - suggest field values based on similar items
#       UpdateForItems() / UpdateForItem() / DropItem() / PruneCorrelations()
#           - maintain the RecContentCorrelations table
#       GetItemIds()
#           - return list of all item IDs
#
#   AUTHOR:  Edward Almasy
#
#   Part of the Scout Portal Toolkit
#   Copyright 2002-2004 Internet Scout Project
#   http://scout.wisc.edu
#

/**
 * Content-based recommender that stores pairwise item correlations in the
 * RecContentCorrelations table (columns ItemIdA, ItemIdB, Correlation) and
 * combines them with per-user ratings to produce recommendations.
 *
 * NOTE(review): GetFieldValue($ItemId, $FieldName) is called by this class
 * but is not defined in it -- presumably it is supplied by a subclass;
 * confirm before instantiating this class directly.
 */
class Recommender {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    # define content field types
    const CONTENTFIELDTYPE_TEXT = 1;
    const CONTENTFIELDTYPE_NUMERIC = 2;
    const CONTENTFIELDTYPE_CONTROLLEDNAME = 3;
    const CONTENTFIELDTYPE_DATE = 4;
    const CONTENTFIELDTYPE_DATERANGE = 5;
    # misspelled name retained as an alias so that existing callers keep working
    const CONTENTFIELDTYPE_DATERAMGE = 5;

    /**
     * Object constructor.
     * @param object $DB Database object (must provide Query(), FetchRow()
     *       and FetchColumn() as used below).
     * @param string $ItemTableName Name of table containing items.
     * @param string $RatingTableName Name of table containing user ratings.
     * @param string $ItemIdFieldName Name of item ID column.
     * @param string $UserIdFieldName Name of user ID column in rating table.
     * @param string $RatingFieldName Name of rating column in rating table.
     * @param array $ContentFields Field settings, indexed by field name, with
     *       each entry providing "FieldType" and "Weight" values.
     */
    public function __construct(&$DB, $ItemTableName, $RatingTableName,
            $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
            $ContentFields)
    {
        # set default parameters
        $this->ContentCorrelationThreshold = 1;

        # save database object
        $this->DB =& $DB;

        # save new configuration values
        $this->ItemTableName = $ItemTableName;
        $this->RatingTableName = $RatingTableName;
        $this->ItemIdFieldName = $ItemIdFieldName;
        $this->UserIdFieldName = $UserIdFieldName;
        $this->RatingFieldName = $RatingFieldName;
        $this->ContentFields = $ContentFields;

        # set default debug state
        $this->DebugLevel = 0;
    }

    /**
     * Legacy (PHP 4 style) constructor name, kept so that subclasses that
     * call $this->Recommender(...) continue to work.  Same parameters as
     * __construct().
     */
    public function Recommender(&$DB, $ItemTableName, $RatingTableName,
            $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
            $ContentFields)
    {
        # (self:: rather than $this-> so that a subclass constructor that
        #       calls this method is not re-entered)
        self::__construct($DB, $ItemTableName, $RatingTableName,
                $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
                $ContentFields);
    }

    /**
     * Set level for debugging output.
     * @param int $Setting New debug level (0 disables output; higher values
     *       print progressively more detail).
     */
    public function DebugLevel($Setting)
    {
        $this->DebugLevel = $Setting;
    }


    # ---- recommendation methods

    /**
     * Recommend items for specified user.
     * @param int $UserId ID of user.
     * @param int $StartingResult Offset of first result to return.
     * @param int $NumberOfResults Maximum number of results to return.
     * @return array Recommendation values, indexed by item ID, ordered from
     *       highest to lowest value.
     */
    public function Recommend($UserId, $StartingResult = 0, $NumberOfResults = 10)
    {
        if ($this->DebugLevel > 0) { print("REC: Recommend(".$UserId.", ".$StartingResult.", ".$NumberOfResults.")<br>\n"); }

        # note start time so that SearchTime() has something to report
        $StartTime = microtime(TRUE);

        # load in user ratings
        $Ratings = array();
        $DB = $this->DB;
        $DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->RatingFieldName
                ." FROM ".$this->RatingTableName
                ." WHERE ".$this->UserIdFieldName." = ".intval($UserId));
        while ($Row = $DB->FetchRow())
        {
            $Ratings[$Row[$this->ItemIdFieldName]] =
                    $Row[$this->RatingFieldName];
        }
        if ($this->DebugLevel > 1) { print("REC: user has rated ".count($Ratings)." items<br>\n"); }

        # for each item that user has rated
        # NOTE(review): only rows with ItemIdA = rated item are read here,
        #       while correlations are stored with the lower ID in ItemIdA,
        #       so rated items never contribute to lower-numbered items --
        #       confirm whether ItemIdB rows should be read as well
        $RecVals = array();
        foreach ($Ratings as $ItemId => $ItemRating)
        {
            # for each content correlation available for that item
            $DB->Query("SELECT Correlation, ItemIdB "
                    ."FROM RecContentCorrelations "
                    ."WHERE ItemIdA = ".intval($ItemId));
            while ($Row = $DB->FetchRow())
            {
                # multiply that correlation by normalized rating and add
                #       resulting value to recommendation value for that item
                $TargetId = $Row["ItemIdB"];
                if (!isset($RecVals[$TargetId])) { $RecVals[$TargetId] = 0; }
                $RecVals[$TargetId] += $Row["Correlation"] * ($ItemRating - 50);
                if ($this->DebugLevel > 9) { print("REC: RecVal[".$TargetId."] = ".$RecVals[$TargetId]."<br>\n"); }
            }
        }
        if ($this->DebugLevel > 1) { print("REC: found ".count($RecVals)." total recommendations<br>\n"); }

        # calculate average correlation between items
        #       (cast copes with no value coming back for an empty table)
        $ResultThreshold = $DB->Query("SELECT AVG(Correlation) "
                ."AS Average FROM RecContentCorrelations", "Average");
        $ResultThreshold = round((float)$ResultThreshold) * 2;

        # for each recommended item
        foreach ($RecVals as $ItemId => $RecVal)
        {
            # remove item from list if user already rated it
            if (isset($Ratings[$ItemId]))
            {
                unset($RecVals[$ItemId]);
                continue;
            }

            # scale recommendation value back to match thresholds
            $RecVals[$ItemId] = round($RecVal / 50);

            # remove item from recommendation list if value is below threshold
            if ($RecVals[$ItemId] < $ResultThreshold)
            {
                unset($RecVals[$ItemId]);
            }
        }
        if ($this->DebugLevel > 1) { print("REC: found ".count($RecVals)." positive recommendations<br>\n"); }

        # sort recommendation list by value
        arsort($RecVals, SORT_NUMERIC);

        # save total number of results available
        $this->NumberOfResultsAvailable = count($RecVals);

        # trim result list to match range requested by caller
        #       (TRUE preserves the item ID keys)
        $RecValSegment = array_slice(
                $RecVals, $StartingResult, $NumberOfResults, TRUE);

        # save time taken to generate recommendations
        $this->LastSearchTime = microtime(TRUE) - $StartTime;

        # return recommendation list to caller
        return $RecValSegment;
    }

    /**
     * Add function to be called to filter result lists.  The function is
     * called with an item ID and should return TRUE if the item is to be
     * discarded.  (In this class the filters are applied by
     * FindSimilarItems().)
     * @param callback $FunctionName Name of filter function.
     */
    public function AddResultFilterFunction($FunctionName)
    {
        # save filter function name
        $this->FilterFuncs[] = $FunctionName;
    }

    /**
     * Return number of recommendations generated by the most recent call to
     * Recommend(), before trimming to the requested range.
     * @return int|null Number of results (NULL if Recommend() not yet called).
     */
    public function NumberOfResults()
    {
        return $this->NumberOfResultsAvailable;
    }

    /**
     * Return time taken by the most recent call to Recommend().
     * @return float|null Time in seconds (NULL if Recommend() not yet called).
     */
    public function SearchTime()
    {
        return $this->LastSearchTime;
    }

    /**
     * Return list of items used to generate recommendation of specified item,
     * i.e. items correlated with it that the user rated at 50 or above.
     * @param int $UserId ID of user.
     * @param int $RecommendedItemId ID of recommended item.
     * @return array Correlation values, indexed by source item ID.
     */
    public function GetSourceList($UserId, $RecommendedItemId)
    {
        $RatingTable = $this->RatingTableName;
        $RatedItemColumn = $RatingTable.".".$this->ItemIdFieldName;
        $TargetId = intval($RecommendedItemId);

        # pull list of correlations from DB
        $this->DB->Query("SELECT * FROM RecContentCorrelations, ".$RatingTable
                ." WHERE (ItemIdA = ".$TargetId
                ." OR ItemIdB = ".$TargetId.")"
                ." AND ".$this->UserIdFieldName." = ".intval($UserId)
                ." AND (RecContentCorrelations.ItemIdA = ".$RatedItemColumn
                ." OR RecContentCorrelations.ItemIdB = ".$RatedItemColumn.")"
                ." AND ".$this->RatingFieldName." >= 50 "
                ." ORDER BY Correlation DESC");

        # for each correlation
        $SourceList = array();
        while ($Row = $this->DB->FetchRow())
        {
            # pick out ID of item at other end of correlation
            $ItemId = ($Row["ItemIdA"] == $RecommendedItemId)
                    ? $Row["ItemIdB"] : $Row["ItemIdA"];

            # add item to recommendation source list
            $SourceList[$ItemId] = $Row["Correlation"];
        }

        # return recommendation source list to caller
        return $SourceList;
    }

    /**
     * Dynamically generate and return list of items similar to specified
     * item.  Correlations are computed on the fly rather than read from the
     * database, and any registered filter functions are applied.
     * @param int $ItemId ID of item to compare against.
     * @param array $FieldList Names of fields to correlate on, or NULL to
     *       use all content fields.
     * @return array Correlation values, indexed by item ID, ordered from
     *       most to least similar.
     */
    public function FindSimilarItems($ItemId, $FieldList = NULL)
    {
        if ($this->DebugLevel > 1) { print("REC: searching for items similar to item \"".$ItemId."\"<br>\n"); }

        # make sure we have item IDs available
        $this->LoadItemIds();

        # start with empty array
        $SimilarItems = array();

        # for every item other than specified item
        foreach ($this->ItemIds as $Id)
        {
            if ($Id == $ItemId) { continue; }

            # calculate correlation of item to specified item
            $Correlation = $this->CalculateContentCorrelation($ItemId, $Id, $FieldList);

            # add item to list of similar items if correlation is above threshold
            if ($Correlation > $this->ContentCorrelationThreshold)
            {
                $SimilarItems[$Id] = $Correlation;
            }
        }
        if ($this->DebugLevel > 3) { print("REC: ".count($SimilarItems)." similar items to item \"".$ItemId."\" found<br>\n"); }

        # filter list of similar items (if any)
        if (count($SimilarItems) > 0)
        {
            $SimilarItems = $this->FilterOnSuppliedFunctions($SimilarItems);
            if ($this->DebugLevel > 4) { print("REC: ".count($SimilarItems)." similar items to item \"".$ItemId."\" left after filtering<br>\n"); }
        }

        # sort list of similar items in order of most to least similar
        if (count($SimilarItems) > 0)
        {
            arsort($SimilarItems, SORT_NUMERIC);
        }

        # return list of similar items to caller
        return $SimilarItems;
    }

    /**
     * Dynamically generate and return list of recommended field values for
     * item, based on the values most common among the most similar items.
     * @param int $ItemId ID of item.
     * @param array $FieldList Names of fields to use when finding similar
     *       items, or NULL to use all content fields.
     * @return array Two-dimensional array of occurrence counts, with field
     *       name for first index and field value for second index.
     */
    public function RecommendFieldValues($ItemId, $FieldList = NULL)
    {
        if ($this->DebugLevel > 1) { print("REC: generating field value recommendations for item \"".$ItemId."\"<br>\n"); }

        # start with empty array of values
        $RecVals = array();

        # generate list of similar items
        $SimilarItems = $this->FindSimilarItems($ItemId, $FieldList);

        # bail out if no similar items found
        if (count($SimilarItems) == 0) { return $RecVals; }

        # prune list of similar items to only top third of better-than-average
        #       (list arrives sorted, so first entry has highest correlation)
        $AverageCorr = intval(array_sum($SimilarItems) / count($SimilarItems));
        reset($SimilarItems);
        $HighestCorr = current($SimilarItems);
        $CorrThreshold = intval($HighestCorr - (($HighestCorr - $AverageCorr) / 3));
        if ($this->DebugLevel > 8) { print("REC: <i>Average Correlation: $AverageCorr Highest Correlation: $HighestCorr Correlation Threshold: $CorrThreshold </i><br>\n"); }
        foreach ($SimilarItems as $SimItemId => $SimItemCorr)
        {
            if ($SimItemCorr < $CorrThreshold)
            {
                unset($SimilarItems[$SimItemId]);
            }
        }
        if ($this->DebugLevel > 6) { print("REC: ".count($SimilarItems)." similar items left after threshold pruning<br>\n"); }

        # for each field of each remaining similar item
        foreach ($SimilarItems as $SimItemId => $SimItemCorr)
        {
            foreach ($this->ContentFields as $FieldName => $FieldAttributes)
            {
                # load field data for this item (treating single value
                #       as a list with one entry)
                $FieldData = $this->GetFieldValue($SimItemId, $FieldName);
                $FieldDataVals = is_array($FieldData)
                        ? $FieldData : array($FieldData);

                # for each field data value
                foreach ($FieldDataVals as $FieldDataVal)
                {
                    # skip empty values
                    $FieldDataVal = trim((string)$FieldDataVal);
                    if (strlen($FieldDataVal) == 0) { continue; }

                    # increment count for data value
                    if (!isset($RecVals[$FieldName][$FieldDataVal]))
                    {
                        $RecVals[$FieldName][$FieldDataVal] = 0;
                    }
                    $RecVals[$FieldName][$FieldDataVal]++;
                }
            }
        }

        # for each field
        $MatchingCountThreshold = 3;
        foreach ($RecVals as $FieldName => $FieldVals)
        {
            # determine cutoff threshold (halfway between average and
            #       highest count, but never below minimum matching count)
            arsort($FieldVals, SORT_NUMERIC);
            reset($FieldVals);
            $HighestCount = current($FieldVals);
            $AverageCount = intval(array_sum($FieldVals) / count($FieldVals));
            $CountThreshold = intval($AverageCount + (($HighestCount - $AverageCount) / 2));
            if ($CountThreshold < $MatchingCountThreshold) { $CountThreshold = $MatchingCountThreshold; }
            if ($this->DebugLevel > 8) { print("REC: <i>Field: $FieldName Average Count: $AverageCount Highest Count: $HighestCount Count Threshold: $CountThreshold </i><br>\n"); }

            # drop every value with count below threshold
            foreach ($FieldVals as $FieldVal => $FieldValCount)
            {
                if ($FieldValCount < $CountThreshold)
                {
                    unset($RecVals[$FieldName][$FieldVal]);
                }
            }

            if ($this->DebugLevel > 3) { print("REC: found ".count($RecVals[$FieldName])." recommended values for field \"".$FieldName."\" after threshold pruning<br>\n"); }
        }

        # return recommended values to caller
        return $RecVals;
    }


    # ---- database update methods

    /**
     * Update stored correlations for a range of items, as part of a full
     * pass over all items.
     * @param int $StartingItemId ID of first item to update.
     * @param int $NumberOfItems Maximum number of items to update.
     * @return int|null ID of last item examined (NULL if there are no items).
     */
    public function UpdateForItems($StartingItemId, $NumberOfItems)
    {
        if ($this->DebugLevel > 0) { print("REC: UpdateForItems(".$StartingItemId.", ".$NumberOfItems.")<br>\n"); }

        # make sure we have item IDs available
        $this->LoadItemIds();

        # for every item
        $ItemsUpdated = 0;
        $ItemId = NULL;
        foreach ($this->ItemIds as $ItemId)
        {
            # skip items before requested range
            if ($ItemId < $StartingItemId) { continue; }

            # update recommender info for item
            if ($this->DebugLevel > 1) { print("REC: doing item ".$ItemId."<br>\n"); }
            $this->UpdateForItem($ItemId, TRUE);
            $ItemsUpdated++;

            # bail out if we have done requested number of items
            if ($ItemsUpdated >= $NumberOfItems)
            {
                if ($this->DebugLevel > 1) { print("REC: bailing out with item ".$ItemId."<br>\n"); }
                return $ItemId;
            }
        }

        # return ID of last resource updated to caller
        return $ItemId;
    }

    /**
     * Recalculate and store correlations for specified item.
     * @param int $ItemId ID of item.
     * @param bool $FullPass If TRUE, only pairings with higher-numbered
     *       items are recalculated (the caller is expected to be walking
     *       all items in order); if FALSE, pairings with all other items
     *       are recalculated.
     */
    public function UpdateForItem($ItemId, $FullPass = FALSE)
    {
        if ($this->DebugLevel > 1) { print("REC: updating for item \"".$ItemId."\"<br>\n"); }

        # make sure we have item IDs available
        $this->LoadItemIds();

        # clear existing correlations that are about to be regenerated
        #       (pairs are stored with the lower ID in ItemIdA, so outside
        #       of a full pass the rows with this item in ItemIdB must go
        #       as well, or stale/duplicate rows are left behind)
        $DeleteCondition = "WHERE ItemIdA = ".intval($ItemId);
        if ($FullPass == FALSE)
        {
            $DeleteCondition .= " OR ItemIdB = ".intval($ItemId);
        }
        $this->DB->Query("DELETE FROM RecContentCorrelations "
                .$DeleteCondition);

        # for every item
        foreach ($this->ItemIds as $Id)
        {
            # if not full pass or item is later in list than current item
            if (($FullPass == FALSE) || ($Id > $ItemId))
            {
                # update correlation value for item and target item
                $this->UpdateContentCorrelation($ItemId, $Id);
            }
        }
    }

    /**
     * Drop all stored correlation entries referring to item.
     * @param int $ItemId ID of item.
     */
    public function DropItem($ItemId)
    {
        $Id = intval($ItemId);
        $this->DB->Query("DELETE FROM RecContentCorrelations "
                ."WHERE ItemIdA = ".$Id." "
                ."OR ItemIdB = ".$Id);
    }

    /**
     * Delete all stored correlations that are at or below the average
     * correlation value.
     */
    public function PruneCorrelations()
    {
        # get average correlation
        $AverageCorrelation = $this->DB->Query("SELECT AVG(Correlation) "
                ."AS Average FROM RecContentCorrelations", "Average");

        # dump all below-average correlations
        if ($AverageCorrelation > 0)
        {
            $this->DB->Query("DELETE FROM RecContentCorrelations "
                    ."WHERE Correlation <= ".$AverageCorrelation);
        }
    }

    /**
     * Retrieve IDs of all items, in ascending order.  The list is loaded
     * once per object and then reused.
     * @return mixed Item IDs, in whatever form the database object's
     *       FetchColumn() method returns them.
     */
    public function GetItemIds()
    {
        if (!isset($this->ItemIdListCache))
        {
            $this->DB->Query("SELECT ".$this->ItemIdFieldName." AS Id FROM "
                    .$this->ItemTableName." ORDER BY ".$this->ItemIdFieldName);
            $this->ItemIdListCache = $this->DB->FetchColumn("Id");
        }
        return $this->ItemIdListCache;
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------
    # (members below are intended for internal use, but have always been
    #       publicly accessible and remain so for backward compatibility)

    public $ContentCorrelationThreshold;
    public $ContentFields;
    public $ItemTableName;
    public $RatingTableName;
    public $ItemIdFieldName;
    public $UserIdFieldName;
    public $RatingFieldName;
    public $ItemIds;
    public $DB;
    public $FilterFuncs = array();
    public $LastSearchTime;
    public $NumberOfResultsAvailable;
    public $DebugLevel;

    # per-object caches (values depend on this object's configuration)
    public $ItemIdListCache;
    public $FieldDataCache = array();
    public $CorrelationCache = array();


    /**
     * Load list of item IDs (in ascending order) into $this->ItemIds, if
     * not already loaded.
     */
    public function LoadItemIds()
    {
        # nothing to do if item IDs already loaded
        if (isset($this->ItemIds)) { return; }

        # load item IDs from DB
        $this->DB->Query("SELECT ".$this->ItemIdFieldName." AS Id FROM "
                .$this->ItemTableName." ORDER BY ".$this->ItemIdFieldName);
        $this->ItemIds = array();
        while ($Item = $this->DB->FetchRow())
        {
            $this->ItemIds[] = $Item["Id"];
        }
    }

    /**
     * Retrieve normalized word list for specified field of specified item,
     * with caching.
     * @param int $ItemId ID of item.
     * @param string $FieldName Name of field.
     * @return array Words from field value (see NormalizeAndParseText()).
     */
    public function GetFieldData($ItemId, $FieldName)
    {
        # return cached data if available
        if (isset($this->FieldDataCache[$ItemId][$FieldName]))
        {
            return $this->FieldDataCache[$ItemId][$FieldName];
        }

        # load field value from DB
        $FieldValue = $this->GetFieldValue($ItemId, $FieldName);

        # if field value is array concatenate together text from elements
        if (is_array($FieldValue))
        {
            $FieldValue = implode(" ", $FieldValue);
        }

        # normalize text and break into word array
        $Words = $this->NormalizeAndParseText($FieldValue);
        $this->FieldDataCache[$ItemId][$FieldName] = $Words;

        # if more items than cache limit dump oldest item
        $MaxCachedItems = 1000;
        if (count($this->FieldDataCache) > $MaxCachedItems)
        {
            reset($this->FieldDataCache);
            unset($this->FieldDataCache[key($this->FieldDataCache)]);
        }

        # return data to caller
        return $Words;
    }

    /**
     * Calculate content correlation between two items.  Only text and
     * controlled name fields contribute; each field's score is the number
     * of words the two items share, multiplied by the field's weight.
     * @param int $ItemIdA ID of first item.
     * @param int $ItemIdB ID of second item.
     * @param array $FieldList Names of fields to correlate on, or NULL to
     *       use all content fields.
     * @return int|float Correlation value.
     */
    public function CalculateContentCorrelation($ItemIdA, $ItemIdB, $FieldList = NULL)
    {
        if ($this->DebugLevel > 10) { print("REC: calculating correlation between items $ItemIdA and $ItemIdB<br>\n"); }

        # order item ID numbers
        if ($ItemIdA > $ItemIdB)
        {
            $Temp = $ItemIdA;
            $ItemIdA = $ItemIdB;
            $ItemIdB = $Temp;
        }

        # cache entries are specific to the set of fields being correlated
        $UseAllFields = ($FieldList == NULL);
        $FieldSetKey = $UseAllFields ? "" : implode("\n", $FieldList);

        # if we already have the correlation
        if (isset($this->CorrelationCache[$FieldSetKey][$ItemIdA][$ItemIdB]))
        {
            # retrieve correlation from cache
            $TotalCorrelation =
                    $this->CorrelationCache[$FieldSetKey][$ItemIdA][$ItemIdB];
        }
        else
        {
            # build list of fields to correlate on
            if ($UseAllFields)
            {
                $ContentFields = $this->ContentFields;
            }
            else
            {
                # (unknown field names are ignored)
                $ContentFields = array();
                foreach ($FieldList as $FieldName)
                {
                    if (isset($this->ContentFields[$FieldName]))
                    {
                        $ContentFields[$FieldName] =
                                $this->ContentFields[$FieldName];
                    }
                }
            }

            # for each content field
            $TotalCorrelation = 0;
            foreach ($ContentFields as $FieldName => $FieldAttributes)
            {
                # skip field if not of a type that we use for correlation
                $FieldType = intval($FieldAttributes["FieldType"]);
                if (($FieldType != self::CONTENTFIELDTYPE_TEXT)
                        && ($FieldType != self::CONTENTFIELDTYPE_CONTROLLEDNAME))
                {
                    continue;
                }

                # load data
                $ItemAData = $this->GetFieldData($ItemIdA, $FieldName);
                $ItemBData = $this->GetFieldData($ItemIdB, $FieldName);
                if ($this->DebugLevel > 15) { print("REC: loaded ".count($ItemAData)." terms for item #".$ItemIdA." and ".count($ItemBData)." terms for item #".$ItemIdB." for field \"".$FieldName."\"<br>\n"); }

                # add correlation multiplied by weight to total
                $Correlation = $this->CalcTextCorrelation($ItemAData, $ItemBData);
                $TotalCorrelation += $Correlation * $FieldAttributes["Weight"];
            }

            # store correlation to cache
            $this->CorrelationCache[$FieldSetKey][$ItemIdA][$ItemIdB] =
                    $TotalCorrelation;
        }

        # return correlation value to caller
        if ($this->DebugLevel > 9) { print("REC: correlation between items $ItemIdA and $ItemIdB found to be $TotalCorrelation<br>\n"); }
        return $TotalCorrelation;
    }

    /**
     * Calculate content correlation between two items (using all content
     * fields) and save it to the database.
     * @param int $ItemIdA ID of first item.
     * @param int $ItemIdB ID of second item.
     */
    public function UpdateContentCorrelation($ItemIdA, $ItemIdB)
    {
        if ($this->DebugLevel > 6) { print("REC: updating correlation between items $ItemIdA and $ItemIdB<br>\n"); }

        # bail out if two items are the same
        if ($ItemIdA == $ItemIdB) { return; }

        # calculate correlation
        $Correlation = $this->CalculateContentCorrelation($ItemIdA, $ItemIdB);

        # save new correlation
        $this->ContentCorrelation($ItemIdA, $ItemIdB, $Correlation);
    }

    /**
     * Strip HTML tags and punctuation from text, convert it to lower case,
     * and split it into words, discarding stop words.
     * @param string $Text Text to parse.
     * @return array Words found.  (Array keys are word positions before
     *       stop word removal, so they may not be consecutive.)
     */
    public function NormalizeAndParseText($Text)
    {
        # (empty string is included to discard empty "words")
        $StopWords = array(
                "a", "about", "also", "an", "and", "are", "as", "at",
                "be", "but", "by", "can", "each", "either", "for", "from",
                "has", "he", "her", "here", "hers", "him", "his", "how",
                "i", "if", "in", "include", "into", "is", "it", "its",
                "me", "neither", "no", "nor", "not", "of", "on", "or",
                "so", "she", "than", "that", "the", "their", "them", "then",
                "there", "these", "they", "this", "those", "through", "to",
                "too", "very", "what", "when", "where", "while", "who",
                "why", "will", "you", "");

        # strip any HTML tags
        $Text = strip_tags((string)$Text);

        # strip any punctuation (character class, so that each of these
        #       characters is replaced wherever it appears)
        $Text = preg_replace('/[,.?\-()\[\]"]/', " ", $Text);

        # normalize whitespace
        $Text = trim(preg_replace("/[\\s]+/", " ", $Text));

        # convert to all lower case
        $Text = strtolower($Text);

        # split text into arrays of words
        $Words = explode(" ", $Text);

        # filter out all stop words
        $Words = array_diff($Words, $StopWords);

        # return word array to caller
        return $Words;
    }

    /**
     * Calculate correlation between two lists of words.
     * @param array $WordsA First list of words.
     * @param array $WordsB Second list of words.
     * @return int Number of entries in first list that also appear in
     *       second list.
     */
    public function CalcTextCorrelation($WordsA, $WordsB)
    {
        # get array containing intersection of two word arrays
        $IntersectWords = array_intersect($WordsA, $WordsB);

        # return number of words remaining as score
        return count($IntersectWords);
    }

    /**
     * Get or set stored correlation between two items.
     * @param int $ItemIdA ID of first item.
     * @param int $ItemIdB ID of second item.
     * @param mixed $NewCorrelation New correlation value to store, or -1 to
     *       retrieve the stored value.  New values below the content
     *       correlation threshold are not stored.
     * @return mixed Correlation value (0 if no value was stored or found).
     */
    public function ContentCorrelation($ItemIdA, $ItemIdB, $NewCorrelation = -1)
    {
        # if item ID A is greater than item ID B
        if ($ItemIdA > $ItemIdB)
        {
            # swap item IDs
            $Temp = $ItemIdA;
            $ItemIdA = $ItemIdB;
            $ItemIdB = $Temp;
        }
        $IdA = intval($ItemIdA);
        $IdB = intval($ItemIdB);

        # if new correlation value provided
        if ($NewCorrelation != -1)
        {
            # assume value will not be stored
            $Correlation = 0;

            # if new value is at or above threshold
            if ($NewCorrelation >= $this->ContentCorrelationThreshold)
            {
                # insert new correlation value in DB
                $this->DB->Query("INSERT INTO RecContentCorrelations "
                        ."(ItemIdA, ItemIdB, Correlation) "
                        ."VALUES (".$IdA.", ".$IdB.", "
                        .floatval($NewCorrelation).")");

                # return value is new value
                $Correlation = $NewCorrelation;
            }
        }
        else
        {
            # retrieve correlation value from DB
            $Correlation = $this->DB->Query(
                    "SELECT Correlation FROM RecContentCorrelations "
                    ."WHERE ItemIdA = ".$IdA." AND ItemIdB = ".$IdB,
                    "Correlation");

            # if no value found in DB
            if ($Correlation == FALSE)
            {
                # return value is zero
                $Correlation = 0;
            }
        }

        # return correlation value to caller
        return $Correlation;
    }

    /**
     * Remove from result list any item for which one of the registered
     * filter functions returns TRUE.
     * @param array $Results Result values, indexed by item ID.
     * @return array Result values remaining after filtering.
     */
    public function FilterOnSuppliedFunctions($Results)
    {
        # nothing to do if no filter functions have been set
        if (empty($this->FilterFuncs)) { return $Results; }

        # for each result
        foreach ($Results as $ResourceId => $Result)
        {
            # for each filter function
            foreach ($this->FilterFuncs as $FuncName)
            {
                # if filter function return TRUE for result resource
                if ($FuncName($ResourceId))
                {
                    # discard result
                    if ($this->DebugLevel > 2) { print("REC: filter callback rejected resource ".$ResourceId."<br>\n"); }
                    unset($Results[$ResourceId]);

                    # no need to consult remaining filter functions
                    break;
                }
            }
        }

        # return filtered list to caller
        return $Results;
    }
}