Skip to content

Commit

Permalink
Merge pull request #139 from ita9naiwa/add-similar-users
Browse files Browse the repository at this point in the history
Add similar users calculation in MF models
  • Loading branch information
benfred authored Jul 25, 2018
2 parents f5a9a56 + 29388c5 commit 2f3b978
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 5 deletions.
3 changes: 3 additions & 0 deletions implicit/nearest_neighbours.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ def rank_items(self, userid, user_items, selected_items, recalculate_user=False)
ret.append((itemid, -1.0))
return ret

def similar_users(self, userid, N=10):
    """Placeholder for user-to-user similarity; always raises NotImplementedError."""
    message = "Not implemented Yet"
    raise NotImplementedError(message)

def similar_items(self, itemid, N=10):
""" Returns a list of the most similar other items """
if itemid >= self.similarity.shape[0]:
Expand Down
53 changes: 48 additions & 5 deletions implicit/recommender_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,25 @@ def rank_items(self, userid, user_items, selected_items, recalculate_user=False)
"""
pass

@abstractmethod
def similar_users(self, userid, N=10):
    """
    Calculates a list of similar users

    Parameters
    ----------
    userid : int
        The row id of the user to retrieve similar users for
    N : int, optional
        The number of similar users to return

    Returns
    -------
    list
        List of (userid, score) tuples
    """
    pass

@abstractmethod
def similar_items(self, itemid, N=10):
"""
Expand Down Expand Up @@ -118,8 +137,8 @@ def __init__(self):
self.item_factors = None
self.user_factors = None

# cache of item norms (useful for calculating similar items)
self._item_norms = None
# cache of user and item norms (useful for calculating similar users and items)
self._user_norms, self._item_norms = None, None

def recommend(self, userid, user_items,
N=10, filter_already_liked_items=True, filter_items=None, recalculate_user=False):
Expand Down Expand Up @@ -167,13 +186,37 @@ def _user_factor(self, userid, user_items, recalculate_user=False):
def recalculate_user(self, userid, user_items):
    # This implementation has no way to recompute a user's factors;
    # it always raises.
    reason = "recalculate_user is not supported with this model"
    raise NotImplementedError(reason)

def similar_users(self, userid, N=10):
    # Score every row of the user factor matrix against the requested
    # user's own factor and let the shared helper pick the N best.
    all_users = self.user_factors
    return self._get_similarity_score(all_users[userid], all_users, self.user_norms, N)

similar_users.__doc__ = RecommenderBase.similar_users.__doc__

def similar_items(self, itemid, N=10):
    # Delegate to the helper shared with similar_users: the item's own
    # factor is scored against every row of the item factor matrix.
    factor = self.item_factors[itemid]
    factors = self.item_factors
    norms = self.item_norms

    return self._get_similarity_score(factor, factors, norms, N)

similar_items.__doc__ = RecommenderBase.similar_items.__doc__

def _get_similarity_score(self, factor, factors, norms, N):
    """
    Returns the N rows of `factors` that are most similar to `factor`

    Parameters
    ----------
    factor : ndarray
        The latent vector to compare against
    factors : ndarray
        Matrix with one latent vector per row
    norms : ndarray
        One divisor per row of `factors`; assumed to be each row's L2 norm
        (that is what the user_norms cache computes)
    N : int
        The maximum number of results to return

    Returns
    -------
    list
        List of (row index, score) tuples sorted by descending score. Holds
        fewer than N entries when `factors` has fewer than N rows, and is
        empty when N is not positive.
    """
    scores = factors.dot(factor) / norms

    # Dividing by the row norms alone leaves every score scaled by the
    # length of the query vector; divide by that too so that the result
    # is a cosine similarity. Zero is swapped for a small value, the same
    # guard the cached norms use.
    query_norm = np.linalg.norm(factor)
    if query_norm == 0:
        query_norm = 1e-10

    # argpartition raises when asked for more entries than exist, and a
    # zero N would slice out every row ([-0:] is the whole array).
    count = min(N, len(scores))
    if count <= 0:
        return []

    best = np.argpartition(scores, -count)[-count:]
    return sorted(zip(best, scores[best] / query_norm), key=lambda x: -x[1])

@property
def user_norms(self):
    """Norm of each row of user_factors, computed on first access and cached."""
    if self._user_norms is not None:
        return self._user_norms

    norms = np.linalg.norm(self.user_factors, axis=-1)
    # don't divide by zero in similar_users, replace with small value
    norms[norms == 0] = 1e-10
    self._user_norms = norms
    return norms

@property
def item_norms(self):
if self._item_norms is None:
Expand Down
14 changes: 14 additions & 0 deletions tests/recommender_base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from scipy.sparse import csr_matrix

from implicit.evaluation import precision_at_k
from implicit.nearest_neighbours import ItemItemRecommender


class TestRecommenderBaseMixin(object):
Expand Down Expand Up @@ -69,6 +70,19 @@ def test_evaluation(self):
show_progress=False)
self.assertEqual(p, 1)

def test_similar_users(self):
    model = self._get_model()
    # calculating similar users in nearest-neighbours is not implemented yet,
    # so there is nothing to verify for those models
    if not isinstance(model, ItemItemRecommender):
        model.show_progress = False
        model.fit(self.get_checker_board(50))
        for userid in range(50):
            # every returned neighbour must share the queried user's parity
            parity = userid % 2
            for neighbour, _ in model.similar_users(userid, N=10):
                self.assertEqual(neighbour % 2, parity)

def test_similar_items(self):
model = self._get_model()
model.show_progress = False
Expand Down

0 comments on commit 2f3b978

Please sign in to comment.