From c1700fafc05267cd4a6fcd6d786906f3003f031d Mon Sep 17 00:00:00 2001 From: ita Date: Sun, 15 Jul 2018 20:24:28 +0900 Subject: [PATCH 1/3] add similar user calculation in factor based models --- implicit/nearest_neighbours.py | 3 ++ implicit/recommender_base.py | 53 ++++++++++++++++++++++++++++++---- implicit/temp.py | 20 +++++++++++++ 3 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 implicit/temp.py diff --git a/implicit/nearest_neighbours.py b/implicit/nearest_neighbours.py index f3ca6bf6..5fe4fb89 100644 --- a/implicit/nearest_neighbours.py +++ b/implicit/nearest_neighbours.py @@ -60,6 +60,9 @@ def rank_items(self, userid, user_items, selected_items, recalculate_user=False) ret.append((itemid, -1.0)) return ret + def similar_users(self, userid, N=10): + raise NotImplementedError("Not implemented Yet") + def similar_items(self, itemid, N=10): """ Returns a list of the most similar other items """ if itemid >= self.similarity.shape[0]: diff --git a/implicit/recommender_base.py b/implicit/recommender_base.py index 7121a454..3dc3c9cc 100644 --- a/implicit/recommender_base.py +++ b/implicit/recommender_base.py @@ -82,6 +82,25 @@ def rank_items(self, userid, user_items, selected_items, recalculate_user=False) """ pass + @abstractmethod + def similar_users(self, userid, N=10): + """ + Calculates a list of similar users + + Parameters + ---------- + userid : int + The row id of the user to retrieve similar users for + N : int, optional + The number of similar users to return + + Returns + ------- + list + List of (userid, score) tuples + """ + pass + @abstractmethod def similar_items(self, itemid, N=10): """ @@ -117,8 +136,8 @@ def __init__(self): self.item_factors = None self.user_factors = None - # cache of item norms (useful for calculating similar items) - self._item_norms = None + # cache of user and item norms (useful for calculating similar users/items) + self._user_norms, self._item_norms = None, None def recommend(self, userid, user_items, N=10, 
filter_items=None, recalculate_user=False): user = self._user_factor(userid, user_items, recalculate_user) @@ -162,13 +181,37 @@ def _user_factor(self, userid, user_items, recalculate_user=False): def recalculate_user(self, userid, user_items): raise NotImplementedError("recalculate_user is not supported with this model") + def similar_users(self, userid, N=10): + factor = self.user_factors[userid] + factors = self.user_factors + norms = self.user_norms + return self._get_similarity_score(factor, factors, norms, N) + + similar_users.__doc__ = RecommenderBase.similar_users.__doc__ + def similar_items(self, itemid, N=10): - scores = self.item_factors.dot(self.item_factors[itemid]) / self.item_norms - best = np.argpartition(scores, -N)[-N:] - return sorted(zip(best, scores[best] / self.item_norms[itemid]), key=lambda x: -x[1]) + factor = self.item_factors[itemid] + factors = self.item_factors + norms = self.item_norms + return self._get_similarity_score(factor, factors, norms, N) similar_items.__doc__ = RecommenderBase.similar_items.__doc__ + def _get_similarity_score(self, factor, factors, norms, N): + """ Returns the N rows of factors most similar (by cosine) to factor, best first """ + # also divide by the query norm so that scores are true cosine similarities + scores = factors.dot(factor) / (norms * max(np.linalg.norm(factor), 1e-10)) + best = np.argpartition(scores, -N)[-N:] + return sorted(zip(best, scores[best]), key=lambda x: -x[1]) + + @property + def user_norms(self): + if self._user_norms is None: + self._user_norms = np.linalg.norm(self.user_factors, axis=-1) + # don't divide by zero in similar_users, replace with small value + self._user_norms[self._user_norms == 0] = 1e-10 + return self._user_norms + @property def item_norms(self): if self._item_norms is None: diff --git a/implicit/temp.py b/implicit/temp.py new file mode 100644 index 00000000..5ecc55bf --- /dev/null +++ b/implicit/temp.py @@ -0,0 +1,20 @@ +# finding largest/smallest k +import numpy as np + + +# largest k's +array = np.array([1, 3, 5, 7, 9, 2, 4, 6, 8, 10]) + +indices = np.argpartition(array, -3)[-3:] +values = array[indices] + +print(indices), print(values) + +# smallest k's +array 
= np.array([1, 3, 5, 7, 9, 2, 4, 6, 8, 10]) + +indices = np.argpartition(array, 3)[:3] +values = array[indices] + +print(indices), print(values) + From efa03c426de4421c83ddfed2f9949c4febc1b6f5 Mon Sep 17 00:00:00 2001 From: ita Date: Sun, 15 Jul 2018 20:25:52 +0900 Subject: [PATCH 2/3] remove verbose files --- implicit/temp.py | 20 -------------------- 1 file changed, 20 deletions(-) delete mode 100644 implicit/temp.py diff --git a/implicit/temp.py b/implicit/temp.py deleted file mode 100644 index 5ecc55bf..00000000 --- a/implicit/temp.py +++ /dev/null @@ -1,20 +0,0 @@ -# finding largest/smallest k -import numpy as np - - -# largest k's -array = np.array([1, 3, 5, 7, 9, 2, 4, 6, 8, 10]) - -indices = np.argpartition(array, -3)[-3:] -values = array[indices] - -print(indices), print(values) - -# smallest k's -array = np.array([1, 3, 5, 7, 9, 2, 4, 6, 8, 10]) - -indices = np.argpartition(array, 3)[:3] -values = array[indices] - -print(indices), print(values) - From 29388c59ed744df4601083bf86233ad1736bd7db Mon Sep 17 00:00:00 2001 From: ita Date: Sun, 15 Jul 2018 20:37:06 +0900 Subject: [PATCH 3/3] Add testing similarity calculation for users --- tests/recommender_base_test.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/recommender_base_test.py b/tests/recommender_base_test.py index 0d56993f..0464c591 100644 --- a/tests/recommender_base_test.py +++ b/tests/recommender_base_test.py @@ -6,6 +6,7 @@ from scipy.sparse import csr_matrix from implicit.evaluation import precision_at_k +from implicit.nearest_neighbours import ItemItemRecommender class TestRecommenderBaseMixin(object): @@ -69,6 +70,19 @@ def test_evaluation(self): show_progress=False) self.assertEqual(p, 1) + def test_similar_users(self): + # every returned neighbour must have the same parity as the queried user id + model = self._get_model() + # calculating similar users in nearest-neighbours is not implemented yet + if isinstance(model, ItemItemRecommender): + return + model.show_progress = False + model.fit(self.get_checker_board(50)) + for userid in 
range(50): + recs = model.similar_users(userid, N=10) + for r, _ in recs: + self.assertEqual(r % 2, userid % 2) + def test_similar_items(self): model = self._get_model() model.show_progress = False