From 2745470dd67e41a78491b6cebb1ef5aacf2a546d Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Fri, 8 Jul 2022 12:41:38 +0200 Subject: [PATCH 01/18] update error message of validate_adjacency --- src/torch_ppr/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index 398d5cf..ef1939a 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -144,7 +144,9 @@ def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None): # hotfix until torch.sparse.sum is implemented adj_sum = adj.t() @ torch.ones(adj.shape[0]) if not torch.allclose(adj_sum, torch.ones_like(adj_sum), rtol=1.0e-04): - raise ValueError(f"Invalid column sum: {adj_sum}. expected 1.0") + raise ValueError( + f"Invalid column sum: {adj_sum} (min: {adj_sum.min().item()}, max: {adj_sum.max().item()}). expected 1.0" + ) def prepare_page_rank_adjacency( From 92703e9541116e7fe56cadda234ffbdf198657ed Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 8 Jul 2022 08:27:49 -0400 Subject: [PATCH 02/18] Update utils.py --- src/torch_ppr/utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index ef1939a..34cf342 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -95,7 +95,7 @@ def edge_index_to_sparse_matrix( ) -def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None): +def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None, rtol: float = 1.0e-04): """ Validate the page-rank adjacency matrix. @@ -108,6 +108,8 @@ def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None): the adjacency matrix :param n: the number of nodes + :param rtol: + the tolerance for checking the sum is close to 1.0 :raises ValueError: if the adjacency matrix is invalid @@ -143,9 +145,9 @@ def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None): else: # hotfix until torch.sparse.sum is implemented adj_sum = adj.t() @ torch.ones(adj.shape[0]) - if not torch.allclose(adj_sum, torch.ones_like(adj_sum), rtol=1.0e-04): + if not torch.allclose(adj_sum, torch.ones_like(adj_sum), rtol=rtol): raise ValueError( - f"Invalid column sum: {adj_sum} (min: {adj_sum.min().item()}, max: {adj_sum.max().item()}). expected 1.0" + f"Invalid column sum: {adj_sum} (min: {adj_sum.min().item()}, max: {adj_sum.max().item()}). expected 1.0 with a tolerance of {rtol}" ) From 66b8a4f605a3d988439ebfbb59864fc0a6050e43 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Fri, 8 Jul 2022 14:29:17 +0200 Subject: [PATCH 03/18] break line --- src/torch_ppr/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index 34cf342..33a8d04 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -147,7 +147,8 @@ def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None, rtol: float = adj_sum = adj.t() @ torch.ones(adj.shape[0]) if not torch.allclose(adj_sum, torch.ones_like(adj_sum), rtol=rtol): raise ValueError( - f"Invalid column sum: {adj_sum} (min: {adj_sum.min().item()}, max: {adj_sum.max().item()}). expected 1.0 with a tolerance of {rtol}" + f"Invalid column sum: {adj_sum} (min: {adj_sum.min().item()}, max: {adj_sum.max().item()}). " + f"Expected 1.0 with a relative tolerance of {rtol}.", ) From 99f242b0fe8bac4e77556bf1617f5eb19dd951f6 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 11 Jul 2022 18:23:36 +0200 Subject: [PATCH 04/18] add handling of isolated vertices --- src/torch_ppr/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index 33a8d04..f111805 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -145,7 +145,12 @@ def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None, rtol: float = else: # hotfix until torch.sparse.sum is implemented adj_sum = adj.t() @ torch.ones(adj.shape[0]) - if not torch.allclose(adj_sum, torch.ones_like(adj_sum), rtol=rtol): + exp_sum = torch.ones_like(adj_sum) + mask = adj_sum == 0 + if mask.any(): + logger.warning(f"Adjacency contains {mask.sum().item()} isolated nodes.") + exp_sum[mask] = 0.0 + if not torch.allclose(adj_sum, exp_sum, rtol=rtol): raise ValueError( f"Invalid column sum: {adj_sum} (min: {adj_sum.min().item()}, max: {adj_sum.max().item()}). " f"Expected 1.0 with a relative tolerance of {rtol}.", From 19a781fcb569c3621511b81773ec5165d5e5a6b6 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 11 Jul 2022 18:29:50 +0200 Subject: [PATCH 05/18] fix division-by-zero for isolated vertices --- src/torch_ppr/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index f111805..d7870c5 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -198,7 +198,7 @@ def prepare_page_rank_adjacency( # symmetrize adj = adj + adj.t() # adjacency normalization: normalize to col-sum = 1 - degree_inv = torch.reciprocal(torch.sparse.sum(adj, dim=0).to_dense()) + degree_inv = torch.reciprocal(torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=1.0e-08)) degree_inv = torch.sparse_coo_tensor( indices=torch.arange(degree_inv.shape[0], device=adj.device).unsqueeze(dim=0).repeat(2, 1), values=degree_inv, From 37b37845a10a618292c6bceb25e66d20e8c05c3d Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 11 Jul 2022 18:30:25 +0200 Subject: [PATCH 06/18] add todo note --- src/torch_ppr/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index d7870c5..a2e7c55 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -197,6 +197,7 @@ def prepare_page_rank_adjacency( adj = edge_index_to_sparse_matrix(edge_index=edge_index, num_nodes=num_nodes) # symmetrize adj = adj + adj.t() + # TODO: should we add an identity matrix here? # adjacency normalization: normalize to col-sum = 1 degree_inv = torch.reciprocal(torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=1.0e-08)) degree_inv = torch.sparse_coo_tensor( From d66e61af0c90198f5b5fc2be1b5ee34d22280a6f Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Tue, 12 Jul 2022 09:29:11 +0200 Subject: [PATCH 07/18] use torch's eps --- src/torch_ppr/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index a2e7c55..6bf9607 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -199,7 +199,9 @@ def prepare_page_rank_adjacency( adj = adj + adj.t() # TODO: should we add an identity matrix here? # adjacency normalization: normalize to col-sum = 1 - degree_inv = torch.reciprocal(torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=1.0e-08)) + degree_inv = torch.reciprocal( + torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=torch.finfo(dtype=adj.dtype).eps) + ) degree_inv = torch.sparse_coo_tensor( indices=torch.arange(degree_inv.shape[0], device=adj.device).unsqueeze(dim=0).repeat(2, 1), values=degree_inv, From 06f126a280cbdc978c1132a8fc51310ad1260e6b Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 09:47:09 +0200 Subject: [PATCH 08/18] do not use keyword --- src/torch_ppr/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index 6bf9607..ca8cd75 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -200,7 +200,7 @@ def prepare_page_rank_adjacency( # TODO: should we add an identity matrix here? # adjacency normalization: normalize to col-sum = 1 degree_inv = torch.reciprocal( - torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=torch.finfo(dtype=adj.dtype).eps) + torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=torch.finfo(adj.dtype).eps) ) degree_inv = torch.sparse_coo_tensor( indices=torch.arange(degree_inv.shape[0], device=adj.device).unsqueeze(dim=0).repeat(2, 1), From 330efec352b8ab340e6c2ca5293b01fb44835177 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 09:50:32 +0200 Subject: [PATCH 09/18] extract utility --- src/torch_ppr/utils.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index ca8cd75..e4f79de 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -157,6 +157,21 @@ def validate_adjacency(adj: torch.Tensor, n: Optional[int] = None, rtol: float = ) +def sparse_diagonal(values: torch.Tensor) -> torch.Tensor: + """Create a sparse diagonal matrix with the given values. + + :param values: shape: ``(n,)`` + the values + + :return: shape: ``(n, n)`` + a sparse diagonal matrix + """ + return torch.sparse_coo_tensor( + indices=torch.arange(values.shape[0], device=values.device).unsqueeze(dim=0).repeat(2, 1), + values=values, + ) + + def prepare_page_rank_adjacency( adj: Optional[torch.Tensor] = None, edge_index: Optional[torch.LongTensor] = None, @@ -202,10 +217,7 @@ def prepare_page_rank_adjacency( degree_inv = torch.reciprocal( torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=torch.finfo(adj.dtype).eps) ) - degree_inv = torch.sparse_coo_tensor( - indices=torch.arange(degree_inv.shape[0], device=adj.device).unsqueeze(dim=0).repeat(2, 1), - values=degree_inv, - ) + degree_inv = sparse_diagonal(values=degree_inv) return torch.sparse.mm(adj, degree_inv) From d456db2b693198e4711a732fa82ff4d37aee20da Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 09:51:56 +0200 Subject: [PATCH 10/18] add option to add identity matrix --- src/torch_ppr/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index e4f79de..f616dbd 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -176,6 +176,7 @@ def prepare_page_rank_adjacency( adj: Optional[torch.Tensor] = None, edge_index: Optional[torch.LongTensor] = None, num_nodes: Optional[int] = None, + add_identity: bool = False, ) -> torch.Tensor: """ Prepare the page-rank adjacency matrix. @@ -195,6 +196,8 @@ def prepare_page_rank_adjacency( :param num_nodes: the number of nodes used to determine the shape of the adjacency matrix. If ``None``, and ``adj`` is not already provided, it is inferred from ``edge_index``. + :param add_identity: + whether to add an identity matrix to ``A`` to ensure that each node has a degree of at least one. :raises ValueError: if neither is provided, or the adjacency matrix is invalid @@ -212,6 +215,9 @@ def prepare_page_rank_adjacency( adj = edge_index_to_sparse_matrix(edge_index=edge_index, num_nodes=num_nodes) # symmetrize adj = adj + adj.t() + if add_identity: + adj = adj + sparse_diagonal(torch.ones(adj.shape[0], dtype=adj.dtype, device=adj.device)) + # TODO: should we add an identity matrix here? # adjacency normalization: normalize to col-sum = 1 degree_inv = torch.reciprocal( From 8114025eabbc1669fa7b0845204dd5455723b260 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 09:55:32 +0200 Subject: [PATCH 11/18] expose to API --- src/torch_ppr/api.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/torch_ppr/api.py b/src/torch_ppr/api.py index 4c875fa..7363d27 100644 --- a/src/torch_ppr/api.py +++ b/src/torch_ppr/api.py @@ -31,6 +31,7 @@ def page_rank( adj: Optional[torch.Tensor] = None, edge_index: Optional[torch.LongTensor] = None, num_nodes: Optional[int] = None, + add_identity: bool = False, max_iter: int = 1_000, alpha: float = 0.05, epsilon: float = 1.0e-04, @@ -48,6 +49,8 @@ def page_rank( :param num_nodes: the number of nodes used to determine the shape of the adjacency matrix. If ``None``, and ``adj`` is not already provided, it is inferred from ``edge_index``. + :param add_identity: + whether to add an identity matrix to ``A`` to ensure that each node has a degree of at least one. :param max_iter: ``max_iter > 0`` the maximum number of iterations @@ -69,7 +72,9 @@ def page_rank( the page-rank vector, i.e., a score between 0 and 1 for each node. """ # normalize inputs - adj = prepare_page_rank_adjacency(adj=adj, edge_index=edge_index, num_nodes=num_nodes) + adj = prepare_page_rank_adjacency( + adj=adj, edge_index=edge_index, num_nodes=num_nodes, add_identity=add_identity + ) validate_adjacency(adj=adj) x0 = prepare_x0(x0=x0, n=adj.shape[0]) @@ -96,6 +101,7 @@ def personalized_page_rank( *, adj: Optional[torch.Tensor] = None, edge_index: Optional[torch.LongTensor] = None, + add_identity: bool = False, num_nodes: Optional[int] = None, indices: Optional[torch.Tensor] = None, device: DeviceHint = None, @@ -115,6 +121,8 @@ def personalized_page_rank( :param num_nodes: the number of nodes used to determine the shape of the adjacency matrix. If ``None``, and ``adj`` is not already provided, it is inferred from ``edge_index``. + :param add_identity: + whether to add an identity matrix to ``A`` to ensure that each node has a degree of at least one. :param indices: shape: ``(k,)`` the node indices for which to calculate the PPR. Defaults to all nodes. @@ -131,9 +139,9 @@ def personalized_page_rank( # resolve device first device = resolve_device(device=device) # prepare adjacency and indices only once - adj = prepare_page_rank_adjacency(adj=adj, edge_index=edge_index, num_nodes=num_nodes).to( - device=device - ) + adj = prepare_page_rank_adjacency( + adj=adj, edge_index=edge_index, num_nodes=num_nodes, add_identity=add_identity + ).to(device=device) validate_adjacency(adj=adj) if indices is None: From 08cf1fd349ee751bdef99bff96f764b80e1cceb9 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 09:59:05 +0200 Subject: [PATCH 12/18] add test for sparse diagonal --- tests/test_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5fe0f24..9d16fb9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,6 +6,7 @@ from torch.nn import functional from torch_ppr import utils +import pytest def test_resolve_device(): @@ -165,3 +166,14 @@ def test_batched_personalized_page_rank(self): adj=self.adj, indices=torch.arange(self.num_nodes), batch_size=self.num_nodes // 3 ) utils.validate_x(x) + + +@pytest.mark.parametrize("n", [8, 16]) +def test_sparse_diagonal(n: int): + """Test for sparse diagonal matrix creation.""" + values = torch.rand(n) + matrix = utils.sparse_diagonal(values=values) + assert torch.is_tensor(matrix) + assert matrix.shape == (n, n) + assert matrix.is_sparse + assert torch.allclose(matrix.to_dense(), torch.diag(values)) From cd6d420aaf760acf0d757991a88f43d13c7148f3 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 10:00:51 +0200 Subject: [PATCH 13/18] test add identity --- tests/test_utils.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 9d16fb9..5c464d2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -100,14 +100,18 @@ def test_validate_adjacancy(self): def test_prepare_page_rank_adjacency(self): """Test adjacency preparation.""" - for (adj, edge_index) in ( + for (adj, edge_index, add_identity) in ( # from edge index - (None, self.edge_index), + (None, self.edge_index, False), # passing through adjacency matrix - (self.adj, None), - (self.adj, self.edge_index), + (self.adj, None, False), + (self.adj, self.edge_index, False), + # add identity + (None, self.edge_index, True), ): - adj2 = utils.prepare_page_rank_adjacency(adj=adj, edge_index=edge_index) + adj2 = utils.prepare_page_rank_adjacency( + adj=adj, edge_index=edge_index, add_identity=add_identity + ) utils.validate_adjacency(adj=adj2, n=self.num_nodes) if adj is not None: assert adj is adj2 @@ -177,3 +181,7 @@ def test_sparse_diagonal(n: int): assert matrix.shape == (n, n) assert matrix.is_sparse assert torch.allclose(matrix.to_dense(), torch.diag(values)) + + +def test_a(): + utils.prepare_page_rank_adjacency() From 2030c2e3e2505af87ca8d94544098d3151de6769 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 10:01:26 +0200 Subject: [PATCH 14/18] remove artifact --- tests/test_utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5c464d2..e30078f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -181,7 +181,3 @@ def test_sparse_diagonal(n: int): assert matrix.shape == (n, n) assert matrix.is_sparse assert torch.allclose(matrix.to_dense(), torch.diag(values)) - - -def test_a(): - utils.prepare_page_rank_adjacency() From d586b3b6a031ecd5a64d5e8a9b02a71755cc1830 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Mon, 18 Jul 2022 10:03:57 +0200 Subject: [PATCH 15/18] reformat --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index e30078f..a934bec 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,11 +2,11 @@ import unittest from typing import Counter, Optional, Tuple +import pytest import torch from torch.nn import functional from torch_ppr import utils -import pytest def test_resolve_device(): From 44f4f2e8132ab4cad85890d4489176696b9883dc Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Wed, 20 Jul 2022 11:18:30 +0200 Subject: [PATCH 16/18] fix outdated TODO note --- src/torch_ppr/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torch_ppr/utils.py b/src/torch_ppr/utils.py index f616dbd..4efc931 100644 --- a/src/torch_ppr/utils.py +++ b/src/torch_ppr/utils.py @@ -215,10 +215,10 @@ def prepare_page_rank_adjacency( adj = edge_index_to_sparse_matrix(edge_index=edge_index, num_nodes=num_nodes) # symmetrize adj = adj + adj.t() + # add identity matrix if requested if add_identity: adj = adj + sparse_diagonal(torch.ones(adj.shape[0], dtype=adj.dtype, device=adj.device)) - # TODO: should we add an identity matrix here? # adjacency normalization: normalize to col-sum = 1 degree_inv = torch.reciprocal( torch.sparse.sum(adj, dim=0).to_dense().clamp_min(min=torch.finfo(adj.dtype).eps) From 58b7a64b96e25c5b47663da535dcba124c200acb Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Wed, 20 Jul 2022 11:25:00 +0200 Subject: [PATCH 17/18] add test for isolated vertex handling --- tests/test_api.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_api.py b/tests/test_api.py index 8904069..3ca75fc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -50,3 +50,16 @@ def test_page_rank_manual(self): x = page_rank(edge_index=edge_index) # verify that central node has the largest PR value assert x.argmax() == 1 + + def test_page_rank_isolated_vertices(self): + """Test Page-Rank with isolated vertices.""" + # create isolated node, ID=0 + edge_index = self.edge_index + 1 + x = page_rank(edge_index=edge_index, add_identity=True) + # isolated node has only one self-loop -> no change in mass to initial mass + self.assertAlmostEqual(x[0].item(), 1 / (self.num_nodes + 1)) + # verify that other nodes are unaffected + x2 = page_rank(edge_index=self.edge_index) + # rescale + x2 = x2 * (self.num_nodes / (self.num_nodes + 1)) + assert torch.allclose(x2, x[1:], atol=1.0e-02) From 1b4b6049b3f041be4d468d12e97b81082dde9266 Mon Sep 17 00:00:00 2001 From: Max Berrendorf Date: Wed, 20 Jul 2022 11:25:26 +0200 Subject: [PATCH 18/18] fix seed for reproducible tests --- tests/test_api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_api.py b/tests/test_api.py index 3ca75fc..19d9cd2 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -15,9 +15,12 @@ class APITest(unittest.TestCase): def setUp(self) -> None: """Prepare data.""" + generator = torch.manual_seed(42) self.edge_index = torch.cat( [ - torch.randint(self.num_nodes, size=(2, self.num_edges - self.num_nodes)), + torch.randint( + self.num_nodes, size=(2, self.num_edges - self.num_nodes), generator=generator + ), # ensure connectivity torch.arange(self.num_nodes).unsqueeze(0).repeat(2, 1), ],