diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2e9879f..f8ad026 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -28,8 +28,8 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          python -m pip install scipy pandas pytest-cov pytest-randomly
-          # matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet
+          python -m pip install scipy numpy pytest-randomly
+          # pandas pytest-cov matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet
           python -m pip install git+https://github.com/networkx/networkx.git@main
           python -m pip install .
           echo "Done with installing"
diff --git a/.gitignore b/.gitignore
index f300f72..6238ec4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -109,6 +109,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+nxp-dev/
 
 # Spyder project settings
 .spyderproject
@@ -131,3 +132,6 @@ dmypy.json
 # asv
 results/
 html/
+
+# get_info update script
+temp__init__.py
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f1c0914..d13a3bc 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -82,33 +82,30 @@ To add any additional tests, **specific to nx_parallel**, you can follow the way
 
 ## Documentation syntax
 
-For displaying a small note about nx-parallel's implementation at the end of the main NetworkX documentation, we use the `backend_info` [entry_point](https://packaging.python.org/en/latest/specifications/entry-points/#entry-points) (in the `pyproject.toml` file). The [`get_info` function](https://github.com/networkx/nx-parallel/blob/main/_nx_parallel/__init__.py) is used to parse the docstrings of all the algorithms in nx-parallel and display the nx-parallel specific documentation on the NetworkX's main docs, in the "Additional Backend implementations" box, as shown in the screenshot below.
+For displaying a small note about nx-parallel's implementation at the end of the main NetworkX documentation, we use the `backend_info` [entry_point](https://packaging.python.org/en/latest/specifications/entry-points/#entry-points) (in the `pyproject.toml` file). The [`get_info` function](./_nx_parallel/__init__.py) is used to parse the docstrings of all the algorithms in nx-parallel and display the nx-parallel-specific documentation on NetworkX's main docs, in the "Additional Backend implementations" box, as shown in the screenshot below.
 
-![backend_box_ss](https://github.com/networkx/nx-parallel/blob/main/assets/images/backend_box_ss.png)
+![backend_box_ss](./assets/images/backend_box_ss.png)
 
-Here is how the docstring should be formatted in nx-parallel:
+nx-parallel follows the [sphinx docstring guidelines](https://the-ultimate-sphinx-tutorial.readthedocs.io/en/latest/_guide/_styleguides/docstrings-guidelines.html) for writing docstrings. However, when a docstring is extracted for display on the main networkx docs, only the first paragraph of the function's description and the first paragraph of each parameter's description are extracted and displayed. So, make sure to include all the necessary information in those first paragraphs. You only need to include the additional **backend** parameters in the `Parameters` section, not all the parameters. It is also recommended to include a link to the networkx function's documentation page in the docstring, at the end of the function's description.
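+
+For reference, each entry that `get_info` produces from these docstrings looks roughly like this (an abridged sketch; the actual entries are auto-generated in `_nx_parallel/__init__.py`):
+
+```.py
+info_entry = {
+    "number_of_isolates": {
+        "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/isolate.py#L8",
+        "additional_docs": "The parallel computation is implemented by dividing the list of isolated nodes into chunks ...",
+        "additional_parameters": {
+            'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the isolated nodes ..."
+        },
+    },
+}
+```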
+ +Here is an example of how the docstrings should be formatted in nx-parallel: ```.py -def betweenness_centrality( - G, k=None, normalized=True, weight=None, endpoints=False, seed=None, get_chunks="chunks" -): -"""[FIRST PARA DISPLAYED ON MAIN NETWORKX DOCS AS FUNC DESC] - The parallel computation is implemented by dividing the - nodes into chunks and computing betweenness centrality for each chunk concurrently. +def parallel_func(G, nx_arg, additional_backend_arg_1, additional_backend_arg_2=None): + """The parallel computation is implemented by dividing the + nodes into chunks and ..... [ONLY THIS PARAGRAPH WILL BE DISPLAYED ON THE MAIN NETWORKX DOCS] + + Some more additional information about the function. + + networkx.func : Parameters - ------------ [EVERYTHING BELOW THIS LINE AND BEFORE THE NETWORKX LINK WILL BE DISPLAYED IN ADDITIONAL PARAMETER'S SECTION ON NETWORKX MAIN DOCS] - get_chunks : function (default = "chunks") - A function that takes in nodes as input and returns node_chunks...[YOU CAN MULTIPLE PARAGRAPHS FOR EACH PARAMETER, IF NEEDED, SEPARATED BY 1 BLANK LINE] + ---------- + additional_backend_arg_1 : int or float + [YOU CAN HAVE MULTIPLE PARAGRAPHS BUT ONLY THE FIRST PARAGRAPH WILL BE DISPLAYED ON THE MAIN NETWORKX DOCS] - [LEAVE 2 BLANK LINES BETWEEN EACH PARAMETER] - parameter 2 : int + additional_backend_arg_2 : None or str (default=None) .... - . - . - . - [LEAVE 1 BLANK LINE BETWEEN THE PARAMETERS SECTION AND THE LINK] - networkx.betweenness_centrality : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html """ ``` @@ -116,7 +113,7 @@ def betweenness_centrality( In parallel computing, "chunking" refers to dividing a large task into smaller, more manageable chunks that can be processed simultaneously by multiple computing units, such as CPU cores or distributed computing nodes. It's like breaking down a big task into smaller pieces so that multiple workers can work on different pieces at the same time, and in the case of nx-parallel, this usually speeds up the overall process. -The default chunking in nx-parallel is done by first determining the number of available CPU cores and then allocating the nodes (or edges or any other iterator) per chunk by dividing the total number of nodes by the total CPU cores available. (ref. [chunk.py](https://github.com/networkx/nx-parallel/blob/main/nx_parallel/utils/chunk.py)). This default chunking can be overridden by the user by passing a custom `get_chunks` function to the algorithm as a kwarg. While adding a new algorithm, you can change this default chunking, if necessary (ref. [PR](https://github.com/networkx/nx-parallel/pull/33)). Also, when [the `config` PR](https://github.com/networkx/networkx/pull/7225) is merged in networkx, and the `config` will be added to nx-parallel, then the user would be able to control the number of CPU cores they would want to use and then the chunking would be done accordingly. +The default chunking in nx-parallel is done by first determining the number of available CPU cores and then allocating the nodes (or edges or any other iterator) per chunk by dividing the total number of nodes by the total CPU cores available. (ref. [chunk.py](./nx_parallel/utils/chunk.py)). This default chunking can be overridden by the user by passing a custom `get_chunks` function to the algorithm as a kwarg. While adding a new algorithm, you can change this default chunking, if necessary (ref. 
[PR](https://github.com/networkx/nx-parallel/pull/33)). Also, when [the `config` PR](https://github.com/networkx/networkx/pull/7225) is merged in networkx and `config` is added to nx-parallel, users will be able to control the number of CPU cores they want to use, and the chunking will be done accordingly.
 
 ## General guidelines on adding a new algorithm
 
@@ -124,10 +121,10 @@ The default chunking in nx-parallel is done by first determining the number of a
 - The algorithm that you are considering to add to nx-parallel should be in the main networkx repository and it should have the `_dispatchable` decorator. If not, you can consider adding a sequential implementation in networkx first.
 - check-list for adding a new function:
   - [ ] Add the parallel implementation(make sure API doesn't break), the file structure should be the same as that in networkx.
-  - [ ] add the function to the `Dispatcher` class in [interface.py](https://github.com/networkx/nx-parallel/blob/main/nx_parallel/interface.py) (take care of the `name` parameter in `_dispatchable` (ref. [docs](https://networkx.org/documentation/latest/reference/backends.html)))
+  - [ ] add the function to the `BackendInterface` class in [interface.py](./nx_parallel/interface.py) (take care of the `name` parameter in `_dispatchable` (ref. [docs](https://networkx.org/documentation/latest/reference/backends.html)))
   - [ ] update the `__init__.py` files accordingly
   - [ ] docstring following the above format
-  - [ ] run the [timing script](https://github.com/networkx/nx-parallel/blob/main/timing/timing_individual_function.py) to get the performance heatmap
+  - [ ] run the [timing script](./timing/timing_individual_function.py) to get the performance heatmap
   - [ ] add additional test(if any)
   - [ ] add benchmark(s) for the new function(ref. the README in benchmarks folder for more details)
 
diff --git a/README.md b/README.md
index c161233..44aeb93 100644
--- a/README.md
+++ b/README.md
@@ -123,7 +123,7 @@ nxp.betweenness_centrality(H)
 
 2. Right now there isn't much difference between `nx.Graph` and `nxp.ParallelGraph` so `method 3` would work fine but it is not recommended because in the future that might not be the case.
 
-Feel free to contribute to nx-parallel. You can find the contributing guidelines [here](https://github.com/networkx/nx-parallel/blob/main/CONTRIBUTING.md). If you'd like to implement a feature or fix a bug, we'd be happy to review a pull request. Please make sure to explain the changes you made in the pull request description. And feel free to open issues for any problems you face, or for new features you'd like to see implemented.
+Feel free to contribute to nx-parallel. You can find the contributing guidelines [here](./CONTRIBUTING.md). If you'd like to implement a feature or fix a bug, we'd be happy to review a pull request. Please make sure to explain the changes you made in the pull request description. And feel free to open issues for any problems you face, or for new features you'd like to see implemented.
 
 This project is managed under the NetworkX organisation, so the [code of conduct of NetworkX](https://github.com/networkx/networkx/blob/main/CODE_OF_CONDUCT.rst) applies here as well.
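To make the chunking override described in CONTRIBUTING.md above concrete, here is a minimal sketch; it assumes the helpers `nxp.cpu_count` and `nxp.chunks` from `chunk.py`, and uses `betweenness_centrality` purely as an illustration:

```py
import networkx as nx
import nx_parallel as nxp

G = nx.fast_gnp_random_graph(100, 0.5, seed=42)
H = nxp.ParallelGraph(G)

def custom_chunks(nodes):
    # Mirrors the default strategy: slice the nodes into `n` chunks,
    # where `n` is the number of available CPU cores.
    nodes = list(nodes)
    num_in_chunk = max(len(nodes) // nxp.cpu_count(), 1)
    return nxp.chunks(nodes, num_in_chunk)

# Any algorithm exposing the `get_chunks` kwarg accepts such a function.
bc = nxp.betweenness_centrality(H, get_chunks=custom_chunks)
```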
diff --git a/_nx_parallel/__init__.py b/_nx_parallel/__init__.py index efc7d58..30648f3 100644 --- a/_nx_parallel/__init__.py +++ b/_nx_parallel/__init__.py @@ -13,7 +13,9 @@ def get_info(): "number_of_isolates": { "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/isolate.py#L8", "additional_docs": "The parallel computation is implemented by dividing the list of isolated nodes into chunks and then finding the length of each chunk in parallel and then adding all the lengths at the end.", - "additional_parameters": None, + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the isolated nodes as input and returns an iterable `isolate_chunks`. The default chunking is done by slicing the `isolates` into `n` chunks, where `n` is the total number of CPU cores available." + }, }, "square_clustering": { "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/cluster.py#L10", @@ -25,22 +27,30 @@ def get_info(): "local_efficiency": { "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/efficiency_measures.py#L9", "additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and then computing and adding global efficiencies of all node in all chunks, in parallel, and then adding all these sums and dividing by the total number of nodes at the end.", - "additional_parameters": None, + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the total number of CPU cores available." + }, }, "closeness_vitality": { "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/vitality.py#L9", "additional_docs": "The parallel computation is implemented only when the node is not specified. The closeness vitality for each node is computed concurrently.", - "additional_parameters": None, + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the total number of CPU cores." + }, }, "is_reachable": { - "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L10", + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L12", "additional_docs": "The function parallelizes the calculation of two neighborhoods of vertices in `G` and checks closure conditions for each neighborhood subset in parallel.", - "additional_parameters": None, + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the total number of CPU cores available." 
+ }, }, "tournament_is_strongly_connected": { - "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L54", + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/tournament.py#L59", "additional_docs": "The parallel computation is implemented by dividing the nodes into chunks and then checking whether each node is reachable from each other node in parallel.", - "additional_parameters": None, + "additional_parameters": { + 'get_chunks : str, function (default = "chunks")': "A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the total number of CPU cores available." + }, }, "all_pairs_node_connectivity": { "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/connectivity/connectivity.py#L17", @@ -127,7 +137,7 @@ def get_info(): }, }, "all_pairs_shortest_path": { - "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/unweighted.py#L62", + "url": "https://github.com/networkx/nx-parallel/blob/main/nx_parallel/algorithms/shortest_paths/unweighted.py#L63", "additional_docs": "The parallel implementation first divides the nodes into chunks and then creates a generator to lazily compute shortest paths for each `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores.", "additional_parameters": { 'get_chunks : str, function (default = "chunks")': "A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores." 
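These `get_info` entries are what the main NetworkX docs consume through the `networkx.backend_info` entry point declared in `pyproject.toml`. A rough way to inspect them locally, assuming Python 3.10+ `importlib.metadata` and an installed nx-parallel:

```py
from importlib.metadata import entry_points

# Look up the "parallel" entry point registered under
# [project.entry-points."networkx.backend_info"], resolve it to
# the get_info function, and call it.
ep = entry_points(group="networkx.backend_info")["parallel"]
info = ep.load()()
print(sorted(info))  # inspect the returned dictionary's top-level keys
```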
diff --git a/_nx_parallel/update_get_info.py b/_nx_parallel/update_get_info.py
index 6259105..846ba0a 100644
--- a/_nx_parallel/update_get_info.py
+++ b/_nx_parallel/update_get_info.py
@@ -1,7 +1,13 @@
 import os
 import ast
 
-__all__ = ["get_funcs_info", "extract_docstrings_from_file", "extract_from_docs"]
+__all__ = [
+    "get_funcs_info",
+    "extract_docstrings_from_file",
+    "extract_add_docs",
+    "extract_add_params",
+    "get_url",
+]
 
 # Helper functions for get_info
 
@@ -21,11 +27,10 @@ def get_funcs_info():
             path = os.path.join(root, file)
             d = extract_docstrings_from_file(path)
             for func in d:
-                par_docs, par_params = extract_from_docs(d[func])
                 funcs[func] = {
                     "url": get_url(path, func),
-                    "additional_docs": par_docs,
-                    "additional_parameters": par_params,
+                    "additional_docs": extract_add_docs(d[func]),
+                    "additional_parameters": extract_add_params(d[func]),
                 }
     return funcs
 
@@ -60,8 +65,8 @@ def extract_docstrings_from_file(file_path):
     return docstrings
 
 
-def extract_from_docs(docstring):
-    """Extract the parallel documentation and parallel parameter description from the given doctring."""
+def extract_add_docs(docstring):
+    """Extract the parallel documentation description from the given docstring."""
     try:
         # Extracting Parallel Computation description
         # Assuming that the first para in docstring is the function's PC desc
@@ -76,30 +81,38 @@
     except Exception as e:
         print(e)
         par_docs = None
+    return par_docs
+
+
+def extract_add_params(docstring):
+    """Extract the parallel parameter description from the given docstring."""
     try:
         # Extracting extra parameters
         # Assuming that the last para in docstring is the function's extra params
         par_params = {}
-        par_params_ = docstring.split("------------\n")[1]
-
-        par_params_ = par_params_.split("\n\n\n")
-        for i in par_params_:
-            j = i.split("\n")
-            par_params[j[0]] = "\n".join(
-                [line.strip() for line in j[1:] if line.strip()]
-            )
-            if i == par_params_[-1]:
-                par_params[j[0]] = " ".join(
-                    [line.strip() for line in j[1:-1] if line.strip()]
-                )
-        par_docs = par_docs.replace("\n", " ")
+        par_params_ = docstring.split("----------\n")[1]
+        par_params_ = par_params_.split("\n")
+
+        i = 0
+        while i < len(par_params_):
+            line = par_params_[i]
+            if " : " in line:
+                key = line.strip()
+                n = i + 1
+                par_desc = ""
+                while n < len(par_params_) and par_params_[n] != "":
+                    par_desc += par_params_[n].strip() + " "
+                    n += 1
+                par_params[key] = par_desc.strip()
+                i = n + 1
+            else:
+                i += 1
     except IndexError:
         par_params = None
     except Exception as e:
        print(e)
        par_params = None
-    return par_docs, par_params
+    return par_params
 
 
 def get_url(file_path, function_name):
diff --git a/assets/images/backend_box_ss.png b/assets/images/backend_box_ss.png
index d2a04e4..bc6eed2 100644
Binary files a/assets/images/backend_box_ss.png and b/assets/images/backend_box_ss.png differ
diff --git a/nx_parallel/algorithms/approximation/connectivity.py b/nx_parallel/algorithms/approximation/connectivity.py
index 6b7c4d9..0a23ecd 100644
--- a/nx_parallel/algorithms/approximation/connectivity.py
+++ b/nx_parallel/algorithms/approximation/connectivity.py
@@ -24,16 +24,16 @@ def approximate_all_pairs_node_connectivity(
     will run the parallel implementation of `all_pairs_node_connectivity` present in the
     `connectivity/connectivity`. Use `nxp.approximate_all_pairs_node_connectivity` instead.
+ networkx.all_pairs_node_connectivity : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.approximation.connectivity.all_pairs_node_connectivity.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in `list(iter_func(nbunch, 2))` as input and returns an iterable `pairs_chunks`, here `iter_func` is `permutations` in case of directed graphs and `combinations` in case of undirected graphs. The default is to create chunks by slicing the list into `n` chunks, where `n` is the number of CPU cores, such that size of each chunk is atmost 10, and at least 1. - - networkx.all_pairs_node_connectivity : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.approximation.connectivity.all_pairs_node_connectivity.html """ if hasattr(G, "graph_object"): diff --git a/nx_parallel/algorithms/bipartite/redundancy.py b/nx_parallel/algorithms/bipartite/redundancy.py index e01e328..69da571 100644 --- a/nx_parallel/algorithms/bipartite/redundancy.py +++ b/nx_parallel/algorithms/bipartite/redundancy.py @@ -12,14 +12,15 @@ def node_redundancy(G, nodes=None, get_chunks="chunks"): """In the parallel implementation we divide the nodes into chunks and compute the node redundancy coefficients for all `node_chunk` in parallel. + networkx.bipartite.node_redundancy : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.bipartite.redundancy.node_redundancy.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` (or `nodes`) into `n` chunks, where `n` is the number of CPU cores. - - networkx.bipartite.node_redundancy : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.bipartite.redundancy.node_redundancy.html""" + """ if hasattr(G, "graph_object"): G = G.graph_object diff --git a/nx_parallel/algorithms/centrality/betweenness.py b/nx_parallel/algorithms/centrality/betweenness.py index 0e0d386..1296700 100644 --- a/nx_parallel/algorithms/centrality/betweenness.py +++ b/nx_parallel/algorithms/centrality/betweenness.py @@ -25,14 +25,14 @@ def betweenness_centrality( """The parallel computation is implemented by dividing the nodes into chunks and computing betweenness centrality for each chunk concurrently. + networkx.betweenness_centrality : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in a list of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the number of CPU cores. 
- - networkx.betweenness_centrality : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.betweenness_centrality.html """ if hasattr(G, "graph_object"): G = G.graph_object diff --git a/nx_parallel/algorithms/cluster.py b/nx_parallel/algorithms/cluster.py index 8218d12..fca589d 100644 --- a/nx_parallel/algorithms/cluster.py +++ b/nx_parallel/algorithms/cluster.py @@ -12,14 +12,14 @@ def square_clustering(G, nodes=None, get_chunks="chunks"): coefficient for all `node_chunks` are computed in parallel over all available CPU cores. + networkx.square_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.square_clustering.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in a list of all the nodes (or nbunch) as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.square_clustering: https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.square_clustering.html """ def _compute_clustering_chunk(node_iter_chunk): diff --git a/nx_parallel/algorithms/connectivity/connectivity.py b/nx_parallel/algorithms/connectivity/connectivity.py index 7946662..756d505 100644 --- a/nx_parallel/algorithms/connectivity/connectivity.py +++ b/nx_parallel/algorithms/connectivity/connectivity.py @@ -22,16 +22,16 @@ def all_pairs_node_connectivity(G, nbunch=None, flow_func=None, get_chunks="chun execute these computations in parallel across all available CPU cores. At the end, the results are aggregated into a single dictionary and returned. + networkx.all_pairs_node_connectivity : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.connectivity.connectivity.all_pairs_node_connectivity.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in `list(iter_func(nbunch, 2))` as input and returns an iterable `pairs_chunks`, here `iter_func` is `permutations` in case of directed graphs and `combinations` in case of undirected graphs. The default is to create chunks by slicing the list into `n` chunks, where `n` is the number of CPU cores, such that size of each chunk is atmost 10, and at least 1. - - networkx.all_pairs_node_connectivity : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.connectivity.connectivity.all_pairs_node_connectivity.html """ if hasattr(G, "graph_object"): diff --git a/nx_parallel/algorithms/efficiency_measures.py b/nx_parallel/algorithms/efficiency_measures.py index 6c7a255..8c88bae 100644 --- a/nx_parallel/algorithms/efficiency_measures.py +++ b/nx_parallel/algorithms/efficiency_measures.py @@ -6,27 +6,37 @@ __all__ = ["local_efficiency"] -def local_efficiency(G): +def local_efficiency(G, get_chunks="chunks"): """The parallel computation is implemented by dividing the nodes into chunks and then computing and adding global efficiencies of all node in all chunks, in parallel, and then adding all these sums and dividing by the total number of nodes at the end. 
- networkx.local_efficiency : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.efficiency_measures.local_efficiency.html#local-efficiency + networkx.local_efficiency : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.efficiency_measures.local_efficiency.html + + Parameters + ---------- + get_chunks : str, function (default = "chunks") + A function that takes in a list of all the nodes as input and returns an + iterable `node_chunks`. The default chunking is done by slicing the `nodes` + into `n` chunks, where `n` is the total number of CPU cores available. """ - def _local_efficiency_node_subset(G, nodes): - return sum(nx.global_efficiency(G.subgraph(G[v])) for v in nodes) + def _local_efficiency_node_subset(G, chunk): + return sum(nx.global_efficiency(G.subgraph(G[v])) for v in chunk) if hasattr(G, "graph_object"): G = G.graph_object - cpu_count = nxp.cpu_count() + total_cores = nxp.cpu_count() - num_in_chunk = max(len(G.nodes) // cpu_count, 1) - node_chunks = list(nxp.chunks(G.nodes, num_in_chunk)) + if get_chunks == "chunks": + num_in_chunk = max(len(G.nodes) // total_cores, 1) + node_chunks = list(nxp.chunks(G.nodes, num_in_chunk)) + else: + node_chunks = get_chunks(G.nodes) - efficiencies = Parallel(n_jobs=cpu_count)( + efficiencies = Parallel(n_jobs=total_cores)( delayed(_local_efficiency_node_subset)(G, chunk) for chunk in node_chunks ) return sum(efficiencies) / len(G) diff --git a/nx_parallel/algorithms/isolate.py b/nx_parallel/algorithms/isolate.py index 6cb1d0e..977996d 100644 --- a/nx_parallel/algorithms/isolate.py +++ b/nx_parallel/algorithms/isolate.py @@ -5,12 +5,19 @@ __all__ = ["number_of_isolates"] -def number_of_isolates(G): +def number_of_isolates(G, get_chunks="chunks"): """The parallel computation is implemented by dividing the list of isolated nodes into chunks and then finding the length of each chunk in parallel and then adding all the lengths at the end. - networkx.number_of_isolates : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.isolate.number_of_isolates.html#number-of-isolates + networkx.number_of_isolates : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.isolate.number_of_isolates.html + + Parameters + ---------- + get_chunks : str, function (default = "chunks") + A function that takes in a list of all the isolated nodes as input and returns an + iterable `isolate_chunks`. The default chunking is done by slicing the `isolates` + into `n` chunks, where `n` is the total number of CPU cores available. 
""" if hasattr(G, "graph_object"): G = G.graph_object @@ -18,8 +25,12 @@ def number_of_isolates(G): cpu_count = nxp.cpu_count() isolates_list = list(nx.isolates(G)) - num_in_chunk = max(len(isolates_list) // cpu_count, 1) - isolate_chunks = nxp.chunks(isolates_list, num_in_chunk) + if get_chunks == "chunks": + num_in_chunk = max(len(isolates_list) // cpu_count, 1) + isolate_chunks = nxp.chunks(isolates_list, num_in_chunk) + else: + isolate_chunks = get_chunks(isolates_list) + results = Parallel(n_jobs=cpu_count)( delayed(len)(chunk) for chunk in isolate_chunks ) diff --git a/nx_parallel/algorithms/shortest_paths/generic.py b/nx_parallel/algorithms/shortest_paths/generic.py index 994e5b6..4cba68a 100644 --- a/nx_parallel/algorithms/shortest_paths/generic.py +++ b/nx_parallel/algorithms/shortest_paths/generic.py @@ -15,14 +15,14 @@ def all_pairs_all_shortest_paths( each node in `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.single_source_all_shortest_paths : https://networkx.org/documentation/latest/reference/algorithms/generated/networkx.algorithms.shortest_paths.generic.single_source_all_shortest_paths.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.single_source_all_shortest_paths : https://github.com/networkx/networkx/blob/de85e3fe52879f819e7a7924474fc6be3994e8e4/networkx/algorithms/shortest_paths/generic.py#L606 """ def _process_node_chunk(node_chunk): diff --git a/nx_parallel/algorithms/shortest_paths/unweighted.py b/nx_parallel/algorithms/shortest_paths/unweighted.py index b2fc4dd..d1930e6 100644 --- a/nx_parallel/algorithms/shortest_paths/unweighted.py +++ b/nx_parallel/algorithms/shortest_paths/unweighted.py @@ -21,14 +21,15 @@ def all_pairs_shortest_path_length(G, cutoff=None, get_chunks="chunks"): `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.single_source_shortest_path_length : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.unweighted.all_pairs_shortest_path_length.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.single_source_shortest_path_length : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.unweighted.all_pairs_shortest_path_length.html#all-pairs-shortest-path-length""" + """ def _process_node_chunk(node_chunk): return [ @@ -65,14 +66,15 @@ def all_pairs_shortest_path(G, cutoff=None, get_chunks="chunks"): then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. 
+ networkx.single_source_shortest_path : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.unweighted.all_pairs_shortest_path.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.single_source_shortest_path : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.unweighted.all_pairs_shortest_path.html#all-pairs-shortest-path""" + """ def _process_node_chunk(node_chunk): return [ diff --git a/nx_parallel/algorithms/shortest_paths/weighted.py b/nx_parallel/algorithms/shortest_paths/weighted.py index 2b4ae98..53bda1b 100644 --- a/nx_parallel/algorithms/shortest_paths/weighted.py +++ b/nx_parallel/algorithms/shortest_paths/weighted.py @@ -31,14 +31,14 @@ def all_pairs_dijkstra(G, cutoff=None, weight="weight", get_chunks="chunks"): `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.all_pairs_dijkstra : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_dijkstra.html#all-pairs-dijkstra + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.all_pairs_dijkstra : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_dijkstra.html#all-pairs-dijkstra """ def _process_node_chunk(node_chunk): @@ -76,14 +76,14 @@ def all_pairs_dijkstra_path_length( `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.all_pairs_dijkstra_path_length : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_dijkstra_path_length.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.all_pairs_dijkstra_path_length : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_dijkstra_path_length.html#all-pairs-dijkstra-path-length """ def _process_node_chunk(node_chunk): @@ -124,14 +124,14 @@ def all_pairs_dijkstra_path(G, cutoff=None, weight="weight", get_chunks="chunks" then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.all_pairs_dijkstra_path : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_dijkstra_path.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. 
The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.all_pairs_dijkstra_path : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_dijkstra_path.html#all-pairs-dijkstra-path """ def _process_node_chunk(node_chunk): @@ -167,14 +167,14 @@ def all_pairs_bellman_ford_path_length(G, weight="weight", get_chunks="chunks"): `node_chunk`, and then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.all_pairs_bellman_ford_path_length : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_bellman_ford_path_length.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.all_pairs_bellman_ford_path_length : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_bellman_ford_path_length.html#all-pairs-bellman-ford-path-length """ def _process_node_chunk(node_chunk): @@ -212,14 +212,14 @@ def all_pairs_bellman_ford_path(G, weight="weight", get_chunks="chunks"): then employs joblib's `Parallel` function to execute these computations in parallel across all available CPU cores. + networkx.all_pairs_bellman_ford_path : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_bellman_ford_path.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. - - networkx.all_pairs_bellman_ford_path : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.all_pairs_bellman_ford_path.html#all-pairs-bellman-ford-path """ def _process_node_chunk(node_chunk): @@ -254,14 +254,14 @@ def johnson(G, weight="weight", get_chunks="chunks"): nodes into chunks and computing the shortest paths using Johnson's Algorithm for each chunk in parallel. + networkx.johnson : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.johnson.html + Parameters - ------------ + ---------- get_chunks : str, function (default = "chunks") A function that takes in an iterable of all the nodes as input and returns an iterable `node_chunks`. The default chunking is done by slicing the `G.nodes` into `n` chunks, where `n` is the number of CPU cores. 
- - networkx.johnson : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.johnson.html#johnson """ if hasattr(G, "graph_object"): G = G.graph_object diff --git a/nx_parallel/algorithms/tournament.py b/nx_parallel/algorithms/tournament.py index 8452d31..8064ae7 100644 --- a/nx_parallel/algorithms/tournament.py +++ b/nx_parallel/algorithms/tournament.py @@ -1,5 +1,7 @@ from joblib import Parallel, delayed import nx_parallel as nxp +from networkx.algorithms.simple_paths import is_simple_path as is_path +import networkx as nx __all__ = [ "is_reachable", @@ -7,71 +9,84 @@ ] -def is_reachable(G, s, t): +def is_reachable(G, s, t, get_chunks="chunks"): """The function parallelizes the calculation of two neighborhoods of vertices in `G` and checks closure conditions for each neighborhood subset in parallel. - networkx.tournament.is_reachable : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.tournament.is_reachable.html#networkx.algorithms.tournament.is_reachable - """ - if hasattr(G, "graph_object"): - G = G.graph_object - - G_adj = G._adj - setG = set(G) + networkx.tournament.is_reachable : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.tournament.is_reachable.html - cpu_count = nxp.cpu_count() + Parameters + ---------- + get_chunks : str, function (default = "chunks") + A function that takes in a list of all the nodes as input and returns an + iterable `node_chunks`. The default chunking is done by slicing the `nodes` + into `n` chunks, where `n` is the total number of CPU cores available. + """ - def two_nbrhood_subset(G, chunk): - result = [] + def two_neighborhood_close(G, chunk): + tnc = [] for v in chunk: - v_nbrs = G_adj[v].keys() - result.append(v_nbrs | {x for nbr in v_nbrs for x in G_adj[nbr]}) - return result + S = { + x + for x in G + if x == v or x in G[v] or any(is_path(G, [v, z, x]) for z in G) + } + tnc.append(not (is_closed(G, S) and s in S and t not in S)) + return all(tnc) def is_closed(G, nodes): - return all(v in G_adj[u] for u in setG - nodes for v in nodes) + return all(v in G[u] for u in set(G) - nodes for v in nodes) - def check_closure_subset(chunk): - return all(not (s in S and t not in S and is_closed(G, S)) for S in chunk) + if hasattr(G, "graph_object"): + G = G.graph_object - # send chunk of vertices to each process (calculating neighborhoods) - num_in_chunk = max(len(G) // cpu_count, 1) + cpu_count = nxp.cpu_count() - # neighborhoods = [two_neighborhood_subset(G, chunk) for chunk in node_chunks] - neighborhoods = Parallel(n_jobs=cpu_count)( - delayed(two_nbrhood_subset)(G, chunk) for chunk in nxp.chunks(G, num_in_chunk) - ) + if get_chunks == "chunks": + num_in_chunk = max(len(G) // cpu_count, 1) + node_chunks = nxp.chunks(G, num_in_chunk) + else: + node_chunks = get_chunks(G) - # send chunk of neighborhoods to each process (checking closure conditions) - nbrhoods = (nhood for nh_chunk in neighborhoods for nhood in nh_chunk) - results = Parallel(n_jobs=cpu_count)( - delayed(check_closure_subset)(ch) for ch in nxp.chunks(nbrhoods, num_in_chunk) + return all( + Parallel(n_jobs=cpu_count)( + delayed(two_neighborhood_close)(G, chunk) for chunk in node_chunks + ) ) - return all(results) -def tournament_is_strongly_connected(G): +def tournament_is_strongly_connected(G, get_chunks="chunks"): """The parallel computation is implemented by dividing the nodes into chunks and then checking whether each node is reachable 
from each other node in parallel. Note, this function uses the name `tournament_is_strongly_connected` while - dispatching to the backend in=mplementation. So, `nxp.tournament.is_strongly_connected` + dispatching to the backend implementation. So, `nxp.tournament.is_strongly_connected` will result in an error. Use `nxp.tournament_is_strongly_connected` instead. - networkx.tournament.is_strongly_connected : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.tournament.is_strongly_connected.html#networkx.algorithms.tournament.is_strongly_connected + networkx.tournament.is_strongly_connected : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.tournament.is_strongly_connected.html + + Parameters + ---------- + get_chunks : str, function (default = "chunks") + A function that takes in a list of all the nodes as input and returns an + iterable `node_chunks`. The default chunking is done by slicing the `nodes` + into `n` chunks, where `n` is the total number of CPU cores available. """ if hasattr(G, "graph_object"): G = G.graph_object - # Subset version of is_reachable def is_reachable_subset(G, chunk): - return all(is_reachable(G, u, v) for v in chunk for u in G) + return all(nx.tournament.is_reachable(G, u, v) for v in chunk for u in G) cpu_count = nxp.cpu_count() - num_in_chunk = max(len(G) // cpu_count, 1) - node_chunks = nxp.chunks(G, num_in_chunk) + + if get_chunks == "chunks": + num_in_chunk = max(min(len(G) // cpu_count, 10), 1) + node_chunks = nxp.chunks(G, num_in_chunk) + else: + node_chunks = get_chunks(G) results = Parallel(n_jobs=cpu_count)( delayed(is_reachable_subset)(G, chunk) for chunk in node_chunks diff --git a/nx_parallel/algorithms/vitality.py b/nx_parallel/algorithms/vitality.py index e719749..d0a5798 100644 --- a/nx_parallel/algorithms/vitality.py +++ b/nx_parallel/algorithms/vitality.py @@ -6,12 +6,25 @@ __all__ = ["closeness_vitality"] -def closeness_vitality(G, node=None, weight=None, wiener_index=None): +def closeness_vitality( + G, node=None, weight=None, wiener_index=None, get_chunks="chunks" +): """The parallel computation is implemented only when the node is not specified. The closeness vitality for each node is computed concurrently. - networkx.closeness_vitality : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.vitality.closeness_vitality.html#closeness-vitality + networkx.closeness_vitality : https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.vitality.closeness_vitality.html + + Parameters + ---------- + get_chunks : str, function (default = "chunks") + A function that takes in a list of all the nodes as input and + returns an iterable `node_chunks`. The default chunking is done by slicing the + `nodes` into `n` chunks, where `n` is the total number of CPU cores. 
""" + + def closeness_vitality_chunk_subset(chunk): + return {v: vitality(v) for v in chunk} + if hasattr(G, "graph_object"): G = G.graph_object @@ -22,10 +35,19 @@ def closeness_vitality(G, node=None, weight=None, wiener_index=None): after = nx.wiener_index(G.subgraph(set(G) - {node}), weight=weight) return wiener_index - after - cpu_count = nxp.cpu_count() + total_cores = nxp.cpu_count() + + if get_chunks == "chunks": + num_in_chunk = max(len(G) // total_cores, 1) + node_chunks = nxp.chunks(G.nodes, num_in_chunk) + else: + node_chunks = get_chunks(G.nodes) + + vitality = partial( + nx.closeness_vitality, G, weight=weight, wiener_index=wiener_index + ) - vitality = partial(closeness_vitality, G, weight=weight, wiener_index=wiener_index) - result = Parallel(n_jobs=cpu_count)( - delayed(lambda v: (v, vitality(v)))(v) for v in G + result = Parallel(n_jobs=total_cores)( + delayed(closeness_vitality_chunk_subset)(chunk) for chunk in node_chunks ) - return dict(result) + return {k: v for d in result for k, v in d.items()} diff --git a/nx_parallel/interface.py b/nx_parallel/interface.py index ccef103..8296b03 100644 --- a/nx_parallel/interface.py +++ b/nx_parallel/interface.py @@ -25,8 +25,9 @@ ) from nx_parallel.algorithms.connectivity import connectivity from nx_parallel.algorithms.cluster import square_clustering +import networkx as nx -__all__ = ["Dispatcher", "ParallelGraph"] +__all__ = ["BackendInterface", "ParallelGraph"] class ParallelGraph: @@ -35,8 +36,15 @@ class ParallelGraph: __networkx_backend__ = "parallel" - def __init__(self, graph_object): - self.graph_object = graph_object + def __init__(self, graph_object=None): + if graph_object is None: + self.graph_object = nx.Graph() + elif isinstance( + graph_object, (nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph) + ): + self.graph_object = graph_object + else: + self.graph_object = nx.Graph(graph_object) def is_multigraph(self): return self.graph_object.is_multigraph() @@ -44,9 +52,12 @@ def is_multigraph(self): def is_directed(self): return self.graph_object.is_directed() + def __str__(self): + return "Parallel" + str(self.graph_object) -class Dispatcher: - """Dispatcher class for parallel algorithms.""" + +class BackendInterface: + """BackendInterface class for parallel algorithms.""" # Bipartite node_redundancy = node_redundancy @@ -94,18 +105,7 @@ class Dispatcher: # ============================= @staticmethod - def convert_from_nx( - graph, - edge_attrs=None, - node_attrs=None, - preserve_edge_attrs=False, - preserve_node_attrs=False, - preserve_graph_attrs=False, - name=None, - graph_name=None, - *, - weight=None, # For nx.__version__ <= 3.1 - ): + def convert_from_nx(graph, *args, **kwargs): """Convert a networkx.Graph, networkx.DiGraph, networkx.MultiGraph, or networkx.MultiDiGraph to a ParallelGraph.""" if isinstance(graph, ParallelGraph): @@ -116,4 +116,6 @@ def convert_from_nx( def convert_to_nx(result, *, name=None): """Convert a ParallelGraph to a networkx.Graph, networkx.DiGraph, networkx.MultiGraph, or networkx.MultiDiGraph.""" + if isinstance(result, ParallelGraph): + return result.graph_object return result diff --git a/nx_parallel/tests/__init__.py b/nx_parallel/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nx_parallel/tests/test_get_chunks.py b/nx_parallel/tests/test_get_chunks.py new file mode 100644 index 0000000..3a3b9f1 --- /dev/null +++ b/nx_parallel/tests/test_get_chunks.py @@ -0,0 +1,84 @@ +# smoke tests for all functions supporting `get_chunks` kwarg + +import inspect 
+import importlib
+import networkx as nx
+import nx_parallel as nxp
+import random
+import types
+import math
+
+
+def get_all_functions(package_name="nx_parallel"):
+    """Returns a dictionary where the keys are the function names in a given Python package, and the values are dictionaries containing the function's keyword arguments and positional arguments."""
+    package = importlib.import_module(package_name)
+    functions = {}
+
+    for name, obj in inspect.getmembers(package, inspect.isfunction):
+        if not name.startswith("_"):
+            spec = inspect.getfullargspec(obj)
+            functions[name] = {"args": spec.args, "kwargs": spec.kwonlyargs}
+
+    return functions
+
+
+def get_functions_with_get_chunks():
+    """Returns a list of functions with the `get_chunks` kwarg."""
+    all_funcs = get_all_functions()
+    get_chunks_funcs = []
+    for func in all_funcs:
+        if "get_chunks" in all_funcs[func]["args"]:
+            get_chunks_funcs.append(func)
+    return get_chunks_funcs
+
+
+def test_get_chunks():
+    def random_chunking(nodes):
+        _nodes = list(nodes).copy()
+        random.seed(42)
+        random.shuffle(_nodes)
+        num_chunks = nxp.cpu_count()
+        num_in_chunk = max(len(_nodes) // num_chunks, 1)
+        return nxp.chunks(_nodes, num_in_chunk)
+
+    get_chunks_funcs = get_functions_with_get_chunks()
+    ignore_funcs = [
+        "number_of_isolates",
+        "is_reachable",
+    ]
+    tournament_funcs = [
+        "tournament_is_strongly_connected",
+    ]
+    chk_dict_vals = [
+        "betweenness_centrality",
+    ]
+    G = nx.fast_gnp_random_graph(50, 0.6, seed=42)
+    H = nxp.ParallelGraph(G)
+    for func in get_chunks_funcs:
+        print(func)
+        if func not in ignore_funcs:
+            if func in tournament_funcs:
+                G = nx.tournament.random_tournament(50, seed=42)
+                H = nxp.ParallelGraph(G)
+                c1 = getattr(nxp, func)(H)
+                c2 = getattr(nxp, func)(H, get_chunks=random_chunking)
+                assert c1 == c2
+            else:
+                c1 = getattr(nxp, func)(H)
+                c2 = getattr(nxp, func)(H, get_chunks=random_chunking)
+                if isinstance(c1, types.GeneratorType):
+                    c1, c2 = dict(c1), dict(c2)
+                    if func in chk_dict_vals:
+                        for i in range(len(G.nodes)):
+                            assert math.isclose(c1[i], c2[i], abs_tol=1e-16)
+                    else:
+                        assert c1 == c2
+                else:
+                    if func in chk_dict_vals:
+                        for i in range(len(G.nodes)):
+                            assert math.isclose(c1[i], c2[i], abs_tol=1e-16)
+                    else:
+                        if isinstance(c1, float):
+                            assert math.isclose(c1, c2, abs_tol=1e-16)
+                        else:
+                            assert c1 == c2
diff --git a/pyproject.toml b/pyproject.toml
index f9310d4..2bf14b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ test = [
 ]
 
 [project.entry-points."networkx.backends"]
-parallel = "nx_parallel.interface:Dispatcher"
+parallel = "nx_parallel.interface:BackendInterface"
 
 [project.entry-points."networkx.backend_info"]
 parallel = "_nx_parallel:get_info"
diff --git a/timing/heatmap_is_reachable_timing.png b/timing/heatmap_is_reachable_timing.png
index 47f3a51..2d70a7c 100644
Binary files a/timing/heatmap_is_reachable_timing.png and b/timing/heatmap_is_reachable_timing.png differ
diff --git a/timing/heatmap_is_strongly_connected_timing.png b/timing/heatmap_is_strongly_connected_timing.png
new file mode 100644
index 0000000..ffd73b2
Binary files /dev/null and b/timing/heatmap_is_strongly_connected_timing.png differ
diff --git a/timing/timing_individual_function.py b/timing/timing_individual_function.py
index 39dffa7..809315d 100644
--- a/timing/timing_individual_function.py
+++ b/timing/timing_individual_function.py
@@ -1,6 +1,4 @@
 import time
-import random
-import types
 import networkx as nx
 import pandas as pd
@@ -14,11 +12,11 @@
 # for bipartite graphs
 # n = [50, 100, 200, 400]
 # m = 
[25, 50, 100, 200] -number_of_nodes_list = [75, 150, 300, 600] +number_of_nodes_list = [200, 400, 800, 1600] weighted = False pList = [1, 0.8, 0.6, 0.4, 0.2] -currFun = nx.bipartite.node_redundancy -currFun = nx.square_clustering +currFun = nx.tournament.is_reachable +""" for p in pList: for num in range(len(number_of_nodes_list)): # create original and parallel graphs @@ -26,7 +24,7 @@ number_of_nodes_list[num], p, seed=42, directed=True ) - """ + # for bipartite.node_redundancy G = nx.bipartite.random_graph(n[num], m[num], p, seed=42, directed=True) for i in G.nodes: @@ -37,7 +35,6 @@ G.add_edge(i, random.choice([node for node in G.nodes if node != i])) elif len(l) == 1: G.add_edge(i, random.choice([node for node in G.nodes if node != i and node not in list(G.neighbors(i))])) - """ # for weighted graphs if weighted: @@ -63,29 +60,37 @@ timesFaster = stdTime / parallelTime heatmapDF.at[number_of_nodes_list[num], p] = timesFaster print("Finished " + str(currFun)) +""" # Code to create for row of heatmap specifically for tournaments -# for p in pList: -# for num in number_of_nodes_list): -# G = nx.tournament.random_tournament(num) -# H = nx_parallel.ParallelDiGraph(G) -# t1 = time.time() -# c = nx.tournament.is_reachable(H, 1, num) -# t2 = time.time() -# parallelTime = t2-t1 -# t1 = time.time() -# c = nx.tournament.is_reachable(G, 1, num) -# t2 = time.time() -# stdTime = t2-t1 -# timesFaster = stdTime/parallelTime -# heatmapDF.at[num, 3] = timesFaster +for num in number_of_nodes_list: + print(num) + G = nx.tournament.random_tournament(num, seed=42) + H = nxp.ParallelGraph(G) + t1 = time.time() + c = currFun(H, 1, num) + t2 = time.time() + parallelTime = t2 - t1 + print(parallelTime) + t1 = time.time() + c = currFun(G, 1, num) + t2 = time.time() + stdTime = t2 - t1 + print(stdTime) + timesFaster = stdTime / parallelTime + heatmapDF.at[num, 3] = timesFaster + print("Finished " + str(currFun)) # plotting the heatmap with numbers and a green color scheme plt.figure(figsize=(20, 4)) hm = sns.heatmap(data=heatmapDF.T, annot=True, cmap="Greens", cbar=True) # Remove the tick labels on both axes -hm.set_yticklabels(pList) +hm.set_yticklabels( + [ + 3, + ] +) # Adding x-axis labels hm.set_xticklabels(number_of_nodes_list)
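A condensed version of the tournament timing loop above, for a single graph size, might look like the following minimal sketch (graph size and node indices are arbitrary):

```py
import time

import networkx as nx
import nx_parallel as nxp

G = nx.tournament.random_tournament(400, seed=42)
H = nxp.ParallelGraph(G)

t0 = time.time()
nx.tournament.is_reachable(G, 1, 399)  # sequential networkx implementation
std_time = time.time() - t0

t0 = time.time()
nxp.is_reachable(H, 1, 399)  # nx-parallel backend implementation
parallel_time = time.time() - t0

print(f"speedup: {std_time / parallel_time:.2f}x")
```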