From 0e3c3f3bd1df0eeb1e785d56dc05a378f9b26849 Mon Sep 17 00:00:00 2001 From: Zack Koppert Date: Fri, 18 Oct 2024 09:46:01 -0700 Subject: [PATCH] refactor: Move search functions and tests to their own files Signed-off-by: Zack Koppert --- issue_metrics.py | 132 +----------------------------------- search.py | 138 ++++++++++++++++++++++++++++++++++++++ test_issue_metrics.py | 144 +-------------------------------------- test_search.py | 152 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 293 insertions(+), 273 deletions(-) create mode 100644 search.py create mode 100644 test_search.py diff --git a/issue_metrics.py b/issue_metrics.py index eed5525..7a41f57 100644 --- a/issue_metrics.py +++ b/issue_metrics.py @@ -8,9 +8,6 @@ Functions: get_env_vars() -> EnvVars: Get the environment variables for use in the script. - search_issues(search_query: str, github_connection: github3.GitHub, owners_and_repositories: List[dict]) - -> github3.structs.SearchIterator: - Searches for issues in a GitHub repository that match the given search query. get_per_issue_metrics(issues: Union[List[dict], List[github3.issues.Issue]], discussions: bool = False), labels: Union[List[str], None] = None, ignore_users: List[str] = [] -> tuple[List, int, int]: @@ -21,8 +18,6 @@ """ import shutil -import sys -from time import sleep from typing import List, Union import github3 @@ -36,6 +31,7 @@ from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file from markdown_writer import write_to_markdown from most_active_mentors import count_comments_per_user, get_mentor_count +from search import get_owners_and_repositories, search_issues from time_to_answer import get_stats_time_to_answer, measure_time_to_answer from time_to_close import get_stats_time_to_close, measure_time_to_close from time_to_first_response import ( @@ -46,101 +42,6 @@ from time_to_ready_for_review import get_time_to_ready_for_review -def search_issues( - search_query: str, - github_connection: github3.GitHub, - owners_and_repositories: List[dict], - rate_limit_bypass: bool = False, -) -> List[github3.search.IssueSearchResult]: # type: ignore - """ - Searches for issues/prs/discussions in a GitHub repository that match - the given search query and handles errors related to GitHub API responses. - - Args: - search_query (str): The search query to use for finding issues/prs/discussions. - github_connection (github3.GitHub): A connection to the GitHub API. - owners_and_repositories (List[dict]): A list of dictionaries containing - the owner and repository names. - - Returns: - List[github3.search.IssueSearchResult]: A list of issues that match the search query. - """ - - # Rate Limit Handling: API only allows 30 requests per minute - def wait_for_api_refresh( - iterator: github3.structs.SearchIterator, rate_limit_bypass: bool = False - ): - # If the rate limit bypass is enabled, don't wait for the API to refresh - if rate_limit_bypass: - return - - max_retries = 5 - retry_count = 0 - sleep_time = 70 - - while iterator.ratelimit_remaining < 5: - if retry_count >= max_retries: - raise RuntimeError("Exceeded maximum retries for API rate limit") - - print( - f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh." - ) - sleep(sleep_time) - - # Exponentially increase the sleep time for the next retry - sleep_time *= 2 - retry_count += 1 - - issues_per_page = 100 - - print("Searching for issues...") - issues_iterator = github_connection.search_issues( - search_query, per_page=issues_per_page - ) - wait_for_api_refresh(issues_iterator, rate_limit_bypass) - - issues = [] - repos_and_owners_string = "" - for item in owners_and_repositories: - repos_and_owners_string += ( - f"{item.get('owner', '')}/{item.get('repository', '')} " - ) - - # Print the issue titles - try: - for idx, issue in enumerate(issues_iterator, 1): - print(issue.title) # type: ignore - issues.append(issue) - - # requests are sent once per page of issues - if idx % issues_per_page == 0: - wait_for_api_refresh(issues_iterator, rate_limit_bypass) - - except github3.exceptions.ForbiddenError: - print( - f"You do not have permission to view a repository from: '{repos_and_owners_string}'; Check your API Token." - ) - sys.exit(1) - except github3.exceptions.NotFoundError: - print( - f"The repository could not be found; Check the repository owner and names: '{repos_and_owners_string}" - ) - sys.exit(1) - except github3.exceptions.ConnectionError: - print( - "There was a connection error; Check your internet connection or API Token." - ) - sys.exit(1) - except github3.exceptions.AuthenticationFailed: - print("Authentication failed; Check your API Token.") - sys.exit(1) - except github3.exceptions.UnprocessableEntity: - print("The search query is invalid; Check the search query.") - sys.exit(1) - - return issues - - def get_per_issue_metrics( issues: Union[List[dict], List[github3.search.IssueSearchResult]], # type: ignore env_vars: EnvVars, @@ -264,37 +165,6 @@ def get_per_issue_metrics( return issues_with_metrics, num_issues_open, num_issues_closed -def get_owners_and_repositories( - search_query: str, -) -> List[dict]: - """Get the owners and repositories from the search query. - - Args: - search_query (str): The search query used to search for issues. - - Returns: - List[dict]: A list of dictionaries of owners and repositories. - - """ - search_query_split = search_query.split(" ") - results_list = [] - for item in search_query_split: - result = {} - if "repo:" in item and "/" in item: - result["owner"] = item.split(":")[1].split("/")[0] - result["repository"] = item.split(":")[1].split("/")[1] - if "org:" in item or "owner:" in item or "user:" in item: - result["owner"] = item.split(":")[1] - if "user:" in item: - result["owner"] = item.split(":")[1] - if "owner:" in item: - result["owner"] = item.split(":")[1] - if result: - results_list.append(result) - - return results_list - - def main(): # pragma: no cover """Run the issue-metrics script. diff --git a/search.py b/search.py new file mode 100644 index 0000000..5a5c1fa --- /dev/null +++ b/search.py @@ -0,0 +1,138 @@ +""" A module to search for issues in a GitHub repository.""" + +import sys +from time import sleep +from typing import List + +import github3 +import github3.structs + + +def search_issues( + search_query: str, + github_connection: github3.GitHub, + owners_and_repositories: List[dict], + rate_limit_bypass: bool = False, +) -> List[github3.search.IssueSearchResult]: # type: ignore + """ + Searches for issues/prs/discussions in a GitHub repository that match + the given search query and handles errors related to GitHub API responses. + + Args: + search_query (str): The search query to use for finding issues/prs/discussions. + github_connection (github3.GitHub): A connection to the GitHub API. + owners_and_repositories (List[dict]): A list of dictionaries containing + the owner and repository names. + rate_limit_bypass (bool, optional): A flag to bypass the rate limit to be used + when working with GitHub server that has rate limiting turned off. Defaults to False. + + Returns: + List[github3.search.IssueSearchResult]: A list of issues that match the search query. + """ + + # Rate Limit Handling: API only allows 30 requests per minute + def wait_for_api_refresh( + iterator: github3.structs.SearchIterator, rate_limit_bypass: bool = False + ): + # If the rate limit bypass is enabled, don't wait for the API to refresh + if rate_limit_bypass: + return + + max_retries = 5 + retry_count = 0 + sleep_time = 70 + + while iterator.ratelimit_remaining < 5: + if retry_count >= max_retries: + raise RuntimeError("Exceeded maximum retries for API rate limit") + + print( + f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh." + ) + sleep(sleep_time) + + # Exponentially increase the sleep time for the next retry + sleep_time *= 2 + retry_count += 1 + + issues_per_page = 100 + + print("Searching for issues...") + issues_iterator = github_connection.search_issues( + search_query, per_page=issues_per_page + ) + wait_for_api_refresh(issues_iterator, rate_limit_bypass) + + issues = [] + repos_and_owners_string = "" + for item in owners_and_repositories: + repos_and_owners_string += ( + f"{item.get('owner', '')}/{item.get('repository', '')} " + ) + + # Print the issue titles and add them to the list of issues + try: + for idx, issue in enumerate(issues_iterator, 1): + print(issue.title) # type: ignore + issues.append(issue) + + # requests are sent once per page of issues + if idx % issues_per_page == 0: + wait_for_api_refresh(issues_iterator, rate_limit_bypass) + + except github3.exceptions.ForbiddenError: + print( + f"You do not have permission to view a repository \ +from: '{repos_and_owners_string}'; Check your API Token." + ) + sys.exit(1) + except github3.exceptions.NotFoundError: + print( + f"The repository could not be found; \ +Check the repository owner and names: '{repos_and_owners_string}" + ) + sys.exit(1) + except github3.exceptions.ConnectionError: + print( + "There was a connection error; Check your internet connection or API Token." + ) + sys.exit(1) + except github3.exceptions.AuthenticationFailed: + print("Authentication failed; Check your API Token.") + sys.exit(1) + except github3.exceptions.UnprocessableEntity: + print("The search query is invalid; Check the search query.") + sys.exit(1) + + return issues + + +def get_owners_and_repositories( + search_query: str, +) -> List[dict]: + """Get the owners and repositories from the search query. + + Args: + search_query (str): The search query used to search for issues. + + Returns: + List[dict]: A list of dictionaries of owners and repositories. + + """ + search_query_split = search_query.split(" ") + results_list = [] + for item in search_query_split: + result = {} + if "repo:" in item and "/" in item: + result["owner"] = item.split(":")[1].split("/")[0] + result["repository"] = item.split(":")[1].split("/")[1] + if "org:" in item or "owner:" in item or "user:" in item: + result["owner"] = item.split(":")[1] + if "user:" in item: + result["owner"] = item.split(":")[1] + if "owner:" in item: + result["owner"] = item.split(":")[1] + if result: + results_list.append(result) + + return results_list diff --git a/test_issue_metrics.py b/test_issue_metrics.py index ab025af..9892cca 100644 --- a/test_issue_metrics.py +++ b/test_issue_metrics.py @@ -19,153 +19,12 @@ from issue_metrics import ( IssueWithMetrics, get_env_vars, - get_owners_and_repositories, get_per_issue_metrics, measure_time_to_close, measure_time_to_first_response, - search_issues, ) -class TestSearchIssues(unittest.TestCase): - """Unit tests for the search_issues function. - - This class contains unit tests for the search_issues function in the - issue_metrics module. The tests use the unittest module and the unittest.mock - module to mock the GitHub API and test the function in isolation. - - Methods: - test_search_issues_with_owner_and_repository: Test that search_issues with owner/repo returns the correct issues. - test_search_issues_with_just_owner_or_org: Test that search_issues with just an owner/org returns the correct issues. - - """ - - def test_search_issues_with_owner_and_repository(self): - """Test that search_issues with owner/repo returns the correct issues.""" - - # Set up the mock GitHub connection object - mock_issues = [ - MagicMock(title="Issue 1"), - MagicMock(title="Issue 2"), - ] - - # simulating github3.structs.SearchIterator return value - mock_search_result = MagicMock() - mock_search_result.__iter__.return_value = iter(mock_issues) - mock_search_result.ratelimit_remaining = 30 - - mock_connection = MagicMock() - mock_connection.search_issues.return_value = mock_search_result - - # Call search_issues and check that it returns the correct issues - repo_with_owner = {"owner": "owner1", "repository": "repo1"} - owners_and_repositories = [repo_with_owner] - issues = search_issues("is:open", mock_connection, owners_and_repositories) - self.assertEqual(issues, mock_issues) - - def test_search_issues_with_just_owner_or_org(self): - """Test that search_issues with just an owner/org returns the correct issues.""" - - # Set up the mock GitHub connection object - mock_issues = [ - MagicMock(title="Issue 1"), - MagicMock(title="Issue 2"), - MagicMock(title="Issue 3"), - ] - - # simulating github3.structs.SearchIterator return value - mock_search_result = MagicMock() - mock_search_result.__iter__.return_value = iter(mock_issues) - mock_search_result.ratelimit_remaining = 30 - - mock_connection = MagicMock() - mock_connection.search_issues.return_value = mock_search_result - - # Call search_issues and check that it returns the correct issues - org = {"owner": "org1"} - owners = [org] - issues = search_issues("is:open", mock_connection, owners) - self.assertEqual(issues, mock_issues) - - def test_search_issues_with_just_owner_or_org_with_bypass(self): - """Test that search_issues with just an owner/org returns the correct issues.""" - - # Set up the mock GitHub connection object - mock_issues = [ - MagicMock(title="Issue 1"), - MagicMock(title="Issue 2"), - MagicMock(title="Issue 3"), - ] - - # simulating github3.structs.SearchIterator return value - mock_search_result = MagicMock() - mock_search_result.__iter__.return_value = iter(mock_issues) - mock_search_result.ratelimit_remaining = 30 - - mock_connection = MagicMock() - mock_connection.search_issues.return_value = mock_search_result - - # Call search_issues and check that it returns the correct issues - org = {"owner": "org1"} - owners = [org] - issues = search_issues( - "is:open", mock_connection, owners, rate_limit_bypass=True - ) - self.assertEqual(issues, mock_issues) - - -class TestGetOwnerAndRepository(unittest.TestCase): - """Unit tests for the get_owners_and_repositories function. - - This class contains unit tests for the get_owners_and_repositories function in the - issue_metrics module. The tests use the unittest module and the unittest.mock - module to mock the GitHub API and test the function in isolation. - - Methods: - test_get_owners_with_owner_and_repo_in_query: Test get both owner and repo. - test_get_owners_and_repositories_with_repo_in_query: Test get just owner. - test_get_owners_and_repositories_without_either_in_query: Test get neither. - test_get_owners_and_repositories_with_multiple_entries: Test get multiple entries. - """ - - def test_get_owners_with_owner_and_repo_in_query(self): - """Test get both owner and repo.""" - result = get_owners_and_repositories("repo:owner1/repo1") - self.assertEqual(result[0].get("owner"), "owner1") - self.assertEqual(result[0].get("repository"), "repo1") - - def test_get_owner_and_repositories_without_repo_in_query(self): - """Test get just owner.""" - result = get_owners_and_repositories("org:owner1") - self.assertEqual(result[0].get("owner"), "owner1") - self.assertIsNone(result[0].get("repository")) - - def test_get_owners_and_repositories_without_either_in_query(self): - """Test get neither.""" - result = get_owners_and_repositories("is:blah") - self.assertEqual(result, []) - - def test_get_owners_and_repositories_with_multiple_entries(self): - """Test get multiple entries.""" - result = get_owners_and_repositories("repo:owner1/repo1 org:owner2") - self.assertEqual(result[0].get("owner"), "owner1") - self.assertEqual(result[0].get("repository"), "repo1") - self.assertEqual(result[1].get("owner"), "owner2") - self.assertIsNone(result[1].get("repository")) - - def test_get_owners_and_repositories_with_org(self): - """Test get org as owner.""" - result = get_owners_and_repositories("org:owner1") - self.assertEqual(result[0].get("owner"), "owner1") - self.assertIsNone(result[0].get("repository")) - - def test_get_owners_and_repositories_with_user(self): - """Test get user as owner.""" - result = get_owners_and_repositories("user:owner1") - self.assertEqual(result[0].get("owner"), "owner1") - self.assertIsNone(result[0].get("repository")) - - class TestGetEnvVars(unittest.TestCase): """Test suite for the get_env_vars function.""" @@ -426,7 +285,8 @@ def test_get_per_issue_metrics_without_hide_envs(self): ) def test_get_per_issue_metrics_with_ignore_users(self): """ - Test that the function correctly filters out issues with authors in the IGNORE_USERS variable + Test that the function correctly filters out issues + with authors in the IGNORE_USERS variable """ # Create mock data diff --git a/test_search.py b/test_search.py new file mode 100644 index 0000000..bd1d36f --- /dev/null +++ b/test_search.py @@ -0,0 +1,152 @@ +"""Unit tests for the search module.""" + +import unittest +from unittest.mock import MagicMock + +from search import get_owners_and_repositories, search_issues + + +class TestSearchIssues(unittest.TestCase): + """Unit tests for the search_issues function. + + This class contains unit tests for the search_issues function in the + issue_metrics module. The tests use the unittest module and the unittest.mock + module to mock the GitHub API and test the function in isolation. + + Methods: + test_search_issues_with_owner_and_repository: + Test that search_issues with owner/repo returns the correct issues. + test_search_issues_with_just_owner_or_org: + Test that search_issues with just an owner/org returns the correct issues. + test_search_issues_with_just_owner_or_org_with_bypass: + Test that search_issues with just an owner/org returns the correct issues + with rate limit bypass enabled. + + """ + + def test_search_issues_with_owner_and_repository(self): + """Test that search_issues with owner/repo returns the correct issues.""" + + # Set up the mock GitHub connection object + mock_issues = [ + MagicMock(title="Issue 1"), + MagicMock(title="Issue 2"), + ] + + # simulating github3.structs.SearchIterator return value + mock_search_result = MagicMock() + mock_search_result.__iter__.return_value = iter(mock_issues) + mock_search_result.ratelimit_remaining = 30 + + mock_connection = MagicMock() + mock_connection.search_issues.return_value = mock_search_result + + # Call search_issues and check that it returns the correct issues + repo_with_owner = {"owner": "owner1", "repository": "repo1"} + owners_and_repositories = [repo_with_owner] + issues = search_issues("is:open", mock_connection, owners_and_repositories) + self.assertEqual(issues, mock_issues) + + def test_search_issues_with_just_owner_or_org(self): + """Test that search_issues with just an owner/org returns the correct issues.""" + + # Set up the mock GitHub connection object + mock_issues = [ + MagicMock(title="Issue 1"), + MagicMock(title="Issue 2"), + MagicMock(title="Issue 3"), + ] + + # simulating github3.structs.SearchIterator return value + mock_search_result = MagicMock() + mock_search_result.__iter__.return_value = iter(mock_issues) + mock_search_result.ratelimit_remaining = 30 + + mock_connection = MagicMock() + mock_connection.search_issues.return_value = mock_search_result + + # Call search_issues and check that it returns the correct issues + org = {"owner": "org1"} + owners = [org] + issues = search_issues("is:open", mock_connection, owners) + self.assertEqual(issues, mock_issues) + + def test_search_issues_with_just_owner_or_org_with_bypass(self): + """Test that search_issues with just an owner/org returns the correct issues.""" + + # Set up the mock GitHub connection object + mock_issues = [ + MagicMock(title="Issue 1"), + MagicMock(title="Issue 2"), + MagicMock(title="Issue 3"), + ] + + # simulating github3.structs.SearchIterator return value + mock_search_result = MagicMock() + mock_search_result.__iter__.return_value = iter(mock_issues) + mock_search_result.ratelimit_remaining = 30 + + mock_connection = MagicMock() + mock_connection.search_issues.return_value = mock_search_result + + # Call search_issues and check that it returns the correct issues + org = {"owner": "org1"} + owners = [org] + issues = search_issues( + "is:open", mock_connection, owners, rate_limit_bypass=True + ) + self.assertEqual(issues, mock_issues) + + +class TestGetOwnerAndRepository(unittest.TestCase): + """Unit tests for the get_owners_and_repositories function. + + This class contains unit tests for the get_owners_and_repositories function in the + issue_metrics module. The tests use the unittest module and the unittest.mock + module to mock the GitHub API and test the function in isolation. + + Methods: + test_get_owners_with_owner_and_repo_in_query: Test get both owner and repo. + test_get_owner_and_repositories_without_repo_in_query: Test get just owner. + test_get_owners_and_repositories_without_either_in_query: Test get neither. + test_get_owners_and_repositories_with_multiple_entries: Test get multiple entries. + test_get_owners_and_repositories_with_org: Test get org as owner. + test_get_owners_and_repositories_with_user: Test get user as owner. + """ + + def test_get_owners_with_owner_and_repo_in_query(self): + """Test get both owner and repo.""" + result = get_owners_and_repositories("repo:owner1/repo1") + self.assertEqual(result[0].get("owner"), "owner1") + self.assertEqual(result[0].get("repository"), "repo1") + + def test_get_owner_and_repositories_without_repo_in_query(self): + """Test get just owner.""" + result = get_owners_and_repositories("org:owner1") + self.assertEqual(result[0].get("owner"), "owner1") + self.assertIsNone(result[0].get("repository")) + + def test_get_owners_and_repositories_without_either_in_query(self): + """Test get neither.""" + result = get_owners_and_repositories("is:blah") + self.assertEqual(result, []) + + def test_get_owners_and_repositories_with_multiple_entries(self): + """Test get multiple entries.""" + result = get_owners_and_repositories("repo:owner1/repo1 org:owner2") + self.assertEqual(result[0].get("owner"), "owner1") + self.assertEqual(result[0].get("repository"), "repo1") + self.assertEqual(result[1].get("owner"), "owner2") + self.assertIsNone(result[1].get("repository")) + + def test_get_owners_and_repositories_with_org(self): + """Test get org as owner.""" + result = get_owners_and_repositories("org:owner1") + self.assertEqual(result[0].get("owner"), "owner1") + self.assertIsNone(result[0].get("repository")) + + def test_get_owners_and_repositories_with_user(self): + """Test get user as owner.""" + result = get_owners_and_repositories("user:owner1") + self.assertEqual(result[0].get("owner"), "owner1") + self.assertIsNone(result[0].get("repository"))