Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Move search functions and tests to their own files #407

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 1 addition & 131 deletions issue_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
Functions:
get_env_vars() -> EnvVars: Get the environment variables for use
in the script.
search_issues(search_query: str, github_connection: github3.GitHub, owners_and_repositories: List[dict])
-> github3.structs.SearchIterator:
Searches for issues in a GitHub repository that match the given search query.
get_per_issue_metrics(issues: Union[List[dict], List[github3.issues.Issue]],
discussions: bool = False, labels: Union[List[str], None] = None,
ignore_users: List[str] = []) -> tuple[List, int, int]:
Expand All @@ -21,8 +18,6 @@
"""

import shutil
import sys
from time import sleep
from typing import List, Union

import github3
Expand All @@ -36,6 +31,7 @@
from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
from markdown_writer import write_to_markdown
from most_active_mentors import count_comments_per_user, get_mentor_count
from search import get_owners_and_repositories, search_issues
from time_to_answer import get_stats_time_to_answer, measure_time_to_answer
from time_to_close import get_stats_time_to_close, measure_time_to_close
from time_to_first_response import (
Expand All @@ -46,101 +42,6 @@
from time_to_ready_for_review import get_time_to_ready_for_review


def search_issues(
    search_query: str,
    github_connection: github3.GitHub,
    owners_and_repositories: List[dict],
    rate_limit_bypass: bool = False,
) -> List[github3.search.IssueSearchResult]:  # type: ignore
    """
    Searches for issues/prs/discussions in a GitHub repository that match
    the given search query and handles errors related to GitHub API responses.

    Every matching issue title is printed as it is collected. If one of the
    handled github3 errors is raised while iterating, a message is printed
    and the process exits with status 1.

    Args:
        search_query (str): The search query to use for finding issues/prs/discussions.
        github_connection (github3.GitHub): A connection to the GitHub API.
        owners_and_repositories (List[dict]): A list of dictionaries containing
            the owner and repository names. Only used to build error messages.
        rate_limit_bypass (bool, optional): When True, never pause for the
            API rate limit. Defaults to False.

    Returns:
        List[github3.search.IssueSearchResult]: A list of issues that match the search query.

    Raises:
        RuntimeError: If the rate limit is still low after the maximum number
            of waits.
        SystemExit: Via ``sys.exit(1)`` on a forbidden, not-found, connection,
            authentication, or unprocessable-entity error from github3.
    """

    # Rate Limit Handling: API only allows 30 requests per minute
    def wait_for_api_refresh(
        iterator: github3.structs.SearchIterator, rate_limit_bypass: bool = False
    ):
        # If the rate limit bypass is enabled, don't wait for the API to refresh
        if rate_limit_bypass:
            return

        max_retries = 5
        retry_count = 0
        sleep_time = 70

        # NOTE(review): nothing inside this loop refreshes
        # `iterator.ratelimit_remaining`; presumably github3 only updates it
        # when the iterator performs a request, in which case a low value
        # would sleep through every retry and then raise. Confirm upstream.
        while iterator.ratelimit_remaining < 5:
            if retry_count >= max_retries:
                raise RuntimeError("Exceeded maximum retries for API rate limit")

            print(
                f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh."
            )
            sleep(sleep_time)

            # Exponentially increase the sleep time for the next retry
            sleep_time *= 2
            retry_count += 1

    issues_per_page = 100

    print("Searching for issues...")
    issues_iterator = github_connection.search_issues(
        search_query, per_page=issues_per_page
    )
    wait_for_api_refresh(issues_iterator, rate_limit_bypass)

    issues = []
    # Space-separated "owner/repository" pairs, used only in error messages.
    repos_and_owners_string = " ".join(
        f"{item.get('owner', '')}/{item.get('repository', '')}"
        for item in owners_and_repositories
    )

    # Print the issue titles and add them to the list of issues
    try:
        for idx, issue in enumerate(issues_iterator, 1):
            print(issue.title)  # type: ignore
            issues.append(issue)

            # requests are sent once per page of issues
            if idx % issues_per_page == 0:
                wait_for_api_refresh(issues_iterator, rate_limit_bypass)

    except github3.exceptions.ForbiddenError:
        print(
            "You do not have permission to view a repository "
            f"from: '{repos_and_owners_string}'; Check your API Token."
        )
        sys.exit(1)
    except github3.exceptions.NotFoundError:
        print(
            "The repository could not be found; "
            f"Check the repository owner and names: '{repos_and_owners_string}'"
        )
        sys.exit(1)
    except github3.exceptions.ConnectionError:
        print(
            "There was a connection error; Check your internet connection or API Token."
        )
        sys.exit(1)
    except github3.exceptions.AuthenticationFailed:
        print("Authentication failed; Check your API Token.")
        sys.exit(1)
    except github3.exceptions.UnprocessableEntity:
        print("The search query is invalid; Check the search query.")
        sys.exit(1)

    return issues


def get_per_issue_metrics(
issues: Union[List[dict], List[github3.search.IssueSearchResult]], # type: ignore
env_vars: EnvVars,
Expand Down Expand Up @@ -264,37 +165,6 @@ def get_per_issue_metrics(
return issues_with_metrics, num_issues_open, num_issues_closed


def get_owners_and_repositories(
    search_query: str,
) -> List[dict]:
    """Get the owners and repositories from the search query.

    The query is split on whitespace and each token is inspected for a
    ``repo:``, ``org:``, ``owner:`` or ``user:`` qualifier. The text between
    the first and second colon of the token is treated as the qualifier value.

    Args:
        search_query (str): The search query used to search for issues.

    Returns:
        List[dict]: A list of dictionaries of owners and repositories. A
            ``repo:owner/name`` token yields ``{"owner", "repository"}``;
            an ``org:``/``owner:``/``user:`` token yields ``{"owner"}`` only.
            Tokens without any of these qualifiers contribute nothing.

    """
    owner_qualifiers = ("org:", "owner:", "user:")
    results_list = []
    # split() with no argument handles tabs, newlines and repeated spaces.
    for item in search_query.split():
        if ":" not in item:
            continue
        value = item.split(":")[1]

        result = {}
        # Require the "/" in the value itself so that a slash appearing
        # before the colon cannot trigger an IndexError.
        if "repo:" in item and "/" in value:
            value_parts = value.split("/")
            result["owner"] = value_parts[0]
            result["repository"] = value_parts[1]
        if any(qualifier in item for qualifier in owner_qualifiers):
            result["owner"] = value
        if result:
            results_list.append(result)

    return results_list


def main(): # pragma: no cover
"""Run the issue-metrics script.

Expand Down
138 changes: 138 additions & 0 deletions search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""A module to search for issues in a GitHub repository."""

import sys
from time import sleep
from typing import List

import github3
import github3.structs


def search_issues(
    search_query: str,
    github_connection: github3.GitHub,
    owners_and_repositories: List[dict],
    rate_limit_bypass: bool = False,
) -> List[github3.search.IssueSearchResult]:  # type: ignore
    """
    Searches for issues/prs/discussions in a GitHub repository that match
    the given search query and handles errors related to GitHub API responses.

    Every matching issue title is printed as it is collected. If one of the
    handled github3 errors is raised while iterating, a message is printed
    and the process exits with status 1.

    Args:
        search_query (str): The search query to use for finding issues/prs/discussions.
        github_connection (github3.GitHub): A connection to the GitHub API.
        owners_and_repositories (List[dict]): A list of dictionaries containing
            the owner and repository names. Only used to build error messages.
        rate_limit_bypass (bool, optional): A flag to bypass the rate limit to be used
            when working with GitHub server that has rate limiting turned off. Defaults to False.

    Returns:
        List[github3.search.IssueSearchResult]: A list of issues that match the search query.

    Raises:
        RuntimeError: If the rate limit is still low after the maximum number
            of waits.
        SystemExit: Via ``sys.exit(1)`` on a forbidden, not-found, connection,
            authentication, or unprocessable-entity error from github3.
    """

    # Rate Limit Handling: API only allows 30 requests per minute
    def wait_for_api_refresh(
        iterator: github3.structs.SearchIterator, rate_limit_bypass: bool = False
    ):
        # If the rate limit bypass is enabled, don't wait for the API to refresh
        if rate_limit_bypass:
            return

        max_retries = 5
        retry_count = 0
        sleep_time = 70

        # NOTE(review): nothing inside this loop refreshes
        # `iterator.ratelimit_remaining`; presumably github3 only updates it
        # when the iterator performs a request, in which case a low value
        # would sleep through every retry and then raise. Confirm upstream.
        while iterator.ratelimit_remaining < 5:
            if retry_count >= max_retries:
                raise RuntimeError("Exceeded maximum retries for API rate limit")

            print(
                f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh."
            )
            sleep(sleep_time)

            # Exponentially increase the sleep time for the next retry
            sleep_time *= 2
            retry_count += 1

    issues_per_page = 100

    print("Searching for issues...")
    issues_iterator = github_connection.search_issues(
        search_query, per_page=issues_per_page
    )
    wait_for_api_refresh(issues_iterator, rate_limit_bypass)

    issues = []
    # Space-separated "owner/repository" pairs, used only in error messages.
    repos_and_owners_string = " ".join(
        f"{item.get('owner', '')}/{item.get('repository', '')}"
        for item in owners_and_repositories
    )

    # Print the issue titles and add them to the list of issues
    try:
        for idx, issue in enumerate(issues_iterator, 1):
            print(issue.title)  # type: ignore
            issues.append(issue)

            # requests are sent once per page of issues
            if idx % issues_per_page == 0:
                wait_for_api_refresh(issues_iterator, rate_limit_bypass)

    except github3.exceptions.ForbiddenError:
        # Implicit string concatenation keeps the line short without the
        # backslash continuation, which can leak indentation into the text.
        print(
            "You do not have permission to view a repository "
            f"from: '{repos_and_owners_string}'; Check your API Token."
        )
        sys.exit(1)
    except github3.exceptions.NotFoundError:
        print(
            "The repository could not be found; "
            f"Check the repository owner and names: '{repos_and_owners_string}'"
        )
        sys.exit(1)
    except github3.exceptions.ConnectionError:
        print(
            "There was a connection error; Check your internet connection or API Token."
        )
        sys.exit(1)
    except github3.exceptions.AuthenticationFailed:
        print("Authentication failed; Check your API Token.")
        sys.exit(1)
    except github3.exceptions.UnprocessableEntity:
        print("The search query is invalid; Check the search query.")
        sys.exit(1)

    return issues


def get_owners_and_repositories(
    search_query: str,
) -> List[dict]:
    """Get the owners and repositories from the search query.

    The query is split on whitespace and each token is inspected for a
    ``repo:``, ``org:``, ``owner:`` or ``user:`` qualifier. The text between
    the first and second colon of the token is treated as the qualifier value.

    Args:
        search_query (str): The search query used to search for issues.

    Returns:
        List[dict]: A list of dictionaries of owners and repositories. A
            ``repo:owner/name`` token yields ``{"owner", "repository"}``;
            an ``org:``/``owner:``/``user:`` token yields ``{"owner"}`` only.
            Tokens without any of these qualifiers contribute nothing.

    """
    owner_qualifiers = ("org:", "owner:", "user:")
    results_list = []
    # split() with no argument handles tabs, newlines and repeated spaces.
    for item in search_query.split():
        if ":" not in item:
            continue
        value = item.split(":")[1]

        result = {}
        # Require the "/" in the value itself so that a slash appearing
        # before the colon cannot trigger an IndexError.
        if "repo:" in item and "/" in value:
            value_parts = value.split("/")
            result["owner"] = value_parts[0]
            result["repository"] = value_parts[1]
        if any(qualifier in item for qualifier in owner_qualifiers):
            result["owner"] = value
        if result:
            results_list.append(result)

    return results_list
Loading