diff --git a/.github/linters/.flake8 b/.github/linters/.flake8 index 1aaf829..de0917a 100644 --- a/.github/linters/.flake8 +++ b/.github/linters/.flake8 @@ -1,5 +1,5 @@ [flake8] exclude = venv,.venv,.git,__pycache__ -extend-ignore = C901 +extend-ignore = C901, E203 max-line-length = 150 statistics = True diff --git a/.gitignore b/.gitignore index 5aab725..4f454b1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # Output files -issue_metrics.md +issue_metrics*.md issue_metrics.json # Byte-compiled / optimized / DLL files diff --git a/README.md b/README.md index 801ea85..c6b7ea9 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,7 @@ This action can be configured to authenticate with GitHub App Installation or Pe - [Configuring the `SEARCH_QUERY`](./docs/search-query.md) - [Local usage without Docker](./docs/local-usage-without-docker.md) - [Authenticating with GitHub App Installation](./docs/authenticating-with-github-app-installation.md) +- [Dealing with large issue_metrics.md files](./docs/dealing-with-large-issue-metrics.md) ## Contributions diff --git a/docs/dealing-with-large-issue-metrics.md b/docs/dealing-with-large-issue-metrics.md new file mode 100644 index 0000000..c2247f6 --- /dev/null +++ b/docs/dealing-with-large-issue-metrics.md @@ -0,0 +1,17 @@ +# Dealing with large issue metrics Markdown files + +When working with lots of issues/pull requests/discussion results, the resulting issue_metrics.md file can become very large. This can cause the GitHub API to return an error when trying to create an issue with the contents of the file. + +```shell +Pull request creation failed. Validation failed: Body is too long (maximum is 65536 characters) +``` + +To work around this limitation, the issue-metrics action detects the large file size and splits the issue_metrics.md file into smaller files. So instead of issue_metrics.md, you will get issue_metrics_0.md, issue_metrics_1.md, etc. 
+Since we don't want the action to fail, it keeps the usual name (issue_metrics.md) for the first split file and appends a number to the name of each subsequent split file. + +You can choose one of the following strategies to deal with the split files: +- Create multiple issues, each using the next split file in the sequence. +- Upload the full file as an artifact and link to it in the issue body. +- Create an issue and put the content of the split files as issue comments. + +JSON output files are not split since it's not anticipated that you will use them as issue body content. diff --git a/issue_metrics.py b/issue_metrics.py index ef79e51..ec85210 100644 --- a/issue_metrics.py +++ b/issue_metrics.py @@ -20,6 +20,7 @@ main(): Run the issue-metrics script. """ +import shutil import sys from typing import List, Union @@ -30,6 +31,7 @@ from discussions import get_discussions from json_writer import write_to_json from labels import get_label_metrics, get_stats_time_in_labels +from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file from markdown_writer import write_to_markdown from most_active_mentors import count_comments_per_user, get_mentor_count from time_to_answer import get_stats_time_to_answer, measure_time_to_answer @@ -364,6 +366,7 @@ def main(): num_mentor_count, search_query, ) + write_to_markdown( issues_with_metrics, stats_time_to_first_response, @@ -377,6 +380,18 @@ def main(): search_query, ) + max_char_count = 65535 + if markdown_too_large_for_issue_body("issue_metrics.md", max_char_count): + split_markdown_file("issue_metrics.md", max_char_count) + shutil.move("issue_metrics.md", "issue_metrics_full.md") + shutil.move("issue_metrics_0.md", "issue_metrics.md") + print( + "Issue metrics markdown file is too large for GitHub issue body and has been \ + split into multiple files. ie. issue_metrics.md, issue_metrics_1.md, etc. 
\ + The full file is saved as issue_metrics_full.md\n\ + See https://github.com/github/issue-metrics/blob/main/docs/dealing-with-large-issue-metrics.md" + ) + if __name__ == "__main__": main() diff --git a/markdown_helpers.py b/markdown_helpers.py new file mode 100644 index 0000000..8faf39a --- /dev/null +++ b/markdown_helpers.py @@ -0,0 +1,38 @@ +""" Helper functions for working with markdown files. """ + + +def markdown_too_large_for_issue_body(file_path: str, max_char_count: int) -> bool: + """ + Check if the markdown file is too large to fit into a github issue. + + Inputs: + file_path: str - the path to the markdown file to check + max_char_count: int - the maximum number of characters allowed in a github issue body + + Returns: + bool - True if the file is too large, False otherwise + + """ + with open(file_path, "r", encoding="utf-8") as file: + file_contents = file.read() + return len(file_contents) > max_char_count + + +def split_markdown_file(file_path: str, max_char_count: int) -> None: + """ + Split the markdown file into smaller files. + + Inputs: + file_path: str - the path to the markdown file to split + max_char_count: int - the maximum number of characters allowed before splitting markdown file + + """ + with open(file_path, "r", encoding="utf-8") as file: + file_contents = file.read() + contents_list = [ + file_contents[i : i + max_char_count] + for i in range(0, len(file_contents), max_char_count) + ] + for i, content in enumerate(contents_list): + with open(f"{file_path[:-3]}_{i}.md", "w", encoding="utf-8") as new_file: + new_file.write(content) diff --git a/test_markdown_helpers.py b/test_markdown_helpers.py new file mode 100644 index 0000000..d856a62 --- /dev/null +++ b/test_markdown_helpers.py @@ -0,0 +1,75 @@ +""" Unit tests for the markdown_helpers module. 
""" + +import os +import unittest + +from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file + + +class TestMarkdownHelpers(unittest.TestCase): + """ + Unit tests for the markdown_helpers module. + """ + + def test_markdown_too_large_for_issue_body(self): + """ + Test the markdown_too_large_for_issue_body function. + """ + # Define a sample markdown file content + max_char_count = 65535 + markdown_content = "a\n" * max_char_count + + # Write the markdown content to a temporary file + with open("temp.md", "w", encoding="utf-8") as f: + f.write(markdown_content) + + # Call the function with the temporary file + result = markdown_too_large_for_issue_body("temp.md", max_char_count) + + # remove the temporary file + os.remove("temp.md") + + # Assert that the function returns True + self.assertTrue(result) + + def test_split_markdown_file(self): + """ + Test the split_markdown_file function. + """ + + # Define a sample markdown file content with 4 times the maximum character count + multiple_of_max = 4 + max_char_count = 65535 + repeated_content = "a\n" + markdown_content = repeated_content * int( + (max_char_count * multiple_of_max) / len(repeated_content) + ) + + # Write the markdown content to a temporary file + with open("temp.md", "w", encoding="utf-8") as f: + f.write(markdown_content) + + # Call the function with the temporary file + split_markdown_file("temp.md", max_char_count) + + # Assert that the function creates two files + self.assertTrue(os.path.exists("temp_0.md")) + self.assertTrue(os.path.exists("temp_1.md")) + self.assertTrue(os.path.exists("temp_2.md")) + self.assertTrue(os.path.exists("temp_3.md")) + + # Assert that the all files have less than max characters + for i in range(0, multiple_of_max): + with open(f"temp_{i}.md", "r", encoding="utf-8") as f: + self.assertLessEqual(len(f.read()), max_char_count) + + # remove the temporary files + os.remove("temp.md") + os.remove("temp_0.md") + os.remove("temp_1.md") + 
os.remove("temp_2.md") + os.remove("temp_3.md") + + +if __name__ == "__main__": + unittest.main()