-
Notifications
You must be signed in to change notification settings - Fork 55
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Split markdown files when larger than max issue body size
Signed-off-by: Zack Koppert <[email protected]>
- Loading branch information
Showing
6 changed files
with
146 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Output files | ||
issue_metrics.md | ||
issue_metrics*.md | ||
issue_metrics.json | ||
|
||
# Byte-compiled / optimized / DLL files | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Dealing with large issue metrics markdown files | ||
|
||
When working with lots of issues/pull requests/discussion results, the resulting issue_metrics.md file can become very large. This can cause the GitHub API to return an error when trying to create an issue with the contents of the file. | ||
|
||
```shell | ||
Pull request creation failed. Validation failed: Body is too long (maximum is 65536 characters) | ||
``` | ||
|
||
To work around this limitation, the issue-metrics action detects the issue and splits the issue_metrics.md file into smaller files. So instead of issue_metrics.md, you will get issue_metrics_0.md, issue_metrics_1.md, etc. Since we don't want the action to fail, it has been designed to have the same name as usual for the first split file (issue_metrics.md) and then append a number to the name for the subsequent split files. | ||
|
||
You can choose one of the following strategies to deal with the split files: | ||
- Create multiple issues, each using the next split file in the sequence. | ||
- Upload the full file as an artifact and link to it in the issue body. | ||
- Create an issue and put the content of the split files as issue comments. | ||
|
||
JSON output files are not split, since it's not anticipated that you will use them as issue body content. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
""" Helper functions for working with markdown files. """ | ||
|
||
|
||
def markdown_too_large_for_issue_body(file_path: str, max_char_count: int) -> bool:
    """
    Report whether a markdown file is too big to be used as a github issue body.

    Inputs:
        file_path: str - location of the markdown file to measure
        max_char_count: int - the largest number of characters a github issue body may hold

    Returns:
        bool - True when the file holds more than max_char_count characters,
        False when it fits (a file of exactly max_char_count characters fits)
    """
    with open(file_path, "r", encoding="utf-8") as markdown_file:
        # Count decoded characters, not bytes on disk.
        character_count = len(markdown_file.read())

    return max_char_count < character_count
|
||
|
||
def split_markdown_file(file_path: str, max_char_count: int) -> None:
    """
    Split the markdown file into smaller files.

    The content is cut into consecutive chunks of at most max_char_count
    characters. Chunk number i (counting from 0) is written next to the source
    file as "<file_path without its extension>_<i>.md". The source file itself
    is left in place, and an empty source file produces no output files.

    Chunks are cut purely by character position, so a chunk boundary may fall
    in the middle of a line.

    Inputs:
        file_path: str - the path to the markdown file to split
        max_char_count: int - the maximum number of characters allowed before splitting markdown file

    Raises:
        ValueError - if max_char_count is less than 1
    """
    # Imported locally because this module has no top-level imports.
    import os

    # A zero step would make range() fail with an unrelated message and a
    # negative one would silently write nothing, so reject both up front.
    if max_char_count < 1:
        raise ValueError("max_char_count must be a positive integer")

    with open(file_path, "r", encoding="utf-8") as file:
        file_contents = file.read()

    # Strip the real extension instead of blindly dropping three characters,
    # so paths that do not end in ".md" still produce sensible names.
    base_path, _extension = os.path.splitext(file_path)

    chunk_starts = range(0, len(file_contents), max_char_count)
    for index, start in enumerate(chunk_starts):
        with open(f"{base_path}_{index}.md", "w", encoding="utf-8") as new_file:
            new_file.write(file_contents[start : start + max_char_count])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
""" Unit tests for the markdown_helpers module. """ | ||
|
||
import os
import tempfile
import unittest

from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
|
||
|
||
class TestMarkdownHelpers(unittest.TestCase):
    """
    Unit tests for the markdown_helpers module.

    Every test writes its files into a private temporary directory that is
    deleted afterwards, even when an assertion fails, so no stray files are
    left in the working directory.
    """

    def setUp(self):
        """
        Create the scratch directory and the path of the markdown file under test.
        """
        scratch_dir = tempfile.TemporaryDirectory()
        # addCleanup runs whether the test passes, fails or errors.
        self.addCleanup(scratch_dir.cleanup)
        self.scratch_path = scratch_dir.name
        self.markdown_path = os.path.join(self.scratch_path, "temp.md")

    def _write_markdown(self, content):
        """
        Write content to the markdown file under test.
        """
        with open(self.markdown_path, "w", encoding="utf-8") as f:
            f.write(content)

    def test_markdown_too_large_for_issue_body(self):
        """
        Test the markdown_too_large_for_issue_body function with an oversized file.
        """
        max_char_count = 65535

        # Each repetition is two characters, so the file is twice the limit
        self._write_markdown("a\n" * max_char_count)

        result = markdown_too_large_for_issue_body(self.markdown_path, max_char_count)

        # Assert that the function returns True
        self.assertTrue(result)

    def test_markdown_not_too_large_for_issue_body(self):
        """
        Test the markdown_too_large_for_issue_body function with a file exactly at the limit.
        """
        max_char_count = 65535

        # A file of exactly max_char_count characters still fits
        self._write_markdown("a" * max_char_count)

        result = markdown_too_large_for_issue_body(self.markdown_path, max_char_count)

        # Assert that the function returns False
        self.assertFalse(result)

    def test_split_markdown_file(self):
        """
        Test the split_markdown_file function.
        """
        # Define a sample markdown file content with 4 times the maximum character count
        multiple_of_max = 4
        max_char_count = 65535
        repeated_content = "a\n"
        markdown_content = repeated_content * (
            (max_char_count * multiple_of_max) // len(repeated_content)
        )

        self._write_markdown(markdown_content)

        # Call the function with the temporary file
        split_markdown_file(self.markdown_path, max_char_count)

        split_paths = [
            os.path.join(self.scratch_path, f"temp_{i}.md")
            for i in range(multiple_of_max)
        ]

        # Assert that the function creates exactly four split files
        for split_path in split_paths:
            self.assertTrue(os.path.exists(split_path))
        self.assertFalse(
            os.path.exists(
                os.path.join(self.scratch_path, f"temp_{multiple_of_max}.md")
            )
        )

        # Assert that no split file exceeds the maximum character count
        split_contents = []
        for split_path in split_paths:
            with open(split_path, "r", encoding="utf-8") as f:
                split_contents.append(f.read())
        for content in split_contents:
            self.assertLessEqual(len(content), max_char_count)

        # Assert that the split files together reproduce the original content
        self.assertEqual("".join(split_contents), markdown_content)
|
||
|
||
# Allow the tests to be run by executing this file directly.
if __name__ == "__main__":
    unittest.main()