Skip to content

Commit

Permalink
feat: Split markdown files when larger than max issue body size
Browse files Browse the repository at this point in the history
Signed-off-by: Zack Koppert <[email protected]>
  • Loading branch information
zkoppert committed May 1, 2024
1 parent d40d483 commit a26168d
Show file tree
Hide file tree
Showing 6 changed files with 146 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Output files
issue_metrics.md
issue_metrics*.md
issue_metrics.json

# Byte-compiled / optimized / DLL files
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ This action can be configured to authenticate with GitHub App Installation or Pe
- [Configuring the `SEARCH_QUERY`](./docs/search-query.md)
- [Local usage without Docker](./docs/local-usage-without-docker.md)
- [Authenticating with GitHub App Installation](./docs/authenticating-with-github-app-installation.md)
- [Dealing with large issue_metrics.md files](./docs/dealing-with-large-issue-metrics.md)

## Contributions

Expand Down
16 changes: 16 additions & 0 deletions docs/dealing-with-large-issue-metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Dealing with large issue metrics markdown files

When working with lots of issues/pull requests/discussion results, the resulting issue_metrics.md file can become very large. This can cause the GitHub API to return an error when trying to create an issue with the contents of the file.

```shell
Pull request creation failed. Validation failed: Body is too long (maximum is 65536 characters)
```

To work around this limitation, the issue-metrics action detects when the file is too large and splits issue_metrics.md into smaller files. Since we don't want existing workflows to fail, the first split file keeps the usual name (issue_metrics.md), and a number is appended to the name of each subsequent split file (issue_metrics_1.md, issue_metrics_2.md, etc.). The complete, unsplit file is saved as issue_metrics_full.md.

You can choose one of the following strategies to deal with the split files:
- Create multiple issues, each using the next split file in the sequence.
- Upload the full file as an artifact and link to it in the issue body.
- Create an issue and put the content of the split files as issue comments.

JSON output files are not split, since it's not anticipated that you will use them as issue body content.
15 changes: 15 additions & 0 deletions issue_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
main(): Run the issue-metrics script.
"""

import shutil
import sys
from typing import List, Union

Expand All @@ -30,6 +31,7 @@
from discussions import get_discussions
from json_writer import write_to_json
from labels import get_label_metrics, get_stats_time_in_labels
from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
from markdown_writer import write_to_markdown
from most_active_mentors import count_comments_per_user, get_mentor_count
from time_to_answer import get_stats_time_to_answer, measure_time_to_answer
Expand Down Expand Up @@ -364,6 +366,7 @@ def main():
num_mentor_count,
search_query,
)

write_to_markdown(
issues_with_metrics,
stats_time_to_first_response,
Expand All @@ -377,6 +380,18 @@ def main():
search_query,
)

max_char_count = 65535
if markdown_too_large_for_issue_body("issue_metrics.md", max_char_count):
split_markdown_file("issue_metrics.md", max_char_count)
shutil.move("issue_metrics.md", "issue_metrics_full.md")
shutil.move("issue_metrics_0.md", "issue_metrics.md")
print(
"Issue metrics markdown file is too large for GitHub issue body and has been \
split into multiple files. ie. issue_metrics.md, issue_metrics_1.md, etc. \
The full file is saved as issue_metrics_full.md\n\
See https://github.com/github/issue-metrics/blob/main/docs/dealing-with-large-issue-metrics.md"
)


if __name__ == "__main__":
main()
38 changes: 38 additions & 0 deletions markdown_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
""" Helper functions for working with markdown files. """


def markdown_too_large_for_issue_body(file_path: str, max_char_count: int) -> bool:
    """
    Report whether a markdown file exceeds the size allowed for a github issue body.
    Inputs:
        file_path: str - location of the markdown file to measure
        max_char_count: int - largest number of characters a github issue body may hold
    Returns:
        bool - True when the file holds more than max_char_count characters,
        False when it fits
    """
    with open(file_path, "r", encoding="utf-8") as markdown_file:
        char_count = len(markdown_file.read())

    # A file of exactly max_char_count characters still fits
    if char_count <= max_char_count:
        return False
    return True


def split_markdown_file(file_path: str, max_char_count: int) -> None:
    """
    Split the markdown file into smaller files of at most max_char_count characters.

    The pieces are written next to the original as <name>_0.md, <name>_1.md, etc.
    where <name> is file_path with its extension removed. The original file is
    left in place. An empty input file produces no split files.
    Inputs:
        file_path: str - the path to the markdown file to split
        max_char_count: int - the maximum number of characters allowed in each
            split file, must be greater than zero
    Raises:
        ValueError - if max_char_count is zero or negative
    """
    # Imported locally so the helper carries its own dependency
    import os

    # A negative size would otherwise silently produce no output at all
    if max_char_count <= 0:
        raise ValueError("max_char_count must be greater than zero")

    # Read everything first so the source is closed before any piece is written
    with open(file_path, "r", encoding="utf-8") as file:
        file_contents = file.read()

    # Drop the real extension rather than assuming a three character ".md" suffix
    base_path, _ = os.path.splitext(file_path)

    chunk_starts = range(0, len(file_contents), max_char_count)
    for index, start in enumerate(chunk_starts):
        with open(f"{base_path}_{index}.md", "w", encoding="utf-8") as new_file:
            new_file.write(file_contents[start : start + max_char_count])
75 changes: 75 additions & 0 deletions test_markdown_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
""" Unit tests for the markdown_helpers module. """

import os
import tempfile
import unittest

from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file


class TestMarkdownHelpers(unittest.TestCase):
    """
    Unit tests for the markdown_helpers module.
    """

    def setUp(self):
        """
        Create a scratch directory that is removed even when a test fails.
        """
        # pylint: disable-next=consider-using-with
        scratch = tempfile.TemporaryDirectory()
        # Cleanup runs after every test, pass or fail, so no files are left behind
        self.addCleanup(scratch.cleanup)
        self.temp_dir = scratch.name
        self.temp_file = os.path.join(self.temp_dir, "temp.md")

    def test_markdown_too_large_for_issue_body(self):
        """
        Test the markdown_too_large_for_issue_body function with an oversized file.
        """
        # Define a sample markdown file content twice as long as the maximum
        max_char_count = 65535
        markdown_content = "a\n" * max_char_count

        # Write the markdown content to a temporary file
        with open(self.temp_file, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        # Call the function with the temporary file
        result = markdown_too_large_for_issue_body(self.temp_file, max_char_count)

        # Assert that the function returns True
        self.assertTrue(result)

    def test_markdown_not_too_large_for_issue_body(self):
        """
        Test that a file of exactly the maximum size is not reported as too large.
        """
        max_char_count = 65535
        markdown_content = "a" * max_char_count

        # Write the markdown content to a temporary file
        with open(self.temp_file, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        # Call the function with the temporary file
        result = markdown_too_large_for_issue_body(self.temp_file, max_char_count)

        # Assert that the function returns False
        self.assertFalse(result)

    def test_split_markdown_file(self):
        """
        Test the split_markdown_file function.
        """

        # Define a sample markdown file content with 4 times the maximum character count
        multiple_of_max = 4
        max_char_count = 65535
        repeated_content = "a\n"
        markdown_content = repeated_content * int(
            (max_char_count * multiple_of_max) / len(repeated_content)
        )

        # Write the markdown content to a temporary file
        with open(self.temp_file, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        # Call the function with the temporary file
        split_markdown_file(self.temp_file, max_char_count)

        # Assert that each expected split file exists and stays within the limit
        for i in range(multiple_of_max):
            split_file = os.path.join(self.temp_dir, f"temp_{i}.md")
            self.assertTrue(os.path.exists(split_file))
            with open(split_file, "r", encoding="utf-8") as f:
                self.assertLessEqual(len(f.read()), max_char_count)


if __name__ == "__main__":
unittest.main()

0 comments on commit a26168d

Please sign in to comment.