Fix the bug in the long code message segmentation format merging algorithm.
yym68686 · Jul 7, 2024 · 6874314 · 6874314
1 parent 0fe8b9b
commit 6874314
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 2 deletions.
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name="md2tgmd",
-    version="0.2.5",
+    version="0.2.6",
     description="md2tgmd is a Markdown to Telegram-specific-markdown converter.",
     long_description=Path("README.md").open(encoding="utf-8").read(),
     long_description_content_type="text/markdown",

diff --git a/src/md2tgmd.py b/src/md2tgmd.py
@@ -62,7 +62,7 @@ def split_code(text):
         message_index = 1
         while message_index < conversation_len:
             if split_str_list[message_index].startswith('    '):
-                split_str_list[message_index - 1] += split_str_list[message_index]
+                split_str_list[message_index - 1] += "\n\n" + split_str_list[message_index]
                 split_str_list.pop(message_index)
                 conversation_len = conversation_len - 1
             else:

diff --git a/test/test_long_text.py b/test/test_long_text.py
@@ -0,0 +1,50 @@
+
+a = '''
+
+'''
+
+print(len(a))  # 1911
+
+# def split_code(text):
+#     import re
+#     split_list = []
+#     if len(text) > 2000:
+#         split_str_list = text.split('\n\n')
+
+#         conversation_len = len(split_str_list)
+#         message_index = 1
+#         while message_index < conversation_len:
+#             if split_str_list[message_index].startswith('    '):
+#                 split_str_list[message_index - 1] += split_str_list[message_index + 1]
+#                 split_str_list.pop(message_index)
+#                 conversation_len = conversation_len - 1
+#             else:
+#                 message_index = message_index + 1
+
+#         split_index = 0
+#         for index, _ in enumerate(split_str_list):
+#             if len("".join(split_str_list[:index])) < len(text) // 2:
+#                 split_index += 1
+#                 continue
+#             else:
+#                 break
+#         str1 = '\n\n'.join(split_str_list[:split_index])
+#         str1 = str1 + "\n```"
+#         split_list.append(str1)
+#         code_type = text.split('\n')[0]
+#         str2 = '\n\n'.join(split_str_list[split_index:])
+#         str2 = code_type + "\n" + str2
+#         split_list.append(str2)
+#     else:
+#         split_list.append(text)
+#     split_list = "\n@|@|@|@\n\n".join(split_list)
+#     return split_list
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from src.md2tgmd import replace_all, split_code
+text = replace_all(a, r"(```[\D\d\s]+?```)", split_code)
+print(text)
+# for i in split_code(a):
+#     print(i)