Skip to content

Commit

Permalink
Optimizing Long Text Segmentation Algorithm
Browse files (browse the repository at this point in the history)
  • Loading branch information
yym68686 committed Jul 19, 2024
1 parent 798634e commit 0429bed
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 38 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="md2tgmd",
version="0.2.9",
version="0.2.10",
description="md2tgmd is a Markdown to Telegram-specific-markdown converter.",
long_description=Path("README.md").open(encoding="utf-8").read(),
long_description_content_type="text/markdown",
Expand Down
2 changes: 1 addition & 1 deletion src/md2tgmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def dedent_space(text):

def split_code(text):
split_list = []
if len(text) > 2500:
if len(text) > 2300:
split_str_list = text.split('\n\n')

conversation_len = len(split_str_list)
Expand Down
45 changes: 9 additions & 36 deletions test/test_long_text.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,18 @@

b = '''
```
fvewfv
```
nknknlnkj
```
kjbkjbk
'''
print(b.split('```'))
a = '''
'''

print(len(a)) # 1911

# def split_code(text):
# import re
# split_list = []
# if len(text) > 2000:
# split_str_list = text.split('\n\n')

# conversation_len = len(split_str_list)
# message_index = 1
# while message_index < conversation_len:
# if split_str_list[message_index].startswith(' '):
# split_str_list[message_index - 1] += split_str_list[message_index + 1]
# split_str_list.pop(message_index)
# conversation_len = conversation_len - 1
# else:
# message_index = message_index + 1

# split_index = 0
# for index, _ in enumerate(split_str_list):
# if len("".join(split_str_list[:index])) < len(text) // 2:
# split_index += 1
# continue
# else:
# break
# str1 = '\n\n'.join(split_str_list[:split_index])
# str1 = str1 + "\n```"
# split_list.append(str1)
# code_type = text.split('\n')[0]
# str2 = '\n\n'.join(split_str_list[split_index:])
# str2 = code_type + "\n" + str2
# split_list.append(str2)
# else:
# split_list.append(text)
# split_list = "\n@|@|@|@\n\n".join(split_list)
# return split_list

import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
Expand Down

0 comments on commit 0429bed

Please sign in to comment.