Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgraded deepl-cli code #267

Merged
merged 9 commits into from
Nov 30, 2024
84 changes: 76 additions & 8 deletions deepl/deepl.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(
self.to_lang = to_lang
self.translated_fr_lang: str | None = None
self.translated_to_lang: str | None = None
self.max_length = 3000
self.max_length = 1500
self.timeout = timeout
self.use_dom_submit = use_dom_submit

Expand Down Expand Up @@ -133,15 +133,41 @@ async def __translate(self, script: str) -> str:
if self.use_dom_submit:
# banner prevents clicking on language buttons, close the banner first
await page.click("button[data-testid=cookie-banner-lax-close-button]")
# we also expect the Chrome extension banner to show up
try:
await page.wait_for_function(
"""
() => document.querySelector('div[data-testid="chrome-extension-toast"]')
""",
)
except PlaywrightError:
pass

# try to close the extension banner
try:
await page.evaluate(
"""
document.querySelector(
'div[data-testid="chrome-extension-toast"]',
).querySelector('button').click()
""",
)
except PlaywrightError:
pass

# select input / output language
await page.locator("button[data-testid=translator-source-lang-btn]").dispatch_event("click")
await page.locator(f"button[data-testid=translator-lang-option-{self.fr_lang}]").dispatch_event("click")
await page.get_by_test_id("translator-source-lang-list").get_by_test_id(
f"translator-lang-option-{self.fr_lang}"
).dispatch_event("click")
await page.locator("button[data-testid=translator-target-lang-btn]").dispatch_event("click")
await page.locator(f"button[data-testid=translator-lang-option-{self.to_lang}]").dispatch_event("click")
await page.get_by_test_id("translator-target-lang-list").get_by_test_id(
f"translator-lang-option-{self.to_lang}"
).dispatch_event("click")
# fill in the form of translating script
await page.fill("div[aria-labelledby=translation-source-heading]", script)

# Wait for translation to complete
# Wait for translation to complete (perhaps partially)
try:
await page.wait_for_function(
"""
Expand All @@ -153,20 +179,62 @@ async def __translate(self, script: str) -> str:
msg = f"Time limit exceeded. ({self.timeout} ms)"
raise DeepLCLIPageLoadError(msg) from e

# Get the number of lines in the translated text field
try:
line_count = await page.evaluate(
"""
document.querySelector(
'd-textarea[aria-labelledby=translation-target-heading]',
).children[0].children.length
""",
)
except PlaywrightError as e:
msg = "Unable to evaluate line count of the translation"
raise DeepLCLIPageLoadError(msg) from e

# Since the site may not output all lines at once, we wait until each line is finished
# and then add it to the list of translated lines
translated_lines = []
for line_index in range(line_count):
try:
await page.wait_for_function(
f"""
() => {
const t = document.querySelector(
'd-textarea[aria-labelledby=translation-target-heading]',
)?.children[0]?.children[{line_index}]?.innerText ?? '';
t.length > 0 && !t.startsWith('[...]')
""",
)
except PlaywrightError as e:
msg = f"Time limit exceeded for line {line_index}. ({self.timeout} ms)"
raise DeepLCLIPageLoadError(msg) from e

try:
translated_text = await page.evaluate(
f"""
document.querySelector(
'd-textarea[aria-labelledby=translation-target-heading]'
).children[0].children[{line_index}].innerText
""",
)
translated_lines.append(translated_text)
except PlaywrightError as e:
msg = f"Unable get translated text for line {line_index}"
raise DeepLCLIPageLoadError(msg) from e

# Get information
input_textbox = page.get_by_role("region", name="Source text").locator("d-textarea")
output_textbox = page.get_by_role("region", name="Translation results").locator("d-textarea")

self.translated_fr_lang = str(await input_textbox.get_attribute("lang")).split("-")[0]
self.translated_to_lang = str(await output_textbox.get_attribute("lang")).split("-")[0]

res = str((await output_textbox.all_inner_texts())[0])
# every line is wrapped in a <p> tag, which produces an extra \n after each line
res = res.replace("\n\n", "\n")
res = "".join(translated_lines)

await browser.close()

return res.rstrip("\n")
return res

def __sanitize_script(self, script: str) -> str:
"""Check command line args and stdin."""
Expand Down
Loading