Skip to content

Commit

Permalink
Upgraded deepl-cli code (#267)
Browse files: browse the repository at this point in the history
* Upgraded deepl-cli code

* Forgot to enable headless mode

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Forgot to remove print

* The max limit has changed to 1500

* Update deepl/deepl.py

* Update deepl/deepl.py

* Update deepl/deepl.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: haruna <[email protected]>
  • Loading branch information
3 people authored Nov 30, 2024
1 parent c1d974c commit 1d621be
Showing 1 changed file with 76 additions and 8 deletions.
84 changes: 76 additions & 8 deletions deepl/deepl.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(
self.to_lang = to_lang
self.translated_fr_lang: str | None = None
self.translated_to_lang: str | None = None
self.max_length = 3000
self.max_length = 1500
self.timeout = timeout
self.use_dom_submit = use_dom_submit

Expand Down Expand Up @@ -133,15 +133,41 @@ async def __translate(self, script: str) -> str:
if self.use_dom_submit:
# banner prevents clicking on language buttons, close the banner first
await page.click("button[data-testid=cookie-banner-lax-close-button]")
# we also expect the Chrome extension banner to show up
try:
await page.wait_for_function(
"""
() => document.querySelector('div[data-testid="chrome-extension-toast"]')
""",
)
except PlaywrightError:
pass

# try to close the extension banner
try:
await page.evaluate(
"""
document.querySelector(
'div[data-testid="chrome-extension-toast"]',
).querySelector('button').click()
""",
)
except PlaywrightError:
pass

# select input / output language
await page.locator("button[data-testid=translator-source-lang-btn]").dispatch_event("click")
await page.locator(f"button[data-testid=translator-lang-option-{self.fr_lang}]").dispatch_event("click")
await page.get_by_test_id("translator-source-lang-list").get_by_test_id(
f"translator-lang-option-{self.fr_lang}"
).dispatch_event("click")
await page.locator("button[data-testid=translator-target-lang-btn]").dispatch_event("click")
await page.locator(f"button[data-testid=translator-lang-option-{self.to_lang}]").dispatch_event("click")
await page.get_by_test_id("translator-target-lang-list").get_by_test_id(
f"translator-lang-option-{self.to_lang}"
).dispatch_event("click")
# fill in the form of translating script
await page.fill("div[aria-labelledby=translation-source-heading]", script)

# Wait for translation to complete
# Wait for translation to complete (perhaps partially)
try:
await page.wait_for_function(
"""
Expand All @@ -153,20 +179,62 @@ async def __translate(self, script: str) -> str:
msg = f"Time limit exceeded. ({self.timeout} ms)"
raise DeepLCLIPageLoadError(msg) from e

# Get the number of lines in the translated text field
try:
line_count = await page.evaluate(
"""
document.querySelector(
'd-textarea[aria-labelledby=translation-target-heading]',
).children[0].children.length
""",
)
except PlaywrightError as e:
msg = "Unable to evaluate line count of the translation"
raise DeepLCLIPageLoadError(msg) from e

# Since the site may not output all lines at once, we wait until each line is finished
# and then add it to the list of translated lines
translated_lines = []
for line_index in range(line_count):
try:
await page.wait_for_function(
f"""
() => {
const t = document.querySelector(
'd-textarea[aria-labelledby=translation-target-heading]',
)?.children[0]?.children[{line_index}]?.innerText ?? '';
t.length > 0 && !t.startsWith('[...]')
""",
)
except PlaywrightError as e:
msg = f"Time limit exceeded for line {line_index}. ({self.timeout} ms)"
raise DeepLCLIPageLoadError(msg) from e
try:
translated_text = await page.evaluate(
f"""
document.querySelector(
'd-textarea[aria-labelledby=translation-target-heading]'
).children[0].children[{line_index}].innerText
""",
)
translated_lines.append(translated_text)
except PlaywrightError as e:
msg = f"Unable get translated text for line {line_index}"
raise DeepLCLIPageLoadError(msg) from e
# Get information
input_textbox = page.get_by_role("region", name="Source text").locator("d-textarea")
output_textbox = page.get_by_role("region", name="Translation results").locator("d-textarea")
self.translated_fr_lang = str(await input_textbox.get_attribute("lang")).split("-")[0]
self.translated_to_lang = str(await output_textbox.get_attribute("lang")).split("-")[0]
res = str((await output_textbox.all_inner_texts())[0])
# the extra \n is produced by the <p> tag that wraps every line
res = res.replace("\n\n", "\n")
res = "".join(translated_lines)
await browser.close()
return res.rstrip("\n")
return res
def __sanitize_script(self, script: str) -> str:
"""Check command line args and stdin."""
Expand Down

0 comments on commit 1d621be

Please sign in to comment.