Skip to content
This repository has been archived by the owner on Dec 2, 2024. It is now read-only.

fix: implements selenium for css scraping (#36) #84

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions leetcode-api/leetcode.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import requests
import uvicorn
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup as bs
from fastapi import FastAPI, APIRouter

Expand Down Expand Up @@ -60,6 +64,18 @@ def leetcodeScrape(username: str):
# Store the username; it is appended to the LC base URL below
user.name = username

# Initialize browser options for Selenium
browser_options = ChromeOptions()

# Enables headless mode for Selenium
browser_options.add_argument("--headless=new")

# Initialize Chrome web driver using browser options
driver = Chrome(options=browser_options)

# Opens URL using the web driver
driver.get("https://leetcode.com/" + user.name)

# Get Raw HTML
try:
r = requests.get("https://leetcode.com/" + user.name)
Expand All @@ -82,12 +98,19 @@ def leetcodeScrape(username: str):
user.rank = int(raw_rank.replace(",", ""))

# Get the most recent problem, if any
raw_recent = html_doc.find("span", class_=RECENT_DIV_CLASS).get_text()
raw_recent = (
WebDriverWait(driver, 10)
.until(EC.element_to_be_clickable((By.XPATH, RECENT_DIV_CLASS)))
.text
)

# If submitted recently (LC uses the format "23 hours ago" or "1 day ago")
user.recent = "hour" in raw_recent
if user.recent:
user.recent_problem = html_doc.find("span", class_=RECENT_PROBLEM_DIV_CLASS).get_text()
user.recent_problem = driver.find_element(By.XPATH, RECENT_PROBLEM_DIV_CLASS).text

# Shutdown web driver
driver.quit()

# Return the populated user data
return user
Expand Down
8 changes: 6 additions & 2 deletions leetcode-api/leetcode_constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# On LC user page, div class for number of completed easy, medium, and hard
DIFF_DIV_CLASS = "mr-[5px] text-base font-medium leading-[20px] text-label-1 dark:text-dark-label-1"
RANK_DIV_CLASS = "ttext-label-1 dark:text-dark-label-1 font-medium"
RECENT_DIV_CLASS = "text-label-3 dark:text-dark-label-3 hidden whitespace-nowrap lc-md:inline"
RECENT_PROBLEM_DIV_CLASS = "text-label-1 dark:text-dark-label-1 font-medium line-clamp-1"
RECENT_DIV_CLASS = (
"//span[@class='text-label-3 dark:text-dark-label-3 lc-md:inline hidden whitespace-nowrap']"
)
RECENT_PROBLEM_DIV_CLASS = (
"//span[@class='text-label-1 dark:text-dark-label-1 line-clamp-1 font-medium']"
)
STREAK_DIV_CLASS = "font-medium text-label-2 dark:text-dark-label-2"