-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrestaurants.py
103 lines (93 loc) · 4.22 KB
/
restaurants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
import time
import csv
s = Service('./chromedriver')
o = Options()
o.headless = True
main_url = "https://restaurant.ikyu.com/"
# without headless option if detected as bot , options=o
browser = webdriver.Chrome(service=s, options=o)
urls = []
names = []
genres = []
prices = []
addresses = []
stations = []
phones = []
# 194 + 1
for iteration in range(1, 3):
url_counting = (
f"https://restaurant.ikyu.com/search?pups=2&pthm=19%3A00&xpge={iteration}&rac1=08004&pndt=1&ptaround=0&xsrt=gourmet")
browser.get(url_counting)
restaurant_elements = browser.find_elements(By.CLASS_NAME,
'cover_3Ae77')
for elem in restaurant_elements:
url = elem.get_attribute("href")
if url != None:
urls.append(url)
for restaurant in urls:
name = price = genre = address = phone = station = ''
browser.get(restaurant)
time.sleep(1)
try:
name_exists = browser.find_elements(By.CLASS_NAME,
"restaurantName_dvSu5")
if (name_exists):
name = browser.find_element(By.CLASS_NAME,
"restaurantName_dvSu5").get_attribute('innerText')
price_exists = browser.find_elements(By.XPATH,
"//*[@aria-label='05']/span[2]")
if(price_exists):
price = browser.find_element(By.XPATH,
"//*[@aria-label='05']/span[2]").get_attribute('innerText')
genre = browser.find_element(By.XPATH,
"//*[@id='__layout']/div/div[2]/main/header/div/div[1]/div[1]/div/span/span").get_attribute('innerText')
phone_exists = browser.find_elements(
By.XPATH, "//h3[contains(text(), 'お問い合わせ')]")
if (phone_exists):
phone = browser.find_element(
By.XPATH, "//h3[contains(text(), 'お問い合わせ')]/following-sibling::div").get_attribute('innerText').split('\n')[0]
station_list = browser.find_elements(
By.XPATH, "//h3[contains(text(), '最寄り駅')]")
if(station_list):
station = browser.find_element(
By.XPATH, "//h3[contains(text(), '最寄り駅')]/following-sibling::div").get_attribute('innerText').replace(
'\u3000', ' ').replace('\n', ' ')
address = browser.find_element(
By.XPATH, "//*[@aria-label='address']/following-sibling::div").get_attribute('innerText').replace(
'\u3000', ' ').replace('\n', ' ')
else:
station_list = browser.find_elements(
By.XPATH, "//h3[contains(text(), '最寄り駅')]")
if(station_list):
station = browser.find_element(
By.XPATH, "//h3[contains(text(), '最寄り駅')]/following-sibling::div").get_attribute('innerText').replace(
'\u3000', ' ').replace('\n', ' ')
address = browser.find_element(
By.XPATH, "//*[@aria-label='address']/following-sibling::div").get_attribute('innerText').replace(
'\u3000', ' ').replace('\n', ' ')
except NoSuchElementException as e:
print("Exception:", e)
continue
finally:
names.append(name)
prices.append(price)
genres.append(genre)
addresses.append(address)
phones.append(phone)
stations.append(station)
combined_lists = [list(a) for a in zip(
names, urls, prices, genres, addresses, phones, stations)]
with open('out/restaurants_hiroshima_rankings_01-23.csv', 'w', newline='') as csvfile:
my_writer = csv.writer(csvfile, delimiter=',')
headers = ['Venue Name', 'Venue URL', 'Price Range',
'Genre', 'Address', 'Phone Number', 'Nearest Station']
my_writer.writerow(i for i in headers)
for j in combined_lists:
my_writer.writerow(j)
browser.quit()