# API key is read from the process environment (None when the variable is unset).
DEVELOPER_KEY = os.environ.get('DEVELOPER_KEY')
# Service name and version passed to the discovery-based client builder.
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'
# Client handle kept on the instance; every request below goes through it.
self.youtube = build(
    YOUTUBE_API_SERVICE_NAME,
    YOUTUBE_API_VERSION,
    developerKey=DEVELOPER_KEY,
)
# Channel search: query the search endpoint restricted to one fixed channel id,
# requesting only the snippet part and at most 50 results.
channel_search = self.youtube.search().list(
    part="snippet",
    channelId="UCyn-K7rZLXjGl7VXGweIlcA",
    maxResults=50,
)
response_channel = channel_search.execute()
# The channel id is read back from the snippet of the first returned item.
first_item = response_channel['items'][0]
channel_id = first_item['snippet']['channelId']
# List up to 20 playlists of the channel (snippet part only).
playlist_query = self.youtube.playlists().list(
    part='snippet',
    channelId=channel_id,
    maxResults=20,
)
playlists = playlist_query.execute()
# The cooking-log playlist that is needed sits at position [10] of the returned items.
cooking_log_ids = playlists['items'][10]['id']
# First-page request for that playlist's items; executed by the paging loop that follows.
request = self.youtube.playlistItems().list(
    playlistId=cooking_log_ids,
    part="snippet,contentDetails",
    maxResults=50,
)
# Accumulator for the video ids found in the playlist.
video_list = []
# Walk the playlist one page at a time, adding each video id to video_list
# only the first time it is seen.
while True:
    page = request.execute()
    for entry in page['items']:
        candidate_id = entry['contentDetails']['videoId']
        if candidate_id in video_list:
            continue  # already collected; keep video_list free of duplicates
        video_list.append(candidate_id)
    # No 'nextPageToken' key in the response means this was the last page.
    if 'nextPageToken' not in page:
        break
    # Same query again, continued from the page token returned by the API.
    request = self.youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=cooking_log_ids,
        pageToken=page['nextPageToken'],
        maxResults=50
    )
# One dict per video; these rows feed both the CSV export and the DB insert.
stats_list = []
# Query video details in slices of 50 ids per request.
for start in range(0, len(video_list), 50):
    response_videos = self.youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_list[start:start + 50],
    ).execute()
    for video in response_videos['items']:
        url_pk = video['id']
        snippet = video['snippet']
        channel_id = snippet['channelId']
        title = snippet['title']
        description = snippet['description']
        thumbnails = snippet['thumbnails']['high']['url']
        # Counters fall back to 0 when the statistics entry lacks them.
        statistics = video['statistics']
        view_count = statistics.get('viewCount', 0)
        like_count = statistics.get('likeCount', 0)
        published = snippet['publishedAt']
        # Trim the characters 'P', 'T', ',', ' ' and 'S' from both ends of the
        # duration string, then turn every 'M' into ':'.
        play_time = video['contentDetails']['duration'].strip('PT, S').replace('M', ':')
        # Key order is kept as-is: it determines the column order of the DataFrame built later.
        stats_list.append({
            'url_pk': url_pk,
            'channel_id': channel_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'published': published,
            'play_time': play_time,
            'view_count': view_count,
            'like_count': like_count,
        })
# Load the collected per-video rows into a DataFrame and write them, without the
# index column, to a CSV file at a fixed absolute path.
# NOTE(review): the file-name part of the path appears mis-encoded in this file and the
# literal is physically split across two lines — confirm against the original UTF-8 source.
df=pd.DataFrame(stats_list)
df.to_csv("/home/ubuntu/code/cancook-backend/csv/๋ฐฑ์ข
์_์ฟ ํน๋ก๊ทธ.csv", index=False)
# df.to_csv("/Users/cslee/vscode/self-dining-backend/csv/๋ฐฑ์ข
์_์ฟ ํน๋ก๊ทธ.csv", index=False)
obj_list = [YouTube(**data) for data in stats_list] # YouTube(**data) YouTube Object = ORM
try:
YouTube.objects.bulk_create(objs=obj_list)
count = YouTube.objects.filter(channel_id='UCyn-K7rZLXjGl7VXGweIlcA').count()
client.chat_postMessage(
channel="youtube",
text=f"๋ฐฑ์ข
์ ๋ฐ์ดํฐ๋ฅผ ๊ฐ์ ธ์ค๋๋ฐ ์ฑ๊ณตํ์ต๋๋ค.\nํ์ ๋ฐ์ดํฐ: {count} :tada: "
)
except IntegrityError:
client.chat_postMessage(
channel="youtube",
text=f"๋ฐฑ์ข
์ ๋ฐ์ดํฐ๋ฅผ ๊ฐ์ ธ์ค๋๋ฐ ์คํจํ์ต๋๋ค.\nํ์ ๋ฐ์ดํฐ: {count} :red_circle: "
)
# Extract ingredient names from the stored video descriptions of one channel.
# Result: ingredient_list, holding one dict(url_pk=..., name=...) per extracted entry.
# NOTE(review): the non-ASCII string literals and regex character sets in this block appear
# mis-encoded in this file (some are even split across two physical lines) — confirm them
# against the original UTF-8 source before relying on any comparison below.
ingredient_dict = {}
ingredient_list = []
# Will hold the description line that contains the ingredients header.
# NOTE(review): initialised once here and never reset further down — confirm whether a value
# found for an earlier video can carry over to a later video that has no header line.
target_description = None
# Select (url_pk, description) rows for the requested channel by its channel id.
# NOTE(review): there is no else branch, so objs is unassigned for any other channel_name — verify callers.
if channel_name == '๋ฐฑ์ข
์':
objs = YouTube.objects.filter(channel_id="UCyn-K7rZLXjGl7VXGweIlcA").values_list('url_pk', 'description')
elif channel_name == '์์ทจ์๋ฆฌ์ ':
objs = YouTube.objects.filter(channel_id="UCC9pQY_uaBSa0WOpMNJHbEQ").values_list('url_pk', 'description')
for obj in objs:
# obj[0] is the url_pk and obj[1] the description text, which is processed line by line.
description = obj[1].split("\n")
url_pk = obj[0]
# Remember the line that carries the ingredients header (either spelling); the last match wins.
for ingredient in description:
if '[ ์ฌ๋ฃ ]' in ingredient or '[์ฌ๋ฃ]' in ingredient:
target_description = ingredient
if target_description:
# Start reading on the line right after the header line.
index = description.index(target_description)
for i in range(index+1, len(description)):
# First pass: walk the description lines and break as soon as a terminating line shows up,
# so that only the ingredient lines are extracted.
# NOTE(review): the startswith('[') and startswith('*') checks already cover the more
# specific checks that follow each of them.
if description[i] == '':
break
if description[i].startswith('['):
break
if description[i].startswith('[ ๋ง๋๋ ๋ฒ ]'):
break
if description[i].startswith('[๋ง๋๋ ๋ฒ]'):
break
if description[i].startswith('*'):
break
if description[i] == '*':
break
# Second pass: after the first-pass filtering above, strip unnecessary text from the
# ingredient line — Latin letters, digits and a fixed set of other characters are removed.
ingredient = re.sub(r'[|[a-zA-Z]|[0-9]|[์ฝ์ปตํฐ์ ๊ฐ๋ณ์คํผ์ ํ์ฌ๋ฃ๊ฐ์ข
๋ด๋๊ป์๋ํฉ/()+ยฝยผยพ~ .-]|]', '', description[i])
# Remove a fixed list of filler phrases wherever they occur in the remaining text.
if '์์ผ๋ฉด' in ingredient:
ingredient = re.sub(r'์์ผ๋ฉด', '', ingredient)
if '์๋ต๊ฐ๋ฅ' in ingredient:
ingredient = re.sub(r'์๋ต๊ฐ๋ฅ', '', ingredient)
if '์ฌ์ด์ฆ๋์ผ๋ฐ์ด์
๋๋ค' in ingredient:
ingredient = re.sub(r'์ฌ์ด์ฆ๋์ผ๋ฐ์ด์
๋๋ค', '', ingredient)
if 'ํฌ๊ธฐ' in ingredient:
ingredient = re.sub(r'ํฌ๊ธฐ', '', ingredient)
if '์ฉ๊ธฐ' in ingredient:
ingredient = re.sub(r'์ฉ๊ธฐ', '', ingredient)
if '๊ณต๊ธฐ' in ingredient:
ingredient = re.sub(r'๊ณต๊ธฐ', '', ingredient)
if '๊ฐ์๋ง๋ ๋ฐฐ์' in ingredient:
ingredient = re.sub(r'๊ฐ์๋ง๋ ๋ฐฐ์', '', ingredient)
if 'ํ์ค' in ingredient:
ingredient = re.sub(r'ํ์ค', '', ingredient)
if '-์ก์ ์๋ฌด๊ฑฐ๋' in ingredient:
ingredient = re.sub(r'-์ก์ ์๋ฌด๊ฑฐ๋', '', ingredient)
if 'ํน์' in ingredient:
ingredient = re.sub(r'ํน์', '', ingredient)
if '์ ์๋ ์ธ์ง์ฉ๊ธฐ' in ingredient:
ingredient = re.sub(r'์ ์๋ ์ธ์ง์ฉ๊ธฐ', '', ingredient)
# Drop the final character when the text is long enough and its last element equals one of the listed literals.
if (len(ingredient) > 2 and ingredient[-1] == '๊ณผ') or (len(ingredient) > 2 and ingredient[-1] == '๋') or (len(ingredient) > 3 and ingredient[-1] == '๊ฐ'):
ingredient = re.sub(r'.$', '', ingredient)
# Same idea for two further endings, removed via an anchored alternation.
if (len(ingredient) > 3 and ingredient[-1] == '์ฅ') or (len(ingredient) > 3 and ingredient[-1] == '์'):
ingredient = re.sub(r'(์ฅ|์)$', '', ingredient)
# For the channel handled by the elif branch above, the ingredient information is a single
# comma-separated string without line breaks (e.g. "tofu, soft tofu, kimchi"), so it is
# turned into a list and each element is recorded in a for loop.
# NOTE(review): the inner loop reuses the name i that the enclosing range loop also uses.
if channel_name == '์์ทจ์๋ฆฌ์ ':
ingredient = ingredient.split(',')
for i in ingredient:
ingredient_dict = dict(url_pk=url_pk, name=i)
ingredient_list.append(ingredient_dict)
else:
ingredient_dict = dict(url_pk=url_pk, name=ingredient)
ingredient_list.append(ingredient_dict)
else:
# No header line recorded: nothing is extracted.
pass
# 1. Take the verified ingredient data (ingredient_list) and store it in the
#    Ingredients model. Every row is created with is_valid=False.
import logging

obj_list = [Ingredients(name=info['name'], is_valid=False) for info in ingredient_list]
try:
    # ignore_conflicts=True asks the database to skip rows that violate a constraint
    # instead of failing the whole insert.
    Ingredients.objects.bulk_create(obj_list, ignore_conflicts=True)
except IntegrityError:
    # Saving stays best-effort (no re-raise), but the failure is no longer silent:
    # it is logged together with the traceback.
    logging.getLogger(__name__).warning(
        "Bulk insert of %d ingredient rows raised IntegrityError",
        len(obj_list),
        exc_info=True,
    )