Skip to content

v1.0.0

Latest
Compare
Choose a tag to compare
@2044smile 2044smile released this 24 Aug 06:57
· 6 commits to main since this release

@2044smile @DevDior

Feat

  1. Fetched the data using google-api-python-client
# API key comes from the DEVELOPER_KEY environment variable (None when unset).
DEVELOPER_KEY = os.environ.get('DEVELOPER_KEY')
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Build the API client once and keep it on the instance for the calls below.
self.youtube = build(
    YOUTUBE_API_SERVICE_NAME,
    YOUTUBE_API_VERSION,
    developerKey=DEVELOPER_KEY,
)

# Channel search: ask for up to 50 results scoped to this channel ID.
channel_search = self.youtube.search().list(
    channelId="UCyn-K7rZLXjGl7VXGweIlcA",
    part="snippet",
    maxResults=50,
)
response_channel = channel_search.execute()

# Keep the channelId reported by the first hit (IndexError if there is none).
first_hit = response_channel['items'][0]
channel_id = first_hit['snippet']['channelId']

# Collects video IDs; starts out empty.
video_list = []

# Fetch up to 20 playlists belonging to the channel.
playlists_request = self.youtube.playlists().list(
    channelId=channel_id,
    part='snippet',
    maxResults=20,
)
playlists = playlists_request.execute()

# * The cooking-log data that is needed is located at index [10].
cooking_log_playlist = playlists['items'][10]
cooking_log_ids = cooking_log_playlist['id']

# Walk every page of the cooking-log playlist and collect its video IDs.
request = self.youtube.playlistItems().list(
    part="snippet,contentDetails",
    playlistId=cooking_log_ids,
    maxResults=50,
)

# Set mirror of video_list so the duplicate check is O(1) while video_list
# keeps the first-seen order.
seen_video_ids = set(video_list)

while request is not None:
    response = request.execute()

    for video in response['items']:
        video_id = video['contentDetails']['videoId']
        # * Insert video_id into video_list without duplicates.
        if video_id not in seen_video_ids:
            seen_video_ids.add(video_id)
            video_list.append(video_id)

    # * google-api-python-client follows nextPageToken for us: list_next()
    # * returns the request for the next page, or None when the response has
    # * no nextPageToken, which ends the loop.
    request = self.youtube.playlistItems().list_next(request, response)

# One dict per video; becomes both the CSV rows and the ORM objects.
stats_list = []

# * Videos are requested in slices of at most 50 IDs per call.
for start in range(0, len(video_list), 50):
    batch_ids = video_list[start:start + 50]
    videos_request = self.youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=batch_ids,
    )
    response_videos = videos_request.execute()

    # ! Baek Jong-won channel
    for video in response_videos['items']:
        snippet = video['snippet']
        statistics = video['statistics']
        duration = video['contentDetails']['duration']

        # Trim any leading/trailing 'P', 'T', ',', ' ' and 'S' characters and
        # turn each 'M' into ':' (e.g. "PT4M13S" -> "4:13"). An hours
        # component ('H') is left in the string as-is.
        play_time = duration.strip('PT, S').replace('M', ':')

        # Key order is kept because it determines the CSV column order.
        stats_list.append({
            'url_pk': video['id'],
            'channel_id': snippet['channelId'],  # 'UCyn-K7rZLXjGl7VXGweIlcA'
            'title': snippet['title'],
            'description': snippet['description'],
            'thumbnails': snippet['thumbnails']['high']['url'],
            'published': snippet['publishedAt'],
            'play_time': play_time,
            # Missing counters fall back to 0.
            'view_count': statistics.get('viewCount', 0),
            'like_count': statistics.get('likeCount', 0),
        })

# Dump the collected rows to CSV without the DataFrame index column.
df = pd.DataFrame(stats_list)
df.to_csv("/home/ubuntu/code/cancook-backend/csv/๋ฐฑ์ข…์›_์ฟ ํ‚น๋กœ๊ทธ.csv", index=False)
# Alternative output directory used for local development:
# /Users/cslee/vscode/self-dining-backend/csv/ (same file name).

# Turn every stats dict into an unsaved YouTube model instance (ORM object).
obj_list = [YouTube(**data) for data in stats_list]

# Only the insert itself is guarded; IntegrityError marks the run as failed.
try:
    YouTube.objects.bulk_create(objs=obj_list)
except IntegrityError:
    saved = False
else:
    saved = True

# Counted outside the try block so that the failure message also has a value
# to report (the failure path would otherwise hit an unassigned `count`).
# NOTE(review): if this runs inside transaction.atomic(), querying after an
# IntegrityError needs a savepoint/rollback first - confirm against the caller.
count = YouTube.objects.filter(channel_id='UCyn-K7rZLXjGl7VXGweIlcA').count()

# Report the outcome and the current row count to the "youtube" Slack channel.
if saved:
    client.chat_postMessage(
        channel="youtube",
        text=f"๋ฐฑ์ข…์› ๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š”๋ฐ ์„ฑ๊ณตํ–ˆ์Šต๋‹ˆ๋‹ค.\nํ˜„์ œ ๋ฐ์ดํ„ฐ: {count} :tada: "
    )
else:
    client.chat_postMessage(
        channel="youtube",
        text=f"๋ฐฑ์ข…์› ๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š”๋ฐ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.\nํ˜„์ œ ๋ฐ์ดํ„ฐ: {count} :red_circle: "
    )
  1. 재료 데이터 정렬
# Extract ingredient names from the stored video descriptions of one channel.
# Result: ingredient_list holds dicts of the form {url_pk, name}.
ingredient_dict = {}
ingredient_list = []
target_description = None

# Pick the (url_pk, description) rows of the requested channel.
if channel_name == '๋ฐฑ์ข…์›':
    objs = YouTube.objects.filter(channel_id="UCyn-K7rZLXjGl7VXGweIlcA").values_list('url_pk', 'description')
elif channel_name == '์ž์ทจ์š”๋ฆฌ์‹ ':
    objs = YouTube.objects.filter(channel_id="UCC9pQY_uaBSa0WOpMNJHbEQ").values_list('url_pk', 'description')
else:
    # Unknown channel: nothing to parse (avoids an unbound `objs` below).
    objs = []

#! Second-pass filter: characters that are not part of an ingredient name
#! (latin letters, digits, unit/quantity characters, punctuation).
noise_chars = re.compile(r'[|[a-zA-Z]|[0-9]|[์•ฝ์ปตํฐ์ˆ ๊ฐœ๋ณ‘์Šคํ‘ผ์„ ํƒ์žฌ๋ฃŒ๊ฐ์ข…๋ด‰๋šœ๊ป‘์žˆ๋Š”ํŒฉ/()+ยฝยผยพ~ .-]|]')

# Filler phrases removed verbatim, in this order. The longer "...container"
# phrase is listed before its own suffix; the other way round the suffix is
# deleted first and the longer phrase can never match.
filler_phrases = (
    '์—†์œผ๋ฉด',
    '์ƒ๋žต๊ฐ€๋Šฅ',
    '์‚ฌ์ด์ฆˆ๋Š”์ผ๋ฐ˜์ด์ž…๋‹ˆ๋‹ค',
    'ํฌ๊ธฐ',
    '์ „์ž๋ ˆ์ธ์ง€์šฉ๊ธฐ',
    '์šฉ๊ธฐ',
    '๊ณต๊ธฐ',
    '๊ฐˆ์•„๋งŒ๋“ ๋ฐฐ์Œ',
    'ํ•œ์คŒ',
    '-์•ก์ “์•„๋ฌด๊ฑฐ๋‚˜',
    'ํ˜น์€',
)

for url_pk, raw_description in objs:
    description = raw_description.split("\n")

    # enumerate() gives the position of *this* header line, so a description
    # with a repeated header parses each section from its own start.
    for header_index, header_line in enumerate(description):
        if '[ ์žฌ๋ฃŒ ]' not in header_line and '[์žฌ๋ฃŒ]' not in header_line:
            continue
        target_description = header_line

        for line in description[header_index + 1:]:
            #! First-pass filter: the ingredient section ends at a blank line,
            #! at the next "[...]" header (this covers the how-to-make
            #! headers) or at a line starting with '*'.
            if line == '' or line.startswith(('[', '*')):
                break

            ingredient = noise_chars.sub('', line)
            for phrase in filler_phrases:
                ingredient = ingredient.replace(phrase, '')

            # Drop specific trailing characters, but only from names long
            # enough for the check (length thresholds 2 and 3 as coded).
            last_is_short_suffix = len(ingredient) > 2 and ingredient[-1] in ('๊ณผ', '๋Œ€')
            last_is_long_suffix = len(ingredient) > 3 and ingredient[-1] == '๊ฐ„'
            if last_is_short_suffix or last_is_long_suffix:
                ingredient = re.sub(r'.$', '', ingredient)
            if len(ingredient) > 3 and ingredient[-1] in ('์žฅ', '์™€'):
                ingredient = re.sub(r'(์žฅ|์™€)$', '', ingredient)

            #! For the second channel the ingredients are written on one line
            #! as a comma-separated string (e.g. "tofu, soft tofu, kimchi")
            #! without line breaks, so split it and add one entry per name.
            if channel_name == '์ž์ทจ์š”๋ฆฌ์‹ ':
                for name in ingredient.split(','):
                    ingredient_dict = dict(url_pk=url_pk, name=name)
                    ingredient_list.append(ingredient_dict)
            else:
                ingredient_dict = dict(url_pk=url_pk, name=ingredient)
                ingredient_list.append(ingredient_dict)

#* 1. Take the cleaned ingredient data (ingredient_list) and store it in the
#*    Ingredients model. Entries whose name was reduced to an empty string by
#*    the cleaning step are skipped so no blank ingredient rows are created.
obj_list = [
    Ingredients(name=info['name'], is_valid=False)
    for info in ingredient_list
    if info['name']
]
try:
    # ignore_conflicts=True asks the database to skip rows that violate a
    # constraint instead of failing the whole batch.
    Ingredients.objects.bulk_create(obj_list, ignore_conflicts=True)
except IntegrityError:
    # Deliberate best-effort insert: a remaining integrity error is ignored.
    pass
  1. REST Framework