Skip to content

Commit

Permalink
[extractor/telecaribe] Add extractor (#6311)
Browse files Browse the repository at this point in the history
Authored by: elyse0
Closes #6001
  • Loading branch information
elyse0 authored Mar 4, 2023
1 parent 1f8489c commit b404712
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 0 deletions.
1 change: 1 addition & 0 deletions yt_dlp/extractor/_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1854,6 +1854,7 @@
from .tele5 import Tele5IE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecaribe import TelecaribePlayIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE
Expand Down
77 changes: 77 additions & 0 deletions yt_dlp/extractor/telecaribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import re

from .common import InfoExtractor
from ..utils import traverse_obj


class TelecaribePlayIE(InfoExtractor):
    """Extractor for play.telecaribe.co show pages and the live stream page."""

    _VALID_URL = r'https?://(?:www\.)?play\.telecaribe\.co/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.play.telecaribe.co/breicok',
        'info_dict': {
            'id': 'breicok',
            'title': 'Breicok',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.play.telecaribe.co/si-fue-gol-de-yepes',
        'info_dict': {
            'id': 'si-fue-gol-de-yepes',
            'title': 'Sí Fue Gol de Yepes',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://www.play.telecaribe.co/ciudad-futura',
        'info_dict': {
            'id': 'ciudad-futura',
            'title': 'Ciudad Futura',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.play.telecaribe.co/live',
        'info_dict': {
            'id': 'live',
            'title': r're:^Señal en vivo',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        }
    }]

    def _download_player_webpage(self, webpage, display_id):
        """Locate and download the embedded player page referenced by *webpage*.

        The page id is read from ``window.firstPageId`` (or, failing that, from
        the ``pageBackground_<id>`` div), then used to find the ``<link>`` whose
        id is ``features_<page id>``. The JSON behind that link lists component
        properties; the last ``url`` found among them is downloaded and returned.
        """
        page_id = self._search_regex(
            # The dot is escaped so that only the literal property access matches.
            (r'window\.firstPageId\s*=\s*["\']([^"\']+)',
             r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
            webpage, 'page_id')

        # page_id comes from the page itself, so escape it before embedding it
        # in a pattern; otherwise a metacharacter in the id would alter the regex.
        json_props_url = self._search_regex(
            rf'<link[^>]+href\s*=\s*"([^"]+)"[^>]+id\s*=\s*"features_{re.escape(page_id)}"',
            webpage, 'json_props_url')
        props = self._download_json(json_props_url, display_id)['props']['render']['compProps']

        # The player page is taken to be the last component URL in the props.
        return self._download_webpage(traverse_obj(props, (..., 'url'))[-1], display_id)

    def _get_clean_title(self, title):
        """Strip the "| Telecaribe VOD" marker from *title*.

        Returns None when *title* is empty/None or nothing is left after stripping.
        """
        return re.sub(r'\s*\|\s*Telecaribe\s*VOD', '', title or '').strip() or None

    def _real_extract(self, url):
        """Return a playlist of mp4 entries for a show page, or live formats for ``/live``."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        player = self._download_player_webpage(webpage, display_id)

        if display_id != 'live':
            # Show pages: every anchor pointing at an .mp4 file is one playlist entry.
            return self.playlist_from_matches(
                re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
                self._get_clean_title(self._og_search_title(webpage)))

        # Live page: the player script assigns the HLS manifest URL to `source`.
        m3u8_url = self._search_regex(
            r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, display_id, 'mp4')

        return {
            'id': display_id,
            'title': self._get_clean_title(self._og_search_title(webpage)),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }

0 comments on commit b404712

Please sign in to comment.