From 37258c644f76416b2a09de14c0e74da628534e2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Tue, 24 Nov 2020 02:18:40 +0700
Subject: [PATCH] [cda] Fix extraction (closes #17803, closes #24458, closes #24518, closes #26381)

---
 youtube_dl/extractor/cda.py | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
index 0c3af23d5..d67900e62 100644
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -5,10 +5,16 @@ import codecs
 import re
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_chr,
+    compat_ord,
+    compat_urllib_parse_unquote,
+)
 from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    merge_dicts,
     multipart_encode,
     parse_duration,
     random_birthday,
@@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
             r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
             'view_count', default=None)
         average_rating = self._search_regex(
-            r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
-            webpage, 'rating', fatal=False, group='rating_value')
+            (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+             r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
+            group='rating_value')
 
         info_dict = {
             'id': video_id,
@@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
             'age_limit': 18 if need_confirm_age else 0,
         }
 
+        # Source: https://www.cda.pl/js/player.js?t=1606154898
+        def decrypt_file(a):
+            for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
+                a = a.replace(p, '')
+            a = compat_urllib_parse_unquote(a)
+            b = []
+            for c in a:
+                f = compat_ord(c)
+                b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
+            a = ''.join(b)
+            a = a.replace('.cda.mp4', '')
+            for p in ('.2cda.pl', '.3cda.pl'):
+                a = a.replace(p, '.cda.pl')
+            if '/upstream' in a:
+                a = a.replace('/upstream', '.mp4/upstream')
+                return 'https://' + a
+            return 'https://' + a + '.mp4'
+
         def extract_format(page, version):
             json_str = self._html_search_regex(
                 r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
                 video['file'] = codecs.decode(video['file'], 'rot_13')
                 if video['file'].endswith('adc.mp4'):
                     video['file'] = video['file'].replace('adc.mp4', '.mp4')
+            elif not video['file'].startswith('http'):
+                video['file'] = decrypt_file(video['file'])
             f = {
                 'url': video['file'],
             }
@@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
 
         self._sort_formats(formats)
 
-        return info_dict
+        info = self._search_json_ld(webpage, video_id, default={})
+
+        return merge_dicts(info_dict, info)