From 9f88b079453b0b74efd494a7ddfda40a43055852 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 11 Dec 2020 12:04:02 +0100 Subject: [PATCH] [facebook] add support for group posts with multiple videos(closes #19131) --- youtube_dl/extractor/facebook.py | 57 +++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 27a57ad03..6c360e29e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -72,6 +72,7 @@ class FacebookIE(InfoExtractor): }, 'skip': 'Requires logging in', }, { + # data.video 'url': 'https://www.facebook.com/video.php?v=274175099429670', 'info_dict': { 'id': '274175099429670', @@ -133,6 +134,7 @@ class FacebookIE(InfoExtractor): }, }, { # have 1080P, but only up to 720p in swf params + # data.video.story.attachments[].media 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', 'md5': '9571fae53d4165bbbadb17a94651dcdc', 'info_dict': { @@ -147,6 +149,7 @@ class FacebookIE(InfoExtractor): }, }, { # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall + # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', 'info_dict': { 'id': '1417995061575415', @@ -174,6 +177,7 @@ class FacebookIE(InfoExtractor): 'skip_download': True, }, }, { + # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', 'info_dict': { 'id': '1396382447100162', @@ -193,18 +197,23 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', 'only_matching': True, }, { + # data.mediaset.currMedia.edges 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', 'only_matching': True, }, { + # data.video.story.attachments[].media 'url': 'facebook:544765982287235', 'only_matching': True, }, { + # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/', 'only_matching': True, }, { + # data.video.creation_story.attachments[].media 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/', 'only_matching': True, }, { + # data.video 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', 'only_matching': True, }, { @@ -212,6 +221,7 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/', 'only_matching': True, }, { + # data.video 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/', 'info_dict': { 'id': '359649331226507', @@ -222,6 +232,13 @@ class FacebookIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media + 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', + 'info_dict': { + 'id': '106560053808006', + }, + 'playlist_count': 2, }] @staticmethod @@ -330,9 +347,7 @@ class FacebookIE(InfoExtractor): return extract_video_data(try_get( js_data, lambda x: x['jsmods']['instances'], list) or []) - formats = [] - - def extract_dash_manifest(video): + def extract_dash_manifest(video, formats): dash_manifest = video.get('dash_manifest') if dash_manifest: formats.extend(self._parse_mpd_formats( @@ -351,7 +366,10 @@ class FacebookIE(InfoExtractor): webpage, 'graphql data', default='{}'), video_id, fatal=False) or {} for require in (graphql_data.get('require') or []): if require[0] == 'RelayPrefetchedStreamCache': + entries = [] + def parse_graphql_video(video): + formats = [] q = qualities(['sd', 'hd']) for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]: playable_url = video.get('playable_url' + suffix) @@ -362,7 +380,7 @@ class FacebookIE(InfoExtractor): 'quality': q(format_id), 'url': playable_url, }) - extract_dash_manifest(video) + extract_dash_manifest(video, formats) self._sort_formats(formats) v_id = video.get('videoId') or video.get('id') or video_id info = { @@ -382,7 +400,12 @@ class FacebookIE(InfoExtractor): }) else: info['title'] = description or 'Facebook video #%s' % v_id - return webpage, info + entries.append(info) + + def parse_attachment(attachment, key='media'): + media = attachment.get(key) or {} + if media.get('__typename') == 'Video': + return parse_graphql_video(media) data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} @@ -392,13 +415,22 @@ class FacebookIE(InfoExtractor): lambda x: x['node']['comet_sections']['content']['story']['attachments'] ], list) or [] for attachment in attachments: - media = attachment.get('media') or try_get(attachment, lambda x: x['style_type_renderer']['attachment']['media'], dict) or {} - if media.get('__typename') == 'Video': - return parse_graphql_video(media) + attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict) or attachment + nodes = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or [] + for node in nodes: + parse_attachment(node) + parse_attachment(attachment) - video = data.get('video') or {} - if video: - return parse_graphql_video(video) + edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or [] + for edge in edges: + parse_attachment(edge, key='node') + + if not entries: + video = data.get('video') or {} + if video: + parse_graphql_video(video) + + return webpage, self.playlist_result(entries, video_id) if not video_data: if not fatal_if_no_video: @@ -440,6 +472,7 @@ class FacebookIE(InfoExtractor): if not video_data: raise ExtractorError('Cannot parse data') + formats = [] subtitles = {} for f in video_data: format_id = f['stream_type'] @@ -459,7 +492,7 @@ class FacebookIE(InfoExtractor): 'url': src, 'preference': preference, }) - extract_dash_manifest(f[0]) + extract_dash_manifest(f[0], formats) subtitles_src = f[0].get('subtitles_src') if subtitles_src: subtitles.setdefault('en', []).append({'url': subtitles_src})