[ie/Facebook] Fix Memories extraction (#8681)

- Support group /posts/ URLs
- Raise a proper error message if no formats are found

Closes #8669
Authored by: kclauhk
This commit is contained in:
kclauhk 2023-12-25 06:43:35 +08:00 committed by GitHub
parent 1f8bd8eba8
commit c39358a54b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -52,7 +52,7 @@ class FacebookIE(InfoExtractor):
)\?(?:.*?)(?:v|video_id|story_fbid)=| )\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)?| [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/| [^/]+/posts/|
groups/[^/]+/permalink/| groups/[^/]+/(?:permalink|posts)/|
watchparty/ watchparty/
)| )|
facebook: facebook:
@ -232,6 +232,21 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100013949973717', 'uploader_id': '100013949973717',
}, },
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, {
# data.node.comet_sections.content.story.attachments[].throwbackStyles.attachment_target_renderer.attachment.target.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/groups/1645456212344334/posts/3737828833107051/',
'info_dict': {
'id': '1569199726448814',
'ext': 'mp4',
'title': 'Pence MUST GO!',
'description': 'Vickie Gentry shared a memory.',
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -612,9 +627,11 @@ def parse_attachment(attachment, key='media'):
nodes = variadic(traverse_obj(data, 'nodes', 'node') or []) nodes = variadic(traverse_obj(data, 'nodes', 'node') or [])
attachments = traverse_obj(nodes, ( attachments = traverse_obj(nodes, (
..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments', ..., 'comet_sections', 'content', 'story', (None, 'attached_story'), 'attachments',
..., ('styles', 'style_type_renderer'), 'attachment'), expected_type=dict) or [] ..., ('styles', 'style_type_renderer', ('throwbackStyles', 'attachment_target_renderer')),
'attachment', {dict}))
for attachment in attachments: for attachment in attachments:
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or [] ns = traverse_obj(attachment, ('all_subattachments', 'nodes', ..., {dict}),
('target', 'attachments', ..., 'styles', 'attachment', {dict}))
for n in ns: for n in ns:
parse_attachment(n) parse_attachment(n)
parse_attachment(attachment) parse_attachment(attachment)
@ -637,7 +654,7 @@ def parse_attachment(attachment, key='media'):
if len(entries) > 1: if len(entries) > 1:
return self.playlist_result(entries, video_id) return self.playlist_result(entries, video_id)
video_info = entries[0] video_info = entries[0] if entries else {'id': video_id}
webpage_info = extract_metadata(webpage) webpage_info = extract_metadata(webpage)
# honor precise duration in video info # honor precise duration in video info
if video_info.get('duration'): if video_info.get('duration'):