[reddit] improve comment metadata v2 (#4482)

Provide the main submission metadata at the top level
and the comment metadata inside the 'comment' field,
i.e. the reverse of the arrangement used in 1710f1e9.
pull/4571/head
Mike Fährmann 1 year ago
parent 7592c5e566
commit 4963bb9b30
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -99,11 +99,15 @@ class RedditExtractor(Extractor):
for comment in comments:
html = comment["body_html"] or ""
if ' href="' in html:
comment["submission"] = submission
comment["date"] = text.parse_timestamp(
comment["created_utc"])
if submission:
data = submission.copy()
data["comment"] = comment
else:
data = comment
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, comment))
urls.append((url, data))
for url, data in urls:
if not url or url[0] == "#":

Loading…
Cancel
Save