[fantia] refactor

- embed response data as hidden '_data' field
  (instead of returning/passing 'resp')
- split _get_urls_from_post() into _get_post_contents() and _get_content_urls()
pull/4268/head
Mike Fährmann 1 year ago
parent 6c8bf9a762
commit dc7af00014
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -31,15 +31,22 @@ class FantiaExtractor(Extractor):
FantiaExtractor._warning = False FantiaExtractor._warning = False
for post_id in self.posts(): for post_id in self.posts():
full_response, post = self._get_post_data(post_id) post = self._get_post_data(post_id)
yield Message.Directory, post
post["num"] = 0 post["num"] = 0
for url, url_data in self._get_urls_from_post(full_response, post):
post["num"] += 1 for content in self._get_post_contents(post):
fname = url_data["content_filename"] or url post["content_category"] = content["category"]
text.nameext_from_url(fname, url_data) post["content_title"] = content["title"]
url_data["file_url"] = url post["content_filename"] = content.get("filename", "")
yield Message.Url, url, url_data post["content_id"] = content["id"]
yield Message.Directory, post
for url in self._get_content_urls(post, content):
text.nameext_from_url(
post["content_filename"] or url, post)
post["file_url"] = url
post["num"] += 1
yield Message.Url, url, post
def posts(self): def posts(self):
"""Return post IDs""" """Return post IDs"""
@ -71,7 +78,7 @@ class FantiaExtractor(Extractor):
"""Fetch and process post data""" """Fetch and process post data"""
url = self.root+"/api/v1/posts/"+post_id url = self.root+"/api/v1/posts/"+post_id
resp = self.request(url, headers=self.headers).json()["post"] resp = self.request(url, headers=self.headers).json()["post"]
post = { return {
"post_id": resp["id"], "post_id": resp["id"],
"post_url": self.root + "/posts/" + str(resp["id"]), "post_url": self.root + "/posts/" + str(resp["id"]),
"post_title": resp["title"], "post_title": resp["title"],
@ -85,55 +92,63 @@ class FantiaExtractor(Extractor):
"fanclub_user_name": resp["fanclub"]["user"]["name"], "fanclub_user_name": resp["fanclub"]["user"]["name"],
"fanclub_name": resp["fanclub"]["name"], "fanclub_name": resp["fanclub"]["name"],
"fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]), "fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
"tags": resp["tags"] "tags": resp["tags"],
"_data": resp,
} }
return resp, post
def _get_urls_from_post(self, resp, post): def _get_post_contents(self, post):
contents = post["_data"]["post_contents"]
try:
url = post["_data"]["thumb"]["original"]
except Exception:
pass
else:
contents.insert(0, {
"id": "thumb",
"title": "thumb",
"category": "thumb",
"download_uri": url,
})
return contents
def _get_content_urls(self, post, content):
"""Extract individual URL data from the response""" """Extract individual URL data from the response"""
if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]: if "comment" in content:
post["content_filename"] = "" post["content_comment"] = content["comment"]
post["content_category"] = "thumb"
post["file_id"] = "thumb" if "post_content_photos" in content:
yield resp["thumb"]["original"], post for photo in content["post_content_photos"]:
post["file_id"] = photo["id"]
for content in resp["post_contents"]: yield photo["url"]["original"]
post["content_category"] = content["category"]
post["content_title"] = content["title"] if "download_uri" in content:
post["content_filename"] = content.get("filename", "") post["file_id"] = content["id"]
post["content_id"] = content["id"] url = content["download_uri"]
if url[0] == "/":
if "comment" in content: url = self.root + url
post["content_comment"] = content["comment"] yield url
if "post_content_photos" in content: if content["category"] == "blog" and "comment" in content:
for photo in content["post_content_photos"]: comment_json = util.json_loads(content["comment"])
post["file_id"] = photo["id"] ops = comment_json.get("ops") or ()
yield photo["url"]["original"], post
# collect blogpost text first
if "download_uri" in content: blog_text = ""
post["file_id"] = content["id"] for op in ops:
yield self.root+"/"+content["download_uri"], post insert = op.get("insert")
if isinstance(insert, str):
if content["category"] == "blog" and "comment" in content: blog_text += insert
comment_json = util.json_loads(content["comment"]) post["blogpost_text"] = blog_text
ops = comment_json.get("ops", ())
# collect images
# collect blogpost text first for op in ops:
blog_text = "" insert = op.get("insert")
for op in ops: if isinstance(insert, dict) and "fantiaImage" in insert:
insert = op.get("insert") img = insert["fantiaImage"]
if isinstance(insert, str): post["file_id"] = img["id"]
blog_text += insert yield self.root + img["original_url"]
post["blogpost_text"] = blog_text
# collect images
for op in ops:
insert = op.get("insert")
if isinstance(insert, dict) and "fantiaImage" in insert:
img = insert["fantiaImage"]
post["file_id"] = img["id"]
yield "https://fantia.jp" + img["original_url"], post
class FantiaCreatorExtractor(FantiaExtractor): class FantiaCreatorExtractor(FantiaExtractor):

Loading…
Cancel
Save