[fantia] refactor

- embed response data as hidden '_data' field
  (instead of returning/passing 'resp')
- split _get_urls_from_post() into _get_post_contents() and _get_content_urls()
pull/4268/head
Mike Fährmann 1 year ago
parent 6c8bf9a762
commit dc7af00014
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -31,15 +31,22 @@ class FantiaExtractor(Extractor):
FantiaExtractor._warning = False FantiaExtractor._warning = False
for post_id in self.posts(): for post_id in self.posts():
full_response, post = self._get_post_data(post_id) post = self._get_post_data(post_id)
yield Message.Directory, post
post["num"] = 0 post["num"] = 0
for url, url_data in self._get_urls_from_post(full_response, post):
for content in self._get_post_contents(post):
post["content_category"] = content["category"]
post["content_title"] = content["title"]
post["content_filename"] = content.get("filename", "")
post["content_id"] = content["id"]
yield Message.Directory, post
for url in self._get_content_urls(post, content):
text.nameext_from_url(
post["content_filename"] or url, post)
post["file_url"] = url
post["num"] += 1 post["num"] += 1
fname = url_data["content_filename"] or url yield Message.Url, url, post
text.nameext_from_url(fname, url_data)
url_data["file_url"] = url
yield Message.Url, url, url_data
def posts(self): def posts(self):
"""Return post IDs""" """Return post IDs"""
@ -71,7 +78,7 @@ class FantiaExtractor(Extractor):
"""Fetch and process post data""" """Fetch and process post data"""
url = self.root+"/api/v1/posts/"+post_id url = self.root+"/api/v1/posts/"+post_id
resp = self.request(url, headers=self.headers).json()["post"] resp = self.request(url, headers=self.headers).json()["post"]
post = { return {
"post_id": resp["id"], "post_id": resp["id"],
"post_url": self.root + "/posts/" + str(resp["id"]), "post_url": self.root + "/posts/" + str(resp["id"]),
"post_title": resp["title"], "post_title": resp["title"],
@ -85,39 +92,47 @@ class FantiaExtractor(Extractor):
"fanclub_user_name": resp["fanclub"]["user"]["name"], "fanclub_user_name": resp["fanclub"]["user"]["name"],
"fanclub_name": resp["fanclub"]["name"], "fanclub_name": resp["fanclub"]["name"],
"fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]), "fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
"tags": resp["tags"] "tags": resp["tags"],
"_data": resp,
} }
return resp, post
def _get_urls_from_post(self, resp, post): def _get_post_contents(self, post):
"""Extract individual URL data from the response""" contents = post["_data"]["post_contents"]
if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]:
post["content_filename"] = ""
post["content_category"] = "thumb"
post["file_id"] = "thumb"
yield resp["thumb"]["original"], post
for content in resp["post_contents"]: try:
post["content_category"] = content["category"] url = post["_data"]["thumb"]["original"]
post["content_title"] = content["title"] except Exception:
post["content_filename"] = content.get("filename", "") pass
post["content_id"] = content["id"] else:
contents.insert(0, {
"id": "thumb",
"title": "thumb",
"category": "thumb",
"download_uri": url,
})
return contents
def _get_content_urls(self, post, content):
"""Extract individual URL data from the response"""
if "comment" in content: if "comment" in content:
post["content_comment"] = content["comment"] post["content_comment"] = content["comment"]
if "post_content_photos" in content: if "post_content_photos" in content:
for photo in content["post_content_photos"]: for photo in content["post_content_photos"]:
post["file_id"] = photo["id"] post["file_id"] = photo["id"]
yield photo["url"]["original"], post yield photo["url"]["original"]
if "download_uri" in content: if "download_uri" in content:
post["file_id"] = content["id"] post["file_id"] = content["id"]
yield self.root+"/"+content["download_uri"], post url = content["download_uri"]
if url[0] == "/":
url = self.root + url
yield url
if content["category"] == "blog" and "comment" in content: if content["category"] == "blog" and "comment" in content:
comment_json = util.json_loads(content["comment"]) comment_json = util.json_loads(content["comment"])
ops = comment_json.get("ops", ()) ops = comment_json.get("ops") or ()
# collect blogpost text first # collect blogpost text first
blog_text = "" blog_text = ""
@ -133,7 +148,7 @@ class FantiaExtractor(Extractor):
if isinstance(insert, dict) and "fantiaImage" in insert: if isinstance(insert, dict) and "fantiaImage" in insert:
img = insert["fantiaImage"] img = insert["fantiaImage"]
post["file_id"] = img["id"] post["file_id"] = img["id"]
yield "https://fantia.jp" + img["original_url"], post yield self.root + img["original_url"]
class FantiaCreatorExtractor(FantiaExtractor): class FantiaCreatorExtractor(FantiaExtractor):

Loading…
Cancel
Save