[fantia] refactor

- embed response data as hidden '_data' field
  (instead of returning/passing 'resp')
- split _get_urls_from_post()
pull/4268/head
Mike Fährmann 1 year ago
parent 6c8bf9a762
commit dc7af00014
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -31,15 +31,22 @@ class FantiaExtractor(Extractor):
FantiaExtractor._warning = False
for post_id in self.posts():
full_response, post = self._get_post_data(post_id)
yield Message.Directory, post
post = self._get_post_data(post_id)
post["num"] = 0
for url, url_data in self._get_urls_from_post(full_response, post):
post["num"] += 1
fname = url_data["content_filename"] or url
text.nameext_from_url(fname, url_data)
url_data["file_url"] = url
yield Message.Url, url, url_data
for content in self._get_post_contents(post):
post["content_category"] = content["category"]
post["content_title"] = content["title"]
post["content_filename"] = content.get("filename", "")
post["content_id"] = content["id"]
yield Message.Directory, post
for url in self._get_content_urls(post, content):
text.nameext_from_url(
post["content_filename"] or url, post)
post["file_url"] = url
post["num"] += 1
yield Message.Url, url, post
def posts(self):
"""Return post IDs"""
@ -71,7 +78,7 @@ class FantiaExtractor(Extractor):
"""Fetch and process post data"""
url = self.root+"/api/v1/posts/"+post_id
resp = self.request(url, headers=self.headers).json()["post"]
post = {
return {
"post_id": resp["id"],
"post_url": self.root + "/posts/" + str(resp["id"]),
"post_title": resp["title"],
@ -85,55 +92,63 @@ class FantiaExtractor(Extractor):
"fanclub_user_name": resp["fanclub"]["user"]["name"],
"fanclub_name": resp["fanclub"]["name"],
"fanclub_url": self.root+"/fanclubs/"+str(resp["fanclub"]["id"]),
"tags": resp["tags"]
"tags": resp["tags"],
"_data": resp,
}
return resp, post
def _get_urls_from_post(self, resp, post):
def _get_post_contents(self, post):
contents = post["_data"]["post_contents"]
try:
url = post["_data"]["thumb"]["original"]
except Exception:
pass
else:
contents.insert(0, {
"id": "thumb",
"title": "thumb",
"category": "thumb",
"download_uri": url,
})
return contents
def _get_content_urls(self, post, content):
"""Extract individual URL data from the response"""
if "thumb" in resp and resp["thumb"] and "original" in resp["thumb"]:
post["content_filename"] = ""
post["content_category"] = "thumb"
post["file_id"] = "thumb"
yield resp["thumb"]["original"], post
for content in resp["post_contents"]:
post["content_category"] = content["category"]
post["content_title"] = content["title"]
post["content_filename"] = content.get("filename", "")
post["content_id"] = content["id"]
if "comment" in content:
post["content_comment"] = content["comment"]
if "post_content_photos" in content:
for photo in content["post_content_photos"]:
post["file_id"] = photo["id"]
yield photo["url"]["original"], post
if "download_uri" in content:
post["file_id"] = content["id"]
yield self.root+"/"+content["download_uri"], post
if content["category"] == "blog" and "comment" in content:
comment_json = util.json_loads(content["comment"])
ops = comment_json.get("ops", ())
# collect blogpost text first
blog_text = ""
for op in ops:
insert = op.get("insert")
if isinstance(insert, str):
blog_text += insert
post["blogpost_text"] = blog_text
# collect images
for op in ops:
insert = op.get("insert")
if isinstance(insert, dict) and "fantiaImage" in insert:
img = insert["fantiaImage"]
post["file_id"] = img["id"]
yield "https://fantia.jp" + img["original_url"], post
if "comment" in content:
post["content_comment"] = content["comment"]
if "post_content_photos" in content:
for photo in content["post_content_photos"]:
post["file_id"] = photo["id"]
yield photo["url"]["original"]
if "download_uri" in content:
post["file_id"] = content["id"]
url = content["download_uri"]
if url[0] == "/":
url = self.root + url
yield url
if content["category"] == "blog" and "comment" in content:
comment_json = util.json_loads(content["comment"])
ops = comment_json.get("ops") or ()
# collect blogpost text first
blog_text = ""
for op in ops:
insert = op.get("insert")
if isinstance(insert, str):
blog_text += insert
post["blogpost_text"] = blog_text
# collect images
for op in ops:
insert = op.get("insert")
if isinstance(insert, dict) and "fantiaImage" in insert:
img = insert["fantiaImage"]
post["file_id"] = img["id"]
yield self.root + img["original_url"]
class FantiaCreatorExtractor(FantiaExtractor):

Loading…
Cancel
Save