[imagefap] add 'folder' extractor (#3504)

pull/3532/head
Mike Fährmann 2 years ago
parent cb9d100bac
commit 4833ec323e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -346,7 +346,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr> <tr>
<td>ImageFap</td> <td>ImageFap</td>
<td>https://www.imagefap.com/</td> <td>https://www.imagefap.com/</td>
<td>Galleries, individual Images, User Profiles</td> <td>Folders, Galleries, individual Images, User Profiles</td>
<td></td> <td></td>
</tr> </tr>
<tr> <tr>

@ -53,12 +53,20 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3", "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab", "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}), }),
("https://www.imagefap.com/gallery/5486966", { ("https://www.imagefap.com/gallery/7876223", {
"pattern": r"https://cdnh?\.imagefap\.com" "pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg", r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "8d2e562df7a0bc9e8eecb9d1bb68d32b4086bf98", "keyword": {
"archive": False, "count": 44,
"count": 62, "gallery_id": 7876223,
"image_id": int,
"num": int,
"tags": ["big ass", "panties", "horny",
"pussy", "exposed", "outdoor"],
"title": "Kelsi Monroe in lingerie",
"uploader": "BdRachel",
},
"count": 44,
}), }),
("https://www.imagefap.com/gallery.php?gid=7102714"), ("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"), ("https://beta.imagefap.com/gallery.php?gid=7102714"),
@ -130,12 +138,20 @@ class ImagefapImageExtractor(ImagefapExtractor):
subcategory = "image" subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)" pattern = BASE_PATTERN + r"/photo/(\d+)"
test = ( test = (
("https://www.imagefap.com/photo/1369341772/", { ("https://www.imagefap.com/photo/1962981893", {
"pattern": r"https://cdnh?\.imagefap\.com" "pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg", r"/images/full/65/196/1962981893\.jpg",
"keyword": "8894e45f7262020d8d66ce59917315def1fc475b", "keyword": {
"date": "21/08/2014",
"gallery_id": 7876223,
"height": 1600,
"image_id": 1962981893,
"title": "Kelsi Monroe in lingerie",
"uploader": "BdRachel",
"width": 1066,
},
}), }),
("https://beta.imagefap.com/photo/1369341772/"), ("https://beta.imagefap.com/photo/1962981893"),
) )
def __init__(self, match): def __init__(self, match):
@ -171,61 +187,70 @@ class ImagefapImageExtractor(ImagefapExtractor):
}) })
class ImagefapUserExtractor(ImagefapExtractor): class ImagefapFolderExtractor(ImagefapExtractor):
"""Extractor for all galleries from a user at imagefap.com""" """Extractor for imagefap user folders"""
subcategory = "user" subcategory = "folder"
categorytransfer = True pattern = (BASE_PATTERN + r"/(?:organizer/|"
pattern = (BASE_PATTERN + r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
r"/(?:profile(?:\.php\?user=|/)([^/?#]+)" r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
r"|usergallery\.php\?userid=(\d+))")
test = ( test = (
("https://www.imagefap.com/profile/LucyRae/galleries", { ("https://www.imagefap.com/organizer/409758", {
"url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a", "pattern": r"https://www\.imagefap\.com/gallery/7876223",
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
"count": 1,
}), }),
("https://www.imagefap.com/usergallery.php?userid=1862791", { (("https://www.imagefap.com/usergallery.php"
"url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a", "?userid=1981976&folderid=409758"), {
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
}),
(("https://www.imagefap.com/usergallery.php"
"?user=BdRachel&folderid=409758"), {
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
}),
("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
"pattern": ImagefapGalleryExtractor.pattern,
"range": "1-40",
}),
(("https://www.imagefap.com/usergallery.php"
"?userid=1981976&folderid=-1"), {
"pattern": ImagefapGalleryExtractor.pattern,
"range": "1-40",
}),
(("https://www.imagefap.com/usergallery.php"
"?user=BdRachel&folderid=-1"), {
"pattern": ImagefapGalleryExtractor.pattern,
"range": "1-40",
}), }),
("https://www.imagefap.com/profile.php?user=LucyRae"),
("https://beta.imagefap.com/profile.php?user=LucyRae"),
) )
def __init__(self, match): def __init__(self, match):
ImagefapExtractor.__init__(self, match) ImagefapExtractor.__init__(self, match)
self.user, self.user_id = match.groups() self._id, user, profile, self.folder_id = match.groups()
self.user = user or profile
def items(self): def items(self):
for folder_id in self.folders(): for gallery_id, name in self.galleries(self.folder_id):
for gallery_id, name in self.galleries(folder_id): url = "{}/gallery/{}".format(self.root, gallery_id)
url = "{}/gallery/{}".format(self.root, gallery_id) data = {
data = { "gallery_id": gallery_id,
"gallery_id": text.parse_int(gallery_id), "title" : text.unescape(name),
"title" : text.unescape(name), "_extractor": ImagefapGalleryExtractor,
"_extractor": ImagefapGalleryExtractor, }
} yield Message.Queue, url, data
yield Message.Queue, url, data
def folders(self):
"""Return a list of folder_ids of a specific user"""
if self.user:
url = "{}/profile/{}/galleries".format(self.root, self.user)
else:
url = "{}/usergallery.php?userid={}".format(
self.root, self.user_id)
response = self.request(url)
self.user = response.url.split("/")[-2]
folders = text.extr(response.text, ' id="tgl_all" value="', '"')
return folders.rstrip("|").split("|")
def galleries(self, folder_id): def galleries(self, folder_id):
"""Yield gallery_ids of a folder""" """Yield gallery IDs and titles of a folder"""
if folder_id == "-1": if folder_id == "-1":
url = "{}/profile/{}/galleries?folderid=-1".format( if self._id:
self.root, self.user) url = "{}/usergallery.php?userid={}&folderid=-1".format(
self.root, self.user)
else:
url = "{}/profile/{}/galleries?folderid=-1".format(
self.root, self.user)
else: else:
url = "{}/organizer/{}/".format(self.root, folder_id) url = "{}/organizer/{}/".format(self.root, folder_id)
params = {"page": 0}
params = {"page": 0}
while True: while True:
extr = text.extract_from(self.request(url, params=params).text) extr = text.extract_from(self.request(url, params=params).text)
cnt = 0 cnt = 0
@ -240,3 +265,53 @@ class ImagefapUserExtractor(ImagefapExtractor):
if cnt < 25: if cnt < 25:
break break
params["page"] += 1 params["page"] += 1
class ImagefapUserExtractor(ImagefapExtractor):
    """Extractor for an imagefap user profile

    Fetches the user's galleries page, reads the list of folder IDs
    from it, and queues one URL per folder for ImagefapFolderExtractor.
    """
    subcategory = "user"
    # Group 1 captures a user name (profile URLs), group 2 a numeric
    # user ID (usergallery.php URLs). The trailing '(?:$|#)' requires the
    # URL to end, or a fragment to start, right after that part, so URLs
    # carrying additional query parameters do not match this pattern.
    pattern = (BASE_PATTERN +
               r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
               r"|usergallery\.php\?userid=(\d+))(?:$|#)")
    test = (
        ("https://www.imagefap.com/profile/BdRachel", {
            "pattern": ImagefapFolderExtractor.pattern,
            "count": ">= 18",
        }),
        ("https://www.imagefap.com/usergallery.php?userid=1862791", {
            "pattern": r"https://www\.imagefap\.com"
                       r"/profile/LucyRae/galleries\?folderid=-1",
            "count": 1,
        }),
        ("https://www.imagefap.com/profile/BdRachel/galleries"),
        ("https://www.imagefap.com/profile.php?user=BdRachel"),
        ("https://beta.imagefap.com/profile.php?user=BdRachel"),
    )

    def __init__(self, match):
        ImagefapExtractor.__init__(self, match)
        # exactly one of these is set, depending on which alternative
        # of 'pattern' matched; the other one is None
        self.user, self.user_id = match.groups()

    def items(self):
        """Yield one Message.Queue entry per folder of the user"""
        # the same dict object is attached to every queued URL
        data = {"_extractor": ImagefapFolderExtractor}

        for folder_id in self.folders():
            if folder_id == "-1":
                # folder ID "-1" is addressed through the profile's
                # galleries page instead of an '/organizer/' URL
                url = "{}/profile/{}/galleries?folderid=-1".format(
                    self.root, self.user)
            else:
                url = "{}/organizer/{}/".format(self.root, folder_id)
            yield Message.Queue, url, data

    def folders(self):
        """Return a list of folder IDs of a user

        Also sets 'self.user' from the final response URL, which
        items() relies on when building profile URLs.
        """
        if self.user:
            url = "{}/profile/{}/galleries".format(self.root, self.user)
        else:
            url = "{}/usergallery.php?userid={}".format(
                self.root, self.user_id)

        response = self.request(url)
        # second-to-last '/'-separated segment of the final URL;
        # presumably the request ends up at '/profile/<name>/galleries',
        # which makes this the user name -- verify against the site
        self.user = response.url.split("/")[-2]
        folders = text.extr(response.text, ' id="tgl_all" value="', '"')
        # IDs are '|'-separated. Splitting an empty string yields [""],
        # which would make items() queue a bogus '/organizer//' URL, so
        # empty entries (including one from a trailing '|') are dropped.
        return [folder_id for folder_id in folders.split("|") if folder_id]

Loading…
Cancel
Save