diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 30e74ded..46d6d92f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -391,6 +391,12 @@ Consider all sites to be NSFW unless otherwise known. Galleries, individual Images + + Jpgchurch + https://jpg.church/ + Albums, individual Images, User Profiles + + Keenspot http://www.keenspot.com/ diff --git a/gallery_dl/extractor/jpgchurch.py b/gallery_dl/extractor/jpgchurch.py index dc848d3a..34910d10 100644 --- a/gallery_dl/extractor/jpgchurch.py +++ b/gallery_dl/extractor/jpgchurch.py @@ -16,24 +16,19 @@ class JpgchurchExtractor(Extractor): """Base class for Jpgchurch extractors""" category = "jpgchurch" root = "https://jpg.church" - directory_fmt = ("{category}", "{user}",) - archive_fmt = "{filename}" - - def __init__(self, match): - Extractor.__init__(self, match) - - def items(self): - for image in self.images(): - yield Message.Directory, image - yield Message.Url, image["url"], image - - def images(self): - """Return an iterable containing the image(s)""" + directory_fmt = ("{category}", "{user}", "{album}",) + archive_fmt = "{user}_{filename}" @staticmethod def _extract_user(page): return text.extract(page, 'username: "', '"')[0] + @staticmethod + def _extract_album(page): + album = text.extract(page, 'Added to ', '')[0] + return album + def _extract_image(self, url): page = self.request(url).text data = { @@ -42,36 +37,22 @@ class JpgchurchExtractor(Extractor): } text.nameext_from_url(data["url"], data) data["user"] = self._extract_user(page) + data["album"] = self._extract_album(page) return data def _pagination(self, url): - """Uses recursion to yield the next page""" - yield url - page = self.request(url).text - _next = text.extract( - page, '<')[0] - if _next: - url = _next - yield from self._pagination(_next) - - def _get_images(self, url): - for url in self._pagination(url): + while True: + yield url page = self.request(url).text - album = text.extract(page, '')[0] - album = text.extract(album, '>', '')[0] - page = text.extract_iter( - page, '
<')[0] + if not _next: + return + url = _next def _get_albums(self, url): for url in self._pagination(url): page = self.request(url).text - album = text.extract(page, '')[0] - album = text.extract(album, '>', '')[0] page = text.extract_iter( page, '