change keyword names to valid Python identifiers

For the most part, this commit replaces minus signs ('-') in keyword names
with underscores ('_') so that these names can be used in filter expressions.
For example, 'gallery-id' was renamed to 'gallery_id'.

(It is theoretically possible to access any variable, regardless of its
name, via 'locals()["NAME"]', but that seems too convoluted when a plain
'NAME' would do.)
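
For illustration only (this is not part of the commit; it assumes, as the
message implies, that filter expressions are evaluated as Python expressions
with the keyword dict as the local namespace), a minimal sketch of why a
hyphenated name cannot be used there directly:

    # Hypothetical sketch, not gallery-dl's actual filter code.
    keywords = {"gallery-id": 123, "gallery_id": 123}

    try:
        # 'gallery-id' is parsed as the subtraction 'gallery - id'
        eval("gallery-id == 123", {}, keywords)
    except NameError as exc:
        print("hyphenated name fails:", exc)        # name 'gallery' is not defined

    print(eval("gallery_id == 123", {}, keywords))  # True: a valid identifier

    # The locals() workaround mentioned above works, but is clumsy:
    print(eval("locals()['gallery-id'] == 123", {}, keywords))  # True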
Mike Fährmann 7 years ago
parent 81877bb5f6
commit 6f30cf4c64

@ -16,12 +16,12 @@ class FutabaThreadExtractor(Extractor):
"""Extractor for images from threads on www.2chan.net"""
category = "2chan"
subcategory = "thread"
directory_fmt = ["{category}", "{board-name}", "{thread}"]
directory_fmt = ["{category}", "{board_name}", "{thread}"]
pattern = [r"(?:https?://)?(([^.]+)\.2chan\.net/([^/]+)/res/(\d+))"]
urlfmt = "https://{server}.2chan.net/{board}/src/{filename}"
test = [("http://dec.2chan.net/70/res/947.htm", {
"url": "c5c12b80b290e224b6758507b3bb952044f4595b",
"keyword": "e1295c0a96f733898e92742bcc1a4c4b320e3748",
"keyword": "4bd22e7a9c3636faecd6ea7082509e8655e10dd0",
})]
def __init__(self, match):
@ -49,7 +49,7 @@ class FutabaThreadExtractor(Extractor):
"server": self.server,
"title": title,
"board": self.board,
"board-name": boardname[:-4],
"board_name": boardname[:-4],
"thread": self.thread,
}

@ -84,7 +84,7 @@ class DeviantartExtractor(Extractor):
deviation["username"] = self.user
author = deviation["author"]
author["urlname"] = author["username"].lower()
deviation["da-category"] = deviation["category"]
deviation["da_category"] = deviation["category"]
@staticmethod
def commit(deviation, target):
@ -170,7 +170,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
test = [
("http://shimoda7.deviantart.com/gallery/", {
"url": "f95b222d939c1e6aa8b9aabe89eaa2d364f06d38",
"keyword": "9342c2a7a2bd6eb9f4a6ea539d04d75248ebe05f",
"keyword": "af266348c6bf2bfb9219f94e505ff346db9b6321",
}),
("https://yakuzafc.deviantart.com/", {
"url": "fa6ecb2c3aa78872f762d43f7809b7f0580debc1",
@ -195,11 +195,11 @@ class DeviantartFolderExtractor(DeviantartExtractor):
test = [
("http://shimoda7.deviantart.com/gallery/722019/Miscellaneous", {
"url": "1ee23a0bd8f7099d375afe8a29ea1a3bf394ba1e",
"keyword": "a0d7093148b9bab8ee0efa6213139efd99f23394",
"keyword": "96f4a766180d9eac96c5904a5c9cb29613401aac",
}),
("http://majestic-da.deviantart.com/gallery/63419606/CHIBI-KAWAII", {
"url": "1df6f4312f124b0ad9f2a905c8f9e94e89c84370",
"keyword": "b651f5d540aaaf7974fa7e181e4cc54151a65e9e",
"keyword": "3eb4e7f10c49ab23d783a825bd934dbf98e8ff88",
}),
]
@ -229,7 +229,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
(("http://shimoda7.deviantart.com/art/"
"For-the-sake-of-a-memory-10073852"), {
"url": "393dc581ca9e6938dbf0a3db8e9eea6243eb35f4",
"keyword": "5f58ecdce9b9ebb51f65d0e24e0f7efe00a74a55",
"keyword": "e9373cda1d3e2a5b6ece82325f8f8e08b202ce10",
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://zzz.deviantart.com/art/zzz-1234567890", {
@ -237,7 +237,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
}),
("http://sta.sh/01ijs78ebagf", {
"url": "3a15ed9201e665172b1daece8ef6d42f6a7ad3d5",
"keyword": "00246726d49f51ab35ea88d66467067f05b10bc9",
"keyword": "8213960fd651e1c8e2622a514d2482078341948e",
}),
("http://sta.sh/abcdefghijkl", {
"exception": exception.NotFoundError,
@ -265,7 +265,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
test = [
("http://h3813067.deviantart.com/favourites/", {
"url": "393dc581ca9e6938dbf0a3db8e9eea6243eb35f4",
"keyword": "c7d0a3bacc1e4c5625dda703e25affe047cbbc3f",
"keyword": "76b34daf4bde433897ef1f230b036182da118413",
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("http://h3813067.deviantart.com/favourites/?catpath=/", None),
@ -291,7 +291,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
r"/favourites/(\d+)/([^/?&#]+)"]
test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
"url": "f0c12581060aab9699289817b39804d9eb88f675",
"keyword": "2778b4abaac240ff6fb1d630d7b04b8e983ef9c4",
"keyword": "868ff18de9e3dd2af6ae95b0fb73d6556f051e17",
})]
def __init__(self, match):
@ -319,7 +319,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
test = [
("https://angrywhitewanker.deviantart.com/journal/", {
"url": "6474f49fbb4d01637ff0762708953252a52dc9c1",
"keyword": "5306515383a7ec26b22a2de42045718e6d630f25",
"keyword": "5b29c0fb9af34d35f069bfe8fd448c8ca0dd8a9d",
}),
("http://shimoda7.deviantart.com/journal/?catpath=/", None),
]

@ -20,12 +20,12 @@ class ExhentaiGalleryExtractor(Extractor):
"""Extractor for image galleries from exhentai.org"""
category = "exhentai"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery-id}"]
filename_fmt = "{gallery-id}_{num:>04}_{image-token}_{name}.{extension}"
directory_fmt = ["{category}", "{gallery_id}"]
filename_fmt = "{gallery_id}_{num:>04}_{image_token}_{name}.{extension}"
pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
test = [
("https://exhentai.org/g/960460/4f0e369d82/", {
"keyword": "623f8c86c9fe38e964682dd4309b96922655b900",
"keyword": "d837276b02c4e91e96c1b40fe4415cbb73b56577",
"content": "493d759de534355c9f55f8e365565b62411de146",
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
@ -85,8 +85,8 @@ class ExhentaiGalleryExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"gallery-id" : self.gid,
"gallery-token": self.token,
"gallery_id" : self.gid,
"gallery_token": self.token,
}
text.extract_all(page, (
("title" , '<h1 id="gn">', '</h1>'),
@ -94,7 +94,7 @@ class ExhentaiGalleryExtractor(Extractor):
("date" , '>Posted:</td><td class="gdt2">', '</td>'),
("language" , '>Language:</td><td class="gdt2">', ' '),
("size" , '>File Size:</td><td class="gdt2">', ' '),
("size-units", '', '<'),
("size_units", '', '<'),
("count" , '>Length:</td><td class="gdt2">', ' '),
), values=data)
data["lang"] = util.language_to_code(data["language"])
@ -132,7 +132,7 @@ class ExhentaiGalleryExtractor(Extractor):
return url, text.nameext_from_url(data["url"], {
"num": 1,
"image-token": data["startkey"],
"image_token": data["startkey"],
})
def images_from_api(self):
@ -165,7 +165,7 @@ class ExhentaiGalleryExtractor(Extractor):
yield url, text.nameext_from_url(imgurl, {
"num": request["page"],
"image-token": imgkey
"image_token": imgkey
})
request["imgkey"] = nextkey

@ -31,13 +31,13 @@ class HbrowseChapterExtractor(Extractor):
"""Extractor for manga-chapters from hbrowse.com"""
category = "hbrowse"
subcategory = "chapter"
directory_fmt = ["{category}", "{gallery-id} {title}", "c{chapter:>05}"]
filename_fmt = ("{category}_{gallery-id}_{chapter:>05}_"
directory_fmt = ["{category}", "{gallery_id} {title}", "c{chapter:>05}"]
filename_fmt = ("{category}_{gallery_id}_{chapter:>05}_"
"{num:>03}.{extension}")
pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"]
test = [("http://www.hbrowse.com/10363/c00000", {
"url": "634f4800858913f097bc3b62a8fedaf74b5254bd",
"keyword": "c7dc22a10699dee5cf466406fecee6ffa2e6277e",
"keyword": "f0f96cefda19e5aee1a19454f63ffe3a425602ab",
"content": "44578ebbe176c2c27434966aef22945787e2781e",
})]
url_base = "http://www.hbrowse.com"
@ -59,14 +59,14 @@ class HbrowseChapterExtractor(Extractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
'gallery-id': self.gid,
'gallery_id': self.gid,
"chapter": int(self.chapter[1:]),
}
return text.extract_all(page, (
('title' , '<td class="listLong">', '</td>'),
(None , '<td class="listLong">', ''),
('artist' , '>', '<'),
('count-total', '<td class="listLong">', ' '),
('count_total', '<td class="listLong">', ' '),
(None , '<td class="listLong">', ''),
('origin' , '>', '<'),
), values=data)[0]

@ -42,7 +42,7 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
test = [("http://hentai2read.com/amazon_elixir/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "fc79e4c70d61ae476aea2b63a75324e3d96f4497",
"keyword": "a159017295546e2647d80a4a4165c702662abe1e",
})]
def __init__(self, match):
@ -57,7 +57,7 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
r"(\d+): (.+) . Page 1 ", title)
return {
"manga-id": images[0].split("/")[-3],
"manga_id": images[0].split("/")[-3],
"chapter": self.chapter,
"count": len(images),
"manga": match.group(1),

@ -16,8 +16,8 @@ import json
class HentaicdnChapterExtractor(Extractor):
"""Base class for extractors for a single manga chapter"""
subcategory = "chapter"
directory_fmt = ["{category}", "{manga-id} {title}"]
filename_fmt = ("{category}_{manga-id}_{chapter:>02}_"
directory_fmt = ["{category}", "{manga_id} {title}"]
filename_fmt = ("{category}_{manga_id}_{chapter:>02}_"
"{num:>03}.{extension}")
url = ""

@ -40,7 +40,7 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
test = [("https://hentaihere.com/m/S13812/1/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "fd6e515ccf073e3b57d39c5cb472692858bddb88",
"keyword": "7b31d19668b353f7be73b330a52ec6a7e56d23ea",
})]
def __init__(self, match):
@ -55,7 +55,7 @@ class HentaihereChapterExtractor(hentaicdn.HentaicdnChapterExtractor):
pattern = r"Page 1 \| (.+) \(([^)]+)\) - Chapter \d+: (.+) by (.+) at "
match = re.match(pattern, title)
return {
"manga-id": self.gid,
"manga_id": self.gid,
"manga": match.group(1),
"type": match.group(2),
"chapter": self.chapter,

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015,2016 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,12 +17,12 @@ class HitomiGalleryExtractor(Extractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{num:>03}_{name}.{extension}"
pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"]
test = [("https://hitomi.la/galleries/867789.html", {
"url": "e42a47dfadda93e4bf37e82b1dc9ad29edfa9130",
"keyword": "03a64d67584afd7b8ad96ecb47acae08ea14d90f",
"keyword": "c007cd41229d727b2ced3b364350561444738351",
})]
def __init__(self, match):
@ -37,8 +37,7 @@ class HitomiGalleryExtractor(Extractor):
data["count"] = len(images)
yield Message.Version, 1
yield Message.Directory, data
for num, url in enumerate(images, 1):
data["num"] = num
for data["num"], url in enumerate(images, 1):
yield Message.Url, url, text.nameext_from_url(url, data)
def get_job_metadata(self, page):
@ -63,7 +62,7 @@ class HitomiGalleryExtractor(Extractor):
series, pos = text.extract(page, '.html">', '</a>', pos)
lang = lang.capitalize()
return {
"gallery-id": self.gid,
"gallery_id": self.gid,
"title": " ".join(title.split()),
"artist": string.capwords(artist),
"group": string.capwords(group),

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2016 Mike Fährmann
# Copyright 2014-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,13 +16,13 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
"""Extractor for image galleries from imagebam.com"""
category = "imagebam"
subcategory = "gallery"
directory_fmt = ["{category}", "{title} - {gallery-key}"]
directory_fmt = ["{category}", "{title} - {gallery_key}"]
filename_fmt = "{num:>03}-{filename}"
pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"]
test = [(("http://www.imagebam.com/"
"gallery/adz2y0f9574bjpmonaismyrhtjgvey4o"), {
"url": "d7a4483b6d5ebba81950a349aad58ae034c60eda",
"keyword": "e4a9395dbd06d4af3172a6a61c90601bc47ee18c",
"keyword": "0ab7bef5cf995d9229dc900dc508311cefb32306",
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
})]
url_base = "http://www.imagebam.com"
@ -32,12 +32,12 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
self.gkey = match.group(1)
def items(self):
data = self.get_job_metadata()
data["num"] = 0
data, url = self.get_job_metadata()
yield Message.Version, 1
yield Message.Directory, data
for image_url, image_id in self.get_images(data["first-url"]):
data["id"] = image_id
data["num"] = 0
for image_url, image_id in self.get_images(url):
data["image_id"] = image_id
data["num"] += 1
text.nameext_from_url(image_url, data)
yield Message.Url, image_url, data.copy()
@ -46,12 +46,14 @@ class ImagebamGalleryExtractor(AsynchronousExtractor):
"""Collect metadata for extractor-job"""
url = self.url_base + "/gallery/" + self.gkey
page = self.request(url, encoding="utf-8").text
return text.extract_all(page, (
data, pos = text.extract_all(page, (
(None , "<img src='/img/icons/photos.png'", ""),
("title" , "'> ", " <"),
("count" , "'>", " images"),
("first-url", "<a href='http://www.imagebam.com", "'"),
), values={"gallery-key": self.gkey})[0]
), values={"gallery_key": self.gkey})
url, pos = text.extract(
page, "<a href='http://www.imagebam.com", "'", pos)
return data, url
def get_images(self, url):
"""Yield all image-urls and -ids for a gallery"""

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,19 +17,19 @@ class ImagefapGalleryExtractor(Extractor):
"""Extractor for image galleries from imagefap.com"""
category = "imagefap"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)")]
test = [
("http://www.imagefap.com/gallery/6318447", {
"url": "f63e6876df83a40e1a98dad70e46952dd9edb7a7",
"keyword": "715f99ad154c4cf608afc7cd77dd1e896030646a",
"keyword": "275857d113bb007245de705ae7bd0dff7d677874",
"content": "38e50699db9518ae68648c45ecdd6be614efc324",
}),
("http://www.imagefap.com/gallery/5486966", {
"url": "eace9b33be99f87f3382c87bd915cf495a865d6e",
"keyword": "0f14b5547adb9ffda6a6ac8ded15fc2b44d23c4a",
"keyword": "b84da0543c2d1f848bf5e4c2950dd4f4543a1e0c",
}),
]
@ -55,7 +55,7 @@ class ImagefapGalleryExtractor(Extractor):
("title" , '<title>Porn pics of ', ' (Page 1)</title>'),
("uploader", '>Uploaded by ', '</font>'),
("count" , ' 1 of ', ' pics"'),
), values={"gallery-id": self.gid})
), values={"gallery_id": self.gid})
self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
data["title"] = text.unescape(data["title"])
return data
@ -74,7 +74,7 @@ class ImagefapGalleryExtractor(Extractor):
return
num += 1
_, imgid, name = imgurl.rsplit("/", 2)
data = {"image-id": imgid, "num": num}
data = {"image_id": imgid, "num": num}
yield imgurl, text.nameext_from_url(name, data)
params["idx"] += 24
@ -83,12 +83,12 @@ class ImagefapImageExtractor(Extractor):
"""Extractor for single images from imagefap.com"""
category = "imagefap"
subcategory = "image"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/photo/(\d+)"]
test = [("http://www.imagefap.com/photo/1616331218/", {
"url": "8a05c0ccdcf84e63c962803bc41d247628c549ea",
"keyword": "c9880c6731b3fdc6d98d25dbff56f4342c11683e",
"keyword": "c5023841c72b88949786c231f472f51453103185",
"content": "964b8c62c9d5c2a039a2fccf1b1e10aaf7a18a96",
})]
@ -113,8 +113,8 @@ class ImagefapImageExtractor(Extractor):
"date": info["datePublished"],
"width": info["width"],
"height": info["height"],
"gallery-id": parts[1],
"image-id": parts[2],
"gallery_id": parts[1],
"image_id": parts[2],
})
def load_json(self):
@ -136,8 +136,8 @@ class ImagefapUserExtractor(Extractor):
"""Extractor for all galleries from a user at imagefap.com"""
category = "imagefap"
subcategory = "user"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{name}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{name}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/"
r"profile(?:\.php\?user=|/)([^/]+)"),
(r"(?:https?://)?(?:www\.)?imagefap\.com/"
@ -161,7 +161,7 @@ class ImagefapUserExtractor(Extractor):
yield Message.Queue, "http://www.imagefap.com/gallery/" + gallery
def get_gallery_ids(self):
"""Yield all gallery-ids of a specific user"""
"""Yield all gallery_ids of a specific user"""
folders = self.get_gallery_folders()
url = "http://www.imagefap.com/ajax_usergallery_folder.php"
params = {"userid": self.user_id}
@ -171,7 +171,7 @@ class ImagefapUserExtractor(Extractor):
yield from text.extract_iter(page, '<a href="/gallery/', '"')
def get_gallery_folders(self):
"""Create a list of all folder-ids of a specific user"""
"""Create a list of all folder_ids of a specific user"""
if self.user:
url = "http://www.imagefap.com/profile/" + self.user + "/galleries"
else:

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 Mike Fährmann
# Copyright 2014-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,13 +17,13 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
"""Extractor for image galleries from imgbox.com"""
category = "imgbox"
subcategory = "gallery"
directory_fmt = ["{category}", "{title} - {gallery-key}"]
directory_fmt = ["{category}", "{title} - {gallery_key}"]
filename_fmt = "{num:>03}-{filename}"
pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"]
test = [
("https://imgbox.com/g/JaX5V5HX7g", {
"url": "6eafdeebaf0774238dddc9227e2ba315e40e9b7c",
"keyword": "cebd7f6868cf84ff492341c936cb6dbe5cde4682",
"keyword": "abe510221e1dc8c804296be25adf1498fb93f892",
"content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
}),
("https://imgbox.com/g/JaX5V5HX7h", {
@ -56,7 +56,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
title = text.extract(page, "<h1>", "</h1>")[0]
title, _, count = title.rpartition(" - ")
return {
"gallery-key": self.key,
"gallery_key": self.key,
"title": text.unescape(title),
"count": count[:-7],
}
@ -66,7 +66,7 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
return text.extract_all(page, (
("num" , '</a> &nbsp; ', ' of '),
(None , 'class="image-container"', ''),
("image-key", 'alt="', '"'),
("image_key", 'alt="', '"'),
("filename" , ' title="', '"'),
), values=self.metadata.copy())[0]
@ -86,7 +86,7 @@ class ImgboxImageExtractor(Extractor):
test = [
("https://imgbox.com/qHhw7lpG", {
"url": "b9556dc307edf88e016fbced6d354702bc236070",
"keyword": "ff0524dba869a4b3292d7d4f72f5da4024b4f002",
"keyword": "a5cdcdf6e784bb186ed65a0cd7978ae2d0e17a12",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
("https://imgbox.com/qHhw7lpH", {
@ -104,7 +104,7 @@ class ImgboxImageExtractor(Extractor):
if not url:
raise exception.NotFoundError("image")
filename, pos = text.extract(page, ' title="', '"', pos)
data = text.nameext_from_url(filename, {"image-key": self.key})
data = text.nameext_from_url(filename, {"image_key": self.key})
yield Message.Version, 1
yield Message.Directory, data
yield Message.Url, "https://i." + url, data

@ -48,7 +48,7 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
test = [(("http://imgchili.net/show/89427/"
"89427136_test___quot;___gt;.png"), {
"url": "b93d92a6b58eb30a7ff6f9729cb748d25fea0c86",
"keyword": "376c4584dfae7d7d2e88687d4ee9618bbfd0a35c",
"keyword": "9c584f848766e4cc71d9e7f5f1f849e296ec05ae",
})]
def get_job_metadata(self, page):
@ -59,7 +59,7 @@ class ImgchiliImageExtractor(ImgchiliExtractor):
parts = name2.split("in the gallery ")
name = parts[0] if not parts[0].endswith("...") else name1
return text.nameext_from_url(name, {
"image-id": self.match.group(1),
"image_id": self.match.group(1),
"title": text.unescape(parts[-1]) if len(parts) > 1 else ""
})
@ -75,7 +75,7 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"]
test = [("http://imgchili.net/album/7a3824c59f77c8d39b260f9168d4b49b", {
"url": "995e32b62c36d48b02ef4c7a7a19463924391e2a",
"keyword": "2d065bd7f822de4c0b7598679f2730e0082a617e",
"keyword": "ae0c56cfd1fe032e5bc22f1188767b2a923ae25e",
})]
def get_job_metadata(self, page):
@ -95,5 +95,5 @@ class ImgchiliAlbumExtractor(ImgchiliExtractor):
return
imgid, pos = text.extract(page, ' alt="', '_', pos)
name , pos = text.extract(page, '<strong>', '</strong>', pos)
data = text.nameext_from_url(name, {"image-id": imgid, "num": num})
data = text.nameext_from_url(name, {"image_id": imgid, "num": num})
yield "http://i" + url + data["extension"], data

@ -16,12 +16,12 @@ class ImgthGalleryExtractor(Extractor):
"""Extractor for image galleries from imgth.com"""
category = "imgth"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"]
test = [("http://imgth.com/gallery/37/wallpaper-anime", {
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
"keyword": "3f268fcc18d49ac3799a8f25cc08053e90891955",
"keyword": "e62d14f20ded393d28c2789fcc34ea2c30bc6a7c",
})]
def __init__(self, match):
@ -62,4 +62,4 @@ class ImgthGalleryExtractor(Extractor):
("date" , 'created on ', ' by <'),
(None , 'href="/users/', ''),
("user" , '>', '<'),
), values={"gallery-id": self.gid})[0]
), values={"gallery_id": self.gid})[0]

@ -25,8 +25,8 @@ class KissmangaExtractor(Extractor):
"""Base class for kissmanga extractors"""
category = "kissmanga"
directory_fmt = ["{category}", "{manga}",
"c{chapter:>03}{chapter-minor} - {title}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"c{chapter:>03}{chapter_minor} - {title}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
"{page:>03}.{extension}")
root = "http://kissmanga.com"
@ -60,15 +60,15 @@ class KissmangaChapterExtractor(KissmangaExtractor):
test = [
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
"url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0",
"keyword": "ab332093a4f2e473a468235bfd624cbe3b19fd7f",
"keyword": "97cc9e513953e20d6309648df57a52a7ced59ae0",
}),
("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
"url": "de074848f6c1245204bb9214c12bcc3ecfd65019",
"keyword": "013aad80e578c6ccd2e1fe47cdc27c12a64f6db2",
"keyword": "3d96653188b761752c38b60d6e397e2ace0ea04c",
}),
("http://kissmanga.com/Manga/Monster/Monster-79?id=7608", {
"url": "6abec8178f35fe7846586280ca9e38eacc32452c",
"keyword": "ca7a07ecfd9525c0f825dc747f520306611d6af9",
"keyword": "2ae18e456a4a7e4a2889af49d5f2e9c10fbc45e6",
}),
("http://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608", None),
]
@ -95,7 +95,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
"manga": manga,
"volume": match.group(1) or "",
"chapter": match.group(2) or match.group(5),
"chapter-minor": "."+chminor if chminor else "",
"chapter_minor": "."+chminor if chminor else "",
"title": match.group(4) or "",
"lang": "en",
"language": "English",

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,20 +16,20 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
"""Extractor for image albums from luscious.net"""
category = "luscious"
subcategory = "album"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/"
r"(?:c/[^/]+/)?(?:pictures/album|albums)/([^/]+_(\d+))")]
test = [
(("https://luscious.net/c/hentai_manga/albums/"
"okinami-no-koigokoro_277031/view/"), {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
"keyword": "8533c72ff85578240cf7594eb617d907bebf87ab",
"keyword": "76e099479b180420fd5cf820f00c52fe07fda884",
"content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
}),
("https://luscious.net/albums/virgin-killer-sweater_282582/", {
"url": "01e2d7dd6eecea0152610f2446a6b1d60519c8bd",
"keyword": "6c8750df7f38ff4e15cabc9a3a2e876b84a328d6",
"keyword": "02624ff1097260e2a3c1b220afc92ea4c6b109b3",
}),
("https://luscious.net/albums/okinami-no-koigokoro_277031/", None),
]
@ -58,7 +58,7 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
(None , '<p>Section:', ''),
("section" , '>', '<'),
("language", '<p>Language:', ' '),
), values={"gallery-id": self.gid})[0]
), values={"gallery_id": self.gid})[0]
data["lang"] = util.language_to_code(data["language"])
try:
data["artist"] = text.extract(data["tags"], "rtist: ", ",")[0]
@ -84,6 +84,6 @@ class LusciousAlbumExtractor(AsynchronousExtractor):
"num": num,
"name": name,
"extension": iurl.rpartition(".")[2],
"image-id": imgid,
"image_id": imgid,
}
num += 1

@ -17,14 +17,14 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
"""Extractor for manga-chapters from mangafox.me"""
category = "mangafox"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter_minor}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.)?(mangafox\.me/manga/"
r"[^/]+/(v\d+/)?c\d+[^/]*)")]
test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/"
"v05/c006.2/1.html"), {
"keyword": "3bae0396e96868f5f24dff5e547a6bbfcbed7282",
"keyword": "ef2757d6136ef6b02eafe12d98a05f189fe8b2ba",
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
})]
@ -59,7 +59,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter"])
data["volume"] = match.group(2) or ""
data["chapter"] = match.group(3)
data["chapter-minor"] = match.group(4) or ""
data["chapter_minor"] = match.group(4) or ""
data["manga"] = data["manga"].rpartition(" ")[0]
return data

@ -32,13 +32,13 @@ class MangahereChapterExtractor(AsynchronousExtractor):
"""Extractor for manga-chapters from mangahere.co"""
category = "mangahere"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter_minor}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
"{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/"
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
"keyword": "d3fe470e934a9f02ed00d4391b1743970eae82fa",
"keyword": "8cb9f9512b68d2cdcbea2419592b9247304c149b",
"content": "dd8454469429c6c717cbc3cad228e76ef8c6e420",
})]
url_fmt = "http://www.mangahere.co/manga/{}/{}.html"
@ -74,8 +74,8 @@ class MangahereChapterExtractor(AsynchronousExtractor):
# "title": TODO,
"volume": self.volume or "",
"chapter": self.chapter,
"chapter-minor": self.chminor or "",
"chapter-id": chid,
"chapter_minor": self.chminor or "",
"chapter_id": chid,
"count": count,
"lang": "en",
"language": "English",

@ -35,5 +35,5 @@ class MangapandaChapterExtractor(MangapandaBase, MangareaderChapterExtractor):
]
test = [("http://www.mangapanda.com/red-storm/2", {
"url": "4bf4ddf6c50105ec8a37675495ab80c46608275d",
"keyword": "89c712f7ed255ec9c1d8e84dcb5a160b6cb4498c",
"keyword": "16c881a09f5cf0f2c1c374665b667d1f26b20c22",
})]

@ -32,24 +32,24 @@ class MangaparkChapterExtractor(Extractor):
category = "mangapark"
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}",
"c{chapter:>03}{chapter-minor} - {title}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_"
"c{chapter:>03}{chapter_minor} - {title}"]
filename_fmt = ("{manga}_c{chapter:>03}{chapter_minor}_"
"{page:>03}.{extension}")
pattern = [(r"(?:https?://)?(?:www\.)?mangapark\.me/manga/"
r"([^/]+/s(\d+)(?:/v([^/]+))?/c(\d+)(?:([^/]+)|/e(\d+))?)")]
test = [
("http://mangapark.me/manga/gosu/s2/c55", {
"count": 50,
"keyword": "bd97ca24ef344b44292910384215ef3f1005ea2e",
"keyword": "b2216c0b8621a86be51eced72a2a61ba9f47e11e",
}),
(("http://mangapark.me/manga/"
"ad-astra-per-aspera-hata-kenjirou/s1/c1.2"), {
"count": 40,
"keyword": "f28eb26b4966bebda0e761f241c2dd49e505ce13",
"keyword": "257d90d582b8f259380a61a0774ff2c4ae5b3986",
}),
("http://mangapark.me/manga/gekkan-shoujo-nozaki-kun/s2/c70/e2/1", {
"count": 15,
"keyword": "34aa6ca3bdf5078f839cbf68ff68e39728cf248b",
"keyword": "cf5329984c062058133e12ee3fe4a53fb734ed4c",
}),
]
@ -80,13 +80,13 @@ class MangaparkChapterExtractor(Extractor):
"version": self.version,
"volume": self.volume or "",
"chapter": self.chapter,
"chapter-minor": self.chminor or "",
"chapter_minor": self.chminor or "",
"lang": "en",
"language": "English",
}
data = text.extract_all(page, (
("manga-id" , "var _manga_id = '", "'"),
("chapter-id", "var _book_id = '", "'"),
("manga_id" , "var _manga_id = '", "'"),
("chapter_id", "var _book_id = '", "'"),
("manga" , "<h2>", "</h2>"),
("title" , "</a>", "<"),
(None , 'target="_blank" href="', ''),

@ -45,7 +45,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
test = [(("http://www.mangareader.net/"
"karate-shoukoushi-kohinata-minoru/11"), {
"url": "84ffaab4c027ef9022695c53163c3aeabd07ca58",
"keyword": "09b4ad57a082eb371dec027ccfc8ed1157c6eac6",
"keyword": "05ef372e80257726166f78625cb78a09e6d9b1d1",
})]
def __init__(self, match):
@ -77,14 +77,14 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
(None, '<td class="propertytitle">Name:', ''),
("manga", '<h2 class="aname">', '</h2>'),
(None, '<td class="propertytitle">Year of Release:', ''),
('manga-release', '<td>', '</td>'),
('release', '<td>', '</td>'),
(None, '<td class="propertytitle">Author:', ''),
('author', '<td>', '</td>'),
(None, '<td class="propertytitle">Artist:', ''),
('artist', '<td>', '</td>'),
(None, '<div id="readmangasum">', ''),
('title', ' ' + self.chapter + '</a> : ', '</td>'),
('chapter-date', '<td>', '</td>'),
('date', '<td>', '</td>'),
), values=data)
data, _ = text.extract_all(chapter_page, (
(None, '<select id="pageMenu"', ''),

@ -17,12 +17,12 @@ class NhentaiGalleryExtractor(Extractor):
"""Extractor for image galleries from nhentai.net"""
category = "nhentai"
subcategory = "gallery"
directory_fmt = ["{category}", "{gallery-id} {title}"]
filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}"
directory_fmt = ["{category}", "{gallery_id} {title}"]
filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"]
test = [("http://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
"keyword": "574e36436a1c01c82e5779207e44e4e78d0e1726",
"keyword": "82751294e75fc203b019ffd94d8c1f94a5b86494",
})]
def __init__(self, match):
@ -33,7 +33,7 @@ class NhentaiGalleryExtractor(Extractor):
ginfo = self.get_gallery_info()
data = self.get_job_metadata(ginfo)
urlfmt = "{}galleries/{}/{{}}.{{}}".format(
ginfo["media_url"], data["media-id"])
ginfo["media_url"], data["media_id"])
extdict = {"j": "jpg", "p": "png", "g": "gif"}
yield Message.Version, 1
yield Message.Directory, data
@ -60,12 +60,12 @@ class NhentaiGalleryExtractor(Extractor):
title_en = ginfo["title"].get("english", "")
title_ja = ginfo["title"].get("japanese", "")
return {
"gallery-id": self.gid,
"upload-date": ginfo["upload_date"],
"media-id": ginfo["media_id"],
"gallery_id": self.gid,
"upload_date": ginfo["upload_date"],
"media_id": ginfo["media_id"],
"scanlator": ginfo["scanlator"],
"count": ginfo["num_pages"],
"title": title_en or title_ja,
"title-en": title_en,
"title-ja": title_ja,
"title_en": title_en,
"title_ja": title_ja,
}

@ -16,8 +16,8 @@ from ..cache import cache
class NijieExtractor(AsynchronousExtractor):
"""Base class for nijie extractors"""
category = "nijie"
directory_fmt = ["{category}", "{artist-id}"]
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
directory_fmt = ["{category}", "{artist_id}"]
filename_fmt = "{category}_{artist_id}_{image_id}_p{index:>02}.{extension}"
cookiedomain = "nijie.info"
popup_url = "https://nijie.info/view_popup.php?id="
@ -39,7 +39,7 @@ class NijieExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
return {"artist-id": self.artist_id}
return {"artist_id": self.artist_id}
def get_image_ids(self):
"""Collect all image-ids for a specific artist"""
@ -58,7 +58,7 @@ class NijieExtractor(AsynchronousExtractor):
yield "https://pic" + url, text.nameext_from_url(url, {
"count": len(images),
"index": index,
"image-id": image_id,
"image_id": image_id,
})
def login(self):
@ -87,7 +87,7 @@ class NijieUserExtractor(NijieExtractor):
test = [
("https://nijie.info/members_illust.php?id=44", {
"url": "585d821df4716b1098660a0be426d01db4b65f2a",
"keyword": "7a2dbf8fc0dfdb2af208ecdb8ec7f3186bdc31ab",
"keyword": "804d3a9bb8205048ac0d1fe8eec39266b50f1e8e",
}),
("https://nijie.info/members_illust.php?id=43", {
"exception": exception.NotFoundError,
@ -119,7 +119,7 @@ class NijieImageExtractor(NijieExtractor):
test = [
("https://nijie.info/view.php?id=70720", {
"url": "a10d4995645b5f260821e32c60a35f73546c2699",
"keyword": "e454c2bad9b636b90d569881bf4fe8438506e0d2",
"keyword": "4ecfd46460761b7a89fdba815eece10e917032c2",
"content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
}),
("https://nijie.info/view.php?id=70724", {

@ -15,7 +15,7 @@ from .. import text, exception
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
filename_fmt = "{category}_{pin-id}.{extension}"
filename_fmt = "{category}_{pin_id}.{extension}"
def __init__(self):
Extractor.__init__(self)
@ -26,7 +26,7 @@ class PinterestExtractor(Extractor):
img = pin["image"]["original"]
url = img["url"]
data = {
"pin-id": pin["id"],
"pin_id": pin["id"],
"note": pin["note"],
"width": img["width"],
"height": img["height"],
@ -41,7 +41,7 @@ class PinterestPinExtractor(PinterestExtractor):
test = [
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "7abf2be76bf03d452feacf6e000b040fc2706b80",
"keyword": "e1a2ce625ece86f0b31f0ae94a3af3d72e6454b9",
"keyword": "5aac8028244b865824c61667f6cadd51e8765853",
"content": "d3e24bc9f7af585e8c23b9136956bd45a4d9b947",
}),
("https://www.pinterest.com/pin/858146903966145188/", {
@ -70,7 +70,7 @@ class PinterestBoardExtractor(PinterestExtractor):
test = [
("https://www.pinterest.com/g1952849/test-/", {
"url": "705ee521630a5d613b0449d694a5345e684572a9",
"keyword": "2815716747f84fa0a4047d29d71df8ae96a0e177",
"keyword": "1650dd31c4dedd940cef399135e485400625ec0b",
"content": "30897fb5d5616765bb2c9c26cb84f54499424fb4",
}),
("https://www.pinterest.com/g1952848/test/", {
@ -99,7 +99,7 @@ class PinterestBoardExtractor(PinterestExtractor):
"""Get metadata from a board-object"""
data = {
"user": self.user,
"board-id": board["id"],
"board_id": board["id"],
"board": board["name"],
"count": board["counts"]["pins"],
}

@ -39,7 +39,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
yield Message.Directory, data
for image in self.get_images():
image.update(data)
yield Message.Url, image["file-url"], image
yield Message.Url, image["file_url"], image
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
@ -75,7 +75,7 @@ class SankakuTagExtractor(AsynchronousExtractor):
height , pos = text.extract(page, '', ' ', pos)
data = text.nameext_from_url(image_url, {
"id": image_id,
"file-url": "https:" + text.unescape(image_url),
"file_url": "https:" + text.unescape(image_url),
"width": width,
"height": height,
})

@ -27,7 +27,7 @@ class SeigaExtractor(Extractor):
for image in self.get_images():
data.update(image)
data["extension"] = None
url = self.get_image_url(image["image-id"])
url = self.get_image_url(image["image_id"])
yield Message.Url, url, data
def get_metadata(self):
@ -68,17 +68,17 @@ class SeigaExtractor(Extractor):
class SeigaUserExtractor(SeigaExtractor):
"""Extractor for images of a user from seiga.nicovideo.jp"""
subcategory = "user"
directory_fmt = ["{category}", "{user-id}"]
filename_fmt = "{category}_{user-id}_{image-id}.{extension}"
directory_fmt = ["{category}", "{user_id}"]
filename_fmt = "{category}_{user_id}_{image_id}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
r"user/illust/(\d+)")]
test = [
("http://seiga.nicovideo.jp/user/illust/39537793", {
"keyword": "15e4158164f9309c75c0f97169e0026b13a642ed",
"keyword": "a716bf534b4191dc58ddbff51494b72a9cf58285",
}),
("http://seiga.nicovideo.jp/user/illust/79433", {
"url": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
"keyword": "82b330a4d1e8a2cd47ee934a0a40829232b49cdc",
"keyword": "187b77728381d072466af7f7ebcc479a0830ce25",
}),
]
@ -87,10 +87,10 @@ class SeigaUserExtractor(SeigaExtractor):
self.user_id = match.group(1)
def get_metadata(self):
return {"user-id": self.user_id}
return {"user_id": self.user_id}
def get_images(self):
keymap = {0: "image-id", 2: "title", 3: "description",
keymap = {0: "image_id", 2: "title", 3: "description",
7: "summary", 8: "genre", 18: "date"}
url = "http://seiga.nicovideo.jp/api/user/data?id=" + self.user_id
response = self.request(url)
@ -114,14 +114,14 @@ class SeigaUserExtractor(SeigaExtractor):
class SeigaImageExtractor(SeigaExtractor):
"""Extractor for single images from seiga.nicovideo.jp"""
subcategory = "image"
filename_fmt = "{category}_{image-id}.{extension}"
filename_fmt = "{category}_{image_id}.{extension}"
pattern = [(r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
r"(?:seiga/im|image/source/)(\d+)"),
(r"(?:https?://)?lohas\.nicoseiga\.jp/"
r"(?:priv|o)/[^/]+/\d+/(\d+)")]
test = [
("http://seiga.nicovideo.jp/seiga/im5977527", {
"keyword": "3b61d2fc26efb74547f47c522051cf3596ff6b62",
"keyword": "6ff7564b35890e333ff7413cb633ddb58339912f",
"content": "d9202292012178374d57fb0126f6124387265297",
}),
("http://seiga.nicovideo.jp/seiga/im123", {
@ -134,4 +134,4 @@ class SeigaImageExtractor(SeigaExtractor):
self.image_id = match.group(1)
def get_images(self):
return ({"image-id": self.image_id},)
return ({"image_id": self.image_id},)

@ -22,7 +22,7 @@ class TumblrUserExtractor(Extractor):
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com(?:/page/\d+)?/?$"]
test = [("http://demo.tumblr.com/", {
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
"keyword": "d2cf142bcaf1cbea29291f8c8ccb5f582962d8be",
"keyword": "8f1b06c2a0a562b10df3e62ab2a8156e3da1855b",
})]
def __init__(self, match):
@ -42,6 +42,8 @@ class TumblrUserExtractor(Extractor):
image.update(data)
image = text.nameext_from_url(url, image)
image["hash"] = text.extract(image["name"], "_", "_")[0]
image = {key.replace("-", "_"): value
for key, value in image.items()}
yield Message.Url, url, image
def get_job_metadata(self, image_data):
@ -100,7 +102,7 @@ class TumblrPostExtractor(TumblrUserExtractor):
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/post/(\d+)"]
test = [("http://demo.tumblr.com/post/459265350", {
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
"keyword": "ce211deb8e3936e2202f3f82f38375fd14781b79",
"keyword": "4d5bc44bf8ec334fdaf78696edf215574fa6d998",
})]
def __init__(self, match):
@ -114,7 +116,7 @@ class TumblrTagExtractor(TumblrUserExtractor):
pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/tagged/(.+)"]
test = [("http://demo.tumblr.com/tagged/Times Square", {
"url": "5c113da25a605b7449de8ca1606eec5502b4dc9f",
"keyword": "f36901e86c5d20affbe66f78c6b5717d34466fc4",
"keyword": "b0465d131ecb097633127b79805432dacae06d14",
})]
def __init__(self, match):

@ -17,18 +17,18 @@ class TwitterTweetExtractor(Extractor):
category = "twitter"
subcategory = "tweet"
directory_fmt = ["{category}", "{user}"]
filename_fmt = "{tweet-id}_{num}.{extension}"
filename_fmt = "{tweet_id}_{num}.{extension}"
pattern = [r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/"
r"(([^/]+)/status/(\d+))"]
test = [
("https://twitter.com/PicturesEarth/status/672897688871018500", {
"url": "d9e68d41301d2fe382eb27711dea28366be03b1a",
"keyword": "3cd8e27026a2112008985b1b53f5e4baf4616177",
"keyword": "7a6eac2bc88bbf16d0671ebb38e31f708d940ee8",
"content": "a1f2f04cb2d8df24b1afa7a39910afda23484342",
}),
("https://twitter.com/perrypumas/status/894001459754180609", {
"url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",
"keyword": "8438551b34caf2f580ba23f6014509c8dd5e1e0f",
"keyword": "334cd0c1f85c3e66923b44740f17407ce444931e",
}),
]
@ -54,7 +54,7 @@ class TwitterTweetExtractor(Extractor):
"""Collect metadata for extractor-job"""
return {
"user": self.user,
"tweet-id": self.tid,
"tweet_id": self.tid,
}
@staticmethod

@ -22,11 +22,11 @@ class WarosuThreadExtractor(Extractor):
test = [
("https://warosu.org/jp/thread/16656025", {
"url": "889d57246ed67e491e5b8f7f124e50ea7991e770",
"keyword": "dab56209e31634b44eb99a2cdd85fa922c726b4f",
"keyword": "65607b4630d87767465a5985c81cfa594913c073",
}),
("https://warosu.org/jp/thread/16658073", {
"url": "4500cf3184b067424fd9883249bd543c905fbecd",
"keyword": "084369b27b8cfc08a2276e00a4be6ffd7b1e5088",
"keyword": "d88ea2280201a7b04256c852733faff7272d7d11",
"content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
}),
]
@ -59,7 +59,7 @@ class WarosuThreadExtractor(Extractor):
title = text.extract(page, 'filetitle" itemprop="name">', '<')[0]
return {
"board": self.board,
"board-name": boardname.rpartition(" - ")[2],
"board_name": boardname.rpartition(" - ")[2],
"thread": self.thread,
"title": title,
}
