code cleanup and fixes

pull/30/head
Mike Fährmann 7 years ago
parent f08af03845
commit 7aa9fa796a
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -26,7 +26,7 @@ class BatotoExtractor():
"""Login and set necessary cookies"""
if self._check_cookies(self.cookienames):
return
username, password = self.auth_info()
username, password = self._get_auth_info()
if username:
cookies = self._login_impl(username, password)
for key, value in cookies.items():

@ -79,7 +79,6 @@ class FoolfuukaThreadExtractor(Extractor):
def items(self):
op = True
yield Message.Version, 1
yield Message.Headers, self.session.headers
for post in self.posts():
if op:
yield Message.Directory, post

@ -32,20 +32,7 @@ class Extractor():
def __init__(self):
self.session = requests.Session()
self.log = logging.getLogger(self.category)
cookies = self.config("cookies")
if cookies:
if isinstance(cookies, dict):
setcookie = self.session.cookies.set
for name, value in cookies.items():
setcookie(name, value, domain=self.cookiedomain)
else:
try:
cj = http.cookiejar.MozillaCookieJar()
cj.load(cookies)
self.session.cookies.update(cj)
except OSError as exc:
self.log.warning("cookies: %s", exc)
self._set_cookies(self.config("cookies"))
def __iter__(self):
return self.items()
@ -60,7 +47,13 @@ class Extractor():
return config.interpolate(
("extractor", self.category, self.subcategory, key), default)
def auth_info(self):
def request(self, url, encoding=None, *args, **kwargs):
response = safe_request(self.session, url, *args, **kwargs)
if encoding:
response.encoding = encoding
return response
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
username = self.config("username")
password = None
@ -78,14 +71,23 @@ class Extractor():
return username, password
def request(self, url, encoding=None, *args, **kwargs):
response = safe_request(self.session, url, *args, **kwargs)
if encoding:
response.encoding = encoding
return response
def _set_cookies(self, cookies):
"""Populate the cookiejar with 'cookies'"""
if cookies:
if isinstance(cookies, dict):
setcookie = self.session.cookies.set
for name, value in cookies.items():
setcookie(name, value, domain=self.cookiedomain)
else:
try:
cj = http.cookiejar.MozillaCookieJar()
cj.load(cookies)
self.session.cookies.update(cj)
except OSError as exc:
self.log.warning("cookies: %s", exc)
def _check_cookies(self, cookienames, domain=None):
"""Return True if all 'cookienames' exist in the current session"""
"""Check if all 'cookienames' are in the session's cookiejar"""
if not domain and self.cookiedomain:
domain = self.cookiedomain
for name in cookienames:

@ -180,7 +180,7 @@ class ExhentaiGalleryExtractor(Extractor):
"""Login and set necessary cookies"""
if self._check_cookies(self.cookienames):
return
username, password = self.auth_info()
username, password = self._get_auth_info()
if not username:
self.log.info("no username given; using e-hentai.org")
self.root = "https://e-hentai.org"

@ -63,7 +63,7 @@ class MangahereChapterExtractor(AsynchronousExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
manga, pos = text.extract(page, '<title>', '</title>')
chid , pos = text.extract(page, 'h.mhcdn.net/store/manga/', '/', pos)
chid , pos = text.extract(page, '.mhcdn.net/store/manga/', '/', pos)
_ , pos = text.extract(page, '<select class="wid60"', '', pos)
_ , pos = text.extract(page, '</select>', '', pos)
count, pos = text.extract(page, '>', '<', pos-30)

@ -63,10 +63,9 @@ class NijieExtractor(AsynchronousExtractor):
def login(self):
"""Login and obtain session cookie"""
if self._check_cookies(("nemail", "nlogin")):
return
username, password = self.auth_info()
self.session.cookies = self._login_impl(username, password)
if not self._check_cookies(("nemail", "nlogin")):
username, password = self._get_auth_info()
self.session.cookies = self._login_impl(username, password)
@cache(maxage=30*24*60*60, keyarg=1)
def _login_impl(self, username, password):

@ -269,7 +269,7 @@ class PixivAPI():
def __init__(self, extractor):
self.session = extractor.session
self.log = extractor.log
self.username, self.password = extractor.auth_info()
self.username, self.password = extractor._get_auth_info()
self.user_info = None
self.session.headers.update({
"Referer": "https://www.pixiv.net/",

@ -48,10 +48,9 @@ class SeigaExtractor(Extractor):
def login(self):
"""Login and set necessary cookies"""
if self._check_cookies(("user_session",)):
return
username, password = self.auth_info()
self.session.cookies = self._login_impl(username, password)
if not self._check_cookies(("user_session",)):
username, password = self._get_auth_info()
self.session.cookies = self._login_impl(username, password)
@cache(maxage=7*24*60*60, keyarg=1)
def _login_impl(self, username, password):

@ -43,6 +43,7 @@ class TestCookiejar(unittest.TestCase):
@classmethod
def tearDownClass(cls):
cls.path.cleanup()
config.clear()
def test_cookiefile(self):
config.set(CKEY, self.cookiefile)
@ -79,6 +80,9 @@ class TestCookiedict(unittest.TestCase):
self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
config.set(CKEY, self.cdict)
def tearDown(self):
config.clear()
def test_dict(self):
cookies = extractor.find("test:").session.cookies
self.assertEqual(len(cookies), len(self.cdict))
@ -97,6 +101,9 @@ class TestCookiedict(unittest.TestCase):
class TestCookieLogin(unittest.TestCase):
def tearDown(self):
config.clear()
def test_cookie_login(self):
extr_cookies = {
"batoto": ("member_id", "pass_hash"),

@ -23,6 +23,9 @@ class TestExtractors(unittest.TestCase):
config.set(("extractor", "nijie", "username"), email)
config.set(("extractor", "seiga", "username"), email)
def tearDown(self):
config.clear()
def _run_test(self, extr, url, result):
content = "content" in result if result else False
tjob = job.TestJob(url, content)

Loading…
Cancel
Save