[hatenablog] fix extractor naming errors

pull/5037/head
blankie 9 months ago
parent be6949c55d
commit 2cfe788f93
No known key found for this signature in database
GPG Key ID: CC15FC822C7F61F5

@@ -19,7 +19,7 @@ BASE_PATTERN = (
QUERY_RE = r"(?:\?([^#]*))?(?:#.*)?$" QUERY_RE = r"(?:\?([^#]*))?(?:#.*)?$"
class HatenaBlogExtractor(Extractor): class HatenablogExtractor(Extractor):
"""Base class for HatenaBlog extractors""" """Base class for HatenaBlog extractors"""
category = "hatenablog" category = "hatenablog"
directory_fmt = ("{category}", "{domain}") directory_fmt = ("{category}", "{domain}")
@@ -65,12 +65,12 @@ class HatenaBlogExtractor(Extractor):
yield Message.Url, url, text.nameext_from_url(url, data) yield Message.Url, url, text.nameext_from_url(url, data)
class HatenaBlogEntriesExtractor(HatenaBlogExtractor): class HatenablogEntriesExtractor(HatenablogExtractor):
"""Base class for a list of entries""" """Base class for a list of entries"""
allowed_parameters = () allowed_parameters = ()
def __init__(self, match): def __init__(self, match):
HatenaBlogExtractor.__init__(self, match) HatenablogExtractor.__init__(self, match)
self.path = match.group(3) self.path = match.group(3)
self.query = {key: value for key, value in text.parse_query( self.query = {key: value for key, value in text.parse_query(
match.group(4)).items() if self._acceptable_query(key)} match.group(4)).items() if self._acceptable_query(key)}
@@ -103,7 +103,7 @@ class HatenaBlogEntriesExtractor(HatenaBlogExtractor):
url = "hatenablog:" + text.unescape(text.extr( url = "hatenablog:" + text.unescape(text.extr(
section, '<a class="entry-title-link" href="', '"')) section, '<a class="entry-title-link" href="', '"'))
data = {"_extractor": HatenaBlogEntryExtractor} data = {"_extractor": HatenablogEntryExtractor}
yield Message.Queue, url, data yield Message.Queue, url, data
def _handle_full_articles(self, extr): def _handle_full_articles(self, extr):
@@ -121,14 +121,14 @@ class HatenaBlogEntriesExtractor(HatenaBlogExtractor):
return key == "page" or key in self.allowed_parameters return key == "page" or key in self.allowed_parameters
class HatenaBlogEntryExtractor(HatenaBlogExtractor): class HatenablogEntryExtractor(HatenablogExtractor):
"""Extractor for a single entry URL""" """Extractor for a single entry URL"""
subcategory = "entry" subcategory = "entry"
pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE
example = "https://BLOG.hatenablog.com/entry/PATH" example = "https://BLOG.hatenablog.com/entry/PATH"
def __init__(self, match): def __init__(self, match):
HatenaBlogExtractor.__init__(self, match) HatenablogExtractor.__init__(self, match)
self.path = match.group(3) self.path = match.group(3)
def items(self): def items(self):
@@ -144,14 +144,14 @@ class HatenaBlogEntryExtractor(HatenaBlogExtractor):
return self._handle_article(article) return self._handle_article(article)
class HatenaBlogHomeExtractor(HatenaBlogEntriesExtractor): class HatenablogHomeExtractor(HatenablogEntriesExtractor):
"""Extractor for a blog's home page""" """Extractor for a blog's home page"""
subcategory = "home" subcategory = "home"
pattern = BASE_PATTERN + r"(/?)" + QUERY_RE pattern = BASE_PATTERN + r"(/?)" + QUERY_RE
example = "https://BLOG.hatenablog.com" example = "https://BLOG.hatenablog.com"
class HatenaBlogArchiveExtractor(HatenaBlogEntriesExtractor): class HatenablogArchiveExtractor(HatenablogEntriesExtractor):
"""Extractor for a blog's archive page""" """Extractor for a blog's archive page"""
subcategory = "archive" subcategory = "archive"
pattern = BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?" + \ pattern = BASE_PATTERN + r"(/archive(?:/\d+(?:/\d+(?:/\d+)?)?" + \
@@ -159,7 +159,7 @@ class HatenaBlogArchiveExtractor(HatenaBlogEntriesExtractor):
example = "https://BLOG.hatenablog.com/archive/2024" example = "https://BLOG.hatenablog.com/archive/2024"
class HatenaBlogSearchExtractor(HatenaBlogEntriesExtractor): class HatenablogSearchExtractor(HatenablogEntriesExtractor):
"""Extractor for a blog's search results""" """Extractor for a blog's search results"""
subcategory = "search" subcategory = "search"
pattern = BASE_PATTERN + r"(/search)" + QUERY_RE pattern = BASE_PATTERN + r"(/search)" + QUERY_RE

@@ -11,38 +11,38 @@ __tests__ = (
{ {
"#url" : "https://cosmiclatte.hatenablog.com/entry/2020/05/28/003227", "#url" : "https://cosmiclatte.hatenablog.com/entry/2020/05/28/003227",
"#category": ("", "hatenablog", "entry"), "#category": ("", "hatenablog", "entry"),
"#class" : hatenablog.HatenaBlogEntryExtractor, "#class" : hatenablog.HatenablogEntryExtractor,
"#count" : 20, "#count" : 20,
}, },
{ {
"#url" : "https://moko0908.hatenablog.jp/entry/2023/12/31/083846", "#url" : "https://moko0908.hatenablog.jp/entry/2023/12/31/083846",
"#category": ("", "hatenablog", "entry"), "#category": ("", "hatenablog", "entry"),
"#class" : hatenablog.HatenaBlogEntryExtractor, "#class" : hatenablog.HatenablogEntryExtractor,
}, },
{ {
"#url" : "https://p-shirokuma.hatenadiary.com/entry/20231227/1703685600", "#url" : "https://p-shirokuma.hatenadiary.com/entry/20231227/1703685600",
"#category": ("", "hatenablog", "entry"), "#category": ("", "hatenablog", "entry"),
"#class" : hatenablog.HatenaBlogEntryExtractor, "#class" : hatenablog.HatenablogEntryExtractor,
}, },
{ {
"#url" : "https://urakatahero.hateblo.jp/entry/2ndlife", "#url" : "https://urakatahero.hateblo.jp/entry/2ndlife",
"#category": ("", "hatenablog", "entry"), "#category": ("", "hatenablog", "entry"),
"#class" : hatenablog.HatenaBlogEntryExtractor, "#class" : hatenablog.HatenablogEntryExtractor,
}, },
{ {
"#url" : "hatenablog:https://blog.hyouhon.com/entry/2023/12/22/133549", "#url" : "hatenablog:https://blog.hyouhon.com/entry/2023/12/22/133549",
"#category": ("", "hatenablog", "entry"), "#category": ("", "hatenablog", "entry"),
"#class" : hatenablog.HatenaBlogEntryExtractor, "#class" : hatenablog.HatenablogEntryExtractor,
}, },
{ {
"#url" : "https://cetriolo.hatenablog.com", "#url" : "https://cetriolo.hatenablog.com",
"#category": ("", "hatenablog", "home"), "#category": ("", "hatenablog", "home"),
"#class" : hatenablog.HatenaBlogHomeExtractor, "#class" : hatenablog.HatenablogHomeExtractor,
"#range" : "1-7", "#range" : "1-7",
"#count" : 7, "#count" : 7,
}, },
@@ -50,25 +50,25 @@ __tests__ = (
{ {
"#url" : "https://moko0908.hatenablog.jp/", "#url" : "https://moko0908.hatenablog.jp/",
"#category": ("", "hatenablog", "home"), "#category": ("", "hatenablog", "home"),
"#class" : hatenablog.HatenaBlogHomeExtractor, "#class" : hatenablog.HatenablogHomeExtractor,
}, },
{ {
"#url" : "https://p-shirokuma.hatenadiary.com/", "#url" : "https://p-shirokuma.hatenadiary.com/",
"#category": ("", "hatenablog", "home"), "#category": ("", "hatenablog", "home"),
"#class" : hatenablog.HatenaBlogHomeExtractor, "#class" : hatenablog.HatenablogHomeExtractor,
}, },
{ {
"#url" : "https://urakatahero.hateblo.jp/", "#url" : "https://urakatahero.hateblo.jp/",
"#category": ("", "hatenablog", "home"), "#category": ("", "hatenablog", "home"),
"#class" : hatenablog.HatenaBlogHomeExtractor, "#class" : hatenablog.HatenablogHomeExtractor,
}, },
{ {
"#url" : "hatenablog:https://blog.hyouhon.com/", "#url" : "hatenablog:https://blog.hyouhon.com/",
"#category": ("", "hatenablog", "home"), "#category": ("", "hatenablog", "home"),
"#class" : hatenablog.HatenaBlogHomeExtractor, "#class" : hatenablog.HatenablogHomeExtractor,
}, },
{ {
@@ -76,7 +76,7 @@ __tests__ = (
"%83%AB%E3%83%95%E3%82%B8%E3%82%A7%E3%83%AB%E3%83%8D%E3%82" "%83%AB%E3%83%95%E3%82%B8%E3%82%A7%E3%83%AB%E3%83%8D%E3%82"
"%A4%E3%83%AB"), "%A4%E3%83%AB"),
"#category": ("", "hatenablog", "archive"), "#category": ("", "hatenablog", "archive"),
"#class" : hatenablog.HatenaBlogArchiveExtractor, "#class" : hatenablog.HatenablogArchiveExtractor,
"#range" : "1-30", "#range" : "1-30",
"#count" : 30, "#count" : 30,
}, },
@@ -84,21 +84,21 @@ __tests__ = (
{ {
"#url" : "https://moko0908.hatenablog.jp/archive/2023", "#url" : "https://moko0908.hatenablog.jp/archive/2023",
"#category": ("", "hatenablog", "archive"), "#category": ("", "hatenablog", "archive"),
"#class" : hatenablog.HatenaBlogArchiveExtractor, "#class" : hatenablog.HatenablogArchiveExtractor,
"#count" : 13, "#count" : 13,
}, },
{ {
"#url" : "https://p-shirokuma.hatenadiary.com/archive/2023/01", "#url" : "https://p-shirokuma.hatenadiary.com/archive/2023/01",
"#category": ("", "hatenablog", "archive"), "#category": ("", "hatenablog", "archive"),
"#class" : hatenablog.HatenaBlogArchiveExtractor, "#class" : hatenablog.HatenablogArchiveExtractor,
"#count" : 5, "#count" : 5,
}, },
{ {
"#url" : "https://urakatahero.hateblo.jp/archive", "#url" : "https://urakatahero.hateblo.jp/archive",
"#category": ("", "hatenablog", "archive"), "#category": ("", "hatenablog", "archive"),
"#class" : hatenablog.HatenaBlogArchiveExtractor, "#class" : hatenablog.HatenablogArchiveExtractor,
"#range" : "1-30", "#range" : "1-30",
"#count" : 30, "#count" : 30,
}, },
@@ -106,13 +106,13 @@ __tests__ = (
{ {
"#url" : "hatenablog:https://blog.hyouhon.com/archive/2024/01/01", "#url" : "hatenablog:https://blog.hyouhon.com/archive/2024/01/01",
"#category": ("", "hatenablog", "archive"), "#category": ("", "hatenablog", "archive"),
"#class" : hatenablog.HatenaBlogArchiveExtractor, "#class" : hatenablog.HatenablogArchiveExtractor,
}, },
{ {
"#url" : "hatenablog:https://blog.hyouhon.com/search?q=a", "#url" : "hatenablog:https://blog.hyouhon.com/search?q=a",
"#category": ("", "hatenablog", "search"), "#category": ("", "hatenablog", "search"),
"#class" : hatenablog.HatenaBlogSearchExtractor, "#class" : hatenablog.HatenablogSearchExtractor,
"#range" : "1-30", "#range" : "1-30",
"#count" : 30, "#count" : 30,
}, },
@@ -120,25 +120,25 @@ __tests__ = (
{ {
"#url" : "https://cosmiclatte.hatenablog.com/search?q=a", "#url" : "https://cosmiclatte.hatenablog.com/search?q=a",
"#category": ("", "hatenablog", "search"), "#category": ("", "hatenablog", "search"),
"#class" : hatenablog.HatenaBlogSearchExtractor, "#class" : hatenablog.HatenablogSearchExtractor,
}, },
{ {
"#url" : "https://moko0908.hatenablog.jp/search?q=a", "#url" : "https://moko0908.hatenablog.jp/search?q=a",
"#category": ("", "hatenablog", "search"), "#category": ("", "hatenablog", "search"),
"#class" : hatenablog.HatenaBlogSearchExtractor, "#class" : hatenablog.HatenablogSearchExtractor,
}, },
{ {
"#url" : "https://p-shirokuma.hatenadiary.com/search?q=a", "#url" : "https://p-shirokuma.hatenadiary.com/search?q=a",
"#category": ("", "hatenablog", "search"), "#category": ("", "hatenablog", "search"),
"#class" : hatenablog.HatenaBlogSearchExtractor, "#class" : hatenablog.HatenablogSearchExtractor,
}, },
{ {
"#url" : "https://urakatahero.hateblo.jp/search?q=a", "#url" : "https://urakatahero.hateblo.jp/search?q=a",
"#category": ("", "hatenablog", "search"), "#category": ("", "hatenablog", "search"),
"#class" : hatenablog.HatenaBlogSearchExtractor, "#class" : hatenablog.HatenablogSearchExtractor,
}, },
) )

Loading…
Cancel
Save