@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
@ -15,16 +13,30 @@ from .. import text, exception
class MyhentaigalleryGalleryExtractor ( GalleryExtractor ) :
""" Extractor for image galleries from myhentaigallery.com """
category = " myhentaigallery "
directory_fmt = ( " {category} " , " {gallery_id} [{artist} ] {title} " )
pattern = ( r " (?:https?://)? ( myhentaigallery\ .com "
r " /gallery/ thumbnails/[0-9] +)" )
directory_fmt = ( " {category} " , " {gallery_id} { artist:?[/] /J, } {title} " )
pattern = ( r " (?:https?://)? myhentaigallery\ .com "
r " /gallery/ (?:thumbnails|show)/(\ d +)" )
test = (
( " https://myhentaigallery.com/gallery/thumbnails/16247 " ) ,
( " https://myhentaigallery.com/gallery/thumbnails/15224 " ) ,
( " https://myhentaigallery.com/gallery/thumbnails/16247 " , {
" pattern " : r " https://images.myhentaigrid.com/imagesgallery/images "
r " /[^/]+/original/ \ d+ \ .jpg " ,
" keyword " : {
" artist " : list ,
" count " : 11 ,
" gallery_id " : 16247 ,
" group " : list ,
" parodies " : list ,
" tags " : [ " Giantess " ] ,
" title " : " Attack Of The 50ft Woman 1 " ,
} ,
} ) ,
( " https://myhentaigallery.com/gallery/show/16247/1 " ) ,
)
root = " https://myhentaigallery.com "
def __init__ ( self , match ) :
url = " https:// " + match . group ( 1 )
self . gallery_id = match . group ( 1 )
url = " {} /gallery/thumbnails/ {} " . format ( self . root , self . gallery_id )
GalleryExtractor . __init__ ( self , match , url )
self . session . headers [ " Referer " ] = url
@ -32,22 +44,22 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
extr = text . extract_from ( page )
split = text . split_html
image = extr ( ' <div class= " comic-cover " > \n <a href= " ' , ' " ' )
title = extr ( ' <div class= " comic-description " > \n <h1> ' , ' </h1> ' )
if not title :
raise exception . NotFoundError ( " gallery " )
data = {
return {
" title " : text . unescape ( title ) ,
" gallery_id " : text . parse_int ( image . split ( " / " ) [ - 2 ] ) ,
" gallery_id " : text . parse_int ( self . gallery_id ) ,
" tags " : split ( extr ( ' <div> \n Categories: ' , ' </div> ' ) ) ,
" artist " : split ( extr ( ' <div> \n Artists: ' , ' </div> ' ) ) ,
" group " : split ( extr ( ' <div> \n Groups: ' , ' </div> ' ) ) ,
" parodies " : split ( extr ( ' <div> \n Parodies: ' , ' </div> ' ) ) ,
}
artists = split ( extr ( ' <div> \n Artists: ' , ' </div> ' ) )
data [ " artist " ] = artists [ 0 ] if artists else " Unknown "
return data
def images ( self , page ) :
extr = text . extract_iter
return [
( text . unescape ( url ) . replace ( " /thumbnail/ " , " /original/ " ) , None )
for url in extr ( page , ' class= " comic-thumb " > \n <img src= " ' , ' " ' )
( text . unescape ( text . extract ( url , ' src= " ' , ' " ' ) [ 0 ] ) . replace (
" /thumbnail/ " , " /original/ " ) , None )
for url in text . extract_iter ( page , ' class= " comic-thumb " ' , ' </div> ' )
]