@ -22,9 +22,19 @@ class Photos18Extractor(Extractor):
archive_fmt = " {filename} "
root = " https://www.photos18.com "
class Photos18AlbumExtractor ( Photos18Extractor ) :
""" Extractor for a single album URL """
subcategory = " album "
pattern = BASE_PATTERN + r " /v/( \ w+) "
example = " https://www.photos18.com/v/ID "
def __init__(self, match):
    """Set up the extractor and remember which album the URL points at.

    match: the regex match object for the album URL; its first capture
    group is stored as the album's post ID.
    """
    Photos18Extractor.__init__(self, match)
    # Capture group 1 is the only group this method reads from the match.
    album_id = match.group(1)
    self.post_id = album_id
def items ( self ) :
for post_id in self . posts ( ) :
url = self . root + " /v/ " + post_id
url = self . root + " /v/ " + self . post_id
page = self . request ( url ) . text
extr = text . extract_from ( page )
@ -45,7 +55,7 @@ class Photos18Extractor(Extractor):
urls . append ( url )
data = {
" post_id " : post_id ,
" post_id " : self . post_id ,
" title " : title ,
" category_id " : category_id ,
" category_name " : category_name ,
@ -58,20 +68,6 @@ class Photos18Extractor(Extractor):
yield Message . Url , url , text . nameext_from_url ( url , data )
class Photos18AlbumExtractor(Photos18Extractor):
    """Extractor for a single album URL"""
    subcategory = "album"
    # The raw-string prefix must be attached to its literal, and the escape
    # must read \w (word characters) so group 1 captures the album ID.
    pattern = BASE_PATTERN + r"/v/(\w+)"
    example = "https://www.photos18.com/v/ID"

    def __init__(self, match):
        """Initialize the base extractor and store the album's post ID.

        match: regex match object for the album URL; capture group 1
        (the part after "/v/") is kept as ``self.post_id``.
        """
        Photos18Extractor.__init__(self, match)
        self.post_id = match.group(1)

    def posts(self):
        """Return a one-element tuple containing this album's post ID."""
        return (self.post_id,)
class Photos18ListExtractor ( Photos18Extractor ) :
""" Extractor for a list of posts """
subcategory = " list "
@ -86,9 +82,9 @@ class Photos18ListExtractor(Photos18Extractor):
self . q = text . unquote ( match . group ( 4 ) or " " ) or query . get ( " q " )
self . category_id = match . group ( 1 ) or query . get ( " category_id " )
self . sort = match . group ( 2 ) or match . group ( 3 ) or query . get ( " sort " )
self . page = query . get ( " page " )
self . page = int ( query . get ( " page " ) or 1 )
def post s( self ) :
def item s( self ) :
query = { }
if self . q :
query [ " q " ] = self . q
@ -99,5 +95,17 @@ class Photos18ListExtractor(Photos18Extractor):
if self . page :
query [ " page " ] = self . page
while True :
has_post = False
page = self . request ( self . root , params = query ) . text
return text . extract_iter ( page , ' <a class= " visited " href= " /v/ ' , ' " ' )
for i in text . extract_iter (
page , ' <a class= " visited " href= " /v/ ' , ' " ' ) :
has_post = True
url = self . root + " /v/ " + i
data = { " _extractor " : Photos18AlbumExtractor }
yield Message . Queue , url , data
if not has_post or ' <li class= " page-item next " > ' not in page :
break
query [ " page " ] + = 1