@ -57,7 +57,7 @@ class KemonopartyExtractor(Extractor):
generators = self . _build_file_generators ( self . config ( " files " ) )
generators = self . _build_file_generators ( self . config ( " files " ) )
duplicates = self . config ( " duplicates " )
duplicates = self . config ( " duplicates " )
comments = self . config ( " comments " )
comments = self . config ( " comments " )
username = dms = None
username = dms = announcements = None
# prevent files from being sent with gzip compression
# prevent files from being sent with gzip compression
headers = { " Accept-Encoding " : " identity " }
headers = { " Accept-Encoding " : " identity " }
@ -68,6 +68,8 @@ class KemonopartyExtractor(Extractor):
' <meta name= " artist_name " content= " ' , ' " ' ) [ 0 ] )
' <meta name= " artist_name " content= " ' , ' " ' ) [ 0 ] )
if self . config ( " dms " ) :
if self . config ( " dms " ) :
dms = True
dms = True
if self . config ( " announcements " ) :
announcements = True
posts = self . posts ( )
posts = self . posts ( )
max_posts = self . config ( " max-posts " )
max_posts = self . config ( " max-posts " )
@ -88,8 +90,12 @@ class KemonopartyExtractor(Extractor):
post [ " comments " ] = self . _extract_comments ( post )
post [ " comments " ] = self . _extract_comments ( post )
if dms is not None :
if dms is not None :
if dms is True :
if dms is True :
dms = self . _extract_ dm s( post )
dms = self . _extract_ car ds( post , " dms " )
post [ " dms " ] = dms
post [ " dms " ] = dms
if announcements is not None :
if announcements is True :
announcements = self . _extract_cards ( post , " announcements " )
post [ " announcements " ] = announcements
files = [ ]
files = [ ]
hashes = set ( )
hashes = set ( )
@ -200,21 +206,21 @@ class KemonopartyExtractor(Extractor):
} )
} )
return comments
return comments
def _extract_cards(self, post, type):
    """Collect the card entries listed on one of a user's sub-pages.

    Requests ``{root}/{service}/user/{user}/{type}`` and parses every
    ``<article ...</article>`` section found in the returned HTML.

    Args:
        post: Mapping that provides the "service" and "user" values
            used to build the page URL.
        type: Last path segment of the page to fetch; the visible call
            sites pass "dms" and "announcements".  (The name shadows
            the builtin, but is kept so existing callers are unaffected.)

    Returns:
        A list of dicts, one per ``<article>``, each holding:
          "body" - the HTML-unescaped, stripped text found between
                   "<pre>" and "</pre></"
          "date" - the footer text between the first ": " and the
                   following newline
    """
    url = "{}/{}/user/{}/{}".format(
        self.root, post["service"], post["user"], type)
    page = self.request(url).text

    cards = []
    for card in text.extract_iter(page, "<article", "</article>"):
        footer = text.extr(card, "<footer", "</footer>")
        cards.append({
            "body": text.unescape(text.extr(
                card, "<pre>", "</pre></",
            ).strip()),
            # Match on the bare ": " separator rather than a fixed label
            # such as "Published: ", so the same parser serves every
            # card page regardless of how its footer labels the date.
            "date": text.extr(footer, ': ', '\n'),
        })
    return cards


def _extract_dms(self, post):
    """Return the cards of the user's "dms" page.

    Thin wrapper kept so that callers still using the previous method
    name continue to work; all parsing lives in ``_extract_cards``.
    """
    return self._extract_cards(post, "dms")
def _parse_datetime ( self , date_string ) :
def _parse_datetime ( self , date_string ) :
if len ( date_string ) > 19 :
if len ( date_string ) > 19 :