import re
import sqlite3
from hashlib import sha256
from json import dumps
from typing import Optional

from bs4 import BeautifulSoup
from requests import get
from requests import post
2022-03-31 17:00:08 +00:00
2022-04-06 15:40:49 +00:00
# Request headers passed to every get()/post() call in this file; the
# user-agent string identifies the client as desktop Firefox 102.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0",
}
2022-03-31 17:00:08 +00:00
2022-04-09 17:26:33 +00:00
2022-03-31 17:00:08 +00:00
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block lists shown on a Mastodon instance's about page.

    Fetches ``https://{domain}/about/more`` and, for every ``<h3>`` heading
    whose text is (or translates to) one of the known list titles, reads the
    rows of the first ``<table>`` that follows it.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``"reject"``, ``"media_removal"`` and
        ``"federated_timeline_removal"``. Each value is a list of dicts with
        the keys ``"domain"``, ``"hash"`` and ``"reason"``. An empty dict is
        returned when the page cannot be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Alternative heading texts (older wording and localised pages) mapped
    # onto the canonical keys of ``blocks``.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or unparsable page means "no data".
        # ``Exception`` (not a bare except) keeps Ctrl-C working.
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # Search everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide the lists in a dropdown.
        table = header.find_next("table")
        if table is None:
            continue

        # The first row is skipped (presumably the table header row).
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            title = span.get("title") if span is not None else None
            if title is None or len(cells) < 2:
                # Malformed row: skip it instead of aborting the whole scrape.
                continue
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # Drops the first nine characters of the title attribute,
                    # presumably a "SHA-256: " label - TODO confirm.
                    "hash": title[9:],
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "federated_timeline_removal": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
2022-03-31 17:00:08 +00:00
2022-08-23 19:46:21 +00:00
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list shown on a Friendica instance's info page.

    Fetches ``https://{domain}/friendica`` and reads the table inside the
    element with id ``about_blocklist``.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...]}`` where each entry is a dict with the keys
        ``"domain"`` and ``"reason"``. The list is empty when the page shows
        no block list. An empty dict is returned when the page cannot be
        fetched or parsed.
    """
    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: treat an unreachable page as "no data" while still
        # letting KeyboardInterrupt/SystemExit propagate.
        return {}

    blocks = []
    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is not None:
        # The first row is skipped (presumably the table header row).
        for line in table.find_all("tr")[1:]:
            cells = line.find_all("td")
            if len(cells) < 2:
                # Malformed row: skip it instead of raising IndexError.
                continue
            blocks.append(
                {
                    "domain": cells[0].text.strip(),
                    "reason": cells[1].text.strip(),
                }
            )

    return {"reject": blocks}
2022-04-09 17:26:33 +00:00
2022-09-03 11:22:31 +00:00
def get_pisskey_blocks(domain: str) -> dict:
    """Collect the blocked and suspended hosts reported by a Misskey instance.

    Queries ``/api/federation/instances`` page by page, once with the
    ``suspended`` filter and once with the ``blocked`` filter.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``"reject"`` (hosts flagged ``isBlocked``) and
        ``"followers_only"`` (hosts flagged ``isSuspended``). Each value is a
        list of dicts with the keys ``"domain"`` and ``"reason"``; the reason
        is always an empty string because no reason is read from the
        response. A failed request ends that listing early and whatever was
        collected up to that point is returned.
    """
    # Request order is kept as before: suspended hosts first, then blocked.
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
    return {
        "reject": blocked,
        "followers_only": suspended,
    }


def _fetch_misskey_instances(
    domain: str, filter_key: str, flag_key: str, step: int = 99
) -> list:
    """Page through one filtered instance listing and collect flagged hosts.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` (``"suspended"`` or
            ``"blocked"``).
        flag_key: Field of each returned entry that must be truthy for the
            entry to be kept (``"isSuspended"`` or ``"isBlocked"``).
        step: Number of entries requested per page.

    Returns:
        List of ``{"domain": host, "reason": ""}`` dicts.
    """
    collected = []
    offset = 0
    while True:
        # The endpoint is read one page at a time, so keep requesting with a
        # growing offset until an empty page comes back.
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset:
            query["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not page:
                break
            for instance in page:
                # Re-check the flag on each entry, just in case the filter
                # was not honoured.
                if instance[flag_key]:
                    collected.append({"domain": instance["host"], "reason": ""})
        except Exception:
            # Best effort: a network error or an unexpected payload ends the
            # listing but keeps what was gathered so far.
            break
        # Advance by what was actually received so no entry is fetched twice
        # (the previous ``counter - 1`` offset repeated one entry per page).
        offset += len(page)
    return collected
2022-04-08 22:00:29 +00:00
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain* encoded as UTF-8."""
    return sha256(domain.encode("utf-8")).hexdigest()
2022-03-03 23:41:18 +00:00
2022-04-09 17:26:33 +00:00
2022-03-31 15:17:11 +00:00
def get_type(domain: str) -> Optional[str]:
    """Detect which server software an instance runs.

    Probes the instance's NodeInfo documents in this order:
    ``/nodeinfo/2.1.json``, then ``/nodeinfo/2.0`` and ``/nodeinfo/2.0.json``
    while the previous attempt answered 404, and finally ``/nodeinfo/2.1``
    when a successful answer turned out to be HTML. If NodeInfo is still a
    404, ``/api/v1/instance`` is tried and a successful answer is taken to
    mean Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name reported by NodeInfo, with the fork names listed
        below folded into ``"pleroma"``, ``"mastodon"`` or ``"misskey"``;
        ``None`` when the software could not be determined.
    """
    # Fork names reported by NodeInfo, mapped to the family name used by the
    # rest of this file.
    families = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
    }
    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        for fallback in ("/nodeinfo/2.0", "/nodeinfo/2.0.json"):
            if res.status_code == 404:
                res = get(f"https://{domain}{fallback}", headers=headers, timeout=5)
        # A missing content-type header is treated as "not HTML" instead of
        # discarding an otherwise usable response.
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            name = res.json()["software"]["name"]
            return families.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Best effort: network errors and malformed documents mean "unknown".
        # ``Exception`` (not a bare except) keeps Ctrl-C working.
        return None
    return None
2022-03-21 13:04:44 +00:00
2022-04-09 17:26:33 +00:00
2022-03-03 23:41:18 +00:00
def _ensure_instance(cursor, domain, known_hash=None):
    """Insert *domain* into ``instances`` unless it is already recorded.

    The lookup is by the ``hash`` column when *known_hash* is given and by the
    ``domain`` column otherwise. ``get_type`` (which performs HTTP requests)
    is only called when a row actually has to be inserted.
    """
    if known_hash is None:
        cursor.execute("select domain from instances where domain = ?", (domain,))
    else:
        cursor.execute("select hash from instances where hash = ?", (known_hash,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _ensure_block(cursor, blocker, blocked, reason, block_level, per_level=True):
    """Insert a row into ``blocks`` unless a matching row already exists.

    With *per_level* true the duplicate check matches on blocker, blocked and
    block_level; with it false only blocker and blocked are compared.
    """
    if per_level:
        cursor.execute(
            "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
            (blocker, blocked, block_level),
        )
    else:
        cursor.execute(
            "select * from blocks where blocker = ? and blocked = ?",
            (blocker, blocked),
        )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?",
            (blocker, blocked, reason, block_level),
        )


# Walk every known instance running a supported software, fetch the blocks it
# publishes and record them in blocks.db.
conn = sqlite3.connect("blocks.db")
try:
    c = conn.cursor()
    c.execute(
        "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
    )

    # fetchall() materialises the work list up front because the same cursor
    # is reused for the queries issued inside the loop.
    for blocker, software in c.fetchall():
        if software == "pleroma":
            print(blocker)
            try:
                # Blocks
                federation = get(
                    f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
                ).json()["metadata"]["federation"]
                blocker = blocker.lower()
                if "mrf_simple" in federation:
                    # quarantined_instances is optional here, mirroring how
                    # quarantined_instances_info is handled below; a missing
                    # key no longer discards the instance's other blocks.
                    levels = {
                        **federation["mrf_simple"],
                        "quarantined_instances": federation.get(
                            "quarantined_instances", []
                        ),
                    }
                    for block_level, blocks in levels.items():
                        for blocked in blocks:
                            if blocked == "":
                                continue
                            blocked = blocked.lower()
                            _ensure_instance(c, blocked)
                            _ensure_block(c, blocker, blocked, "", block_level)
                conn.commit()
                # Reasons
                if "mrf_simple_info" in federation:
                    infos = {
                        **federation["mrf_simple_info"],
                        **federation.get("quarantined_instances_info", {}),
                    }
                    for block_level, info in infos.items():
                        for blocked, reason in info.items():
                            c.execute(
                                "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
                                (reason["reason"], blocker, blocked.lower(), block_level),
                            )
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "mastodon":
            print(blocker)
            try:
                block_lists = get_mastodon_blocks(blocker)
                blocker = blocker.lower()
                for block_level, blocks in block_lists.items():
                    for instance in blocks:
                        # Read fields by key instead of relying on dict order.
                        blocked = instance["domain"].lower()
                        blocked_hash = instance["hash"]
                        reason = instance["reason"]
                        if blocked.count("*") <= 1:
                            _ensure_instance(c, blocked, known_hash=blocked_hash)
                            block_key = blocked
                        else:
                            # Domains with more than one "*" are stored in
                            # blocks under their hash instead of their name.
                            block_key = blocked_hash
                        _ensure_block(c, blocker, block_key, reason, block_level)
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "friendica" or software == "misskey":
            print(blocker)
            try:
                if software == "friendica":
                    block_lists = get_friendica_blocks(blocker)
                else:
                    block_lists = get_pisskey_blocks(blocker)
                blocker = blocker.lower()
                for block_level, blocks in block_lists.items():
                    for instance in blocks:
                        blocked = instance["domain"].lower()
                        _ensure_instance(c, blocked)
                        # NOTE(review): unlike the other branches, this
                        # duplicate check ignores block_level, so a host listed
                        # under two levels is stored once. Kept as it was;
                        # confirm whether that is intended.
                        _ensure_block(
                            c,
                            blocker,
                            blocked,
                            instance["reason"],
                            block_level,
                            per_level=False,
                        )
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "gotosocial":
            print(blocker)
            try:
                # Blocks
                federation = get(
                    f"https://{blocker}/api/v1/instance/peers?filter=suspended",
                    headers=headers,
                    timeout=5,
                ).json()
                # Dump of the raw response, kept so the script's output is
                # unchanged.
                print(federation)
                blocker = blocker.lower()
                for peer in federation:
                    blocked = peer["domain"].lower()
                    if "*" in blocked:
                        # GTS does not have hashes for obscured domains, so we
                        # have to guess: "*" becomes the single-character LIKE
                        # wildcard "_" and the oldest matching row wins.
                        c.execute(
                            "select domain from instances where domain like ? order by rowid limit 1",
                            (blocked.replace("*", "_"),),
                        )
                        match = c.fetchone()
                        if match is not None:
                            blocked = match[0]
                    _ensure_instance(c, blocked)
                    _ensure_block(c, blocker, blocked, "", "reject")
                    if "public_comment" in peer:
                        reason = peer["public_comment"]
                        # Only fill in the reason when no non-empty reason is
                        # stored for this block yet.
                        c.execute(
                            "select * from blocks where blocker = ? and blocked = ? and reason != ? and block_level = ?",
                            (blocker, blocked, "", "reject"),
                        )
                        if c.fetchone() is None:
                            c.execute(
                                "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
                                (reason, blocker, blocked, "reject"),
                            )
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
finally:
    # Close the connection even when the run is interrupted.
    conn.close()