2022-03-03 23:41:18 +00:00
from requests import get
2022-09-03 11:22:31 +00:00
from requests import post
2022-03-05 10:56:13 +00:00
from hashlib import sha256
2022-03-03 23:41:18 +00:00
import sqlite3
2022-03-31 17:00:08 +00:00
from bs4 import BeautifulSoup
2022-09-03 11:25:19 +00:00
from json import dumps
2022-09-03 15:39:30 +00:00
import re
2022-11-07 20:33:23 +00:00
from time import time
2022-03-31 17:00:08 +00:00
2022-04-06 15:40:49 +00:00
# Request headers sent with every HTTP call made by this script: a
# browser-like user agent.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0"
}
2022-03-31 17:00:08 +00:00
2022-04-09 17:26:33 +00:00
2022-03-31 17:00:08 +00:00
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block list shown on a Mastodon ``/about/more`` page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``reject``, ``media_removal`` and
        ``followers_only``. Each value is a list of
        ``{"domain", "hash", "reason"}`` dicts (keys in that order).
        An empty dict is returned when the page cannot be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Alternative section headings (other wordings and UI languages) mapped
    # onto the English headings used as keys of `blocks`.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue
        # Search everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            continue
        # The first row is skipped (presumably the table header).
        for row in table.find_all("tr")[1:]:
            span = row.find("span")
            cells = row.find_all("td")
            if span is None or len(cells) < 2:
                # Malformed row: skip it instead of aborting the whole scrape.
                continue
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # Drops the first 9 characters of the title attribute
                    # (presumably a "SHA-256: " prefix -- TODO confirm).
                    "hash": span.get("title", "")[9:],
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"] + blocks["Silenced servers"],
    }
2022-03-31 17:00:08 +00:00
2022-08-23 19:46:21 +00:00
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list shown on a Friendica ``/friendica`` page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...]}`` where every entry is a
        ``{"domain", "reason"}`` dict (keys in that order). An empty dict is
        returned when the page cannot be fetched or contains no block list
        table.
    """
    blocks = []
    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is None:
        # The page has no block list section.
        return {}

    # The first row is skipped (presumably the table header).
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )
    return {"reject": blocks}
2022-04-09 17:26:33 +00:00
2022-09-03 11:22:31 +00:00
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str) -> list:
    """Page through ``/api/federation/instances`` for one filter.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` ("suspended" or "blocked").
        flag_key: Field of each returned instance that must be truthy for the
            instance to be kept ("isSuspended" or "isBlocked").

    Returns:
        A list of ``{"domain", "reason"}`` dicts. The API exposes no reason,
        so ``reason`` is always an empty string. On a request or parsing
        error the entries collected so far are returned.
    """
    found = []
    step = 99
    offset = 0
    # The endpoint is queried page by page until an empty page comes back.
    while True:
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset > 0:
            query["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not isinstance(page, list) or not page:
                break
            for instance in page:
                # Double-check the flag, just in case the filter is ignored.
                if instance[flag_key]:
                    found.append({"domain": instance["host"], "reason": ""})
        except Exception:
            # Best effort: keep whatever was collected before the failure.
            break
        # Continue right after the last item received, so that no entry is
        # fetched twice and none is skipped if the server returns fewer
        # items than requested.
        offset += len(page)
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the block lists of a Misskey-compatible instance.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...], "followers_only": [...]}``. Instances reported as
        "blocked" are listed under ``reject`` and instances reported as
        "suspended" under ``followers_only``. An empty dict is returned on an
        unexpected error.
    """
    try:
        suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
        blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
        return {
            "reject": blocked,
            "followers_only": suspended,
        }
    except Exception:
        return {}
2022-04-08 22:00:29 +00:00
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain* encoded as UTF-8."""
    return sha256(domain.encode("utf-8")).hexdigest()
2022-03-03 23:41:18 +00:00
2022-04-09 17:26:33 +00:00
2022-03-31 15:17:11 +00:00
def get_type(domain: str) -> "str | None":
    """Detect which server software *domain* runs.

    The nodeinfo document is requested from several known locations. When
    none of them exists (HTTP 404), a successful response from
    ``/api/v1/instance`` is taken to mean Mastodon.

    Args:
        domain: Host name of the instance to query.

    Returns:
        The software name reported by nodeinfo, with known forks folded into
        the name of the software they derive from, or ``None`` when the type
        cannot be determined.
    """
    # Forks that are handled exactly like their upstream project.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }
    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # An HTML answer is not a nodeinfo document; try one more location.
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            name = res.json()["software"]["name"]
            return aliases.get(name, name)
        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Network errors and malformed documents both mean "unknown".
        return None
    return None
2022-03-21 13:04:44 +00:00
2022-09-09 08:43:55 +00:00
def tidyup(domain: str) -> str:
    """Normalize a block list entry to a bare, lower-case host name.

    Args:
        domain: Entry as published by the blocking instance.

    Returns:
        The entry in lower case without URL scheme, trailing slash, port
        number or ``user@`` prefix.
    """
    # Some block lists use mixed case.
    domain = domain.lower()
    # Some include the URL scheme, sometimes even without the slashes.
    domain = re.sub(r"^https?:/*", "", domain)
    # Some include a trailing slash. It is removed before the port so that
    # "host:8080/" loses its port as well.
    domain = re.sub(r"/$", "", domain)
    # Some include the port.
    domain = re.sub(r":\d+$", "", domain)
    # Some list individual users; keep only the part after the last "@".
    domain = re.sub(r".+@", "", domain)
    return domain
2022-04-09 17:26:33 +00:00
2022-03-03 23:41:18 +00:00
# Open the local database and select the instances whose block lists are
# going to be fetched.
conn = sqlite3.connect("blocks.db")
c = conn.cursor()
c.execute(
    # NOTE(review): the query below is limited to a single hard-coded domain,
    # which looks like a debugging leftover -- confirm before deploying. The
    # general query is kept here for reference:
    # "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
    "select domain, software from instances where domain = 'glaceon.social'"
)
2022-03-31 17:00:08 +00:00
2022-03-31 22:58:52 +00:00
def _resolve_obscured(cursor, blocked, wildcard="*"):
    """Guess the real domain behind an obscured block list entry.

    Every *wildcard* character is turned into the SQL ``_`` placeholder
    (matches exactly one character) and the first matching known instance,
    by rowid, is returned. When nothing matches, *blocked* is returned
    unchanged.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (blocked.replace(wildcard, "_"),),
    )
    match = cursor.fetchone()
    return blocked if match is None else match[0]


def _ensure_instance(cursor, blocked):
    """Add *blocked* to the ``instances`` table unless it is already there."""
    cursor.execute("select domain from instances where domain = ?", (blocked,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (blocked, get_hash(blocked), get_type(blocked)),
        )


def _record_block(cursor, blocker, blocked, reason, block_level):
    """Insert a block, or refresh it when it is already recorded.

    A new row gets the current time in both of its timestamp columns. An
    existing row (same blocker, blocked and block_level) gets ``last_seen``
    updated and, if it had no reason so far, the non-empty *reason*.
    """
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
        return
    cursor.execute(
        "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
        (timestamp, blocker, blocked, block_level),
    )
    if reason != "":
        cursor.execute(
            "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
            (reason, blocker, blocked, block_level),
        )


def _scan_pleroma(connection, cursor, blocker):
    """Store the blocks a Pleroma-like instance publishes in its nodeinfo."""
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every instance publishes this key; treat it as empty then.
            "quarantined_instances": federation.get("quarantined_instances", []),
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Some instances obscure domains without giving a hash.
                    blocked = _resolve_obscured(cursor, blocked, "*")
                _ensure_instance(cursor, blocked)
                _record_block(cursor, blocker, blocked, "", block_level)
    connection.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in infos.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons field.
                    blocked = _resolve_obscured(cursor, blocked, "*")
                cursor.execute(
                    "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
                    (reason["reason"], blocker, blocked, block_level),
                )
    connection.commit()


def _scan_mastodon(connection, cursor, blocker):
    """Store the blocks of a Mastodon instance (JSON API, else HTML page)."""
    severities = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }
    try:
        # JSON endpoint offered by newer Mastodon versions.
        blocklists = {
            "reject": [],
            "media_removal": [],
            "followers_only": [],
            "report_removal": [],
        }
        listing = get(
            f"https://{blocker}/api/v1/instance/domain_blocks", headers=headers, timeout=5
        ).json()
        for block in listing:
            entry = {
                "domain": block["domain"],
                "hash": block["digest"],
                # The comment may be null; store an empty reason instead.
                "reason": block["comment"] or "",
            }
            level = severities.get(block["severity"])
            if level is not None:
                blocklists[level].append(entry)
    except Exception:
        # Endpoint missing or unusable: fall back to scraping the about page.
        blocklists = get_mastodon_blocks(blocker)

    for block_level, blocks in blocklists.items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            blocked_hash = instance["hash"]
            reason = instance["reason"]
            if blocked.count("*") <= 1:
                cursor.execute("select hash from instances where hash = ?", (blocked_hash,))
                if cursor.fetchone() is None:
                    cursor.execute(
                        "insert into instances select ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured name: look the real domain up through its hash.
                cursor.execute("select domain from instances where hash = ?", (blocked_hash,))
                known = cursor.fetchone()
                if known is not None:
                    blocked = known[0]
            # Entries that stay obscured are stored under their hash.
            stored = blocked if blocked.count("*") <= 1 else blocked_hash
            _record_block(cursor, blocker, stored, reason, block_level)
    connection.commit()


def _scan_listed(connection, cursor, blocker, blocklists):
    """Store block lists whose entries carry only a domain and a reason."""
    for block_level, blocks in blocklists.items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            reason = instance["reason"]
            # Some servers obscure domains with asterisks, others with
            # question marks; neither provides a hash.
            for wildcard in ("*", "?"):
                if wildcard in blocked:
                    blocked = _resolve_obscured(cursor, blocked, wildcard)
            _ensure_instance(cursor, blocked)
            _record_block(cursor, blocker, blocked, reason, block_level)
    connection.commit()


def _scan_gotosocial(connection, cursor, blocker):
    """Store the suspended peers published by a GoToSocial instance."""
    peers = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in peers:
        blocked = tidyup(peer["domain"])
        if "*" in blocked:
            # No hashes are provided for obscured domains, so guess.
            blocked = _resolve_obscured(cursor, blocked, "*")
        _ensure_instance(cursor, blocked)
        # The comment may be absent or null; store an empty reason then.
        reason = peer.get("public_comment") or ""
        _record_block(cursor, blocker, blocked, reason, "reject")
    connection.commit()


# Fetch and store the block list of every selected instance. A failure on one
# instance is reported and does not stop the run.
try:
    for blocker, software in c.fetchall():
        blocker = tidyup(blocker)
        if software not in ("pleroma", "mastodon", "friendica", "misskey", "gotosocial"):
            continue
        print(blocker)
        try:
            if software == "pleroma":
                _scan_pleroma(conn, c, blocker)
            elif software == "mastodon":
                _scan_mastodon(conn, c, blocker)
            elif software == "friendica":
                _scan_listed(conn, c, blocker, get_friendica_blocks(blocker))
            elif software == "misskey":
                _scan_listed(conn, c, blocker, get_pisskey_blocks(blocker))
            else:
                _scan_gotosocial(conn, c, blocker)
        except Exception as e:
            print("error:", e, blocker)
finally:
    conn.close()