fedi-block-api/fetch_blocks.py

315 lines
12 KiB
Python
Raw Normal View History

from requests import get
2022-09-03 11:22:31 +00:00
from requests import post
from hashlib import sha256
import sqlite3
2022-03-31 17:00:08 +00:00
from bs4 import BeautifulSoup
2022-09-03 11:25:19 +00:00
from json import dumps
2022-03-31 17:00:08 +00:00
2022-04-06 15:40:49 +00:00
# HTTP headers passed to every outgoing request in this script; the
# user-agent string identifies the client as a desktop Firefox browser.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0"
    ),
}
2022-03-31 17:00:08 +00:00
2022-04-09 17:26:33 +00:00
2022-03-31 17:00:08 +00:00
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the published block list from a Mastodon instance's about page.

    Fetches ``https://{domain}/about/more`` and, for every ``<h3>`` heading
    that names a known block section (English, German, Japanese and French
    headings are mapped onto the English section names), reads the rows of
    the first table that follows the heading.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``reject``, ``media_removal`` and
        ``federated_timeline_removal``. Each value is a list of dicts whose
        keys are, in this order, ``domain``, ``hash`` and ``reason``
        (callers unpack ``.values()`` positionally, so the order matters).
        An empty dict is returned when the page cannot be fetched.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Localised / older heading texts mapped onto the section names above.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or broken instance yields no blocks.
        # (Exception rather than a bare except, so Ctrl-C still works.)
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # Search everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide the lists in a dropdown.
        table = header.find_next("table")
        if table is None:
            # Heading without a table: nothing to read for this section.
            continue

        # The first row is skipped (presumably the table's header row).
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # Drops the first 9 characters of the title attribute,
                    # presumably a "SHA-256: " prefix -- TODO confirm.
                    "hash": span["title"][9:],
                    "reason": line.find_all("td")[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "federated_timeline_removal": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
2022-03-31 17:00:08 +00:00
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the published block list from a Friendica instance.

    Fetches ``https://{domain}/friendica`` and reads the table inside the
    element whose id is ``about_blocklist``.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...]}`` where each list item is a dict whose keys are,
        in this order, ``domain`` and ``reason`` (callers unpack
        ``.values()`` positionally). The list is empty when the page has no
        block list section. An empty dict is returned when the page cannot
        be fetched.
    """
    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or broken instance yields no blocks.
        return {}

    blocks = []
    blocklist = doc.find(id="about_blocklist")
    # A page without the block list section (or without a table in it) is
    # treated as "no blocks" instead of failing with an AttributeError.
    table = blocklist.find("table") if blocklist is not None else None
    if table is None:
        return {"reject": blocks}

    # The first row is skipped (presumably the table's header row).
    for line in table.find_all("tr")[1:]:
        cells = line.find_all("td")
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )

    return {"reject": blocks}
2022-04-09 17:26:33 +00:00
2022-09-03 11:22:31 +00:00
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str, step: int = 99) -> list:
    """Page through ``/api/federation/instances`` and collect flagged hosts.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` to select the listing
            (``"suspended"`` or ``"blocked"``).
        flag_key: Field of each returned instance that must be truthy for
            the instance to be recorded (``"isSuspended"`` / ``"isBlocked"``).
        step: Page size sent as ``limit``.

    Returns:
        A list of ``{"domain": host, "reason": ""}`` dicts. Collection is
        best effort: it stops at the first empty page, malformed response or
        request error and returns whatever was gathered up to that point.
    """
    found = []
    seen = set()
    offset = 0
    while True:
        payload = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset:
            payload["offset"] = offset
        new_hosts = 0
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(payload),
                headers=headers,
                timeout=5,
            ).json()
            # Anything other than a non-empty list (e.g. an error object)
            # ends the listing.
            if not isinstance(page, list) or not page:
                break
            for instance in page:
                host = instance["host"]
                if host in seen:
                    continue
                seen.add(host)
                new_hosts += 1
                # Re-check the flag on each entry, just in case the server
                # returned instances that do not match the filter.
                if instance[flag_key]:
                    # The API response carries no reason field.
                    found.append({"domain": host, "reason": ""})
        except Exception:
            break
        if not new_hosts:
            # The server keeps returning hosts already seen (e.g. it ignores
            # "offset"); stop instead of looping forever.
            break
        # Advance by what was actually received so that no entry is skipped
        # or fetched twice.
        offset += len(page)
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the block lists of a Misskey-compatible instance via its API.

    The API does not return all instances at once, so both listings are
    retrieved page by page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with ``reject`` (instances reported as blocked) and
        ``followers_only`` (instances reported as suspended). Each value is
        a list of dicts whose keys are, in this order, ``domain`` and
        ``reason``; ``reason`` is always an empty string.
    """
    # "suspended" is requested first, then "blocked", as before.
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
    return {
        "reject": blocked,
        "followers_only": suspended,
    }
2022-04-08 22:00:29 +00:00
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain* encoded as UTF-8."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
2022-04-09 17:26:33 +00:00
2022-03-31 15:17:11 +00:00
def get_type(domain: str) -> str:
    """Determine which server software an instance runs.

    Tries the instance's nodeinfo documents in turn
    (``/nodeinfo/2.1.json``, ``/nodeinfo/2.0``, ``/nodeinfo/2.0.json`` and,
    if an HTML page came back, ``/nodeinfo/2.1``) and reads
    ``software.name`` from the result. Known forks are reported under the
    name of the software they derive from. If nodeinfo answers 404, a
    successful response from ``/api/v1/instance`` is taken to mean Mastodon.

    Args:
        domain: Host name of the instance to query.

    Returns:
        The software name, or ``None`` (despite the annotation) when it
        cannot be determined or any request or parsing step fails.
    """
    # Forks that are recorded under their parent project's name.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # A successful HTML answer is not a nodeinfo document; try one more
        # location. A missing content-type header is treated as "not HTML".
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            name = res.json()["software"]["name"]
            return aliases.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Best effort: network errors, invalid JSON or unexpected document
        # shapes all mean "unknown".
        return None

    return None
2022-04-09 17:26:33 +00:00
# Crawl every known instance whose software exposes a block list and record
# its blocks (plus any previously unknown blocked instances) in blocks.db.
conn = sqlite3.connect("blocks.db")
c = conn.cursor()

try:
    c.execute(
        "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey')"
    )

    for blocker, software in c.fetchall():
        if software == "pleroma":
            print(blocker)
            # Normalise the domain. The previous `blocker == blocker.lower()`
            # was a comparison whose result was discarded.
            blocker = blocker.lower()
            try:
                # Blocks
                federation = get(
                    f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
                ).json()["metadata"]["federation"]
                if "mrf_simple" in federation:
                    block_lists = {
                        **federation["mrf_simple"],
                        # Not every instance publishes this key; treat a
                        # missing list as empty instead of failing.
                        "quarantined_instances": federation.get("quarantined_instances", []),
                    }
                    for block_level, blocks in block_lists.items():
                        for blocked in blocks:
                            if blocked == "":
                                continue
                            blocked = blocked.lower()
                            c.execute(
                                "select domain from instances where domain = ?", (blocked,)
                            )
                            if c.fetchone() is None:
                                # First time this instance is seen: register it.
                                c.execute(
                                    "insert into instances select ?, ?, ?",
                                    (blocked, get_hash(blocked), get_type(blocked)),
                                )
                            c.execute(
                                "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
                                (blocker, blocked, block_level),
                            )
                            if c.fetchone() is None:
                                c.execute(
                                    "insert into blocks select ?, ?, '', ?",
                                    (blocker, blocked, block_level),
                                )
                conn.commit()
                # Reasons
                if "mrf_simple_info" in federation:
                    reason_lists = {
                        **federation["mrf_simple_info"],
                        **federation.get("quarantined_instances_info", {}),
                    }
                    for block_level, info in reason_lists.items():
                        for blocked, reason in info.items():
                            blocked = blocked.lower()
                            c.execute(
                                "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
                                (reason["reason"], blocker, blocked, block_level),
                            )
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "mastodon":
            print(blocker)
            blocker = blocker.lower()
            try:
                fetched = get_mastodon_blocks(blocker)
                for block_level, blocks in fetched.items():
                    for instance in blocks:
                        # Relies on the key order domain, hash, reason.
                        blocked, blocked_hash, reason = instance.values()
                        blocked = blocked.lower()
                        # Names with more than one "*" are too obfuscated to
                        # use as a domain; such blocks are keyed by the
                        # published hash instead.
                        usable_domain = blocked.count("*") <= 1
                        if usable_domain:
                            c.execute(
                                "select hash from instances where hash = ?", (blocked_hash,)
                            )
                            if c.fetchone() is None:
                                c.execute(
                                    "insert into instances select ?, ?, ?",
                                    (blocked, get_hash(blocked), get_type(blocked)),
                                )
                        blocked_key = blocked if usable_domain else blocked_hash
                        c.execute(
                            "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
                            (blocker, blocked_key, block_level),
                        )
                        if c.fetchone() is None:
                            c.execute(
                                "insert into blocks select ?, ?, ?, ?",
                                (blocker, blocked_key, reason, block_level),
                            )
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software in ("friendica", "misskey"):
            print(blocker)
            blocker = blocker.lower()
            try:
                if software == "friendica":
                    fetched = get_friendica_blocks(blocker)
                else:
                    fetched = get_pisskey_blocks(blocker)
                for block_level, blocks in fetched.items():
                    for instance in blocks:
                        # Relies on the key order domain, reason.
                        blocked, reason = instance.values()
                        blocked = blocked.lower()
                        c.execute(
                            "select domain from instances where domain = ?", (blocked,)
                        )
                        if c.fetchone() is None:
                            c.execute(
                                "insert into instances select ?, ?, ?",
                                (blocked, get_hash(blocked), get_type(blocked)),
                            )
                        # NOTE(review): unlike the other branches this lookup
                        # ignores block_level, so a domain listed under two
                        # levels is stored only once -- confirm this is intended.
                        c.execute(
                            "select * from blocks where blocker = ? and blocked = ?",
                            (blocker, blocked),
                        )
                        if c.fetchone() is None:
                            c.execute(
                                "insert into blocks select ?, ?, ?, ?",
                                (blocker, blocked, reason, block_level),
                            )
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
finally:
    # Release the database even if the crawl is interrupted.
    conn.close()