# fedi-block-api/fetch_blocks.py (Python, 108 lines, 6.2 KiB)

from requests import get
from hashlib import sha256
import sqlite3
2022-03-31 17:00:08 +00:00
from bs4 import BeautifulSoup
2022-04-06 15:40:49 +00:00
# HTTP headers passed to every requests `get()` call in this file; the
# user-agent names the project and links to its repository.
headers = {
"user-agent": "fedi-block-api (https://gitlab.com/EnjuAihara/fedi-block-api)"
}
2022-03-31 17:00:08 +00:00
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block lists shown on a Mastodon instance's ``/about/more`` page.

    The page is fetched over HTTPS and every ``<h3>`` heading is inspected.
    Headings with a recognised title are followed by a table whose rows
    (after the first, which is skipped as the table header) each describe
    one blocked server.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``"reject"``, ``"media_removal"`` and
        ``"federated_timeline_removal"`` (in that order).  Each value is a
        list of dicts with the keys ``"domain"``, ``"hash"`` and ``"reason"``.

    This is a best-effort scraper: any error while fetching or parsing is
    reported on stdout and whatever was collected up to that point is
    returned, so the lists may be empty or incomplete.
    """
    blocks = {"reject": [], "media_removal": [], "federated_timeline_removal": []}

    # Heading text on the page -> block level the entries are filed under.
    level_by_heading = {
        "Suspended servers": "reject",
        "Filtered media": "media_removal",
        "Limited servers": "federated_timeline_removal",
        "Silenced servers": "federated_timeline_removal",
    }

    try:
        page = get(f"https://{domain}/about/more", headers=headers, timeout=5)
        doc = BeautifulSoup(page.text, "html.parser")

        for header in doc.find_all("h3"):
            level = level_by_heading.get(header.text)
            if level is None:
                continue

            table = header.find_next_siblings("table")[0]
            for row in table.find_all("tr")[1:]:
                span = row.find("span")
                blocks[level].append({
                    "domain": span.text,
                    # The first 9 characters of the title attribute are dropped;
                    # presumably a "SHA-256: " label - TODO confirm against a live page.
                    "hash": span["title"][9:],
                    "reason": row.find_all("td")[1].text.strip(),
                })
    except Exception as e:
        # Previously a `return` inside `finally` hid every exception,
        # including KeyboardInterrupt.  Keep the best-effort contract, but
        # only for ordinary errors, and make the failure visible.
        print("error:", e, domain)

    return blocks
2022-03-31 15:17:11 +00:00
def get_type(domain: str) -> "str | None":
    """Determine which server software an instance runs.

    Lookup order:
      1. ``/nodeinfo/2.1.json``; if that answers 404, ``/nodeinfo/2.0.json``.
         A successful response yields its ``software.name`` value.
      2. If nodeinfo answered 404, ``/api/v1/instance`` is probed and a
         successful response is reported as ``"mastodon"``.

    Args:
        domain: Host name of the instance to query.

    Returns:
        The software name, or ``None`` when it cannot be determined
        (network error, unexpected status code, malformed response).
    """
    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)

        if res.ok:
            return res.json()["software"]["name"]

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Best-effort lookup: an unreachable or misbehaving host simply has
        # an unknown type.  `Exception` (not a bare `except`) so that
        # KeyboardInterrupt / SystemExit still stop the crawl.
        return None

    # Reached when the host answered, but with neither success nor 404.
    return None
def _ensure_instance(cursor, domain):
    """Insert `domain` into the instances table unless a row for it already exists.

    The inserted row holds the domain, the SHA-256 hex digest of the domain
    and the software name reported by get_type() (which may be None).
    """
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, sha256(bytes(domain, "utf-8")).hexdigest(), get_type(domain)),
        )


# Refresh the blocks table: for every known Pleroma or Mastodon instance,
# drop its stored blocks and re-insert what the instance currently publishes.
conn = sqlite3.connect("blocks.db")
try:
    c = conn.cursor()
    c.execute("select domain, software from instances where software in ('pleroma', 'mastodon')")

    for blocker, software in c.fetchall():
        if software == "pleroma":
            print(blocker)
            try:
                # Blocks
                c.execute("delete from blocks where blocker = ?", (blocker,))
                nodeinfo = get(f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5).json()
                federation = nodeinfo["metadata"]["federation"]

                if "mrf_simple" in federation:
                    for mrf in federation["mrf_simple"]:
                        for blocked in federation["mrf_simple"][mrf]:
                            if blocked == "":
                                continue
                            _ensure_instance(c, blocked)
                            c.execute("insert into blocks select ?, ?, '', ?", (blocker, blocked, mrf))

                # Quarantined Instances
                if "quarantined_instances" in federation:
                    for blocked in federation["quarantined_instances"]:
                        if blocked == "":
                            continue
                        _ensure_instance(c, blocked)
                        c.execute("insert into blocks select ?, ?, '', 'quarantined_instances'", (blocker, blocked))
                conn.commit()

                # Reasons (stored separately from the block lists in nodeinfo)
                if "mrf_simple_info" in federation:
                    for mrf in federation["mrf_simple_info"]:
                        for blocked, info in federation["mrf_simple_info"][mrf].items():
                            c.execute(
                                "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
                                (info["reason"], blocker, blocked, mrf),
                            )
                if "quarantined_instances_info" in federation:
                    for blocked, info in federation["quarantined_instances_info"]["quarantined_instances"].items():
                        c.execute(
                            "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = 'quarantined_instances'",
                            (info["reason"], blocker, blocked),
                        )
                conn.commit()
            except Exception as e:
                # Undo the uncommitted delete/inserts for this blocker.
                # Without this, the next blocker's commit() would persist a
                # half-refreshed (or emptied) block list for the failed one.
                conn.rollback()
                print("error:", e, blocker)

        elif software == "mastodon":
            print(blocker)
            try:
                c.execute("delete from blocks where blocker = ?", (blocker,))
                # NOTE(review): get_mastodon_blocks() returns empty lists when
                # the fetch fails, so a failed fetch still wipes this
                # blocker's rows here - confirm whether that is intended.
                block_lists = get_mastodon_blocks(blocker)

                for block_level in block_lists:
                    for blocked in block_lists[block_level]:
                        if blocked["domain"].count("*") > 1:
                            # instance is censored, check if domain of hash is known, if not, insert the hash
                            c.execute(
                                "insert into blocks select ?, ifnull((select domain from instances where hash = ?), ?), ?, ?",
                                (blocker, blocked["hash"], blocked["hash"], blocked["reason"], block_level),
                            )
                        else:
                            # instance is not censored; if instance not known, add it
                            _ensure_instance(c, blocked["domain"])
                            c.execute(
                                "insert into blocks select ?, ?, ?, ?",
                                (blocker, blocked["domain"], blocked["reason"], block_level),
                            )
                conn.commit()
            except Exception as e:
                # Same reasoning as above: never leave a partial refresh
                # pending for the next commit().
                conn.rollback()
                print("error:", e, blocker)
finally:
    # Close the database even if the crawl is interrupted.
    conn.close()