More logical Mastodon parsing order (fixes parsing pages with lots of <h3>)

This commit is contained in:
Mint 2022-09-02 22:01:39 +03:00
parent 933ce8555f
commit 2906946ee9

View file

@@ -39,11 +39,11 @@ def get_mastodon_blocks(domain: str) -> dict:
return {}
for header in doc.find_all("h3"):
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
header_text = header.text
if header_text in translations:
header_text = translations[header_text]
if header_text in blocks:
header_text = header.text
if header_text in translations:
header_text = translations[header_text]
if header_text in blocks:
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
blocks[header_text].append(
{
"domain": line.find("span").text,