More logical Mastodon parsing order (fixes parsing pages with lots of <h3>)

This commit is contained in:
Mint 2022-09-02 22:01:39 +03:00
parent 933ce8555f
commit 2906946ee9

View file

@@ -39,11 +39,11 @@ def get_mastodon_blocks(domain: str) -> dict:
return {} return {}
for header in doc.find_all("h3"): for header in doc.find_all("h3"):
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]: header_text = header.text
header_text = header.text if header_text in translations:
if header_text in translations: header_text = translations[header_text]
header_text = translations[header_text] if header_text in blocks:
if header_text in blocks: for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
blocks[header_text].append( blocks[header_text].append(
{ {
"domain": line.find("span").text, "domain": line.find("span").text,