From 191214dbd61c9ca3388617ebda88f38345cdc187 Mon Sep 17 00:00:00 2001 From: io Date: Thu, 19 Aug 2021 10:40:57 +0000 Subject: [PATCH] save some memory by using account handles instead of objects --- fetch_posts.py | 28 +++++++++++++++++----------- utils.py | 7 +++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/fetch_posts.py b/fetch_posts.py index ece15ee..ac6f04a 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -6,12 +6,14 @@ import anyio import aiohttp import platform import pendulum +import operator import aiosqlite import contextlib from utils import shield from pleroma import Pleroma from bs4 import BeautifulSoup from functools import partial +from typing import Iterable, NewType from third_party.utils import extract_post_content USER_AGENT = ( @@ -52,16 +54,20 @@ class PostFetcher: async def __aexit__(self, *excinfo): return await self._ctx_stack.__aexit__(*excinfo) + # username@instance + AccountHandle = NewType('AccountHandle', str) + async def fetch_all(self): + """fetch all following accounts, or an iterable of accounts if provided""" await self._fedi.verify_credentials() self._completed_accounts = {} async with anyio.create_task_group() as tg: - for acc in await self._fedi.following(): + for acc in map(operator.itemgetter('fqn'), await self._fedi.following()): tg.start_soon(self._do_account, acc) - async def _do_account(self, acc): + async def _do_account(self, acc: AccountHandle): async with anyio.create_task_group() as tg: - self._completed_accounts[acc['fqn']] = done_ev = anyio.Event() + self._completed_accounts[acc] = done_ev = anyio.Event() tx, rx = anyio.create_memory_object_stream() async with rx, tx: tg.start_soon(self._process_pages, rx, acc) @@ -72,7 +78,7 @@ class PostFetcher: tg.cancel_scope.cancel() async def _process_pages(self, stream, account): - done_ev = self._completed_accounts[account['fqn']] + done_ev = self._completed_accounts[account] try: async for activity in stream: try: @@ -83,10 +89,10 @@ class PostFetcher: # this means we've encountered an item we already have saved break - self.erroneous_accounts.append(account['fqn']) + self.erroneous_accounts.append(account) raise finally: - print('Saving posts from', account['fqn'], 'to the DB') + print('Saving posts from', account, 'to the DB') await self._db.commit() done_ev.set() @@ -113,19 +119,19 @@ class PostFetcher: # TODO figure out why i put shield here lol @shield - async def _fetch_account(self, tx, account): - done_ev = self._completed_accounts[account['fqn']] + async def _fetch_account(self, tx, account: AccountHandle): + done_ev = self._completed_accounts[account] try: - outbox = await self.fetch_outbox(account['fqn']) + outbox = await self.fetch_outbox(account) except Exception as exc: import traceback traceback.print_exception(type(exc), exc, exc.__traceback__) done_ev.set() - self.erroneous_accounts.append(account['fqn']) + self.erroneous_accounts.append(account) return - print(f'Fetching posts for {account["acct"]}...') + print(f'Fetching posts for {account}...') next_page_url = outbox['first'] while True: diff --git a/utils.py b/utils.py index b552598..085b536 100644 --- a/utils.py +++ b/utils.py @@ -9,3 +9,10 @@ def shield(f): with anyio.CancelScope(shield=True): return await f(*args, **kwargs) return shielded + +def removeprefix(s, prefix): + try: + return s.removeprefix(prefix) + except AttributeError: + # compatibility for pre-3.9 + return s[len(prefix):] if s.startswith(prefix) else s