diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..9f8d83c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.analysis.autoImportCompletions": true, + "python.analysis.typeCheckingMode": "standard" +} diff --git a/pyproject.toml b/pyproject.toml index ca79f55..9522188 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "apify-shared" -version = "0.1.0" +version = "0.1.1-rc1" description = "Shared utility package for use with Apify actors" authors = [{ name = "Ryan Byrne", email = "ryanjbyrne30@gmail.com" }] requires-python = ">=3.12" diff --git a/src/apify_shared/__init__.py b/src/apify_shared/__init__.py index 82e27be..a2c8443 100644 --- a/src/apify_shared/__init__.py +++ b/src/apify_shared/__init__.py @@ -1,2 +1,5 @@ -def hello() -> str: - return "Hello from apify-shared!" +from apify_shared.data_store import DataStore, DataStoreConfig +from apify_shared.fetch import FetchClient, FetchConfig +from apify_shared.logger import Logger +from apify_shared.utils import date_str, timestamp_str +from apify_shared.zenrows import ZenRowsClient diff --git a/src/apify_shared/data_store.py b/src/apify_shared/data_store.py index fc7bc76..37032c3 100644 --- a/src/apify_shared/data_store.py +++ b/src/apify_shared/data_store.py @@ -25,33 +25,33 @@ class DataStore: kwargs["aws_secret_access_key"] = config.aws_secret_access_key self._client = boto3.client("s3", **kwargs) - async def upload_json(self, key: str, data: Any): + async def s3_upload_json(self, key: str, data: Any): body = json.dumps(data, indent=2).encode("utf-8") - await self.__upload(key, body, "application/json") + await self.__s3_upload(key, body, "application/json") - async def upload_xml(self, key: str, data: Any): + async def s3_upload_xml(self, key: str, data: Any): if isinstance(data, bytes): body = data elif isinstance(data, str): body = data.encode("utf-8") else: body = str(data).encode("utf-8") - await self.__upload(key, body, "application/xml") + await self.__s3_upload(key, body, "application/xml") - async def upload_html( + async def s3_upload_html( self, key: str, data: str | bytes, metadata: dict[str, object] = {} ): body = data.encode("utf-8") if isinstance(data, str) else data - await self.__upload(key, body, "text/html; charset=utf-8", metadata=metadata) + await self.__s3_upload(key, body, "text/html; charset=utf-8", metadata=metadata) - async def upload_txt(self, key: str, data: Any): + async def s3_upload_txt(self, key: str, data: Any): if isinstance(data, bytes): body = data else: body = str(data).encode("utf-8") - await self.__upload(key, body, "text/plain") + await self.__s3_upload(key, body, "text/plain") - async def upload_csv(self, key: str, data: Any): + async def s3_upload_csv(self, key: str, data: Any): if isinstance(data, bytes): body = data elif isinstance(data, str): @@ -68,9 +68,9 @@ class DataStore: body = buf.getvalue().encode("utf-8") else: raise TypeError(f"Unsupported data type for CSV upload: {type(data)}") - await self.__upload(key, body, "text/csv") + await self.__s3_upload(key, body, "text/csv") - async def __upload( + async def __s3_upload( self, key: str, body: bytes, content_type: str, metadata: dict[str, object] = {} ): await asyncio.to_thread( diff --git a/src/apify_shared/fetch.py b/src/apify_shared/fetch.py index e17851c..ca5424c 100644 --- a/src/apify_shared/fetch.py +++ b/src/apify_shared/fetch.py @@ -64,3 +64,4 @@ class FetchClient: response = await self._client.request(method, url, **kwargs) response.raise_for_status() return response + raise httpx.NetworkError("Failed to send request") diff --git a/src/apify_shared/zenrows.py b/src/apify_shared/zenrows.py index 6cbcfcf..394adc3 100644 --- a/src/apify_shared/zenrows.py +++ b/src/apify_shared/zenrows.py @@ -1,12 +1,19 @@ -from virtek_apify.logger import Logger import time +from dataclasses import dataclass +from apify_shared.logger import Logger from zenrows import ZenRowsClient +@dataclass +class ZenRowsConfig: + logger: Logger + api_key: str + + class ZenRowsScraper: - def __init__(self, logger: Logger, api_key: str): - self.client = ZenRowsClient(api_key) - self.logger = logger + def __init__(self, config: ZenRowsConfig): + self.client = ZenRowsClient(config.api_key) + self.logger = config.logger def scrape(self, url: str): self.logger.debug(f"ZenRows scraping url: {url}...")