0.1.1-rc1 minor refactors
This commit is contained in:
4
.vscode/settings.json
vendored
Normal file
4
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
{
|
||||
"python.analysis.autoImportCompletions": true,
|
||||
"python.analysis.typeCheckingMode": "standard"
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "apify-shared"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1-rc1"
|
||||
description = "Shared utility package for use with Apify actors"
|
||||
authors = [{ name = "Ryan Byrne", email = "ryanjbyrne30@gmail.com" }]
|
||||
requires-python = ">=3.12"
|
||||
|
||||
@@ -1,2 +1,5 @@
|
||||
def hello() -> str:
|
||||
return "Hello from apify-shared!"
|
||||
from apify_shared.data_store import DataStore, DataStoreConfig
|
||||
from apify_shared.fetch import FetchClient, FetchConfig
|
||||
from apify_shared.logger import Logger
|
||||
from apify_shared.utils import date_str, timestamp_str
|
||||
from apify_shared.zenrows import ZenRowsClient
|
||||
|
||||
@@ -25,33 +25,33 @@ class DataStore:
|
||||
kwargs["aws_secret_access_key"] = config.aws_secret_access_key
|
||||
self._client = boto3.client("s3", **kwargs)
|
||||
|
||||
async def upload_json(self, key: str, data: Any):
|
||||
async def s3_upload_json(self, key: str, data: Any):
|
||||
body = json.dumps(data, indent=2).encode("utf-8")
|
||||
await self.__upload(key, body, "application/json")
|
||||
await self.__s3_upload(key, body, "application/json")
|
||||
|
||||
async def upload_xml(self, key: str, data: Any):
|
||||
async def s3_upload_xml(self, key: str, data: Any):
|
||||
if isinstance(data, bytes):
|
||||
body = data
|
||||
elif isinstance(data, str):
|
||||
body = data.encode("utf-8")
|
||||
else:
|
||||
body = str(data).encode("utf-8")
|
||||
await self.__upload(key, body, "application/xml")
|
||||
await self.__s3_upload(key, body, "application/xml")
|
||||
|
||||
async def upload_html(
|
||||
async def s3_upload_html(
|
||||
self, key: str, data: str | bytes, metadata: dict[str, object] = {}
|
||||
):
|
||||
body = data.encode("utf-8") if isinstance(data, str) else data
|
||||
await self.__upload(key, body, "text/html; charset=utf-8", metadata=metadata)
|
||||
await self.__s3_upload(key, body, "text/html; charset=utf-8", metadata=metadata)
|
||||
|
||||
async def upload_txt(self, key: str, data: Any):
|
||||
async def s3_upload_txt(self, key: str, data: Any):
|
||||
if isinstance(data, bytes):
|
||||
body = data
|
||||
else:
|
||||
body = str(data).encode("utf-8")
|
||||
await self.__upload(key, body, "text/plain")
|
||||
await self.__s3_upload(key, body, "text/plain")
|
||||
|
||||
async def upload_csv(self, key: str, data: Any):
|
||||
async def s3_upload_csv(self, key: str, data: Any):
|
||||
if isinstance(data, bytes):
|
||||
body = data
|
||||
elif isinstance(data, str):
|
||||
@@ -68,9 +68,9 @@ class DataStore:
|
||||
body = buf.getvalue().encode("utf-8")
|
||||
else:
|
||||
raise TypeError(f"Unsupported data type for CSV upload: {type(data)}")
|
||||
await self.__upload(key, body, "text/csv")
|
||||
await self.__s3_upload(key, body, "text/csv")
|
||||
|
||||
async def __upload(
|
||||
async def __s3_upload(
|
||||
self, key: str, body: bytes, content_type: str, metadata: dict[str, object] = {}
|
||||
):
|
||||
await asyncio.to_thread(
|
||||
|
||||
@@ -64,3 +64,4 @@ class FetchClient:
|
||||
response = await self._client.request(method, url, **kwargs)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
raise httpx.NetworkError("Failed to send request")
|
||||
|
||||
@@ -1,12 +1,19 @@
|
||||
from virtek_apify.logger import Logger
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from apify_shared.logger import Logger
|
||||
from zenrows import ZenRowsClient
|
||||
|
||||
|
||||
@dataclass
|
||||
class ZenRowsConfig:
|
||||
logger: Logger
|
||||
api_key: str
|
||||
|
||||
|
||||
class ZenRowsScraper:
|
||||
def __init__(self, logger: Logger, api_key: str):
|
||||
self.client = ZenRowsClient(api_key)
|
||||
self.logger = logger
|
||||
def __init__(self, config: ZenRowsConfig):
|
||||
self.client = ZenRowsClient(config.api_key)
|
||||
self.logger = config.logger
|
||||
|
||||
def scrape(self, url: str):
|
||||
self.logger.debug(f"ZenRows scraping url: {url}...")
|
||||
|
||||
Reference in New Issue
Block a user