0.1.1-rc1 minor refactors
This commit is contained in:
4
.vscode/settings.json
vendored
Normal file
4
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"python.analysis.autoImportCompletions": true,
|
||||||
|
"python.analysis.typeCheckingMode": "standard"
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "apify-shared"
|
name = "apify-shared"
|
||||||
version = "0.1.0"
|
version = "0.1.1-rc1"
|
||||||
description = "Shared utility package for use with Apify actors"
|
description = "Shared utility package for use with Apify actors"
|
||||||
authors = [{ name = "Ryan Byrne", email = "ryanjbyrne30@gmail.com" }]
|
authors = [{ name = "Ryan Byrne", email = "ryanjbyrne30@gmail.com" }]
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
|
|||||||
@@ -1,2 +1,5 @@
|
|||||||
def hello() -> str:
|
from apify_shared.data_store import DataStore, DataStoreConfig
|
||||||
return "Hello from apify-shared!"
|
from apify_shared.fetch import FetchClient, FetchConfig
|
||||||
|
from apify_shared.logger import Logger
|
||||||
|
from apify_shared.utils import date_str, timestamp_str
|
||||||
|
from apify_shared.zenrows import ZenRowsClient
|
||||||
|
|||||||
@@ -25,33 +25,33 @@ class DataStore:
|
|||||||
kwargs["aws_secret_access_key"] = config.aws_secret_access_key
|
kwargs["aws_secret_access_key"] = config.aws_secret_access_key
|
||||||
self._client = boto3.client("s3", **kwargs)
|
self._client = boto3.client("s3", **kwargs)
|
||||||
|
|
||||||
async def upload_json(self, key: str, data: Any):
|
async def s3_upload_json(self, key: str, data: Any):
|
||||||
body = json.dumps(data, indent=2).encode("utf-8")
|
body = json.dumps(data, indent=2).encode("utf-8")
|
||||||
await self.__upload(key, body, "application/json")
|
await self.__s3_upload(key, body, "application/json")
|
||||||
|
|
||||||
async def upload_xml(self, key: str, data: Any):
|
async def s3_upload_xml(self, key: str, data: Any):
|
||||||
if isinstance(data, bytes):
|
if isinstance(data, bytes):
|
||||||
body = data
|
body = data
|
||||||
elif isinstance(data, str):
|
elif isinstance(data, str):
|
||||||
body = data.encode("utf-8")
|
body = data.encode("utf-8")
|
||||||
else:
|
else:
|
||||||
body = str(data).encode("utf-8")
|
body = str(data).encode("utf-8")
|
||||||
await self.__upload(key, body, "application/xml")
|
await self.__s3_upload(key, body, "application/xml")
|
||||||
|
|
||||||
async def upload_html(
|
async def s3_upload_html(
|
||||||
self, key: str, data: str | bytes, metadata: dict[str, object] = {}
|
self, key: str, data: str | bytes, metadata: dict[str, object] = {}
|
||||||
):
|
):
|
||||||
body = data.encode("utf-8") if isinstance(data, str) else data
|
body = data.encode("utf-8") if isinstance(data, str) else data
|
||||||
await self.__upload(key, body, "text/html; charset=utf-8", metadata=metadata)
|
await self.__s3_upload(key, body, "text/html; charset=utf-8", metadata=metadata)
|
||||||
|
|
||||||
async def upload_txt(self, key: str, data: Any):
|
async def s3_upload_txt(self, key: str, data: Any):
|
||||||
if isinstance(data, bytes):
|
if isinstance(data, bytes):
|
||||||
body = data
|
body = data
|
||||||
else:
|
else:
|
||||||
body = str(data).encode("utf-8")
|
body = str(data).encode("utf-8")
|
||||||
await self.__upload(key, body, "text/plain")
|
await self.__s3_upload(key, body, "text/plain")
|
||||||
|
|
||||||
async def upload_csv(self, key: str, data: Any):
|
async def s3_upload_csv(self, key: str, data: Any):
|
||||||
if isinstance(data, bytes):
|
if isinstance(data, bytes):
|
||||||
body = data
|
body = data
|
||||||
elif isinstance(data, str):
|
elif isinstance(data, str):
|
||||||
@@ -68,9 +68,9 @@ class DataStore:
|
|||||||
body = buf.getvalue().encode("utf-8")
|
body = buf.getvalue().encode("utf-8")
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"Unsupported data type for CSV upload: {type(data)}")
|
raise TypeError(f"Unsupported data type for CSV upload: {type(data)}")
|
||||||
await self.__upload(key, body, "text/csv")
|
await self.__s3_upload(key, body, "text/csv")
|
||||||
|
|
||||||
async def __upload(
|
async def __s3_upload(
|
||||||
self, key: str, body: bytes, content_type: str, metadata: dict[str, object] = {}
|
self, key: str, body: bytes, content_type: str, metadata: dict[str, object] = {}
|
||||||
):
|
):
|
||||||
await asyncio.to_thread(
|
await asyncio.to_thread(
|
||||||
|
|||||||
@@ -64,3 +64,4 @@ class FetchClient:
|
|||||||
response = await self._client.request(method, url, **kwargs)
|
response = await self._client.request(method, url, **kwargs)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response
|
return response
|
||||||
|
raise httpx.NetworkError("Failed to send request")
|
||||||
|
|||||||
@@ -1,12 +1,19 @@
|
|||||||
from virtek_apify.logger import Logger
|
|
||||||
import time
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from apify_shared.logger import Logger
|
||||||
from zenrows import ZenRowsClient
|
from zenrows import ZenRowsClient
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ZenRowsConfig:
|
||||||
|
logger: Logger
|
||||||
|
api_key: str
|
||||||
|
|
||||||
|
|
||||||
class ZenRowsScraper:
|
class ZenRowsScraper:
|
||||||
def __init__(self, logger: Logger, api_key: str):
|
def __init__(self, config: ZenRowsConfig):
|
||||||
self.client = ZenRowsClient(api_key)
|
self.client = ZenRowsClient(config.api_key)
|
||||||
self.logger = logger
|
self.logger = config.logger
|
||||||
|
|
||||||
def scrape(self, url: str):
|
def scrape(self, url: str):
|
||||||
self.logger.debug(f"ZenRows scraping url: {url}...")
|
self.logger.debug(f"ZenRows scraping url: {url}...")
|
||||||
|
|||||||
Reference in New Issue
Block a user