diff --git a/src/vl_apify_shared/zenrows.py b/src/vl_apify_shared/zenrows.py index 0686502..d372f41 100644 --- a/src/vl_apify_shared/zenrows.py +++ b/src/vl_apify_shared/zenrows.py @@ -1,5 +1,6 @@ import time -from dataclasses import dataclass +from dataclasses import dataclass, field +from typing import Optional from vl_apify_shared.logger import Logger from zenrows import ZenRowsClient @@ -15,10 +16,31 @@ class ZenRowsScraper: self.client = ZenRowsClient(config.api_key) self.logger = config.logger - def scrape(self, url: str): + def scrape( + self, + url: str, + js_render: bool = True, + wait: Optional[int] = None, + wait_for: Optional[str] = None, + ): + """Scrape a URL using ZenRows scraping SDK. + + Args + url: URL to scrape + js_render: whether to wait for JavaScript to execute + wait: milliseconds to wait after page load + wait_for: css selector to wait for + """ self.logger.debug(f"ZenRows scraping url: {url}...") + params: dict = {} + if js_render: + params["js_render"] = "true" + if wait is not None: + params["wait"] = str(wait) + if wait_for is not None: + params["wait_for"] = wait_for start = time.monotonic() - response = self.client.get(url, params={"mode": "auto"}) + response = self.client.get(url, params=params) elapsed_time = round((time.monotonic() - start) * 1000) self.logger.debug( f"ZenRows scraped url: {url}. Returned status: {response.status_code} in {elapsed_time}ms"