Updated ZenRowsScraper to accept JS-related configs
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Optional
|
||||||
from vl_apify_shared.logger import Logger
|
from vl_apify_shared.logger import Logger
|
||||||
from zenrows import ZenRowsClient
|
from zenrows import ZenRowsClient
|
||||||
|
|
||||||
@@ -15,10 +16,31 @@ class ZenRowsScraper:
|
|||||||
self.client = ZenRowsClient(config.api_key)
|
self.client = ZenRowsClient(config.api_key)
|
||||||
self.logger = config.logger
|
self.logger = config.logger
|
||||||
|
|
||||||
def scrape(self, url: str):
|
def scrape(
|
||||||
|
self,
|
||||||
|
url: str,
|
||||||
|
js_render: bool = True,
|
||||||
|
wait: Optional[int] = None,
|
||||||
|
wait_for: Optional[str] = None,
|
||||||
|
):
|
||||||
|
"""Scrape a URL using ZenRows scraping SDK.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: URL to scrape
|
||||||
|
js_render: whether to wait for JavaScript to execute
|
||||||
|
wait: milliseconds to wait after page load
|
||||||
|
wait_for: css selector to wait for
|
||||||
|
"""
|
||||||
self.logger.debug(f"ZenRows scraping url: {url}...")
|
self.logger.debug(f"ZenRows scraping url: {url}...")
|
||||||
|
params: dict = {}
|
||||||
|
if js_render:
|
||||||
|
params["js_render"] = "true"
|
||||||
|
if wait is not None:
|
||||||
|
params["wait"] = str(wait)
|
||||||
|
if wait_for is not None:
|
||||||
|
params["wait_for"] = wait_for
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
response = self.client.get(url, params={"mode": "auto"})
|
response = self.client.get(url, params=params)
|
||||||
elapsed_time = round((time.monotonic() - start) * 1000)
|
elapsed_time = round((time.monotonic() - start) * 1000)
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
f"ZenRows scraped url: {url}. Returned status: {response.status_code} in {elapsed_time}ms"
|
f"ZenRows scraped url: {url}. Returned status: {response.status_code} in {elapsed_time}ms"
|
||||||
|
|||||||
Reference in New Issue
Block a user