Updated ZenRowsScraper to accept JS-related configs
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from vl_apify_shared.logger import Logger
|
||||
from zenrows import ZenRowsClient
|
||||
|
||||
@@ -15,10 +16,31 @@ class ZenRowsScraper:
|
||||
self.client = ZenRowsClient(config.api_key)
|
||||
self.logger = config.logger
|
||||
|
||||
def scrape(self, url: str):
|
||||
def scrape(
|
||||
self,
|
||||
url: str,
|
||||
js_render: bool = True,
|
||||
wait: Optional[int] = None,
|
||||
wait_for: Optional[str] = None,
|
||||
):
|
||||
"""Scrape a URL using ZenRows scraping SDK.
|
||||
|
||||
Args:
|
||||
url: URL to scrape
|
||||
js_render: whether to render the page's JavaScript before returning the content
|
||||
wait: milliseconds to wait after page load
|
||||
wait_for: CSS selector to wait for
|
||||
"""
|
||||
self.logger.debug(f"ZenRows scraping url: {url}...")
|
||||
params: dict = {}
|
||||
if js_render:
|
||||
params["js_render"] = "true"
|
||||
if wait is not None:
|
||||
params["wait"] = str(wait)
|
||||
if wait_for is not None:
|
||||
params["wait_for"] = wait_for
|
||||
start = time.monotonic()
|
||||
response = self.client.get(url, params={"mode": "auto"})
|
||||
response = self.client.get(url, params=params)
|
||||
elapsed_time = round((time.monotonic() - start) * 1000)
|
||||
self.logger.debug(
|
||||
f"ZenRows scraped url: {url}. Returned status: {response.status_code} in {elapsed_time}ms"
|
||||
|
||||
Reference in New Issue
Block a user