|
1 | 1 | """Oxylabs Web Reader."""
|
2 | 2 |
|
3 | 3 | import asyncio
|
4 |
| -from typing import Any, List |
| 4 | +from typing import Any, Dict, List, Optional, TYPE_CHECKING |
5 | 5 | from platform import architecture, python_version
|
6 | 6 | from importlib.metadata import version
|
7 | 7 |
|
| 8 | +from llama_index.core.bridge.pydantic import Field |
8 | 9 | from llama_index.core.readers.base import BasePydanticReader
|
9 | 10 | from llama_index.core.schema import Document
|
10 | 11 | from markdownify import markdownify
|
11 | 12 |
|
12 | 13 | from llama_index.readers.web.oxylabs_web.utils import strip_html, json_to_markdown
|
13 |
| -from oxylabs.utils.utils import prepare_config |
14 | 14 |
|
15 |
| -from oxylabs.internal.api import AsyncAPI, APICredentials, RealtimeAPI |
| 15 | +if TYPE_CHECKING: |
| 16 | + from oxylabs.internal.api import AsyncAPI, RealtimeAPI |
| 17 | + |
| 18 | + |
def get_default_config() -> dict[str, Any]:
    """
    Build the default Oxylabs config used by ``OxylabsWebReader``.

    Serves as the ``default_factory`` for the reader's ``default_config``
    field, so the config is produced per instance rather than once at
    class-definition time.

    Returns:
        dict[str, Any]: Whatever ``prepare_config(async_integration=True)``
        yields.

    """
    # Imported lazily: the ``oxylabs`` package is only touched when a config
    # is actually requested, not when this module is imported.
    from oxylabs.utils import utils as oxylabs_utils

    config = oxylabs_utils.prepare_config(async_integration=True)
    return config
16 | 23 |
|
17 | 24 |
|
18 | 25 | class OxylabsWebReader(BasePydanticReader):
|
@@ -50,11 +57,13 @@ class OxylabsWebReader(BasePydanticReader):
|
50 | 57 |
|
51 | 58 | timeout_s: int = 100
|
52 | 59 | oxylabs_scraper_url: str = "https://realtime.oxylabs.io/v1/queries"
|
53 |
| - api: RealtimeAPI |
54 |
| - async_api: AsyncAPI |
55 |
| - default_config: dict[str, Any] = prepare_config(async_integration=True) |
| 60 | + api: "RealtimeAPI" |
| 61 | + async_api: "AsyncAPI" |
| 62 | + default_config: dict[str, Any] = Field(default_factory=get_default_config) |
56 | 63 |
|
57 | 64 | def __init__(self, username: str, password: str, **kwargs) -> None:
|
| 65 | + from oxylabs.internal.api import AsyncAPI, APICredentials, RealtimeAPI |
| 66 | + |
58 | 67 | credentials = APICredentials(username=username, password=password)
|
59 | 68 |
|
60 | 69 | bits, _ = architecture()
|
@@ -90,7 +99,7 @@ def _get_document_from_response(self, response: dict[str, Any]) -> Document:
|
90 | 99 | async def aload_data(
|
91 | 100 | self,
|
92 | 101 | urls: list[str],
|
93 |
| - additional_params: dict[str, Any] | None = None, |
| 102 | + additional_params: Optional[Dict[str, Any]] = None, |
94 | 103 | ) -> List[Document]:
|
95 | 104 | """
|
96 | 105 | Asynchronously load data from urls.
|
@@ -124,7 +133,7 @@ async def aload_data(
|
124 | 133 | def load_data(
|
125 | 134 | self,
|
126 | 135 | urls: list[str],
|
127 |
| - additional_params: dict[str, Any] | None = None, |
| 136 | + additional_params: Optional[Dict[str, Any]] = None, |
128 | 137 | ) -> List[Document]:
|
129 | 138 | """
|
130 | 139 | Load data from urls.
|
|
0 commit comments