|
11 | 11 | Callable,
|
12 | 12 | Dict,
|
13 | 13 | Generator,
|
14 |
| - Iterable, |
15 | 14 | List,
|
16 | 15 | Optional,
|
17 | 16 | Sequence,
|
|
27 | 26 | import sqlalchemy
|
28 | 27 | from langchain_core.documents import Document
|
29 | 28 | from langchain_core.embeddings import Embeddings
|
| 29 | +from langchain_core.indexing import UpsertResponse |
30 | 30 | from langchain_core.utils import get_from_dict_or_env
|
31 | 31 | from langchain_core.vectorstores import VectorStore
|
32 | 32 | from sqlalchemy import SQLColumnExpression, cast, create_engine, delete, func, select
|
@@ -714,7 +714,7 @@ async def __afrom(
|
714 | 714 |
|
715 | 715 | def add_embeddings(
|
716 | 716 | self,
|
717 |
| - texts: Iterable[str], |
| 717 | + texts: Sequence[str], |
718 | 718 | embeddings: List[List[float]],
|
719 | 719 | metadatas: Optional[List[dict]] = None,
|
720 | 720 | ids: Optional[List[str]] = None,
|
@@ -770,7 +770,7 @@ def add_embeddings(
|
770 | 770 |
|
771 | 771 | async def aadd_embeddings(
|
772 | 772 | self,
|
773 |
| - texts: Iterable[str], |
| 773 | + texts: Sequence[str], |
774 | 774 | embeddings: List[List[float]],
|
775 | 775 | metadatas: Optional[List[dict]] = None,
|
776 | 776 | ids: Optional[List[str]] = None,
|
@@ -824,56 +824,6 @@ async def aadd_embeddings(
|
824 | 824 |
|
825 | 825 | return ids
|
826 | 826 |
|
827 |
| - def add_texts( |
828 |
| - self, |
829 |
| - texts: Iterable[str], |
830 |
| - metadatas: Optional[List[dict]] = None, |
831 |
| - ids: Optional[List[str]] = None, |
832 |
| - **kwargs: Any, |
833 |
| - ) -> List[str]: |
834 |
| - """Run more texts through the embeddings and add to the vectorstore. |
835 |
| -
|
836 |
| - Args: |
837 |
| - texts: Iterable of strings to add to the vectorstore. |
838 |
| - metadatas: Optional list of metadatas associated with the texts. |
839 |
| - ids: Optional list of ids for the texts. |
840 |
| - If not provided, will generate a new id for each text. |
841 |
| - kwargs: vectorstore specific parameters |
842 |
| -
|
843 |
| - Returns: |
844 |
| - List of ids from adding the texts into the vectorstore. |
845 |
| - """ |
846 |
| - assert not self._async_engine, "This method must be called without async_mode" |
847 |
| - embeddings = self.embedding_function.embed_documents(list(texts)) |
848 |
| - return self.add_embeddings( |
849 |
| - texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs |
850 |
| - ) |
851 |
| - |
852 |
| - async def aadd_texts( |
853 |
| - self, |
854 |
| - texts: Iterable[str], |
855 |
| - metadatas: Optional[List[dict]] = None, |
856 |
| - ids: Optional[List[str]] = None, |
857 |
| - **kwargs: Any, |
858 |
| - ) -> List[str]: |
859 |
| - """Run more texts through the embeddings and add to the vectorstore. |
860 |
| -
|
861 |
| - Args: |
862 |
| - texts: Iterable of strings to add to the vectorstore. |
863 |
| - metadatas: Optional list of metadatas associated with the texts. |
864 |
| - ids: Optional list of ids for the texts. |
865 |
| - If not provided, will generate a new id for each text. |
866 |
| - kwargs: vectorstore specific parameters |
867 |
| -
|
868 |
| - Returns: |
869 |
| - List of ids from adding the texts into the vectorstore. |
870 |
| - """ |
871 |
| - await self.__apost_init__() # Lazy async init |
872 |
| - embeddings = await self.embedding_function.aembed_documents(list(texts)) |
873 |
| - return await self.aadd_embeddings( |
874 |
| - texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs |
875 |
| - ) |
876 |
| - |
877 | 827 | def similarity_search(
|
878 | 828 | self,
|
879 | 829 | query: str,
|
@@ -1014,6 +964,7 @@ def _results_to_docs_and_scores(self, results: Any) -> List[Tuple[Document, floa
|
1014 | 964 | docs = [
|
1015 | 965 | (
|
1016 | 966 | Document(
|
| 967 | + id=str(result.EmbeddingStore.id), |
1017 | 968 | page_content=result.EmbeddingStore.document,
|
1018 | 969 | metadata=result.EmbeddingStore.cmetadata,
|
1019 | 970 | ),
|
@@ -2178,3 +2129,112 @@ async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]:
|
2178 | 2129 | )
|
2179 | 2130 | async with self.session_maker() as session:
|
2180 | 2131 | yield typing_cast(AsyncSession, session)
|
| 2132 | + |
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
    """Upsert documents into the vectorstore.

    The text of every document is embedded with
    ``self.embedding_function.embed_documents`` and passed to
    ``add_embeddings`` together with its metadata and id. Documents whose
    ``id`` is ``None`` are given a freshly generated UUID4 string.

    Args:
        items: Sequence of documents to upsert.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``add_embeddings``.

    Returns:
        UpsertResponse whose ``"succeeded"`` entry is the id list returned
        by ``add_embeddings`` and whose ``"failed"`` entry lists the
        caller-supplied ids that are missing from that result. Generated
        ids are never reported as failed.

    Raises:
        AssertionError: If the store holds an async engine.
    """
    if self._async_engine:
        raise AssertionError("This method must be called in sync mode.")
    if not items:
        # Nothing to embed or store: skip the embedding and database calls.
        return {"succeeded": [], "failed": []}

    texts = [item.page_content for item in items]
    metadatas = [item.metadata for item in items]
    ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
    embeddings = self.embedding_function.embed_documents(texts)
    added_ids = self.add_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )

    # Set membership keeps the failure scan linear in the number of items.
    stored = set(added_ids)
    return {
        "succeeded": added_ids,
        "failed": [
            item.id
            for item in items
            if item.id is not None and item.id not in stored
        ],
    }
| 2160 | + |
async def aupsert(
    self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
    """Asynchronously upsert documents into the vectorstore.

    The text of every document is embedded with
    ``self.embedding_function.aembed_documents`` and passed to
    ``aadd_embeddings`` together with its metadata and id. Documents whose
    ``id`` is ``None`` are given a freshly generated UUID4 string.

    Args:
        items: Sequence of documents to upsert.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``aadd_embeddings``.

    Returns:
        UpsertResponse whose ``"succeeded"`` entry is the id list returned
        by ``aadd_embeddings`` and whose ``"failed"`` entry lists the
        caller-supplied ids that are missing from that result. Generated
        ids are never reported as failed.

    Raises:
        AssertionError: If the store does not hold an async engine.
    """
    if not self._async_engine:
        raise AssertionError("This method must be called with async_mode")
    if not items:
        # Nothing to embed or store: skip the embedding and database calls.
        return {"succeeded": [], "failed": []}

    texts = [item.page_content for item in items]
    metadatas = [item.metadata for item in items]
    ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
    embeddings = await self.embedding_function.aembed_documents(texts)
    added_ids = await self.aadd_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )

    # Set membership keeps the failure scan linear in the number of items.
    stored = set(added_ids)
    return {
        "succeeded": added_ids,
        "failed": [
            item.id
            for item in items
            if item.id is not None and item.id not in stored
        ],
    }
| 2190 | + |
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
    """Get documents by ids.

    Only rows of ``self.EmbeddingStore`` that belong to this store's
    collection are considered. Ids without a matching row are simply
    absent from the result, and no ordering is requested from the
    database, so the result order need not follow ``ids``.

    Args:
        ids: Ids of the documents to fetch.

    Returns:
        One ``Document`` per matching row, with ``id`` rendered as a string
        (as the async variant and the search results do), ``page_content``
        taken from the row's ``document`` and ``metadata`` from its
        ``cmetadata``.
    """
    with self._make_sync_session() as session:
        collection = self.get_collection(session)
        # Multiple criteria passed to ``where`` are combined with AND.
        stmt = select(self.EmbeddingStore).where(
            self.EmbeddingStore.id.in_(ids),
            self.EmbeddingStore.collection_id == collection.uuid,
        )
        # Materialize the documents while the session is still open.
        documents = [
            Document(
                id=str(row.id),
                page_content=row.document,
                metadata=row.cmetadata,
            )
            for row in session.execute(stmt).scalars().all()
        ]
    return documents
| 2214 | + |
async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
    """Asynchronously get documents by ids.

    Only rows of ``self.EmbeddingStore`` that belong to this store's
    collection are considered. Ids without a matching row are absent from
    the result, and no ordering is requested from the database.

    Args:
        ids: Ids of the documents to fetch.

    Returns:
        One ``Document`` per matching row, with ``id`` rendered as a
        string, ``page_content`` taken from the row's ``document`` and
        ``metadata`` from its ``cmetadata``.
    """
    async with self._make_async_session() as session:
        collection = await self.aget_collection(session)
        # Multiple criteria passed to ``where`` are combined with AND.
        query = select(self.EmbeddingStore).where(
            self.EmbeddingStore.id.in_(ids),
            self.EmbeddingStore.collection_id == collection.uuid,
        )
        executed = await session.execute(query)
        rows: Sequence[Any] = executed.scalars().all()
        # Materialize the documents while the session is still open.
        found = [
            Document(
                id=str(row.id),
                page_content=row.document,
                metadata=row.cmetadata,
            )
            for row in rows
        ]
    return found
0 commit comments