Skip to content

Commit 344619e

Browse files
authored
patch: revert unreleased upsert change (#112)
* This change has not been released yet.
* `add_documents` and `add_texts` can be used instead of the `upsert` method. Both methods already have upsert semantics when an ID is provided.
1 parent 6d4f6f3 commit 344619e

File tree

3 files changed

+1095
-1200
lines changed

3 files changed

+1095
-1200
lines changed

langchain_postgres/vectorstores.py

Lines changed: 72 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Callable,
1212
Dict,
1313
Generator,
14+
Iterable,
1415
List,
1516
Optional,
1617
Sequence,
@@ -26,7 +27,6 @@
2627
import sqlalchemy
2728
from langchain_core.documents import Document
2829
from langchain_core.embeddings import Embeddings
29-
from langchain_core.indexing import UpsertResponse
3030
from langchain_core.utils import get_from_dict_or_env
3131
from langchain_core.vectorstores import VectorStore
3232
from sqlalchemy import SQLColumnExpression, cast, create_engine, delete, func, select
@@ -764,7 +764,9 @@ def add_embeddings(
764764
"""
765765
assert not self._async_engine, "This method must be called with sync_mode"
766766
if ids is None:
767-
ids = [str(uuid.uuid4()) for _ in texts]
767+
ids_ = [str(uuid.uuid4()) for _ in texts]
768+
else:
769+
ids_ = [id if id is not None else str(uuid.uuid4()) for id in ids]
768770

769771
if not metadatas:
770772
metadatas = [{} for _ in texts]
@@ -782,7 +784,7 @@ def add_embeddings(
782784
"cmetadata": metadata or {},
783785
}
784786
for text, metadata, embedding, id in zip(
785-
texts, metadatas, embeddings, ids
787+
texts, metadatas, embeddings, ids_
786788
)
787789
]
788790
stmt = insert(self.EmbeddingStore).values(data)
@@ -798,7 +800,7 @@ def add_embeddings(
798800
session.execute(on_conflict_stmt)
799801
session.commit()
800802

801-
return ids
803+
return ids_
802804

803805
async def aadd_embeddings(
804806
self,
@@ -819,8 +821,11 @@ async def aadd_embeddings(
819821
kwargs: vectorstore specific parameters
820822
"""
821823
await self.__apost_init__() # Lazy async init
824+
822825
if ids is None:
823-
ids = [str(uuid.uuid1()) for _ in texts]
826+
ids_ = [str(uuid.uuid4()) for _ in texts]
827+
else:
828+
ids_ = [id if id is not None else str(uuid.uuid4()) for id in ids]
824829

825830
if not metadatas:
826831
metadatas = [{} for _ in texts]
@@ -838,7 +843,7 @@ async def aadd_embeddings(
838843
"cmetadata": metadata or {},
839844
}
840845
for text, metadata, embedding, id in zip(
841-
texts, metadatas, embeddings, ids
846+
texts, metadatas, embeddings, ids_
842847
)
843848
]
844849
stmt = insert(self.EmbeddingStore).values(data)
@@ -854,7 +859,67 @@ async def aadd_embeddings(
854859
await session.execute(on_conflict_stmt)
855860
await session.commit()
856861

857-
return ids
862+
return ids_
863+
864+
def add_texts(
865+
self,
866+
texts: Iterable[str],
867+
metadatas: Optional[List[dict]] = None,
868+
ids: Optional[List[str]] = None,
869+
**kwargs: Any,
870+
) -> List[str]:
871+
"""Run more texts through the embeddings and add to the vectorstore.
872+
873+
Args:
874+
texts: Iterable of strings to add to the vectorstore.
875+
metadatas: Optional list of metadatas associated with the texts.
876+
ids: Optional list of ids for the texts.
877+
If not provided, will generate a new id for each text.
878+
kwargs: vectorstore specific parameters
879+
880+
Returns:
881+
List of ids from adding the texts into the vectorstore.
882+
"""
883+
assert not self._async_engine, "This method must be called without async_mode"
884+
texts_ = list(texts)
885+
embeddings = self.embedding_function.embed_documents(texts_)
886+
return self.add_embeddings(
887+
texts=texts_,
888+
embeddings=list(embeddings),
889+
metadatas=list(metadatas) if metadatas else None,
890+
ids=list(ids) if ids else None,
891+
**kwargs,
892+
)
893+
894+
async def aadd_texts(
895+
self,
896+
texts: Iterable[str],
897+
metadatas: Optional[List[dict]] = None,
898+
ids: Optional[List[str]] = None,
899+
**kwargs: Any,
900+
) -> List[str]:
901+
"""Run more texts through the embeddings and add to the vectorstore.
902+
903+
Args:
904+
texts: Iterable of strings to add to the vectorstore.
905+
metadatas: Optional list of metadatas associated with the texts.
906+
ids: Optional list of ids for the texts.
907+
If not provided, will generate a new id for each text.
908+
kwargs: vectorstore specific parameters
909+
910+
Returns:
911+
List of ids from adding the texts into the vectorstore.
912+
"""
913+
await self.__apost_init__() # Lazy async init
914+
texts_ = list(texts)
915+
embeddings = await self.embedding_function.aembed_documents(texts_)
916+
return await self.aadd_embeddings(
917+
texts=texts_,
918+
embeddings=list(embeddings),
919+
metadatas=list(metadatas) if metadatas else None,
920+
ids=list(ids) if ids else None,
921+
**kwargs,
922+
)
858923

859924
def similarity_search(
860925
self,
@@ -2162,64 +2227,6 @@ async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]:
21622227
async with self.session_maker() as session:
21632228
yield typing_cast(AsyncSession, session)
21642229

2165-
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
2166-
"""Upsert documents into the vectorstore.
2167-
2168-
Args:
2169-
items: Sequence of documents to upsert.
2170-
kwargs: vectorstore specific parameters
2171-
2172-
Returns:
2173-
UpsertResponse
2174-
"""
2175-
if self._async_engine:
2176-
raise AssertionError("This method must be called in sync mode.")
2177-
texts = [item.page_content for item in items]
2178-
metadatas = [item.metadata for item in items]
2179-
ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
2180-
embeddings = self.embedding_function.embed_documents(list(texts))
2181-
added_ids = self.add_embeddings(
2182-
texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
2183-
)
2184-
return {
2185-
"succeeded": added_ids,
2186-
"failed": [
2187-
item.id
2188-
for item in items
2189-
if item.id is not None and item.id not in added_ids
2190-
],
2191-
}
2192-
2193-
async def aupsert(
2194-
self, items: Sequence[Document], /, **kwargs: Any
2195-
) -> UpsertResponse:
2196-
"""Upsert documents into the vectorstore.
2197-
2198-
Args:
2199-
items: Sequence of documents to upsert.
2200-
kwargs: vectorstore specific parameters
2201-
2202-
Returns:
2203-
UpsertResponse
2204-
"""
2205-
if not self._async_engine:
2206-
raise AssertionError("This method must be called with async_mode")
2207-
texts = [item.page_content for item in items]
2208-
metadatas = [item.metadata for item in items]
2209-
ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
2210-
embeddings = await self.embedding_function.aembed_documents(list(texts))
2211-
added_ids = await self.aadd_embeddings(
2212-
texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
2213-
)
2214-
return {
2215-
"succeeded": added_ids,
2216-
"failed": [
2217-
item.id
2218-
for item in items
2219-
if item.id is not None and item.id not in added_ids
2220-
],
2221-
}
2222-
22232230
def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
22242231
"""Get documents by ids."""
22252232
documents = []

0 commit comments

Comments
 (0)