11
11
Callable ,
12
12
Dict ,
13
13
Generator ,
14
+ Iterable ,
14
15
List ,
15
16
Optional ,
16
17
Sequence ,
26
27
import sqlalchemy
27
28
from langchain_core .documents import Document
28
29
from langchain_core .embeddings import Embeddings
29
- from langchain_core .indexing import UpsertResponse
30
30
from langchain_core .utils import get_from_dict_or_env
31
31
from langchain_core .vectorstores import VectorStore
32
32
from sqlalchemy import SQLColumnExpression , cast , create_engine , delete , func , select
@@ -764,7 +764,9 @@ def add_embeddings(
764
764
"""
765
765
assert not self ._async_engine , "This method must be called with sync_mode"
766
766
if ids is None :
767
- ids = [str (uuid .uuid4 ()) for _ in texts ]
767
+ ids_ = [str (uuid .uuid4 ()) for _ in texts ]
768
+ else :
769
+ ids_ = [id if id is not None else str (uuid .uuid4 ()) for id in ids ]
768
770
769
771
if not metadatas :
770
772
metadatas = [{} for _ in texts ]
@@ -782,7 +784,7 @@ def add_embeddings(
782
784
"cmetadata" : metadata or {},
783
785
}
784
786
for text , metadata , embedding , id in zip (
785
- texts , metadatas , embeddings , ids
787
+ texts , metadatas , embeddings , ids_
786
788
)
787
789
]
788
790
stmt = insert (self .EmbeddingStore ).values (data )
@@ -798,7 +800,7 @@ def add_embeddings(
798
800
session .execute (on_conflict_stmt )
799
801
session .commit ()
800
802
801
- return ids
803
+ return ids_
802
804
803
805
async def aadd_embeddings (
804
806
self ,
@@ -819,8 +821,11 @@ async def aadd_embeddings(
819
821
kwargs: vectorstore specific parameters
820
822
"""
821
823
await self .__apost_init__ () # Lazy async init
824
+
822
825
if ids is None :
823
- ids = [str (uuid .uuid1 ()) for _ in texts ]
826
+ ids_ = [str (uuid .uuid4 ()) for _ in texts ]
827
+ else :
828
+ ids_ = [id if id is not None else str (uuid .uuid4 ()) for id in ids ]
824
829
825
830
if not metadatas :
826
831
metadatas = [{} for _ in texts ]
@@ -838,7 +843,7 @@ async def aadd_embeddings(
838
843
"cmetadata" : metadata or {},
839
844
}
840
845
for text , metadata , embedding , id in zip (
841
- texts , metadatas , embeddings , ids
846
+ texts , metadatas , embeddings , ids_
842
847
)
843
848
]
844
849
stmt = insert (self .EmbeddingStore ).values (data )
@@ -854,7 +859,67 @@ async def aadd_embeddings(
854
859
await session .execute (on_conflict_stmt )
855
860
await session .commit ()
856
861
857
- return ids
862
+ return ids_
863
+
864
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed the given texts and add them to the vectorstore.

    The texts are embedded with ``self.embedding_function.embed_documents``
    and then written through ``self.add_embeddings``.

    Args:
        texts: Iterable of strings to add to the vectorstore. It is
            materialized into a list once, so a one-shot iterator is
            consumed exactly one time.
        metadatas: Optional list of metadatas associated with the texts.
            A missing or empty value is forwarded as ``None``.
        ids: Optional list of ids for the texts. A missing or empty value
            is forwarded as ``None``.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``add_embeddings``.

    Returns:
        List of ids from adding the texts into the vectorstore.

    Raises:
        AssertionError: If the store holds an async engine.
    """
    # Raise explicitly rather than using ``assert``: assert statements are
    # removed under ``python -O`` and the guard would silently disappear.
    if self._async_engine:
        raise AssertionError("This method must be called without async_mode")

    texts_ = list(texts)
    embeddings = self.embedding_function.embed_documents(texts_)
    return self.add_embeddings(
        texts=texts_,
        embeddings=list(embeddings),
        metadatas=list(metadatas) if metadatas else None,
        ids=list(ids) if ids else None,
        **kwargs,
    )
893
+
894
async def aadd_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Asynchronously embed the given texts and add them to the vectorstore.

    The texts are embedded with ``self.embedding_function.aembed_documents``
    and then written through ``self.aadd_embeddings``.

    Args:
        texts: Iterable of strings to add to the vectorstore; it is
            materialized into a list before embedding.
        metadatas: Optional list of metadatas associated with the texts.
            A missing or empty value is forwarded as ``None``.
        ids: Optional list of ids for the texts. A missing or empty value
            is forwarded as ``None``.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``aadd_embeddings``.

    Returns:
        List of ids from adding the texts into the vectorstore.
    """
    await self.__apost_init__()  # Lazy async init

    documents = list(texts)
    vectors = await self.embedding_function.aembed_documents(documents)
    vector_list = list(vectors)

    # Empty or missing optional inputs are normalized to None.
    metadata_list = None
    if metadatas:
        metadata_list = list(metadatas)

    id_list = None
    if ids:
        id_list = list(ids)

    stored_ids = await self.aadd_embeddings(
        texts=documents,
        embeddings=vector_list,
        metadatas=metadata_list,
        ids=id_list,
        **kwargs,
    )
    return stored_ids
858
923
859
924
def similarity_search (
860
925
self ,
@@ -2162,64 +2227,6 @@ async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]:
2162
2227
async with self .session_maker () as session :
2163
2228
yield typing_cast (AsyncSession , session )
2164
2229
2165
def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
    """Upsert documents into the vectorstore.

    Each document's ``page_content`` is embedded with
    ``self.embedding_function.embed_documents`` and written through
    ``self.add_embeddings``. Documents whose ``id`` is ``None`` are given a
    freshly generated UUID4 string.

    Args:
        items: Sequence of documents to upsert.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``add_embeddings``.

    Returns:
        UpsertResponse whose ``"succeeded"`` entry is the id list returned
        by ``add_embeddings`` and whose ``"failed"`` entry lists the
        caller-supplied ids that are absent from it.

    Raises:
        AssertionError: If the store holds an async engine.
    """
    if self._async_engine:
        raise AssertionError("This method must be called in sync mode.")
    texts = [item.page_content for item in items]
    metadatas = [item.metadata for item in items]
    ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
    embeddings = self.embedding_function.embed_documents(list(texts))
    added_ids = self.add_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )
    # Build the lookup once; testing membership against the list itself
    # would make the "failed" scan quadratic in the number of documents.
    added_lookup = set(added_ids)
    return {
        "succeeded": added_ids,
        "failed": [
            item.id
            for item in items
            if item.id is not None and item.id not in added_lookup
        ],
    }
2192
-
2193
async def aupsert(
    self, items: Sequence[Document], /, **kwargs: Any
) -> UpsertResponse:
    """Asynchronously upsert documents into the vectorstore.

    Each document's ``page_content`` is embedded with
    ``self.embedding_function.aembed_documents`` and written through
    ``self.aadd_embeddings``. Documents whose ``id`` is ``None`` are given
    a freshly generated UUID4 string.

    Args:
        items: Sequence of documents to upsert.
        kwargs: vectorstore specific parameters, forwarded unchanged to
            ``aadd_embeddings``.

    Returns:
        UpsertResponse whose ``"succeeded"`` entry is the id list returned
        by ``aadd_embeddings`` and whose ``"failed"`` entry lists the
        caller-supplied ids that are absent from it.

    Raises:
        AssertionError: If the store does not hold an async engine.
    """
    if not self._async_engine:
        raise AssertionError("This method must be called with async_mode")
    texts = [item.page_content for item in items]
    metadatas = [item.metadata for item in items]
    ids = [item.id if item.id is not None else str(uuid.uuid4()) for item in items]
    embeddings = await self.embedding_function.aembed_documents(list(texts))
    added_ids = await self.aadd_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )
    # Build the lookup once; testing membership against the list itself
    # would make the "failed" scan quadratic in the number of documents.
    added_lookup = set(added_ids)
    return {
        "succeeded": added_ids,
        "failed": [
            item.id
            for item in items
            if item.id is not None and item.id not in added_lookup
        ],
    }
2222
-
2223
2230
def get_by_ids (self , ids : Sequence [str ], / ) -> List [Document ]:
2224
2231
"""Get documents by ids."""
2225
2232
documents = []
0 commit comments