@@ -246,98 +246,130 @@ def _create_vector_extension(conn: Connection) -> None:
246
246
247
247
248
248
class PGVector (VectorStore ):
249
- """Vectorstore implementation using Postgres as the backend .
249
+ """Postgres vector store integration .
250
250
251
- Currently, there is no mechanism for supporting data migration.
251
+ Setup:
252
+ Install ``langchain_postgres`` and run the docker container.
252
253
253
- So breaking changes in the vectorstore schema will require the user to recreate
254
- the tables and re-add the documents.
254
+ .. code-block:: bash
255
255
256
- If this is a concern, please use a different vectorstore. If
257
- not, this implementation should be fine for your use case.
256
+ pip install -qU langchain-postgres
257
+ docker run --name pgvector-container -e POSTGRES_USER=langchain -e POSTGRES_PASSWORD=langchain -e POSTGRES_DB=langchain -p 6024:5432 -d pgvector/pgvector:pg16
258
258
259
- To use this vectorstore you need to have the `vector` extension installed.
260
- The `vector` extension is a Postgres extension that provides vector
261
- similarity search capabilities.
259
+ Key init args — indexing params:
260
+ collection_name: str
261
+ Name of the collection.
262
+ embeddings: Embeddings
263
+ Embedding function to use.
262
264
263
- ```sh
264
- docker run --name pgvector-container -e POSTGRES_PASSWORD=...
265
- -d pgvector/pgvector:pg16
266
- ```
267
-
268
- Example:
265
+ Key init args — client params:
266
+ connection: Union[None, DBConnection, Engine, AsyncEngine, str]
267
+ Connection string or engine.
268
+
269
+ Instantiate:
269
270
.. code-block:: python
270
271
272
+ from langchain_postgres import PGVector
271
273
from langchain_postgres.vectorstores import PGVector
272
- from langchain_openai.embeddings import OpenAIEmbeddings
273
-
274
- connection_string = "postgresql+psycopg://..."
275
- collection_name = "state_of_the_union_test"
276
- embeddings = OpenAIEmbeddings()
277
- vectorstore = PGVector.from_documents(
278
- embedding=embeddings,
279
- documents=docs,
280
- connection=connection_string,
274
+ from langchain_openai import OpenAIEmbeddings
275
+
276
+ # See docker command above to launch a postgres instance with pgvector enabled.
277
+ connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain" # Uses psycopg3!
278
+ collection_name = "my_docs"
279
+
280
+ vector_store = PGVector(
281
+ embeddings=OpenAIEmbeddings(model="text-embedding-3-large"),
281
282
collection_name=collection_name,
283
+ connection=connection,
282
284
use_jsonb=True,
283
- async_mode=False,
284
285
)
285
286
287
+ Add Documents:
288
+ .. code-block:: python
289
+
290
+ from langchain_core.documents import Document
291
+
292
+ document_1 = Document(page_content="foo", metadata={"baz": "bar"})
293
+ document_2 = Document(page_content="thud", metadata={"bar": "baz"})
294
+ document_3 = Document(page_content="i will be deleted :(")
295
+
296
+ documents = [document_1, document_2, document_3]
297
+ ids = ["1", "2", "3"]
298
+ vector_store.add_documents(documents=documents, ids=ids)
299
+
300
+ Delete Documents:
301
+ .. code-block:: python
302
+
303
+ vector_store.delete(ids=["3"])
304
+
305
+ Search:
306
+ .. code-block:: python
307
+
308
+ results = vector_store.similarity_search(query="thud", k=1)
309
+ for doc in results:
310
+ print(f"* {doc.page_content} [{doc.metadata}]")
311
+
312
+ .. code-block:: python
313
+
314
+ * thud [{'bar': 'baz'}]
315
+
316
+ Search with filter:
317
+ .. code-block:: python
318
+
319
+ results = vector_store.similarity_search(query="thud", k=1, filter={"bar": "baz"})
320
+ for doc in results:
321
+ print(f"* {doc.page_content} [{doc.metadata}]")
322
+
323
+ .. code-block:: python
324
+
325
+ * thud [{'bar': 'baz'}]
326
+
327
+ Search with score:
328
+ .. code-block:: python
329
+
330
+ results = vector_store.similarity_search_with_score(query="qux", k=1)
331
+ for doc, score in results:
332
+ print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
333
+
334
+ .. code-block:: python
335
+
336
+ * [SIM=0.499243] foo [{'baz': 'bar'}]
337
+
338
+ Async:
339
+ .. code-block:: python
340
+
341
+ # add documents
342
+ # await vector_store.aadd_documents(documents=documents, ids=ids)
343
+
344
+ # delete documents
345
+ # await vector_store.adelete(ids=["3"])
346
+
347
+ # search
348
+ # results = await vector_store.asimilarity_search(query="thud", k=1)
349
+
350
+ # search with score
351
+ results = await vector_store.asimilarity_search_with_score(query="qux", k=1)
352
+ for doc, score in results:
353
+ print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
354
+
355
+ .. code-block:: python
356
+
357
+ * [SIM=0.499243] foo [{'baz': 'bar'}]
358
+
359
+ Use as Retriever:
360
+ .. code-block:: python
361
+
362
+ retriever = vector_store.as_retriever(
363
+ search_type="mmr",
364
+ search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
365
+ )
366
+ retriever.invoke("thud")
367
+
368
+ .. code-block:: python
369
+
370
+ [Document(metadata={'bar': 'baz'}, page_content='thud')]
286
371
287
- This code has been ported over from langchain_community with minimal changes
288
- to allow users to easily transition from langchain_community to langchain_postgres.
289
-
290
- Some changes had to be made to address issues with the community implementation:
291
- * langchain_postgres now works with psycopg3. Please update your
292
- connection strings from `postgresql+psycopg2://...` to
293
- `postgresql+psycopg://langchain:langchain@...`
294
- (yes, the driver name is `psycopg` not `psycopg3`)
295
- * The schema of the embedding store and collection have been changed to make
296
- add_documents work correctly with user specified ids, specifically
297
- when overwriting existing documents.
298
- You will need to recreate the tables if you are using an existing database.
299
- * A Connection object has to be provided explicitly. Connections will not be
300
- picked up automatically based on env variables.
301
- * langchain_postgres now accept async connections. If you want to use the async
302
- version, you need to set `async_mode=True` when initializing the store or
303
- use an async engine.
304
-
305
- Supported filter operators:
306
-
307
- * $eq: Equality operator
308
- * $ne: Not equal operator
309
- * $lt: Less than operator
310
- * $lte: Less than or equal operator
311
- * $gt: Greater than operator
312
- * $gte: Greater than or equal operator
313
- * $in: In operator
314
- * $nin: Not in operator
315
- * $between: Between operator
316
- * $exists: Exists operator
317
- * $like: Like operator
318
- * $ilike: Case insensitive like operator
319
- * $and: Logical AND operator
320
- * $or: Logical OR operator
321
- * $not: Logical NOT operator
322
-
323
- Example:
324
-
325
- .. code-block:: python
326
-
327
- vectorstore.similarity_search('kitty', k=10, filter={
328
- 'id': {'$in': [1, 5, 2, 9]}
329
- })
330
- #%% md
331
-
332
- If you provide a dict with multiple fields, but no operators,
333
- the top level will be interpreted as a logical **AND** filter
334
-
335
- vectorstore.similarity_search('ducks', k=10, filter={
336
- 'id': {'$in': [1, 5, 2, 9]},
337
- 'location': {'$in': ["pond", "market"]}
338
- })
339
-
340
- """
372
+ """ # noqa: E501
341
373
342
374
def __init__ (
343
375
self ,
0 commit comments