diff --git a/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx b/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx
index 5aed8ee2c273..7dabac71c86e 100644
--- a/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx
+++ b/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx
@@ -57,20 +57,21 @@ import { Table, Tr, Th, Td } from "@mdx-js/react";
In addition to the basic value-based filtering capabilities, it is possible to use more advanced filtering. The table below shows the available filter operators.
-| Operator | Semantic |
-| ---------- | -------------------------------------------------------------------------- |
-| `$eq` | Equality (==) |
-| `$ne` | Inequality (!=) |
-| `$lt` | Less than (<) |
-| `$lte` | Less than or equal (<=) |
-| `$gt` | Greater than (>) |
-| `$gte` | Greater than or equal (>=) |
-| `$in` | Contained in a set of given values (in) |
-| `$nin` | Not contained in a set of given values (not in) |
-| `$between` | Between the range of two boundary values |
-| `$like` | Text equality based on the "LIKE" semantics in SQL (using "%" as wildcard) |
-| `$and` | Logical "and", supporting 2 or more operands |
-| `$or` | Logical "or", supporting 2 or more operands |
+| Operator | Semantic |
+| ----------- | -------------------------------------------------------------------------- |
+| `$eq` | Equality (==) |
+| `$ne` | Inequality (!=) |
+| `$lt` | Less than (<) |
+| `$lte` | Less than or equal (<=) |
+| `$gt` | Greater than (>) |
+| `$gte` | Greater than or equal (>=) |
+| `$in` | Contained in a set of given values (in) |
+| `$nin` | Not contained in a set of given values (not in) |
+| `$between` | Between the range of two boundary values |
+| `$like` | Text equality based on the "LIKE" semantics in SQL (using "%" as wildcard) |
+| `$contains` | Filters documents containing a specific keyword |
+| `$and` | Logical "and", supporting 2 or more operands |
+| `$or` | Logical "or", supporting 2 or more operands |
import ExampleAdvancedFilter from "@examples/indexes/vector_stores/hana_vector/advancedFiltering.ts";
@@ -82,6 +83,18 @@ import ExampleChain from "@examples/indexes/vector_stores/hana_vector/chains.ts"
{ExampleChain}
+## Internal Embedding Functionality
+
+SAP HANA Cloud Vector Engine supports computing embeddings directly in the database by leveraging its native `VECTOR_EMBEDDING` function. This approach eliminates the need for an external embedding service, improving performance and enhancing data security.
+
+To enable this functionality, instantiate a `HanaInternalEmbeddings` object with the internal embedding model ID and pass this instance to your `HanaDB` vector store.
+
+For more details on the `VECTOR_EMBEDDING` function, refer to the official [SAP HANA Cloud documentation](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/vector-embedding-function-vector?locale=en-US).
+
+import ExampleInternalEmbeddings from "@examples/indexes/vector_stores/hana_vector/internalEmbeddings.ts";
+
+{ExampleInternalEmbeddings}
+
## Related
- Vector store [conceptual guide](/docs/concepts/#vectorstores)
diff --git a/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts b/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts
index a3095c29b17e..a7b4f1ad4870 100644
--- a/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts
+++ b/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts
@@ -29,15 +29,15 @@ await new Promise((resolve, reject) => {
const docs: Document[] = [
{
pageContent: "First",
- metadata: { name: "adam", is_active: true, id: 1, height: 10.0 },
+ metadata: { name: "Adam Smith", is_active: true, id: 1, height: 10.0 },
},
{
pageContent: "Second",
- metadata: { name: "bob", is_active: false, id: 2, height: 5.7 },
+ metadata: { name: "Bob Johnson", is_active: false, id: 2, height: 5.7 },
},
{
pageContent: "Third",
- metadata: { name: "jane", is_active: true, id: 3, height: 2.4 },
+ metadata: { name: "Jane Doe", is_active: true, id: 3, height: 2.4 },
},
];
@@ -75,8 +75,8 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$ne":1}}
-{ name: 'bob', is_active: false, id: 2, height: 5.7 }
-{ name: 'jane', is_active: true, id: 3, height: 2.4 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 }
+{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 }
*/
// Between range
@@ -86,27 +86,27 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$between":[1,2]}}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'bob', is_active: false, id: 2, height: 5.7 } */
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */
// In list
-advancedFilter = { name: { $in: ["adam", "bob"] } };
+advancedFilter = { name: { $in: ["Adam Smith", "Bob Johnson"] } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
-/* Filter: {"name":{"$in":["adam","bob"]}}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'bob', is_active: false, id: 2, height: 5.7 } */
+/* Filter: {"name":{"$in":["Adam Smith","Bob Johnson"]}}
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */
// Not in list
-advancedFilter = { name: { $nin: ["adam", "bob"] } };
+advancedFilter = { name: { $nin: ["Adam Smith", "Bob Johnson"] } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
-/* Filter: {"name":{"$nin":["adam","bob"]}}
-{ name: 'jane', is_active: true, id: 3, height: 2.4 } */
+/* Filter: {"name":{"$nin":["Adam Smith","Bob Johnson"]}}
+{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */
// Greater than
advancedFilter = { id: { $gt: 1 } };
@@ -115,8 +115,8 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$gt":1}}
-{ name: 'bob', is_active: false, id: 2, height: 5.7 }
-{ name: 'jane', is_active: true, id: 3, height: 2.4 } */
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 }
+{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */
// Greater than or equal to
advancedFilter = { id: { $gte: 1 } };
@@ -125,9 +125,9 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$gte":1}}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'bob', is_active: false, id: 2, height: 5.7 }
-{ name: 'jane', is_active: true, id: 3, height: 2.4 } */
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 }
+{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */
// Less than
advancedFilter = { id: { $lt: 1 } };
@@ -145,7 +145,7 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"id":{"$lte":1}}
-{ name: 'adam', is_active: true, id: 1, height: 10 } */
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } */
// Text filtering with $like
advancedFilter = { name: { $like: "a%" } };
@@ -154,7 +154,7 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"name":{"$like":"a%"}}
-{ name: 'adam', is_active: true, id: 1, height: 10 } */
+ */
advancedFilter = { name: { $like: "%a%" } };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
@@ -162,18 +162,35 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"name":{"$like":"%a%"}}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'jane', is_active: true, id: 3, height: 2.4 } */
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */
+
+// Text filtering with $contains
+advancedFilter = { name: { $contains: "bob" } };
+console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
+printFilterResult(
+ await vectorStore.similaritySearch("just testing", 5, advancedFilter)
+);
+/* Filter: {"name":{"$contains":"bob"}}
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */
+
+advancedFilter = { name: { $contains: "bo" } };
+console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
+printFilterResult(
+ await vectorStore.similaritySearch("just testing", 5, advancedFilter)
+);
+/* Filter: {"name":{"$contains":"bo"}}
+ */
// Combined filtering with $or
-advancedFilter = { $or: [{ id: 1 }, { name: "bob" }] };
+advancedFilter = { $or: [{ id: 1 }, { name: "Bob Johnson" }] };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
-/* Filter: {"$or":[{"id":1},{"name":"bob"}]}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'bob', is_active: false, id: 2, height: 5.7 } */
+/* Filter: {"$or":[{"id":1},{"name":"Bob Johnson"}]}
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */
// Combined filtering with $and
advancedFilter = { $and: [{ id: 1 }, { id: 2 }] };
@@ -184,15 +201,23 @@ printFilterResult(
/* Filter: {"$and":[{"id":1},{"id":2}]}
*/
+advancedFilter = { $and: [{ name: { $contains: "bob" } }, { id: 2 }] };
+console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
+printFilterResult(
+ await vectorStore.similaritySearch("just testing", 5, advancedFilter)
+);
+/* Filter: {"$and":[{"name":{"$contains":"bob"}},{"id":2}]}
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */
+
advancedFilter = { $or: [{ id: 1 }, { id: 2 }, { id: 3 }] };
console.log(`Filter: ${JSON.stringify(advancedFilter)}`);
printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"$or":[{"id":1},{"id":2},{"id":3}]}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'bob', is_active: false, id: 2, height: 5.7 }
-{ name: 'jane', is_active: true, id: 3, height: 2.4 } */
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 }
+{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */
// You can also define a nested filter with $and and $or.
advancedFilter = {
@@ -203,8 +228,8 @@ printFilterResult(
await vectorStore.similaritySearch("just testing", 5, advancedFilter)
);
/* Filter: {"$and":[{"$or":[{"id":1},{"id":2}]},{"height":{"$gte":5.0}}]}
-{ name: 'adam', is_active: true, id: 1, height: 10 }
-{ name: 'bob', is_active: false, id: 2, height: 5.7 } */
+{ name: 'Adam Smith', is_active: true, id: 1, height: 10 }
+{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */
// Disconnect from SAP HANA after the operations
client.disconnect();
diff --git a/examples/src/indexes/vector_stores/hana_vector/internalEmbeddings.ts b/examples/src/indexes/vector_stores/hana_vector/internalEmbeddings.ts
new file mode 100644
index 000000000000..be93dbf85f2d
--- /dev/null
+++ b/examples/src/indexes/vector_stores/hana_vector/internalEmbeddings.ts
@@ -0,0 +1,81 @@
+import { Document } from "@langchain/core/documents";
+import hanaClient from "hdb";
+import { HanaInternalEmbeddings } from "@langchain/community/embeddings/hana_internal";
+import { HanaDB, HanaDBArgs } from "@langchain/community/vectorstores/hanavector";
+
+// Initialize the internal embeddings instance using the internal model ID.
+// This instance makes HanaDB use SAP HANA's built-in VECTOR_EMBEDDING function.
+const internalEmbeddings = new HanaInternalEmbeddings({
+  internalEmbeddingModelId: process.env.HANA_DB_EMBEDDING_MODEL_ID || "SAP_NEB.20240715",
+});
+
+// Set up connection parameters from environment variables.
+const connectionParams = {
+  host: process.env.HANA_HOST,
+  port: process.env.HANA_PORT,
+  user: process.env.HANA_UID,
+  password: process.env.HANA_PWD,
+};
+
+// Create a HANA client.
+const client = hanaClient.createClient(connectionParams);
+
+// Connect to SAP HANA. The promise is typed as void because resolve() carries no value.
+await new Promise<void>((resolve, reject) => {
+  client.connect((err: Error) => {
+    if (err) {
+      reject(err);
+    } else {
+      console.log("Connected to SAP HANA successfully.");
+      resolve();
+    }
+  });
+});
+
+// Define the arguments for the vector store instance.
+const args: HanaDBArgs = {
+  connection: client,
+  tableName: "testInternalEmbeddings",
+};
+
+// Create a new HanaDB vector store using the internal embeddings instance.
+// This vector store leverages the internal VECTOR_EMBEDDING function of HanaDB.
+const vectorStore = new HanaDB(internalEmbeddings, args);
+// Initialize the vector store (creates the table and verifies its columns).
+await vectorStore.initialize();
+
+// Example documents to index.
+const docs: Document[] = [
+  new Document({
+    pageContent: "Charlie is a data scientist who specializes in AI research.",
+    metadata: { name: "Charlie Brown" },
+  }),
+  new Document({
+    pageContent: "David is a teacher with a passion for history and literature.",
+    metadata: { name: "David Williams" },
+  }),
+  new Document({
+    pageContent: "Eve is an entrepreneur focusing on blockchain and cryptocurrency.",
+    metadata: { name: "Eve Adams" },
+  }),
+];
+
+// Clean up any existing documents in the table.
+await vectorStore.delete({ filter: {} });
+// Add the example documents.
+await vectorStore.addDocuments(docs);
+
+// Perform a similarity search. In this example, we search for documents related to "bitcoin".
+const results = await vectorStore.similaritySearch("bitcoin", 1);
+console.log("Similarity search results:", results);
+/*
+  [
+    {
+      pageContent: 'Eve is an entrepreneur focusing on blockchain and cryptocurrency.',
+      metadata: { name: 'Eve Adams' }
+    }
+  ]
+*/
+
+// Disconnect from SAP HANA after operations.
+client.disconnect();
diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore
index fc0cb520b693..cbd398458861 100644
--- a/libs/langchain-community/.gitignore
+++ b/libs/langchain-community/.gitignore
@@ -174,6 +174,10 @@ embeddings/gradient_ai.cjs
embeddings/gradient_ai.js
embeddings/gradient_ai.d.ts
embeddings/gradient_ai.d.cts
+embeddings/hana_internal.cjs
+embeddings/hana_internal.js
+embeddings/hana_internal.d.ts
+embeddings/hana_internal.d.cts
embeddings/hf.cjs
embeddings/hf.js
embeddings/hf.d.ts
diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js
index 3a67261b225b..2d365348b4f5 100644
--- a/libs/langchain-community/langchain.config.js
+++ b/libs/langchain-community/langchain.config.js
@@ -79,6 +79,7 @@ export const config = {
"embeddings/deepinfra": "embeddings/deepinfra",
"embeddings/fireworks": "embeddings/fireworks",
"embeddings/gradient_ai": "embeddings/gradient_ai",
+ "embeddings/hana_internal": "embeddings/hana_internal",
"embeddings/hf": "embeddings/hf",
"embeddings/hf_transformers": "embeddings/hf_transformers",
"embeddings/huggingface_transformers":
diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
index bbeac62a3d4e..40d7f755fca5 100644
--- a/libs/langchain-community/package.json
+++ b/libs/langchain-community/package.json
@@ -1129,6 +1129,15 @@
"import": "./embeddings/gradient_ai.js",
"require": "./embeddings/gradient_ai.cjs"
},
+ "./embeddings/hana_internal": {
+ "types": {
+ "import": "./embeddings/hana_internal.d.ts",
+ "require": "./embeddings/hana_internal.d.cts",
+ "default": "./embeddings/hana_internal.d.ts"
+ },
+ "import": "./embeddings/hana_internal.js",
+ "require": "./embeddings/hana_internal.cjs"
+ },
"./embeddings/hf": {
"types": {
"import": "./embeddings/hf.d.ts",
@@ -3469,6 +3478,10 @@
"embeddings/gradient_ai.js",
"embeddings/gradient_ai.d.ts",
"embeddings/gradient_ai.d.cts",
+ "embeddings/hana_internal.cjs",
+ "embeddings/hana_internal.js",
+ "embeddings/hana_internal.d.ts",
+ "embeddings/hana_internal.d.cts",
"embeddings/hf.cjs",
"embeddings/hf.js",
"embeddings/hf.d.ts",
diff --git a/libs/langchain-community/src/embeddings/hana_internal.ts b/libs/langchain-community/src/embeddings/hana_internal.ts
new file mode 100644
index 000000000000..8a50d7eac0aa
--- /dev/null
+++ b/libs/langchain-community/src/embeddings/hana_internal.ts
@@ -0,0 +1,70 @@
+import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings";
+
+/**
+ * Parameters for initializing HanaInternalEmbeddings.
+ */
+export interface HanaInternalEmbeddingsParams extends EmbeddingsParams {
+  /**
+   * The ID of the internal embedding model used by the HANA database (the model argument of VECTOR_EMBEDDING).
+   */
+  internalEmbeddingModelId: string;
+}
+
+/**
+ * A placeholder embeddings class for use with HANA's internal embedding functionality.
+ * It only carries the internal model ID: both embedding methods always throw, so
+ * embeddings have to be produced by database queries rather than by this class.
+ *
+ * @example
+ * const internalEmbeddings = new HanaInternalEmbeddings({
+ *   internalEmbeddingModelId: "your_model_id_here",
+ * });
+ *
+ * // The following calls will throw errors:
+ * await internalEmbeddings.embedQuery("sample text"); // Throws error
+ * await internalEmbeddings.embedDocuments(["text one", "text two"]); // Throws error
+ *
+ * // Retrieve the internal model id:
+ * console.log(internalEmbeddings.getModelId());
+ */
+export class HanaInternalEmbeddings extends Embeddings {
+  private readonly modelId: string;
+
+  /**
+   * Marker flag identifying this class as HANA-specific; always `true`.
+   */
+  public readonly isHanaInternalEmbeddings = true;
+
+  constructor(fields: HanaInternalEmbeddingsParams) {
+    super(fields);
+    this.modelId = fields.internalEmbeddingModelId;
+  }
+
+  /**
+   * This method is not applicable for HANA internal embeddings.
+   * @throws Error indicating that internal embeddings cannot be used externally.
+   */
+  async embedQuery(_text: string): Promise<number[]> {
+    throw new Error(
+      "Internal embeddings cannot be used externally. Use HANA's internal embedding functionality instead."
+    );
+  }
+
+  /**
+   * This method is not applicable for HANA internal embeddings.
+   * @throws Error indicating that internal embeddings cannot be used externally.
+   */
+  async embedDocuments(_texts: string[]): Promise<number[][]> {
+    throw new Error(
+      "Internal embeddings cannot be used externally. Use HANA's internal embedding functionality instead."
+    );
+  }
+
+  /**
+   * Retrieves the internal embedding model ID.
+   * @returns The internal embedding model ID.
+   */
+  getModelId(): string {
+    return this.modelId;
+  }
+}
diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts
index 832782caf5b6..5d926b11ee4b 100644
--- a/libs/langchain-community/src/load/import_map.ts
+++ b/libs/langchain-community/src/load/import_map.ts
@@ -30,6 +30,7 @@ export * as embeddings__baidu_qianfan from "../embeddings/baidu_qianfan.js";
export * as embeddings__bytedance_doubao from "../embeddings/bytedance_doubao.js";
export * as embeddings__deepinfra from "../embeddings/deepinfra.js";
export * as embeddings__fireworks from "../embeddings/fireworks.js";
+export * as embeddings__hana_internal from "../embeddings/hana_internal.js";
export * as embeddings__minimax from "../embeddings/minimax.js";
export * as embeddings__ollama from "../embeddings/ollama.js";
export * as embeddings__togetherai from "../embeddings/togetherai.js";
diff --git a/libs/langchain-community/src/vectorstores/hanavector.ts b/libs/langchain-community/src/vectorstores/hanavector.ts
index 48e40b8ee48c..e002afa9165e 100644
--- a/libs/langchain-community/src/vectorstores/hanavector.ts
+++ b/libs/langchain-community/src/vectorstores/hanavector.ts
@@ -6,6 +6,8 @@ import {
import { Document } from "@langchain/core/documents";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";
+import { HanaInternalEmbeddings } from "../embeddings/hana_internal.js";
+
export type DistanceStrategy = "euclidean" | "cosine";
const COMPARISONS_TO_SQL: Record = {
@@ -35,7 +37,8 @@ type Comparator =
| "$in"
| "$nin"
| "$between"
- | "$like";
+ | "$like"
+ | "$contains";
// Filter using comparison operators
// Defines the relationship between a comparison operator and its value
type ComparatorFilter = {
@@ -75,6 +78,11 @@ const LOGICAL_OPERATORS_TO_SQL: Record = {
$or: "OR",
};
+// Filter operator that is translated into a keyword search condition instead of a comparison.
+const CONTAINS_OPERATOR = "$contains";
+// Name of the WITH-clause result set that projects JSON metadata fields as columns.
+const INTERMEDIATE_TABLE_NAME = "intermediate_result";
+
const HANA_DISTANCE_FUNCTION: Record = {
cosine: ["COSINE_SIMILARITY", "DESC"],
euclidean: ["L2DISTANCE", "ASC"],
@@ -126,6 +134,10 @@ export class HanaDB extends VectorStore {
private specificMetadataColumns: string[];
+ private useInternalEmbeddings: boolean;
+
+ private internalEmbeddingModelId: string;
+
_vectorstoreType(): string {
return "hanadb";
}
@@ -151,6 +163,52 @@ export class HanaDB extends VectorStore {
args.specificMetadataColumns || []
);
this.connection = args.connection;
+
+ // Set the embedding and decide whether to use internal embedding
+ this._setEmbeddings(embeddings);
+ }
+
+  /**
+   * Use this method to change the embeddings instance.
+   *
+   * Sets the embedding instance and configures the internal embedding mode
+   * if applicable.
+   *
+   * If the instance carries the `isHanaInternalEmbeddings` marker flag, this method
+   * sets the internal flag and stores the model ID. Otherwise, external embedding mode is used.
+   *
+   * @param embeddings - An instance of EmbeddingsInterface.
+   */
+  private _setEmbeddings(embeddings: EmbeddingsInterface): void {
+    this.embeddings = embeddings;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    if ((embeddings as any).isHanaInternalEmbeddings === true) {
+      this.useInternalEmbeddings = true;
+      this.internalEmbeddingModelId = (embeddings as HanaInternalEmbeddings).getModelId();
+    } else {
+      this.useInternalEmbeddings = false;
+      this.internalEmbeddingModelId = "";
+    }
+  }
+
+  /**
+   * Ping the database to check if the in-database embedding
+   * function exists and works.
+   *
+   * Runs VECTOR_EMBEDDING once on the literal 'test' against sys.DUMMY,
+   * binding the configured internal embedding model ID as the model parameter.
+   *
+   * @throws Error if no internal embedding model ID is set or the validation fails.
+   */
+  private async validateInternalEmbeddingFunction(): Promise<void> {
+    if (!this.internalEmbeddingModelId) {
+      throw new Error("Internal embedding model id is not set");
+    }
+    const sqlStr =
+      "SELECT COUNT(TO_NVARCHAR(VECTOR_EMBEDDING('test', 'QUERY', ?))) AS TEST FROM sys.DUMMY;";
+    const client = this.connection;
+    const stm = await this.prepareQuery(client, sqlStr);
+    await this.executeStatement(stm, [this.internalEmbeddingModelId]);
+  }
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -196,6 +254,8 @@ export class HanaDB extends VectorStore {
}
public async initialize() {
+ if (this.useInternalEmbeddings)
+ await this.validateInternalEmbeddingFunction();
let valid_distance = false;
for (const key in HANA_DISTANCE_FUNCTION) {
if (key === this.distanceStrategy) {
@@ -509,7 +569,18 @@ export class HanaDB extends VectorStore {
`Operator '${specialOp}' expects a non-undefined value.`
);
}
- } else if (specialOp in IN_OPERATORS_TO_SQL) {
+ } else if (specialOp === CONTAINS_OPERATOR) {
+ // Special handling for keyword search
+ operator = CONTAINS_OPERATOR;
+ if (specialVal !== undefined) {
+ queryTuple.push(specialVal.toString());
+ } else {
+ throw new Error(
+ `Operator '${specialOp}' expects a non-undefined value.`
+ );
+ }
+ }
+ else if (specialOp in IN_OPERATORS_TO_SQL) {
operator = IN_OPERATORS_TO_SQL[specialOp];
if (Array.isArray(specialVal)) {
const placeholders = Array(specialVal.length).fill("?").join(",");
@@ -527,15 +598,89 @@ export class HanaDB extends VectorStore {
throw new Error(`Unsupported filter data-type: ${typeof filterValue}`);
}
- // Metadata column handling
- const selector = this.specificMetadataColumns.includes(key)
+ if (operator === CONTAINS_OPERATOR) {
+ // Instead of a normal clause, create a keyword search condition.
+ whereStr += `SCORE(? IN ("${key}" EXACT SEARCH MODE 'text')) > 0`;
+ } else {
+ // Metadata column handling (not required in keyword search)
+ const selector = this.specificMetadataColumns.includes(key)
? `"${key}"`
: `JSON_VALUE(${this.metadataColumn}, '$.${key}')`;
whereStr += `${selector} ${operator} ${sqlParam}`;
+ }
});
return [whereStr, queryTuple];
}
+  /**
+   * Extract metadata columns used with `$contains` in the filter.
+   *
+   * Scans the filter, including nested conditions, for fields used with
+   * `$contains` that are neither the content column nor a specific metadata column.
+   *
+   * @param filter - (Optional) A filter object that may include nested filter conditions.
+   * @returns An array of unique metadata field names (as strings) that are used
+   * with the "$contains" operator.
+   */
+  private extractKeywordSearchColumns(filter?: this["FilterType"]): string[] {
+    const keywordColumns = new Set<string>();
+    this.recurseFiltersHelper(keywordColumns, filter);
+    return [...keywordColumns];
+  }
+
+  /** Recursively adds to `keywordColumns` each `$contains` field that is neither the content column nor a specific metadata column. */
+  private recurseFiltersHelper(keywordColumns: Set<string>, filterObj?: this["FilterType"], parentKey?: string): void {
+    if (!filterObj || typeof filterObj !== "object") return;
+    for (const [key, value] of Object.entries(filterObj)) {
+      if (key === CONTAINS_OPERATOR) {
+        if (
+          parentKey &&
+          parentKey !== this.contentColumn &&
+          !this.specificMetadataColumns.includes(parentKey)
+        ) {
+          keywordColumns.add(parentKey);
+        }
+      } else if (key in LOGICAL_OPERATORS_TO_SQL) {
+        // Assumes the operand is an array of sub-filters; each one is scanned without a parent key.
+        (value as this["FilterType"][]).forEach((subfilter) => this.recurseFiltersHelper(keywordColumns, subfilter));
+      } else if (typeof value === "object" && value !== null) {
+        this.recurseFiltersHelper(keywordColumns, value as this["FilterType"], key);
+      }
+    }
+  }
+
+
+  /**
+   * Builds the SQL `WITH` clause that exposes JSON metadata fields as named
+   * columns, so that a keyword search can reference them by name.
+   *
+   * Each name is passed through `HanaDB.sanitizeName` before being embedded.
+   *
+   * Example:
+   *   Input: ["title", "author"]
+   *   Output:
+   *     WITH intermediate_result AS (
+   *       SELECT *,
+   *         JSON_VALUE(metadata_column, '$.title') AS "title",
+   *         JSON_VALUE(metadata_column, '$.author') AS "author"
+   *       FROM "table_name"
+   *     )
+   *
+   * @param projectedMetadataColumns - List of metadata column names for projection.
+   * @returns A SQL `WITH` clause string.
+   */
+  private createMetadataProjection(
+    projectedMetadataColumns: string[]
+  ): string {
+    const projections: string[] = [];
+    for (const col of projectedMetadataColumns) {
+      const safeName = HanaDB.sanitizeName(col);
+      projections.push(`JSON_VALUE(${this.metadataColumn}, '$.${safeName}') AS "${safeName}"`);
+    }
+    const selectList = `SELECT *, ${projections.join(", ")} `;
+    return `WITH ${INTERMEDIATE_TABLE_NAME} AS (${selectList}FROM "${this.tableName}")`;
+  }
+
/**
* Creates an HNSW vector index on a specified table and vector column with
* optional build and search configurations. If no configurations are provided,
@@ -717,13 +862,23 @@ export class HanaDB extends VectorStore {
}
/**
- * Adds an array of documents to the table. The documents are first
- * converted to vectors using the `embedDocuments` method of the
- * `embeddings` instance.
+ * Adds an array of documents to the table.
+ *
+ *
+ * In external embedding mode, this method computes embeddings client-side
+ * and inserts them.
+ * In internal embedding mode, it leverages the database's internal
+ * VECTOR_EMBEDDING function to generate embeddings.
+ *
* @param documents Array of Document instances to be added to the table.
* @returns Promise that resolves when the documents are added.
*/
async addDocuments(documents: Document[]): Promise {
+ // If using internal embeddings, we do NOT call embedDocuments() from Node.
+ if (this.useInternalEmbeddings) {
+ return this.addDocumentsUsingInternalEmbedding(documents);
+ }
+ // Otherwise, default (external) approach:
const texts = documents.map(({ pageContent }) => pageContent);
return this.addVectors(
await this.embeddings.embedDocuments(texts),
@@ -731,6 +886,44 @@ export class HanaDB extends VectorStore {
);
}
+  /**
+   * Adds documents to the database using the internal embedding function.
+   *
+   * This method constructs an SQL INSERT statement that leverages the
+   * database's internal VECTOR_EMBEDDING function to generate embeddings
+   * on the server side. An empty `documents` array is a no-op.
+   *
+   * @param documents - Array of Document objects to be added.
+   * @returns Promise that resolves when the documents are added.
+   */
+  private async addDocumentsUsingInternalEmbedding(documents: Document[]): Promise<void> {
+    // Nothing to insert: do not send a batch INSERT without any parameter rows.
+    if (documents.length === 0) return;
+
+    const client = this.connection;
+    const sqlParams: [string, string, string, string, ...(string | null)[]][] = documents.map((doc) => {
+      const [remainingMetadata, specialMetadata] = this.splitOffSpecialMetadata(doc.metadata);
+      // The text is bound twice: for the content column and as the VECTOR_EMBEDDING input.
+      return [
+        doc.pageContent,
+        JSON.stringify(this.sanitizeMetadataKeys(remainingMetadata)),
+        doc.pageContent,
+        this.internalEmbeddingModelId,
+        ...specialMetadata,
+      ];
+    });
+
+    // Build the column list for the INSERT statement.
+    const specificMetadataColumnsString = this.getSpecificMetadataColumnsString();
+    const extraPlaceholders = this.specificMetadataColumns.map(() => ", ?").join("");
+
+    // Insert data into the table, bulk insert.
+    const sqlStr = `INSERT INTO "${this.tableName}" ("${this.contentColumn}", "${this.metadataColumn}", "${this.vectorColumn}"${specificMetadataColumnsString})
+    VALUES (?, ?, VECTOR_EMBEDDING(?, 'DOCUMENT', ?)${extraPlaceholders});`;
+    const stm = await this.prepareQuery(client, sqlStr);
+    await this.executeStatement(stm, sqlParams);
+  }
+
/**
* Adds an array of vectors and corresponding documents to the database.
* The vectors and documents are batch inserted into the database.
@@ -745,33 +938,78 @@ export class HanaDB extends VectorStore {
const texts = documents.map((doc) => doc.pageContent);
const metadatas = documents.map((doc) => doc.metadata);
const client = this.connection;
- const sqlParams: [string, string, string][] = texts.map((text, i) => {
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ const sqlParams: [string, string, string, ...any[]][] = texts.map((text, i) => {
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
+ const [remainingMetadata, specialMetadata] = this.splitOffSpecialMetadata(metadata);
// Ensure embedding is generated or provided
const embeddingString = `[${vectors[i].join(", ")}]`;
// Prepare the SQL parameters
return [
text,
- JSON.stringify(this.sanitizeMetadataKeys(metadata)),
- embeddingString,
+ JSON.stringify(this.sanitizeMetadataKeys(remainingMetadata)),
+ embeddingString,
+ ...specialMetadata
];
});
+ // Build the column list for the INSERT statement.
+ const specificMetadataColumnsString = this.getSpecificMetadataColumnsString()
+ const extraPlaceholders = this.specificMetadataColumns.map(() => ", ?").join("");
+
// Insert data into the table, bulk insert.
- const sqlStr = `INSERT INTO "${this.tableName}" ("${this.contentColumn}", "${this.metadataColumn}", "${this.vectorColumn}")
- VALUES (?, ?, TO_REAL_VECTOR(?));`;
+ const sqlStr = `INSERT INTO "${this.tableName}" ("${this.contentColumn}", "${this.metadataColumn}", "${this.vectorColumn}"${specificMetadataColumnsString})
+ VALUES (?, ?, TO_REAL_VECTOR(?)${extraPlaceholders});`;
const stm = await this.prepareQuery(client, sqlStr);
await this.executeStatement(stm, sqlParams);
// stm.execBatch(sqlParams);
}
/**
- * Return docs most similar to query.
- * @param query Query text for the similarity search.
- * @param k Number of Documents to return. Defaults to 4.
- * @param filter A dictionary of metadata fields and values to filter by.
- Defaults to None.
- * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
- */
+ * Helper function to generate the SQL snippet for specific metadata columns.
+ *
+ * Returns a string in the format: ', "col1", "col2", ...'
+ * if specific metadata columns are defined,
+ * or an empty string if there are none.
+ *
+ * @returns A string representing the specific metadata columns for SQL insertion.
+ */
+ private getSpecificMetadataColumnsString(): string {
+   // The snippet is appended right after the fixed columns of an INSERT column
+   // list, so it carries its own leading ", " and is empty when there is
+   // nothing to append.
+   const columns = this.specificMetadataColumns;
+   return columns.length === 0 ? "" : `, "${columns.join('", "')}"`;
+ }
+
+ /**
+  * Extracts the values destined for the dedicated metadata columns.
+  *
+  * The metadata object itself is returned as-is: keys listed in
+  * `specificMetadataColumns` are NOT removed from it. Alongside it comes one
+  * value per entry of `specificMetadataColumns`, in the same order, with `null`
+  * standing in for keys that are missing or nullish.
+  *
+  * @param metadata - The metadata object to read the special values from. May be nullish.
+  * @returns A tuple of the metadata object (`{}` when `metadata` is falsy) and the
+  *   array of special values, whose length always equals
+  *   `specificMetadataColumns.length`.
+  */
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ private splitOffSpecialMetadata(metadata: any): [any, (string | null)[]] {
+   // Falsy metadata must still yield one null per column: callers spread these
+   // values into a parameter row whose SQL has one placeholder per specific
+   // column, so a shorter array would leave placeholders unbound.
+   const source = metadata || {};
+   const specialMetadata: (string | null)[] = this.specificMetadataColumns.map(
+     (columnName) => source[columnName] ?? null
+   );
+   return [source, specialMetadata];
+ }
+
+ /**
+ * Return docs most similar to query.
+ * @param query Query text for the similarity search.
+ * @param k Number of Documents to return.
+ * @param filter Optional dictionary of metadata fields and values to filter by.
+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
+ */
async similaritySearch(
query: string,
k: number,
@@ -782,30 +1020,47 @@ export class HanaDB extends VectorStore {
}
/**
- * Return documents and score values most similar to query.
- * @param query Query text for the similarity search.
- * @param k Number of Documents to return. Defaults to 4.
- * @param filter A dictionary of metadata fields and values to filter by.
- Defaults to None.
- * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
- */
+ * Return documents and score values most similar to query.
+ * @param query Query text for the similarity search.
+ * @param k Number of Documents to return.
+ * @param filter Optional dictionary of metadata fields and values to filter by.
+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
+ */
async similaritySearchWithScore(
query: string,
k: number,
filter?: this["FilterType"]
): Promise<[Document, number][]> {
- const queryEmbedding = await this.embeddings.embedQuery(query);
- return this.similaritySearchVectorWithScore(queryEmbedding, k, filter);
+   // Both search helpers also return each hit's stored vector; this method only
+   // exposes [document, score] pairs, so the vector is dropped before returning.
+   if (this.useInternalEmbeddings) {
+     // Internal embeddings: hand the raw query text to the database.
+     const internalHits = await this.similaritySearchWithScoreAndVectorByQuery(
+       query,
+       k,
+       filter
+     );
+     return internalHits.map(([doc, score]) => [doc, score]);
+   }
+   // External embeddings: embed the query first, then search by vector.
+   const queryEmbedding = await this.embeddings.embedQuery(query);
+   const externalHits = await this.similaritySearchWithScoreAndVectorByVector(
+     queryEmbedding,
+     k,
+     filter
+   );
+   return externalHits.map(([doc, score]) => [doc, score]);
}
/**
- * Return docs most similar to the given embedding.
- * @param query Query embedding for the similarity search.
- * @param k Number of Documents to return. Defaults to 4.
- * @param filter A dictionary of metadata fields and values to filter by.
- Defaults to None.
- * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
- */
+ * Return docs most similar to the given embedding.
+ * @param queryEmbedding Query embedding for the similarity search.
+ * @param k Number of Documents to return.
+ * @param filter Optional dictionary of metadata fields and values to filter by.
+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
+ */
async similaritySearchVectorWithScore(
queryEmbedding: number[],
k: number,
@@ -821,37 +1076,56 @@ export class HanaDB extends VectorStore {
}
/**
- * Performs a similarity search based on vector comparison and returns documents along with their similarity scores and vectors.
- * @param embedding The vector representation of the query for similarity comparison.
- * @param k The number of top similar documents to return.
- * @param filter Optional filter criteria to apply to the search query.
- * @returns A promise that resolves to an array of tuples, each containing a Document, its similarity score, and its vector.
+ * Performs a similarity search using the provided embedding expression.
+ *
+ * This helper method is used by both external and internal similarity search methods
+ * to construct and execute the SQL query.
+ *
+ * @param embeddingExpr - SQL expression that represents or generates the query embedding.
+ * @param k - The number of documents to return.
+ * @param filter A dictionary of metadata fields and values to filter by.
+ Defaults to None.
+ * @param vectorEmbeddingParams - Optional parameters for the embedding expression (used in internal mode).
+ * @returns Promise that resolves to a list of documents and their corresponding similarity scores.
*/
- async similaritySearchWithScoreAndVectorByVector(
- embedding: number[],
+ private async similaritySearchWithScoreAndVector(
+ embeddingExpr: string,
k: number,
- filter?: this["FilterType"]
+ filter?: this["FilterType"],
+ vectorEmbeddingParams?: string[]
): Promise> {
// Sanitize inputs
const sanitizedK = HanaDB.sanitizeInt(k);
- const sanitizedEmbedding = HanaDB.sanitizeListFloat(embedding);
// Determine the distance function based on the configured strategy
const distanceFuncName = HANA_DISTANCE_FUNCTION[this.distanceStrategy][0];
- // Convert the embedding vector to a string for SQL query
- const embeddingAsString = sanitizedEmbedding.join(",");
- let sqlStr = `SELECT TOP ${sanitizedK}
+
+ // Keyword search: extract metadata columns used with $contains
+ const projectedMetadataColumns = this.extractKeywordSearchColumns(filter);
+ let metadataProjection = "";
+ let fromClause = `"${this.tableName}"`;
+ if (projectedMetadataColumns.length > 0) {
+ metadataProjection = this.createMetadataProjection(projectedMetadataColumns);
+ fromClause = INTERMEDIATE_TABLE_NAME;
+ }
+
+ let sqlStr = `${metadataProjection}
+ SELECT TOP ${sanitizedK}
"${this.contentColumn}",
"${this.metadataColumn}",
TO_NVARCHAR("${this.vectorColumn}") AS VECTOR,
- ${distanceFuncName}("${this.vectorColumn}", TO_REAL_VECTOR('[${embeddingAsString}]')) AS CS
- FROM "${this.tableName}"`;
+ ${distanceFuncName}("${this.vectorColumn}", ${embeddingExpr}) AS CS
+ FROM ${fromClause}`;
// Add order by clause to sort by similarity
const orderStr = ` ORDER BY CS ${
HANA_DISTANCE_FUNCTION[this.distanceStrategy][1]
}`;
// Prepare and execute the SQL query
- const [whereStr, queryTuple] = this.createWhereByFilter(filter);
+ const [whereStr, tempQueryTuple] = this.createWhereByFilter(filter);
+ let queryTuple = tempQueryTuple
+ if (vectorEmbeddingParams && vectorEmbeddingParams.length > 0) {
+ queryTuple = [...vectorEmbeddingParams, ...queryTuple];
+ }
sqlStr += whereStr + orderStr;
const client = this.connection;
@@ -874,10 +1148,66 @@ export class HanaDB extends VectorStore {
return result;
}
+ /**
+  * Performs a similarity search for an already computed query embedding and
+  * returns each hit together with its similarity score and its vector.
+  *
+  * The embedding is passed through `HanaDB.sanitizeListFloat` and then inlined
+  * into the SQL as a `TO_REAL_VECTOR('[...]')` expression, so this path hands no
+  * extra bind parameters to the shared search helper.
+  *
+  * @param embedding The vector representation of the query for similarity comparison.
+  * @param k The number of top similar documents to return.
+  * @param filter Optional filter criteria to apply to the search query.
+  * @returns A promise that resolves to an array of tuples, each containing a Document, its similarity score, and its vector.
+  */
+ async similaritySearchWithScoreAndVectorByVector(
+   embedding: number[],
+   k: number,
+   filter?: this["FilterType"]
+ ): Promise<Array<[Document, number, number[]]>> {
+   const vectorLiteral = HanaDB.sanitizeListFloat(embedding).join(",");
+   return this.similaritySearchWithScoreAndVector(
+     `TO_REAL_VECTOR('[${vectorLiteral}]')`,
+     k,
+     filter
+   );
+ }
+
+ /**
+  * Performs a similarity search using the internal embedding function.
+  *
+  * The query text is not embedded on the client: it is bound, together with
+  * `internalEmbeddingModelId`, to the two placeholders of a
+  * `VECTOR_EMBEDDING(?, 'QUERY', ?)` expression used inside the search SQL.
+  *
+  * @param query - The query text.
+  * @param k - The number of documents to return.
+  * @param filter - Optional dictionary of metadata fields and values to filter by.
+  * @returns A promise that resolves to an array of tuples, each containing a Document, its similarity score, and its vector.
+  * @throws Error if `useInternalEmbeddings` is not set on this store.
+  */
+ async similaritySearchWithScoreAndVectorByQuery(
+   query: string,
+   k: number,
+   filter?: this["FilterType"]
+ ): Promise<Array<[Document, number, number[]]>> {
+   if (!this.useInternalEmbeddings) {
+     throw new Error(
+       "Internal embedding search requires an internal embedding instance."
+     );
+   }
+   // Order matters: the values fill the expression's placeholders left to right
+   // (query text first, model ID second).
+   const vectorEmbeddingParams = [query, this.internalEmbeddingModelId];
+   return this.similaritySearchWithScoreAndVector(
+     "VECTOR_EMBEDDING(?, 'QUERY', ?)",
+     k,
+     filter,
+     vectorEmbeddingParams
+   );
+ }
+
/**
* Return documents selected using the maximal marginal relevance.
* Maximal marginal relevance optimizes for similarity to the query AND
* diversity among selected documents.
+ * When using an internal embedding instance, the query is processed
+ * directly by the database's internal embedding function.
+ * Otherwise, the query is embedded externally.
* @param query Text to look up documents similar to.
* @param options.k Number of documents to return.
* @param options.fetchK=20 Number of documents to fetch before passing to
@@ -892,7 +1222,25 @@ export class HanaDB extends VectorStore {
options: MaxMarginalRelevanceSearchOptions
): Promise {
const { k, fetchK = 20, lambda = 0.5 } = options;
- const queryEmbedding = await this.embeddings.embedQuery(query);
+ let queryEmbedding: number[];
+ if (this.useInternalEmbeddings){
+ const sqlStr = `SELECT TO_NVARCHAR(VECTOR_EMBEDDING(?, 'QUERY', ?))
+ AS VECTOR FROM sys.DUMMY;`
+ const queryTuple = [query, this.internalEmbeddingModelId];
+ const client = this.connection;
+ const stm = await this.prepareQuery(client, sqlStr);
+ const resultSet = await this.executeStatement(stm, queryTuple);
+ const result: [number[]] = resultSet.map(
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ (row: any) => {
+ return HanaDB.parseFloatArrayFromString(row.VECTOR);
+ }
+ );
+ queryEmbedding = result[0];
+ }
+ else {
+ queryEmbedding = await this.embeddings.embedQuery(query);
+ }
const docs = await this.similaritySearchWithScoreAndVectorByVector(
queryEmbedding,
diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts
index 9634adaa05b4..52253181c9e3 100644
--- a/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts
+++ b/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts
@@ -123,6 +123,8 @@ export const TYPE_4_FILTERING_TEST_CASES: TestCase[] = [
export const TYPE_5_FILTERING_TEST_CASES: TestCase[] = [
{ filter: { name: { $like: "a%" } }, expected: [1] },
{ filter: { name: { $like: "%a%" } }, expected: [1, 3] },
+ // $contains cases: the full keyword "bob" is expected to match ([2]), while
+ // the fragment "bo" is expected to match nothing ([]).
+ { filter: { name: { $contains: "bob" } }, expected: [2] },
+ { filter: { name: { $contains: "bo" } }, expected: [] },
];
export const TYPE_6_FILTERING_TEST_CASES: TestCase[] = [
diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts
index dc8b4a534e81..184f3c09412a 100644
--- a/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts
+++ b/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts
@@ -5,6 +5,7 @@ import { Document } from "@langchain/core/documents";
import { FakeEmbeddings } from "@langchain/core/utils/testing";
import { test, expect } from "@jest/globals";
import { HanaDB, HanaDBArgs } from "../hanavector.js";
+import { HanaInternalEmbeddings } from "../../embeddings/hana_internal.js";
import {
DOCUMENTS,
TYPE_1_FILTERING_TEST_CASES,
@@ -126,6 +127,7 @@ beforeAll(async () => {
expect(process.env.HANA_PORT).toBeDefined();
expect(process.env.HANA_UID).toBeDefined();
expect(process.env.HANA_PWD).toBeDefined();
+ expect(process.env.HANA_DB_EMBEDDING_MODEL_ID).toBeDefined();
await connectToHANA();
});
@@ -1147,6 +1149,113 @@ describe("HNSW Index Creation Tests", () => {
});
});
+describe("Keyword Search Tests", () => {
+  test("keyword search on content and specific metadata column", async () => {
+    const tableNameTest = "TEST_TABLE_KEYWORD_SEARCH_WITHOUT_UNSPECIFIC_METADATA_COL";
+    await dropTable(client, tableNameTest);
+
+    // Create the table manually with the extra columns "quality" and "start".
+    // The statement is deliberately not wrapped in try/catch: if the table
+    // cannot be created, the test fails here rather than logging the error and
+    // carrying on.
+    const sqlStr = `
+      CREATE TABLE "${tableNameTest}" (
+        "VEC_TEXT" NCLOB,
+        "VEC_META" NCLOB,
+        "VEC_VECTOR" REAL_VECTOR,
+        "quality" NVARCHAR(100),
+        "start" INTEGER
+      )
+    `;
+    await executeQuery(client, sqlStr);
+
+    // Create the vector store using fromTexts with "quality" as a specific metadata column.
+    const texts = ["foo bar", "hello foo world", "baz"];
+    const metadatas = [
+      { quality: "bad", start: 0, id: 1 },
+      { quality: "good", start: 10, id: 2 },
+      { quality: "ugly", start: 20, id: 3 },
+    ];
+    const vectorStore = await HanaDB.fromTexts(texts, metadatas, embeddings, {
+      connection: client,
+      tableName: tableNameTest,
+      specificMetadataColumns: ["quality"],
+    });
+
+    // Keyword search on the content column: $contains applied to VEC_TEXT.
+    let keyword = "foo";
+    let docs = await vectorStore.similaritySearch(keyword, 3, {
+      VEC_TEXT: { $contains: keyword },
+    });
+    expect(docs.length).toEqual(2);
+    expect(docs[0].pageContent).toContain(keyword);
+    expect(docs[1].pageContent).toContain(keyword);
+
+    // Keyword search on the content column with a non-existing keyword.
+    let nonExistingKeyword = "nonexistent";
+    docs = await vectorStore.similaritySearch(nonExistingKeyword, 3, {
+      VEC_TEXT: { $contains: nonExistingKeyword },
+    });
+    expect(docs.length).toEqual(0);
+
+    // Keyword search on the specific metadata column "quality".
+    keyword = "good";
+    docs = await vectorStore.similaritySearch(keyword, 3, {
+      quality: { $contains: keyword },
+    });
+    expect(docs.length).toEqual(1);
+    expect(docs[0].metadata.quality).toContain(keyword);
+
+    // Keyword search on the metadata column with a non-existing keyword.
+    nonExistingKeyword = "terrible";
+    docs = await vectorStore.similaritySearch(nonExistingKeyword, 3, {
+      quality: { $contains: nonExistingKeyword },
+    });
+    expect(docs.length).toEqual(0);
+
+    // Leave no test table behind, like the other tests in this file.
+    await dropTable(client, tableNameTest);
+  });
+
+  test("keyword search on unspecific metadata column", async () => {
+    const tableNameTest = "TEST_TABLE_KEYWORD_SEARCH_WITH_UNSPECIFIC_METADATA_COL";
+    await dropTable(client, tableNameTest);
+
+    // Create the vector store without providing specific metadata columns.
+    const texts = ["foo bar", "hello foo world", "baz"];
+    const metadatas = [
+      { quality: "good", id: 1 },
+      { quality: "bad", id: 2 },
+      { quality: "ugly", id: 3 },
+    ];
+    const vectorStore = await HanaDB.fromTexts(texts, metadatas, embeddings, {
+      connection: client,
+      tableName: tableNameTest,
+    });
+
+    // Plain value filter (no $contains) on the unspecific metadata column "quality".
+    const keyword = "good";
+    let docs = await vectorStore.similaritySearch("hello", 5, { quality: keyword });
+    expect(docs.length).toEqual(1);
+    // The keyword lives in the metadata, not in the page content, so the
+    // assertion checks the metadata value.
+    expect(docs[0].metadata.quality).toContain(keyword);
+
+    // $contains operator on the unspecific metadata column "quality".
+    docs = await vectorStore.similaritySearch("hello", 5, {
+      quality: { $contains: keyword },
+    });
+    expect(docs.length).toEqual(1);
+    expect(docs[0].pageContent).toContain("foo");
+    expect(docs[0].metadata.quality).toContain(keyword);
+
+    // Non-existing keyword on the unspecific metadata column.
+    const nonExistingKeyword = "terrible";
+    docs = await vectorStore.similaritySearch(nonExistingKeyword, 3, {
+      quality: { $contains: nonExistingKeyword },
+    });
+    expect(docs.length).toEqual(0);
+
+    // Leave no test table behind, like the other tests in this file.
+    await dropTable(client, tableNameTest);
+  });
+});
+
describe("Filter Tests", () => {
// Filter Test 1: Applying various filters from TYPE_1_FILTERING_TEST_CASES
it.each(TYPE_1_FILTERING_TEST_CASES)(
@@ -1329,3 +1438,68 @@ describe("Filter Tests", () => {
}
);
});
+
+// Internal Embedding Functionality Tests
+describe("Internal Embedding Functionality Tests", () => {
+  // The embedding model is selected through the environment. Without a usable
+  // model ID the suite throws while it is being defined.
+  const modelIdFromEnv = process.env.HANA_DB_EMBEDDING_MODEL_ID;
+  if (typeof modelIdFromEnv !== "string" || modelIdFromEnv.trim() === "") {
+    throw new Error("HANA_DB_EMBEDDING_MODEL_ID environment variable is not defined or is an empty string.");
+  }
+  const internalEmbedding = new HanaInternalEmbeddings({
+    internalEmbeddingModelId: modelIdFromEnv,
+  });
+
+  test("test internal addDocuments", async () => {
+    const tableName = "TEST_TABLE_ADD_DOCUMENTS_INTERNAL";
+    await dropTable(client, tableName);
+
+    const storeArgs: HanaDBArgs = { connection: client, tableName };
+    const store = new HanaDB(internalEmbedding, storeArgs);
+    await store.initialize();
+    expect(store).toBeDefined();
+    await store.addDocuments(DOCUMENTS);
+
+    // Every document must have produced exactly one row.
+    const countRows = await executeQuery(
+      client,
+      `SELECT COUNT(*) AS CNT FROM "${tableName}"`
+    );
+    expect(countRows[0].CNT).toEqual(DOCUMENTS.length);
+
+    await dropTable(client, tableName);
+  });
+
+  test("test internal similarity search with metadata filter", async () => {
+    const tableName = "TEST_TABLE_FILTER_INTERNAL";
+    await dropTable(client, tableName);
+
+    // Create the vector store using DOCUMENTS.
+    const store = await HanaDB.fromDocuments(DOCUMENTS, internalEmbedding, {
+      connection: client,
+      tableName,
+    });
+
+    // Unfiltered: searching with a document's own text should rank it first.
+    const unfiltered = await store.similaritySearch(DOCUMENTS[0].pageContent, 3);
+    expect(unfiltered.length).toEqual(3);
+    expect(unfiltered[0].pageContent).toEqual(DOCUMENTS[0].pageContent);
+
+    // Filtered to height < 10: only DOCUMENTS[1] and DOCUMENTS[2] are expected
+    // to remain (DOCUMENTS[0] presumably has a height of 10 - see the fixtures).
+    const filtered = await store.similaritySearch(DOCUMENTS[1].pageContent, 3, {
+      height: { $lt: 10.0 },
+    });
+    expect(filtered.length).toEqual(2);
+    expect(filtered[0].pageContent).toEqual(DOCUMENTS[1].pageContent);
+    expect(filtered[1].pageContent).toEqual(DOCUMENTS[2].pageContent);
+
+    await dropTable(client, tableName);
+  });
+
+  test("test internal max marginal relevance search", async () => {
+    const tableName = "TEST_TABLE_MAX_RELEVANCE_INTERNAL";
+    await dropTable(client, tableName);
+
+    // Create the vector store using DOCUMENTS.
+    const store = await HanaDB.fromDocuments(DOCUMENTS, internalEmbedding, {
+      connection: client,
+      tableName,
+    });
+
+    const mmrOptions = { k: 2, fetchK: 20 };
+    const hits = await store.maxMarginalRelevanceSearch(
+      DOCUMENTS[0].pageContent,
+      mmrOptions
+    );
+    expect(hits.length).toEqual(2);
+    expect(hits[0].pageContent).toEqual(DOCUMENTS[0].pageContent);
+
+    await dropTable(client, tableName);
+  });
+});