diff --git a/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx b/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx index 5aed8ee2c273..7dabac71c86e 100644 --- a/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx +++ b/docs/core_docs/docs/integrations/vectorstores/hanavector.mdx @@ -57,20 +57,21 @@ import { Table, Tr, Th, Td } from "@mdx-js/react"; In addition to the basic value-based filtering capabilities, it is possible to use more advanced filtering. The table below shows the available filter operators. -| Operator | Semantic | -| ---------- | -------------------------------------------------------------------------- | -| `$eq` | Equality (==) | -| `$ne` | Inequality (!=) | -| `$lt` | Less than (<) | -| `$lte` | Less than or equal (<=) | -| `$gt` | Greater than (>) | -| `$gte` | Greater than or equal (>=) | -| `$in` | Contained in a set of given values (in) | -| `$nin` | Not contained in a set of given values (not in) | -| `$between` | Between the range of two boundary values | -| `$like` | Text equality based on the "LIKE" semantics in SQL (using "%" as wildcard) | -| `$and` | Logical "and", supporting 2 or more operands | -| `$or` | Logical "or", supporting 2 or more operands | +| Operator | Semantic | +| ----------- | -------------------------------------------------------------------------- | +| `$eq` | Equality (==) | +| `$ne` | Inequality (!=) | +| `$lt` | Less than (<) | +| `$lte` | Less than or equal (<=) | +| `$gt` | Greater than (>) | +| `$gte` | Greater than or equal (>=) | +| `$in` | Contained in a set of given values (in) | +| `$nin` | Not contained in a set of given values (not in) | +| `$between` | Between the range of two boundary values | +| `$like` | Text equality based on the "LIKE" semantics in SQL (using "%" as wildcard) | +| `$contains` | Filters documents containing a specific keyword | +| `$and` | Logical "and", supporting 2 or more operands | +| `$or` | Logical "or", supporting 2 or more operands | import 
ExampleAdvancedFilter from "@examples/indexes/vector_stores/hana_vector/advancedFiltering.ts"; @@ -82,6 +83,18 @@ import ExampleChain from "@examples/indexes/vector_stores/hana_vector/chains.ts" {ExampleChain} +## Internal Embedding Functionality + +SAP HANA Cloud Vector Engine supports computing embeddings directly in the database by leveraging its native `VECTOR_EMBEDDING` function. This approach eliminates the need for an external embedding service, improving performance and enhancing data security. + +To enable this functionality, instantiate a `HanaInternalEmbeddings` object with the internal embedding model ID and pass this instance to your `HanaDB` vector store. + +For more details on the `VECTOR_EMBEDDING` function, refer to the official [SAP HANA Cloud documentation](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-vector-engine-guide/vector-embedding-function-vector?locale=en-US). + +import ExampleInternalEmbeddings from "@examples/indexes/vector_stores/hana_vector/internalEmbeddings.ts"; + +{ExampleInternalEmbeddings} + ## Related - Vector store [conceptual guide](/docs/concepts/#vectorstores) diff --git a/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts b/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts index a3095c29b17e..a7b4f1ad4870 100644 --- a/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts +++ b/examples/src/indexes/vector_stores/hana_vector/advancedFiltering.ts @@ -29,15 +29,15 @@ await new Promise((resolve, reject) => { const docs: Document[] = [ { pageContent: "First", - metadata: { name: "adam", is_active: true, id: 1, height: 10.0 }, + metadata: { name: "Adam Smith", is_active: true, id: 1, height: 10.0 }, }, { pageContent: "Second", - metadata: { name: "bob", is_active: false, id: 2, height: 5.7 }, + metadata: { name: "Bob Johnson", is_active: false, id: 2, height: 5.7 }, }, { pageContent: "Third", - metadata: { name: "jane", is_active: true, id: 3, 
height: 2.4 }, + metadata: { name: "Jane Doe", is_active: true, id: 3, height: 2.4 }, }, ]; @@ -75,8 +75,8 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"id":{"$ne":1}} -{ name: 'bob', is_active: false, id: 2, height: 5.7 } -{ name: 'jane', is_active: true, id: 3, height: 2.4 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } +{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */ // Between range @@ -86,27 +86,27 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"id":{"$between":[1,2]}} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */ // In list -advancedFilter = { name: { $in: ["adam", "bob"] } }; +advancedFilter = { name: { $in: ["Adam Smith", "Bob Johnson"] } }; console.log(`Filter: ${JSON.stringify(advancedFilter)}`); printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); -/* Filter: {"name":{"$in":["adam","bob"]}} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ +/* Filter: {"name":{"$in":["Adam Smith","Bob Johnson"]}} +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */ // Not in list -advancedFilter = { name: { $nin: ["adam", "bob"] } }; +advancedFilter = { name: { $nin: ["Adam Smith", "Bob Johnson"] } }; console.log(`Filter: ${JSON.stringify(advancedFilter)}`); printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); -/* Filter: {"name":{"$nin":["adam","bob"]}} -{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ +/* Filter: {"name":{"$nin":["Adam Smith","Bob Johnson"]}} +{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 
} */ // Greater than advancedFilter = { id: { $gt: 1 } }; @@ -115,8 +115,8 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"id":{"$gt":1}} -{ name: 'bob', is_active: false, id: 2, height: 5.7 } -{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } +{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */ // Greater than or equal to advancedFilter = { id: { $gte: 1 } }; @@ -125,9 +125,9 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"id":{"$gte":1}} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'bob', is_active: false, id: 2, height: 5.7 } -{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } +{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */ // Less than advancedFilter = { id: { $lt: 1 } }; @@ -145,7 +145,7 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"id":{"$lte":1}} -{ name: 'adam', is_active: true, id: 1, height: 10 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } */ // Text filtering with $like advancedFilter = { name: { $like: "a%" } }; @@ -154,7 +154,7 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"name":{"$like":"a%"}} -{ name: 'adam', is_active: true, id: 1, height: 10 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } */ advancedFilter = { name: { $like: "%a%" } }; console.log(`Filter: ${JSON.stringify(advancedFilter)}`); @@ -162,18 +162,35 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"name":{"$like":"%a%"}} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'jane', is_active: true, id: 3, 
height: 2.4 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */ + +// Text filtering with $contains +advancedFilter = { name: { $contains: "bob" } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"name":{"$contains":"bob"}} +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */ + +advancedFilter = { name: { $contains: "bo" } }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"name":{"$contains":"bo"}} + */ // Combined filtering with $or -advancedFilter = { $or: [{ id: 1 }, { name: "bob" }] }; +advancedFilter = { $or: [{ id: 1 }, { name: "Bob Johnson" }] }; console.log(`Filter: ${JSON.stringify(advancedFilter)}`); printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); -/* Filter: {"$or":[{"id":1},{"name":"bob"}]} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ +/* Filter: {"$or":[{"id":1},{"name":"Bob Johnson"}]} +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */ // Combined filtering with $and advancedFilter = { $and: [{ id: 1 }, { id: 2 }] }; @@ -184,15 +201,23 @@ printFilterResult( /* Filter: {"$and":[{"id":1},{"id":2}]} */ +advancedFilter = { $and: [{ name: { $contains: "bob" } }, { id: 2 }] }; +console.log(`Filter: ${JSON.stringify(advancedFilter)}`); +printFilterResult( + await vectorStore.similaritySearch("just testing", 5, advancedFilter) +); +/* Filter: {"$and":[{"name":{"$contains":"bob"}},{"id":2}]} +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */ + advancedFilter = { $or: [{ id: 1 }, { id: 2 }, { id: 3 }] }; console.log(`Filter: 
${JSON.stringify(advancedFilter)}`); printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"$or":[{"id":1},{"id":2},{"id":3}]} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'bob', is_active: false, id: 2, height: 5.7 } -{ name: 'jane', is_active: true, id: 3, height: 2.4 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } +{ name: 'Jane Doe', is_active: true, id: 3, height: 2.4 } */ // You can also define a nested filter with $and and $or. advancedFilter = { @@ -203,8 +228,8 @@ printFilterResult( await vectorStore.similaritySearch("just testing", 5, advancedFilter) ); /* Filter: {"$and":[{"$or":[{"id":1},{"id":2}]},{"height":{"$gte":5.0}}]} -{ name: 'adam', is_active: true, id: 1, height: 10 } -{ name: 'bob', is_active: false, id: 2, height: 5.7 } */ +{ name: 'Adam Smith', is_active: true, id: 1, height: 10 } +{ name: 'Bob Johnson', is_active: false, id: 2, height: 5.7 } */ // Disconnect from SAP HANA aft er the operations client.disconnect(); diff --git a/examples/src/indexes/vector_stores/hana_vector/internalEmbeddings.ts b/examples/src/indexes/vector_stores/hana_vector/internalEmbeddings.ts new file mode 100644 index 000000000000..be93dbf85f2d --- /dev/null +++ b/examples/src/indexes/vector_stores/hana_vector/internalEmbeddings.ts @@ -0,0 +1,81 @@ +import { Document } from "@langchain/core/documents"; +import hanaClient from "hdb"; +import { HanaInternalEmbeddings } from "@langchain/community/embeddings/hana_internal"; +import { HanaDB, HanaDBArgs } from "@langchain/community/vectorstores/hanavector"; + +// Initialize the internal embeddings instance using the internal model ID. +// This instance will use SAP HANA's built-in VECTOR_EMBEDDING function of HanaDB. 
+const internalEmbeddings = new HanaInternalEmbeddings({
+  internalEmbeddingModelId: process.env.HANA_DB_EMBEDDING_MODEL_ID || "SAP_NEB.20240715",
+});
+
+// Set up connection parameters from environment variables.
+const connectionParams = {
+  host: process.env.HANA_HOST,
+  port: process.env.HANA_PORT,
+  user: process.env.HANA_UID,
+  password: process.env.HANA_PWD,
+};
+
+// Create a HANA client.
+const client = hanaClient.createClient(connectionParams);
+
+// Connect to SAP HANA (the callback-style connect is wrapped in a Promise so it can be awaited).
+await new Promise<void>((resolve, reject) => {
+  client.connect((err: Error) => {
+    if (err) {
+      reject(err);
+    } else {
+      console.log("Connected to SAP HANA successfully.");
+      resolve();
+    }
+  });
+});
+
+// Define the arguments for the vector store instance.
+const args: HanaDBArgs = {
+  connection: client,
+  tableName: "testInternalEmbeddings",
+};
+
+// Create a new HanaDB vector store using the internal embeddings instance.
+// This vector store leverages the internal VECTOR_EMBEDDING function of HanaDB.
+const vectorStore = new HanaDB(internalEmbeddings, args);
+// Initialize the vector store (creates the table and verifies its columns).
+await vectorStore.initialize();
+
+// Example documents to index.
+const docs: Document[] = [
+  new Document({
+    pageContent: "Charlie is a data scientist who specializes in AI research.",
+    metadata: { name: "Charlie Brown" },
+  }),
+  new Document({
+    pageContent: "David is a teacher with a passion for history and literature.",
+    metadata: { name: "David Williams" },
+  }),
+  new Document({
+    pageContent: "Eve is an entrepreneur focusing on blockchain and cryptocurrency.",
+    metadata: { name: "Eve Adams" },
+  }),
+];
+
+// Clean up any existing documents in the table.
+await vectorStore.delete({ filter: {} });
+// Add the example documents.
+await vectorStore.addDocuments(docs);
+
+// Perform a similarity search. In this example, we search for documents related to "bitcoin".
+const results = await vectorStore.similaritySearch("bitcoin", 1);
+console.log("Similarity search results:", results);
+/*
+  [
+    {
+      pageContent: 'Eve is an entrepreneur focusing on blockchain and cryptocurrency.',
+      metadata: { name: 'Eve Adams' }
+    }
+  ]
+*/
+
+// Disconnect from SAP HANA after operations.
+client.disconnect();
diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore
index fc0cb520b693..cbd398458861 100644
--- a/libs/langchain-community/.gitignore
+++ b/libs/langchain-community/.gitignore
@@ -174,6 +174,10 @@ embeddings/gradient_ai.cjs
 embeddings/gradient_ai.js
 embeddings/gradient_ai.d.ts
 embeddings/gradient_ai.d.cts
+embeddings/hana_internal.cjs
+embeddings/hana_internal.js
+embeddings/hana_internal.d.ts
+embeddings/hana_internal.d.cts
 embeddings/hf.cjs
 embeddings/hf.js
 embeddings/hf.d.ts
diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js
index 3a67261b225b..2d365348b4f5 100644
--- a/libs/langchain-community/langchain.config.js
+++ b/libs/langchain-community/langchain.config.js
@@ -79,6 +79,7 @@ export const config = {
     "embeddings/deepinfra": "embeddings/deepinfra",
     "embeddings/fireworks": "embeddings/fireworks",
     "embeddings/gradient_ai": "embeddings/gradient_ai",
+    "embeddings/hana_internal": "embeddings/hana_internal",
     "embeddings/hf": "embeddings/hf",
     "embeddings/hf_transformers": "embeddings/hf_transformers",
     "embeddings/huggingface_transformers":
diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
index bbeac62a3d4e..40d7f755fca5 100644
--- a/libs/langchain-community/package.json
+++ b/libs/langchain-community/package.json
@@ -1129,6 +1129,15 @@
       "import": "./embeddings/gradient_ai.js",
       "require": "./embeddings/gradient_ai.cjs"
     },
+    "./embeddings/hana_internal": {
+      "types": {
+        "import": "./embeddings/hana_internal.d.ts",
+        "require": "./embeddings/hana_internal.d.cts",
+        "default": "./embeddings/hana_internal.d.ts"
+      },
+      "import": "./embeddings/hana_internal.js",
+      "require": "./embeddings/hana_internal.cjs"
+    },
     "./embeddings/hf": {
       "types": {
         "import": "./embeddings/hf.d.ts",
@@ -3469,6 +3478,10 @@
     "embeddings/gradient_ai.js",
     "embeddings/gradient_ai.d.ts",
     "embeddings/gradient_ai.d.cts",
+    "embeddings/hana_internal.cjs",
+    "embeddings/hana_internal.js",
+    "embeddings/hana_internal.d.ts",
+    "embeddings/hana_internal.d.cts",
     "embeddings/hf.cjs",
     "embeddings/hf.js",
     "embeddings/hf.d.ts",
diff --git a/libs/langchain-community/src/embeddings/hana_internal.ts b/libs/langchain-community/src/embeddings/hana_internal.ts
new file mode 100644
index 000000000000..8a50d7eac0aa
--- /dev/null
+++ b/libs/langchain-community/src/embeddings/hana_internal.ts
@@ -0,0 +1,70 @@
+import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings";
+
+/**
+ * Parameters for initializing HanaInternalEmbeddings.
+ */
+export interface HanaInternalEmbeddingsParams extends EmbeddingsParams {
+  /**
+   * The ID of the internal embedding model used by the HANA database.
+   */
+  internalEmbeddingModelId: string;
+}
+
+/**
+ * A dummy embeddings class for use with HANA's internal embedding functionality.
+ * This class prevents the use of standard embedding methods and ensures that
+ * internal embeddings are handled exclusively via database queries.
+ *
+ * @example
+ * const internalEmbeddings = new HanaInternalEmbeddings({
+ *   internalEmbeddingModelId: "your_model_id_here",
+ * });
+ *
+ * // The following calls will throw errors:
+ * await internalEmbeddings.embedQuery("sample text"); // Throws error
+ * await internalEmbeddings.embedDocuments(["text one", "text two"]); // Throws error
+ *
+ * // Retrieve the internal model id:
+ * console.log(internalEmbeddings.getModelId());
+ */
+export class HanaInternalEmbeddings extends Embeddings {
+  private readonly modelId: string;
+
+  /**
+   * A flag to indicate this class is HANA-specific.
+   */
+  public readonly isHanaInternalEmbeddings = true;
+
+  constructor(fields: HanaInternalEmbeddingsParams) {
+    super(fields);
+    this.modelId = fields.internalEmbeddingModelId;
+  }
+
+  /**
+   * This method is not applicable for HANA internal embeddings.
+   * @throws Error indicating that internal embeddings cannot be used externally.
+   */
+  async embedQuery(_text: string): Promise<number[]> {
+    throw new Error(
+      "Internal embeddings cannot be used externally. Use HANA's internal embedding functionality instead."
+    );
+  }
+
+  /**
+   * This method is not applicable for HANA internal embeddings.
+   * @throws Error indicating that internal embeddings cannot be used externally.
+   */
+  async embedDocuments(_texts: string[]): Promise<number[][]> {
+    throw new Error(
+      "Internal embeddings cannot be used externally. Use HANA's internal embedding functionality instead."
+    );
+  }
+
+  /**
+   * Retrieves the internal embedding model ID.
+   * @returns The internal embedding model ID.
+   */
+  getModelId(): string {
+    return this.modelId;
+  }
+}
diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts
index 832782caf5b6..5d926b11ee4b 100644
--- a/libs/langchain-community/src/load/import_map.ts
+++ b/libs/langchain-community/src/load/import_map.ts
@@ -30,6 +30,7 @@ export * as embeddings__baidu_qianfan from "../embeddings/baidu_qianfan.js";
 export * as embeddings__bytedance_doubao from "../embeddings/bytedance_doubao.js";
 export * as embeddings__deepinfra from "../embeddings/deepinfra.js";
 export * as embeddings__fireworks from "../embeddings/fireworks.js";
+export * as embeddings__hana_internal from "../embeddings/hana_internal.js";
 export * as embeddings__minimax from "../embeddings/minimax.js";
 export * as embeddings__ollama from "../embeddings/ollama.js";
 export * as embeddings__togetherai from "../embeddings/togetherai.js";
diff --git a/libs/langchain-community/src/vectorstores/hanavector.ts b/libs/langchain-community/src/vectorstores/hanavector.ts
index 48e40b8ee48c..e002afa9165e 100644 --- a/libs/langchain-community/src/vectorstores/hanavector.ts +++ b/libs/langchain-community/src/vectorstores/hanavector.ts @@ -6,6 +6,8 @@ import { import { Document } from "@langchain/core/documents"; import { maximalMarginalRelevance } from "@langchain/core/utils/math"; +import { HanaInternalEmbeddings } from "../embeddings/hana_internal.js"; + export type DistanceStrategy = "euclidean" | "cosine"; const COMPARISONS_TO_SQL: Record = { @@ -35,7 +37,8 @@ type Comparator = | "$in" | "$nin" | "$between" - | "$like"; + | "$like" + | "$contains"; // Filter using comparison operators // Defines the relationship between a comparison operator and its value type ComparatorFilter = { @@ -75,6 +78,11 @@ const LOGICAL_OPERATORS_TO_SQL: Record = { $or: "OR", }; +const CONTAINS_OPERATOR = "$contains"; + +const INTERMEDIATE_TABLE_NAME = "intermediate_result"; + + const HANA_DISTANCE_FUNCTION: Record = { cosine: ["COSINE_SIMILARITY", "DESC"], euclidean: ["L2DISTANCE", "ASC"], @@ -126,6 +134,10 @@ export class HanaDB extends VectorStore { private specificMetadataColumns: string[]; + private useInternalEmbeddings: boolean; + + private internalEmbeddingModelId: string; + _vectorstoreType(): string { return "hanadb"; } @@ -151,6 +163,52 @@ export class HanaDB extends VectorStore { args.specificMetadataColumns || [] ); this.connection = args.connection; + + // Set the embedding and decide whether to use internal embedding + this._setEmbeddings(embeddings); + } + + /** + * Use this method to change the embeddings instance. + * + * Sets the embedding instance and configures the internal embedding mode + * if applicable. + * + * this method sets the internal flag and stores the model ID. + * Otherwise, it ensures that external embedding mode is used. + * + * @param embeddings - An instance of EmbeddingsInterface. 
+ */ + private _setEmbeddings(embeddings: EmbeddingsInterface): void { + this.embeddings = embeddings + // eslint-disable-next-line @typescript-eslint/no-explicit-any + if ((embeddings as any).isHanaInternalEmbeddings === true) { + this.useInternalEmbeddings = true; + this.internalEmbeddingModelId = (embeddings as HanaInternalEmbeddings).getModelId(); + } else { + this.useInternalEmbeddings = false; + this.internalEmbeddingModelId = ""; + } + } + + /** + * Ping the database to check if the in-database embedding + * function exists and works. + * + * This method ensures that the internal VECTOR_EMBEDDING function + * is available and functioning correctly by passing a test value. + * + * @throws Error if the internal embedding function validation fails. + */ + private async validateInternalEmbeddingFunction(): Promise { + if (!this.internalEmbeddingModelId) { + throw new Error("Internal embedding model id is not set"); + } + const sqlStr = + "SELECT COUNT(TO_NVARCHAR(VECTOR_EMBEDDING('test', 'QUERY', ?))) AS TEST FROM sys.DUMMY;"; + const client = this.connection; + const stm = await this.prepareQuery(client, sqlStr); + await this.executeStatement(stm, [this.internalEmbeddingModelId]); } // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -196,6 +254,8 @@ export class HanaDB extends VectorStore { } public async initialize() { + if (this.useInternalEmbeddings) + await this.validateInternalEmbeddingFunction(); let valid_distance = false; for (const key in HANA_DISTANCE_FUNCTION) { if (key === this.distanceStrategy) { @@ -509,7 +569,18 @@ export class HanaDB extends VectorStore { `Operator '${specialOp}' expects a non-undefined value.` ); } - } else if (specialOp in IN_OPERATORS_TO_SQL) { + } else if (specialOp === CONTAINS_OPERATOR) { + // Special handling for keyword search + operator = CONTAINS_OPERATOR; + if (specialVal !== undefined) { + queryTuple.push(specialVal.toString()); + } else { + throw new Error( + `Operator '${specialOp}' expects a 
non-undefined value.` + ); + } + } + else if (specialOp in IN_OPERATORS_TO_SQL) { operator = IN_OPERATORS_TO_SQL[specialOp]; if (Array.isArray(specialVal)) { const placeholders = Array(specialVal.length).fill("?").join(","); @@ -527,15 +598,89 @@ export class HanaDB extends VectorStore { throw new Error(`Unsupported filter data-type: ${typeof filterValue}`); } - // Metadata column handling - const selector = this.specificMetadataColumns.includes(key) + if (operator === CONTAINS_OPERATOR) { + // Instead of a normal clause, create a keyword search condition. + whereStr += `SCORE(? IN ("${key}" EXACT SEARCH MODE 'text')) > 0`; + } else { + // Metadata column handling (not required in keyword search) + const selector = this.specificMetadataColumns.includes(key) ? `"${key}"` : `JSON_VALUE(${this.metadataColumn}, '$.${key}')`; whereStr += `${selector} ${operator} ${sqlParam}`; + } }); return [whereStr, queryTuple]; } + /** + * Extract metadata columns used with `$contains` in the filter. + * + * Scans the filter to find unspecific metadata columns used + * with the `$contains` operator. + * + * @param filter - (Optional) A filter object that may include nested filter conditions. + * @returns An array of unique metadata field names (as strings) that are used + * with the "$contains" operator. 
+ */ + private extractKeywordSearchColumns(filter?: this["FilterType"]): string[] { + const keywordColumns = new Set(); + this.recurseFiltersHelper(keywordColumns, filter); + return [...keywordColumns]; + } + + private recurseFiltersHelper(keywordColumns: Set, filterObj?: this["FilterType"], parentKey?: string): void { + if (!filterObj || typeof filterObj !== "object") return; + + Object.entries(filterObj).forEach(([key, value]) => { + if (key === CONTAINS_OPERATOR) { + if ( + parentKey && + parentKey !== this.contentColumn && + !this.specificMetadataColumns.includes(parentKey) + ) { + keywordColumns.add(parentKey); + } + } else if (key in LOGICAL_OPERATORS_TO_SQL) { + // Assume it's an array of filters + (value as this["FilterType"][]).forEach((subfilter) => this.recurseFiltersHelper(keywordColumns, subfilter)); + } else if (typeof value === "object" && value !== null) { + this.recurseFiltersHelper(keywordColumns, value as this["FilterType"], key); + } + }); + } + + + /** + * Generate a SQL `WITH` clause to project metadata columns for keyword search. + * + * + * Example: + * Input: ["title", "author"] + * Output: + * WITH intermediate_result AS ( + * SELECT *, + * JSON_VALUE(metadata_column, '$.title') AS "title", + * JSON_VALUE(metadata_column, '$.author') AS "author" + * FROM "table_name" + * ) + * * + * @param projectedMetadataColumns - List of metadata column names for projection. + * @returns A SQL `WITH` clause string. + */ + private createMetadataProjection( + projectedMetadataColumns: string[] + ): string { + const metadataColumns = projectedMetadataColumns.map( + (col) => + `JSON_VALUE(${this.metadataColumn}, '$.${HanaDB.sanitizeName(col)}') AS "${HanaDB.sanitizeName(col)}"` + ); + return ( + `WITH ${INTERMEDIATE_TABLE_NAME} AS (` + + `SELECT *, ${metadataColumns.join(", ")} ` + + `FROM "${this.tableName}")` + ); + } + /** * Creates an HNSW vector index on a specified table and vector column with * optional build and search configurations. 
If no configurations are provided, @@ -717,13 +862,23 @@ export class HanaDB extends VectorStore { } /** - * Adds an array of documents to the table. The documents are first - * converted to vectors using the `embedDocuments` method of the - * `embeddings` instance. + * Adds an array of documents to the table. + * + * + * In external embedding mode, this method computes embeddings client-side + * and inserts them. + * In internal embedding mode, it leverages the database's internal + * VECTOR_EMBEDDING function to generate embeddings. + * * @param documents Array of Document instances to be added to the table. * @returns Promise that resolves when the documents are added. */ async addDocuments(documents: Document[]): Promise { + // If using internal embeddings, we do NOT call embedDocuments() from Node. + if (this.useInternalEmbeddings) { + return this.addDocumentsUsingInternalEmbedding(documents); + } + // Otherwise, default (external) approach: const texts = documents.map(({ pageContent }) => pageContent); return this.addVectors( await this.embeddings.embedDocuments(texts), @@ -731,6 +886,44 @@ export class HanaDB extends VectorStore { ); } + /** + * Adds documents to the database using the internal embedding function. + * + * This method constructs an SQL INSERT statement that leverages the + * database's internal VECTOR_EMBEDDING function to generate embeddings + * on the server side. + * + * @param documents - Array of Document objects to be added. + * @returns Promise that resolves when the documents are added. + */ + private async addDocumentsUsingInternalEmbedding(documents: Document[]): Promise { + const texts = documents.map((doc) => doc.pageContent); + const metadatas = documents.map((doc) => doc.metadata); + const client = this.connection; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const sqlParams: [string, string, string, string, ...(string | null)[]][] = texts.map((text, i) => { + const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; + const [remainingMetadata, specialMetadata] = this.splitOffSpecialMetadata(metadata); + // Prepare the SQL parameters + return [ + text, + JSON.stringify(this.sanitizeMetadataKeys(remainingMetadata)), + text, + this.internalEmbeddingModelId, + ...specialMetadata + ]; + }); + // Build the column list for the INSERT statement. + const specificMetadataColumnsString = this.getSpecificMetadataColumnsString() + const extraPlaceholders = this.specificMetadataColumns.map(() => ", ?").join(""); + + // Insert data into the table, bulk insert. + const sqlStr = `INSERT INTO "${this.tableName}" ("${this.contentColumn}", "${this.metadataColumn}", "${this.vectorColumn}"${specificMetadataColumnsString}) + VALUES (?, ?, VECTOR_EMBEDDING(?, 'DOCUMENT', ?)${extraPlaceholders});`; + const stm = await this.prepareQuery(client, sqlStr); + await this.executeStatement(stm, sqlParams); + } + /** * Adds an array of vectors and corresponding documents to the database. * The vectors and documents are batch inserted into the database. @@ -745,33 +938,78 @@ export class HanaDB extends VectorStore { const texts = documents.map((doc) => doc.pageContent); const metadatas = documents.map((doc) => doc.metadata); const client = this.connection; - const sqlParams: [string, string, string][] = texts.map((text, i) => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const sqlParams: [string, string, string, ...any[]][] = texts.map((text, i) => { const metadata = Array.isArray(metadatas) ? 
metadatas[i] : metadatas; + const [remainingMetadata, specialMetadata] = this.splitOffSpecialMetadata(metadata); // Ensure embedding is generated or provided const embeddingString = `[${vectors[i].join(", ")}]`; // Prepare the SQL parameters return [ text, - JSON.stringify(this.sanitizeMetadataKeys(metadata)), - embeddingString, + JSON.stringify(this.sanitizeMetadataKeys(remainingMetadata)), + embeddingString, + ...specialMetadata ]; }); + // Build the column list for the INSERT statement. + const specificMetadataColumnsString = this.getSpecificMetadataColumnsString() + const extraPlaceholders = this.specificMetadataColumns.map(() => ", ?").join(""); + // Insert data into the table, bulk insert. - const sqlStr = `INSERT INTO "${this.tableName}" ("${this.contentColumn}", "${this.metadataColumn}", "${this.vectorColumn}") - VALUES (?, ?, TO_REAL_VECTOR(?));`; + const sqlStr = `INSERT INTO "${this.tableName}" ("${this.contentColumn}", "${this.metadataColumn}", "${this.vectorColumn}"${specificMetadataColumnsString}) + VALUES (?, ?, TO_REAL_VECTOR(?)${extraPlaceholders});`; const stm = await this.prepareQuery(client, sqlStr); await this.executeStatement(stm, sqlParams); // stm.execBatch(sqlParams); } /** - * Return docs most similar to query. - * @param query Query text for the similarity search. - * @param k Number of Documents to return. Defaults to 4. - * @param filter A dictionary of metadata fields and values to filter by. - Defaults to None. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ + * Helper function to generate the SQL snippet for specific metadata columns. + * + * Returns a string in the format: ', "col1", "col2", ...' + * if specific metadata columns are defined, + * or an empty string if there are none. + * + * @returns A string representing the specific metadata columns for SQL insertion. 
+ */ + private getSpecificMetadataColumnsString(): string{ + if (this.specificMetadataColumns.length === 0) { + return ""; + } + return ', "' + this.specificMetadataColumns.join('", "') + '"'; + } + + /** + * Splits the given metadata object into two parts: + * 1. The original metadata (unchanged). + * 2. An array of special metadata values corresponding to each column + * listed in `specificMetadataColumns`. + * + * @param metadata - The metadata object from which to extract special values. + * @returns A tuple where the first element is the original metadata object, + * and the second element is an array of special metadata values. + */ + // eslint-disable-next-line @typescript-eslint/no-explicit-any + private splitOffSpecialMetadata(metadata: any): [any, (string | null)[]] { + const specialMetadata: (string | null)[] = []; + if (!metadata) { + return [{}, []]; + } + for (const columnName of this.specificMetadataColumns) { + specialMetadata.push(metadata[columnName] ?? null); + } + return [metadata, specialMetadata]; + } + + /** + * Return docs most similar to query. + * @param query Query text for the similarity search. + * @param k Number of Documents to return. Defaults to 4. + * @param filter A dictionary of metadata fields and values to filter by. + Defaults to None. + * @returns Promise that resolves to a list of documents and their corresponding similarity scores. + */ async similaritySearch( query: string, k: number, @@ -782,30 +1020,47 @@ export class HanaDB extends VectorStore { } /** - * Return documents and score values most similar to query. - * @param query Query text for the similarity search. - * @param k Number of Documents to return. Defaults to 4. - * @param filter A dictionary of metadata fields and values to filter by. - Defaults to None. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ + * Return documents and score values most similar to query. 
+ * @param query Query text for the similarity search. + * @param k Number of Documents to return. Defaults to 4. + * @param filter A dictionary of metadata fields and values to filter by. + Defaults to None. + * @returns Promise that resolves to a list of documents and their corresponding similarity scores. + */ async similaritySearchWithScore( query: string, k: number, filter?: this["FilterType"] ): Promise<[Document, number][]> { - const queryEmbedding = await this.embeddings.embedQuery(query); - return this.similaritySearchVectorWithScore(queryEmbedding, k, filter); + let wholeResult = null + if (this.useInternalEmbeddings) { + // Internal embeddings: pass the query directly + wholeResult = await this.similaritySearchWithScoreAndVectorByQuery( + query, + k, + filter + ); + } else { + const queryEmbedding = await this.embeddings.embedQuery(query); + // External embeddings: generate embedding from the query + wholeResult = await this.similaritySearchWithScoreAndVectorByVector( + queryEmbedding, + k, + filter + ); + } + return wholeResult.map(([doc, score]) => [doc, score]); + } /** - * Return docs most similar to the given embedding. - * @param query Query embedding for the similarity search. - * @param k Number of Documents to return. Defaults to 4. - * @param filter A dictionary of metadata fields and values to filter by. - Defaults to None. - * @returns Promise that resolves to a list of documents and their corresponding similarity scores. - */ + * Return docs most similar to the given embedding. + * @param query Query embedding for the similarity search. + * @param k Number of Documents to return. Defaults to 4. + * @param filter A dictionary of metadata fields and values to filter by. + Defaults to None. + * @returns Promise that resolves to a list of documents and their corresponding similarity scores. 
+ */ async similaritySearchVectorWithScore( queryEmbedding: number[], k: number, @@ -821,37 +1076,56 @@ export class HanaDB extends VectorStore { } /** - * Performs a similarity search based on vector comparison and returns documents along with their similarity scores and vectors. - * @param embedding The vector representation of the query for similarity comparison. - * @param k The number of top similar documents to return. - * @param filter Optional filter criteria to apply to the search query. - * @returns A promise that resolves to an array of tuples, each containing a Document, its similarity score, and its vector. + * Performs a similarity search using the provided embedding expression. + * + * This helper method is used by both external and internal similarity search methods + * to construct and execute the SQL query. + * + * @param embeddingExpr - SQL expression that represents or generates the query embedding. + * @param k - The number of documents to return. + * @param filter A dictionary of metadata fields and values to filter by. + Defaults to None. + * @param vectorEmbeddingParams - Optional parameters for the embedding expression (used in internal mode). + * @returns Promise that resolves to a list of documents and their corresponding similarity scores. 
*/ - async similaritySearchWithScoreAndVectorByVector( - embedding: number[], + private async similaritySearchWithScoreAndVector( + embeddingExpr: string, k: number, - filter?: this["FilterType"] + filter?: this["FilterType"], + vectorEmbeddingParams?: string[] ): Promise> { // Sanitize inputs const sanitizedK = HanaDB.sanitizeInt(k); - const sanitizedEmbedding = HanaDB.sanitizeListFloat(embedding); // Determine the distance function based on the configured strategy const distanceFuncName = HANA_DISTANCE_FUNCTION[this.distanceStrategy][0]; - // Convert the embedding vector to a string for SQL query - const embeddingAsString = sanitizedEmbedding.join(","); - let sqlStr = `SELECT TOP ${sanitizedK} + + // Keyword search: extract metadata columns used with $contains + const projectedMetadataColumns = this.extractKeywordSearchColumns(filter); + let metadataProjection = ""; + let fromClause = `"${this.tableName}"`; + if (projectedMetadataColumns.length > 0) { + metadataProjection = this.createMetadataProjection(projectedMetadataColumns); + fromClause = INTERMEDIATE_TABLE_NAME; + } + + let sqlStr = `${metadataProjection} + SELECT TOP ${sanitizedK} "${this.contentColumn}", "${this.metadataColumn}", TO_NVARCHAR("${this.vectorColumn}") AS VECTOR, - ${distanceFuncName}("${this.vectorColumn}", TO_REAL_VECTOR('[${embeddingAsString}]')) AS CS - FROM "${this.tableName}"`; + ${distanceFuncName}("${this.vectorColumn}", ${embeddingExpr}) AS CS + FROM ${fromClause}`; // Add order by clause to sort by similarity const orderStr = ` ORDER BY CS ${ HANA_DISTANCE_FUNCTION[this.distanceStrategy][1] }`; // Prepare and execute the SQL query - const [whereStr, queryTuple] = this.createWhereByFilter(filter); + const [whereStr, tempQueryTuple] = this.createWhereByFilter(filter); + let queryTuple = tempQueryTuple + if (vectorEmbeddingParams && vectorEmbeddingParams.length > 0) { + queryTuple = [...vectorEmbeddingParams, ...queryTuple]; + } sqlStr += whereStr + orderStr; const client = 
this.connection; @@ -874,10 +1148,66 @@ export class HanaDB extends VectorStore { return result; } + /** + * Performs a similarity search based on vector comparison and returns documents along with their similarity scores and vectors. + * @param embedding The vector representation of the query for similarity comparison. + * @param k The number of top similar documents to return. + * @param filter Optional filter criteria to apply to the search query. + * @returns A promise that resolves to an array of tuples, each containing a Document, its similarity score, and its vector. + */ + async similaritySearchWithScoreAndVectorByVector( + embedding: number[], + k: number, + filter?: this["FilterType"] + ): Promise> { + // Convert the embedding vector to a string for SQL query + const sanitizedEmbedding = HanaDB.sanitizeListFloat(embedding); + const embeddingAsString = sanitizedEmbedding.join(","); + return this.similaritySearchWithScoreAndVector( + `TO_REAL_VECTOR('[${embeddingAsString}]')`, + k, + filter + ); + } + + /** + * Performs a similarity search using the internal embedding function. + * + * In this mode, the query text is passed directly to the database's internal VECTOR_EMBEDDING function. + * + * @param query - The query text. + * @param k - The number of documents to return. + * @param filter A dictionary of metadata fields and values to filter by. + Defaults to None. + * @returns A promise that resolves to an array of tuples, each containing a Document, its similarity score, and its vector. + * @throws Error if internal embedding mode is not active. + */ + async similaritySearchWithScoreAndVectorByQuery( + query: string, + k: number, + filter?: this["FilterType"] + ): Promise> { + if (!this.useInternalEmbeddings) { + throw new Error( + "Internal embedding search requires an internal embedding instance." 
+ ); + } + const vectorEmbeddingParams = [query, this.internalEmbeddingModelId] + return this.similaritySearchWithScoreAndVector( + "VECTOR_EMBEDDING(?, 'QUERY', ?)", + k, + filter, + vectorEmbeddingParams + ); + } + /** * Return documents selected using the maximal marginal relevance. * Maximal marginal relevance optimizes for similarity to the query AND * diversity among selected documents. + * When using an internal embedding instance, the query is processed + * directly by the database's internal embedding function. + * Otherwise, the query is embedded externally. * @param query Text to look up documents similar to. * @param options.k Number of documents to return. * @param options.fetchK=20 Number of documents to fetch before passing to @@ -892,7 +1222,25 @@ export class HanaDB extends VectorStore { options: MaxMarginalRelevanceSearchOptions ): Promise { const { k, fetchK = 20, lambda = 0.5 } = options; - const queryEmbedding = await this.embeddings.embedQuery(query); + let queryEmbedding: number[]; + if (this.useInternalEmbeddings){ + const sqlStr = `SELECT TO_NVARCHAR(VECTOR_EMBEDDING(?, 'QUERY', ?)) + AS VECTOR FROM sys.DUMMY;` + const queryTuple = [query, this.internalEmbeddingModelId]; + const client = this.connection; + const stm = await this.prepareQuery(client, sqlStr); + const resultSet = await this.executeStatement(stm, queryTuple); + const result: [number[]] = resultSet.map( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (row: any) => { + return HanaDB.parseFloatArrayFromString(row.VECTOR); + } + ); + queryEmbedding = result[0]; + } + else { + queryEmbedding = await this.embeddings.embedQuery(query); + } const docs = await this.similaritySearchWithScoreAndVectorByVector( queryEmbedding, diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts index 9634adaa05b4..52253181c9e3 100644 --- 
a/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts +++ b/libs/langchain-community/src/vectorstores/tests/hanavector.fixtures.ts @@ -123,6 +123,8 @@ export const TYPE_4_FILTERING_TEST_CASES: TestCase[] = [ export const TYPE_5_FILTERING_TEST_CASES: TestCase[] = [ { filter: { name: { $like: "a%" } }, expected: [1] }, { filter: { name: { $like: "%a%" } }, expected: [1, 3] }, + { filter: { name: { $contains: "bob" } }, expected: [2] }, + { filter: { name: { $contains: "bo" } }, expected: [] }, ]; export const TYPE_6_FILTERING_TEST_CASES: TestCase[] = [ diff --git a/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts b/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts index dc8b4a534e81..184f3c09412a 100644 --- a/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts +++ b/libs/langchain-community/src/vectorstores/tests/hanavector.int.test.ts @@ -5,6 +5,7 @@ import { Document } from "@langchain/core/documents"; import { FakeEmbeddings } from "@langchain/core/utils/testing"; import { test, expect } from "@jest/globals"; import { HanaDB, HanaDBArgs } from "../hanavector.js"; +import { HanaInternalEmbeddings } from "../../embeddings/hana_internal.js"; import { DOCUMENTS, TYPE_1_FILTERING_TEST_CASES, @@ -126,6 +127,7 @@ beforeAll(async () => { expect(process.env.HANA_PORT).toBeDefined(); expect(process.env.HANA_UID).toBeDefined(); expect(process.env.HANA_PWD).toBeDefined(); + expect(process.env.HANA_DB_EMBEDDING_MODEL_ID).toBeDefined(); await connectToHANA(); }); @@ -1147,6 +1149,113 @@ describe("HNSW Index Creation Tests", () => { }); }); +describe("Keyword Search Tests", () => { + test("keyword search on content and specific metadata column", async () => { + const tableNameTest = "TEST_TABLE_KEYWORD_SEARCH_WITHOUT_UNSPECIFIC_METADATA_COL"; + await dropTable(client, tableNameTest); + + // Create table manually with extra columns "quality" and "start" + const sqlStr = ` + CREATE TABLE 
"${tableNameTest}" ( + "VEC_TEXT" NCLOB, + "VEC_META" NCLOB, + "VEC_VECTOR" REAL_VECTOR, + "quality" NVARCHAR(100), + "start" INTEGER + ) + `; + try { + await executeQuery(client, sqlStr); + } catch (error) { + console.log(error) + } + + // Create the vector store using fromTexts with a specific metadata column "quality" + const texts = ["foo bar", "hello foo world", "baz"]; + const metadatas = [ + { quality: "bad", start: 0, id: 1 }, + { quality: "good", start: 10, id: 2 }, + { quality: "ugly", start: 20, id: 3 }, + ]; + const vectorStore = await HanaDB.fromTexts(texts, metadatas, embeddings, { + connection: client, + tableName: tableNameTest, + specificMetadataColumns: ["quality"], + }); + + // Keyword search on content column using $contains operator on VEC_TEXT + let keyword = "foo"; + let docs = await vectorStore.similaritySearch(keyword, 3, { + VEC_TEXT: { $contains: keyword }, + }); + expect(docs.length).toEqual(2); + expect(docs[0].pageContent).toContain(keyword); + expect(docs[1].pageContent).toContain(keyword); + + + // Keyword search on content column with a non-existing keyword + let nonExistingKeyword = "nonexistent"; + docs = await vectorStore.similaritySearch(nonExistingKeyword, 3, { + VEC_TEXT: { $contains: nonExistingKeyword }, + }); + expect(docs.length).toEqual(0); + + // Keyword search on the specific metadata column "quality" + keyword = "good"; + docs = await vectorStore.similaritySearch(keyword, 3, { + quality: { $contains: keyword }, + }); + expect(docs.length).toEqual(1); + expect(docs[0].metadata.quality).toContain(keyword); + + // Keyword search on metadata with a non-existing keyword + nonExistingKeyword = "terrible"; + docs = await vectorStore.similaritySearch(nonExistingKeyword, 3, { + quality: { $contains: nonExistingKeyword }, + }); + expect(docs.length).toEqual(0); + }); + + test("keyword search on unspecific metadata column", async () => { + const tableNameTest = "TEST_TABLE_KEYWORD_SEARCH_WITH_UNSPECIFIC_METADATA_COL"; + await 
dropTable(client, tableNameTest); + + // Create the vector store without providing specific metadata columns. + const texts = ["foo bar", "hello foo world", "baz"]; + const metadatas = [ + { quality: "good", id: 1 }, + { quality: "bad", id: 2 }, + { quality: "ugly", id: 3 }, + ]; + const vectorStore = await HanaDB.fromTexts(texts, metadatas, embeddings, { + connection: client, + tableName: tableNameTest, + }); + + // Using a simple filter (without $contains) on unspecific metadata column "quality" + const keyword = "good"; + let docs = await vectorStore.similaritySearch("hello", 5, { quality: keyword }); + expect(docs.length).toEqual(1); + // Assuming the document with "good" appears in the content (e.g. "foo bar") + expect(docs[0].metadata.quality).toContain(keyword); + + // Using $contains operator on unspecific metadata column "quality" + docs = await vectorStore.similaritySearch("hello", 5, { + quality: { $contains: keyword }, + }); + expect(docs.length).toEqual(1); + expect(docs[0].pageContent).toContain("foo"); + expect(docs[0].metadata.quality).toContain(keyword); + + // Test with a non-existing keyword on the unspecific metadata column + const nonExistingKeyword = "terrible"; + docs = await vectorStore.similaritySearch(nonExistingKeyword, 3, { + quality: { $contains: nonExistingKeyword }, + }); + expect(docs.length).toEqual(0); + }); +}); + describe("Filter Tests", () => { // Filter Test 1: Applying various filters from TYPE_1_FILTERING_TEST_CASES it.each(TYPE_1_FILTERING_TEST_CASES)( @@ -1329,3 +1438,68 @@ describe("Filter Tests", () => { } ); }); + +// Internal Embedding Functionality Tests +describe("Internal Embedding Functionality Tests", () => { + // Create an internal embeddings instance using the model ID from the environment. 
+ const internalModelId = process.env.HANA_DB_EMBEDDING_MODEL_ID; + if (typeof internalModelId !== 'string' || internalModelId.trim() === '') { + throw new Error("HANA_DB_EMBEDDING_MODEL_ID environment variable is not defined or is an empty string."); + } + const internalEmbedding = new HanaInternalEmbeddings({ + internalEmbeddingModelId: internalModelId, + }); + + test("test internal addDocuments", async () => { + const tableName = "TEST_TABLE_ADD_DOCUMENTS_INTERNAL"; + await dropTable(client, tableName); + const args: HanaDBArgs = { connection: client, tableName }; + const vectorDB = new HanaDB(internalEmbedding, args); + await vectorDB.initialize() + expect(vectorDB).toBeDefined(); + await vectorDB.addDocuments(DOCUMENTS); + const sqlStr = `SELECT COUNT(*) AS CNT FROM "${tableName}"`; + const result = await executeQuery(client, sqlStr); + expect(result[0].CNT).toEqual(DOCUMENTS.length); + await dropTable(client, tableName); + }); + + + test("test internal similarity search with metadata filter", async () => { + const tableName = "TEST_TABLE_FILTER_INTERNAL"; + await dropTable(client, tableName); + // Create the vector store using DOCUMENTS + const vectorDB = await HanaDB.fromDocuments(DOCUMENTS, internalEmbedding, { + connection: client, + tableName, + }); + let searchResult = await vectorDB.similaritySearch(DOCUMENTS[0].pageContent, 3,); + expect(searchResult.length).toEqual(3); + expect(searchResult[0].pageContent).toEqual(DOCUMENTS[0].pageContent); + + // DOCUMENT[0] has a height of 10, so it is filtered out. 
+ searchResult = await vectorDB.similaritySearch(DOCUMENTS[1].pageContent, 3, { height : { $lt: 10.0 } }); + expect(searchResult.length).toEqual(2); + expect(searchResult[0].pageContent).toEqual(DOCUMENTS[1].pageContent); + expect(searchResult[1].pageContent).toEqual(DOCUMENTS[2].pageContent); + await dropTable(client, tableName); + }); + + test("test internal max marginal relevance search", async () => { + const tableName = "TEST_TABLE_MAX_RELEVANCE_INTERNAL"; + await dropTable(client, tableName); + // Create the vector store using DOCUMENTS and an empty metadata object for fromDocuments + const vectorDB = await HanaDB.fromDocuments(DOCUMENTS, internalEmbedding, { + connection: client, + tableName, + }); + const searchResult = await vectorDB.maxMarginalRelevanceSearch(DOCUMENTS[0].pageContent, { + k: 2, + fetchK: 20, + }); + expect(searchResult.length).toEqual(2); + expect(searchResult[0].pageContent).toEqual(DOCUMENTS[0].pageContent); + await dropTable(client, tableName); + }); + +});