Skip to content

Commit c92e7bd

Browse files
committed
fix(hf): Upgrade @huggingface/inference to v3
Changes:
- Migrated from `HfInference` to `InferenceClient`.
- Added a `provider` parameter, as it is required by the new version. The parameter can also be set using the `HUGGINGFACEHUB_PROVIDER` environment variable.
- Updated documentation accordingly.
- Fixed a broken integration test, and added a new test for when the new `provider` parameter is used.

Other improvements:
- The `model` parameter now supports being set via the `HUGGINGFACEHUB_MODEL` environment variable, in addition to the constructor argument.
- If no `model` is provided, a warning is logged that the default `BAAI/bge-base-en-v1.5` is used.
1 parent 210c2ab commit c92e7bd

File tree

5 files changed

+83
-17
lines changed

5 files changed

+83
-17
lines changed

docs/core_docs/docs/integrations/text_embedding/hugging_face_inference.mdx

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
# HuggingFace Inference
22

3-
This Embeddings integration uses the HuggingFace Inference API to generate embeddings for a given text using by default the `sentence-transformers/distilbert-base-nli-mean-tokens` model. You can pass a different model name to the constructor to use a different model.
3+
This Embeddings integration uses the HuggingFace Inference API to generate embeddings for a given text, using the `BAAI/bge-base-en-v1.5` model by default. You can pass a different model name to the constructor to use a different model.
4+
The current HuggingFace API also expects you to specify a `provider`, but has a fallback auto-select mode.
45

56
## Setup
67

7-
You'll first need to install the [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) package and the required peer dep:
8+
You'll first need to install the [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) package and the required peer dependency:
89

910
import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
1011

1112
<IntegrationInstallTooltip></IntegrationInstallTooltip>
1213

1314
```bash npm2yarn
14-
npm install @langchain/community @langchain/core @huggingface/inference@2
15+
npm install @langchain/community @langchain/core @huggingface/inference@3
1516
```
1617

1718
## Usage
@@ -20,10 +21,19 @@ npm install @langchain/community @langchain/core @huggingface/inference@2
2021
import { HuggingFaceInferenceEmbeddings } from "@langchain/community/embeddings/hf";
2122

2223
const embeddings = new HuggingFaceInferenceEmbeddings({
23-
apiKey: "YOUR-API-KEY", // In Node.js defaults to process.env.HUGGINGFACEHUB_API_KEY
24+
apiKey: "YOUR-API-KEY", // Defaults to process.env.HUGGINGFACEHUB_API_KEY
25+
model: "MODEL-NAME", // Defaults to process.env.HUGGINGFACEHUB_MODEL, or `BAAI/bge-base-en-v1.5` if not provided
26+
provider: "MODEL-PROVIDER", // Defaults to process.env.HUGGINGFACEHUB_PROVIDER, or `auto` if not provided
2427
});
2528
```
2629

30+
> **Note:**
31+
> If you do not provide a `model`, a warning will be logged and the default model `BAAI/bge-base-en-v1.5` will be used.
32+
> If you do not provide a `provider`, Hugging Face will default the provider to `auto`, which will select the first provider available for the model based on your settings at https://hf.co/settings/inference-providers.
33+
34+
> **Hint:**
35+
> `hf-inference` is the provider name for models that are hosted directly by Hugging Face.
36+
2737
## Related
2838

2939
- Embedding model [conceptual guide](/docs/concepts/embedding_models)

libs/langchain-community/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
"@google-ai/generativelanguage": "^2.5.0",
8080
"@google-cloud/storage": "^7.15.2",
8181
"@gradientai/nodejs-sdk": "^1.2.0",
82-
"@huggingface/inference": "^2.6.4",
82+
"@huggingface/inference": "^3.13.2",
8383
"@huggingface/transformers": "^3.2.3",
8484
"@ibm-cloud/watsonx-ai": "^1.6.4",
8585
"@jest/globals": "^29.5.0",
@@ -254,7 +254,7 @@
254254
"@google-ai/generativelanguage": "*",
255255
"@google-cloud/storage": "^6.10.1 || ^7.7.0",
256256
"@gradientai/nodejs-sdk": "^1.2.0",
257-
"@huggingface/inference": "^2.6.4",
257+
"@huggingface/inference": "^3.13.2",
258258
"@huggingface/transformers": "^3.2.3",
259259
"@ibm-cloud/watsonx-ai": "*",
260260
"@lancedb/lancedb": "^0.12.0",

libs/langchain-community/src/embeddings/hf.ts

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { HfInference, HfInferenceEndpoint } from "@huggingface/inference";
1+
import { InferenceClient } from "@huggingface/inference";
22
import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings";
33
import { getEnvironmentVariable } from "@langchain/core/utils/env";
44

@@ -10,6 +10,7 @@ export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams {
1010
apiKey?: string;
1111
model?: string;
1212
endpointUrl?: string;
13+
provider?: string;
1314
}
1415

1516
/**
@@ -27,18 +28,32 @@ export class HuggingFaceInferenceEmbeddings
2728

2829
endpointUrl?: string;
2930

30-
client: HfInference | HfInferenceEndpoint;
31+
provider?: string;
32+
33+
client: InferenceClient;
3134

3235
constructor(fields?: HuggingFaceInferenceEmbeddingsParams) {
3336
super(fields ?? {});
3437

35-
this.model = fields?.model ?? "BAAI/bge-base-en-v1.5";
38+
const envModel = getEnvironmentVariable("HUGGINGFACEHUB_MODEL");
39+
if (fields?.model) {
40+
this.model = fields.model;
41+
} else if (envModel) {
42+
this.model = envModel;
43+
} else {
44+
console.warn(
45+
'[HuggingFaceInferenceEmbeddings] No "model" provided. Using default: "BAAI/bge-base-en-v1.5".'
46+
);
47+
this.model = "BAAI/bge-base-en-v1.5";
48+
}
3649
this.apiKey =
3750
fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY");
3851
this.endpointUrl = fields?.endpointUrl;
52+
this.provider =
53+
fields?.provider ?? getEnvironmentVariable("HUGGINGFACEHUB_PROVIDER");
3954
this.client = this.endpointUrl
40-
? new HfInference(this.apiKey).endpoint(this.endpointUrl)
41-
: new HfInference(this.apiKey);
55+
? new InferenceClient(this.apiKey).endpoint(this.endpointUrl)
56+
: new InferenceClient(this.apiKey);
4257
}
4358

4459
async _embed(texts: string[]): Promise<number[][]> {
@@ -48,6 +63,7 @@ export class HuggingFaceInferenceEmbeddings
4863
this.client.featureExtraction({
4964
model: this.model,
5065
inputs: clean,
66+
provider: this.provider,
5167
})
5268
) as Promise<number[][]>;
5369
}

libs/langchain-community/src/embeddings/tests/hf.int.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,36 @@ test("HuggingFaceInferenceEmbeddings", async () => {
2323
expect(await store.similaritySearch(texts[4], 2)).toMatchInlineSnapshot(`
2424
[
2525
Document {
26+
"id": undefined,
2627
"metadata": {},
2728
"pageContent": "1 + 1 = 2",
2829
},
2930
Document {
31+
"id": undefined,
3032
"metadata": {},
3133
"pageContent": "1 + 1 = 3",
3234
},
3335
]
3436
`);
3537
});
38+
39+
test("HuggingFaceInferenceEmbeddings with explicit model and provider", async () => {
40+
const model = "BAAI/bge-small-en-v1.5";
41+
const provider = "hf-inference";
42+
const embeddings = new HuggingFaceInferenceEmbeddings({
43+
model,
44+
provider,
45+
});
46+
47+
const texts = ["Integration test input 1", "Integration test input 2"];
48+
49+
const queryEmbedding = await embeddings.embedQuery(texts[0]);
50+
expect(Array.isArray(queryEmbedding)).toBe(true);
51+
expect(typeof queryEmbedding[0]).toBe("number");
52+
53+
const store = await HNSWLib.fromTexts(texts, {}, embeddings);
54+
const results = await store.similaritySearch(texts[1], 1);
55+
56+
expect(results.length).toBe(1);
57+
expect(results[0].pageContent).toBe("Integration test input 2");
58+
});

yarn.lock

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5972,10 +5972,13 @@ __metadata:
59725972
languageName: node
59735973
linkType: hard
59745974

5975-
"@huggingface/inference@npm:^2.6.4":
5976-
version: 2.6.4
5977-
resolution: "@huggingface/inference@npm:2.6.4"
5978-
checksum: 7d48960a62d0621d4c3f1edd183aa5d7829d297110b5720c78291aac17ed58b6a9af8eaf8a3f2cbb9dabfda3cf48931f59cf491cdedefd624f90d93fa3927981
5975+
"@huggingface/inference@npm:^3.13.2":
5976+
version: 3.13.2
5977+
resolution: "@huggingface/inference@npm:3.13.2"
5978+
dependencies:
5979+
"@huggingface/jinja": ^0.5.0
5980+
"@huggingface/tasks": ^0.19.6
5981+
checksum: 347192023adcaa2ec70d4de10b33306592c1b9fbef3cf0eec7de7a11821f2dfd733fbbb53be6e0dbf962a82b499ff816e705758f7c0b7d165367689c6f2ffd87
59795982
languageName: node
59805983
linkType: hard
59815984

@@ -5986,6 +5989,20 @@ __metadata:
59865989
languageName: node
59875990
linkType: hard
59885991

5992+
"@huggingface/jinja@npm:^0.5.0":
5993+
version: 0.5.0
5994+
resolution: "@huggingface/jinja@npm:0.5.0"
5995+
checksum: 8bb04021f381158d5e358a166f8eec250785c0dc9f70a04b92d0aabddc8583a9f20026c808be59d4926787087399ca8f5a4cc8fc37749be35bea63473af19c53
5996+
languageName: node
5997+
linkType: hard
5998+
5999+
"@huggingface/tasks@npm:^0.19.6":
6000+
version: 0.19.8
6001+
resolution: "@huggingface/tasks@npm:0.19.8"
6002+
checksum: 89377ebda7b5cdecf12e805b52eb8800d3f0fb337855e4d3b4e082aeae9c1ad5492f94ea9492e45d1c7c23779836bf168b7f21103be3fb1c1651be67fa0f89bb
6003+
languageName: node
6004+
linkType: hard
6005+
59896006
"@huggingface/transformers@npm:^3.2.3":
59906007
version: 3.2.4
59916008
resolution: "@huggingface/transformers@npm:3.2.4"
@@ -7264,7 +7281,7 @@ __metadata:
72647281
"@google-ai/generativelanguage": ^2.5.0
72657282
"@google-cloud/storage": ^7.15.2
72667283
"@gradientai/nodejs-sdk": ^1.2.0
7267-
"@huggingface/inference": ^2.6.4
7284+
"@huggingface/inference": ^3.13.2
72687285
"@huggingface/transformers": ^3.2.3
72697286
"@ibm-cloud/watsonx-ai": ^1.6.4
72707287
"@jest/globals": ^29.5.0
@@ -7449,7 +7466,7 @@ __metadata:
74497466
"@google-ai/generativelanguage": "*"
74507467
"@google-cloud/storage": ^6.10.1 || ^7.7.0
74517468
"@gradientai/nodejs-sdk": ^1.2.0
7452-
"@huggingface/inference": ^2.6.4
7469+
"@huggingface/inference": ^3.13.2
74537470
"@huggingface/transformers": ^3.2.3
74547471
"@ibm-cloud/watsonx-ai": "*"
74557472
"@lancedb/lancedb": ^0.12.0

0 commit comments

Comments (0)