Skip to content

Commit df12dec

Browse files
lone17cin-alberttrducng
authored
Feat/local endpoint llm (#148)
* serve local model in a different process from the app --------- Co-authored-by: albert <[email protected]> Co-authored-by: trducng <[email protected]>
1 parent 2950e6e commit df12dec

20 files changed

+675
-79
lines changed

.gitattributes

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.bat text eol=crlf

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -466,4 +466,5 @@ examples/example1/assets
466466
storage/*
467467

468468
# Conda and env storages
469-
install_dir/
469+
*install_dir/
470+
doc_env

libs/kotaemon/kotaemon/agents/io/base.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from enum import Enum
66
from typing import Any, Dict, Literal, NamedTuple, Optional, Union
77

8-
from pydantic import Extra
8+
from pydantic import ConfigDict
99

1010
from kotaemon.base import LLMInterface
1111

@@ -238,7 +238,7 @@ class AgentFinish(NamedTuple):
238238
log: str
239239

240240

241-
class AgentOutput(LLMInterface, extra=Extra.allow): # type: ignore [call-arg]
241+
class AgentOutput(LLMInterface):
242242
"""Output from an agent.
243243
244244
Args:
@@ -248,6 +248,8 @@ class AgentOutput(LLMInterface, extra=Extra.allow): # type: ignore [call-arg]
248248
error: The error message if any.
249249
"""
250250

251+
model_config = ConfigDict(extra="allow")
252+
251253
text: str
252254
type: str = "agent"
253255
agent_type: AgentType

libs/kotaemon/kotaemon/embeddings/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .base import BaseEmbeddings
2+
from .endpoint_based import EndpointEmbeddings
23
from .langchain_based import (
34
AzureOpenAIEmbeddings,
45
CohereEmbdeddings,
@@ -8,6 +9,7 @@
89

910
__all__ = [
1011
"BaseEmbeddings",
12+
"EndpointEmbeddings",
1113
"OpenAIEmbeddings",
1214
"AzureOpenAIEmbeddings",
1315
"CohereEmbdeddings",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import requests
2+
3+
from kotaemon.base import Document, DocumentWithEmbedding
4+
5+
from .base import BaseEmbeddings
6+
7+
8+
class EndpointEmbeddings(BaseEmbeddings):
    """
    An Embeddings component that uses an OpenAI API compatible endpoint.

    Attributes:
        endpoint_url (str): The url of an OpenAI API compatible endpoint.
    """

    endpoint_url: str

    def run(
        self, text: str | list[str] | Document | list[Document]
    ) -> list[DocumentWithEmbedding]:
        """
        Generate embeddings from text.

        One POST request is sent to `endpoint_url` for each input item.

        Args:
            text (str | list[str] | Document | list[Document]): text to generate
                embeddings from

        Returns:
            list[DocumentWithEmbedding]: embeddings, one per input item, in the
                same order as the input

        Raises:
            requests.HTTPError: if the endpoint answers with an error status
        """
        if not isinstance(text, list):
            text = [text]

        outputs = []

        for item in text:
            # Non-string items (e.g. Document) are sent as their string form.
            content = str(item)
            response = requests.post(self.endpoint_url, json={"input": content})
            # Surface the HTTP failure itself instead of an opaque KeyError
            # when the error payload lacks the "data"/"usage" keys.
            response.raise_for_status()
            payload = response.json()
            usage = payload["usage"]
            outputs.append(
                DocumentWithEmbedding(
                    text=content,
                    embedding=payload["data"][0]["embedding"],
                    total_tokens=usage["total_tokens"],
                    prompt_tokens=usage["prompt_tokens"],
                )
            )

        return outputs

libs/kotaemon/kotaemon/indices/qa/citation.py

+6
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ def invoke(self, context: str, question: str):
108108
print(e)
109109
return None
110110

111+
if not llm_output.messages:
112+
return None
113+
111114
function_output = llm_output.messages[0].additional_kwargs["function_call"][
112115
"arguments"
113116
]
@@ -126,6 +129,9 @@ async def ainvoke(self, context: str, question: str):
126129
print(e)
127130
return None
128131

132+
if not llm_output.messages:
133+
return None
134+
129135
function_output = llm_output.messages[0].additional_kwargs["function_call"][
130136
"arguments"
131137
]

libs/kotaemon/kotaemon/llms/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from .base import BaseLLM
44
from .branching import GatedBranchingPipeline, SimpleBranchingPipeline
5-
from .chats import AzureChatOpenAI, ChatLLM, LlamaCppChat
5+
from .chats import AzureChatOpenAI, ChatLLM, EndpointChatLLM, LlamaCppChat
66
from .completions import LLM, AzureOpenAI, LlamaCpp, OpenAI
77
from .cot import ManualSequentialChainOfThought, Thought
88
from .linear import GatedLinearPipeline, SimpleLinearPipeline
@@ -12,6 +12,7 @@
1212
"BaseLLM",
1313
# chat-specific components
1414
"ChatLLM",
15+
"EndpointChatLLM",
1516
"BaseMessage",
1617
"HumanMessage",
1718
"AIMessage",
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
from .base import ChatLLM
2+
from .endpoint_based import EndpointChatLLM
23
from .langchain_based import AzureChatOpenAI, LCChatMixin
34
from .llamacpp import LlamaCppChat
45

5-
__all__ = ["ChatLLM", "AzureChatOpenAI", "LCChatMixin", "LlamaCppChat"]
6+
__all__ = [
7+
"ChatLLM",
8+
"EndpointChatLLM",
9+
"AzureChatOpenAI",
10+
"LCChatMixin",
11+
"LlamaCppChat",
12+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import requests
2+
3+
from kotaemon.base import (
4+
AIMessage,
5+
BaseMessage,
6+
HumanMessage,
7+
LLMInterface,
8+
SystemMessage,
9+
)
10+
11+
from .base import ChatLLM
12+
13+
14+
class EndpointChatLLM(ChatLLM):
    """
    A ChatLLM that uses an endpoint to generate responses. This expects an OpenAI API
    compatible endpoint.

    Attributes:
        endpoint_url (str): The url of an OpenAI API compatible endpoint.
    """

    endpoint_url: str

    def run(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """
        Generate response from messages

        Args:
            messages (str | BaseMessage | list[BaseMessage]): history of messages to
                generate response from
            **kwargs: accepted for interface compatibility; they are currently
                not included in the request sent to the endpoint

        Returns:
            LLMInterface: generated response. `content` is the first non-empty
                candidate, or an empty string when the endpoint returned none.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status
        """
        # Normalize the three accepted input shapes into a list of messages.
        if isinstance(messages, str):
            input_ = [HumanMessage(content=messages)]
        elif isinstance(messages, BaseMessage):
            input_ = [messages]
        else:
            input_ = messages

        def decide_role(message: BaseMessage):
            # Anything that is neither a system nor an AI message is sent as "user".
            if isinstance(message, SystemMessage):
                return "system"
            if isinstance(message, AIMessage):
                return "assistant"
            return "user"

        request_json = {
            "messages": [{"content": m.text, "role": decide_role(m)} for m in input_]
        }

        http_response = requests.post(self.endpoint_url, json=request_json)
        # Surface the HTTP failure itself instead of an opaque KeyError
        # when the error payload lacks the "choices"/"usage" keys.
        http_response.raise_for_status()
        response = http_response.json()

        # Keep only choices that actually carry text; a choice may have empty
        # or null content.
        candidates = [
            each["message"]["content"]
            for each in (response["choices"] or [])
            if each["message"]["content"]
        ]
        # Guard against every choice being filtered out above, which previously
        # raised IndexError on candidates[0].
        content = candidates[0] if candidates else ""

        usage = response["usage"]
        return LLMInterface(
            content=content,
            candidates=candidates,
            completion_tokens=usage["completion_tokens"],
            total_tokens=usage["total_tokens"],
            prompt_tokens=usage["prompt_tokens"],
        )

    def invoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Same as run"""
        return self.run(messages, **kwargs)

    async def ainvoke(
        self, messages: str | BaseMessage | list[BaseMessage], **kwargs
    ) -> LLMInterface:
        """Async entry point; delegates directly to the synchronous `invoke`."""
        return self.invoke(messages, **kwargs)

libs/ktem/flowsettings.py

+56-37
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
COHERE_API_KEY = config("COHERE_API_KEY", default="")
1414
KH_MODE = "dev"
15-
KH_FEATURE_USER_MANAGEMENT = True
15+
KH_FEATURE_USER_MANAGEMENT = False
1616
KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
1717
config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
1818
)
@@ -21,6 +21,8 @@
2121
)
2222
KH_ENABLE_ALEMBIC = False
2323
KH_DATABASE = f"sqlite:///{user_cache_dir / 'sql.db'}"
24+
KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
25+
2426
KH_DOCSTORE = {
2527
"__type__": "kotaemon.storages.SimpleFileDocumentStore",
2628
"path": str(user_cache_dir / "docstore"),
@@ -29,51 +31,68 @@
2931
"__type__": "kotaemon.storages.ChromaVectorStore",
3032
"path": str(user_cache_dir / "vectorstore"),
3133
}
32-
KH_FILESTORAGE_PATH = str(user_cache_dir / "files")
3334
KH_LLMS = {
34-
"gpt4": {
35+
# example for using Azure OpenAI; the config variables can be set as environment
36+
# variables or in the .env file
37+
# "gpt4": {
38+
# "def": {
39+
# "__type__": "kotaemon.llms.AzureChatOpenAI",
40+
# "temperature": 0,
41+
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
42+
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
43+
# "openai_api_version": config("OPENAI_API_VERSION", default=""),
44+
# "deployment_name": "<your deployment name>",
45+
# "stream": True,
46+
# },
47+
# "accuracy": 10,
48+
# "cost": 10,
49+
# "default": False,
50+
# },
51+
# "gpt35": {
52+
# "def": {
53+
# "__type__": "kotaemon.llms.AzureChatOpenAI",
54+
# "temperature": 0,
55+
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
56+
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
57+
# "openai_api_version": config("OPENAI_API_VERSION", default=""),
58+
# "deployment_name": "<your deployment name>",
59+
# "request_timeout": 10,
60+
# "stream": False,
61+
# },
62+
# "accuracy": 5,
63+
# "cost": 5,
64+
# "default": False,
65+
# },
66+
"local": {
3567
"def": {
36-
"__type__": "kotaemon.llms.AzureChatOpenAI",
37-
"temperature": 0,
38-
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
39-
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
40-
"openai_api_version": config("OPENAI_API_VERSION", default=""),
41-
"deployment_name": "dummy-q2",
42-
"stream": True,
68+
"__type__": "kotaemon.llms.EndpointChatLLM",
69+
"endpoint_url": "http://localhost:31415/v1/chat/completions",
4370
},
44-
"accuracy": 10,
45-
"cost": 10,
4671
"default": False,
4772
},
48-
"gpt35": {
49-
"def": {
50-
"__type__": "kotaemon.llms.AzureChatOpenAI",
51-
"temperature": 0,
52-
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
53-
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
54-
"openai_api_version": config("OPENAI_API_VERSION", default=""),
55-
"deployment_name": "dummy-q2",
56-
"request_timeout": 10,
57-
"stream": False,
58-
},
59-
"accuracy": 5,
60-
"cost": 5,
61-
"default": True,
62-
},
6373
}
6474
KH_EMBEDDINGS = {
65-
"ada": {
75+
# example for using Azure OpenAI; the config variables can be set as environment
76+
# variables or in the .env file
77+
# "ada": {
78+
# "def": {
79+
# "__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
80+
# "model": "text-embedding-ada-002",
81+
# "azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
82+
# "openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
83+
# "deployment": "<your deployment name>",
84+
# "chunk_size": 16,
85+
# },
86+
# "accuracy": 5,
87+
# "cost": 5,
88+
# "default": True,
89+
# },
90+
"local": {
6691
"def": {
67-
"__type__": "kotaemon.embeddings.AzureOpenAIEmbeddings",
68-
"model": "text-embedding-ada-002",
69-
"azure_endpoint": config("AZURE_OPENAI_ENDPOINT", default=""),
70-
"openai_api_key": config("AZURE_OPENAI_API_KEY", default=""),
71-
"deployment": "dummy-q2-text-embedding",
72-
"chunk_size": 16,
92+
"__type__": "kotaemon.embeddings.EndpointEmbeddings",
93+
"endpoint_url": "http://localhost:31415/v1/embeddings",
7394
},
74-
"accuracy": 5,
75-
"cost": 5,
76-
"default": True,
95+
"default": False,
7796
},
7897
}
7998
KH_REASONINGS = ["ktem.reasoning.simple.FullQAPipeline"]

libs/ktem/ktem/index/file/pipelines.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def run(
118118

119119
# rerank
120120
docs = self.vector_retrieval(text=text, top_k=top_k, **kwargs)
121-
if self.get_from_path("reranker"):
121+
if docs and self.get_from_path("reranker"):
122122
docs = self.reranker(docs, query=text)
123123

124124
if not self.get_extra_table:

0 commit comments

Comments
 (0)