
Commit 2a49365

Adds basic metering infrastructure (#68)
* Basic metering module structure
* Token counting working for Bedrock
* Price calc using price list
* Added more models to pricelist
* Added Ollama token counts

Authored-by: JackColquitt <[email protected]>
1 parent 8085bb0 commit 2a49365

12 files changed: +176 -8 lines

Containerfile (+1 -1)

@@ -13,7 +13,7 @@ RUN dnf install -y python3 python3-pip python3-wheel python3-aiohttp \
 
 RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
 
-RUN pip3 install anthropic boto3 cohere openai google-cloud-aiplatform \
+RUN pip3 install anthropic boto3 cohere openai google-cloud-aiplatform ollama \
     langchain langchain-core langchain-huggingface langchain-text-splitters \
     langchain-community pymilvus sentence-transformers transformers \
     huggingface-hub pulsar-client cassandra-driver pyarrow pyyaml \

prometheus/prometheus.yml (+1)

@@ -29,6 +29,7 @@ scrape_configs:
         - 'kg-extract-definitions:8000'
         - 'kg-extract-topics:8000'
         - 'kg-extract-relationships:8000'
+        - 'metering:8000'
         - 'store-graph-embeddings:8000'
         - 'store-triples:8000'
         - 'text-completion:8000'

requirements.txt (+1)

@@ -20,3 +20,4 @@ pyyaml
 prometheus-client
 pyarrow
 boto3
+ollama

scripts/metering (+5)

@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+from trustgraph.metering import run
+
+run()

setup.py (+1)

@@ -81,6 +81,7 @@
         "scripts/load-pdf",
         "scripts/load-text",
         "scripts/load-triples",
+        "scripts/metering",
         "scripts/object-extract-row",
         "scripts/oe-write-milvus",
         "scripts/pdf-decoder",

trustgraph/metering/__init__.py (+3)

@@ -0,0 +1,3 @@
+
+from . counter import *
+

trustgraph/metering/__main__.py (+7)

@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+
+from . counter import run
+
+if __name__ == '__main__':
+    run()
+

trustgraph/metering/counter.py (+71)

@@ -0,0 +1,71 @@
+"""
+Simple token counter for each LLM response.
+"""
+
+from prometheus_client import Histogram, Info
+from . pricelist import price_list
+
+from .. schema import TextCompletionResponse, Error
+from .. schema import text_completion_response_queue
+from .. log_level import LogLevel
+from .. base import Consumer
+
+module = ".".join(__name__.split(".")[1:-1])
+
+default_input_queue = text_completion_response_queue
+default_subscriber = module
+
+
+class Processor(Consumer):
+
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        subscriber = params.get("subscriber", default_subscriber)
+
+        super(Processor, self).__init__(
+            **params | {
+                "input_queue": input_queue,
+                "subscriber": subscriber,
+                "input_schema": TextCompletionResponse,
+            }
+        )
+
+    def get_prices(self, prices, modelname):
+        for model in prices["price_list"]:
+            if model["model_name"] == modelname:
+                return model["input_price"], model["output_price"]
+        return None, None  # Return None if model is not found
+
+    def handle(self, msg):
+
+        v = msg.value()
+        modelname = v.model
+
+        # Sender-produced ID
+        id = msg.properties()["id"]
+
+        print(f"Handling response {id}...", flush=True)
+
+        num_in = v.in_token
+        num_out = v.out_token
+
+        model_input_price, model_output_price = self.get_prices(price_list, modelname)
+        cost_in = num_in * model_input_price
+        cost_out = num_out * model_output_price
+        cost_per_call = cost_in + cost_out
+
+        print(f"Input Tokens: {num_in}", flush=True)
+        print(f"Output Tokens: {num_out}", flush=True)
+        print(f"Cost for call: ${cost_per_call:.6f}", flush=True)
+
+    @staticmethod
+    def add_args(parser):
+
+        Consumer.add_args(
+            parser, default_input_queue, default_subscriber,
+        )
+
+def run():
+
+    Processor.start(module, __doc__)
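
One caveat in the cost arithmetic above: get_prices returns (None, None) when a model has no entry in the price list, so handle would raise a TypeError on the multiplication for an unknown model. A minimal defensive sketch, assuming you would rather skip costing than crash (the guard itself is hypothetical, not part of this commit):

    # Hypothetical guard, not in this commit: skip costing when the
    # model has no price-list entry instead of raising TypeError.
    model_input_price, model_output_price = self.get_prices(price_list, modelname)

    if model_input_price is None or model_output_price is None:
        print(f"No price entry for model {modelname}; skipping cost", flush=True)
        return

    cost_per_call = num_in * model_input_price + num_out * model_output_price
    print(f"Cost for call: ${cost_per_call:.6f}", flush=True)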

trustgraph/metering/pricelist.py (+49)

@@ -0,0 +1,49 @@
+price_list = {
+    "price_list": [
+        {
+            "model_name": "mistral.mistral-large-2407-v1:0",
+            "input_price": 0.000004,
+            "output_price": 0.000012
+        },
+        {
+            "model_name": "meta.llama3-1-405b-instruct-v1:0",
+            "input_price": 0.00000532,
+            "output_price": 0.000016
+        },
+        {
+            "model_name": "mistral.mixtral-8x7b-instruct-v0:1",
+            "input_price": 0.00000045,
+            "output_price": 0.0000007
+        },
+        {
+            "model_name": "meta.llama3-1-70b-instruct-v1:0",
+            "input_price": 0.00000099,
+            "output_price": 0.00000099
+        },
+        {
+            "model_name": "meta.llama3-1-8b-instruct-v1:0",
+            "input_price": 0.00000022,
+            "output_price": 0.00000022
+        },
+        {
+            "model_name": "anthropic.claude-3-haiku-20240307-v1:0",
+            "input_price": 0.00000025,
+            "output_price": 0.00000125
+        },
+        {
+            "model_name": "anthropic.claude-3-5-sonnet-20240620-v1:0",
+            "input_price": 0.000003,
+            "output_price": 0.000015
+        },
+        {
+            "model_name": "cohere.command-r-plus-v1:0",
+            "input_price": 0.0000030,
+            "output_price": 0.0000150
+        },
+        {
+            "model_name": "ollama",
+            "input_price": 0,
+            "output_price": 0
+        }
+    ]
+}
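
For scale: prices are USD per token, so 1,000 input and 500 output tokens on anthropic.claude-3-5-sonnet-20240620-v1:0 cost 1000 × 0.000003 + 500 × 0.000015 = $0.0105. A quick standalone check, assuming the module path introduced by this commit:

    # Worked example against the price list above.
    from trustgraph.metering.pricelist import price_list

    def lookup(name):
        for entry in price_list["price_list"]:
            if entry["model_name"] == name:
                return entry["input_price"], entry["output_price"]
        return None, None

    inp, outp = lookup("anthropic.claude-3-5-sonnet-20240620-v1:0")
    print(f"${1000 * inp + 500 * outp:.6f}")  # prints $0.010500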

trustgraph/model/text_completion/bedrock/llm.py (+17 -2)

@@ -209,14 +209,23 @@ def handle(self, msg):
         # Use Mistral as default
         else:
             response_body = json.loads(response.get("body").read())
-            outputtext = response_body['outputs'][0]['text']
+            outputtext = response_body['outputs'][0]['text']
+
+        metadata = response['ResponseMetadata']['HTTPHeaders']
+        inputtokens = int(metadata['x-amzn-bedrock-input-token-count'])
+        outputtokens = int(metadata['x-amzn-bedrock-output-token-count'])
 
         print(outputtext, flush=True)
+        print(f"Input Tokens: {inputtokens}", flush=True)
+        print(f"Output Tokens: {outputtokens}", flush=True)
 
         print("Send response...", flush=True)
         r = TextCompletionResponse(
             error=None,
-            response=outputtext
+            response=outputtext,
+            in_token=inputtokens,
+            out_token=outputtokens,
+            model=str(self.model),
         )
 
         self.send(r, properties={"id": id})

@@ -236,6 +245,9 @@ def handle(self, msg):
                 message = str(e),
             ),
             response=None,
+            in_token=None,
+            out_token=None,
+            model=None,
         )
 
         self.producer.send(r, properties={"id": id})

@@ -254,6 +266,9 @@ def handle(self, msg):
                 message = str(e),
             ),
             response=None,
+            in_token=None,
+            out_token=None,
+            model=None,
         )
 
         self.consumer.acknowledge(msg)
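
The token counts here come from the x-amzn-bedrock-input-token-count and x-amzn-bedrock-output-token-count HTTP headers that Bedrock attaches to every invoke_model response, rather than from the response body. A minimal standalone sketch of the same extraction (region, model ID, and request-body shape are illustrative assumptions):

    # Minimal sketch: read Bedrock token counts from response headers.
    # Region, model ID and body shape below are illustrative assumptions.
    import json
    import boto3

    client = boto3.client("bedrock-runtime", region_name="us-west-2")

    response = client.invoke_model(
        modelId="mistral.mistral-large-2407-v1:0",
        body=json.dumps({"prompt": "Hello", "max_tokens": 64}),
    )

    headers = response["ResponseMetadata"]["HTTPHeaders"]
    inputtokens = int(headers["x-amzn-bedrock-input-token-count"])
    outputtokens = int(headers["x-amzn-bedrock-output-token-count"])
    print(inputtokens, outputtokens)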

trustgraph/model/text_completion/ollama/llm.py (+16 -4)

@@ -4,7 +4,7 @@
 Input is prompt, output is response.
 """
 
-from langchain_community.llms import Ollama
+from ollama import Client
 from prometheus_client import Histogram, Info
 
 from .... schema import TextCompletionRequest, TextCompletionResponse, Error

@@ -67,7 +67,8 @@ def __init__(self, **params):
             "ollama": ollama,
         })
 
-        self.llm = Ollama(base_url=ollama, model=model)
+        self.model = model
+        self.llm = Client(host=ollama)
 
     def handle(self, msg):
 

@@ -83,11 +84,16 @@ def handle(self, msg):
         try:
 
             with __class__.text_completion_metric.time():
-                response = self.llm.invoke(prompt)
+                response = self.llm.generate(self.model, prompt)
 
+            response_text = response['response']
             print("Send response...", flush=True)
+            print(response_text, flush=True)
 
-            r = TextCompletionResponse(response=response, error=None)
+            inputtokens = int(response['prompt_eval_count'])
+            outputtokens = int(response['eval_count'])
+
+            r = TextCompletionResponse(response=response_text, error=None, in_token=inputtokens, out_token=outputtokens, model="ollama")
 
             self.send(r, properties={"id": id})

@@ -105,6 +111,9 @@ def handle(self, msg):
                 message = str(e),
             ),
             response=None,
+            in_token=None,
+            out_token=None,
+            model=None,
         )
 
         self.producer.send(r, properties={"id": id})

@@ -123,6 +132,9 @@ def handle(self, msg):
                 message = str(e),
             ),
             response=None,
+            in_token=None,
+            out_token=None,
+            model=None,
         )
 
         self.producer.send(r, properties={"id": id})
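
Unlike the langchain wrapper, which returned a bare string, the ollama Client's generate() returns a structured response whose prompt_eval_count and eval_count fields carry the token counts directly. A quick sketch of the fields the change relies on (host and model name are assumptions):

    # Quick sketch of the ollama generate() response fields used above.
    # Host and model name are illustrative assumptions.
    from ollama import Client

    client = Client(host="http://localhost:11434")
    response = client.generate("gemma2", "Why is the sky blue?")

    print(response["response"])           # generated text
    print(response["prompt_eval_count"])  # input token count
    print(response["eval_count"])         # output token count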

trustgraph/schema/models.py (+4 -1)

@@ -1,5 +1,5 @@
 
-from pulsar.schema import Record, String, Array, Double
+from pulsar.schema import Record, String, Array, Double, Integer
 
 from . topic import topic
 from . types import Error

@@ -14,6 +14,9 @@ class TextCompletionRequest(Record):
 class TextCompletionResponse(Record):
     error = Error()
     response = String()
+    in_token = Integer()
+    out_token = Integer()
+    model = String()
 
 text_completion_request_queue = topic(
     'text-completion', kind='non-persistent', namespace='request'
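
With these fields on the record, every producer now reports usage alongside the response; constructing one looks like this (values illustrative):

    # Illustrative construction of the extended record.
    r = TextCompletionResponse(
        error=None,
        response="The sky is blue because...",
        in_token=9,
        out_token=42,
        model="ollama",
    )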
