1
+ """
2
+ Simple token counter for each LLM response.
3
+ """
4
+
5
+ from prometheus_client import Histogram , Info
6
+ from . pricelist import price_list
7
+
8
+ from .. schema import TextCompletionResponse , Error
9
+ from .. schema import text_completion_response_queue
10
+ from .. log_level import LogLevel
11
+ from .. base import Consumer
12
+
13
# Dotted path built from __name__ with its first and last components
# dropped; it is handed to Processor.start() and doubles as the default
# subscriber name below.
module = ".".join(
    __name__.split(".")[1:-1]
)

# Fallbacks used when the caller does not supply these parameters.
default_subscriber = module
default_input_queue = text_completion_response_queue
17
+
18
+
19
class Processor(Consumer):
    """Consumer that prints token usage and cost for each completion response.

    Each message taken from the input queue is expected to carry a
    ``TextCompletionResponse`` value exposing ``model``, ``in_token`` and
    ``out_token``. The cost is derived from the per-model prices held in
    ``price_list``.
    """

    def __init__(self, **params):
        """Configure the consumer, filling in queue/subscriber defaults.

        Args:
            **params: Consumer settings. ``input_queue`` and ``subscriber``
                fall back to the module-level defaults when absent;
                ``input_schema`` is always set to ``TextCompletionResponse``.
        """
        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)

        # The right-hand dict wins on key collisions, so the resolved values
        # (and the fixed schema) override whatever was passed in.
        super().__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": TextCompletionResponse,
            }
        )

    def get_prices(self, prices, modelname):
        """Look up the input and output prices for a model.

        Args:
            prices: Mapping with a ``"price_list"`` entry holding records
                that each have ``"model_name"``, ``"input_price"`` and
                ``"output_price"`` keys.
            modelname: Model name to search for (exact match).

        Returns:
            ``(input_price, output_price)`` of the first matching record, or
            ``(None, None)`` when no record matches.
        """
        for entry in prices["price_list"]:
            if entry["model_name"] == modelname:
                return entry["input_price"], entry["output_price"]
        return None, None  # Model is not in the price list

    def handle(self, msg):
        """Print the token counts and, when priced, the cost of one response.

        Args:
            msg: Incoming message; ``msg.value()`` supplies the response and
                ``msg.properties()["id"]`` the sender-produced ID.
        """
        v = msg.value()
        modelname = v.model

        # Sender-produced ID (named to avoid shadowing the builtin ``id``)
        response_id = msg.properties()["id"]

        print(f"Handling response {response_id} ...", flush=True)

        num_in = v.in_token
        num_out = v.out_token

        print(f"Input Tokens: {num_in} ", flush=True)
        print(f"Output Tokens: {num_out} ", flush=True)

        input_price, output_price = self.get_prices(price_list, modelname)

        # get_prices() yields None for unlisted models; multiplying by None
        # would raise TypeError, so report the gap instead of crashing.
        if input_price is None or output_price is None:
            print(
                f"Cost for call: unknown (no price listed for model "
                f"{modelname!r})",
                flush=True,
            )
            return

        cost_in = num_in * input_price
        cost_out = num_out * output_price
        cost_per_call = cost_in + cost_out

        print(f"Cost for call: ${cost_per_call:.6f} ", flush=True)

    @staticmethod
    def add_args(parser):
        """Register the consumer's command-line arguments on ``parser``.

        Delegates to ``Consumer.add_args`` with this module's default input
        queue and subscriber.
        """
        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )
68
+
69
def run():
    """Start the processor, passing the module name and module docstring."""
    # __doc__ here resolves to the module-level docstring, not this one.
    description = __doc__
    Processor.start(module, description)
0 commit comments