Skip to content

Commit a21865f

Browse files
committed
add python realtime speech transcription + select ai nl2sql
1 parent 3a5cc03 commit a21865f

File tree

3 files changed

+195
-164
lines changed

3 files changed

+195
-164
lines changed

python-realtimespeech-selectai/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ https://artifactory.oci.oraclecorp.com/ocas-service-platform-dev-pypi-local/oci.
22
https://artifactory.oci.oraclecorp.com/global-dev-pypi/oci-2.129.1+preview.1.1805-py3-none-any.whl
33
PyAudio>=0.2.14
44
websockets==11.0.3; python_version >= "3.7"
5-
websockets==9.1; python_version == "3.6"
5+
websockets==9.1; python_version == "3.6"
6+
oracledb

python-realtimespeech-selectai/src/RealtimeClient.py

Lines changed: 0 additions & 163 deletions
This file was deleted.
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import asyncio
2+
import getpass
3+
4+
import pyaudio
5+
import oracledb
6+
import oci
7+
from oci.config import from_file
8+
from oci.auth.signers.security_token_signer import SecurityTokenSigner
9+
from oci.ai_speech_realtime import (
10+
RealtimeClient,
11+
RealtimeClientListener,
12+
RealtimeParameters,
13+
)
14+
15+
pw = getpass.getpass("Enter database user password:")
16+
17+
# Use this when making a connection with a wallet
18+
connection = oracledb.connect(
19+
user="moviestream",
20+
password=pw,
21+
dsn="selectaidb_high",
22+
config_dir="/Users/pparkins/Downloads/Wallet_SelectAIDB",
23+
wallet_location="/Users/pparkins/Downloads/Wallet_SelectAIDB"
24+
)
25+
print("Successfully connected to Oracle Database")
26+
print(f"Connection details: {connection}")
27+
28+
# Create a FIFO queue
29+
queue = asyncio.Queue()
30+
31+
# Set audio parameters
32+
SAMPLE_RATE = 16000
33+
FORMAT = pyaudio.paInt16
34+
CHANNELS = 1
35+
BUFFER_DURATION_MS = 96
36+
37+
# Calculate the number of frames per buffer
38+
FRAMES_PER_BUFFER = int(SAMPLE_RATE * BUFFER_DURATION_MS / 1000)
39+
40+
# Variables to keep track of results and state
41+
cummulativeResult = ""
42+
isSelect = False
43+
last_result_time = None
44+
45+
def authenticator():
46+
config = from_file("~/.oci/config", "paulspeechai")
47+
with open(config["security_token_file"], "r") as f:
48+
token = f.readline()
49+
private_key = oci.signer.load_private_key_from_file(config["key_file"])
50+
auth = SecurityTokenSigner(token=token, private_key=private_key)
51+
return auth
52+
53+
def audio_callback(in_data, frame_count, time_info, status):
54+
# This function will be called by PyAudio when there's new audio data
55+
queue.put_nowait(in_data)
56+
return (None, pyaudio.paContinue)
57+
58+
p = pyaudio.PyAudio()
59+
60+
# Open the stream
61+
stream = p.open(
62+
format=FORMAT,
63+
channels=CHANNELS,
64+
rate=SAMPLE_RATE,
65+
input=True,
66+
frames_per_buffer=FRAMES_PER_BUFFER,
67+
stream_callback=audio_callback,
68+
)
69+
70+
stream.start_stream()
71+
config = from_file()
72+
73+
async def send_audio(client):
74+
while True:
75+
data = await queue.get()
76+
# Send it over the websocket
77+
await client.send_data(data)
78+
79+
class SpeechListener(RealtimeClientListener):
80+
def on_result(self, result):
81+
global cummulativeResult, isSelect, last_result_time
82+
if result["transcriptions"][0]["isFinal"]:
83+
transcription = result['transcriptions'][0]['transcription']
84+
cummulativeResult += transcription
85+
print(f"Received final results: {transcription}")
86+
print(f"Current cummulative result: {cummulativeResult}")
87+
if cummulativeResult.lower().startswith("select ai"):
88+
isSelect = True
89+
last_result_time = asyncio.get_event_loop().time()
90+
else:
91+
print(f"Received partial results: {result['transcriptions'][0]['transcription']}")
92+
93+
def on_ack_message(self, ackmessage):
94+
return super().on_ack_message(ackmessage)
95+
96+
def on_connect(self):
97+
return super().on_connect()
98+
99+
def on_connect_message(self, connectmessage):
100+
return super().on_connect_message(connectmessage)
101+
102+
def on_network_event(self, ackmessage):
103+
return super().on_network_event(ackmessage)
104+
105+
def on_error(self):
106+
return super().on_error()
107+
108+
async def check_idle():
109+
global last_result_time, isSelect
110+
while True:
111+
if isSelect and last_result_time and (asyncio.get_event_loop().time() - last_result_time > 2):
112+
executeSelectAI()
113+
isSelect = False
114+
await asyncio.sleep(1)
115+
116+
def executeSelectAI():
117+
global cummulativeResult
118+
print(f"executeSelectAI called cummulative result: {cummulativeResult}")
119+
# for example prompt => 'select ai I am looking for the top 5 selling movies for the latest month please',
120+
query = """SELECT DBMS_CLOUD_AI.GENERATE(
121+
prompt => :prompt,
122+
profile_name => 'openai_gpt35',
123+
action => 'narrate')
124+
FROM dual"""
125+
with connection.cursor() as cursor:
126+
cursor.execute(query, prompt=cummulativeResult)
127+
result = cursor.fetchone()
128+
if result and isinstance(result[0], oracledb.LOB):
129+
text_result = result[0].read()
130+
print(text_result)
131+
else:
132+
print(result)
133+
# Reset cumulativeResult after execution
134+
cummulativeResult = ""
135+
136+
137+
# logic such as the following could be added to make the app further dynamic as far as action type...
138+
# actionValue = 'narrate'
139+
# if cummulativeResult.lower().startswith("select ai narrate"):
140+
# actionValue = "narrate"
141+
# elif cummulativeResult.lower().startswith("select ai chat"):
142+
# actionValue = "chat"
143+
# elif cummulativeResult.lower().startswith("select ai showsql"):
144+
# actionValue = "showsql"
145+
# elif cummulativeResult.lower().startswith("select ai show sql"):
146+
# actionValue = "showsql"
147+
# elif cummulativeResult.lower().startswith("select ai runsql"):
148+
# actionValue = "runsql"
149+
# elif cummulativeResult.lower().startswith("select ai run sql"):
150+
# actionValue = "runsql"
151+
# # Note that "runsql" is not currently supported as action value
152+
# query = """SELECT DBMS_CLOUD_AI.GENERATE(
153+
# prompt => :prompt,
154+
# profile_name => 'openai_gpt35',
155+
# action => :actionValue)
156+
# FROM dual"""
157+
158+
if __name__ == "__main__":
159+
# Run the event loop
160+
def message_callback(message):
161+
print(f"Received message: {message}")
162+
163+
realtime_speech_parameters: RealtimeParameters = RealtimeParameters()
164+
realtime_speech_parameters.language_code = "en-US"
165+
realtime_speech_parameters.model_domain = (
166+
realtime_speech_parameters.MODEL_DOMAIN_GENERIC
167+
)
168+
realtime_speech_parameters.partial_silence_threshold_in_ms = 0
169+
realtime_speech_parameters.final_silence_threshold_in_ms = 2000
170+
realtime_speech_parameters.should_ignore_invalid_customizations = False
171+
realtime_speech_parameters.stabilize_partial_results = (
172+
realtime_speech_parameters.STABILIZE_PARTIAL_RESULTS_NONE
173+
)
174+
175+
realtime_speech_url = "wss://realtime.aiservice.us-phoenix-1.oci.oraclecloud.com"
176+
client = RealtimeClient(
177+
config=config,
178+
realtime_speech_parameters=realtime_speech_parameters,
179+
listener=SpeechListener(),
180+
service_endpoint=realtime_speech_url,
181+
signer=authenticator(),
182+
compartment_id="ocid1.compartment.oc1..MYCOMPARMENTID",
183+
)
184+
185+
loop = asyncio.get_event_loop()
186+
loop.create_task(send_audio(client))
187+
loop.create_task(check_idle())
188+
loop.run_until_complete(client.connect())
189+
190+
if stream.is_active():
191+
stream.close()
192+
193+
print("Closed now")

0 commit comments

Comments
 (0)