Skip to content

Commit 4b894d7

Browse files
authored
Merge pull request #26 from paulparkinson/main
add python realtime speech transcription + select ai nl2sql
2 parents be26153 + 156b1fb commit 4b894d7

File tree

4 files changed

+364
-10
lines changed

4 files changed

+364
-10
lines changed

.gitignore

Lines changed: 163 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -500,16 +500,169 @@ workshops/eventmesh-teq-kafka/.java-version
500500
## Remove Deploy Dir -- K8s Deployment artifacts
501501
workshops/eventmesh-teq-kafka/**/deploy/
502502

503-
## Remove Other Local project stuffs
504-
workshops/eventmesh-teq-kafka/cloud-setup/database/
505-
workshops/eventmesh-teq-kafka/cloud-setup/terraform_dbsystem/
506-
workshops/eventmesh-teq-kafka/wallet/
507-
workshops/oracleAQ/aqJava/.project
508-
workshops/oracleAQ/aqJava/.classpath
509-
workshops/oracleAQ/qJava/.classpath
510-
workshops/oracleAQ/qJava/.factorypath
511-
workshops/oracleAQ/qJava/.project
512-
*.prefs
503+
## Python
504+
# Byte-compiled / optimized / DLL files
505+
__pycache__/
506+
*.py[cod]
507+
*$py.class
508+
509+
# C extensions
510+
*.so
511+
512+
# Distribution / packaging
513+
.Python
514+
build/
515+
develop-eggs/
516+
dist/
517+
downloads/
518+
eggs/
519+
.eggs/
520+
lib/
521+
lib64/
522+
parts/
523+
sdist/
524+
var/
525+
wheels/
526+
share/python-wheels/
527+
*.egg-info/
528+
.installed.cfg
529+
*.egg
530+
MANIFEST
531+
532+
# PyInstaller
533+
# Usually these files are written by a python script from a template
534+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
535+
*.manifest
536+
*.spec
537+
538+
# Installer logs
539+
pip-log.txt
540+
pip-delete-this-directory.txt
541+
542+
# Unit test / coverage reports
543+
htmlcov/
544+
.tox/
545+
.nox/
546+
.coverage
547+
.coverage.*
548+
.cache
549+
nosetests.xml
550+
coverage.xml
551+
*.cover
552+
*.py,cover
553+
.hypothesis/
554+
.pytest_cache/
555+
cover/
556+
557+
# Translations
558+
*.mo
559+
*.pot
560+
561+
# Django stuff:
562+
*.log
563+
local_settings.py
564+
db.sqlite3
565+
db.sqlite3-journal
566+
567+
# Flask stuff:
568+
instance/
569+
.webassets-cache
570+
571+
# Scrapy stuff:
572+
.scrapy
573+
574+
# Sphinx documentation
575+
docs/_build/
576+
577+
# PyBuilder
578+
.pybuilder/
579+
target/
580+
581+
# Jupyter Notebook
582+
.ipynb_checkpoints
583+
584+
# IPython
585+
profile_default/
586+
ipython_config.py
587+
588+
# pyenv
589+
# For a library or package, you might want to ignore these files since the code is
590+
# intended to run in multiple environments; otherwise, check them in:
591+
# .python-version
592+
593+
# pipenv
594+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
595+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
596+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
597+
# install all needed dependencies.
598+
#Pipfile.lock
599+
600+
# poetry
601+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
602+
# This is especially recommended for binary packages to ensure reproducibility, and is more
603+
# commonly ignored for libraries.
604+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
605+
#poetry.lock
606+
607+
# pdm
608+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
609+
#pdm.lock
610+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
611+
# in version control.
612+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
613+
.pdm.toml
614+
.pdm-python
615+
.pdm-build/
616+
617+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
618+
__pypackages__/
619+
620+
# Celery stuff
621+
celerybeat-schedule
622+
celerybeat.pid
623+
624+
# SageMath parsed files
625+
*.sage.py
626+
627+
# Environments
628+
.env
629+
.venv
630+
env/
631+
venv/
632+
ENV/
633+
env.bak/
634+
venv.bak/
635+
636+
# Spyder project settings
637+
.spyderproject
638+
.spyproject
639+
640+
# Rope project settings
641+
.ropeproject
642+
643+
# mkdocs documentation
644+
/site
645+
646+
# mypy
647+
.mypy_cache/
648+
.dmypy.json
649+
dmypy.json
650+
651+
# Pyre type checker
652+
.pyre/
653+
654+
# pytype static type analyzer
655+
.pytype/
656+
657+
# Cython debug symbols
658+
cython_debug/
659+
660+
# PyCharm
661+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
662+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
663+
# and can be added to the global gitignore or merged into this file. For a more nuclear
664+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
665+
.idea/
513666

514667
# Flutter/Dart plugins
515668
.flutter*
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
For details, see the "Develop with Oracle AI and Database Services: Gen, Vision, Speech, Language, and OML" workshop,
2+
located here: https://livelabs.oracle.com/pls/apex/r/dbpm/livelabs/view-workshop?wid=3874
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
https://artifactory.oci.oraclecorp.com/ocas-service-platform-dev-pypi-local/oci.ai-speech-realtime/0.2.0/oci.ai_speech_realtime-0.2.0-py2.py3-none-any.whl
2+
https://artifactory.oci.oraclecorp.com/global-dev-pypi/oci-2.129.1+preview.1.1805-py3-none-any.whl
3+
PyAudio>=0.2.14
4+
websockets==11.0.3; python_version >= "3.7"
5+
websockets==9.1; python_version == "3.6"
6+
oracledb
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import asyncio
2+
import getpass
3+
4+
import pyaudio
5+
import oracledb
6+
import oci
7+
from oci.config import from_file
8+
from oci.auth.signers.security_token_signer import SecurityTokenSigner
9+
from oci.ai_speech_realtime import (
10+
RealtimeClient,
11+
RealtimeClientListener,
12+
RealtimeParameters,
13+
)
14+
15+
# Prompt for the database password instead of keeping it in the script.
pw = getpass.getpass("Enter database user password:")

# Wallet-based connection: one directory is passed both as the configuration
# directory and as the wallet location.
wallet_dir = "/Users/pparkins/Downloads/Wallet_SelectAIDB"
connection = oracledb.connect(
    user="moviestream",
    password=pw,
    dsn="selectaidb_high",
    config_dir=wallet_dir,
    wallet_location=wallet_dir,
)
print("Successfully connected to Oracle Database")
print(f"Connection details: {connection}")
27+
28+
# FIFO queue that carries captured audio buffers to the sender coroutine
queue = asyncio.Queue()

# Audio capture settings
SAMPLE_RATE = 16_000
FORMAT = pyaudio.paInt16
CHANNELS = 1
BUFFER_DURATION_MS = 96

# Number of frames in one buffer of BUFFER_DURATION_MS milliseconds
FRAMES_PER_BUFFER = int(SAMPLE_RATE * BUFFER_DURATION_MS / 1000)

# Transcription state: accumulated text, whether it is a Select AI command,
# and the event-loop time of the most recent final result
cummulativeResult, isSelect, last_result_time = "", False, None
44+
45+
def authenticator(config_file="~/.oci/config", profile="paulspeechai"):
    """Build a security-token signer from an OCI config profile.

    Args:
        config_file: Path of the OCI config file to read. Defaults to the
            path this script previously hard-coded.
        profile: Name of the profile in ``config_file`` that provides the
            ``security_token_file`` and ``key_file`` entries. Defaults to the
            profile this script previously hard-coded.

    Returns:
        A ``SecurityTokenSigner`` built from the profile's token and
        private key.
    """
    config = from_file(config_file, profile)
    # Only the first line of the token file is used as the token.
    with open(config["security_token_file"], "r") as f:
        token = f.readline()
    private_key = oci.signer.load_private_key_from_file(config["key_file"])
    return SecurityTokenSigner(token=token, private_key=private_key)
52+
53+
def audio_callback(in_data, frame_count, time_info, status):
    """Put each captured audio buffer on the shared queue.

    Passed to PyAudio as the stream callback. Only ``in_data`` is used; the
    other arguments are accepted because the callback signature requires
    them.

    NOTE(review): PyAudio documents that stream callbacks run on a separate
    thread, and asyncio queues are documented as not thread-safe -- confirm
    this hand-off is reliable.

    Returns:
        A ``(None, pyaudio.paContinue)`` tuple: no output data, keep
        streaming.
    """
    queue.put_nowait(in_data)
    output_data = None
    return (output_data, pyaudio.paContinue)
57+
58+
p = pyaudio.PyAudio()

# Callback-driven input stream: every captured buffer goes to audio_callback.
capture_settings = dict(
    format=FORMAT,
    channels=CHANNELS,
    rate=SAMPLE_RATE,
    input=True,
    frames_per_buffer=FRAMES_PER_BUFFER,
    stream_callback=audio_callback,
)
stream = p.open(**capture_settings)
stream.start_stream()

# OCI configuration loaded with from_file()'s default arguments
config = from_file()
72+
73+
async def send_audio(client):
    """Forward queued audio buffers to the realtime client, indefinitely.

    Args:
        client: Object exposing an awaitable ``send_data(data)`` method.
    """
    while True:
        # Wait for the next captured buffer, then send it over the websocket.
        await client.send_data(await queue.get())
78+
79+
class SpeechListener(RealtimeClientListener):
    """Listener that collects final transcriptions into a Select AI prompt.

    Final results are accumulated in the module-level ``cummulativeResult``.
    Once that text starts with the spoken trigger "select ai", ``isSelect``
    is set and ``last_result_time`` is refreshed on every further final
    result. All other listener callbacks defer to the base class.
    """

    def on_result(self, result):
        """Handle one transcription result message.

        Args:
            result: Mapping whose ``"transcriptions"`` list holds, at index
                0, a mapping with ``"isFinal"`` and ``"transcription"``.
        """
        global cummulativeResult, isSelect, last_result_time
        first = result["transcriptions"][0]
        transcription = first["transcription"]
        if not first["isFinal"]:
            print(f"Received partial results: {transcription}")
            return

        # Join utterances with exactly one space so that words from
        # consecutive final results do not run together.
        cummulativeResult = " ".join(
            part
            for part in (cummulativeResult.strip(), transcription.strip())
            if part
        )
        print(f"Received final results: {transcription}")
        print(f"Current cummulative result: {cummulativeResult}")

        spoken = cummulativeResult.lower()
        if spoken.startswith("select ai"):
            isSelect = True
            last_result_time = asyncio.get_event_loop().time()
        elif not "select ai".startswith(spoken):
            # The buffered text can no longer become a "select ai" command.
            # Drop it, otherwise it would prefix every later utterance and
            # the trigger would never match again.
            cummulativeResult = ""

    def on_ack_message(self, ackmessage):
        return super().on_ack_message(ackmessage)

    def on_connect(self):
        return super().on_connect()

    def on_connect_message(self, connectmessage):
        return super().on_connect_message(connectmessage)

    def on_network_event(self, ackmessage):
        return super().on_network_event(ackmessage)

    def on_error(self):
        return super().on_error()
107+
108+
async def check_idle():
    """Run the Select AI query once the speaker has paused.

    Polls once per second. When a "select ai" command is pending
    (``isSelect``) and more than two seconds have passed since the last
    final result, ``executeSelectAI`` is called.

    A database error from the query is reported and the pending text is
    discarded. Without this, the exception would end this task and no later
    command would ever be executed.
    """
    global last_result_time, isSelect, cummulativeResult
    while True:
        paused = (
            last_result_time is not None
            and asyncio.get_event_loop().time() - last_result_time > 2
        )
        if isSelect and paused:
            # Clear the flag first so a failing query is not retried on
            # every poll.
            isSelect = False
            try:
                executeSelectAI()
            except oracledb.Error as exc:
                print(f"Select AI query failed: {exc}")
                cummulativeResult = ""
        await asyncio.sleep(1)
115+
116+
def executeSelectAI():
    """Send the accumulated transcription to Select AI and print the answer.

    The module-level ``cummulativeResult`` is bound as the prompt of
    ``DBMS_CLOUD_AI.GENERATE`` with the 'narrate' action. A LOB result is
    read and printed as text; anything else is printed as returned.

    The buffer is cleared in a ``finally`` block, so a failed query does not
    leave the old prompt in place to be prefixed onto the next command.
    Exceptions from the database call still propagate to the caller.
    """
    global cummulativeResult
    print(f"executeSelectAI called cummulative result: {cummulativeResult}")
    # for example prompt => 'select ai I am looking for the top 5 selling movies for the latest month please',
    query = """SELECT DBMS_CLOUD_AI.GENERATE(
prompt => :prompt,
profile_name => 'openai_gpt35',
action => 'narrate')
FROM dual"""
    try:
        with connection.cursor() as cursor:
            cursor.execute(query, prompt=cummulativeResult)
            result = cursor.fetchone()
            if result and isinstance(result[0], oracledb.LOB):
                text_result = result[0].read()
                print(text_result)
            else:
                print(result)
    finally:
        # Reset the accumulated text whether or not the query succeeded.
        cummulativeResult = ""
135+
136+
137+
# Logic such as the following could be added to choose the Select AI action dynamically from the spoken prefix:
138+
# actionValue = 'narrate'
139+
# if cummulativeResult.lower().startswith("select ai narrate"):
140+
# actionValue = "narrate"
141+
# elif cummulativeResult.lower().startswith("select ai chat"):
142+
# actionValue = "chat"
143+
# elif cummulativeResult.lower().startswith("select ai showsql"):
144+
# actionValue = "showsql"
145+
# elif cummulativeResult.lower().startswith("select ai show sql"):
146+
# actionValue = "showsql"
147+
# elif cummulativeResult.lower().startswith("select ai runsql"):
148+
# actionValue = "runsql"
149+
# elif cummulativeResult.lower().startswith("select ai run sql"):
150+
# actionValue = "runsql"
151+
# # Note that "runsql" is not currently supported as action value
152+
# query = """SELECT DBMS_CLOUD_AI.GENERATE(
153+
# prompt => :prompt,
154+
# profile_name => 'openai_gpt35',
155+
# action => :actionValue)
156+
# FROM dual"""
157+
158+
if __name__ == "__main__":
    # Script entry point: configure the realtime speech session, stream
    # microphone audio until the connection ends, then release the audio
    # device, the background tasks, and the database connection.
    def message_callback(message):
        print(f"Received message: {message}")

    realtime_speech_parameters: RealtimeParameters = RealtimeParameters()
    realtime_speech_parameters.language_code = "en-US"
    realtime_speech_parameters.model_domain = (
        realtime_speech_parameters.MODEL_DOMAIN_GENERIC
    )
    realtime_speech_parameters.partial_silence_threshold_in_ms = 0
    realtime_speech_parameters.final_silence_threshold_in_ms = 2000
    realtime_speech_parameters.should_ignore_invalid_customizations = False
    realtime_speech_parameters.stabilize_partial_results = (
        realtime_speech_parameters.STABILIZE_PARTIAL_RESULTS_NONE
    )

    realtime_speech_url = "wss://realtime.aiservice.us-phoenix-1.oci.oraclecloud.com"
    client = RealtimeClient(
        config=config,
        realtime_speech_parameters=realtime_speech_parameters,
        listener=SpeechListener(),
        service_endpoint=realtime_speech_url,
        signer=authenticator(),
        compartment_id="ocid1.compartment.oc1..MYCOMPARMENTID",
    )

    loop = asyncio.get_event_loop()
    background_tasks = [
        loop.create_task(send_audio(client)),
        loop.create_task(check_idle()),
    ]
    try:
        loop.run_until_complete(client.connect())
    finally:
        # Stop the helper coroutines and let the loop process the
        # cancellation so they are not left pending.
        for task in background_tasks:
            task.cancel()
        loop.run_until_complete(
            asyncio.gather(*background_tasks, return_exceptions=True)
        )

        # Release the audio device whether or not the stream is still
        # active; previously an inactive stream was never closed.
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        p.terminate()

        connection.close()

    print("Closed now")

0 commit comments

Comments
 (0)