Skip to content

Java and GCP Speech AIHolo impl and fixes to Python example #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
534d0df
new aiholo speech impl
paulparkinson Mar 3, 2025
54e32d4
new aiholo speech impl
paulparkinson Mar 3, 2025
6599a6e
basic html updates, add pics
paulparkinson Mar 4, 2025
ba90526
new aiholo speech impl
paulparkinson Mar 5, 2025
330bae4
default to http vs https
paulparkinson Mar 5, 2025
9f5f92f
Merge remote-tracking branch 'origin/main'
paulparkinson Mar 5, 2025
0270c86
image rename, revert to SSL for quick test...
paulparkinson Mar 5, 2025
ab328c0
Merge remote-tracking branch 'origin/main'
paulparkinson Mar 5, 2025
d49ce30
revert AIHolo.html flags and remove SSL default
paulparkinson Mar 5, 2025
1c76cc1
revert AIHolo.html flags and remove SSL default
paulparkinson Mar 5, 2025
2d4993f
prune old code, add multi-language support html and login/security
paulparkinson Mar 6, 2025
d170c24
prune old code, add multi-language support html and login/security
paulparkinson Mar 6, 2025
3092a13
prune old code, add multi-language support html and login/security
paulparkinson Mar 6, 2025
36a6ff9
multi-language support
paulparkinson Mar 6, 2025
5de88c8
multi-language support
paulparkinson Mar 6, 2025
100b08d
multi-language support
paulparkinson Mar 6, 2025
5dbe2e3
multi-language support
paulparkinson Mar 6, 2025
1316d32
multi-language support
paulparkinson Mar 6, 2025
12bd40e
multi-language support
paulparkinson Mar 6, 2025
d9c4717
multi-language support
paulparkinson Mar 6, 2025
bb81f93
multi-language support
paulparkinson Mar 6, 2025
036363c
multi-language support
paulparkinson Mar 6, 2025
9c58415
multi-language support
paulparkinson Mar 6, 2025
66d28b3
multi-language support
paulparkinson Mar 6, 2025
e491316
multi-language support
paulparkinson Mar 6, 2025
4c2a3da
multi-language support
paulparkinson Mar 6, 2025
23acbcf
multi-language support
paulparkinson Mar 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions java-ai/auth_and_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Authenticate against Google Cloud, build the project, and start the application.
#
# `gcloud auth application-default login` stores Application Default Credentials (ADC),
# which provide long-lived authentication (~1 week). The access token itself expires
# after 1 hour, but it is refreshed automatically as long as the stored credentials
# remain valid.

# Stop at the first failing step so a failed login or build never launches a stale jar.
set -e

gcloud auth application-default login
mvn clean package

# Forward slashes are required here: in bash a backslash is an escape character, so
# `.\target\oracleai-...` would collapse to `.targetoracleai-...` and the jar would not be found.
java -jar ./target/oracleai-0.0.1-SNAPSHOT.jar
129 changes: 128 additions & 1 deletion java-ai/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,29 @@

<!-- Version properties shared by the dependency declarations in this POM. -->
<properties>
<oci.sdk.version>3.52.1</oci.sdk.version>
<!-- Referenced as ${oracle.jdbc.version} by the ojdbc8, ucp, oraclepki, osdt_core and osdt_cert dependencies. -->
<oracle.jdbc.version>21.7.0.0</oracle.jdbc.version>
</properties>


<!-- Imports the Google Cloud libraries BOM (scope "import", type "pom") so that the
     com.google.cloud dependencies declared without a <version> get their version from it. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>libraries-bom</artifactId>
<version>26.32.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>








<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
Expand All @@ -40,7 +61,7 @@
<dependency>
<groupId>com.oracle.cloud.spring</groupId>
<artifactId>spring-cloud-oci-starter</artifactId>
<version>1.3.0</version>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>com.oracle.oci.sdk</groupId>
Expand Down Expand Up @@ -99,6 +120,112 @@
<artifactId>service</artifactId>
<version>0.12.0</version>
</dependency>

<!-- Spring Security starter; no version is declared here, so it is presumably managed
     by the Spring Boot parent/BOM (TODO confirm, the parent is not visible in this hunk). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId>
</dependency>


<!-- Disabled alternative: Oracle Spring Boot starters for UCP and wallet support. -->
<!-- <dependency>
<groupId>com.oracle.database.spring</groupId>
<artifactId>oracle-spring-boot-starter-ucp</artifactId>
<version>23.4.0</version>
</dependency> -->
<!-- <dependency>
<groupId>com.oracle.database.spring</groupId>
<artifactId>oracle-spring-boot-starter-wallet</artifactId>
<version>23.4.0</version>
</dependency> -->

<!-- Oracle JDBC driver, connection pool and security (PKI/OSDT) artifacts, all pinned to
     the single ${oracle.jdbc.version} property so they stay on the same release. -->
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>${oracle.jdbc.version}</version>
</dependency>
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ucp</artifactId>
<version>${oracle.jdbc.version}</version>
</dependency>
<dependency>
<groupId>com.oracle.database.security</groupId>
<artifactId>oraclepki</artifactId>
<version>${oracle.jdbc.version}</version>
</dependency>
<dependency>
<groupId>com.oracle.database.security</groupId>
<artifactId>osdt_core</artifactId>
<version>${oracle.jdbc.version}</version>
</dependency>
<dependency>
<groupId>com.oracle.database.security</groupId>
<artifactId>osdt_cert</artifactId>
<version>${oracle.jdbc.version}</version>
</dependency>

<!-- Google Cloud Text-to-Speech and Speech-to-Text clients; versions are intentionally
     omitted and come from the libraries-bom imported in dependencyManagement. -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-texttospeech</artifactId>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-speech</artifactId>
</dependency>
<!-- NOTE(review): argparse4j here and commons-cli further down both look like command-line
     parsing libraries; confirm that both are actually needed. -->
<dependency>
<groupId>net.sourceforge.argparse4j</groupId>
<artifactId>argparse4j</artifactId>
<version>0.9.0</version>
</dependency>


<!-- Spring WebSocket starter; version not declared here. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-websocket</artifactId>
</dependency>


<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.6.0</version>
</dependency>

<!-- NOTE(review): this version is pinned explicitly while the other Google artifacts take
     their version from libraries-bom; confirm the pin is compatible with that BOM. -->
<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-oauth2-http</artifactId>
<version>1.18.0</version>
</dependency>

<!-- Jakarta WebSocket API; 2.1.1 is kept below as the previously tried version. -->
<dependency>
<groupId>jakarta.websocket</groupId>
<artifactId>jakarta.websocket-api</artifactId>
<version>2.2.0</version>
<!-- <version>2.1.1</version> -->
</dependency>

<!-- NOTE(review): tomcat-embed-websocket and the two Tyrus artifacts below each appear to
     provide a WebSocket server implementation; confirm that having both on the classpath
     is intended and that they match the API version declared above. -->
<dependency>
<groupId>org.apache.tomcat.embed</groupId>
<artifactId>tomcat-embed-websocket</artifactId>
</dependency>

<!-- Disabled alternative: standalone Tomcat WebSocket artifact. -->
<!-- <dependency>
<groupId>org.apache.tomcat</groupId>
<artifactId>tomcat-websocket</artifactId>
<version>10.1.14</version>
</dependency> -->
<dependency>
<groupId>org.glassfish.tyrus</groupId>
<artifactId>tyrus-server</artifactId>
<version>2.1.3</version>
</dependency>

<dependency>
<groupId>org.glassfish.tyrus</groupId>
<artifactId>tyrus-container-servlet</artifactId>
<version>2.1.3</version>
</dependency>

</dependencies>
<build>
<plugins>
Expand Down
11 changes: 11 additions & 0 deletions java-ai/src/main/java/oracleai/BinaryServerConfigurator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package oracleai;

import jakarta.websocket.server.ServerEndpointConfig;

/**
 * Endpoint configurator whose origin check accepts every WebSocket handshake.
 */
public class BinaryServerConfigurator extends ServerEndpointConfig.Configurator {

    /**
     * Logs the {@code Origin} header of the incoming handshake and approves it.
     *
     * @param originHeaderValue value of the {@code Origin} header supplied by the client
     * @return always {@code true}; no origin is ever rejected
     */
    @Override
    public boolean checkOrigin(String originHeaderValue) {
        final String logLine = "✅ WebSocket checkOrigin originHeaderValue: " + originHeaderValue;
        System.out.println(logLine);

        // Every origin is allowed to open a WebSocket connection.
        final boolean originAccepted = true;
        return originAccepted;
    }
}
22 changes: 0 additions & 22 deletions java-ai/src/main/java/oracleai/EchoController.java

This file was deleted.

12 changes: 12 additions & 0 deletions java-ai/src/main/java/oracleai/SpeechWebSocketConfigurator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package oracleai;

import jakarta.websocket.server.HandshakeRequest;
import jakarta.websocket.HandshakeResponse;
import jakarta.websocket.server.ServerEndpointConfig;

/**
 * Handshake configurator that records a binary buffer size in the endpoint's user properties.
 */
public class SpeechWebSocketConfigurator extends ServerEndpointConfig.Configurator {

    /** User-property key under which the buffer size is stored. */
    private static final String BINARY_BUFFER_SIZE_KEY = "org.apache.tomcat.websocket.binaryBufferSize";

    /** Buffer size stored under {@link #BINARY_BUFFER_SIZE_KEY}: 1 MiB. */
    private static final int BINARY_BUFFER_SIZE_BYTES = 1024 * 1024;

    /**
     * Stores the binary buffer size in the endpoint configuration during the handshake.
     *
     * <p>NOTE(review): whether the container actually reads this key from the user
     * properties cannot be told from this class; confirm it has the intended effect.
     *
     * @param sec      endpoint configuration whose user properties are updated
     * @param request  incoming handshake request (not inspected)
     * @param response outgoing handshake response (not modified)
     */
    @Override
    public void modifyHandshake(ServerEndpointConfig sec, HandshakeRequest request, HandshakeResponse response) {
        sec.getUserProperties().put(BINARY_BUFFER_SIZE_KEY, BINARY_BUFFER_SIZE_BYTES);
    }
}
160 changes: 160 additions & 0 deletions java-ai/src/main/java/oracleai/SpeechWebSocketServer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
package oracleai;

import com.google.api.gax.rpc.BidiStreamingCallable;
import com.google.api.gax.rpc.ApiStreamObserver;
import com.google.cloud.speech.v1.*;
import com.google.protobuf.ByteString;

import jakarta.websocket.*;
import jakarta.websocket.server.ServerEndpoint;
import java.io.*;
import java.nio.ByteBuffer;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import javax.sound.sampled.*;

import org.springframework.stereotype.Component;

/**
 * WebSocket endpoint at {@code /speech} that streams audio to the Google Cloud Speech-to-Text
 * streaming API and sends every non-empty transcript back to the client as a text message.
 *
 * <p>A recognition stream is opened in {@link #onOpen(Session)} and completed in
 * {@link #onClose(Session)}. The {@link SpeechClient} is shared and created once when the
 * class is loaded.
 *
 * <p>NOTE(review): the audio sent to the recognizer currently comes from a fixed WAV file
 * ({@link #SAMPLE_WAV_PATH}); the payload of incoming WebSocket messages is not used yet.
 */
@ServerEndpoint(value = "/speech", configurator = SpeechWebSocketConfigurator.class)
@Component
public class SpeechWebSocketServer {

    /** WAV file that is sent to the recognizer for every incoming message. Change to your WAV file path. */
    private static final String SAMPLE_WAV_PATH = "C:/Users/opc/Downloads/audio_logs/sample.wav";

    /** Upper bound for a single binary WebSocket message (1 MiB) so large audio messages are accepted. */
    private static final int MAX_BINARY_MESSAGE_BYTES = 1024 * 1024;

    /** Shared Google Speech client, created once in the static initializer. */
    private static final SpeechClient speechClient;

    /** Request side of the recognition stream opened by {@link #onOpen(Session)}; {@code null} before that. */
    private ApiStreamObserver<StreamingRecognizeRequest> requestObserver;

    static {
        try {
            speechClient = SpeechClient.create();
        } catch (IOException e) {
            throw new RuntimeException("❌ Failed to initialize SpeechClient", e);
        }
    }

    /**
     * Opens the bidirectional recognition stream for a new connection and sends the
     * streaming configuration as the first request.
     *
     * @param session the WebSocket session that was just opened
     */
    @OnOpen
    public void onOpen(Session session) {
        System.out.println("✅ WebSocket Connected: " + session.getId());
        session.setMaxBinaryMessageBufferSize(MAX_BINARY_MESSAGE_BYTES);

        BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
                speechClient.streamingRecognizeCallable();
        requestObserver = callable.bidiStreamingCall(newResponseObserver(session));

        // The first request on a recognition stream carries the configuration only.
        requestObserver.onNext(buildConfigRequest());
    }

    /**
     * Container entry point for binary WebSocket messages.
     *
     * <p>An {@code @OnMessage} method must declare a message payload parameter, so the
     * payload is accepted here even though it is intentionally ignored for now: the audio
     * is still taken from the sample WAV file by {@link #onMessage(Session)}.
     *
     * @param message binary payload sent by the client (currently unused)
     * @param session the WebSocket session the message arrived on
     */
    @OnMessage
    public void onMessage(ByteBuffer message, Session session) {
        onMessage(session);
    }

    /**
     * Reads the sample WAV file and sends its audio data to the recognition stream as one request.
     * Does nothing except log when the file is empty, unreadable, or cannot be converted.
     *
     * @param session the WebSocket session that triggered the transfer
     */
    public void onMessage(Session session) {
        byte[] audioBytes;
        try {
            audioBytes = readWavFile(SAMPLE_WAV_PATH);
        } catch (IOException | UnsupportedAudioFileException | IllegalArgumentException e) {
            // IllegalArgumentException is what AudioSystem throws for an unsupported conversion.
            System.err.println("❌ Error reading WAV file: " + e.getMessage());
            return;
        }

        if (audioBytes.length == 0) {
            System.out.println("⚠️ WAV file is empty or could not be read.");
            return;
        }

        System.out.println("✅ Sending Audio Data from WAV file: " + audioBytes.length + " bytes");

        if (requestObserver != null) {
            requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
                    .setAudioContent(ByteString.copyFrom(audioBytes))
                    .build());
        }
    }

    /**
     * Completes the recognition stream when the WebSocket connection is closed.
     *
     * @param session the WebSocket session that was closed
     */
    @OnClose
    public void onClose(Session session) {
        System.out.println("🔴 WebSocket Closed: " + session.getId());
        if (requestObserver != null) {
            requestObserver.onCompleted();
        }
    }

    /**
     * Logs a WebSocket-level error.
     *
     * @param session   the session on which the error occurred
     * @param throwable the reported error
     */
    @OnError
    public void onError(Session session, Throwable throwable) {
        System.err.println("⚠️ WebSocket error: " + throwable.getMessage());
    }

    /** Builds the observer that forwards transcripts from the recognizer to the given session. */
    private static ApiStreamObserver<StreamingRecognizeResponse> newResponseObserver(Session session) {
        return new ApiStreamObserver<>() {
            @Override
            public void onNext(StreamingRecognizeResponse response) {
                for (StreamingRecognitionResult result : response.getResultsList()) {
                    if (result.getAlternativesCount() == 0) {
                        continue;
                    }
                    // Only the first alternative of each result is forwarded.
                    String transcript = result.getAlternatives(0).getTranscript().trim();
                    if (transcript.isEmpty()) {
                        continue;
                    }
                    System.out.println("📝 Transcription: " + transcript);
                    sendTranscript(session, transcript);
                }
            }

            @Override
            public void onError(Throwable t) {
                System.err.println("❌ Google API Error: " + t.getMessage());
            }

            @Override
            public void onCompleted() {
                System.out.println("✅ Streaming completed.");
            }
        };
    }

    /** Sends one transcript to the client, skipping sessions that are already closed. */
    private static void sendTranscript(Session session, String transcript) {
        // Results can still arrive after the client has disconnected.
        if (!session.isOpen()) {
            return;
        }
        try {
            session.getBasicRemote().sendText(transcript);
        } catch (IOException | IllegalStateException e) {
            // IllegalStateException covers the session closing between the check and the send.
            System.err.println("❌ Failed to send transcript to session " + session.getId());
            e.printStackTrace();
        }
    }

    /** Builds the initial request: LINEAR16, 16 kHz, mono, en-US, punctuation and interim results on. */
    private static StreamingRecognizeRequest buildConfigRequest() {
        RecognitionConfig recognitionConfig = RecognitionConfig.newBuilder()
                .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
                .setSampleRateHertz(16000)
                .setLanguageCode("en-US")
                .setAudioChannelCount(1)
                .setEnableAutomaticPunctuation(true)
                .build();
        StreamingRecognitionConfig streamingConfig = StreamingRecognitionConfig.newBuilder()
                .setConfig(recognitionConfig)
                .setInterimResults(true)
                .setSingleUtterance(false)
                .build();
        return StreamingRecognizeRequest.newBuilder()
                .setStreamingConfig(streamingConfig)
                .build();
    }

    /**
     * Reads an audio file and returns its sample data as raw bytes.
     *
     * <p>Audio that is not already signed PCM is converted to 16-bit little-endian signed PCM.
     * The sample rate and channel count of the file are kept as they are, and audio that is
     * already signed PCM is returned unchanged, so this method does not by itself guarantee
     * the 16 kHz mono format announced to the recognizer.
     *
     * @param filePath path of the audio file to read
     * @return the raw audio bytes, possibly empty
     * @throws IOException                   if the file cannot be read
     * @throws UnsupportedAudioFileException if the file is not a recognized audio format
     * @throws IllegalArgumentException      if the conversion to signed PCM is not supported
     */
    private byte[] readWavFile(String filePath) throws IOException, UnsupportedAudioFileException {
        // try-with-resources closes the stream(s) even when reading or conversion fails.
        try (AudioInputStream source = AudioSystem.getAudioInputStream(new File(filePath))) {
            AudioFormat format = source.getFormat();
            System.out.println("🎵 WAV File Format: " + format);

            if (AudioFormat.Encoding.PCM_SIGNED.equals(format.getEncoding())) {
                return source.readAllBytes();
            }

            AudioFormat pcmFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    format.getSampleRate(),
                    16,                        // force 16-bit samples
                    format.getChannels(),
                    format.getChannels() * 2,  // frame size: 2 bytes per channel
                    format.getSampleRate(),    // frame rate equals sample rate for PCM
                    false                      // little-endian
            );
            try (AudioInputStream pcm = AudioSystem.getAudioInputStream(pcmFormat, source)) {
                return pcm.readAllBytes();
            }
        }
    }
}
21 changes: 13 additions & 8 deletions java-ai/src/main/java/oracleai/WebConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;

@Configuration
public class WebConfig implements WebMvcConfigurer {
@Override
public void addCorsMappings(CorsRegistry registry) {
registry.addMapping("/**") // This will apply to all routes
.allowedOrigins("https://130.61.51.75:4884") // Allow this origin
.allowedMethods("GET", "POST", "PUT", "DELETE") // Allowed methods
.allowedHeaders("*") // Allowed headers
.allowCredentials(true); // Allow credentials
/**
 * Registers the CORS policy for the web application through a {@code WebMvcConfigurer} bean.
 */
public class WebConfig {
/**
 * Creates the configurer that applies one CORS mapping to every path.
 *
 * @return a {@code WebMvcConfigurer} whose only override is {@code addCorsMappings}
 */
@Bean
public WebMvcConfigurer corsConfigurer() {
return new WebMvcConfigurer() {
@Override
public void addCorsMappings(CorsRegistry registry) {
registry.addMapping("/**") // Apply to all endpoints
// Origin patterns are used because a literal "*" origin cannot be combined with credentials.
// NOTE(review): the pattern "*" together with allowCredentials(true) appears to accept
// credentialed requests from any origin; confirm this is intended outside of demos.
.allowedOriginPatterns("*")
.allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS")
.allowedHeaders("*")
.allowCredentials(true); // Credentials (e.g. cookies) stay enabled for cross-origin calls
}
};
}
}
Loading