diff --git a/README.md b/README.md index f90c8213..25d292ad 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,8 @@ MISSING MISSING ## Notes/Issues -MISSING +Spring Boot 3.0 requires Java 17 as a minimum version. + ## URLs https://apexapps.oracle.com/pls/apex/r/dbpm/livelabs/view-workshop?wid=3874 diff --git a/pom.xml b/pom.xml index b212ebd0..b7f7df05 100644 --- a/pom.xml +++ b/pom.xml @@ -1,11 +1,11 @@ + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 org.springframework.boot spring-boot-starter-parent - 3.1.4 + 3.3.1 oracleai @@ -15,26 +15,13 @@ Oracle AI Demos - 2021.0.5 - 21.7.0.0 - 3.1.2 - 1.31 - 3.1.1 - 3.29.0 - 3.3.1 - 4.17.2 + 3.44.2 org.springframework.boot spring-boot-starter-web - - - org.yaml - snakeyaml - - org.springframework.boot @@ -45,11 +32,10 @@ json 20231013 - - com.oracle.oci.sdk - oci-java-sdk-common - ${oci.sdk.version} + com.oracle.cloud.spring + spring-cloud-oci-starter + 1.0.0 com.oracle.oci.sdk @@ -59,7 +45,7 @@ com.oracle.oci.sdk oci-java-sdk-generativeaiinference - 3.32.1 + ${oci.sdk.version} com.oracle.oci.sdk @@ -87,20 +73,6 @@ slf4j-simple 2.0.6 - - org.springframework.boot - spring-boot-starter-web - ${spring.boot.version} - - - org.springframework.boot - spring-boot-starter-test - ${spring.boot.version} - test - - - - javax.xml.bind jaxb-api @@ -122,18 +94,7 @@ service 0.12.0 - - - - - org.springframework.cloud - spring-cloud-dependencies - ${spring-cloud.version} - pom - import - - - + diff --git a/python-rag-chatbot/README.md b/python-rag-chatbot/README.md new file mode 100644 index 00000000..0fd7a639 --- /dev/null +++ b/python-rag-chatbot/README.md @@ -0,0 +1,52 @@ +# Integrating Oracle Database 23ai RAG and OCI Generative AI with LangChain + +[**Oracle Database 23ai**](https://www.oracle.com/database/free-1/) + +[**OCI GenAI**](https://www.oracle.com/artificial-intelligence/generative-ai/large-language-models/) + +[**LangChain**](https://www.langchain.com/) + + +## 
TODO instructions + +- set up ~/.oci/config +- replace the `yourcompartmentid` placeholder with your compartment OCID +- podman run -d --name 23ai -p 1521:1521 -e ORACLE_PWD=Welcome12345 -v oracle-volume:/Users/pparkins/oradata container-registry.oracle.com/database/free:latest +- create/configure the vector tablespace and user +- add the Oracle Database connection info used in init_rag_streamlit.py / init_rag_streamlit_exp.py +- run run_oracle_bot.sh / run_oracle_bot_exp.sh + + +## Documentation +The development of the proposed integration is based on the LangChain custom LLM example provided [here](https://python.langchain.com/docs/modules/model_io/models/llms/custom_llm) + +## Features +* How to build a complete, end-to-end RAG solution using LangChain and Oracle GenAI Service. +* How to load multiple PDFs +* How to split PDF pages into smaller chunks +* How to do semantic search using Embeddings +* How to use Cohere Embeddings +* How to use HF Embeddings +* How to set up a Retriever using Embeddings +* How to add Cohere reranker to the chain +* How to integrate OCI GenAI Service with LangChain +* How to define the LangChain +* How to use the Oracle vector DB capabilities +* How to use in-memory database capability + +## Oracle BOT +Using the script [run_oracle_bot_exp.sh](run_oracle_bot_exp.sh) you can launch a simple ChatBot that showcases the Oracle GenAI service. The demo is based on docs from Oracle Database pdf documentation. + +You need to put the following files in the local `pdfFiles` directory: +* Trobleshooting.pdf +* globally-distributed-autonomous-database.pdf +* Oracle True cache.pdf +* oracle-database-23c.pdf +* oracle-globally-distributed-database-guide.pdf +* sharding-adg-addshard-cookbook-3610618.pdf + +You can add more PDFs by editing the `BOOK*` entries and `BOOK_LIST` in [config_rag.py](config_rag.py) + + + + diff --git a/python-rag-chatbot/config_rag.py b/python-rag-chatbot/config_rag.py new file mode 100644 index 00000000..cf019ee8 --- /dev/null +++ b/python-rag-chatbot/config_rag.py @@ -0,0 +1,71 @@ +# configurations for the RAG + +# to enable debugging info.. 
+DEBUG = False + +# book to use for augmentation +# BOOK1 = "APISpec.pdf" +BOOK1 = "pdfFiles/sharding-adg-addshard-cookbook-3610618.pdf" +BOOK2 = "pdfFiles/globally-distributed-autonomous-database.pdf" +# BOOK4 = "OnBoardingGuide.pdf" +# BOOK5 = "CreateWorkFlow.pdf" +# BOOK6 = "Team Onboarding.pdf" +# BOOK7 = "workflow.pdf" +BOOK3 = "pdfFiles/oracle-database-23c.pdf" +BOOK4 = "pdfFiles/oracle-globally-distributed-database-guide.pdf" +BOOK5 = "pdfFiles/Oracle True cache.pdf" +BOOK6 = "pdfFiles/Trobleshooting.pdf" +# BOOK12 = "OsdCode.pdf" + +BOOK_LIST = [BOOK1, BOOK2, BOOK3, BOOK4, BOOK5, BOOK6] + + +# to divide docs in chunks +CHUNK_SIZE = 1000 +CHUNK_OVERLAP = 50 + + +# +# Vector Store (Chrome or FAISS) +# +# VECTOR_STORE_NAME = "FAISS" +# VECTOR_STORE_NAME = "ORACLEDB" +VECTOR_STORE_NAME = "CHROME" + + +# type of Embedding Model. The choice has been parametrized +# Local means HF +EMBED_TYPE = "LOCAL" +# see: https://huggingface.co/spaces/mteb/leaderboard +# see also: https://github.com/FlagOpen/FlagEmbedding +# base seems to work better than small +# EMBED_HF_MODEL_NAME = "BAAI/bge-base-en-v1.5" +# EMBED_HF_MODEL_NAME = "BAAI/bge-small-en-v1.5" +EMBED_HF_MODEL_NAME = "BAAI/bge-large-en-v1.5" + +# Cohere means the embed model from Cohere site API +# EMBED_TYPE = "COHERE" +EMBED_COHERE_MODEL_NAME = "embed-english-v3.0" + +# number of docs to return from Retriever +MAX_DOCS_RETRIEVED = 6 + +# to add Cohere reranker to the QA chain +ADD_RERANKER = False + +# +# LLM Config +# +# LLM_TYPE = "COHERE" +LLM_TYPE = "OCI" + +# max tokens returned from LLM for single query +MAX_TOKENS = 1000 +# to avoid "creativity" +TEMPERATURE = 0 + +# +# OCI GenAI configs +# +TIMEOUT = 30 +ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" diff --git a/python-rag-chatbot/copy.txt b/python-rag-chatbot/copy.txt new file mode 100644 index 00000000..6fa79f90 --- /dev/null +++ b/python-rag-chatbot/copy.txt @@ -0,0 +1 @@ +feedback_0 diff --git 
a/python-rag-chatbot/init_rag.py b/python-rag-chatbot/init_rag.py new file mode 100644 index 00000000..f31d9d07 --- /dev/null +++ b/python-rag-chatbot/init_rag.py @@ -0,0 +1,193 @@ +# +# This one is to be used in Notebooks +# + +# for pdf post processing +import re + +import cohere +# modified to load from Pdf +from langchain.document_loaders import PyPDFLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter + +# two possible vector store +from langchain.vectorstores import Chroma +from langchain.vectorstores import FAISS + +from langchain.schema.runnable import RunnablePassthrough + +# removed OpenAI, using Cohere embeddings +from langchain.embeddings import CohereEmbeddings +from langchain.embeddings import HuggingFaceEmbeddings + +from langchain import hub + +import oci + +from langchain.llms import Cohere +from langchain_community.llms.oci_generative_ai import OCIGenAI +from oci.generative_ai_inference import generative_ai_inference_client + +# oci_llm is in a local file +from oci_llm import OCIGenAILLM + +# config for the RAG +from config_rag import ( + BOOK_LIST, + CHUNK_SIZE, + CHUNK_OVERLAP, + VECTOR_STORE_NAME, + MAX_TOKENS, + ENDPOINT, + EMBED_TYPE, + MAX_DOCS_RETRIEVED, + TEMPERATURE, + EMBED_HF_MODEL_NAME, + TIMEOUT, + LLM_TYPE, +) + +# private configs +CONFIG_PROFILE = "DEFAULT" +COMPARTMENT_OCID = "ocid1.compartment.oc1..yourcompartmentid" +oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE) +COHERE_API_KEY = oci_config['security_token_file'] +DEBUG = True + + +# +# def load_oci_config() +# +def load_oci_config(): + # read OCI config to connect to OCI with API key + oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE) + + # check the config to access to api keys + if DEBUG: + print(oci_config) + + return oci_config + + +# +# do some post processing on text +# +def post_process(splits): + for split in splits: + split.page_content = split.page_content.replace("\n", " ") + split.page_content = 
re.sub("[^a-zA-Z0-9 \n\.]", " ", split.page_content) + # remove duplicate blank + split.page_content = " ".join(split.page_content.split()) + + return splits + + +# +# def: Initialize_rag_chain +# +def initialize_rag_chain(): + # Initialize RAG + + # Loading a list of pdf documents + all_pages = [] + + # modified to load a list of pdf + for book in BOOK_LIST: + print(f"Loading book: {book}...") + loader = PyPDFLoader(book) + + # loader split in pages + pages = loader.load() + print(f"Loaded {len(pages)} pages...") + + all_pages.extend(pages) + + print("PDF document loaded!") + + # This split in chunks + # try with smaller chuncks + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP + ) + + splits = text_splitter.split_documents(all_pages) + + print(f"We have splitted the pdf in {len(splits)} splits...") + + # some post processing + splits = post_process(splits) + + print("Initializing vector store...") + + if EMBED_TYPE == "COHERE": + print("Loading Cohere Embeddings Model...") + embed_model = CohereEmbeddings(cohere_api_key=COHERE_API_KEY) + if EMBED_TYPE == "LOCAL": + print(f"Loading HF Embeddings Model: {EMBED_HF_MODEL_NAME}") + + model_kwargs = {"device": "cpu"} + # changed to True for BAAI, to use cosine similarity + encode_kwargs = {"normalize_embeddings": True} + + embed_model = HuggingFaceEmbeddings( + model_name=EMBED_HF_MODEL_NAME, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs, + ) + + # using Chroma as Vector store + print(f"Indexing: using {VECTOR_STORE_NAME} as Vector Store...") + + if VECTOR_STORE_NAME == "CHROME": + vectorstore = Chroma.from_documents(documents=splits, embedding=embed_model) + if VECTOR_STORE_NAME == "FAISS": + vectorstore = FAISS.from_documents(documents=splits, embedding=embed_model) + + # increased num. 
of docs to 5 (default to 4) + retriever = vectorstore.as_retriever(search_kwargs={"k": MAX_DOCS_RETRIEVED}) + + # Build the class for OCI GenAI + + # Only needed for OCI LLM + print(f"Using {LLM_TYPE} llm...") + + if LLM_TYPE == "OCI": + + llm = OCIGenAI( + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..yourcompartmentid", + model_kwargs={"max_tokens": 1000}, + auth_type='SECURITY_TOKEN', + ) + if LLM_TYPE == "COHERE": + llm = cohere.Client(COHERE_API_KEY) + response = llm.generate( + prompt="Please explain to me how LLMs work", + ) + print(response) + return llm + + # for now hard coded... + rag_prompt = hub.pull("rlm/rag-prompt") + + print("Building rag_chain...") + rag_chain = ( + {"context": retriever, "question": RunnablePassthrough()} | rag_prompt | llm + ) + + print("Init RAG complete...") + return rag_chain + + +# +# def: get_answer from LLM +# +def get_answer(rag_chain, question): + response = rag_chain.invoke(question) + + print(f"Question: {question}") + print("The response:") + print(response) + print() + + return response diff --git a/python-rag-chatbot/init_rag_streamlit.py b/python-rag-chatbot/init_rag_streamlit.py new file mode 100644 index 00000000..e911bb11 --- /dev/null +++ b/python-rag-chatbot/init_rag_streamlit.py @@ -0,0 +1,318 @@ +# +# This one is to be used with Streamlit +# +import oracledb +import streamlit as st + +# for pdf post processing +import re + +from langchain_community.vectorstores import OracleVS, DistanceStrategy +from streamlit_feedback import streamlit_feedback + +# modified to load from Pdf +from langchain.document_loaders import PyPDFLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter + +# for caching +from langchain.storage import LocalFileStore + +# two possible vector store +from langchain.vectorstores import Chroma +from langchain.vectorstores import FAISS + +from langchain.schema.runnable import RunnablePassthrough + 
+# removed OpenAI, using Cohere embeddings +from langchain.embeddings import CohereEmbeddings +from langchain.embeddings import HuggingFaceEmbeddings +from langchain.embeddings import CacheBackedEmbeddings + +from langchain.retrievers import ContextualCompressionRetriever +from langchain.retrievers.document_compressors import CohereRerank + +from langchain import hub +from langchain.prompts import ChatPromptTemplate + +from langchain.llms import Cohere + +import oci + +# oci_llm is in a local file +from oci_llm import OCIGenAILLM + +# config for the RAG +from config_rag import ( + BOOK_LIST, + CHUNK_SIZE, + CHUNK_OVERLAP, + VECTOR_STORE_NAME, + MAX_TOKENS, + ENDPOINT, + EMBED_TYPE, + EMBED_COHERE_MODEL_NAME, + MAX_DOCS_RETRIEVED, + ADD_RERANKER, + TEMPERATURE, + EMBED_HF_MODEL_NAME, + TIMEOUT, + LLM_TYPE, + DEBUG +) + +# private configs +CONFIG_PROFILE = "DEFAULT" +COMPARTMENT_OCID = "ocid1.compartment.oc1..aaaaaaaajdyhd7dqnix2avhlckbhhkkcl3cujzyuz6jzyzonadca3i66pqjq" +oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE) +COHERE_API_KEY = oci_config['key_file'] +# +# load_oci_config(): load the OCI security config +# +def load_oci_config(): + # read OCI config to connect to OCI with API key + oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE) + + # check the config to access to api keys + if DEBUG: + print() + print("OCI Config:") + print(oci_config) + print() + + return oci_config + + +# +# do some post processing on text +# +def post_process(splits): + for split in splits: + # replace newline with blank + split.page_content = split.page_content.replace("\n", " ") + split.page_content = re.sub("[^a-zA-Z0-9 \n\.]", " ", split.page_content) + # remove duplicate blank + split.page_content = " ".join(split.page_content.split()) + + return splits + + +# +# load all pages from pdf books +# +# +def load_all_pages(book_list): + all_pages = [] + + for book in book_list: + print(f"Loading book: {book}...") + loader = PyPDFLoader(book) + + # 
loader split in pages + pages = loader.load() + + all_pages.extend(pages) + + print(f"Loaded {len(pages)} pages...") + + return all_pages + + +# +# Split pages in chunk +# +def split_in_chunks(all_pages): + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP + ) + + splits = text_splitter.split_documents(all_pages) + + # some post processing on text + splits = post_process(splits) + + print(f"Splitted the pdf in {len(splits)} chunks...") + + return splits + + +# +# Load the embedding model +# +def create_cached_embedder(): + print("Initializing Embeddings model...") + + # Introduced to cache embeddings and make it faster + fs = LocalFileStore("./vector-cache/") + + if EMBED_TYPE == "COHERE": + print("Loading Cohere Embeddings Model...") + embed_model = CohereEmbeddings( + model=EMBED_COHERE_MODEL_NAME, cohere_api_key=COHERE_API_KEY + ) + elif EMBED_TYPE == "LOCAL": + print(f"Loading HF Embeddings Model: {EMBED_HF_MODEL_NAME}") + + model_kwargs = {"device": "cpu"} + # changed to True for BAAI, to use cosine similarity + encode_kwargs = {"normalize_embeddings": True} + + embed_model = HuggingFaceEmbeddings( + model_name=EMBED_HF_MODEL_NAME, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs, + ) + + # the cache for embeddings + cached_embedder = CacheBackedEmbeddings.from_bytes_store( + embed_model, fs, namespace=embed_model.model_name + ) + + return cached_embedder + + +# +# create retrievere with optional reranker +# +def create_retriever(vectorstore): + if ADD_RERANKER == False: + # no reranking + print("No reranking...") + retriever = vectorstore.as_retriever(search_kwargs={"k": MAX_DOCS_RETRIEVED}) + else: + # to add reranking + print("Adding reranking to QA chain...") + + compressor = CohereRerank(cohere_api_key=COHERE_API_KEY) + + base_retriever = vectorstore.as_retriever( + search_kwargs={"k": MAX_DOCS_RETRIEVED} + ) + + retriever = ContextualCompressionRetriever( + base_compressor=compressor, 
base_retriever=base_retriever + ) + + return retriever + + +# +# create vector store +# +def create_vector_store(store_type, document_splits, embedder): + print(f"Indexing: using {store_type} as Vector Store...") + connection = oracledb.connect( + user="ragchat", + password="ragchat", + dsn="localhost/freepdb1") + + if store_type == "CHROME": + # modified to cache + vectorstore = Chroma.from_documents( + documents=document_splits, embedding=embedder + ) + elif store_type == "FAISS": + # modified to cache + vectorstore = FAISS.from_documents( + documents=document_splits, embedding=embedder + ) + elif store_type == "ORACLEDB": + vectorstore = OracleVS.from_documents( + documents=document_splits, embedding=embedder, + table_name="oravs", + client=connection, + distance_strategy=DistanceStrategy.DOT_PRODUCT, + ) + + + return vectorstore + + +# +# Build LLM +# +def build_llm(llm_type): + print(f"Using {llm_type} llm...") + + if llm_type == "OCI": + oci_config = load_oci_config() + + llm = OCIGenAILLM( + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + config=oci_config, + compartment_id=COMPARTMENT_OCID, + service_endpoint=ENDPOINT, + debug=DEBUG, + timeout=TIMEOUT, + ) + elif llm_type == "COHERE": + llm = Cohere( + model="command", # using large model and not nightly + cohere_api_key=COHERE_API_KEY, + max_tokens=MAX_TOKENS, + temperature=TEMPERATURE, + ) + + return llm + + +# +# Initialize_rag_chain +# +# to run it only once +@st.cache_resource +def initialize_rag_chain(): + # Initialize RAG + + # 1. Load a list of pdf documents + all_pages = load_all_pages(BOOK_LIST) + + # 2. Split pages in chunks + document_splits = split_in_chunks(all_pages) + + # 3. Load embeddings model + embedder = create_cached_embedder() + + # 4. Create a Vectore Store and store embeddings + vectorstore = create_vector_store(VECTOR_STORE_NAME, document_splits, embedder) + + # 5. Create a retriever + # increased num. 
of docs to 5 (default to 4) + # added optionally a reranker + retriever = create_retriever(vectorstore) + + # 6. Build the LLM + llm = build_llm(LLM_TYPE) + + # 7. define the prompt (for now hard coded...) + # rag_prompt = hub.pull("rlm/rag-prompt") + template = """Answer the question based only on the following context: + {context} + + Question: {question} + """ + rag_prompt = ChatPromptTemplate.from_template(template) + + # 8. build the entire RAG chain + print("Building rag_chain...") + rag_chain = ( + {"context": retriever, "question": RunnablePassthrough()} | rag_prompt | llm + ) + + print("Init RAG complete...") + + return rag_chain + + +# +# def: get_answer from LLM +# +def get_answer(rag_chain, question): + response = rag_chain.invoke(question) + if DEBUG: + print(f"Question: {question}") + print("The response:") + print(response) + print() + + return response \ No newline at end of file diff --git a/python-rag-chatbot/init_rag_streamlit_exp.py b/python-rag-chatbot/init_rag_streamlit_exp.py new file mode 100644 index 00000000..0686567b --- /dev/null +++ b/python-rag-chatbot/init_rag_streamlit_exp.py @@ -0,0 +1,331 @@ +# +# This one is to be used with Streamlit +# +import oracledb +import streamlit as st + +# for pdf post processing +import re + +# modified to load from Pdf +from langchain.document_loaders import PyPDFLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter + +# for caching +from langchain.storage import LocalFileStore + +# two possible vector store +from langchain.vectorstores import Chroma +from langchain.vectorstores import FAISS + +from langchain.schema.runnable import RunnablePassthrough + +# removed OpenAI, using Cohere embeddings +from langchain.embeddings import CohereEmbeddings +from langchain.embeddings import HuggingFaceEmbeddings +from langchain.embeddings import CacheBackedEmbeddings + +from langchain.retrievers import ContextualCompressionRetriever +from langchain.retrievers.document_compressors import 
CohereRerank + +from langchain import hub +from langchain.prompts import ChatPromptTemplate + +from langchain.llms import Cohere + +import oci +from langchain_community.llms.oci_generative_ai import OCIGenAI +from langchain_community.vectorstores import OracleVS +from langchain_community.vectorstores.utils import DistanceStrategy + +# oci_llm is in a local file +# from oci_llm import OCIGenAILLM + + +# config for the RAG +from config_rag import ( + BOOK_LIST, + CHUNK_SIZE, + CHUNK_OVERLAP, + VECTOR_STORE_NAME, + MAX_TOKENS, + ENDPOINT, + EMBED_TYPE, + EMBED_COHERE_MODEL_NAME, + MAX_DOCS_RETRIEVED, + ADD_RERANKER, + TEMPERATURE, + EMBED_HF_MODEL_NAME, + TIMEOUT, + LLM_TYPE, + DEBUG +) + +# private configs +CONFIG_PROFILE = "DEFAULT" +COMPARTMENT_OCID = "ocid1.compartment.oc1..yourcompartmentid" +oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE) +COHERE_API_KEY = oci_config['security_token_file'] + + +# +# load_oci_config(): load the OCI security config +# +# def load_oci_config(): +# # read OCI config to connect to OCI with API key +# oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE) +# +# # check the config to access to api keys +# if DEBUG: +# print() +# print("OCI Config:") +# print(oci_config) +# print() +# +# return oci_config + + +# +# do some post processing on text +# +def post_process(splits): + for split in splits: + # replace newline with blank + split.page_content = split.page_content.replace("\n", " ") + split.page_content = re.sub("[^a-zA-Z0-9 \n\.]", " ", split.page_content) + # remove duplicate blank + split.page_content = " ".join(split.page_content.split()) + + return splits + + +# +# load all pages from pdf books +# +# +def load_all_pages(book_list): + all_pages = [] + + for book in book_list: + print(f"Loading book: {book}...") + loader = PyPDFLoader(book) + + # loader split in pages + pages = loader.load() + + all_pages.extend(pages) + + print(f"Loaded {len(pages)} pages...") + + return all_pages + + +# +# Split 
pages in chunk +# +def split_in_chunks(all_pages): + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP + ) + + splits = text_splitter.split_documents(all_pages) + + # some post processing on text + splits = post_process(splits) + + print(f"Splitted the pdf in {len(splits)} chunks...") + + return splits + + +# +# Load the embedding model +# +def create_cached_embedder(): + print("Initializing Embeddings model...") + + # Introduced to cache embeddings and make it faster + fs = LocalFileStore("./vector-cache/") + + if EMBED_TYPE == "COHERE": + print("Loading Cohere Embeddings Model...") + embed_model = CohereEmbeddings( + model=EMBED_COHERE_MODEL_NAME, cohere_api_key=COHERE_API_KEY + ) + elif EMBED_TYPE == "LOCAL": + print(f"Loading HF Embeddings Model: {EMBED_HF_MODEL_NAME}") + + model_kwargs = {"device": "cpu"} + # changed to True for BAAI, to use cosine similarity + encode_kwargs = {"normalize_embeddings": True} + + embed_model = HuggingFaceEmbeddings( + model_name=EMBED_HF_MODEL_NAME, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs, + ) + + # the cache for embeddings + cached_embedder = CacheBackedEmbeddings.from_bytes_store( + embed_model, fs, namespace=embed_model.model_name + ) + + return cached_embedder + + +# +# create retrievere with optional reranker +# +def create_retriever(vectorstore): + if ADD_RERANKER == False: + # no reranking + print("No reranking...") + retriever = vectorstore.as_retriever(search_kwargs={"k": MAX_DOCS_RETRIEVED}) + else: + # to add reranking + print("Adding reranking to QA chain...") + + compressor = CohereRerank(cohere_api_key=COHERE_API_KEY) + + base_retriever = vectorstore.as_retriever( + search_kwargs={"k": MAX_DOCS_RETRIEVED} + ) + + retriever = ContextualCompressionRetriever( + base_compressor=compressor, base_retriever=base_retriever + ) + + return retriever + + +# +# create vector store +# +def create_vector_store(store_type, document_splits, embedder): + global 
vectorstore + print(f"Indexing: using {store_type} as Vector Store...") + + if store_type == "ORACLEDB": + connection = oracledb.connect( + user="ragchat", + password="ragchat", + dsn="localhost/freepdb1") + vectorstore = OracleVS.from_documents( + documents=document_splits, + embedding=embedder, + client=connection, + table_name="oravs", + distance_strategy=DistanceStrategy.DOT_PRODUCT + ) + print(f"Vector Store Table: {vectorstore.table_name}") + elif store_type == "FAISS": + # modified to cache + vectorstore = FAISS.from_documents( + documents=document_splits, embedding=embedder + ) + elif store_type == "CHROME": + # modified to cache + vectorstore = Chroma.from_documents( + documents=document_splits, embedding=embedder + ) + + + print("Index created.") + return vectorstore + +def make_security_token_signer(oci_config): + pk = oci.signer.load_private_key_from_file(oci_config.get("key_file"), None) + with open(oci_config.get("security_token_file")) as f: + st_string = f.read() + return oci.auth.signers.SecurityTokenSigner(st_string, pk) + +# +# Build LLM +# +def build_llm(llm_type): + print(f"Using {llm_type} llm...") + signer = make_security_token_signer(oci_config=oci_config) + if llm_type == "OCI": + + print("Using OCI experimental integration with LangChain...") + + llm = OCIGenAI( + model_id="cohere.command", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..yourcompartmentid", + model_kwargs={"max_tokens": 200}, + auth_type='SECURITY_TOKEN', + ) + elif llm_type == "COHERE": + llm = Cohere( + model="command", # using large model and not nightly + cohere_api_key=COHERE_API_KEY, + max_tokens=MAX_TOKENS, + temperature=TEMPERATURE, + auth_type='SECURITY_TOKEN', + ) + + return llm + + +# +# Initialize_rag_chain +# +# to run it only once +@st.cache_resource +def initialize_rag_chain(): + # Initialize RAG + + # 1. 
Load a list of pdf documents + all_pages = load_all_pages(BOOK_LIST) + + # 2. Split pages in chunks + document_splits = split_in_chunks(all_pages) + + # 3. Load embeddings model + embedder = create_cached_embedder() + + # 4. Create a Vectore Store and store embeddings + vectorstore = create_vector_store(VECTOR_STORE_NAME, document_splits, embedder) + + # 5. Create a retriever + # increased num. of docs to 5 (default to 4) + # added optionally a reranker + retriever = create_retriever(vectorstore) + + # 6. Build the LLM + llm = build_llm(LLM_TYPE) + + # 7. define the prompt (for now hard coded...) + # rag_prompt = hub.pull("rlm/rag-prompt") + template = """Answer the question based only on the following context: + {context} + + Question: {question} + """ + rag_prompt = ChatPromptTemplate.from_template(template) + + # 8. build the entire RAG chain + print("Building rag_chain...") + rag_chain = ( + {"context": retriever, "question": RunnablePassthrough()} | rag_prompt | llm + ) + + print("Init RAG complete...") + + return rag_chain + + +# +# def: get_answer from LLM +# +def get_answer(rag_chain, question): + response = rag_chain.invoke(question) + + if DEBUG: + print(f"Question: {question}") + print("The response:") + print(response) + print() + + return response diff --git a/python-rag-chatbot/oci_llm.py b/python-rag-chatbot/oci_llm.py new file mode 100644 index 00000000..1ef85583 --- /dev/null +++ b/python-rag-chatbot/oci_llm.py @@ -0,0 +1,125 @@ +# +# see https://python.langchain.com/docs/modules/model_io/models/llms/custom_llm +# +from typing import Any, List, Mapping, Optional +from time import time + +from langchain.callbacks.manager import CallbackManagerForLLMRun +from langchain.llms.base import LLM + +# import to use OCI GenAI Python API +from oci.generative_ai import GenerativeAiClient +import oci.generative_ai.models +from oci.generative_ai_inference.models import GenerateTextDetails, OnDemandServingMode +from oci.retry import NoneRetryStrategy + + +class 
OCIGenAILLM(LLM): + # added by LS + model_id: str = "cohere.command" + debug: bool = False + + max_tokens: int = 300 + temperature: int = 0 + frequency_penalty: int = 1 + top_p: float = 0.75 + top_k: int = 0 + config: Optional[Any] = None + service_endpoint: Optional[str] = None + compartment_id: Optional[str] = None + timeout: Optional[int] = 10 + + # moved here by LS + generative_ai_client: GenerativeAiClient = None + + """OCI Generative AI LLM model. + + To use, you should have the ``oci`` python package installed, and pass + named parameters to the constructor. + + Example: + .. code-block:: python + + compartment_id = "ocid1.compartment.oc1..." + CONFIG_PROFILE = "my_custom_profile" # or DEFAULT + config = oci.config.from_file('~/.oci/config', CONFIG_PROFILE) + endpoint = "https://generativeai.aiservice.us-chicago-1.oci.oraclecloud.com" + llm = OCIGenAILLM( + temperature=0, + config=config, + compartment_id=compartment_id, + service_endpoint=endpoint + ) + + + """ + + def __init__(self, **kwargs): + # print(kwargs) + super().__init__(**kwargs) + + # here we create and store the GenAIClient + self.generative_ai_client = GenerativeAiClient( + config=self.config, + service_endpoint=self.service_endpoint, + retry_strategy=NoneRetryStrategy(), + timeout=(self.timeout, 240), + ) + + @property + def _llm_type(self) -> str: + return "OCI Generative AI LLM" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + if stop is not None: + raise ValueError("stop kwargs are not permitted.") + + # calling OCI GenAI + tStart = time() + + generate_text_detail = GenerateTextDetails() + generate_text_detail.prompts = [prompt] + generate_text_detail.serving_mode = OnDemandServingMode(model_id=self.model_id) + generate_text_detail.compartment_id = self.compartment_id + generate_text_detail.max_tokens = self.max_tokens + generate_text_detail.temperature = self.temperature + 
generate_text_detail.frequency_penalty = self.frequency_penalty + generate_text_detail.top_p = self.top_p + generate_text_detail.top_k = self.top_k + + if self.debug: + print() + print("The input prompt is:") + print(prompt) + print() + + print("Calling OCI genai...") + generate_text_response = self.generative_ai_client.generate_text( + generate_text_detail + ) + + + tEla = time() - tStart + + if self.debug: + print(f"Elapsed time: {round(tEla, 1)} sec...") + print() + + return generate_text_response.data.generated_texts[0][0].text + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return { + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "frequency_penalty": self.frequency_penalty, + "top_p": self.top_p, + "top_k": self.top_k, + } diff --git a/python-rag-chatbot/oracle_bot.py b/python-rag-chatbot/oracle_bot.py new file mode 100644 index 00000000..46382776 --- /dev/null +++ b/python-rag-chatbot/oracle_bot.py @@ -0,0 +1,60 @@ +# +# Streamlit App to demo OCI AI GenAI +# this is the main code, with the UI +# +import streamlit as st + +# this function initialise the rag chain, creating retriever, llm and chain +from init_rag_streamlit import initialize_rag_chain, get_answer + +# +# Configs +# + + +def reset_conversation(): + st.session_state.messages = [] + + +# +# Main +# +st.title("OSD Generative AI Bot powered by RAG") + +# Added reset button +st.button("Clear Chat History", on_click=reset_conversation) + +# Initialize chat history +if "messages" not in st.session_state: + reset_conversation() + +# init RAG +rag_chain = initialize_rag_chain() + +# Display chat messages from history on app rerun +for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + +# React to user input +if question := st.chat_input("Hello, how can I help you?"): + # Display user message in chat message container + 
st.chat_message("user").markdown(question) + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": question}) + + # here we call OCI genai... + + try: + print("...Panks call") + response = get_answer(rag_chain, question) + print("...Panks call response"+response) + # Display assistant response in chat message container + with st.chat_message("assistant"): + st.markdown(response) + + # Add assistant response to chat history + st.session_state.messages.append({"role": "assistant", "content": response}) + + except Exception as e: + st.error("An error for the panks occurred: " + str(e)) diff --git a/python-rag-chatbot/oracle_bot_exp.py b/python-rag-chatbot/oracle_bot_exp.py new file mode 100644 index 00000000..31dd1e4e --- /dev/null +++ b/python-rag-chatbot/oracle_bot_exp.py @@ -0,0 +1,60 @@ +# +# Streamlit App to demo OCI AI GenAI +# this is the main code, with the UI +# +import streamlit as st + +# this function initialise the rag chain, creating retriever, llm and chain +from init_rag_streamlit_exp import initialize_rag_chain, get_answer + +# +# Configs +# + + +def reset_conversation(): + st.session_state.messages = [] + + +# +# Main +# +st.title("Oracle Database AI Bot powered by RAG") + +# Added reset button +st.button("Clear Chat History", on_click=reset_conversation) + +# Initialize chat history +if "messages" not in st.session_state: + reset_conversation() + +# init RAG +rag_chain = initialize_rag_chain() + +# Display chat messages from history on app rerun +for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + +# React to user input +if question := st.chat_input("Hello, how can I help you?"): + # Display user message in chat message container + st.chat_message("user").markdown(question) + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": question}) + + # here we call OCI genai... 
+ + try: + print("...") + response = get_answer(rag_chain, question) + + # Display assistant response in chat message container + with st.chat_message("assistant"): + st.markdown(response) + + # Add assistant response to chat history + st.session_state.messages.append({"role": "assistant", "content": response}) + + except Exception as e: + st.error("An error occurred: " + str(e)) diff --git a/python-rag-chatbot/pdfFiles/Oracle True cache.pdf b/python-rag-chatbot/pdfFiles/Oracle True cache.pdf new file mode 100644 index 00000000..7d223961 Binary files /dev/null and b/python-rag-chatbot/pdfFiles/Oracle True cache.pdf differ diff --git a/python-rag-chatbot/pdfFiles/Trobleshooting.pdf b/python-rag-chatbot/pdfFiles/Trobleshooting.pdf new file mode 100644 index 00000000..998842b1 Binary files /dev/null and b/python-rag-chatbot/pdfFiles/Trobleshooting.pdf differ diff --git a/python-rag-chatbot/pdfFiles/globally-distributed-autonomous-database.pdf b/python-rag-chatbot/pdfFiles/globally-distributed-autonomous-database.pdf new file mode 100644 index 00000000..d8789df2 Binary files /dev/null and b/python-rag-chatbot/pdfFiles/globally-distributed-autonomous-database.pdf differ diff --git a/python-rag-chatbot/pdfFiles/oracle-database-23c.pdf b/python-rag-chatbot/pdfFiles/oracle-database-23c.pdf new file mode 100644 index 00000000..1402bc27 Binary files /dev/null and b/python-rag-chatbot/pdfFiles/oracle-database-23c.pdf differ diff --git a/python-rag-chatbot/pdfFiles/oracle-globally-distributed-database-guide.pdf b/python-rag-chatbot/pdfFiles/oracle-globally-distributed-database-guide.pdf new file mode 100644 index 00000000..eed4573a Binary files /dev/null and b/python-rag-chatbot/pdfFiles/oracle-globally-distributed-database-guide.pdf differ diff --git a/python-rag-chatbot/pdfFiles/sharding-adg-addshard-cookbook-3610618.pdf b/python-rag-chatbot/pdfFiles/sharding-adg-addshard-cookbook-3610618.pdf new file mode 100644 index 00000000..5c194b64 Binary files /dev/null and 
b/python-rag-chatbot/pdfFiles/sharding-adg-addshard-cookbook-3610618.pdf differ diff --git a/python-rag-chatbot/requirements.txt b/python-rag-chatbot/requirements.txt new file mode 100644 index 00000000..d1eaebd4 --- /dev/null +++ b/python-rag-chatbot/requirements.txt @@ -0,0 +1,216 @@ +aiohttp==3.8.6 +aiosignal==1.3.1 +altair==5.1.2 +annotated-types==0.6.0 +anyio==3.7.1 +appnope==0.1.3 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 +asttokens==2.4.0 +async-lru==2.0.4 +async-timeout==4.0.3 +attrs==23.1.0 +Babel==2.13.0 +backcall==0.2.0 +backoff==2.2.1 +bcrypt==4.0.1 +beautifulsoup4==4.12.2 +black==23.9.1 +bleach==6.1.0 +blinker==1.6.3 +cachetools==5.3.1 +certifi==2023.7.22 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.0 +chroma-hnswlib==0.7.3 +chromadb==0.4.14 +circuitbreaker==1.4.0 +click==8.1.7 +cohere==4.27 +colorama==0.4.6 +coloredlogs==15.0.1 +comm==0.1.4 +cryptography==42.0.4 +dataclasses-json==0.6.1 +debugpy==1.8.0 +decorator==5.1.1 +defusedxml==0.7.1 +emoji==2.8.0 +exceptiongroup==1.1.3 +executing==2.0.0 +faiss-cpu==1.7.4 +fastapi==0.109.1 +fastavro==1.8.2 +fastjsonschema==2.18.1 +filelock==3.12.4 +filetype==1.2.0 +flatbuffers==23.5.26 +fqdn==1.5.1 +frozenlist==1.4.0 +fsspec==2023.9.2 +gitdb==4.0.11 +GitPython==3.1.41 +grpcio==1.59.0 +h11==0.14.0 +halo==0.0.31 +httptools==0.6.0 +huggingface-hub==0.17.3 +humanfriendly==10.0 +idna==3.4 +importlib-metadata==6.8.0 +importlib-resources==6.1.0 +IProgress==0.4 +ipykernel==6.25.2 +ipython==8.16.1 +ipywidgets==8.1.1 +isoduration==20.11.0 +jedi==0.19.1 +Jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonpatch==1.33 +jsonpointer==2.4 +jsonschema==4.19.1 +jsonschema-specifications==2023.7.1 +jupyter-events==0.7.0 +jupyter-lsp==2.2.2 +jupyter_client==8.4.0 +jupyter_core==5.4.0 +jupyter_server==2.7.3 +jupyter_server_terminals==0.4.4 +jupyterlab==4.1.0b2 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.9 +jupyterlab_server==2.25.0 +langchain==0.1.12 +langchainhub==0.1.13 +langdetect==1.0.9 
+langsmith==0.0.43 +log-symbols==0.0.14 +lxml==4.9.3 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +marshmallow==3.20.1 +matplotlib-inline==0.1.6 +mdurl==0.1.2 +mistune==3.0.2 +monotonic==1.6 +mpmath==1.3.0 +multidict==6.0.4 +mypy-extensions==1.0.0 +nbclient==0.8.0 +nbconvert==7.9.2 +nbformat==5.9.2 +nest-asyncio==1.5.8 +networkx==3.1 +nltk==3.8.1 +notebook_shim==0.2.3 +numpy==1.26.1 +oci @ file://Users/pankatiw/Documents/langchain_oracle/oci-2.112.1+preview.1.1649-py3-none-any.whl #sha256=f2b834774aa5c053f9ef8aa2d4c3094ea4047e1c57a1498a2c9b1751408675a8 +onnxruntime==1.16.1 +openai==0.28.1 +overrides==7.4.0 +packaging==23.2 +pandas==2.1.1 +pandocfilters==1.5.0 +parso==0.8.3 +pathspec==0.11.2 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==10.1.0 +platformdirs==3.11.0 +posthog==3.0.2 +prometheus-client==0.17.1 +prompt-toolkit==3.0.39 +protobuf==4.24.4 +psutil==5.9.6 +ptyprocess==0.7.0 +pulsar-client==3.3.0 +pure-eval==0.2.2 +pyarrow==13.0.0 +pycparser==2.21 +pydantic==2.4.2 +pydantic_core==2.10.1 +pydeck==0.8.1b0 +Pygments==2.16.1 +pyOpenSSL==23.2.0 +pypdf==3.17.0 +PyPika==0.48.9 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-iso639==2023.6.15 +python-json-logger==2.0.7 +python-magic==0.4.27 +pytz==2023.3.post1 +PyYAML==6.0.1 +pyzmq==25.1.1 +rapidfuzz==3.4.0 +referencing==0.30.2 +regex==2023.10.3 +requests==2.31.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rich==13.6.0 +rpds-py==0.10.6 +safetensors==0.4.0 +scikit-learn==1.3.1 +scipy==1.11.3 +Send2Trash==1.8.2 +sentence-transformers==2.2.2 +sentencepiece==0.1.99 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.0 +soupsieve==2.5 +spinners==0.0.24 +SQLAlchemy==2.0.22 +stack-data==0.6.3 +starlette==0.36.2 +streamlit==1.30.0 +sympy==1.12 +tabulate==0.9.0 +tenacity==8.2.3 +termcolor==2.3.0 +terminado==0.17.1 +threadpoolctl==3.2.0 +tiktoken==0.5.1 +tinycss2==1.2.1 +tokenize-rt==5.2.0 +tokenizers==0.14.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.0 +torch==2.1.0 +torchvision==0.16.0 +tornado==6.3.3 +tqdm==4.66.1 
+traitlets==5.11.2 +transformers==4.34.0 +typer==0.9.0 +types-python-dateutil==2.8.19.14 +types-requests==2.31.0.9 +typing-inspect==0.9.0 +typing_extensions==4.8.0 +tzdata==2023.3 +tzlocal==5.2 +unstructured==0.10.23 +uri-template==1.3.0 +urllib3==2.0.7 +uvicorn==0.23.2 +uvloop==0.18.0 +validators==0.22.0 +watchdog==3.0.0 +watchfiles==0.21.0 +wcwidth==0.2.8 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.6.4 +websockets==11.0.3 +widgetsnbextension==4.0.9 +xmltodict==0.13.0 +yarl==1.9.2 +zipp==3.17.0 + +oci~=2.126.4 \ No newline at end of file diff --git a/python-rag-chatbot/run_oracle_bot.sh b/python-rag-chatbot/run_oracle_bot.sh new file mode 100755 index 00000000..1ade755c --- /dev/null +++ b/python-rag-chatbot/run_oracle_bot.sh @@ -0,0 +1,2 @@ +streamlit run oracle_bot.py + diff --git a/python-rag-chatbot/run_oracle_bot_exp.sh b/python-rag-chatbot/run_oracle_bot_exp.sh new file mode 100755 index 00000000..405ca85a --- /dev/null +++ b/python-rag-chatbot/run_oracle_bot_exp.sh @@ -0,0 +1,2 @@ +streamlit run oracle_bot_exp.py + diff --git a/sql/aiuser-tables-indexes-functions.sql b/sql/aiuser-tables-indexes-functions.sql index 3d4668a3..bce31652 100644 --- a/sql/aiuser-tables-indexes-functions.sql +++ b/sql/aiuser-tables-indexes-functions.sql @@ -179,3 +179,54 @@ BEGIN COMMIT; END; / + + +CREATE TABLE image_store ( + id NUMBER GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY, + image BLOB, + image_name VARCHAR2(100) +) +/ + +create or replace PROCEDURE insert_image(p_image_name IN VARCHAR2, p_image BLOB) IS +BEGIN + INSERT INTO image_store (image_name, image) VALUES (p_image_name, p_image); +END; +/ + +BEGIN + ORDS.ENABLE_OBJECT( + P_ENABLED => TRUE, + P_SCHEMA => 'AIUSER', + P_OBJECT => 'INSERT_IMAGE', + P_OBJECT_TYPE => 'PROCEDURE', + P_OBJECT_ALIAS => 'insert_image', + P_AUTO_REST_AUTH => FALSE + ); + COMMIT; +END; +/ + +--curl --location --request POST \ +--'https://yourORDSendpoint.adb.us-ashburn-1.oraclecloudapps.com/ords/aiuser/insert_image/' \ 
+----header 'Content-Type: application/json' \ +----data-binary '{ +-- "p_image_name": "", +-- "p_image": "" +--}' + +BEGIN + ORDS.ENABLE_OBJECT( + P_ENABLED => TRUE, + P_SCHEMA => 'AIUSER', + P_OBJECT => 'IMAGE_STORE', + P_OBJECT_TYPE => 'TABLE', + P_OBJECT_ALIAS => 'image_store', + P_AUTO_REST_AUTH => FALSE + ); + COMMIT; +END; +/ + +--curl --location \ +--'https://yourORDSendpoint.adb.us-ashburn-1.oraclecloudapps.com/ords/aiuser/image_store/' diff --git a/src/main/java/oracleai/ImageStore.java b/src/main/java/oracleai/ImageStore.java new file mode 100644 index 00000000..3107a105 --- /dev/null +++ b/src/main/java/oracleai/ImageStore.java @@ -0,0 +1,38 @@ +package oracleai; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class ImageStore { + private Long id; + + @JsonProperty("image_name") + private String imageName; + + @JsonProperty("image") // Assuming the BLOB is mapped to this field + private String imageBase64; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getImageName() { + return imageName; + } + + public void setImageName(String imageName) { + this.imageName = imageName; + } + + public String getImageBase64() { + return imageBase64; + } + + public void setImageBase64(String imageBase64) { + this.imageBase64 = imageBase64; + } +} \ No newline at end of file diff --git a/src/main/java/oracleai/ImageStoreWrapper.java b/src/main/java/oracleai/ImageStoreWrapper.java new file mode 100644 index 00000000..5a1e3a6e --- /dev/null +++ b/src/main/java/oracleai/ImageStoreWrapper.java @@ -0,0 +1,17 @@ +package oracleai; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class ImageStoreWrapper { + private List items; + + public List getItems() { + return items; + } + + public void 
setItems(List items) { + this.items = items; + } +} \ No newline at end of file diff --git a/src/main/java/oracleai/UploadDownloadImage.java b/src/main/java/oracleai/UploadDownloadImage.java new file mode 100644 index 00000000..ba69c965 --- /dev/null +++ b/src/main/java/oracleai/UploadDownloadImage.java @@ -0,0 +1,34 @@ +package oracleai; + + +import oracleai.services.ORDSCalls; +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.multipart.MultipartFile; + +@Controller +@RequestMapping("/transferimage") +public class UploadDownloadImage { + + @PostMapping("/uploadimage") + public String uploadImage(@RequestParam("image") MultipartFile image, Model model) { + ORDSCalls.uploadImage(image); + System.out.println("Image upload complete for: " + image.getOriginalFilename()); + ImageStore[] imageStores = ORDSCalls.getImageStoreData(); + model.addAttribute("images", imageStores); + return "images"; + } + + + + @GetMapping("/downloadimages") + public String getImageStoreData(Model model) { + ImageStore[] imageStores = ORDSCalls.getImageStoreData(); + model.addAttribute("images", imageStores); + return "images"; + } +} diff --git a/src/main/java/oracleai/services/ORDSCalls.java b/src/main/java/oracleai/services/ORDSCalls.java index 8c6cdd99..16ebc597 100644 --- a/src/main/java/oracleai/services/ORDSCalls.java +++ b/src/main/java/oracleai/services/ORDSCalls.java @@ -1,5 +1,8 @@ package oracleai.services; +import oracleai.AIApplication; +import oracleai.ImageStore; +import oracleai.ImageStoreWrapper; import org.jetbrains.annotations.Nullable; import org.springframework.http.*; import org.springframework.stereotype.Service; @@ -8,6 +11,8 @@ import java.util.Base64; 
import java.util.Collections; +import java.util.HashMap; +import java.util.Map; @Service public class ORDSCalls { @@ -66,6 +71,37 @@ private static String callTextSearch(String ordsEndpoint, String jsonPayload) { return response.getBody(); } + public static ResponseEntity uploadImage(MultipartFile image) { + try { + String base64Image = Base64.getEncoder().encodeToString(image.getBytes()); + Map payload = new HashMap<>(); + payload.put("p_image_name", image.getOriginalFilename()); + payload.put("p_image", base64Image); + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity> requestEntity = new HttpEntity<>(payload, headers); + RestTemplate restTemplate = new RestTemplate(); + String uploadUrl = AIApplication.ORDS_ENDPOINT_URL + "insert_image/"; + return restTemplate.exchange(uploadUrl, HttpMethod.POST, requestEntity, String.class); + } catch (Exception e) { + throw new RuntimeException("Failed to upload image", e); + } + } + + public static ImageStore[] getImageStoreData() { + String url = AIApplication.ORDS_ENDPOINT_URL + "image_store/"; + RestTemplate restTemplate = new RestTemplate(); + ResponseEntity response = restTemplate.getForEntity(url, ImageStoreWrapper.class); + ImageStoreWrapper wrapper = response.getBody(); + if (wrapper != null) { + for (ImageStore imageStore : wrapper.getItems()) { + System.out.println("Image Name: " + imageStore.getImageName()); + } + return wrapper.getItems().toArray(new ImageStore[0]); + } else { + return new ImageStore[0]; + } + } } diff --git a/src/main/resources/static/DownloadImages.html b/src/main/resources/static/DownloadImages.html new file mode 100644 index 00000000..084bb3f0 --- /dev/null +++ b/src/main/resources/static/DownloadImages.html @@ -0,0 +1,13 @@ + + + + + Upload Image + + +

Download Images

+
+ +
+ + \ No newline at end of file diff --git a/src/main/resources/static/UploadImage.html b/src/main/resources/static/UploadImage.html new file mode 100644 index 00000000..f031f1ca --- /dev/null +++ b/src/main/resources/static/UploadImage.html @@ -0,0 +1,15 @@ + + + + + Upload Image + + +

Upload Image

+
+ + + +
+ + \ No newline at end of file diff --git a/src/main/resources/static/sidebar.html b/src/main/resources/static/sidebar.html index f39cfd65..d5b8559c 100644 --- a/src/main/resources/static/sidebar.html +++ b/src/main/resources/static/sidebar.html @@ -3,6 +3,8 @@ Object Detection, Text Generation, and Sentiment Analysis
Face Recognition
Database Text Search and DBMS_SEARCH
+Upload Image
+Download Images
Video Analysis (coming soon)
Speech Transcription And Translation (coming soon)
Realtime Speech Transcription (coming soon)
diff --git a/src/main/resources/templates/images.html b/src/main/resources/templates/images.html new file mode 100644 index 00000000..6f3f33a4 --- /dev/null +++ b/src/main/resources/templates/images.html @@ -0,0 +1,43 @@ + + + + + Oracle AI + + + + + + +
+

Labs

+
+
+
+ +
+

Develop with Oracle AI Services and Oracle Database

+
+ +
+
    +
  • +

    Image Name

    + Image +
  • +
+
+
+ + + + + + +