Add Intel GPU support #893

Open · wants to merge 3 commits into main
15 changes: 11 additions & 4 deletions README.md
@@ -53,20 +53,27 @@

 - Clone the repo
 ``` sh
-git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git
+git clone --recursive https://github.com/DDXDB/CosyVoice-XPU.git
 # If you failed to clone submodule due to network failures, please run following command until success
-cd CosyVoice
+cd CosyVoice-XPU
 git submodule update --init --recursive
 ```

+- Install `Intel® Deep Learning Essentials` or `Intel® oneAPI Base Toolkit`
+- please see: https://pytorch.org/docs/main/notes/get_start_xpu.html
 - Install Conda: please see https://docs.conda.io/en/latest/miniconda.html
 - Create Conda env:

 ``` sh
-conda create -n cosyvoice -y python=3.10
-conda activate cosyvoice
+conda create -n cosyvoice-XPU -y python=3.10
+conda activate cosyvoice-XPU
 # pynini is required by WeTextProcessing, use conda to install it as it can be executed on all platform.
 conda install -y -c conda-forge pynini==2.1.5
+# Start the oneAPI env and install PyTorch with XPU support (Windows cmd syntax; paths with spaces must be quoted)
+call "C:\Program Files (x86)\Intel\oneAPI\compiler\2025.0\env\vars.bat"
+call "C:\Program Files (x86)\Intel\oneAPI\ocloc\2025.0\env\vars.bat"
+pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu
+
 pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com

 # If you encounter sox compatibility issues
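After these steps, a quick sanity check confirms that the XPU wheel is the build actually installed (a minimal sketch, not part of this PR; assumes the nightly build from the index above and should be run inside the `cosyvoice-XPU` env):

```python
# Hypothetical verification snippet for the PyTorch XPU install.
import torch

print(torch.__version__)          # nightly XPU wheels report a "+xpu" build suffix
print(torch.xpu.is_available())   # True once the Intel GPU driver and oneAPI env are visible

if torch.xpu.is_available():
    x = torch.randn(4, 4, device='xpu')
    print((x @ x).device)         # expected: xpu:0
```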
11 changes: 11 additions & 0 deletions cosyvoice/cli/cosyvoice.py
@@ -18,6 +18,11 @@
 from hyperpyyaml import load_hyperpyyaml
 from modelscope import snapshot_download
 import torch
+try:
+    import intel_extension_for_pytorch as ipex
+except Exception:
+    pass
+
 from cosyvoice.cli.frontend import CosyVoiceFrontEnd
 from cosyvoice.cli.model import CosyVoiceModel, CosyVoice2Model
 from cosyvoice.utils.file_utils import logging
@@ -45,6 +50,9 @@ def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False):
-        if torch.cuda.is_available() is False and (load_jit is True or load_trt is True or fp16 is True):
-            load_jit, load_trt, fp16 = False, False, False
-            logging.warning('no cuda device, set load_jit/load_trt/fp16 to False')
+        if torch.cuda.is_available() is False and torch.xpu.is_available() is False and (load_jit is True or load_trt is True or fp16 is True):
+            load_jit, load_trt, fp16 = False, False, False
+            logging.warning('no cuda or xpu device, set load_jit/load_trt/fp16 to False')
         self.model = CosyVoiceModel(configs['llm'], configs['flow'], configs['hift'], fp16)
         self.model.load('{}/llm.pt'.format(model_dir),
                         '{}/flow.pt'.format(model_dir),
@@ -145,6 +153,9 @@ def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False):
-        if torch.cuda.is_available() is False and (load_jit is True or load_trt is True or fp16 is True):
-            load_jit, load_trt, fp16 = False, False, False
-            logging.warning('no cuda device, set load_jit/load_trt/fp16 to False')
+        if torch.cuda.is_available() is False and torch.xpu.is_available() is False and (load_jit is True or load_trt is True or fp16 is True):
+            load_jit, load_trt, fp16 = False, False, False
+            logging.warning('no cuda or xpu device, set load_jit/load_trt/fp16 to False')
         self.model = CosyVoice2Model(configs['llm'], configs['flow'], configs['hift'], fp16)
         self.model.load('{}/llm.pt'.format(model_dir),
                         '{}/flow.pt'.format(model_dir),
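The same availability guard is duplicated in both constructors; a small helper could centralize the CUDA/XPU probing (a sketch only — `resolve_device` and `has_accelerator` are hypothetical names, not part of this PR):

```python
# Hypothetical helper: centralize accelerator detection so the guards in
# CosyVoice and CosyVoice2 stay in sync.
import torch

def resolve_device() -> torch.device:
    if torch.cuda.is_available():
        return torch.device('cuda')
    # torch.xpu mirrors the torch.cuda API on XPU-enabled builds; guard with
    # hasattr for builds that predate the XPU backend.
    if hasattr(torch, 'xpu') and torch.xpu.is_available():
        return torch.device('xpu')
    return torch.device('cpu')

def has_accelerator() -> bool:
    return resolve_device().type != 'cpu'
```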
8 changes: 7 additions & 1 deletion cosyvoice/cli/frontend.py
@@ -16,6 +16,11 @@
 import json
 import onnxruntime
 import torch
+try:
+    import intel_extension_for_pytorch as ipex
+except Exception:
+    pass
+
 import numpy as np
 import whisper
 from typing import Callable
@@ -47,13 +52,14 @@ def __init__(self,
                  allowed_special: str = 'all'):
         self.tokenizer = get_tokenizer()
         self.feat_extractor = feat_extractor
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'xpu' if torch.xpu.is_available() else 'cpu')
         option = onnxruntime.SessionOptions()
         option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
         option.intra_op_num_threads = 1
         self.campplus_session = onnxruntime.InferenceSession(campplus_model, sess_options=option, providers=["CPUExecutionProvider"])
         self.speech_tokenizer_session = onnxruntime.InferenceSession(speech_tokenizer_model, sess_options=option,
                                                                      providers=["CUDAExecutionProvider" if torch.cuda.is_available() else
                                                                                 "CPUExecutionProvider"])
         if os.path.exists(spk2info):
             self.spk2info = torch.load(spk2info, map_location=self.device)
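Note that stock onnxruntime wheels ship no Intel GPU execution provider — selecting `CUDAExecutionProvider` when only an XPU is present does not run on the Intel GPU — so on an XPU-only machine the speech tokenizer session falls back to CPU. If Intel GPU execution of the ONNX models is wanted, the usual route is the OpenVINO execution provider from the separate `onnxruntime-openvino` package, which this PR does not add; a hedged sketch of graceful provider selection:

```python
# Sketch: pick an execution provider that actually exists in the installed
# onnxruntime build, preferring CUDA, then OpenVINO (Intel), then CPU.
import onnxruntime
import torch

available = onnxruntime.get_available_providers()
if torch.cuda.is_available() and 'CUDAExecutionProvider' in available:
    providers = ['CUDAExecutionProvider']
elif 'OpenVINOExecutionProvider' in available:
    providers = ['OpenVINOExecutionProvider']
else:
    providers = ['CPUExecutionProvider']
```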
13 changes: 9 additions & 4 deletions cosyvoice/cli/model.py
@@ -14,6 +14,11 @@
 import os
 from typing import Generator
 import torch
+try:
+    import intel_extension_for_pytorch as ipex
+except Exception:
+    pass
+
 import numpy as np
 import threading
 import time
@@ -31,7 +36,7 @@ def __init__(self,
                  flow: torch.nn.Module,
                  hift: torch.nn.Module,
                  fp16: bool):
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'xpu' if torch.xpu.is_available() else 'cpu')
         self.llm = llm
         self.flow = flow
         self.hift = hift
@@ -57,7 +62,7 @@ def __init__(self,
         # rtf and decoding related
         self.stream_scale_factor = 1
         assert self.stream_scale_factor >= 1, 'stream_scale_factor should be greater than 1, change it according to your actual rtf'
-        self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else nullcontext()
+        self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else torch.xpu.stream(torch.xpu.Stream(self.device)) if torch.xpu.is_available() else nullcontext()
         self.lock = threading.Lock()
         # dict used to store session related variable
         self.tts_speech_token_dict = {}
@@ -286,7 +291,7 @@ def __init__(self,
                  flow: torch.nn.Module,
                  hift: torch.nn.Module,
                  fp16: bool):
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'xpu' if torch.xpu.is_available() else 'cpu')
         self.llm = llm
         self.flow = flow
         self.hift = hift
@@ -307,7 +312,7 @@ def __init__(self,
         self.speech_window = np.hamming(2 * self.source_cache_len)
         # rtf and decoding related
         self.stream_scale_factor = 1
-        self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else nullcontext()
+        self.llm_context = torch.cuda.stream(torch.cuda.Stream(self.device)) if torch.cuda.is_available() else torch.xpu.stream(torch.xpu.Stream(self.device)) if torch.xpu.is_available() else nullcontext()
         self.lock = threading.Lock()
         # dict used to store session related variable
         self.tts_speech_token_dict = {}
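The nested conditional assigned to `self.llm_context` now appears twice in this file; a device-agnostic stream context could factor it out (a sketch — `stream_context` is a hypothetical helper, not part of this PR):

```python
# Hypothetical helper: device-agnostic stream context for the LLM worker.
from contextlib import nullcontext
import torch

def stream_context(device: torch.device):
    if device.type == 'cuda':
        return torch.cuda.stream(torch.cuda.Stream(device))
    if device.type == 'xpu':
        # torch.xpu.Stream / torch.xpu.stream mirror their CUDA counterparts
        # on XPU-enabled PyTorch builds.
        return torch.xpu.stream(torch.xpu.Stream(device))
    return nullcontext()
```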
5 changes: 2 additions & 3 deletions requirements.txt
@@ -29,11 +29,10 @@ tensorboard==2.14.0
 tensorrt-cu12==10.0.1; sys_platform == 'linux'
 tensorrt-cu12-bindings==10.0.1; sys_platform == 'linux'
 tensorrt-cu12-libs==10.0.1; sys_platform == 'linux'
-torch==2.3.1
-torchaudio==2.3.1
+
 transformers==4.40.1
 uvicorn==0.30.0
 wget==3.2
 fastapi==0.115.6
 fastapi-cli==0.0.4
-WeTextProcessing==1.0.3
\ No newline at end of file
+WeTextProcessing==1.0.3
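Note: the `torch==2.3.1` / `torchaudio==2.3.1` pins are dropped because the XPU-enabled nightly wheels are installed beforehand from the PyTorch XPU index (see the README change above); leaving the pins in place would let `pip install -r requirements.txt` overwrite them with CPU/CUDA builds.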