Skip to content

Commit 0692b1c

Browse files
committed
OpenVINO integration for CausalLM models
1 parent 6645b7a commit 0692b1c

File tree

4 files changed

+2905
-1598
lines changed

4 files changed

+2905
-1598
lines changed

Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,8 @@ COPY server/Makefile server/Makefile
154154
# Install server
155155
COPY proto proto
156156
COPY server server
157-
RUN cd server && \
158-
make gen-server && \
159-
pip install ".[accelerate]" --no-cache-dir
157+
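# Disabled alternative: keep pip's download cache in a BuildKit cache mount instead of passing --no-cache-dir.
# RUN --mount=type=cache,target=/root/.cache/pip cd server && make gen-server && pip install ".[accelerate, openvino]"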
158+
RUN cd server && make gen-server && pip install ".[accelerate, openvino]" --no-cache-dir
160159

161160
# Patch codegen model changes into transformers 4.34
162161
RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
@@ -275,7 +274,8 @@ COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-*
275274
# Install server
276275
COPY proto proto
277276
COPY server server
278-
RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu, quantize]" --no-cache-dir
277+
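# Disabled alternative: keep pip's download cache in a BuildKit cache mount instead of passing --no-cache-dir.
# RUN --mount=type=cache,target=/root/.cache/pip cd server && make gen-server && pip install ".[accelerate, onnx-gpu, openvino, quantize]"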
278+
RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu, openvino, quantize]" --no-cache-dir
279279

280280
# Patch codegen model changes into transformers 4.34.0
281281
RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py

0 commit comments

Comments
 (0)