|
| 1 | +from concurrent.futures import ProcessPoolExecutor |
| 2 | +from concurrent.futures.process import BrokenProcessPool |
| 3 | +from threading import Event |
| 4 | +from threading import Thread |
| 5 | +from time import sleep |
| 6 | +from urllib import parse |
| 7 | + |
| 8 | +import requests |
1 | 9 | from fastapi import Depends, FastAPI
|
2 | 10 | from fastapi.staticfiles import StaticFiles
|
3 | 11 | from starlette.middleware import Middleware
|
|
15 | 23 | from app.dependencies import USER_DEPENDENCY, api_service_dependency
|
16 | 24 | from app.middlewares.token_validator import access_control
|
17 | 25 | from app.middlewares.trusted_hosts import TrustedHostMiddleware
|
18 |
| -from app.routers import auth, index, services, users, user_services, websocket |
| 26 | +from app.routers import auth, index, services, user_services, users, websocket |
19 | 27 | from app.shared import Shared
|
20 | 28 | from app.utils.chat.managers.cache import CacheManager
|
21 | 29 | from app.utils.js_initializer import js_url_initializer
|
22 | 30 | from app.utils.logger import api_logger
|
23 | 31 | from app.viewmodels.admin import ApiKeyAdminView, UserAdminView
|
24 | 32 |
|
25 | 33 |
|
def check_health(url: str) -> bool:
    """Return True if the server behind *url* answers 200 on its /health endpoint.

    Only the scheme and network location of *url* are used; the path is
    replaced with ``/health``.  Any failure — connection error, timeout,
    malformed URL, non-200 status — is reported as unhealthy (False).
    """
    try:
        # Parse once instead of twice (the original called urlparse per part).
        parsed = parse.urlparse(url)
        # A timeout is essential here: this runs inside the monitor thread
        # every 0.5 s, and a hung /health endpoint without a timeout would
        # block the monitor forever.
        response = requests.get(f"{parsed.scheme}://{parsed.netloc}/health", timeout=5)
        return response.status_code == 200
    except Exception:
        # Broad catch is intentional: any failure at all means "not healthy".
        return False
| 43 | + |
| 44 | + |
def start_llama_cpp_server():
    """Submit the Llama CPP server loop to the shared process pool.

    If the pool turns out to be broken, it is torn down, replaced with a
    fresh ``ProcessPoolExecutor``, and the start is retried.  Any other
    failure is logged and suppressed.
    """
    # Imported lazily so the (heavy) server module is only loaded on demand.
    from app.start_llama_cpp_server import run

    api_logger.critical("Starting Llama CPP server")
    try:
        Shared().process_pool_executor.submit(run, terminate_event=Shared().process_terminate_signal)
    except BrokenProcessPool as e:
        api_logger.exception(f"Broken Llama CPP server: {e}")
        # Discard the dead pool without waiting, install a new one, retry.
        Shared().process_pool_executor.shutdown(wait=False)
        Shared().process_pool_executor = ProcessPoolExecutor()
        start_llama_cpp_server()
    except Exception as e:
        api_logger.exception(f"Failed to start Llama CPP server: {e}")
| 61 | + |
| 62 | + |
def shutdown_llama_cpp_server():
    """Ask the Llama CPP server process to stop via the shared terminate event."""
    api_logger.critical("Shutting down Llama CPP server")
    # The worker submitted by start_llama_cpp_server watches this event.
    Shared().process_terminate_signal.set()
| 66 | + |
| 67 | + |
def monitor_llama_cpp_server(config: Config, terminate_signal: Event) -> None:
    """Poll the Llama CPP server's health every 0.5 s until told to stop.

    While the server is unreachable (and no boot is already in flight) it
    is (re)started and the config flags are updated accordingly.  When the
    terminate signal fires, the server itself is asked to shut down.
    """
    while not terminate_signal.is_set():
        sleep(0.5)
        url = config.llama_cpp_api_url
        if not url:
            # No server configured — nothing to monitor this tick.
            continue
        if check_health(url):
            # Server responded: clear the booting flag, mark it available.
            config.is_llama_cpp_booting = False
            config.llama_cpp_available = True
            continue
        # Unhealthy: skip the restart while a boot is still in progress,
        # or if we were asked to stop while the health check was running.
        if config.is_llama_cpp_booting or terminate_signal.is_set():
            continue
        api_logger.error("Llama CPP server is not available")
        config.llama_cpp_available = False
        config.is_llama_cpp_booting = True
        start_llama_cpp_server()
    shutdown_llama_cpp_server()
| 83 | + |
| 84 | + |
26 | 85 | def create_app(config: Config) -> FastAPI:
|
27 | 86 | # Initialize app & db & js
|
28 | 87 | new_app = FastAPI(
|
@@ -132,11 +191,38 @@ async def startup():
|
132 | 191 | except ImportError:
|
133 | 192 | api_logger.critical("uvloop not installed!")
|
134 | 193 |
|
| 194 | + if config.llama_cpp_api_url: |
| 195 | + # Start Llama CPP server monitoring |
| 196 | + api_logger.critical("Llama CPP server monitoring started!") |
| 197 | + Shared().thread = Thread( |
| 198 | + target=monitor_llama_cpp_server, |
| 199 | + args=(config, Shared().thread_terminate_signal), |
| 200 | + ) |
| 201 | + Shared().thread.start() |
| 202 | + |
135 | 203 | @new_app.on_event("shutdown")
|
136 | 204 | async def shutdown():
|
137 | 205 | # await CacheManager.delete_user(f"testaccount@{HOST_MAIN}")
|
138 |
| - Shared().process_manager.shutdown() |
139 |
| - Shared().process_pool_executor.shutdown() |
| 206 | + Shared().thread_terminate_signal.set() |
| 207 | + Shared().process_terminate_signal.set() |
| 208 | + |
| 209 | + process_manager = Shared()._process_manager |
| 210 | + if process_manager is not None: |
| 211 | + process_manager.shutdown() |
| 212 | + |
| 213 | + process_pool_executor = Shared()._process_pool_executor |
| 214 | + if process_pool_executor is not None: |
| 215 | + process_pool_executor.shutdown(wait=False) |
| 216 | + |
| 217 | + process = Shared()._process |
| 218 | + if process is not None: |
| 219 | + process.terminate() |
| 220 | + process.join() |
| 221 | + |
| 222 | + thread = Shared()._thread |
| 223 | + if thread is not None: |
| 224 | + thread.join() |
| 225 | + |
140 | 226 | await db.close()
|
141 | 227 | await cache.close()
|
142 | 228 | api_logger.critical("DB & CACHE connection closed!")
|
|
0 commit comments