import os
import json
import time
from executor.exec_command import CommandExecutor
# Default upper bound (seconds) handed to the startup wait in
# ExecVLLMServer.ready_go when the caller does not override it: ten minutes.
SERVER_STARTUP_TIMEOUT_SECONDS = 10 * 60

# Seconds to sleep after the profiler config file has been rewritten.
# NOTE(review): presumably gives the running server time to notice the
# change -- confirm against the profiler's reload mechanism.
WAIT_AFTER_CONFIG_CHANGE_SECONDS = 10
class ExecVLLMServer(CommandExecutor):
    """Start a ``vllm serve`` process and toggle its profiler config file.

    Process launching and output matching are delegated to the
    ``CommandExecutor`` base class through ``self.execute`` and ``self.wait``.
    """

    def __init__(
        self,
        model_path=None,
        prof_config_path=None,
        port=None,
        extra_args=None,
        env=None,
        startup_timeout=SERVER_STARTUP_TIMEOUT_SECONDS,
    ):
        """Store the launch parameters; nothing is started here.

        Args:
            model_path: Model argument for ``vllm serve``. Falls back to
                ``/data/Qwen2.5-0.5B-Instruct`` when falsy.
            prof_config_path: Path of the profiler JSON config. Falls back to
                the ``SERVICE_PROF_CONFIG_PATH`` environment variable, then to
                an empty string.
            port: Optional port; when not ``None`` it is passed as ``--port``.
            extra_args: Optional sequence of additional command-line
                arguments appended after the port option.
            env: Optional mapping forwarded as ``env`` to ``self.execute``.
            startup_timeout: Timeout (seconds) forwarded to ``self.wait``
                while waiting for the startup message.
        """
        super().__init__()
        self.model_path = model_path or "/data/Qwen2.5-0.5B-Instruct"
        self.prof_config_path = prof_config_path or os.environ.get("SERVICE_PROF_CONFIG_PATH", "")
        self.port = port
        self.extra_args = extra_args or []
        self.env = env or {}
        self.startup_timeout = startup_timeout

    def ready_go(self):
        """Launch ``vllm serve`` and wait for its startup message.

        Returns:
            bool: ``True`` when ``self.wait`` reports ``exit_code is None``
            and a status equal to ``0``; ``False`` otherwise.
        """
        command = ["vllm", "serve", self.model_path]
        if self.port is not None:
            command.extend(["--port", str(self.port)])
        # Extra arguments are appended regardless of whether a port was given.
        command.extend(self.extra_args)

        self.execute(command, env=self.env)
        exit_code, has_output = self.wait("Application startup complete.", timeout=self.startup_timeout)
        print(f"vLLM startup wait result: exit_code={exit_code}, status={has_output}, timeout={self.startup_timeout}s")
        # NOTE(review): presumably exit_code is None while the process is
        # still running and a status of 0 means the message was seen --
        # confirm against CommandExecutor.wait.
        return exit_code is None and has_output == 0

    def change_vllm_profiler_config(self, enable=1, wait_seconds=WAIT_AFTER_CONFIG_CHANGE_SECONDS):
        """Rewrite the profiler config with a new ``enable`` value, then pause.

        The JSON file at ``self.prof_config_path`` is loaded, its ``enable``
        key is set, and the file is written back with 4-space indentation.

        Args:
            enable: Value stored under the ``enable`` key. Defaults to ``1``.
            wait_seconds: Seconds to sleep after the file has been written and
                closed. A falsy value (``0`` or ``None``) skips the sleep.

        Raises:
            FileNotFoundError: If no config path is configured, or the
                configured file does not exist.
            json.JSONDecodeError: If the file does not contain valid JSON.
        """
        if not self.prof_config_path:
            # Fail early with an actionable message instead of the opaque
            # error produced by open("").
            raise FileNotFoundError(
                "Profiler config path is not set; pass prof_config_path or "
                "set the SERVICE_PROF_CONFIG_PATH environment variable."
            )

        with open(self.prof_config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        config["enable"] = enable

        with open(self.prof_config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=4)

        # Sleep only after the file is closed so the new content is on disk
        # for the whole pause.
        if wait_seconds:
            time.sleep(wait_seconds)