# hermes-router/charts/epplib/values.yaml (code preview on AtomGit) - hermes-router: an intelligent-routing EPP component built on the K8s GIE framework

# Latest commit (lileqi) - chore: sync deployment charts and example profiles
# Latency predictor settings: a training server, one or more prediction
# servers, and extra EPP environment variables (see the sub-sections below).
latencyPredictor:
  # Off by default. NOTE(review): presumably the chart templates skip all
  # latency-predictor resources when this is false - confirm in templates.
  enabled: false
  # Training Server Configuration
  # Image, port, resources, health probes, storage size and string-valued
  # settings for the latency training server.
  trainingServer:
    image:
      registry: registry.k8s.io
      repository: gateway-api-inference-extension/latency-training-server
      tag: v1.5.0
      # NOTE(review): "Always" combined with a pinned tag; confirm a re-pull on
      # every container start is intended (IfNotPresent is the usual pairing).
      pullPolicy: Always
    # Same port number as both probes below; keep the three in sync.
    port: 8000
    resources:
      requests:
        cpu: "2000m"
        memory: "4Gi"
      # Limits are twice the requests for both CPU and memory.
      limits:
        cpu: "4000m"
        memory: "8Gi"
    livenessProbe:
      httpGet:
        path: /healthz
        port: 8000
      initialDelaySeconds: 30
      periodSeconds: 20
    # Readiness starts later (45s) than liveness (30s) but polls more often.
    readinessProbe:
      httpGet:
        path: /readyz
        port: 8000
      initialDelaySeconds: 45
      periodSeconds: 10
    # NOTE(review): presumably the size of the volume backing the /models
    # paths used in `config` below - confirm against the chart templates.
    volumeSize: "20Gi"
    # All values are quoted so that numeric-looking settings stay strings.
    # NOTE(review): presumably passed to the container as environment
    # variables - confirm how the templates consume this map.
    config:
      LATENCY_RETRAINING_INTERVAL_SEC: "10"
      LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
      # Model and scaler artifacts (TTFT and TPOT) all live under /models.
      LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
      LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
      LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
      LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
      # Same value as predictionServers.config.LATENCY_MODEL_TYPE.
      LATENCY_MODEL_TYPE: "xgboost"
      LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "500"
      # Same value as predictionServers.config.LATENCY_OBJECTIVE_TYPE.
      LATENCY_OBJECTIVE_TYPE: "mean"

  # Prediction Server Configuration
  # Replica count, port range, image, resources, health probes, storage size
  # and string-valued settings for the latency prediction server(s).
  predictionServers:
    # NOTE(review): presumably `count` servers are created on consecutive
    # ports beginning at `startPort` - confirm in the chart templates.
    count: 1
    startPort: 8001
    image:
      registry: registry.k8s.io
      repository: gateway-api-inference-extension/latency-prediction-server
      tag: v1.5.0
      # NOTE(review): "Always" combined with a pinned tag; confirm a re-pull on
      # every container start is intended (IfNotPresent is the usual pairing).
      pullPolicy: Always
    resources:
      requests:
        cpu: "8000m"
        memory: "4Gi"
      # The CPU limit (28000m) matches UVICORN_WORKERS ("28") in `config`.
      limits:
        cpu: "28000m"
        memory: "8Gi"
    # NOTE(review): unlike the training server, neither probe sets
    # httpGet.port; presumably the template fills it in per server from
    # `startPort` - confirm, since httpGet probes need a port.
    livenessProbe:
      httpGet:
        path: /healthz
      initialDelaySeconds: 15
      periodSeconds: 15
      timeoutSeconds: 5
      failureThreshold: 5
    readinessProbe:
      httpGet:
        path: /readyz
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 5
      failureThreshold: 30
    # NOTE(review): presumably the size of the volume backing the
    # /server_models paths used in `config` below - confirm in templates.
    volumeSize: "10Gi"
    # All values are quoted so that numeric-looking settings stay strings.
    # NOTE(review): presumably passed to the container as environment
    # variables - confirm how the templates consume this map.
    config:
      # Same value as trainingServer.config.LATENCY_MODEL_TYPE.
      LATENCY_MODEL_TYPE: "xgboost"
      PREDICT_HOST: "0.0.0.0"
      # Local copies use the same file names as the training server's
      # /models artifacts, but under /server_models.
      LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
      LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
      LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
      LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"
      # NOTE(review): 28 workers against a CPU request of only 8000m; the
      # workers may be throttled if the pod gets just its request - confirm.
      UVICORN_WORKERS: "28"
      OMP_NUM_THREADS: "1"
      MODEL_SYNC_INTERVAL_SEC: "30"
      # Same value as trainingServer.config.LATENCY_OBJECTIVE_TYPE.
      LATENCY_OBJECTIVE_TYPE: "mean"

  # EPP Environment Variables for Latency Predictor
  # Values are quoted so they stay strings rather than being parsed as ints.
  # NOTE(review): the exact semantics of each setting are defined by the EPP
  # binary, not by this chart; the unit hints below come from the names only.
  eppEnv:
    LATENCY_MAX_SAMPLE_SIZE: "10000"
    LATENCY_MAX_CONCURRENT_DISPATCHES: "36"
    # Name suggests milliseconds - confirm.
    LATENCY_COALESCE_WINDOW_MS: "1"