#!/bin/bash
# Shared settings for a vLLM-Ascend + YuanRong deployment.
#
# Every value below is exported. The scripts that read these variables are not
# part of this file, so the grouping comments describe only what the names and
# values themselves show.

# --- Service ports ---
export YUANRONG_WORKER_PORT=32451
export ETCD_CLIENT_PORT=35440

# --- Model and proxy-script locations ---
export MODEL_PATH="/workspace/models/Qwen2.5-VL-7B-Instruct"
export PROXY_SERVER_SCRIPT="/workspace/path/vllm_ascend/vllm-ascend/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py"

# --- Logging: both targets are derived from LOG_PATH ---
export LOG_PATH="/workspace/path/vllm_ascend/logs"
export VLLM_LOG_FILE="${LOG_PATH}/vllm_log.txt"
export DATASYSTEM_CLIENT_LOG_DIR="${LOG_PATH}/client_logs"

# --- Device and engine sizing ---
export VISIBLE_DEVICES="0,1,2,3"
export TENSOR_PARALLEL_SIZE=2
export GPU_MEMORY_UTILIZATION=0.9
# NOTE(review): the *_VLLM_EXTRA_ARGS arrays later in this file hard-code
# "--max-num-batched-tokens" "40000"; confirm which of the two values is meant
# to take effect.
export MAX_NUM_BATCHED_TOKENS=45000
export SHARED_MEMORY_MB=614400

# Prepend the datasystem library directory to the loader search path.
# The ${VAR:+...} form appends ":<old value>" only when LD_LIBRARY_PATH is
# already set and non-empty. A plain ":$LD_LIBRARY_PATH" would leave a trailing
# ':' otherwise, and the dynamic loader treats that empty entry as the current
# working directory. This form is also safe when sourced under `set -u`.
export LD_LIBRARY_PATH="/usr/local/python3.11.13/lib/python3.11/site-packages/datasystem/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# --- Feature switches ---
export USING_PREFIX_CONNECTOR=0
export VLLM_USE_V1=1
# NOTE(review): Ascend's collective library documents HCCL_OP_EXPANSION_MODE;
# confirm whether the "HCL_" spelling here is intentional before renaming.
export HCL_OP_EXPANSION_MODE="AIV"
# --- MERGED_* settings: one instance with kv_role "kv_both" ---
# The etcd address is taken from the host address.
MERGED_HOST_IP="192.168.0.1"
MERGED_ETCD_IP="${MERGED_HOST_IP}"
MERGED_VLLM_PORT=18300
MERGED_KV_CONFIG='{"kv_connector":"YuanRongConnector","kv_role":"kv_both"}'

# Extra command-line arguments kept as an array so each flag/value stays a
# separate word. MODEL_PATH is read at the time this file is evaluated.
MERGED_VLLM_EXTRA_ARGS=(
  --no-enable-prefix-caching
  --seed 1024
  --served-model-name qwen25vl
  --max-num-seqs 400
  --max-model-len 30000
  --max-num-batched-tokens 40000
  --trust-remote-code
  --allowed-local-media-path "${MODEL_PATH}"
  --additional-config '{"torchair_graph_config":{"enabled":false},"ascend_scheduler_config":{"enabled":true,"enable_chunked_prefill":false}}'
)

# Bash does not pass array variables to child processes through the
# environment; the export attribute on the array is kept only for parity with
# the scalar settings.
export MERGED_HOST_IP MERGED_ETCD_IP MERGED_VLLM_PORT MERGED_KV_CONFIG
export MERGED_VLLM_EXTRA_ARGS
# --- PRIMARY_* settings: instance with kv_role "kv_producer" ---
# The etcd address is taken from the host address.
PRIMARY_HOST_IP="192.168.0.1"
PRIMARY_ETCD_IP="${PRIMARY_HOST_IP}"
PRIMARY_VLLM_PORT=18300
PRIMARY_PROXY_PORT=18500
PRIMARY_KV_CONFIG='{"kv_connector":"YuanRongConnector","kv_role":"kv_producer"}'

# Extra command-line arguments kept as an array so each flag/value stays a
# separate word. MODEL_PATH is read at the time this file is evaluated.
PRIMARY_VLLM_EXTRA_ARGS=(
  --no-enable-prefix-caching
  --seed 1024
  --served-model-name qwen25vl
  --max-num-seqs 400
  --max-model-len 30000
  --max-num-batched-tokens 40000
  --trust-remote-code
  --allowed-local-media-path "${MODEL_PATH}"
  --additional-config '{"torchair_graph_config":{"enabled":false},"ascend_scheduler_config":{"enabled":true,"enable_chunked_prefill":false}}'
)

# Bash does not pass array variables to child processes through the
# environment; the export attribute on the array is kept only for parity with
# the scalar settings.
export PRIMARY_HOST_IP PRIMARY_ETCD_IP PRIMARY_VLLM_PORT PRIMARY_PROXY_PORT
export PRIMARY_KV_CONFIG PRIMARY_VLLM_EXTRA_ARGS
# --- SECONDARY_* settings: instance with kv_role "kv_consumer" ---
SECONDARY_HOST_IP="192.168.0.2"
SECONDARY_VLLM_PORT=18300
SECONDARY_KV_CONFIG='{"kv_connector":"YuanRongConnector","kv_role":"kv_consumer"}'

# Extra command-line arguments kept as an array so each flag/value stays a
# separate word. MODEL_PATH is read at the time this file is evaluated.
# Only the --additional-config payload differs from the PRIMARY_* argument list.
SECONDARY_VLLM_EXTRA_ARGS=(
  --no-enable-prefix-caching
  --seed 1024
  --served-model-name qwen25vl
  --max-num-seqs 400
  --max-model-len 30000
  --max-num-batched-tokens 40000
  --trust-remote-code
  --allowed-local-media-path "${MODEL_PATH}"
  --additional-config '{"torchair_graph_config":{"enabled":false,"enable_multistream_shared_expert":false},"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
)

# Bash does not pass array variables to child processes through the
# environment; the export attribute on the array is kept only for parity with
# the scalar settings.
export SECONDARY_HOST_IP SECONDARY_VLLM_PORT SECONDARY_KV_CONFIG
export SECONDARY_VLLM_EXTRA_ARGS
# --- PROXY_* endpoints ---
# The prefill endpoint copies the PRIMARY_* host/port and the decode endpoint
# copies the SECONDARY_* host/port, as those values stand when this line runs.
export \
  PROXY_PREFILL_HOST="${PRIMARY_HOST_IP}" \
  PROXY_PREFILL_PORT="${PRIMARY_VLLM_PORT}" \
  PROXY_DECODE_HOST="${SECONDARY_HOST_IP}" \
  PROXY_DECODE_PORT="${SECONDARY_VLLM_PORT}"