# Command template: runs `vllm serve` for a single model.
# The curly-brace fields are placeholders; presumably a caller substitutes
# them before this text reaches a shell -- TODO confirm.
# Options are grouped as: model settings, network binding, logging.
# The last three fields stand in for whole optional flags rather than flag
# values, so they are left unquoted, one per line -- presumably any of them
# may be substituted with an empty string; TODO confirm.
vllm serve {model} \
  --served-model-name {model} \
  --max-model-len {max_model_len} \
  --tensor-parallel-size {tp_size} \
  --host {host} \
  --port {port} \
  --uvicorn-log-level info \
  --disable-log-requests \
  {eager_flag} \
  {dtype_flag} \
  {quantization_flag}