Tutorial: Deploying vLLM Inference on an Instant Cluster Using Ray
# Install Python venv support (required by uv to create virtual environments).
# -y keeps the install non-interactive so the tutorial can be pasted as-is.
apt install -y python3-venv

# Install uv (Python package and environment manager), then load its env.
curl -LsSf https://astral.sh/uv/install.sh | sh
source "$HOME/.local/bin/env"  # or restart the shell

# Install Python 3.12 and create/activate a virtual environment with it.
uv python install 3.12
uv venv --python 3.12
source .venv/bin/activate

# Create pyproject.toml pinning Ray and the CUDA 13.0 vLLM wheel.
# Use '>' (not '>>') so re-running this step overwrites the file instead of
# duplicating its contents; the quoted 'EOF' delimiter keeps the heredoc
# literal (no shell expansion inside the TOML).
cat << 'EOF' > pyproject.toml
[project]
name = "vllm-ray"
version = "1.0.0"
dependencies = [
    "ray==2.52.0",
    "vllm",
]

[tool.uv.sources]
vllm = { url = "https://github.com/vllm-project/vllm/releases/download/v0.12.0/vllm-0.12.0+cu130-cp38-abi3-manylinux_2_31_x86_64.whl" }

[[tool.uv.index]]
url = "https://download.pytorch.org/whl/cu130"
EOF

# Resolve and install dependencies. 'unsafe-best-match' lets uv choose the
# best matching version across both PyPI and the PyTorch cu130 index.
uv sync --index-strategy unsafe-best-match
Was this helpful?