一、配置清单
1、14700KF
2、RTX 4090 x 2
3、内存ddr5 32G x4
4、主板华硕 Z790
5、硬盘2T固态系统、4T企业
二、BIOS配置调整
2.1 PCI接口确认
lspci | grep -i nvidia
01:00.0 VGA compatible controller: NVIDIA Corporation AD102 [GeForce RTX 4090] (rev a1)
07:00.0 VGA compatible controller: NVIDIA Corporation AD102 [GeForce RTX 4090] (rev a1)
lspci -vvv -s 01:00.0 | grep LnkSta
lspci -vvv -s 07:00.0 | grep LnkSta
LnkSta: Speed 2.5GT/s (downgraded), Width x16
LnkSta: Speed 2.5GT/s (downgraded), Width x4 (downgraded)
已确认PCI通道当前为 x16 和 x4,需要稍后在BIOS里面调成 x8 x8。
2.2 确认是否开启 Above 4G Decoding
cat /proc/cmdline | grep -Ei "pci=assign-busses|enable_4g_decoding"
2.3 电源相关 关闭ASPM(链路节能)和CEP,解除功耗限制。
三、驱动安装
apt update
ubuntu-drivers autoinstall
nvidia-smi
四、软件安装
docker 安装
apt install -y apt-transport-https ca-certificates curl software-properties-common curl
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
apt update
apt install -y docker-ce docker-ce-cli containerd.io
安装NVIDIA Container Toolkit
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed "s#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g" | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt-get update
apt-get install -y nvidia-container-toolkit
配置并启动Docker
nvidia-ctk runtime configure --runtime=docker
systemctl enable docker
systemctl start docker
cat docker-compose.yml
services:
  # vLLM OpenAI-compatible inference server, tensor-parallel across both GPUs.
  vllm:
    image: vllm/vllm-openai:v0.8.1
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            # Reserve both RTX 4090s for this container.
            - driver: nvidia
              count: 2
              capabilities: [gpu]
    ports:
      # Bind only on the docker bridge IP, not on all interfaces.
      - "172.17.0.1:8001:8000"
    volumes:
      - /home/kairui/models:/models
      # Shared memory passthrough — NCCL/tensor-parallel needs large /dev/shm.
      - /dev/shm:/dev/shm
    logging:
      driver: "json-file"
      options:
        max-size: "1g"
        max-file: "10"
    environment:
      - HF_HOME=/models
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
      - CUDA_VISIBLE_DEVICES=0,1
      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
    # Arguments appended to the image's vLLM entrypoint.
    command: [
      "--model", "/models/DeepSeek-R1-Distill-Qwen-14B",
      "--served-model-name", "deepseek-r1",
      "--tensor-parallel-size", "2",
      "--gpu-memory-utilization", "0.85",
      "--dtype", "float16",
      "--max-model-len", "32768",
      "--max-num-seqs", "50",
      "--api-key", "aaaa"
    ]

  # Xinference local deployment, started through a custom entrypoint script.
  xinf:
    image: xprobe/xinference:v1.4.1
    restart: unless-stopped
    volumes:
      - /home/kairui/.huggingface:/root/.cache/huggingface/
      - /data0/aicore/xinf/:/srv/
    logging:
      driver: "json-file"
      options:
        max-size: "1g"
        max-file: "10"
    ports:
      # Bind only on the docker bridge IP, not on all interfaces.
      - "172.17.0.1:8003:8000"
    entrypoint: ["/bin/bash", "/srv/entrypoint.sh"]
    command: [
      "xinference-local", "-H0.0.0.0", "-p8000"
    ]
通过 Docker 下载 HuggingFace 模型
docker run --rm -it -v ~/.cache/huggingface:/root/.cache/huggingface python:3.10-slim bash -c "pip install -U 'huggingface_hub[cli]' && huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-32B --local-dir /root/.cache/huggingface/DeepSeek-R1-Distill-Qwen-32B/"
通过Scp下载远程文件
sshpass -v -p "abc" scp -P 15022 -vr abc@pan.blockelite.cn:/ /root/