环境配置
IP | nvidia-smi版本 | cuda版本 | 操作系统 | 显卡 |
---|---|---|---|---|
192.168.10.30 | 550.120 | 12.4 | 22.04 LTS | NVIDIA A10 |
一.安装显卡驱动
1.查看显卡
root@gpu:~# apt update
root@gpu:~# lspci | grep -i nvidia
00:07.0 3D controller: NVIDIA Corporation GA102GL [A10] (rev a1)
2.安装驱动
#推荐我们使用550驱动
root@gpu:~# ubuntu-drivers devices
model : GA102GL [A10]
driver : nvidia-driver-550 - distro non-free recommended
# Install the recommended 550 driver (matches ubuntu-drivers' recommendation above)
apt install -y nvidia-driver-550
3.查看安装好的驱动
root@gpu:~# nvidia-smi
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120 Driver Version: 550.120 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A10 Off | 00000000:00:07.0 Off | Off |
| 0% 36C P8 10W / 150W | 1MiB / 24564MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
二.安装cuda toolkit
因为安装的NVIDIA驱动版本是550,其对应的CUDA版本是12.4,所以这里安装CUDA Toolkit 12.4
# Add NVIDIA's CUDA apt repository keyring for Ubuntu 22.04 / x86_64,
# then install the CUDA 12.4 toolkit (matches the 550 driver's CUDA 12.4).
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
dpkg -i cuda-keyring_1.1-1_all.deb
apt-get update
apt-get -y install cuda-toolkit-12-4
root@gpu:~# vim /etc/profile
export PATH=$PATH:/usr/local/cuda-12.4/bin/
root@gpu:~# source /etc/profile
三.重启
reboot
四.安装docker
# Install Docker CE from Docker's official apt repository.
apt install -y apt-transport-https ca-certificates curl software-properties-common
# Import Docker's GPG key into a dedicated keyring file.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
# Register the stable repo for this Ubuntu release ($(lsb_release -cs) -> e.g. "jammy").
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
apt update
apt install -y docker-ce docker-ce-cli containerd.io
# Start the daemon now and enable it at boot.
systemctl start docker
systemctl enable docker
五.安装 nvidia-container-toolkit
1.安装nvidia-container-toolkit
# Import NVIDIA's container-toolkit GPG key, then fetch the repo list and
# rewrite each "deb" entry to reference the dearmored keyring (signed-by).
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Optional: un-comment the "experimental" entries in the repo list so
# experimental packages are also available.
sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt-get update
apt-get install -y nvidia-container-toolkit
2.使用nvidia-ctk配置容器运行时为docker
# Register the "nvidia" runtime in /etc/docker/daemon.json, then restart
# Docker so containers can be launched with GPU access (--gpus / deploy.devices).
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
3.配置镜像仓库
root@gpu:~# vim /etc/docker/daemon.json
{
"registry-mirrors": [
"https://docker.211678.top",
"https://docker.1panel.live",
"https://hub.rat.dev",
"https://docker.m.daocloud.io",
"https://do.nark.eu.org",
"https://dockerpull.com",
"https://dockerproxy.cn",
"https://docker.awsl9527.cn"
],
"runtimes": {
"nvidia": {
"args": [],
"path": "nvidia-container-runtime"
}
}
}
root@gpu:~# systemctl daemon-reload
root@gpu:~# systemctl restart docker
六.部署deepseek-R1-7B
1.拉取vllm镜像
#官方镜像
vllm/vllm-openai:latest
ghcr.io/open-webui/open-webui:cuda
#自建阿里云镜像仓库
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
# Pull the vLLM OpenAI-compatible server and the Open WebUI CUDA build from
# the self-hosted Aliyun registry mirror (faster than Docker Hub / ghcr.io here).
docker pull registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
docker pull registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
2.下载deepseek-R1-7B模型(二选一)
2.1modelscope
https://modelscope.cn/home
# Download DeepSeek-R1-Distill-Qwen-7B from ModelScope into /root/model.
mkdir -p /root/model   # -p: idempotent, no error if the directory already exists
pip install modelscope setuptools
modelscope download --model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --local_dir /root/model
2.2huggingface
https://huggingface.co/
# Alternative: download DeepSeek-R1-Distill-Qwen-7B from Hugging Face,
# routed through the hf-mirror.com mirror (reachable from mainland China).
pip install huggingface_hub
mkdir -p /root/model   # -p: idempotent, no error if the directory already exists
export HF_ENDPOINT=https://hf-mirror.com
huggingface-cli download --resume-download deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --local-dir /root/model
3.部署vllm-openai
# Install docker-compose from GitHub releases
# (https://github.com/docker/compose/releases).
# NOTE(review): the original notes listed only the URL and the chmod —
# the actual download step was missing, so chmod would fail.
curl -fsSL https://github.com/docker/compose/releases/latest/download/docker-compose-linux-x86_64 \
  -o /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
root@gpu:~# vim docker-compose.yml
# docker-compose.yml — vLLM OpenAI-compatible server serving DeepSeek-R1 7B.
# NOTE(review): YAML nesting reconstructed — the pasted original had lost all
# indentation and would not parse. The "version" key is obsolete in Compose v2
# (ignored with a warning) but kept for older docker-compose binaries.
version: '3'
services:
  vllm_service:
    image: registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
    container_name: vllm_deepseek_7b
    restart: always
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all NVIDIA GPUs for this container; requires the
            # nvidia runtime configured via nvidia-ctk above.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ports:
      - "8000:8000"
    volumes:
      # Model directory downloaded in step 2, mounted into the container.
      - /root/model:/app/model
    # Arguments for the vLLM OpenAI API server entrypoint.
    command: [
      "--served-model-name", "DeepSeek-R1:7B",
      "--trust-remote-code",
      "--enforce-eager",
      "--enable-chunked-prefill",
      "--gpu-memory-utilization", "0.9",
      "--model", "/app/model/",
      "--host", "0.0.0.0",
      "--port", "8000",
      "--max-model-len", "10000",
      "--api-key", "12345678",
      "--tokenizer", "/app/model/",
      "--tensor-parallel-size", "1"
    ]
root@gpu:~# docker-compose up -d
4.部署open-webui
# Run Open WebUI (CUDA image) in the background on the host network so it
# can reach the vLLM endpoint at 192.168.10.30:8000 directly.
# OPENAI_API_BASE_URL points the UI at vLLM's OpenAI-compatible API; the
# key in OPENAI_API_KEYS must match vLLM's --api-key (12345678).
# NOTE(review): with --network host no -p mapping applies; the UI listens
# on the container's default port (8080 for Open WebUI) — confirm for this image.
docker run -d \
--name openwebui-container \
--network host \
--gpus all \
-e OPENAI_API_BASE_URL=http://192.168.10.30:8000/v1 \
-e OPENAI_API_KEYS=12345678 \
-e USE_CUDA_DOCKER=true \
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
5.网页查看:浏览器访问部署机的Open WebUI地址(host网络模式下为 http://192.168.10.30:8080,具体端口以容器日志为准)
6.网页问问题后查看日志
7.查看显卡使用率
root@gpu:~# nvidia-smi
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120 Driver Version: 550.120 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A10 On | 00000000:00:07.0 Off | Off |
| 0% 59C P0 149W / 150W | 21805MiB / 24564MiB | 98% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| 0 N/A N/A 15903 C python3 21796MiB |
+-----------------------------------------------------------------------------------------+
评论区