Environment
Hostname | IP | k8s version | Notes | OS | nvidia-smi version | CUDA version | GPU |
---|---|---|---|---|---|---|---|
master1 | 192.168.10.32 | v1.28.0 | master1 | Ubuntu 22.04 LTS | 550.120 | 12.4 | NVIDIA A10 |
node1 | 192.168.10.33 | v1.28.0 | node1 | Ubuntu 22.04 LTS | 550.120 | 12.4 | NVIDIA A10 |
I. Install the GPU Driver
1. Check the GPU
root@gpu:~# apt update
root@gpu:~# lspci | grep -i nvidia
00:07.0 3D controller: NVIDIA Corporation GA102GL [A10] (rev a1)
2. Install the driver
# The 550 driver is the recommended one for this card
root@gpu:~# ubuntu-drivers devices
model : GA102GL [A10]
driver : nvidia-driver-550 - distro non-free recommended
# Install the 550 driver
apt install -y nvidia-driver-550
3. Verify the installed driver
root@gpu:~# nvidia-smi
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120 Driver Version: 550.120 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A10 Off | 00000000:00:07.0 Off | 0 |
| 0% 40C P0 5W / 150W | 1MiB / 23028MiB | 0% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
II. Install the CUDA Toolkit
Since the installed NVIDIA driver is 550, install the matching CUDA 12.4.
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
dpkg -i cuda-keyring_1.1-1_all.deb
apt-get update
apt-get -y install cuda-toolkit-12-4
root@gpu:~# vim /etc/profile
export PATH=$PATH:/usr/local/cuda-12.4/bin/
root@gpu:~# source /etc/profile
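To confirm the toolkit is installed and on PATH, you can check the compiler version (nvcc ships with cuda-toolkit-12-4):
nvcc --version
# Should report: Cuda compilation tools, release 12.4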
III. Reboot
reboot
IV. Install Kubernetes
1. Set the hostnames
root@master1:~# hostnamectl set-hostname master1
root@node1:~# hostnamectl set-hostname node1
2. Configure the hosts file (on both nodes)
root@master1:~# cat >> /etc/hosts << EOF
192.168.10.32 master1
192.168.10.33 node1
EOF
3. System tuning
# Forward IPv4 and let iptables see bridged traffic
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
# Set the required sysctl parameters; they persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
# Apply the sysctl parameters without rebooting
sudo sysctl --system
# Disable swap temporarily; swap is turned off mainly for performance reasons
swapoff -a
# Disable swap permanently
sed -ri 's/.*swap.*/#&/' /etc/fstab
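An optional quick check confirms the modules, sysctl values, and swap state:
# Modules loaded?
lsmod | grep -E 'overlay|br_netfilter'
# sysctl values applied?
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward
# Swap column should show 0B
free -h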
4. Install containerd
4.1 Install nvidia-container-toolkit
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# (Optional) enable the experimental packages
sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt-get update
apt-get install -y nvidia-container-toolkit
4.2 Install containerd
apt update
apt install -y containerd
# Generate the default containerd config file
mkdir -p /etc/containerd/
containerd config default | sudo tee /etc/containerd/config.toml >/dev/null 2>&1
# Use nvidia-ctk to add the nvidia runtime to containerd's config
nvidia-ctk runtime configure --runtime=containerd
# Set SystemdCgroup to true in /etc/containerd/config.toml
sed -i 's#SystemdCgroup = false#SystemdCgroup = true#g' /etc/containerd/config.toml
grep SystemdCgroup /etc/containerd/config.toml
# Switch the sandbox (pause) image to the Aliyun mirror
sed -i "s#registry.k8s.io/pause#registry.cn-hangzhou.aliyuncs.com/google_containers/pause#g" /etc/containerd/config.toml
grep sandbox_image /etc/containerd/config.toml
# Configure registry mirrors for image pulls
vim /etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".registry]
  config_path = ""
  [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
    endpoint = ["https://teut9a0h.mirror.aliyuncs.com"]
  [plugins."io.containerd.grpc.v1.cri".registry.mirrors."k8s.gcr.io"]
    endpoint = ["https://registry.aliyuncs.com/google_containers"]
Set the default runtime to nvidia:
[plugins."io.containerd.grpc.v1.cri".containerd]
  default_runtime_name = "nvidia"
root@master1:~# systemctl restart containerd
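Before continuing, it is worth confirming that nvidia-ctk actually registered the nvidia runtime and that containerd restarted cleanly:
# The runtimes.nvidia block should exist and point at nvidia-container-runtime
grep -A3 'runtimes.nvidia' /etc/containerd/config.toml
systemctl status containerd --no-pager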
4.3 Install crictl
# Add the apt key
curl https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo apt-key add -
# Add the Aliyun mirror of the Kubernetes apt repository
apt-add-repository "deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main"
# Install the crictl tool
apt -y install cri-tools
# Point crictl at the containerd socket (this writes /etc/crictl.yaml)
crictl config --set runtime-endpoint=unix:///run/containerd/containerd.sock
# Or write the full config file directly:
cat << EOF | tee /etc/crictl.yaml
runtime-endpoint: "unix:///run/containerd/containerd.sock"
image-endpoint: "unix:///run/containerd/containerd.sock"
timeout: 10
debug: false
pull-image-on-create: false
disable-pull-on-run: false
EOF
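A quick smoke test confirms crictl can reach containerd over the socket:
crictl info | head
# Should list no containers yet, but must not error
crictl ps -a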
5. Install the Kubernetes components
apt install -y kubelet=1.28.0-00 kubeadm=1.28.0-00 kubectl=1.28.0-00
systemctl enable kubelet && systemctl start kubelet
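Optionally, hold the packages so a routine apt upgrade cannot move the cluster to a newer Kubernetes version unexpectedly:
apt-mark hold kubelet kubeadm kubectl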
6. Initialize the control plane (run on master1)
# List the required images
kubeadm config images list --kubernetes-version=v1.28.0 --image-repository=registry.aliyuncs.com/google_containers
# Pull the images
kubeadm config images pull --kubernetes-version=v1.28.0 --image-repository=registry.aliyuncs.com/google_containers
# Initialize the cluster
kubeadm init \
--apiserver-advertise-address=192.168.10.32 \
--image-repository registry.aliyuncs.com/google_containers \
--kubernetes-version v1.28.0 \
--service-cidr=10.96.0.0/12 \
--pod-network-cidr=10.244.0.0/16 \
--ignore-preflight-errors=all
Copy the admin kubeconfig to the default path so kubectl can authenticate:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
export KUBECONFIG=/etc/kubernetes/admin.conf
7. Join the worker to the cluster (run on node1)
kubeadm join 192.168.10.32:6443 --token 7hwuxh.bvfsd9kia04nx7qq \
--discovery-token-ca-cert-hash sha256:190a72151f58f3632633ca377ae8d2f1517d5bbf401f390a3be67fd091185ac3
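The token and hash above come from the kubeadm init output and will differ per cluster; if the token has expired (default TTL is 24h), regenerate the full join command on master1:
kubeadm token create --print-join-command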
8. Install the Flannel network plugin
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
kubectl apply -f kube-flannel.yml
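Wait for the Flannel pod on each node to reach Running before checking node status (recent manifests deploy into the kube-flannel namespace; older ones used kube-system):
kubectl get pods -n kube-flannel -o wide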
9. Check the nodes
root@master1:~# kubectl get node
NAME STATUS ROLES AGE VERSION
master1 Ready control-plane 15m v1.28.0
node1 Ready <none> 15m v1.28.0
V. Install the nvidia-device-plugin
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.1/nvidia-device-plugin.yml
Each node should now report the GPU under Capacity and Allocatable; the third line shows how much is currently allocated:
root@master1:~# kubectl describe node master1 | grep nvidia.com/gpu
nvidia.com/gpu: 1
nvidia.com/gpu: 1
nvidia.com/gpu 0 0
root@master1:~# kubectl describe node node1| grep nvidia.com/gpu
nvidia.com/gpu: 1
nvidia.com/gpu: 1
nvidia.com/gpu 0 0
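To verify that a pod can actually be scheduled onto a GPU, a minimal test pod works; the CUDA image tag below is an assumption, any CUDA base image will do:
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: gpu-test
spec:
  restartPolicy: Never
  containers:
  - name: cuda
    image: nvidia/cuda:12.4.0-base-ubuntu22.04  # assumed tag; any CUDA base image works
    command: ["nvidia-smi"]
    resources:
      limits:
        nvidia.com/gpu: 1
EOF
# Once the pod completes, the logs should show the nvidia-smi table
kubectl logs gpu-test
kubectl delete pod gpu-test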
VI. Deploy DeepSeek
1. Pull the images
# Official images
vllm/vllm-openai:latest
ghcr.io/open-webui/open-webui:cuda
# Mirrors pushed to a personal Aliyun registry
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
crictl pull registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
crictl pull registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
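You can confirm both images landed in containerd's store:
crictl images | grep tools_cluster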
2. Download the DeepSeek-R1 1.5B model (pick one method)
2.1 ModelScope
https://modelscope.cn/home
mkdir /root/model
pip install modelscope setuptools
modelscope download --model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local_dir /root/model
2.2 Hugging Face
https://huggingface.co/
pip install huggingface_hub
mkdir /root/model
export HF_ENDPOINT=https://hf-mirror.com
huggingface-cli download --resume-download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local-dir /root/model
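Whichever method you use, the model directory should end up containing the weights and tokenizer files:
ls -lh /root/model
# Expect config.json, tokenizer files, and *.safetensors weights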
3. Deploy DeepSeek
kubectl create ns deepseek
vim deepseek.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-vllm
  namespace: deepseek
spec:
  replicas: 1
  selector:
    matchLabels:
      app: deepseek-vllm
  template:
    metadata:
      labels:
        app: deepseek-vllm
    spec:
      nodeName: node1
      containers:
      - name: vllm
        image: registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
        args: [
          "--served-model-name", "DeepSeek-R1:1.5B",
          "--trust-remote-code",
          "--enforce-eager",
          "--enable-chunked-prefill",
          "--gpu-memory-utilization", "0.95",
          "--model", "/app/model/",
          "--host", "0.0.0.0",
          "--port", "8000",
          "--api-key", "12345678",
          "--tokenizer", "/app/model/",
          "--tensor-parallel-size", "1"
        ]
        ports:
        - containerPort: 8000
        resources:
          requests:
            nvidia.com/gpu: "1"
          limits:
            nvidia.com/gpu: 1
        volumeMounts:
        - name: data
          mountPath: /app/model
      volumes:
      - name: data
        hostPath:
          path: /root/model
---
apiVersion: v1
kind: Service
metadata:
  name: vllm-api
  namespace: deepseek
spec:
  selector:
    app: deepseek-vllm  # must match the Deployment's pod label; the original "app: vllm" selected nothing
  type: ClusterIP
  ports:
  - name: api
    protocol: TCP
    port: 8000
    targetPort: 8000
kubectl apply -f deepseek.yaml
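Then watch the pod start and follow the vLLM logs; loading the model takes a moment:
kubectl -n deepseek get pods -o wide
kubectl -n deepseek logs -f deploy/deepseek-vllm
# Ready once vLLM reports the API server listening on 0.0.0.0:8000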
4. Deploy Open WebUI
apiVersion: apps/v1
kind: Deployment
metadata:
  name: webui
  namespace: deepseek
spec:
  replicas: 1
  selector:
    matchLabels:
      app: webui
  template:
    metadata:
      labels:
        app: webui
    spec:
      nodeName: master1
      containers:
      - name: webui
        image: registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
        env:
        - name: OPENAI_API_BASE_URL
          value: http://vllm-api:8000/v1
        - name: ENABLE_OLLAMA_API
          value: "False"
        - name: OPENAI_API_KEYS
          value: "12345678"
        - name: USE_CUDA_DOCKER
          value: "true"
        ports:
        - containerPort: 8080
---
apiVersion: v1
kind: Service
metadata:
  name: webui
  namespace: deepseek
  labels:
    app: webui
spec:
  type: ClusterIP
  ports:
  - port: 8080
    protocol: TCP
    targetPort: 8080
  selector:
    app: webui
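Save the manifest above (e.g. as webui.yaml), apply it, and since the Service is ClusterIP, reach the UI from your workstation with a port-forward:
kubectl apply -f webui.yaml
kubectl -n deepseek get pods
kubectl -n deepseek port-forward svc/webui 8080:8080
# Then open http://localhost:8080 in a browser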
5. Test the API
curl --location 'http://10.244.1.27:8000/v1/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer 12345678' \
--data '{
"model":"DeepSeek-R1:1.5B",
"messages": [
{
"role": "user",
"content": "你是谁?"
}
]
}'
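The address 10.244.1.27 above is the vLLM pod IP, which will differ in your cluster; look it up, or target the vllm-api Service ClusterIP instead:
kubectl -n deepseek get pods -o wide   # pod IP
kubectl -n deepseek get svc vllm-api   # Service ClusterIP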