forked from justinthelaw/k3d-gpu-support
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
72 lines (52 loc) · 2.64 KB
/
Dockerfile
File metadata and controls
72 lines (52 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# ORIGINAL TUTORIAL: https://k3d.io/v5.6.0/usage/advanced/cuda/#dockerfile
# MODIFIED IMPLEMENTATION: https://github.com/k3d-io/k3d/issues/1108#issue-1315509856
# "DIFF:" comments explain differences between tutorial and this modified implementation
# DIFF: base image bumped to a newer k3s release than the one in the tutorial.
# K3S_TAG is declared ahead of the first FROM so the k3s release can be swapped
# with `--build-arg K3S_TAG=<tag>`. This stage is only used as a file source
# for the `COPY --from=k3s` instructions in the final stage.
ARG K3S_TAG=v1.27.4-k3s1
FROM rancher/k3s:${K3S_TAG} AS k3s
# DIFF: updated base image to most recent CUDA and base OS version combination
FROM nvidia/cuda:12.2.0-base-ubuntu22.04

# Preseed debconf so package configuration never prompts during the build.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# Tools needed to download and trust the NVIDIA apt repository.
# The package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get -y install gnupg2 curl && \
    rm -rf /var/lib/apt/lists/*

# Install NVIDIA Container Runtime
# The key is downloaded to a file instead of being piped into apt-key: with a
# pipe the RUN step only sees the exit status of the last command, so a failed
# download would go unnoticed. `-f` makes curl fail on HTTP errors.
RUN curl -fsSL https://nvidia.github.io/nvidia-container-runtime/gpgkey -o /tmp/nvidia-container-runtime.gpgkey && \
    apt-key add /tmp/nvidia-container-runtime.gpgkey && \
    rm -f /tmp/nvidia-container-runtime.gpgkey

# DIFF: changed base OS for runtime grab
# Written straight to the sources directory (no pipe) so an HTTP error aborts
# the build instead of leaving an error page in the .list file.
RUN curl -fsSL https://nvidia.github.io/nvidia-container-runtime/ubuntu22.04/nvidia-container-runtime.list \
        -o /etc/apt/sources.list.d/nvidia-container-runtime.list

# DIFF: grab necessary NVIDIA toolkit and deps for base image's CUDA version - NVIDIA_CONTAINER_RUNTIME_VERSION arg is deprecated as well
# `apt-get update` runs in the same layer as the install so the freshly added
# NVIDIA repository is indexed; the lists are dropped again afterwards.
RUN apt-get update && \
    apt-get -y install nvidia-container-toolkit-base nvidia-container-toolkit nvidia-container-runtime util-linux && \
    rm -rf /var/lib/apt/lists/*
# DIFF: different mount calls than the original k3s image, deliberate k3s deps copy
# Overlay selected parts of the k3s stage onto the CUDA/Ubuntu image. The order
# of these instructions matters: a later COPY overwrites files that an earlier
# one placed at the same destination path.
# Copy every entry matching /bin/* from the k3s stage into /bin/.
# NOTE(review): on this Ubuntu base /bin is presumably a symlink to /usr/bin,
# so these files would land next to the distro binaries - confirm.
COPY --from=k3s /bin/* /bin/
# Delete /usr/bin/mount from the image.
# NOTE(review): presumably this is the "different mount calls" change named in
# the DIFF comment above - confirm which mount binary is meant to be used.
RUN rm /usr/bin/mount
# Place the k3s stage's shell at both /usr/bin/sh and /bin/sh.
COPY --from=k3s /bin/sh /usr/bin/sh
COPY --from=k3s /bin/sh /bin/sh
# Copy the k3s stage's /etc over this image's /etc; files with the same path
# are overwritten by the k3s versions.
COPY --from=k3s /etc /etc
# Explicit copies of the k3s binary and the /bin/aux directory.
# NOTE(review): if /bin/aux is a directory in the k3s stage, the wildcard copy
# above would have copied its contents rather than the directory itself, which
# is presumably why it is copied again here - confirm.
COPY --from=k3s /bin/k3s /bin/k3s
COPY --from=k3s /bin/aux /bin/aux
# Bring over the remaining directories from the k3s stage unchanged.
COPY --from=k3s /lib/modules /lib/modules
COPY --from=k3s /run /run
COPY --from=k3s /lib/firmware /lib/firmware
# DIFF: need to set CRI variable
# Path of the crictl configuration file inside the k3s agent directory.
# The extension must be `.yaml`; the previous value ended in `.yam`, which
# names a file that nothing else in this image refers to.
ENV CRI_CONFIG_FILE=/var/lib/rancher/k3s/agent/etc/crictl.yaml

# Write an nsswitch.conf whose only entry resolves hosts from local files
# first and DNS second.
RUN mkdir -p /etc && \
    echo 'hosts: files dns' > /etc/nsswitch.conf

# Make /tmp world-writable with the sticky bit set (mode 1777).
RUN chmod 1777 /tmp
# Provide custom containerd configuration to configure the nvidia-container-runtime
# DIFF: used MODIFIED IMPLEMENTATION config
# COPY creates any missing directories in the destination path, so no separate
# `RUN mkdir -p` layer is needed before it.
COPY config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl

# Deploy the nvidia driver plugin on startup
# DIFF: using the updated NVIDIA device plugin daemonset: https://github.com/NVIDIA/k8s-device-plugin/
# The manifest is renamed on copy; the destination directory is created by COPY.
COPY device-plugin-daemonset.yaml /var/lib/rancher/k3s/server/manifests/nvidia-device-plugin-daemonset.yaml
# Mount points for kubelet state, k3s state, CNI state and logs, declared in
# one instruction. They come after the COPY steps that populate
# /var/lib/rancher/k3s so that content is kept in the image.
VOLUME ["/var/lib/kubelet", "/var/lib/rancher/k3s", "/var/lib/cni", "/var/log"]
# DIFF: resolve fsnotify issues
# Attempt to raise the inotify watch and instance limits to 100000.
# NOTE(review): RUN executes only while the image is being built, and sysctl
# values are kernel state rather than files in the image, so these settings
# presumably do not carry over to containers started from this image - confirm,
# and if so set them on the host or when the container starts instead.
RUN sysctl -w fs.inotify.max_user_watches=100000
RUN sysctl -w fs.inotify.max_user_instances=100000
# Append /bin/aux (copied from the k3s stage) to the search path.
ENV PATH="${PATH}:/bin/aux"

# Run the k3s binary directly in exec form; "agent" is the default
# sub-command and can be replaced by arguments given at container start.
ENTRYPOINT ["/bin/k3s"]
CMD ["agent"]