-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathdockerfile-amd
More file actions
129 lines (92 loc) · 4.41 KB
/
dockerfile-amd
File metadata and controls
129 lines (92 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Base image: Ubuntu 22.04.
FROM ubuntu:22.04

# System packages: SSH server, sudo, a C/C++ toolchain, and basic download/edit tools.
# The apt package lists are deleted in the same layer so they do not stay in the image.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        curl \
        git \
        openssh-server \
        sudo \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Download and install Node.js v18.20.6 (with npm v10.8.2) from the pinned NodeSource .deb.
# The package is installed through apt-get rather than a bare `dpkg -i` so that any
# missing dependencies are resolved instead of leaving the package unconfigured;
# --no-install-recommends keeps apt from pulling in anything beyond hard dependencies.
# The downloaded .deb and the apt package lists are removed in this same layer
# (rm -rf, because the lists directory contains sub-directories).
RUN curl -fsSL https://deb.nodesource.com/node_18.x/pool/main/n/nodejs/nodejs_18.20.6-1nodesource1_amd64.deb -o /tmp/nodejs.deb && \
    apt-get update && \
    apt-get install -y --no-install-recommends /tmp/nodejs.deb && \
    rm -rf /tmp/nodejs.deb /var/lib/apt/lists/*
# One layer for the small filesystem/account tweaks:
#   - create the SSH daemon run directory,
#   - create /workspace,
#   - set the root password and switch sshd_config to permit root login.
# NOTE(review): the root password is hard-coded and root SSH login is enabled, so
# anyone who can reach the SSH port can log in as root — confirm this image is only
# used in trusted/local setups.
RUN mkdir /var/run/sshd \
    && mkdir -p /workspace \
    && echo 'root:123' | chpasswd \
    && sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
# Fetch the Anaconda3 2024.02-1 installer, run it unattended with /opt/conda as the
# install prefix, and delete the installer in the same layer.
RUN wget -O anaconda.sh https://repo.anaconda.com/archive/Anaconda3-2024.02-1-Linux-x86_64.sh \
    && bash anaconda.sh -b -p /opt/conda \
    && rm anaconda.sh

# Create the kolo_env environment on Python 3.10.
RUN /opt/conda/bin/conda create --yes -n kolo_env python=3.10

# From this point on, every RUN instruction is executed inside kolo_env via `conda run`.
SHELL ["/opt/conda/bin/conda", "run", "-n", "kolo_env", "/bin/bash", "-c"]

# Raise conda's remote read timeout to 86400 seconds.
RUN conda config --set remote_read_timeout_secs 86400
# Install torchtune and related libraries with fixed versions, using ROCm builds of PyTorch.
# torch and torchvision are pulled directly from the PyTorch ROCm wheel index with their
# version pins. Installing torch==2.6.0 from the default index first and then running an
# unpinned `pip install torch` against the ROCm index does not work: without --upgrade,
# pip reports the requirement as already satisfied and the ROCm build is never installed.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/rocm6.2.4
# torchao and torchtune come from the default index, pinned to fixed versions.
RUN pip install --no-cache-dir torchao==0.8.0
RUN pip install --no-cache-dir torchtune==0.5.0
# === ROCm Setup for AMD GPU Support ===
# torchaudio is optional: try the matching ROCm build and keep going (printing a message)
# if it cannot be installed.
RUN pip install --no-cache-dir torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.2.4 || echo "torchaudio not available for ROCm 6.2.4"
# Add the ROCm 5.4.2 apt repository and install AMD GPU dependencies.
# wget and gnupg2 are installed first so the repository signing key can be fetched and imported.
# NOTE(review): apt-key is deprecated in current apt releases; a signed-by keyring is the
# usual replacement — confirm against the ROCm install guide before changing it.
# The apt package lists are removed in the same layer, as the other apt steps in this file
# do; `apt-get clean` alone leaves /var/lib/apt/lists in the image.
RUN apt-get update && apt-get install -y wget gnupg2 && \
    wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
    echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.4.2/ ubuntu main' | tee /etc/apt/sources.list.d/rocm.list && \
    apt-get update && apt-get install -y rocm-libs hip-runtime-amd && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
# =======================================
# Give pip a very long network timeout (86400 seconds) for the installs below.
RUN pip config set global.timeout 86400

# numpy and datasets, pinned to exact versions.
RUN pip install \
    numpy==2.2.3 \
    datasets==3.3.2

# unsloth (with the colab-new extra), pinned to a specific commit.
RUN pip install \
    "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git@038e6d4c8d40207a87297ab3aaf787c19b1006d1"

# Additional ML/utility packages, pinned; --no-deps stops pip from installing their dependencies.
RUN pip install --no-deps \
    trl==0.14.0 \
    peft==0.14.0 \
    accelerate==1.4.0 \
    bitsandbytes==0.45.3

# transformers, frozen at a fixed version.
RUN pip install \
    transformers==4.49.0

# xformers, pinned to a specific version.
# NOTE(review): the wheel index used here is the CUDA 12.4 one (cu124) — confirm this is
# what is wanted for the AMD image.
RUN pip install \
    xformers==0.0.29.post3 \
    --index-url https://download.pytorch.org/whl/cu124

# OpenAI client, pinned to a fixed version.
RUN pip install \
    openai==1.64.0
# Create a separate Python 3.11 environment for Open WebUI.
RUN /opt/conda/bin/conda create --yes -n openwebui_env python=3.11

# Execute the following RUN instructions inside openwebui_env.
SHELL ["/opt/conda/bin/conda", "run", "-n", "openwebui_env", "/bin/bash", "-c"]

# Install Open WebUI from a pinned commit.
RUN pip install \
    git+https://github.com/open-webui/open-webui.git@b72150c881955721a63ae7f4ea1b9ea293816fc1

# Go back to plain bash for the remaining instructions.
SHELL ["/bin/bash", "-c"]
# Install Ollama, pinned to version 0.5.12 through OLLAMA_VERSION.
# pipefail is enabled for this step so that a failed download fails the build: without it
# the pipeline's exit status is that of `sh` alone, which succeeds on empty input and
# would leave the image silently without Ollama. (The active SHELL here is bash.)
RUN set -o pipefail && curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.12 sh
# Working directory for the instructions that follow.
WORKDIR /app

# Declare /var/kolo_data as a volume for persistent data.
VOLUME /var/kolo_data

# Install supervisor (openssh-server is requested again alongside it); the apt package
# lists are deleted in the same layer.
RUN apt-get update \
    && apt-get install -y \
        openssh-server \
        supervisor \
    && rm -rf /var/lib/apt/lists/*
# Put the supervisor configuration from the build context in place.
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Initialise conda for bash shells.
RUN /opt/conda/bin/conda init bash

# Append three lines to ~/.bashrc so that kolo_env is activated automatically:
# a marker comment, the activate command, and a trailing blank line.
RUN printf '%s\n' '# activate conda env' 'conda activate kolo_env' '' | tee -a ~/.bashrc

# Expose ports 22 and 8080.
EXPOSE 22 8080
# Install cmake for the llama.cpp build. The apt package lists are removed in the same
# layer, matching the other apt steps in this file (`apt-get clean` alone leaves them behind).
RUN apt-get update && apt-get install -y cmake && rm -rf /var/lib/apt/lists/*
# Clone llama.cpp, check out a pinned commit, and build it.
#   - -j "$(nproc)" lets the build use all available cores instead of running serially.
#   - llama-quantize is moved to the top of the checkout inside this same layer, so the
#     binary is not stored a second time by a separate RUN.
RUN git clone https://github.com/ggerganov/llama.cpp && \
    cd llama.cpp && \
    git checkout a82c9e7c23ef6db48cebfa194dc9cebbc4ac3552 && \
    cmake -B build && \
    cmake --build build --config Release -j "$(nproc)" && \
    mv build/bin/llama-quantize .
# Copy the contents of the scripts directory into /app, and the torchtune
# configurations into /app/torchtune.
COPY scripts/ /app/
COPY torchtune/ /app/torchtune/

# Default command: start supervisord.
CMD ["/usr/bin/supervisord"]