-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
140 lines (114 loc) · 4.68 KB
/
Dockerfile.gpu
File metadata and controls
140 lines (114 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# GPU-enabled Dockerfile for WriteBot
# Optimized for NVIDIA RTX 50 series (Blackwell) and RTX 40/30 series GPUs
# Defaults to CUDA 12.8.1 with cuDNN 9 (see CUDA_VERSION / CUDNN_VERSION below); installs TensorFlow >= 2.19 from pip
#
# GPU Requirements:
# - TensorFlow 2.18 requires CUDA 12.5+ and cuDNN 9.3+
# - Blackwell/RTX 50 series works best with CUDA 12.8+ for native kernels
# - Host needs NVIDIA driver 565+ for Blackwell GPUs
#=============================================================================
# BUILD ARGUMENTS - override any of these with `--build-arg NAME=value`
#=============================================================================
# Base image selection: these three are interpolated into the FROM lines.
ARG UBUNTU_VERSION=22.04
ARG CUDNN_VERSION=9
ARG CUDA_VERSION=12.8.1

# Python interpreter version (re-declared inside each stage that installs it).
ARG PYTHON_VERSION=3.11

# Gunicorn settings (re-declared inside the production stage).
ARG GUNICORN_TIMEOUT=180
ARG GUNICORN_THREADS=4
ARG GUNICORN_WORKERS=2
# Build stage with CUDA support
FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS builder

# Bring PYTHON_VERSION into this stage (an ARG declared before FROM is not
# visible after it). It is declared without a value so it inherits the default
# of the global ARG at the top of the file instead of carrying a second copy
# of that default that could drift.
ARG PYTHON_VERSION

# Prevent the timezone prompt and other interactive apt prompts during the
# build. Declared as ARG so it is visible to this stage's RUN steps without
# being stored as an environment variable of the image.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Set working directory
WORKDIR /app
# Configure the timezone files from $TZ, enable the deadsnakes PPA, then
# install Python ${PYTHON_VERSION} (interpreter, headers, venv), pip and the
# build dependencies. The apt package lists are removed in the same layer.
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime \
    && echo "${TZ}" > /etc/timezone \
    && apt-get update \
    && apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y \
        g++ \
        gcc \
        libhdf5-dev \
        make \
        pkg-config \
        "python${PYTHON_VERSION}" \
        "python${PYTHON_VERSION}-dev" \
        "python${PYTHON_VERSION}-venv" \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Make both `python` and `python3` resolve to the selected interpreter.
RUN for alias in python python3; do \
        ln -sf "/usr/bin/python${PYTHON_VERSION}" "/usr/bin/${alias}" || exit 1; \
    done
# Upgrade pip
RUN python -m pip install --upgrade pip

# Install latest TensorFlow with CUDA support for RTX 50 series (Blackwell)
# Using latest available for best GPU support
# tf-keras provides Keras 2 compatibility for legacy RNN APIs
# This step does not read requirements.txt, so it runs BEFORE that file is
# copied: editing requirements.txt then no longer invalidates this layer.
RUN pip install --no-cache-dir --user "tensorflow[and-cuda]>=2.19.0" "tensorflow-probability>=0.25.0" "tf-keras>=2.18.0"

# Copy requirements and install the remaining Python dependencies.
# Packages go to /root/.local (--user), which the production stage copies.
COPY requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt
# Production stage
# The base image tag is built from the global CUDA_VERSION / CUDNN_VERSION /
# UBUNTU_VERSION arguments declared before the first FROM. A FROM line only
# sees ARGs declared before the first FROM, so repeating those ARGs here
# (inside the previous stage) had no effect on the tag and they are omitted.
FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS production
# Re-declare ARGs for production stage.
# They are declared without values so each inherits the default of the
# matching global ARG at the top of the file. A default repeated here would
# shadow the global one, so editing the top-of-file defaults would silently
# do nothing. `--build-arg` overrides keep working as before.
ARG PYTHON_VERSION
ARG GUNICORN_WORKERS
ARG GUNICORN_THREADS
ARG GUNICORN_TIMEOUT

# Prevent the timezone prompt and other interactive apt prompts during the
# build. Declared as ARG rather than ENV so it applies to this stage's RUN
# steps only and is not carried into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=UTC

# Set working directory
WORKDIR /app
# Configure the timezone files from $TZ, enable the deadsnakes PPA, then
# install the Python ${PYTHON_VERSION} runtime (interpreter, venv) and pip.
# The apt package lists are removed in the same layer.
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime \
    && echo "${TZ}" > /etc/timezone \
    && apt-get update \
    && apt-get install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y \
        "python${PYTHON_VERSION}" \
        "python${PYTHON_VERSION}-venv" \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Make both `python` and `python3` resolve to the selected interpreter.
RUN for alias in python python3; do \
        ln -sf "/usr/bin/python${PYTHON_VERSION}" "/usr/bin/${alias}" || exit 1; \
    done
# Bring over the packages the builder stage installed with `pip --user`.
COPY --from=builder /root/.local /root/.local

# Put the scripts shipped with those packages on PATH.
ENV PATH="/root/.local/bin:${PATH}"

# Application source (copied into the working directory).
COPY . ./

# Directories that must exist under the working directory.
RUN mkdir -p \
        webapp/instance \
        webapp/logs \
        webapp/job_storage \
        model/data
# Gunicorn settings: defaults come from the build args of this stage and are
# read from the environment by the CMD, so they can be overridden at runtime.
ENV GUNICORN_WORKERS=${GUNICORN_WORKERS} \
    GUNICORN_THREADS=${GUNICORN_THREADS} \
    GUNICORN_TIMEOUT=${GUNICORN_TIMEOUT}

# Application, Python and TensorFlow settings.
# TF_ENABLE_TF32 is intended to enable TensorFloat-32 on RTX 30/40/50 series.
# TF_USE_LEGACY_KERAS selects Keras 2 (tf-keras) instead of Keras 3 for the
# TF1 compat code.
ENV FLASK_ENV=production \
    PYTHONUNBUFFERED=1 \
    TF_CPP_MIN_LOG_LEVEL=2 \
    TF_FORCE_GPU_ALLOW_GROWTH=true \
    TF_ENABLE_TF32=1 \
    TF_USE_LEGACY_KERAS=1

# GPU visibility and driver capabilities.
ENV CUDA_VISIBLE_DEVICES=0 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility
# Expose port
EXPOSE 5000

# Health check
# Probes /api/health with the standard library: urllib.request.urlopen raises
# on 4xx/5xx responses and on connection errors, so python exits non-zero and
# the probe fails. (requests.get returns normally for any HTTP status, so an
# erroring health endpoint would still have been reported as healthy.)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/api/health', timeout=5)" || exit 1
# Run database initialization and start the application
# Uses environment variables for Gunicorn configuration (customizable at runtime)
# A shell is needed to expand the ${GUNICORN_*} variables and to chain the two
# commands. `exec` makes gunicorn replace that shell, so gunicorn itself
# receives the stop signal sent by `docker stop` and can shut down cleanly.
CMD ["bash", "-c", "python webapp/init_db.py --auto && exec gunicorn --bind 0.0.0.0:5000 --workers ${GUNICORN_WORKERS} --threads ${GUNICORN_THREADS} --timeout ${GUNICORN_TIMEOUT} --access-logfile - --error-logfile - webapp.app:app"]