From 5c2674b3927e57bc1fa6233fab0251c695fce45c Mon Sep 17 00:00:00 2001 From: Christoph Baker Date: Wed, 1 Apr 2026 14:54:11 -0700 Subject: [PATCH] Updated docker image to quay.io/jupyter/all-spark-notebook --- CLAUDE.md | 4 ++-- Dockerfile | 2 +- Makefile | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 6 +++-- 4 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 Makefile diff --git a/CLAUDE.md b/CLAUDE.md index 12e2236..d923d07 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides context and coding guidance for Claude when working in the ** - **Container**: Custom image built on `jupyter/all-spark-notebook:latest` via Docker Compose - **Languages**: Python (primary), Rust (via evcxr Jupyter kernel) -- **Framework**: Apache Spark 3.5.0 / PySpark +- **Framework**: Apache Spark 4.1.1 / PySpark - **IDE**: JupyterLab (port 8888) - **Build**: `docker compose build && docker compose up` @@ -155,7 +155,7 @@ Before suggesting or finalising any code, verify: - [ ] Docstrings present on all functions and classes - [ ] Errors handled explicitly - [ ] Complex logic is commented -- [ ] Compatible with Spark 3.5.0 / PySpark +- [ ] Compatible with Spark 4.1.1 / PySpark - [ ] Uses container paths (`/home/jovyan/work/`) not host paths - [ ] Notebook cells are ordered and independently reproducible diff --git a/Dockerfile b/Dockerfile index 2c85b37..407a28b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM jupyter/all-spark-notebook:latest +FROM quay.io/jupyter/all-spark-notebook:2026-03-23 # Install Rust and Cargo RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3354f7b --- /dev/null +++ b/Makefile @@ -0,0 +1,65 @@ +.PHONY: build build-no-cache up up-d down restart logs shell validate ayce default help + +COMPOSE = docker compose +IMAGE = quay.io/jupyter/all-spark-notebook:2026-03-23 + +# Default target +default: help + +# Display help information +help: + @echo "Available targets:" + @echo " make (default) - Display this help message" + @echo " make build - Build the Docker image" + @echo " make build-no-cache - Build the Docker image without cache" + @echo " make up - Start JupyterLab in the foreground" + @echo " make up-d - Start JupyterLab in the background" + @echo " make down - Stop and remove containers" + @echo " make restart - Restart running containers" + @echo " make logs - Tail container logs" + @echo " make shell - Open a shell in the running container" + @echo " make validate - Build image and verify PySpark imports correctly" + @echo " make ayce - Run build and validate" + @echo "" + +# Build the Docker image +build: + $(COMPOSE) build + +# Build without layer cache (useful after Dockerfile changes) +build-no-cache: + $(COMPOSE) build --no-cache + +# Start JupyterLab in the foreground +up: + $(COMPOSE) up + +# Start JupyterLab in the background +up-d: + $(COMPOSE) up -d + +# Stop and remove containers +down: + $(COMPOSE) down + +# Restart running containers +restart: + $(COMPOSE) restart + +# Tail container logs +logs: + $(COMPOSE) logs -f + +# Open a bash shell in the running JupyterLab container +shell: + $(COMPOSE) exec jupyterlab /bin/bash + +# Verify PySpark is importable inside the image +validate: + @echo "Validating PySpark installation..." + @docker run --rm $(IMAGE) python -c "import pyspark; print('PySpark', pyspark.__version__, 'OK')" \ + && echo "Validation passed." \ + || (echo "Validation FAILED." && exit 1) + +# All You Can Eat - build and validate +ayce: build validate diff --git a/README.md b/README.md index f7744c0..cf8b1fd 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,12 @@ A [Docker Compose](https://docs.docker.com/compose/) [Jupyter notebook](https://docs.jupyter.org/en/latest/) image with [Apache Spark](https://spark.apache.org/), -[PySpark 3.5.0](https://spark.apache.org/docs/3.5.0/api/python/index.html), +[PySpark 4.1.1](https://spark.apache.org/docs/4.1.1/api/python/index.html), [JupyterLab](https://github.com/jupyterlab/jupyterlab), and [Rust](https://www.rust-lang.org/) support. +Based on the [quay.io/jupyter/all-spark-notebook](https://quay.io/repository/jupyter/all-spark-notebook) image. + ## TL;DR ```shell @@ -76,7 +78,7 @@ Access JupyterLab at http://localhost:8888. Notebooks in `./notebooks` are mount ## Resources -- [hub.docker.com/r/jupyter/all-spark-notebook](https://hub.docker.com/r/jupyter/all-spark-notebook) +- [quay.io/jupyter/all-spark-notebook](https://quay.io/repository/jupyter/all-spark-notebook) - [Data science with JupyterLab](https://docs.docker.com/guides/jupyter/#run-and-access-a-jupyterlab-container) - [Supercharging AI/ML Development with JupyterLab and Docker](https://www.docker.com/blog/supercharging-ai-ml-development-with-jupyterlab-and-docker/) - [PySpark Cheat Sheet](https://cartershanklin.github.io/pyspark-cheatsheet/)