Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion jupyterhub/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Custom image, see README
FROM jupyter/custom
# Usually we build a fresh jupyter/custom image from the docker-stacks repo, but since we are only updating a package
# here, we build on top of our existing jupyter image instead.
# FROM jupyter/custom
FROM us-docker.pkg.dev/vgi-pn-277619/data-team/pubnative/jupyterhub:spark3.4.1-py3.11-hadoop3-openjdk11-scala2.12-build2.0.0

USER root
WORKDIR /usr/local/
Expand Down
2 changes: 1 addition & 1 deletion jupyterhub/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ build: requirements

publish: build
docker push ${JUPYTERHUB}
@echo pushed ${JUPYTERHUB} to DockerHub
@echo pushed ${JUPYTERHUB} to Google Artifact Registry
29 changes: 27 additions & 2 deletions jupyterhub/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@
## Build base image
## Choosing a python version
- see available versions
```shell
conda search python
```
- Create the conda environment. We chose Python 3.11 mainly because TensorFlow currently does not support Python versions newer than 3.11.
![img.png](img.png)

## Setting up environment
```shell
conda create -n jupyterhub python=3.11.7 conda
```
```shell
conda activate jupyterhub
```
```shell
poetry install
```

## Build base image first (jupyter/custom) for building jupyterhub image
This image is built on top of this [repo](https://github.com/jupyter/docker-stacks).
To reproduce, follow the steps below:
- Clone repo
- Build the image(`jupyter/custom`) using the command below
```shell
git clone git@github.com:jupyter/docker-stacks.git
```
- Build the image(`jupyter/custom`) using the command below

```shell
docker build --rm --force-rm \
-t jupyter/custom ./images/pyspark-notebook \
--build-arg python_version=3.11.10 \
--build-arg openjdk_version=11 \
--build-arg spark_version=3.4.1 \
--build-arg hadoop_version=3 \
--build-arg spark_download_url="https://archive.apache.org/dist/spark/" \
--platform linux/amd64
```
- If you are only adding a package, consider building on top of your existing (current) Jupyter image instead of starting from `jupyter/custom`, to minimize changes to the user experience.

## Additional information
- `gcs-connector-hadoop3-2.2.11-shaded.jar` is added to spark
Expand Down
Binary file added jupyterhub/img.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
346 changes: 41 additions & 305 deletions jupyterhub/poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion jupyterhub/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "jupyterhub"
version = "2.0.0"
version = "2.0.1"
description = ""
authors = ["Abhinav <abhinav.jain@pubnative.net>"]
readme = "README.md"
Expand All @@ -26,6 +26,7 @@ influxdb = "^5.3.1"
google-cloud-bigquery = "^3.16.0"
tensorflow = "2.14.0"
tensorflow-probability = "^0.23.0"
xgboost = "^3.0.0"


[build-system]
Expand Down
2 changes: 2 additions & 0 deletions jupyterhub/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ mysql-connector-python==8.2.0 ; python_version >= "3.11" and python_version < "3
numba==0.58.1 ; python_version >= "3.11" and python_version < "3.12"
numexpr==2.8.8 ; python_version >= "3.11" and python_version < "3.12"
numpy==1.26.3 ; python_version >= "3.11" and python_version < "3.12"
nvidia-nccl-cu12==2.26.5 ; platform_system == "Linux" and platform_machine != "aarch64" and python_version >= "3.11" and python_version < "3.12"
oauthlib==3.2.2 ; python_version >= "3.11" and python_version < "3.12"
opt-einsum==3.3.0 ; python_version >= "3.11" and python_version < "3.12"
optuna==3.5.0 ; python_version >= "3.11" and python_version < "3.12"
Expand Down Expand Up @@ -123,4 +124,5 @@ werkzeug==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
wheel==0.42.0 ; python_version >= "3.11" and python_version < "3.12"
wrapt==1.14.1 ; python_version >= "3.11" and python_version < "3.12"
xarray==2023.12.0 ; python_version >= "3.11" and python_version < "3.12"
xgboost==3.0.0 ; python_version >= "3.11" and python_version < "3.12"
yarl==1.9.4 ; python_version >= "3.11" and python_version < "3.12"
3 changes: 2 additions & 1 deletion pyspark/executors/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM us-docker.pkg.dev/vgi-pn-277619/data-team/pubnative/pyspark:v3.4.1-python3.11-2
#FROM us-docker.pkg.dev/vgi-pn-277619/data-team/pubnative/pyspark:v3.4.1-python3.11-2
# use the base image here
FROM us-docker.pkg.dev/vgi-pn-277619/data-team/pubnative/pyspark-executor:v3.4.1-python3.11-2

USER 0

Expand Down
5 changes: 4 additions & 1 deletion pyspark/executors/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
OWNER := us-docker.pkg.dev/vgi-pn-277619/data-team/pubnative
REPO := ${OWNER}/pyspark-executor
TAG := v3.4.1-python3.11-2
TAG := v3.4.1-python3.11-3
IMAGE := ${REPO}:${TAG}

build:
docker build . -t ${IMAGE}

push: build
docker push ${IMAGE}

requirements:
cp ../../jupyterhub/requirements.txt .
3 changes: 3 additions & 0 deletions pyspark/executors/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ mysql-connector-python==8.2.0 ; python_version >= "3.11" and python_version < "3
numba==0.58.1 ; python_version >= "3.11" and python_version < "3.12"
numexpr==2.8.8 ; python_version >= "3.11" and python_version < "3.12"
numpy==1.26.3 ; python_version >= "3.11" and python_version < "3.12"
nvidia-nccl-cu12==2.26.5 ; platform_system == "Linux" and platform_machine != "aarch64" and python_version >= "3.11" and python_version < "3.12"
oauthlib==3.2.2 ; python_version >= "3.11" and python_version < "3.12"
opt-einsum==3.3.0 ; python_version >= "3.11" and python_version < "3.12"
optuna==3.5.0 ; python_version >= "3.11" and python_version < "3.12"
Expand All @@ -89,6 +90,7 @@ pydantic-core==2.14.6 ; python_version >= "3.11" and python_version < "3.12"
pydantic==2.5.3 ; python_version >= "3.11" and python_version < "3.12"
pydata-google-auth==1.8.2 ; python_version >= "3.11" and python_version < "3.12"
pyparsing==3.1.1 ; python_version >= "3.11" and python_version < "3.12"
pyspark==3.4.1 ; python_version >= "3.11" and python_version < "3.12"
python-dateutil==2.8.2 ; python_version >= "3.11" and python_version < "3.12"
pytz==2023.3.post1 ; python_version >= "3.11" and python_version < "3.12"
pytzdata==2020.1 ; python_version >= "3.11" and python_version < "3.12"
Expand Down Expand Up @@ -121,4 +123,5 @@ werkzeug==3.0.1 ; python_version >= "3.11" and python_version < "3.12"
wheel==0.42.0 ; python_version >= "3.11" and python_version < "3.12"
wrapt==1.14.1 ; python_version >= "3.11" and python_version < "3.12"
xarray==2023.12.0 ; python_version >= "3.11" and python_version < "3.12"
xgboost==3.0.0 ; python_version >= "3.11" and python_version < "3.12"
yarl==1.9.4 ; python_version >= "3.11" and python_version < "3.12"