Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1ffda14
Training evaluation init
Laurits7 Apr 29, 2025
05897d3
pylint error fix
Laurits7 Apr 29, 2025
68c1f34
pylint
Laurits7 Apr 29, 2025
7e6fceb
up models + pyling
Laurits7 Apr 30, 2025
8493101
update simpler models
Laurits7 Apr 30, 2025
7fd78e8
update pylint
Laurits7 Apr 30, 2025
28fb59e
lint test
Laurits7 Apr 30, 2025
6f0adfd
up
Laurits7 Apr 30, 2025
f8163d3
up
Laurits7 Apr 30, 2025
df51784
init visualization + pylint
Laurits7 Apr 30, 2025
babe8d8
update lint
Laurits7 Apr 30, 2025
7584923
up
Laurits7 Apr 30, 2025
b7e4eba
up
Laurits7 Apr 30, 2025
024a01b
chore: auto-fix Python lint issues
github-actions[bot] Apr 30, 2025
301c0b8
up
Laurits7 Apr 30, 2025
d3ed5fd
replace autopep8 with black
Laurits7 Apr 30, 2025
3fbcac1
chore: auto-fix Python lint issues
github-actions[bot] Apr 30, 2025
f4692d2
visualize dataset
Laurits7 May 5, 2025
a6f4a0a
chore: auto-fix Python lint issues
github-actions[bot] May 5, 2025
1e5f297
up
Laurits7 May 5, 2025
e917931
up
Laurits7 May 5, 2025
371b270
chore: auto-fix Python lint issues
github-actions[bot] May 5, 2025
2643a61
up
Laurits7 May 5, 2025
d11e762
chore: auto-fix Python lint issues
github-actions[bot] May 5, 2025
ce8fbf8
update training & evaluation from one_step
Laurits7 May 5, 2025
37f1e77
chore: auto-fix Python lint issues
github-actions[bot] May 5, 2025
d1b2dd7
clusterization data source
Laurits7 May 8, 2025
760e820
merge
Laurits7 May 8, 2025
7c96467
chore: auto-fix Python lint issues
github-actions[bot] May 8, 2025
7e21a2c
typo
Laurits7 May 8, 2025
7060723
typo
Laurits7 May 8, 2025
337095f
cl
Laurits7 May 11, 2025
72b4294
chore: auto-fix Python lint issues
github-actions[bot] May 11, 2025
3c42cc6
fix cfg
Laurits7 May 16, 2025
3a40306
up
Laurits7 May 18, 2025
356a1cd
chore: auto-fix Python lint issues
github-actions[bot] May 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,47 @@ name: Pylint
on: [pull_request]

jobs:

lint-autofix:
runs-on: ubuntu-latest
if: github.actor != 'github-actions[bot]'
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
persist-credentials: false # Needed for manual push

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install black

- name: Auto-fix with black
run: black --line-length 120 .

- name: Commit and push changes
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git fetch origin ${{ github.head_ref }}
git checkout ${{ github.head_ref }}
git add .
if ! git diff --cached --quiet; then
git commit -m "chore: auto-fix Python lint issues"
git rebase origin/${{ github.head_ref }}
git push https://x-access-token:${GITHUB_TOKEN}@github.com/${{ github.repository }} HEAD:refs/heads/${{ github.head_ref }}
fi
env:
# Required if using a token for push
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

build:
needs: lint-autofix
runs-on: ubuntu-latest
strategy:
matrix:
Expand All @@ -19,6 +59,13 @@ jobs:
python -m pip install --upgrade pip
pip install pylint
pip install -r requirements.txt
- name: Checkout updated code
uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}
- name: Run Pylint for logs (with score)
if: always()
run: pylint --rcfile=config/.pylintrc $(git ls-files '*.py') --exit-zero
- name: Analysing the code with pylint
run: |
pylint --rcfile=config/.pylintrc $(git ls-files '*.py') --exit-zero --output-format=json > pylint_output.json
Expand All @@ -41,7 +88,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: pylint-report
path: pylint-report.json
path: pylint_output.json
- name: Show Pylint warnings
if: always()
run: |
Expand All @@ -53,3 +100,4 @@ jobs:
echo "Errors:"
jq -r '.[] | select(.type == "error" or .type == "fatal") | "\(.path):\(.line): \(.message)"' pylint_output.json || true


4 changes: 4 additions & 0 deletions config/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,7 @@ disable=C0114, C0116

[REPORTS]
output-format=colorized
score = yes

[FORMAT]
max-line-length=120
2 changes: 1 addition & 1 deletion ml4cc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from . import models
from . import tools
from . import data
from . import data
2 changes: 1 addition & 1 deletion ml4cc/config/environment/lumi.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: lumi
project_dir: /scratch/project_465001293/ML4CC/ml4cc
data_dir: /scratch/project_465001293/ML4CC/data
data_dir: /scratch/project_465001293/ML4CC
tmp_dir: /scratch/project_465001293/ML4CC/tmp
slurm:
queue:
Expand Down
5 changes: 5 additions & 0 deletions ml4cc/config/evaluation/evaluation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
dataset:
num_evaluation_waveforms: 1000
results_output_dir: ${training.results_dir}/data
training:
eval_all_always: false
5 changes: 4 additions & 1 deletion ml4cc/config/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@ defaults:
- environment@host: manivald # Options: lumi, manivald
- datasets@dataset: CEPC # Options: FCC, CEPC
- models: models
- datasets@datasets.CEPC: CEPC
- datasets@datasets.FCC: FCC
- evaluation: evaluation
- preprocessing
- training
- training
7 changes: 1 addition & 6 deletions ml4cc/config/models/one_step/models/transformer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,7 @@ hyperparameters:
hidden_dim: 2048
num_classes: 1
max_len: ${dataset.input_dim}
lr: 0.001
checkpoint:
model: null
losses: null


# TODO: Maybe need to have name and target under "model" key to instantiate the class?

defaults:
- _self_
25 changes: 21 additions & 4 deletions ml4cc/config/models/two_step/clusterization/CNN.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
_target_: ml4cc.models.simpler_models
_target_: ml4cc.models.simpler_models.DNNModule
name: CNN
hyperparameters:
conv_layer_1:
in_channels: 1
out_channels: 32
kernel_size: 4
pool_layer_1:
kernel_size: 2
conv_layer_2:
out_channels: 16
kernel_size: 4
linear_layer_1:
out_features: 32
output_layer:
in_features: 32
out_features: 1
num_features: ${dataset.input_dim} # TODO: Check whether this matches what Guang did
optimizer:
target: torch.optim.AdamW
lr: 0.001

checkpoint:
model: null
losses: null

defaults:
- _self_
13 changes: 8 additions & 5 deletions ml4cc/config/models/two_step/clusterization/DGCNN.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
_target_: ml4cc.models.DGCNN
_target_: ml4cc.models.DGCNN.DGCNN
name: DGCNN
checkpoint:
model: null
losses: null

optimizer:
_target_: torch.optim.AdamW
lr: 0.001
hyperparameters:
n_conv1: 32
n_conv2: 32
Expand All @@ -17,3 +16,7 @@ hyperparameters:
k: 4
mlp_dropout: 0.5
out_channels: 2

checkpoint:
model: null
losses: null
20 changes: 15 additions & 5 deletions ml4cc/config/models/two_step/clusterization/DNN.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
_target_: ml4cc.models.simpler_models
_target_: ml4cc.models.simpler_models.DNNModule
name: DNN
hyperparameters:
n_features: 1 # ${dataset.input_dim} # TODO: Fix
linear_layer_1:
out_features: 32
linear_layer_2:
out_features: 32
linear_layer_3:
out_features: 32
output_layer:
out_features: 1
optimizer:
target: torch.optim.AdamW
lr: 0.001
checkpoint:
model: null
losses: null

defaults:
- _self_
losses: null
19 changes: 15 additions & 4 deletions ml4cc/config/models/two_step/clusterization/RNN.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,19 @@
_target_: ml4cc.models.simpler_models
_target_: ml4cc.models.simpler_models.RNNModule
name: RNN
hyperparameters:
LSTM_layers:
input_size: 1
hidden_size: 16
num_layers: 1
batch_first: true
linear_layer_1:
out_features: 16
output_layer:
out_features: 1
optimizer:
target: torch.optim.AdamW
lr: 0.001

checkpoint:
model: null
losses: null

defaults:
- _self_
3 changes: 0 additions & 3 deletions ml4cc/config/models/two_step/peak_finding/LSTM.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,3 @@ hyperparameters:
input_dim: ${dataset.input_dim}
lstm_hidden_dim: 32
num_lstm_layers: 1

defaults:
- _self_
2 changes: 1 addition & 1 deletion ml4cc/config/models/two_step/two_step.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
defaults:
- _self_
- peak_finding@peak_finding.model: LSTM
- clusterization@clusterization.model: DNN
- clusterization@clusterization.model: RNN
3 changes: 0 additions & 3 deletions ml4cc/config/models/two_step_minimal/models/LSTM.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,3 @@ hyperparameters:
input_dim: ${dataset.input_dim}
lstm_hidden_dim: 32
num_lstm_layers: 1

defaults:
- _self_
7 changes: 5 additions & 2 deletions ml4cc/config/training.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
training:
debug_run: false
type: one_step # Options: one_step, two_step, two_step_minimal
output_dir: null
output_dir_: ${training.output_dir}/${training.type}
models_dir: ${training.output_dir_}/models
log_dir: ${training.output_dir_}/logs
predictions_dir: ${training.output_dir_}/predictions
results_dir: ${training.output_dir}/results
dataloader:
batch_sizes:
one_step: 128
two_step: 512
two_step_minimal: 512
batch_size: ${training.dataloader.batch_sizes[${training.type}]}
num_dataloader_workers: 2
num_dataloader_workers: 1
prefetch_factor: 100
trainer:
max_epochs: 5 # 50 epochs in Guang paper
max_epochs: 50 # 50 epochs in Guang paper
model_evaluation_only: False

hydra:
Expand Down
24 changes: 14 additions & 10 deletions ml4cc/models/LSTM.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
import torch.nn.functional as F



class LSTM(torch.nn.Module): # TODO: Is this implemented like in their paper? In their paper they have multiple LSTMs.
def __init__(self, input_dim: int = 3000, lstm_hidden_dim: int = 32, num_lstm_layers: int = 1):
# TODO: Check whether this matches the implementation in their paper, which
# uses multiple LSTMs.
class LSTM(torch.nn.Module):
def __init__(self, lstm_hidden_dim: int = 32, num_lstm_layers: int = 1):
super().__init__()
self.lstm = torch.nn.LSTM(input_size=1, num_layers=num_lstm_layers, hidden_size=lstm_hidden_dim, batch_first=True)
self.lstm = torch.nn.LSTM(
input_size=1, num_layers=num_lstm_layers, hidden_size=lstm_hidden_dim, batch_first=True
)
self.fc3 = torch.nn.Linear(lstm_hidden_dim, 32)
self.fc4 = torch.nn.Linear(32, 1)

def forward(self, x):
ula, (h, _) = self.lstm(x)
out = h[-1]
out = F.relu(self.fc3(out)) # If we would like to have a prediction for each point in wf, then we would use ula instead of out here
# To get a prediction for every point in the waveform, use `ula` (the LSTM's
# per-timestep output) instead of `out` here.
out = F.relu(self.fc3(out))
clf = F.sigmoid(self.fc4(out)).squeeze()
return clf

Expand All @@ -26,9 +31,8 @@ def __init__(self, name: str, hyperparameters: dict):
self.hyperparameters = hyperparameters
super().__init__()
self.lstm = LSTM(
input_dim=self.hyperparameters["input_dim"],
lstm_hidden_dim=self.hyperparameters["lstm_hidden_dim"],
num_lstm_layers=self.hyperparameters["num_lstm_layers"]
num_lstm_layers=self.hyperparameters["num_lstm_layers"],
)

def training_step(self, batch, batch_idx):
Expand All @@ -47,15 +51,15 @@ def configure_optimizers(self):
return optim.AdamW(self.parameters(), lr=0.001)

def predict_step(self, batch, batch_idx):
predicted_labels, target = self.forward(batch)
predicted_labels, _ = self.forward(batch)
return predicted_labels

def test_step(self, batch, batch_idx):
predicted_labels, target = self.forward(batch)
predicted_labels, _ = self.forward(batch)
return predicted_labels

def forward(self, batch):
waveform, target, wf_idx = batch
waveform, target = batch
predicted_labels = self.lstm(waveform).squeeze()
return predicted_labels, target

Expand Down
Loading