Laurits7 · Laurits7 · May 18, 2025 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
@@ -3,7 +3,47 @@ name: Pylint
 on: [pull_request]
 
 jobs:
+
+  lint-autofix:
+    runs-on: ubuntu-latest
+    if: github.actor != 'github-actions[bot]'
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          # Check out the PR branch itself instead of the default pull_request merge
+          # commit, so black formats exactly the files that are on the branch and the
+          # later commit/push step does not have to switch branches with a dirty tree.
+          ref: ${{ github.head_ref }}
+          persist-credentials: false  # Needed for manual push
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black
+
+      - name: Auto-fix with black
+        run: black --line-length 120 .
+
+      - name: Commit and push changes
+        # Commits whatever black changed and pushes it back to the PR branch.
+        # Context values are handed to the script through `env` instead of being
+        # interpolated with `${{ }}`: the branch name is user-controlled, and inline
+        # expansion happens before the shell parses the script (script injection).
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git fetch origin "$HEAD_REF"
+          git checkout "$HEAD_REF"
+          git add .
+          if ! git diff --cached --quiet; then
+            git commit -m "chore: auto-fix Python lint issues"
+            git rebase "origin/$HEAD_REF"
+            git push "https://x-access-token:${GITHUB_TOKEN}@github.com/${REPOSITORY}" "HEAD:refs/heads/${HEAD_REF}"
+          fi
+        env:
+          # Required if using a token for push
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          HEAD_REF: ${{ github.head_ref }}
+          REPOSITORY: ${{ github.repository }}
+
   build:
+    needs: lint-autofix
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -19,6 +59,13 @@ jobs:
         python -m pip install --upgrade pip
         pip install pylint
         pip install -r requirements.txt
+    - name: Checkout updated code
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ github.head_ref }}
+    - name: Run Pylint for logs (with score)
+      if: always()
+      run: pylint --rcfile=config/.pylintrc $(git ls-files '*.py') --exit-zero
     - name: Analysing the code with pylint
       run: |
         pylint --rcfile=config/.pylintrc $(git ls-files '*.py') --exit-zero --output-format=json > pylint_output.json
@@ -41,7 +88,7 @@ jobs:
       uses: actions/upload-artifact@v4
       with:
         name: pylint-report
-        path: pylint-report.json
+        path: pylint_output.json
     - name: Show Pylint warnings
       if: always()
       run: |
@@ -53,3 +100,4 @@ jobs:
         echo "Errors:"
         jq -r '.[] | select(.type == "error" or .type == "fatal") | "\(.path):\(.line): \(.message)"' pylint_output.json || true
 
+
diff --git a/config/.pylintrc b/config/.pylintrc
@@ -3,3 +3,7 @@ disable=C0114, C0116
 
 [REPORTS]
 output-format=colorized
+score = yes
+
+[FORMAT]
+max-line-length=120
diff --git a/ml4cc/__init__.py b/ml4cc/__init__.py
@@ -1,3 +1,3 @@
 from . import models
 from . import tools
-from . import data
+from . import data
diff --git a/ml4cc/config/environment/lumi.yaml b/ml4cc/config/environment/lumi.yaml
@@ -1,6 +1,6 @@
 name: lumi
 project_dir: /scratch/project_465001293/ML4CC/ml4cc
-data_dir: /scratch/project_465001293/ML4CC/data
+data_dir: /scratch/project_465001293/ML4CC
 tmp_dir: /scratch/project_465001293/ML4CC/tmp
 slurm:
     queue:

diff --git a/ml4cc/config/evaluation/evaluation.yaml b/ml4cc/config/evaluation/evaluation.yaml
@@ -0,0 +1,5 @@
+# Evaluation settings; pulled in by main.yaml through its `evaluation: evaluation`
+# defaults entry.
+dataset:
+  # NOTE(review): presumably the number of waveforms used per evaluation run --
+  # confirm against the code that reads this key.
+  num_evaluation_waveforms: 1000
+  # Derived by interpolation from `training.results_dir`.
+  results_output_dir: ${training.results_dir}/data
+training:
+  eval_all_always: false
diff --git a/ml4cc/config/main.yaml b/ml4cc/config/main.yaml
@@ -3,5 +3,8 @@ defaults:
     - environment@host: manivald  # Options: lumi, manivald
     - datasets@dataset: CEPC  # Options: FCC, CEPC
     - models: models
+    - datasets@datasets.CEPC: CEPC
+    - datasets@datasets.FCC: FCC
+    - evaluation: evaluation
     - preprocessing
-    - training
+    - training
diff --git a/ml4cc/config/models/one_step/models/transformer.yaml b/ml4cc/config/models/one_step/models/transformer.yaml
@@ -8,12 +8,7 @@ hyperparameters:
     hidden_dim: 2048
     num_classes: 1
     max_len: ${dataset.input_dim}
+    lr: 0.001
 checkpoint:
     model: null
     losses: null
-
-
-# TODO: Maybe need to have name and target under "model" key to instantiate the class?
-
-defaults:
-    - _self_
diff --git a/ml4cc/config/models/two_step/clusterization/CNN.yaml b/ml4cc/config/models/two_step/clusterization/CNN.yaml
@@ -1,8 +1,25 @@
-_target_: ml4cc.models.simpler_models
+# NOTE(review): this is the CNN config, yet it targets DNNModule (DNN.yaml targets
+# DNNModule and RNN.yaml targets RNNModule) -- confirm whether a dedicated CNN
+# module class was intended here.
+_target_: ml4cc.models.simpler_models.DNNModule
 name: CNN
+hyperparameters:
+    conv_layer_1:
+        in_channels: 1
+        out_channels: 32
+        kernel_size: 4
+    pool_layer_1:
+        kernel_size: 2
+    conv_layer_2:
+        out_channels: 16
+        kernel_size: 4
+    linear_layer_1:
+        out_features: 32
+    output_layer:
+        in_features: 32
+        out_features: 1
+    num_features: ${dataset.input_dim} # TODO: Check if this is as done by Guang
+optimizer:
+    # NOTE(review): DGCNN.yaml spells this key `_target_`; confirm which spelling
+    # the code that builds the optimizer actually reads.
+    target: torch.optim.AdamW
+    lr: 0.001
+
 checkpoint:
     model: null
     losses: null
-
-defaults:
-    - _self_
diff --git a/ml4cc/config/models/two_step/clusterization/DGCNN.yaml b/ml4cc/config/models/two_step/clusterization/DGCNN.yaml
@@ -1,9 +1,8 @@
-_target_: ml4cc.models.DGCNN
+_target_: ml4cc.models.DGCNN.DGCNN
 name: DGCNN
-checkpoint:
-    model: null
-    losses: null
-
+optimizer:
+    _target_: torch.optim.AdamW
+    lr: 0.001
 hyperparameters:
     n_conv1: 32
     n_conv2: 32
@@ -17,3 +16,7 @@ hyperparameters:
     k: 4
     mlp_dropout: 0.5
     out_channels: 2
+
+checkpoint:
+    model: null
+    losses: null
diff --git a/ml4cc/config/models/two_step/clusterization/DNN.yaml b/ml4cc/config/models/two_step/clusterization/DNN.yaml
@@ -1,8 +1,18 @@
-_target_: ml4cc.models.simpler_models
+_target_: ml4cc.models.simpler_models.DNNModule
 name: DNN
+hyperparameters:
+    n_features: 1 # ${dataset.input_dim} # TODO: Fix
+    linear_layer_1:
+        out_features: 32
+    linear_layer_2:
+        out_features: 32
+    linear_layer_3:
+        out_features: 32
+    output_layer:
+        out_features: 1
+optimizer:
+    # NOTE(review): DGCNN.yaml spells this key `_target_`; confirm which spelling
+    # the code that builds the optimizer actually reads.
+    target: torch.optim.AdamW
+    lr: 0.001
 checkpoint:
     model: null
-    losses: null
-
-defaults:
-    - _self_
+    losses: null
diff --git a/ml4cc/config/models/two_step/clusterization/RNN.yaml b/ml4cc/config/models/two_step/clusterization/RNN.yaml
@@ -1,8 +1,19 @@
-_target_: ml4cc.models.simpler_models
+_target_: ml4cc.models.simpler_models.RNNModule
 name: RNN
+hyperparameters:
+    LSTM_layers:
+        input_size: 1
+        hidden_size: 16
+        num_layers: 1
+        batch_first: true
+    linear_layer_1:
+        out_features: 16
+    output_layer:
+        out_features: 1
+optimizer:
+    # NOTE(review): DGCNN.yaml spells this key `_target_`; confirm which spelling
+    # the code that builds the optimizer actually reads.
+    target: torch.optim.AdamW
+    lr: 0.001
+
 checkpoint:
     model: null
     losses: null
-
-defaults:
-    - _self_
diff --git a/ml4cc/config/models/two_step/peak_finding/LSTM.yaml b/ml4cc/config/models/two_step/peak_finding/LSTM.yaml
@@ -4,6 +4,3 @@ hyperparameters:
     input_dim: ${dataset.input_dim}
     lstm_hidden_dim: 32
     num_lstm_layers: 1
-
-defaults:
-    - _self_
diff --git a/ml4cc/config/models/two_step/two_step.yaml b/ml4cc/config/models/two_step/two_step.yaml
@@ -1,4 +1,4 @@
 defaults:
   - _self_
   - peak_finding@peak_finding.model: LSTM
-  - clusterization@clusterization.model: DNN
+  - clusterization@clusterization.model: RNN
diff --git a/ml4cc/config/models/two_step_minimal/models/LSTM.yaml b/ml4cc/config/models/two_step_minimal/models/LSTM.yaml
@@ -4,6 +4,3 @@ hyperparameters:
     input_dim: ${dataset.input_dim}
     lstm_hidden_dim: 32
     num_lstm_layers: 1
-
-defaults:
-    - _self_
diff --git a/ml4cc/config/training.yaml b/ml4cc/config/training.yaml
@@ -1,19 +1,22 @@
 training:
+    debug_run: false
     type: one_step  # Options: one_step, two_step, two_step_minimal
     output_dir: null
     output_dir_: ${training.output_dir}/${training.type}
     models_dir: ${training.output_dir_}/models
     log_dir: ${training.output_dir_}/logs
     predictions_dir: ${training.output_dir_}/predictions
+    results_dir: ${training.output_dir}/results
     dataloader:
         batch_sizes:
             one_step: 128
             two_step: 512
+            two_step_minimal: 512
         batch_size: ${training.dataloader.batch_sizes[${training.type}]}
-        num_dataloader_workers: 2
+        num_dataloader_workers: 1
         prefetch_factor: 100
     trainer:
-        max_epochs: 5 # 50 epochs in Guang paper
+        max_epochs: 50 # 50 epochs in Guang paper
     model_evaluation_only: False
 
 hydra:

diff --git a/ml4cc/models/LSTM.py b/ml4cc/models/LSTM.py
@@ -4,18 +4,23 @@
 import torch.nn.functional as F
 
 
-
-class LSTM(torch.nn.Module):  # TODO: Is this implemented like in their paper? In their paper they have multiple LSTMs.
-    def __init__(self, input_dim: int = 3000, lstm_hidden_dim: int = 32, num_lstm_layers: int = 1):
+# TODO: Is this implemented like in their paper? In their paper they have
+# multiple LSTMs.
+class LSTM(torch.nn.Module):
+    """LSTM followed by two linear layers producing one sigmoid output per sequence.
+
+    The LSTM is built with ``input_size=1`` and ``batch_first=True``; only the final
+    hidden state of the last LSTM layer is passed on to the linear layers.
+    """
+
+    def __init__(self, lstm_hidden_dim: int = 32, num_lstm_layers: int = 1):
+        """Build the network.
+
+        Args:
+            lstm_hidden_dim: Hidden size of the LSTM and input width of ``fc3``.
+            num_lstm_layers: Number of stacked LSTM layers.
+        """
         super().__init__()
-        self.lstm = torch.nn.LSTM(input_size=1, num_layers=num_lstm_layers, hidden_size=lstm_hidden_dim, batch_first=True)
+        self.lstm = torch.nn.LSTM(
+            input_size=1, num_layers=num_lstm_layers, hidden_size=lstm_hidden_dim, batch_first=True
+        )
         self.fc3 = torch.nn.Linear(lstm_hidden_dim, 32)
         self.fc4 = torch.nn.Linear(32, 1)
 
     def forward(self, x):
+        """Return the sigmoid of ``fc4(relu(fc3(h_last)))`` with size-1 axes squeezed away.
+
+        Args:
+            x: Input to the LSTM; assumed to be (batch, seq_len, 1) given
+                ``batch_first=True`` and ``input_size=1`` -- TODO confirm with caller.
+        """
         ula, (h, _) = self.lstm(x)
         out = h[-1]
-        out = F.relu(self.fc3(out))  # If we would like to have a prediction for each point in wf, then we would use ula instead of out here
+        # If we would like to have a prediction for each point in wf, then we
+        # would use ula instead of out here
+        out = F.relu(self.fc3(out))
+        # NOTE: squeeze() without a dim removes every size-1 axis, so a batch of
+        # one sample yields a 0-dim tensor rather than shape (1,).
         clf = F.sigmoid(self.fc4(out)).squeeze()
         return clf
 
@@ -26,9 +31,8 @@ def __init__(self, name: str, hyperparameters: dict):
         self.hyperparameters = hyperparameters
         super().__init__()
         self.lstm = LSTM(
-            input_dim=self.hyperparameters["input_dim"],
             lstm_hidden_dim=self.hyperparameters["lstm_hidden_dim"],
-            num_lstm_layers=self.hyperparameters["num_lstm_layers"]
+            num_lstm_layers=self.hyperparameters["num_lstm_layers"],
         )
 
     def training_step(self, batch, batch_idx):
@@ -47,15 +51,15 @@ def configure_optimizers(self):
         return optim.AdamW(self.parameters(), lr=0.001)
 
     def predict_step(self, batch, batch_idx):
-        predicted_labels, target = self.forward(batch)
+        predicted_labels, _ = self.forward(batch)
         return predicted_labels
 
     def test_step(self, batch, batch_idx):
-        predicted_labels, target = self.forward(batch)
+        predicted_labels, _ = self.forward(batch)
         return predicted_labels
 
     def forward(self, batch):
-        waveform, target, wf_idx = batch
+        waveform, target = batch
         predicted_labels = self.lstm(waveform).squeeze()
         return predicted_labels, target