Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
9f8b5b2
feat: add pLDDT to CL results table
jorisfu May 11, 2026
24eb514
feat: add trivial PAE based validation
jorisfu May 11, 2026
1ddc103
feat: trivial plDDT based validation
jorisfu May 12, 2026
77498d5
fix: broken formula
jorisfu May 15, 2026
0ab6fa0
fix monomer validation test
jorisfu May 15, 2026
291bd7d
refactor: expose PAE as matrix for monomers
jorisfu May 18, 2026
23d5b60
feat: PAE for multimers
jorisfu May 18, 2026
1e0e30e
feat: pLDDT for multimers
jorisfu May 19, 2026
eabd547
feat: add PAE/plDDT consistently to multimer imports
jorisfu May 19, 2026
bcb054f
feat: proper PAE validation for multimers
jorisfu May 19, 2026
58ac815
fix: adjust existing cl validation tests
jorisfu May 19, 2026
634e712
fix: some alphafold import tests
jorisfu May 19, 2026
2c66b35
tempfix: bridge monomer plots so method doesn't fail
jorisfu May 21, 2026
7ecac2b
tempfix: bridge multimer plots so method doesn't fail
jorisfu May 21, 2026
213e340
merge crosslinking
jorisfu May 21, 2026
f2518c1
chore: remove obsolete todos
jorisfu May 21, 2026
e13e2a3
chore: adjust some tests
jorisfu May 21, 2026
0c645e5
chore: adjust some tests
jorisfu May 21, 2026
c1d55a1
feat: introduce parsing of _chem_comp table in cif-files
tE3m May 10, 2026
a935874
chore: fix existing tests
jorisfu May 21, 2026
d506787
chore: test for no pLDDT data within cif
jorisfu May 21, 2026
a034f65
chore: tests for PAE based CL validation
jorisfu May 21, 2026
6ec24c0
chore: tests for pLDDT based CL validation
jorisfu May 21, 2026
ab538c7
feat: AF3 to AF2 PAE matrix translation
jorisfu May 26, 2026
e42066f
(AI) tests: PAE matrix reduction
jorisfu May 26, 2026
036de6b
chore: remove unused imports
jorisfu May 26, 2026
e53e0cb
feat: only make bounds fields visible if manual bounds is selected mode
jorisfu May 26, 2026
f57bfc8
chore: black
jorisfu May 26, 2026
9e27c43
fix tests
jorisfu May 28, 2026
0cb9ec0
chore: black
jorisfu May 28, 2026
ed07ce9
fix more tests
jorisfu May 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions backend/protzilla/constants/cif_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from enum import StrEnum


# Category prefix prepended to every member value of ATOM_SITE_COLUMNS.
ATOM_SITE_PREFIX = "_atom_site."


class ATOM_SITE_COLUMNS(StrEnum):
    """
    Fully qualified names of the columns that should be present in
    the _atom_site. table for mmCIF files from PDB or AFDB.

    Each member value is ATOM_SITE_PREFIX followed by the bare column name.
    """

    # Atom identity and labels
    ID = ATOM_SITE_PREFIX + "id"
    TYPE_SYMBOL = ATOM_SITE_PREFIX + "type_symbol"
    LABEL_ATOM_ID = ATOM_SITE_PREFIX + "label_atom_id"
    LABEL_ALT_ID = ATOM_SITE_PREFIX + "label_alt_id"
    LABEL_COMP_ID = ATOM_SITE_PREFIX + "label_comp_id"
    LABEL_ASYM_ID = ATOM_SITE_PREFIX + "label_asym_id"
    LABEL_ENTITY_ID = ATOM_SITE_PREFIX + "label_entity_id"
    LABEL_SEQ_ID = ATOM_SITE_PREFIX + "label_seq_id"
    PDBX_PDB_INS_CODE = ATOM_SITE_PREFIX + "pdbx_PDB_ins_code"

    # Cartesian coordinates
    CARTN_X = ATOM_SITE_PREFIX + "Cartn_x"
    CARTN_Y = ATOM_SITE_PREFIX + "Cartn_y"
    CARTN_Z = ATOM_SITE_PREFIX + "Cartn_z"

    # Per-atom values
    OCCUPANCY = ATOM_SITE_PREFIX + "occupancy"
    B_ISO_OR_EQUIV = ATOM_SITE_PREFIX + "B_iso_or_equiv"
    PDBX_FORMAL_CHARGE = ATOM_SITE_PREFIX + "pdbx_formal_charge"

    # Author-provided identifiers
    AUTH_SEQ_ID = ATOM_SITE_PREFIX + "auth_seq_id"
    AUTH_COMP_ID = ATOM_SITE_PREFIX + "auth_comp_id"
    AUTH_ASYM_ID = ATOM_SITE_PREFIX + "auth_asym_id"
    AUTH_ATOM_ID = ATOM_SITE_PREFIX + "auth_atom_id"

    PDBX_PDB_MODEL_NUM = ATOM_SITE_PREFIX + "pdbx_PDB_model_num"


# Module-level alias for the label_comp_id member of ATOM_SITE_COLUMNS.
ATOM_SITE_LABEL_COMP_ID = ATOM_SITE_COLUMNS.LABEL_COMP_ID

# Subset of ATOM_SITE_COLUMNS that this constant's name marks as numeric.
# NOTE(review): presumably used to cast these columns while parsing a
# cif file - confirm against the call site.
ATOM_SITE_COLUMNS_NUMERIC = [
    ATOM_SITE_COLUMNS.ID,
    ATOM_SITE_COLUMNS.LABEL_SEQ_ID,
    ATOM_SITE_COLUMNS.CARTN_X,
    ATOM_SITE_COLUMNS.CARTN_Y,
    ATOM_SITE_COLUMNS.CARTN_Z,
    ATOM_SITE_COLUMNS.OCCUPANCY,
    ATOM_SITE_COLUMNS.B_ISO_OR_EQUIV,
    ATOM_SITE_COLUMNS.AUTH_SEQ_ID,
]

# Category prefix prepended to every member value of CHEM_COMP_COLUMNS.
CHEM_COMP_PREFIX = "_chem_comp."


class CHEM_COMP_COLUMNS(StrEnum):
    """
    Fully qualified names of the columns that should be present in
    the _chem_comp. table for mmCIF files from PDB or AFDB.

    Each member value is CHEM_COMP_PREFIX followed by the bare column name.
    """

    ID = CHEM_COMP_PREFIX + "id"
    TYPE = CHEM_COMP_PREFIX + "type"
    MON_NSTD_FLAG = CHEM_COMP_PREFIX + "mon_nstd_flag"
    NAME = CHEM_COMP_PREFIX + "name"
    PDBX_SYNONYMS = CHEM_COMP_PREFIX + "pdbx_synonyms"
    FORMULA = CHEM_COMP_PREFIX + "formula"
    FORMULA_WEIGHT = CHEM_COMP_PREFIX + "formula_weight"
2 changes: 1 addition & 1 deletion backend/protzilla/constants/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DataKey(StrEnum):
GENE_MAPPING_DF = "gene_mapping_df"
CIF_DF = "cif_df"
AMINO_ACID_SEQUENCES_DF = "amino_acid_sequences_df"
PAE_DF = "pae_df" # pae = predicted aligned error
PAE_MATRIX = "pae_matrix" # pae = predicted aligned error
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes me suspicious, since we just had a discussion where it was important that things stayed a dataframe. Not that I'm against changing this, it just feels like something that needs to be discussed at least briefly.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can certainly discuss this tomorrow, but using the PAE as it was previously is impossible (it's not really a DF, but rather just a string packed into one row of a DF), and casting it to a reasonable DF would lead to worse performance in all aspects.

PLDDT_DF = "plddt_df" # plddt = predicted local distance difference test
CROSSLINKING_DF = "crosslinking_df"
CONFIDENCE_DF = "confidence_df"
Expand Down
7 changes: 7 additions & 0 deletions backend/protzilla/constants/option_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ class PValueColumnName(StrEnum):
ptm = "PTM"


class CrosslinkingValidationCriterion(Enum):
    """
    Criteria that can be chosen for validating crosslinks.

    Each member's value is the descriptive label string for that criterion.
    """

    # Bounds supplied by the user instead of being derived from PAE/pLDDT.
    manual_bounds = "Manual Bounds (set below)"

    # Crosslink length widened by the largest / smallest PAE between sites.
    max_pae = "CL length +/- maximum PAE between sites"
    min_pae = "CL length +/- minimum PAE between sites"

    # NOTE(review): the label spells the metric "plDDT" while it is usually
    # written "pLDDT"; left unchanged because the value may be matched
    # elsewhere - confirm before renaming.
    plddt_adjusted = "plDDT adjusted"


FC_SIGNIFICANCE_COLUMNS = ["Protein ID", "fc_z_score", "fc_significance"]
CORRECTED_P_VALUES_COLUMNS = [
"Protein ID",
Expand Down
Loading
Loading