diff --git a/README.md b/README.md
index 392372a..d731444 100644
--- a/README.md
+++ b/README.md
@@ -425,6 +425,7 @@ files_mix:  # PDB files for each crosslink type (required if mix_bool=true)
 
 replace_bool: false      # Enable crosslink replacement
 ratio_replace: 30        # Percentage of crosslinks to replace (0-100)
+ratio_replace_scope: "enzymatic"  # Crosslinks to replace: "enzymatic" (default), "non_enzymatic" (AGEs), or "all"
 replace_file: null       # Input file, or null to use geometry output
 
 # ================================================================================
@@ -620,10 +621,13 @@ n_term_combination: "9.C - 947.A"
 c_term_combination: "1047.C - 104.C"
 replace_bool: true
 ratio_replace: 30  # Replace 30% of crosslinks
+ratio_replace_scope: "enzymatic"  # "enzymatic" (default), "non_enzymatic" (AGEs), or "all"
 fibril_length: 40.0
 contact_distance: 20
 ```
 
+> **Note:** When you supply an input `pdb_file` together with `n_term_type`/`c_term_type`, ColBuilder verifies that the crosslinks in the PDB match the specified types. Mismatches (e.g. a trivalent PDB with `n_term_type: "HLKNL"`) stop generation with error `GEO_ERR_008`.
+
 ```bash
 colbuilder --config_file config_replace_crosslinks.yaml
 ```
diff --git a/config.yaml b/config.yaml
index 867ed3f..fc1dcd2 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,141 +1,204 @@
 # ================================================================================
-# ColBuilder Configuration File
+# ColBuilder Configuration File — FULL REFERENCE
 # ================================================================================
-# This configuration file controls the ColBuilder pipeline for generating collagen
-# structures. The pipeline consists of three main modules that can be run 
-# independently or in combination:
+# This file documents EVERY user-facing option in the ColBuilder pipeline, with
+# defaults, valid values, and notes. Copy it and delete what you don't need.
 #
-# 1. SEQUENCE GENERATOR: Creates collagen triple helix structures from sequences
-# 2. GEOMETRY GENERATOR: Builds collagen microfibrils from triple helices
-# 3. TOPOLOGY GENERATOR: Generates force field topologies for simulations
+# The pipeline has three modules that run independently or chained together:
+#   1. SEQUENCE GENERATOR : collagen triple helix from a sequence (homology model)
+#   2. GEOMETRY GENERATOR : collagen microfibril from triple helices
+#   3. TOPOLOGY GENERATOR : MD force-field topologies (Amber99 or Martini3)
 #
-# IMPORTANT: When using additional crosslinks with a pre-mutated PDB, 
-# e.g. to add AGE crosslinks:
-# - Run sequence generation first to apply and optimize additional crosslinks
-# - Then run geometry generation in a separate execution using the output PDB
+# Each module is toggled by its *_generator flag below. Any combination is valid:
+#   - sequence only, geometry only, topology only, or all three in one run.
+#   - When chained, the output of one stage feeds the next automatically.
+#
+# IMPORTANT (mutated-PDB / additional-crosslink workflow, e.g. adding AGEs):
+#   - Run sequence generation FIRST to apply and optimize additional crosslinks.
+#   - Then run geometry generation in a SEPARATE execution using the output PDB.
+# ================================================================================
+
+
+# ================================================================================
+# OPERATION MODE
 # ================================================================================
+mode: null                   # Leave null — set automatically from the flags below.
+config_file: null            # Path to another YAML config to load (nested configs).
+sequence_generator: true     # Generate triple helix structure from sequence.
+geometry_generator: true     # Build microfibril from triple helix.
+topology_generator: true     # Generate simulation topology files.
+debug: false                 # Keep intermediate files + verbose logs for debugging.
+working_directory: "./"      # Directory for output files.
 
-# Operation Mode
-mode: null
-config_file: null            # Path to another config file (for nested configs)  
-sequence_generator: true    # Generate triple helix structure from sequence
-geometry_generator: true    # Build microfibril from triple helix
-topology_generator: true     # Generate simulation topology files
-debug: false                 # Keep intermediate files for debugging
-working_directory: "./"      # Directory for output files
 
 # ================================================================================
 # SEQUENCE GENERATION SETTINGS
 # ================================================================================
-# These settings control the generation of collagen triple helix structures.
-# Two main workflows are supported:
-# 1. Standard: Generate from FASTA sequence (leave mutated_pdb as null)
-# 2. Mutated PDB: Add crosslinks to existing structure (must provide mutated_pdb)
+# Two workflows:
+#   1. Standard    : generate from a FASTA sequence (leave mutated_pdb null).
+#   2. Mutated PDB : add crosslinks to an existing structure (set mutated_pdb).
 
-# Species selection - determines the collagen sequence if no custom FASTA provided
+# Species selection — determines the collagen sequence if no custom FASTA is given.
 species: "rattus_norvegicus"  # Available options:
                         # Mammals (Primates): homo_sapiens, pan_troglodytes, pongo_abelii, callithrix_jacchus, otolemur_garnettii
                         # Mammals (Rodents): mus_musculus, rattus_norvegicus
                         # Mammals (Other): bos_taurus, canis_lupus, ailuropoda_melanoleuca, mustela_putorius, myotis_lucifugus, loxodonta_africana
                         # Fish: danio_rerio, oreochromis_niloticus, oryzias_latipes, tetraodon_nigroviridis, xiphophorus_maculatus
                         # Reptiles: pelodiscus_sinensis
+                        # NOTE: for a species NOT in this list you MUST provide a fasta_file
+                        # (and, if sequence_generator is false, a pdb_file).
 
-# Mutated PDB workflow - provide pre-optimized structure to add additional crosslinks
-mutated_pdb: null  # Path to pre-mutated PDB, or null for standard workflow
+# Mutated-PDB workflow — provide a pre-optimized structure to add crosslinks onto.
+mutated_pdb: null  # Path to pre-mutated PDB, or null for the standard workflow.
 # Example: "rattusnorvegicus_N_PYD_C_PYD+ADD1_Glucosepane.pdb"
 
-# Custom sequence input - overrides species selection if provided
-fasta_file: null  # Path to custom FASTA file, or null to use available species sequence
+# Custom sequence input — overrides species selection if provided.
+fasta_file: null  # Path to a custom FASTA file, or null to use the species sequence.
 # Example: "path/to/custom_sequence.fasta"
 
+
 # ================================================================================
 # CROSSLINK CONFIGURATION
 # ================================================================================
-# Terminal crosslinks are applied at N and C termini during standard generation.
-# Additional crosslinks can be added anywhere (e.g., AGE products).
+# Terminal crosslinks are applied at the N and C termini during standard
+# generation. Additional crosslinks (e.g. AGE products) can be added on top.
 
-crosslink: true  # Enable crosslink application and optimization
+crosslink: true  # Enable crosslink application and optimization.
 
-# Terminal crosslinks (applied during standard sequence generation)
-n_term_type: "HLKNL"               # N-terminal crosslink type
-c_term_type: "HLKNL"               # C-terminal crosslink type
-n_term_combination: "9.C - 946.A"  # Format: "residue.chain - residue.chain"
+# Terminal crosslinks (applied during standard sequence generation).
+n_term_type: "HLKNL"               # N-terminal crosslink type (or "NONE").
+c_term_type: "HLKNL"               # C-terminal crosslink type (or "NONE").
+n_term_combination: "9.C - 946.A"  # Format: "residue.chain - residue.chain".
 c_term_combination: "1046.C - 103.C"
-# Available crosslink types: DPD, DPL, HLKNL, LKNL, PYD, PYL, deHHLNL, deHLNL
-# Check available crosslinks and respective combinations at [src/colbuilder/data/sequence/crosslinks.csv](https://github.com/graeter-group/colbuilder/blob/main/src/colbuilder/data/sequence/crosslinks.csv)
-
-# Additional crosslinks (uncomment for mutated PDB workflow)
-# These are applied on top of existing crosslinks in the structure
-# additional_1_type: "Pentosidine"            # First additional crosslink type
-# additional_2_type: null                     # Second additional crosslink type (optional)
-# additional_1_combination: "1008.A - 767.B"  # Position for first crosslink
-# additional_2_combination: null              # Position for second crosslink
+#
+# Available crosslink types:
+#   Enzymatic DIVALENT  (2 residues): HLKNL, LKNL, deHLNL, deHHLNL
+#   Enzymatic TRIVALENT (3 residues): PYD, DPD, PYL, DPL
+#   Non-enzymatic / AGE (2 residues): Pentosidine, Glucosepane, MOLD
+#   None:                             NONE
+# Combination format:
+#   Divalent  -> "resid.chain - resid.chain"            (e.g. "9.C - 947.A")
+#   Trivalent -> "resid.chain - resid.chain - resid.chain" (e.g. "6.B - 9.C - 946.A")
+# Check valid types + combinations per species in:
+#   https://github.com/graeter-group/colbuilder/blob/main/src/colbuilder/data/sequence/crosslinks.csv
+#
+# CROSSLINK TYPE VALIDATION (input PDBs):
+#   If you also supply an input pdb_file together with n_term_type/c_term_type,
+#   ColBuilder checks that the crosslinks present in the PDB match the specified
+#   types (divalent vs trivalent). A mismatch — e.g. a trivalent PYD structure
+#   while n_term_type: "HLKNL" — stops generation with error GEO_ERR_008 and
+#   lists the detected crosslink residues. Fix the types to match the PDB, or
+#   supply a PDB whose crosslinks match the types.
+
+# Additional crosslinks (uncomment for the mutated-PDB workflow).
+# Applied on top of existing crosslinks in the structure.
+# additional_1_type: "Pentosidine"            # First additional crosslink type.
+# additional_2_type: null                     # Second additional crosslink type (optional).
+# additional_1_combination: "1008.A - 767.B"  # Position for the first crosslink.
+# additional_2_combination: null              # Position for the second crosslink.
+
+# Crosslink optimization settings.
+# crosslink_copies: ["D0", "D5"]  # Periodic copies used for distance optimization.
+# Must be exactly 2 elements from D0..D5 (e.g. ["D0","D5"], ["D2","D3"]).
+# Default is ["D0", "D5"] if not specified.
 
-# Crosslink optimization settings
-# crosslink_copies: ["D0", "D5"]  # Periodic copies for optimization, must be exactly 2 elements
-# Options: D0-D5, D0-D1, D1-D2, D2-D3, D3-D4. If not specified, default is ["D0", "D5"]
 
 # ================================================================================
 # GEOMETRY GENERATION SETTINGS
 # ================================================================================
-# These settings control microfibril assembly from triple helices.
-# The geometry generator arranges multiple copies of the input structure
-# into a collagen microfibril based on crystal contacts.
+# Arranges copies of the input triple helix into a microfibril via crystal contacts.
 
-# Input structure for geometry generation
-pdb_file: null  # Path to input PDB, or null if sequence_generator is true
+# Input structure for geometry generation.
+pdb_file: null  # Path to input PDB, or null when sequence_generator is true.
 # Example: "rattusnorvegicus_N_PYD_C_PYD.pdb"
-# IMPORTANT: When using additional crosslinks, run sequence generation 
-# separately first, then use its output here
+# IMPORTANT: when using additional crosslinks, run sequence generation separately
+# first, then use its output here.
+
+# Microfibril parameters.
+contact_distance: 20      # Maximum distance (Å) for crystal contacts. Larger = thicker fibril.
+fibril_length: 70.0       # Target length of the microfibril (nm).
 
-# Microfibril parameters
-contact_distance: 20      # Maximum distance (Å) for crystal contacts
-fibril_length: 70.0       # Target length of microfibril (nm)
+# Advanced geometry options (usually left as null/false).
+crystalcontacts_file: null      # Custom crystal-contacts file (skip contact_distance).
+connect_file: null              # Custom connectivity information.
+crystalcontacts_optimize: false # Optimize crystal-contact packing (slower, better packed).
+# Note: optimization is performed automatically when generating from contact_distance;
+# set this true to force it when using a crystalcontacts_file.
+
+# Lattice search space — three positive multipliers [dx, dy, dz] for the
+# solution space explored during contact generation. Default (1, 1, 1).
+# solution_space: [1, 1, 1]
+
+# CRYST1 record used to seed crystal contacts when none is present in the input.
+# Override only if you know your unit cell differs from the collagen default.
+# pdb_first_line: "CRYST1   39.970   26.950  677.900  89.24  94.59 105.58 P 1           2"
 
-# Advanced geometry options (usually left as null/false)
-crystalcontacts_file: null      # Custom crystal contacts file
-connect_file: null              # Custom connectivity information
-crystalcontacts_optimize: false # Optimize crystal contacts
-# Note: crystalcontacts_optimize is automatically true when using contact_distance
 
 # ================================================================================
 # MIXING OPTIONS
 # ================================================================================
-# Create heterogeneous microfibrils with different crosslink types.
+# Create heterogeneous microfibrils that mix different crosslink types.
 
-mix_bool: false           # Enable mixing of different crosslink types
-ratio_mix: "D:80 T:20"    # Ratio of each type (must sum to 100)
+mix_bool: false           # Enable mixing of different crosslink types.
+ratio_mix: "D:80 T:20"    # Ratio of each type; percentages MUST sum to 100.
 # Format: "TypeLabel:percentage TypeLabel:percentage"
-# Example: "D:80 T:20" = 80% divalent, 20% trivalent
-files_mix:  # PDB files for each crosslink type (required if mix_bool=true)
-  - "rattusnorvegicus_N_HLKNL_C_HLKNL.pdb"    # File for type "D" (divalent)
-  - "rattusnorvegicus_N_PYD_C_PYD.pdb"        # File for type "T" (trivalent)
-# Note: Labels (D, T) are arbitrary but must match ratio_mix
+# Example: "D:80 T:20" = 80% divalent, 20% trivalent.
+files_mix:                # One PDB per type label (required when mix_bool=true).
+  - "rattusnorvegicus_N_HLKNL_C_HLKNL.pdb"    # File for type "D" (divalent).
+  - "rattusnorvegicus_N_PYD_C_PYD.pdb"        # File for type "T" (trivalent).
+# Note: labels (D, T) are arbitrary but must match those used in ratio_mix,
+# and the number of files must equal the number of types in ratio_mix.
+
 
 # ================================================================================
 # REPLACEMENT OPTIONS
 # ================================================================================
-# Reduce crosslink density by replacing some with standard amino acids.
-# Useful for modeling aged or diseased tissue with fewer crosslinks.
+# Reduce crosslink density by replacing some crosslinks with standard lysines.
+# Useful for modeling aged/diseased tissue or partial crosslinking.
+
+replace_bool: false                     # Enable crosslink replacement.
+ratio_replace: 30                       # Percentage of crosslinks to remove (0-100).
+
+# Which crosslinks are eligible for ratio-based replacement.
+#   "enzymatic"     (DEFAULT) : enzymatic crosslinks (HLKNL/PYD-derived residues).
+#   "non_enzymatic"           : AGE crosslinks only (Glucosepane, Pentosidine, MOLD).
+#   "all"                     : both enzymatic and non-enzymatic.
+ratio_replace_scope: "enzymatic"
+
+replace_file: "rattus_norvegicus.pdb"   # PDB containing the crosslinks to replace.
+# If geometry_generator is true, this can be omitted (set to null) and the geometry
+# output is used in its place.
+
+# Auto-fix unpaired enzymatic crosslinks: detect crosslink residues left without a
+# partner after assembly and replace them, instead of erroring. Default false.
+# auto_fix_unpaired: false
+
+# Manual, explicit replacement directives (advanced). Each entry targets one residue:
+#   "<pdb> <RESNAME> <resid> <chain>"
+# When set, these take precedence over ratio-based replacement.
+# manual_replacements:
+#   - "collagen_fibril.pdb LYS 947 A"
 
-replace_bool: false                     # Enable crosslink replacement
-ratio_replace: 30                       # Percentage of crosslinks to remove (0-100)
-replace_file: "rattus_norvegicus.pdb"   # Path to a file containing specific crosslinks to replace 
-# If running with geometry step set to true, this file can be ommited (set to null) 
-# and the output of geometry will be used in its place. 
 
 # ================================================================================
 # TOPOLOGY GENERATION SETTINGS
 # ================================================================================
-# Generate molecular dynamics topology files for the final structure.
+# Generate molecular-dynamics topology files for the final structure.
 
-force_field: "amber99"  # Force field to use
-# Options: "amber99" (all-atom), "martini3" (coarse-grained)
+force_field: "amber99"    # "amber99" (all-atom) or "martini3" (coarse-grained).
+topology_debug: false     # Save all intermediate topology files.
 
-topology_debug: false     # Save intermediate topology files
+# --- Martini3-specific (ignored for amber99) ---
+# go_epsilon: 9.414       # Go-model epsilon for Martini3 CG parametrization (default 9.414).
+# use_conda_run: false    # Invoke Martinize2 via `conda run` (set true if it lives in another env).
+# martinize2_command: null  # Usually auto-detected from PATH; override only if needed.
+# martinize2_env: null      # Conda env name for Martinize2; usually auto-detected.
 
-# Topology-Only Mode
-# When topology_generator: true AND both sequence_generator: false and 
-# geometry_generator: false, ColBuilder operates in topology-only mode,
-# reading an existing fibril PDB and generating topology files without 
-# running geometry generation. Crosslink parameters must match the input PDB.
\ No newline at end of file
+# ================================================================================
+# TOPOLOGY-ONLY MODE
+# ================================================================================
+# When topology_generator: true AND both sequence_generator: false and
+# geometry_generator: false, ColBuilder runs in topology-only mode: it reads an
+# existing fibril PDB (pdb_file) and generates topology without geometry generation.
+# The crosslink type detected in the PDB must match any crosslink types you specify
+# (see the GEO_ERR_008 validation note above).
diff --git a/docs/configuration.md b/docs/configuration.md
index 337a906..cf9f884 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -69,6 +69,7 @@ files_mix:
 # Replacement Options
 replace_bool: false
 ratio_replace: 30
+ratio_replace_scope: "enzymatic"   # enzymatic | non_enzymatic | all
 replace_file: null
 
 # Topology Options
@@ -182,6 +183,10 @@ These parameters control the sequence generation stage (homology modeling).
 - A complete list of available species and combinations can be found at [src/colbuilder/data/sequence/crosslinks.csv](https://github.com/graeter-group/colbuilder/blob/main/src/colbuilder/data/sequence/crosslinks.csv).
 - When using the mutated PDB workflow, run sequence generation separately first, then use the output in geometry generation.
 
+**Crosslink type validation**:
+- When you provide an input `pdb_file` together with `n_term_type`/`c_term_type`, ColBuilder checks that the crosslink residues present in the PDB are consistent with the specified types. Divalent types (HLKNL, LKNL, deHLNL, deHHLNL) expect a divalent structure, and trivalent types (PYD, DPD, PYL, DPL) expect a trivalent structure.
+- If the PDB contains crosslinks of a category that was not requested (for example, a trivalent PDB while `n_term_type: "HLKNL"` is set), generation stops with error `GEO_ERR_008` and a message listing the detected crosslink residues. Update `n_term_type`/`c_term_type` to match the structure, or supply a PDB whose crosslinks match the specified types.
+
 ## Geometry Generation Parameters
 
 These parameters control the generation of the microfibril structure.
@@ -213,6 +218,7 @@ These parameters control advanced features for creating mixed crosslinked microf
 | `files_mix` | list of strings | | Required if mix_bool is true, paths to PDB files with different crosslink types |
 | `replace_bool` | boolean | false | Enable crosslink replacement (with lysines) |
 | `ratio_replace` | integer | 30 | Percentage of crosslinks to replace (0-100) |
+| `ratio_replace_scope` | string | "enzymatic" | Which crosslinks are eligible for ratio-based replacement: `enzymatic`, `non_enzymatic`, or `all` |
 | `replace_file` | string, null | null | File with crosslinks to be replaced |
 
 **Notes**:
@@ -221,6 +227,7 @@ These parameters control advanced features for creating mixed crosslinked microf
 - The `files_mix` parameter specifies paths to PDB files of collagen molecules, each with a different crosslink type.
 - The `replace_bool` feature simulates partial crosslinking or aged collagen by replacing some crosslinks with unmodified lysine residues.
 - The `ratio_replace` parameter controls what percentage of crosslinks should be replaced.
+- The `ratio_replace_scope` parameter selects which crosslinks may be replaced. **The default is `enzymatic`**, so ratio-based replacement targets enzymatic crosslinks (e.g. HLKNL/PYD-derived residues) unless you choose otherwise. Use `non_enzymatic` to target advanced glycation end-product (AGE) crosslinks (Glucosepane, Pentosidine, MOLD), or `all` to consider both.
 - The `replace_file` parameter specifies the path to a PDB file of a previously generated collagen microfibril. Set to null to use the geometry generation output.
 
 ## Topology Generation Parameters
@@ -399,6 +406,7 @@ Understanding how parameters interact is important for successful use of ColBuil
    - Ratios in `ratio_mix` must sum to 100.
    - If `replace_bool` is true, you must specify a valid percentage in `ratio_replace` (0-100).
    - If `replace_bool` is true and `geometry_generator` is false, you must provide a `replace_file`.
+   - `ratio_replace_scope` must be one of `enzymatic` (default), `non_enzymatic`, or `all`.
 
 5. **Geometry Generation Dependencies**:
    - If `crystalcontacts_optimize` is true, the geometry generation will take longer but may produce better-packed microfibrils.
diff --git a/docs/data_dictionary.md b/docs/data_dictionary.md
index 2e133d2..ccba8de 100644
--- a/docs/data_dictionary.md
+++ b/docs/data_dictionary.md
@@ -174,6 +174,7 @@ The most commonly used parameters for ColBuilder configuration:
 | files_mix | List of Paths | PDB files with different crosslink types | Valid PDB file paths (≥2 files) | [] |
 | replace_bool | boolean | Replace crosslinks with lysines | true/false | false |
 | ratio_replace | float | Percentage of crosslinks to replace | 0-100 | None |
+| ratio_replace_scope | string | Which crosslinks are eligible for ratio-based replacement | "enzymatic", "non_enzymatic", "all" | "enzymatic" |
 | replace_file | Path/null | Input PDB file of fibril with crosslinks | Valid file path or null | null |
 
 **Validation Rules**:
@@ -186,10 +187,12 @@ The most commonly used parameters for ColBuilder configuration:
   - `ratio_replace` must be between 0 and 100
   - Either `geometry_generator=true` OR `replace_file` must be provided
   - If `geometry_generator=false`, must provide `replace_file`
+  - `ratio_replace_scope` must be one of `enzymatic`, `non_enzymatic`, or `all`
 
 **Notes**:
 - **Mixing** creates heterogeneous microfibrils with different crosslink types (e.g., 80% divalent + 20% trivalent)
 - **Replacement** simulates partial crosslinking or aged collagen by replacing some crosslinks with unmodified lysine residues
+- `ratio_replace_scope` selects which crosslinks may be replaced. The default `enzymatic` targets enzymatic crosslinks (HLKNL/PYD-derived residues); `non_enzymatic` targets AGE crosslinks (Glucosepane, Pentosidine, MOLD); `all` considers both
 - Set `replace_file: null` to use geometry generation output for replacement
 
 ### Topology Generation Parameters
diff --git a/docs/user_guide.md b/docs/user_guide.md
index 6ec83d4..58c4e5d 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -272,9 +272,14 @@ files_mix:                  # Required if mix_bool is true
 ```yaml
 replace_bool: false         # Enable replacement of crosslinks with lysines
 ratio_replace: 30           # Percentage of crosslinks to replace
+ratio_replace_scope: "enzymatic"  # Crosslinks to replace: "enzymatic" (default), "non_enzymatic" (AGEs), or "all"
 replace_file: null          # File with crosslinks to be replaced
 ```
 
+The `ratio_replace_scope` parameter controls which crosslinks are eligible for ratio-based
+replacement. The default `enzymatic` targets enzymatic crosslinks (HLKNL/PYD-derived residues);
+use `non_enzymatic` to target AGE crosslinks (Glucosepane, Pentosidine, MOLD), or `all` for both.
+
 ### Topology Options
 
 ```yaml
diff --git a/pyproject.toml b/pyproject.toml
index 2cff7ca..812ae9d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,21 +20,19 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 dependencies = [
-    "numpy==1.25",
-    "vermouth==0.9.1", 
+    # <2 keeps the numpy 1.x ABI (vermouth 0.9.1); >=1.25 lets pip pick a wheel
+    # that exists for the running Python (e.g. 1.26 on 3.12, where 1.25 has none).
+    "numpy>=1.25,<2",
+    "vermouth==0.9.1",
     "pandas==2.2.2",
     "biopython==1.84",
     "scikit-learn",
-    "h5py",
-    "libnetcdf",
-    "threadpoolctl",
     "PyYAML",
-    "click",
     "pydantic>=2.0",
     "tqdm",
-    "asyncio",
     "colorama>=0.4.4",
     "click>=8.0.0",
 ]
diff --git a/src/colbuilder/colbuilder.py b/src/colbuilder/colbuilder.py
index 21d03ff..ae5ad8b 100644
--- a/src/colbuilder/colbuilder.py
+++ b/src/colbuilder/colbuilder.py
@@ -511,6 +511,21 @@ async def run_pipeline(config: ColbuilderConfig) -> Dict[str, Path]:
                     f"Using generated sequence PDB for further processing: {sequence_pdb}"
                 )
 
+        # Validate that the crosslinks present in an input PDB are consistent
+        # with the crosslink types requested in the configuration. This catches
+        # the common mistake of, e.g., specifying a divalent type (HLKNL) for a
+        # trivalent (PYD) structure. Generated PDBs match by construction, so
+        # this only meaningfully guards user-provided structures.
+        if config.pdb_file and (config.n_term_type or config.c_term_type):
+            from colbuilder.core.utils.crosslink_detector import CrosslinkDetector
+
+            pdb_to_check = Path(config.pdb_file).resolve()
+            if pdb_to_check.exists():
+                CrosslinkDetector.validate_against_specified_types(
+                    pdb_to_check,
+                    [config.n_term_type, config.c_term_type],
+                )
+
         # Topology-only mode
         if (config.topology_generator and 
             not config.geometry_generator and 
diff --git a/src/colbuilder/core/geometry/chimera.py b/src/colbuilder/core/geometry/chimera.py
index 31e1e87..04c4487 100644
--- a/src/colbuilder/core/geometry/chimera.py
+++ b/src/colbuilder/core/geometry/chimera.py
@@ -165,12 +165,15 @@ def swapaa(self, replace: str, system_type: str) -> subprocess.CompletedProcess:
             LOG.debug(
                 f"Chimera command completed with return code: {result.returncode}"
             )
-            if result.stdout:
-                # LOG.debug(f"Chimera stdout: {result.stdout}")
-                pass
-            if result.stderr:
-                # LOG.debug(f"Chimera stderr: {result.stderr}")
-                pass
+            if result.returncode != 0:
+                # Surface failures: a silent swapaa failure leaves unpaired crosslink
+                # markers (non-standard residues) in the structure, which breaks
+                # downstream topology generation / simulation.
+                LOG.error(
+                    f"Chimera swapaa failed (return code {result.returncode}); "
+                    f"unpaired crosslink markers may remain unmutated. "
+                    f"stderr: {result.stderr}"
+                )
             return result
         except Exception as e:
             LOG.error(f"Error executing Chimera command: {str(e)}")
diff --git a/src/colbuilder/core/geometry/connect.py b/src/colbuilder/core/geometry/connect.py
index 9b07ed6..bea8037 100644
--- a/src/colbuilder/core/geometry/connect.py
+++ b/src/colbuilder/core/geometry/connect.py
@@ -142,12 +142,16 @@ def get_external_connect_file(
             Any: Updated system object.
         """
         if connect_file:
-            self.external_connect = [
-                float(l.split(" ")[0].replace(".caps.pdb", ""))
-                for l in open(connect_file.with_suffix(".txt"), "r")
-            ]
-        if np.min(self.external_connect) > 0:
-            self.external_connect = [i - 1 for i in self.external_connect]
+            with open(connect_file.with_suffix(".txt"), "r") as fh:
+                self.external_connect = [
+                    float(l.split(" ")[0].replace(".caps.pdb", ""))
+                    for l in fh
+                ]
+        # Connect files are written by write_connect() using the system's own
+        # (0-based) model ids, so they are already aligned with system.get_connect().
+        # The previous "if min > 0: subtract 1" heuristic wrongly shifted every id
+        # whenever model 0 happened to be absent from the file, mis-mapping
+        # connectivity. No shift is applied.
 
         for model_id in system.get_connect().keys():
             if model_id not in self.external_connect:
diff --git a/src/colbuilder/core/geometry/crosslink.py b/src/colbuilder/core/geometry/crosslink.py
index afb7c5d..31e1ff1 100644
--- a/src/colbuilder/core/geometry/crosslink.py
+++ b/src/colbuilder/core/geometry/crosslink.py
@@ -97,9 +97,9 @@ def read_crosslink(pdb_file: Union[str, Path]) -> List[Crosslink]:
                         resname=line[17:20],
                         chain=line[21],
                         position=[
-                            float(line[29:38]),
+                            float(line[30:38]),
                             float(line[38:46]),
-                            float(line[46:56]),
+                            float(line[46:54]),
                         ],
                         type="T",
                     )
@@ -113,9 +113,9 @@ def read_crosslink(pdb_file: Union[str, Path]) -> List[Crosslink]:
                         resname=line[17:20],
                         chain=line[21],
                         position=[
-                            float(line[29:38]),
+                            float(line[30:38]),
                             float(line[38:46]),
-                            float(line[46:56]),
+                            float(line[46:54]),
                         ],
                         type="D",
                     )
@@ -129,9 +129,9 @@ def read_crosslink(pdb_file: Union[str, Path]) -> List[Crosslink]:
                         resname=line[17:20],
                         chain=line[21],
                         position=[
-                            float(line[29:38]),
+                            float(line[30:38]),
                             float(line[38:46]),
-                            float(line[46:56]),
+                            float(line[46:54]),
                         ],
                         type="D",
                     )
@@ -145,9 +145,9 @@ def read_crosslink(pdb_file: Union[str, Path]) -> List[Crosslink]:
                         resname=line[17:20],
                         chain=line[21],
                         position=[
-                            float(line[29:38]),
+                            float(line[30:38]),
                             float(line[38:46]),
-                            float(line[46:56]),
+                            float(line[46:54]),
                         ],
                         type="D",
                     )
diff --git a/src/colbuilder/core/geometry/geometry_replacer.py b/src/colbuilder/core/geometry/geometry_replacer.py
index 314549a..101195c 100644
--- a/src/colbuilder/core/geometry/geometry_replacer.py
+++ b/src/colbuilder/core/geometry/geometry_replacer.py
@@ -41,7 +41,7 @@
     (["LZS"], ["LZD"]),  # MOLD
     # Enzymatic divalent crosslinks
     (["L5Y"], ["L4Y"]),  # HLKNL
-    (["L5X"], ["L4Y"]),  # LKNL
+    (["L5X"], ["L4X"]),  # LKNL
     (["LY5"], ["LY4"]),  # deH-HLNL
     (["LX5"], ["LX4"]),  # deH-LNL
 ]
@@ -65,6 +65,7 @@
     "L5Y": "LYS",
     "L4Y": "LYS",
     "L5X": "LYS",
+    "L4X": "LYS",
     "LY5": "LYS",
     "LX5": "LYS",
     "LY4": "LYS",
@@ -77,7 +78,7 @@
 DEFAULT_REPLACEMENT = "LYS"
 PAIR_DISTANCE_CUTOFF = 5.0
 
-PAIRED_RESIDUES: Set[str] = {"AGS", "APD", "LGX", "LPS", "LZS", "LZD", "L5Y", "L4Y", "L5X", "LY5", "LX5", "LY4", "LX4"}
+PAIRED_RESIDUES: Set[str] = {"AGS", "APD", "LGX", "LPS", "LZS", "LZD", "L5Y", "L4Y", "L5X", "L4X", "LY5", "LX5", "LY4", "LX4"}
 NON_ENZYMATIC_PAIRED: Set[str] = {"AGS", "APD", "LGX", "LPS", "LZS", "LZD"}
 ENZYMATIC_SINGLETONS: Set[str] = {
     "LYX",
@@ -95,6 +96,7 @@
     "L5Y",
     "L4Y",
     "L5X",
+    "L4X",
     "LY5",
     "LX5",
     "LY4",
@@ -269,11 +271,14 @@ async def replace_in_system(
                 source_dir = Path.cwd()
 
             # Determine system type and setup type directory
-            type_dir_candidates = [source_dir]
+            # Prefer canonical typed caps in subdirs; check the source root LAST so a
+            # stale root copy cannot shadow the correct typed file (first-match-wins).
+            type_dir_candidates = []
             for sub in ["DT", "TD", "D", "T", "M", "NC", "DY"]:
                 cand = source_dir / sub
                 if cand.exists():
                     type_dir_candidates.append(cand)
+            type_dir_candidates.append(source_dir)
 
             model_zero = system.get_model(model_id=0.0)
             system_type = model_zero.type if hasattr(model_zero, "type") else "D"
@@ -356,7 +361,7 @@ async def replace_in_system(
                         records=records,
                         connect_groups=connect_groups,
                         ratio_replace=float(config.ratio_replace),
-                        scope=getattr(config, "ratio_replace_scope", "non_enzymatic"),
+                        scope=getattr(config, "ratio_replace_scope", "enzymatic"),
                         config=config,
                     )
                     generated_from_ratio = bool(manual_list)
@@ -367,7 +372,7 @@ async def replace_in_system(
                         system=system,
                         ratio_replace=float(config.ratio_replace),
                         fibril_length=config.fibril_length,
-                        scope=getattr(config, "ratio_replace_scope", "non_enzymatic"),
+                        scope=getattr(config, "ratio_replace_scope", "enzymatic"),
                     )
                     generated_from_ratio = bool(manual_list)
                     LOG.debug(f"Generated {len(manual_list)} ratio-based instructions from system")
@@ -379,7 +384,7 @@ async def replace_in_system(
                         "No replacement instructions generated (ratio=%s%%, scope=%s). "
                         "No matching markers found.",
                         config.ratio_replace,
-                        getattr(config, "ratio_replace_scope", "non_enzymatic"),
+                        getattr(config, "ratio_replace_scope", "enzymatic"),
                     )
                 else:
                     LOG.debug("No replacement instructions provided")
@@ -605,7 +610,7 @@ async def replace_direct(
                     records=records,
                     ratio_replace=float(config.ratio_replace),
                     fibril_length=getattr(config, "fibril_length", 0.0),
-                    scope=getattr(config, "ratio_replace_scope", "non_enzymatic"),
+                    scope=getattr(config, "ratio_replace_scope", "enzymatic"),
                 )
                 generated_from_ratio = bool(manual_list)
 
@@ -735,8 +740,8 @@ def _calculate_fibril_bounds(
 
                 fibril_length_angstroms = fibril_length * 10.0
 
-                z_min = z_center - fibril_length_angstroms * 5
-                z_max = z_center + fibril_length_angstroms * 5
+                z_min = z_center - fibril_length_angstroms / 2
+                z_max = z_center + fibril_length_angstroms / 2
             else:
                 z_min = min_z
                 z_max = max_z
@@ -879,7 +884,7 @@ async def _run_chimera_command(
     # ==================================================================================
 
     def _build_ratio_replacements(
-        self, system: Any, ratio_replace: float, fibril_length: float, scope: str = "non_enzymatic"
+        self, system: Any, ratio_replace: float, fibril_length: float, scope: str = "enzymatic"
     ) -> List[str]:
         """
         Select crosslinks to mutate based on a density (ratio_replace) value.
@@ -894,11 +899,11 @@ def _build_ratio_replacements(
             return []
 
         try:
-            scope = scope or "non_enzymatic"
+            scope = scope or "enzymatic"
             scope = str(scope).strip().lower()
             if scope not in {"enzymatic", "non_enzymatic", "all"}:
-                LOG.warning("Unknown ratio_replace scope '%s'; defaulting to 'non_enzymatic'", scope)
-                scope = "non_enzymatic"
+                LOG.warning("Unknown ratio_replace scope '%s'; defaulting to 'enzymatic'", scope)
+                scope = "enzymatic"
 
             z_bounds = self._calculate_fibril_bounds(system, fibril_length)
         except Exception as e:
@@ -1254,11 +1259,14 @@ async def _apply_manual_replacements_to_dir(
             except Exception:
                 pass
 
-        type_dir_candidates = [source_dir]
+        # Prefer canonical typed caps in subdirs; check the source root LAST so a
+        # stale root copy cannot shadow the correct typed file (first-match-wins).
+        type_dir_candidates = []
         for sub in ["DT", "TD", "D", "T", "M", "NC", "DY"]:
             cand = source_dir / sub
             if cand.exists():
                 type_dir_candidates.append(cand)
+        type_dir_candidates.append(source_dir)
 
         try:
             model_zero = system.get_model(model_id=0.0)
@@ -1331,19 +1339,19 @@ def _build_ratio_replacements_from_connect(
         records: List[Dict[str, Any]],
         connect_groups: List[List[int]],
         ratio_replace: float,
-        scope: str = "non_enzymatic",
+        scope: str = "enzymatic",
         config: Optional[ColbuilderConfig] = None,
     ) -> List[str]:
         """Generate replacement instructions using connect groups and parsed caps records."""
         if not records or not connect_groups or ratio_replace <= 0:
             return []
 
-        scope = (scope or "non_enzymatic").strip().lower()
+        scope = (scope or "enzymatic").strip().lower()
         if scope not in {"enzymatic", "non_enzymatic", "all"}:
             LOG.warning(
-                "Unknown ratio_replace scope '%s'; defaulting to 'non_enzymatic'", scope
+                "Unknown ratio_replace scope '%s'; defaulting to 'enzymatic'", scope
             )
-            scope = "non_enzymatic"
+            scope = "enzymatic"
 
         records_by_model: Dict[int, List[Dict[str, Any]]] = {}
         for rec in records:
@@ -1534,18 +1542,18 @@ def _build_ratio_replacements_from_records(
         records: List[Dict[str, Any]],
         ratio_replace: float,
         fibril_length: float,
-        scope: str = "non_enzymatic",
+        scope: str = "enzymatic",
     ) -> List[str]:
         """Generate replacement instructions using parsed crosslink records."""
         if not records or ratio_replace <= 0:
             return []
 
-        scope = (scope or "non_enzymatic").strip().lower()
+        scope = (scope or "enzymatic").strip().lower()
         if scope not in {"enzymatic", "non_enzymatic", "all"}:
             LOG.warning(
-                "Unknown ratio_replace scope '%s'; defaulting to 'non_enzymatic'", scope
+                "Unknown ratio_replace scope '%s'; defaulting to 'enzymatic'", scope
             )
-            scope = "non_enzymatic"
+            scope = "enzymatic"
 
         if scope == "enzymatic":
             target_resnames = ENZYMATIC_SINGLETONS.copy()
@@ -1734,6 +1742,25 @@ def _calculate_bounds_from_records(
 
         return (z_min, z_max)
 
+    @staticmethod
+    def _extract_position(item: Any) -> Any:
+        """
+        Return the 'position' of a crosslink entry regardless of how it is stored.
+
+        Entries may be a plain dict ({'position': ...}), a dict whose 'crosslink'
+        value is itself a dict, or a dict whose 'crosslink' value is a Crosslink
+        object (the system-based path). Using getattr for objects avoids the
+        AttributeError that previously caused PYD trios to be silently dropped.
+        """
+        if item is None:
+            return None
+        if isinstance(item, dict):
+            if "crosslink" in item:
+                cl = item["crosslink"]
+                return cl.get("position") if isinstance(cl, dict) else getattr(cl, "position", None)
+            return item.get("position")
+        return getattr(item, "position", None)
+
     def _build_pyd_trios(
         self,
         lyx_list: List[Dict[str, Any]],
@@ -1754,8 +1781,8 @@ def _build_pyd_trios(
                 if idx in used_ly2:
                     continue
                 try:
-                    lyx_pos = lyx.get("crosslink", {}).get("position") if "crosslink" in lyx else lyx.get("position")
-                    ly2_pos = ly2.get("crosslink", {}).get("position") if "crosslink" in ly2 else ly2.get("position")
+                    lyx_pos = self._extract_position(lyx)
+                    ly2_pos = self._extract_position(ly2)
                     if not lyx_pos or not ly2_pos:
                         continue
                     dist = self._calculate_distance(lyx_pos, ly2_pos)
@@ -1771,8 +1798,8 @@ def _build_pyd_trios(
                 if jdx in used_ly3:
                     continue
                 try:
-                    lyx_pos = lyx.get("crosslink", {}).get("position") if "crosslink" in lyx else lyx.get("position")
-                    ly3_pos = ly3.get("crosslink", {}).get("position") if "crosslink" in ly3 else ly3.get("position")
+                    lyx_pos = self._extract_position(lyx)
+                    ly3_pos = self._extract_position(ly3)
                     if not lyx_pos or not ly3_pos:
                         continue
                     dist = self._calculate_distance(lyx_pos, ly3_pos)
@@ -1951,17 +1978,29 @@ def _split_pdb_into_models(self, input_pdb: Path, type_dir: Path) -> int:
         atom_lines = [ln for ln in all_lines if ln.startswith(("ATOM", "HETATM", "TER"))]
         models: List[List[str]] = []
         current: List[str] = []
-        chain_sequence: List[str] = []
+        current_chains: List[str] = []   # ordered chain ids seen in the current model
+        first_chain: Optional[str] = None
 
         for line in atom_lines:
             if line.startswith(("ATOM", "HETATM")):
                 chain = line[21]
-                if not chain_sequence or chain != chain_sequence[-1]:
-                    chain_sequence.append(chain)
-                    if len(chain_sequence) > 3 and chain_sequence[-4:] == ["A", "B", "C", "A"]:
+                # Chains are written contiguously; act only on a chain transition.
+                if not current_chains or chain != current_chains[-1]:
+                    # A new triple helix begins when we return to the model's first
+                    # chain after a full set of >=3 distinct chains. This does not
+                    # hardcode A/B/C ordering, so non-standard chain labels still split.
+                    if (
+                        first_chain is not None
+                        and chain == first_chain
+                        and len(set(current_chains)) >= 3
+                    ):
                         models.append(current)
                         current = []
-                        chain_sequence = ["A"]
+                        current_chains = []
+                        first_chain = None
+                    if first_chain is None:
+                        first_chain = chain
+                    current_chains.append(chain)
             current.append(line)
 
         if current:
diff --git a/src/colbuilder/core/geometry/main_geometry.py b/src/colbuilder/core/geometry/main_geometry.py
index 954e5d3..539590a 100644
--- a/src/colbuilder/core/geometry/main_geometry.py
+++ b/src/colbuilder/core/geometry/main_geometry.py
@@ -419,6 +419,11 @@ async def _handle_full_generation(self) -> Tuple[Optional[System], Optional[Path
                 LOG.section("Mixing geometry...")
                 mixing_dir = self.file_manager.ensure_mixing_dir()
                 system, _ = await self.mixer_service.mix(system, temp_config, mixing_dir)
+                # Finalize from where the mixed, per-type caps actually live. The
+                # mixer writes caps into mixing_dir/<type>; geometry_dir only holds
+                # the original single build type, so writing from it would miss the
+                # newly assigned mix types (FileNotFoundError or stale caps).
+                final_temp_dir = mixing_dir
                 LOG.info("Mixing completed.")
 
             # Determine if replacement is requested
@@ -601,7 +606,7 @@ async def _handle_full_generation(self) -> Tuple[Optional[System], Optional[Path
 
                 LOG.info(f"Writing final mixed system PDB to {output_pdb_path}")
                 system.write_pdb(
-                    pdb_out=output_prefix,
+                    pdb_out=output_pdb_path,
                     fibril_length=temp_config.fibril_length,
                     cleanup=False,
                     temp_dir=final_temp_dir,
@@ -696,6 +701,14 @@ async def build_geometry(self) -> Tuple[Optional[System], Optional[Path]]:
             GeometryGenerationError: If any step in the process fails
         """
         try:
+            # Clear stale caps/type-subdirs from previous runs so a re-run cannot
+            # silently reuse old geometry (different fibril length, contact distance, etc.).
+            try:
+                if getattr(self, "file_manager", None) is not None:
+                    self.file_manager.clear_generation_dirs()
+            except Exception as e:
+                LOG.debug(f"Could not clear generation dirs at run start: {e}")
+
             # Allow direct replacement when a PDB is provided, even if replace_bool was not set explicitly
             if self.config.replace_file and not self.config.geometry_generator:
                 self.config.replace_bool = True
diff --git a/src/colbuilder/core/geometry/optimize.py b/src/colbuilder/core/geometry/optimize.py
index bfce6cf..dff00d0 100644
--- a/src/colbuilder/core/geometry/optimize.py
+++ b/src/colbuilder/core/geometry/optimize.py
@@ -445,7 +445,7 @@ def _create_complementary_model(self, crosslink: Any, source_model: Any, system:
                     )
                     
                     system.add_model(model=new_model)
-                    
+
                     if not self._node_exists_in_system(pr_candidate, system):
                         pr_model = model.Model(
                             id=new_id + 1,
@@ -453,8 +453,24 @@ def _create_complementary_model(self, crosslink: Any, source_model: Any, system:
                             transformation=pr_transformation,
                             pdb_file=system.crystal.pdb_file
                         )
-                        system.add_model(model=pr_model)
-                    
+                        # Only keep the point-reflection copy if it is actually
+                        # crosslink-connected to the existing system. Adding it
+                        # unconditionally can introduce a floating, unconnected
+                        # triple helix (orphan molecule) into the fibril.
+                        pr_connected = any(
+                            connect.get_connect(
+                                ref_model=system.get_model(model_id=mid),
+                                model=pr_model,
+                            )
+                            for mid in system.get_models()
+                        )
+                        if pr_connected:
+                            system.add_model(model=pr_model)
+                        else:
+                            LOG.debug(
+                                f"Skipping unconnected point-reflection copy at {pr_candidate}"
+                            )
+
                     self._mark_model_crosslinks_paired(new_model)
                     
                     crosslink_id = f"{source_model.id}_{crosslink.resid}_{crosslink.type}"
diff --git a/src/colbuilder/core/geometry/system.py b/src/colbuilder/core/geometry/system.py
index 4a78db6..72c3fac 100644
--- a/src/colbuilder/core/geometry/system.py
+++ b/src/colbuilder/core/geometry/system.py
@@ -352,13 +352,22 @@ def write_pdb(self, pdb_out: Union[str, Path], fibril_length: float, cleanup: bo
                 if crystal_header:
                     f.write(crystal_header)
                 
-                # Write model files if they exist (typically for initial crystal structures)
+                # Write model files if they exist (typically for initial crystal structures).
+                # Skip any model that will also be written from its caps file below, otherwise
+                # the same atoms are emitted twice (uncapped body + caps) -> 0 A overlaps.
                 for model_id, model in self.system.items():
                     if hasattr(model, 'pdb_file') and Path(model.pdb_file).exists():
+                        model_type = getattr(model, 'type', None)
+                        if model_type is not None and (model_type, float(model_id)) in caps_by_model:
+                            LOG.debug(
+                                f"Skipping pdb_file body for model {model_id}; "
+                                f"caps file will be written instead (avoid duplicate atoms)"
+                            )
+                            continue
                         LOG.debug(f"Writing model {model_id} from pdb_file")
                         self._write_model_file_content(f, model.pdb_file)
                         written_models += 1
-                        model_type = getattr(model, 'type', 'unknown')
+                        model_type = model_type if model_type is not None else 'unknown'
                         written_by_type[model_type] = written_by_type.get(model_type, 0) + 1
                 
                 # Write caps files: one per model ID, respecting assigned type
diff --git a/src/colbuilder/core/geometry/unpaired_crosslinks.py b/src/colbuilder/core/geometry/unpaired_crosslinks.py
index efb2d36..c5ccd2b 100644
--- a/src/colbuilder/core/geometry/unpaired_crosslinks.py
+++ b/src/colbuilder/core/geometry/unpaired_crosslinks.py
@@ -234,9 +234,15 @@ def _build_crosslink_data(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Se
                     continue
                 if res_name not in crosslinks_by_type:
                     continue
-                chain_id, resid = residues[-1]
-                line = f"{fname} {res_name} {resid} {chain_id}"
-                crosslinks_by_type[res_name][fname] = line
+                # Emit EVERY unpaired marker of this type in the model, not just the
+                # last one. A fragment can legitimately carry multiple same-type
+                # markers; dropping all but one leaves non-standard residues behind
+                # (which then break topology generation / simulation).
+                lines = [
+                    f"{fname} {res_name} {resid} {chain_id}"
+                    for (chain_id, resid) in residues
+                ]
+                crosslinks_by_type[res_name][fname] = lines
                 types_by_model.setdefault(fname, set()).add(res_name)
 
         crosslinks_by_type = {r: m for r, m in crosslinks_by_type.items() if m}
@@ -310,13 +316,16 @@ def _build_unpaired_entries(
             all_models = set(mapping.keys())
             unpaired = all_models - paired_models.get(res_type, set())
             for model in unpaired:
-                parts = mapping[model].split()
-                if len(parts) < 4:
-                    LOG.warning("Malformed mapping for %s: %s", model, mapping[model])
-                    continue
-                pdb_file, orig_res, resid, chain = parts[:4]
-                new_res = RESIDUE_TO_MUTATION.get(orig_res, DEFAULT_MUTATION)
-                entries.append((pdb_file, new_res, resid, chain, orig_res))
+                raw = mapping[model]
+                model_lines = raw if isinstance(raw, list) else [raw]
+                for entry_line in model_lines:
+                    parts = entry_line.split()
+                    if len(parts) < 4:
+                        LOG.warning("Malformed mapping for %s: %s", model, entry_line)
+                        continue
+                    pdb_file, orig_res, resid, chain = parts[:4]
+                    new_res = RESIDUE_TO_MUTATION.get(orig_res, DEFAULT_MUTATION)
+                    entries.append((pdb_file, new_res, resid, chain, orig_res))
 
         def sort_key(entry: Tuple[str, str, str, str, str]):
             pdb_file, _, resid, chain, orig_res = entry
diff --git a/src/colbuilder/core/sequence/sequence_generator.py b/src/colbuilder/core/sequence/sequence_generator.py
index d737a38..e779fde 100644
--- a/src/colbuilder/core/sequence/sequence_generator.py
+++ b/src/colbuilder/core/sequence/sequence_generator.py
@@ -460,6 +460,13 @@ async def _load_crosslinks(self) -> None:
                 )
 
             self._crosslinks = []
+            missing: List[str] = []
+
+            def _is_real_type(t: Optional[str]) -> bool:
+                return bool(t) and str(t).strip().upper() not in {"NONE", "NOCROSS"}
+
+            def _is_explicit_none(t: Optional[str]) -> bool:
+                return bool(t) and str(t).strip().upper() in {"NONE", "NOCROSS"}
 
             if self.config.n_term_type:
                 LOG.debug(f"Loading N-terminal crosslinks of type: {self.config.n_term_type}")
@@ -472,6 +479,11 @@ async def _load_crosslinks(self) -> None:
                 if n_crosslinks:
                     self._crosslinks.extend(n_crosslinks)
                     LOG.debug(f"Loaded {len(n_crosslinks)} N-terminal crosslinks")
+                elif _is_real_type(self.config.n_term_type):
+                    missing.append(
+                        f"N-terminal type '{self.config.n_term_type}' "
+                        f"(combination '{self.config.n_term_combination}')"
+                    )
 
             if self.config.c_term_type:
                 LOG.debug(f"Loading C-terminal crosslinks of type: {self.config.c_term_type}")
@@ -484,6 +496,54 @@ async def _load_crosslinks(self) -> None:
                 if c_crosslinks:
                     self._crosslinks.extend(c_crosslinks)
                     LOG.debug(f"Loaded {len(c_crosslinks)} C-terminal crosslinks")
+                elif _is_real_type(self.config.c_term_type):
+                    missing.append(
+                        f"C-terminal type '{self.config.c_term_type}' "
+                        f"(combination '{self.config.c_term_combination}')"
+                    )
+
+            # Crosslinking is enabled (we returned early otherwise). If a real
+            # crosslink type was requested but nothing matched, fail loudly instead
+            # of silently building an uncrosslinked structure. Setting a terminal
+            # type to "NONE" is the explicit, accepted way to request no crosslink.
+            if missing:
+                available = sorted(species_crosslinks["type"].unique().tolist())
+                raise SequenceGenerationError(
+                    "Crosslinking is enabled but no matching crosslinks were found for: "
+                    + "; ".join(missing)
+                    + f". Available types for '{self.config.species}': {available}. "
+                    "Check the type name (case-sensitive), the combination, and the "
+                    "species — or set the terminal type to 'NONE' for no crosslink.",
+                    error_code="SEQ_ERR_002",
+                    context={
+                        "species": self.config.species,
+                        "n_term_type": self.config.n_term_type,
+                        "c_term_type": self.config.c_term_type,
+                        "missing": missing,
+                    },
+                )
+
+            # Enabled, but neither terminus requested a real type or an explicit
+            # 'NONE' — i.e. crosslinking was intended but never specified.
+            if (
+                not self._crosslinks
+                and not _is_real_type(self.config.n_term_type)
+                and not _is_real_type(self.config.c_term_type)
+                and not _is_explicit_none(self.config.n_term_type)
+                and not _is_explicit_none(self.config.c_term_type)
+            ):
+                raise SequenceGenerationError(
+                    "Crosslinking is enabled (crosslink: true) but no crosslink type was "
+                    "specified. Set n_term_type/c_term_type to a valid type, or set them "
+                    "to 'NONE' (or crosslink: false) to build without crosslinks.",
+                    error_code="SEQ_ERR_002",
+                    context={
+                        "species": self.config.species,
+                        "n_term_type": self.config.n_term_type,
+                        "c_term_type": self.config.c_term_type,
+                    },
+                )
+
 
             if not self._crosslinks:
                 LOG.warning("No crosslinks were loaded despite crosslinks being enabled")
diff --git a/src/colbuilder/core/topology/amber.py b/src/colbuilder/core/topology/amber.py
index 7abd107..3f90f06 100644
--- a/src/colbuilder/core/topology/amber.py
+++ b/src/colbuilder/core/topology/amber.py
@@ -13,6 +13,7 @@
 import numpy as np
 from colorama import Fore, Style
 from typing import List, Any, Optional, Dict, Union, Tuple, Set
+import re
 
 from colbuilder.core.geometry.system import System
 from colbuilder.core.geometry.crosslink import read_crosslink, Crosslink
@@ -171,8 +172,8 @@ def _are_compatible_crosslinks(self, cl1: Crosslink, cl2: Crosslink) -> bool:
             True if crosslinks can form a bond
         """
         # Divalent crosslinks (D-type)
-        divalent_aldehyde = {'L4Y', 'L4X', 'LY4', 'LX4', 'LGX', 'LPS'}
-        divalent_amine = {'L5Y', 'L5X', 'LY5', 'LX5', 'AGS', 'APD'}
+        divalent_aldehyde = {'L4Y', 'L4X', 'LY4', 'LX4', 'LGX', 'LPS', 'LZD'}
+        divalent_amine = {'L5Y', 'L5X', 'LY5', 'LX5', 'AGS', 'APD', 'LZS'}
         
         # Trivalent crosslinks (T-type)
         trivalent_aldehyde = {'LYX', 'LXY', 'LYY', 'LXX'}
@@ -225,7 +226,7 @@ def find_atom_indices_for_crosslinks(
         atom_by_index = {a['index']: a for a in atom_data}
 
         # Read coordinates from GRO file
-        gro_file = itp_file.replace('.itp', '.gro')
+        gro_file = str(Path(itp_file).with_suffix('.gro'))
         gro_coords: Dict[int, np.ndarray] = {}
         if os.path.exists(gro_file):
             with open(gro_file, 'r') as f:
@@ -467,6 +468,20 @@ def add_crosslink_topology_to_itp(
 
         try:
             mapped_pairs = self.find_atom_indices_for_crosslinks(itp_file, crosslink_pairs, merged_pdb_file)
+
+            # Surface mapping failures: a dropped pair means NO bond and NO
+            # exclusions for that crosslink -> close contacts that crash MD.
+            total = len(crosslink_pairs)
+            n_mapped = len(mapped_pairs)
+            if n_mapped < total:
+                LOG.error(
+                    f"Crosslink mapping incomplete in {os.path.basename(itp_file)}: "
+                    f"{n_mapped}/{total} pair(s) mapped, {total - n_mapped} dropped. "
+                    f"Dropped crosslinks get no bond and no exclusions (likely clashes "
+                    f"during equilibration). Check residue/atom-name coverage in "
+                    f"_is_crosslink_atom and the 5 A marker cutoff."
+                )
+
             if not mapped_pairs:
                 LOG.warning(f"No atom indices found for crosslink topology in {itp_file}")
                 return
@@ -498,6 +513,11 @@ def add_crosslink_topology_to_itp(
             except Exception as e:
                 LOG.warning(f"Failed to add angles/dihedrals, but bonds were added successfully: {str(e)}")
 
+            try:
+                self._add_crosslink_exclusions(itp_file, valid_bond_data)
+            except Exception as e:
+                LOG.warning(f"Failed to add crosslink exclusions, bonds/angles still added: {str(e)}")
+
             LOG.debug(f"    Successfully added crosslink topology to {itp_file}")
 
         except Exception as e:
@@ -567,6 +587,118 @@ def _add_crosslink_bonds(self, itp_file: str, valid_bond_data: List[Dict]) -> No
         with open(itp_file, 'w') as f:
             f.writelines(lines)
             
+    def _add_crosslink_exclusions(
+        self,
+        itp_file: str,
+        valid_bond_data: List[Dict],
+        cutoff: float = 4.5,
+    ) -> None:
+        """Write explicit [ exclusions ] between each pair of crosslinked residues.
+
+        nrexcl=3 only excludes atoms within 3 bonds along the bond graph. Covalently
+        joining two residues (e.g. HLKNL: L4Y CE - L5Y NZ) brings several heavy atoms
+        into close contact that are more than 3 bonds apart through the new bond, so
+        they are NOT auto-excluded and clash during equilibration. Here we exclude all
+        atom pairs between the two joined residues that lie within `cutoff` Angstrom
+        (falling back to excluding every inter-residue pair if no coordinates exist).
+        """
+        if not valid_bond_data:
+            return
+
+        with open(itp_file, "r") as f:
+            lines = f.readlines()
+
+        # Map atom index -> (residue_nr, residue_name) and residue -> [indices]
+        atom_res: Dict[int, Tuple[int, str]] = {}
+        res_atoms: Dict[Tuple[int, str], List[int]] = {}
+        atoms_section = False
+        for line in lines:
+            s = line.strip()
+            if s.startswith("[ atoms ]"):
+                atoms_section = True
+                continue
+            if atoms_section and s.startswith("["):
+                break
+            if atoms_section and s and not s.startswith(";"):
+                parts = s.split()
+                if len(parts) >= 8:
+                    idx = int(parts[0])
+                    key = (int(parts[2]), parts[3])
+                    atom_res[idx] = key
+                    res_atoms.setdefault(key, []).append(idx)
+
+        # Coordinates (Angstrom) from the matching GRO, if present
+        gro_file = str(Path(itp_file).with_suffix(".gro"))
+        coords: Dict[int, np.ndarray] = {}
+        if os.path.exists(gro_file):
+            with open(gro_file, "r") as f:
+                glines = f.readlines()
+            for line in glines[2:-1]:
+                if len(line) >= 44:
+                    try:
+                        aidx = int(line[15:20].strip())
+                        coords[aidx] = np.array([
+                            float(line[20:28]) * 10,
+                            float(line[28:36]) * 10,
+                            float(line[36:44]) * 10,
+                        ])
+                    except (ValueError, IndexError):
+                        continue
+
+        # Build exclusion sets between each crosslinked residue pair
+        excl: Dict[int, Set[int]] = {}
+        pairs_added = 0
+        for bond_data in valid_bond_data:
+            a1, a2 = bond_data["atoms"]
+            r1 = atom_res.get(a1)
+            r2 = atom_res.get(a2)
+            if not r1 or not r2 or r1 == r2:
+                continue
+            for i in res_atoms.get(r1, []):
+                for j in res_atoms.get(r2, []):
+                    if i == j:
+                        continue
+                    if coords:
+                        pi, pj = coords.get(i), coords.get(j)
+                        if pi is not None and pj is not None and np.linalg.norm(pi - pj) > cutoff:
+                            continue
+                    lo, hi = (i, j) if i < j else (j, i)
+                    if hi not in excl.setdefault(lo, set()):
+                        excl[lo].add(hi)
+                        pairs_added += 1
+
+        if not pairs_added:
+            return
+
+        # Locate an existing [ exclusions ] section (amber pdb2gmx output has none)
+        start = end = -1
+        for i, line in enumerate(lines):
+            if line.strip().startswith("[ exclusions ]"):
+                start = i
+            elif start >= 0 and line.strip().startswith("[") and not line.strip().startswith("[ exclusions ]"):
+                end = i
+                break
+
+        body = [f"{ai} " + " ".join(str(x) for x in sorted(excl[ai])) + "\n" for ai in sorted(excl)]
+
+        if start >= 0:
+            insert_at = end if end >= 0 else len(lines)
+            for entry in reversed(body):
+                lines.insert(insert_at, entry)
+        else:
+            lines.append("\n[ exclusions ]\n")
+            lines.append("; crosslink through-space exclusions (beyond nrexcl)\n")
+            lines.extend(body)
+
+        with open(itp_file, "w") as f:
+            f.writelines(lines)
+
+        LOG.debug(
+            f"Added {pairs_added} crosslink exclusion pair(s) "
+            f"({'distance-filtered' if coords else 'all inter-residue'}) "
+            f"to {os.path.basename(itp_file)}"
+        )
+
     def _filter_angles_by_bonds(
         self,
         angles: List[Tuple[int, int, int]],
@@ -900,8 +1032,13 @@ def write_itp(
         except Exception:
             pass
         
-        output_file = itp_file.parent / str(itp_file.name).replace("top", "itp")
-        
+        output_file = itp_file.with_suffix(".itp")
+
+        # pdb2gmx writes a single moleculetype block. GROMACS <2023 names it
+        # "Protein_chain_A"; GROMACS >=2023 names it just "Protein". Match either,
+        # anchored so comments/other lines don't trigger.
+        molname_re = re.compile(r'^\s*Protein(?:_chain_\w+)?\s+\d+\s*$')
+
         with open(output_file, 'w') as f:
             write = False
             for line in itp_model:
@@ -909,7 +1046,7 @@ def write_itp(
                     break
                 if write:
                     f.write(line)
-                elif 'Protein_chain_A' in line:
+                elif molname_re.match(line):
                     f.write('[ moleculetype ]\n')
                     f.write(f'{molecule_name}  3\n')
                     write = True
diff --git a/src/colbuilder/core/topology/crosslink.py b/src/colbuilder/core/topology/crosslink.py
index 4a8267d..73eef6f 100644
--- a/src/colbuilder/core/topology/crosslink.py
+++ b/src/colbuilder/core/topology/crosslink.py
@@ -47,12 +47,16 @@ def __init__(self, cnt_model: Optional[int] = None) -> None:
             'dihedrals': []
         }
         
-        # Distance thresholds for different crosslink types (in Å)
-        # Adjust these values based on your coarse-grained model
+        # Distance thresholds for different crosslink types (in Å).
+        # Equilibrium bead-bead distances are ~2.3-4.2 A (see dlyxly3/dlyxly2/dly45
+        # below). A loose 15 A cutoff lets one bead pair with several partners in
+        # crosslink-dense regions, creating spurious bonds. Keep the cutoff modest
+        # so only genuine partners (close in the assembled fibril) are bonded.
         self.crosslink_thresholds = {
-            'LYX_LY2': 15.0,  # Maximum distance for LYX SC4 - LY2 SC1 crosslinks
-            'LYX_LY3': 15.0,  # Maximum distance for LYX SC5 - LY3 SC1 crosslinks
-            'L4Y_L5Y': 15.0   # Maximum distance for L4Y SC1 - L5Y SC2 crosslinks
+            'LYX_LY2': 6.0,  # Maximum distance for LYX SC4 - LY2 SC1 crosslinks
+            'LYX_LY3': 6.0,  # Maximum distance for LYX SC5 - LY3 SC1 crosslinks
+            'LY2_LY3': 6.0,  # Maximum distance for LYX SC5 - LY3 SC1 crosslinks
+            'L4Y_L5Y': 6.0   # Maximum distance for L4Y SC1 - L5Y SC2 crosslinks
         }
         
         # HLKNL-crosslink parameters
@@ -65,16 +69,18 @@ def __init__(self, cnt_model: Optional[int] = None) -> None:
         # PYD-crosslink parameters
         self.klyxly2: str = '9000'   # LYX-LY2 bond force constant (kJ/mol/nm^2)
         self.klyxly3: str = '12000'  # LYX-LY3 bond force constant (kJ/mol/nm^2)
+        self.klyx5ly2: str = '12000'  # LYX-LY2 bond force constant (kJ/mol/nm^2) (conserve ring)
         self.dlyxly2: str = '0.290'  # LYX-LY2 bond equilibrium distance (nm)
         self.dlyxly3: str = '0.230'  # LYX-LY3 bond equilibrium distance (nm)
+        self.dlyx5ly2: str = '0.370'  # LYX-LY2 bond equilibrium distance (nm) (added)
         
         # PYD-crosslink angle parameters (degrees)
-        self.al2yx_1: str = '100'    # LY2-LYX TP1q-TC6q-TC4 angle
-        self.al2yx_2: str = '60'     # LY2-LYX TQ2p-TP1q-TC4 angle
-        self.al2yx_3: str = '130'    # LY2-LYX TP1q-TC4-SP2 angle
-        self.al3yx_1: str = '100'    # LY3-LYX TP1q-TC6q-TC4 angle
-        self.al3yx_2: str = '110'    # LY3-LYX TQ2p-TC6q-TC4 angle
-        self.al3yx_3: str = '130'    # LY3-LYX TC6q-TC4-SP2 angle
+        self.al2yx_1: str = '100'    # LY2-LYX TP1q-TC6q-TC4 angle R2 R4 R3/S3
+        self.al2yx_2: str = '60'     # LY2-LYX TQ2p-TP1q-TC4 angle R1 R2 R3
+        self.al2yx_3: str = '130'    # LY2-LYX TP1q-TC4-SP2 angle R2 B2 R3
+        self.al3yx_1: str = '100'    # LY3-LYX TP1q-TC6q-TC4 angle R2 R4 R3/S3
+        self.al3yx_2: str = '110'    # LY3-LYX TQ2p-TC6q-TC4 angle R1 R4 R3/S3
+        self.al3yx_3: str = '130'    # LY3-LYX TC6q-TC4-SP2 angle R4 S3 B3
         
         LOG.debug(f"Initialized Crosslink for model counter {cnt_model}")
 
@@ -110,7 +116,7 @@ def get_crosslink_coords(self, cnt_model: Optional[int] = None) -> List[List[flo
                         (line[17:20] == 'L4Y' and line[12:15] == 'SC1') or
                         (line[17:20] == 'L5Y' and line[12:15] == 'SC2')):
                         
-                        coords = [float(line[29:38]), float(line[38:46]), float(line[46:56])]
+                        coords = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
                         
                         self.crosslink_pdb.append([
                             str(it_pdb),           # atom index as string
@@ -214,6 +220,15 @@ def find_crosslink_pairs(self, cnt_model: Optional[int] = None) -> List[Tuple[Li
                 if dist <= self.crosslink_thresholds['LYX_LY2']:
                     self.crosslink_pairs.append((lyx_atom, ly2_atom))
                     LOG.info(f" Added LYX-LY2 pair: atoms {lyx_atom[0]} - {ly2_atom[0]} (distance: {dist:.3f} Å)")
+       
+        # Find LYX SC5 - LY2 SC1 pairs
+        for lyx_atom in lyx_sc5_atoms:
+            for ly2_atom in ly2_sc1_atoms:
+                dist = np.linalg.norm(np.array(lyx_atom[-3:]) - np.array(ly2_atom[-3:]))
+                LOG.debug(f"    Distance LYX SC5 (atom {lyx_atom[0]}) - LY2 SC1 (atom {ly2_atom[0]}): {dist:.3f} Å")
+                if dist <= self.crosslink_thresholds['LYX_LY2']:
+                    self.crosslink_pairs.append((lyx_atom, ly2_atom))
+                    LOG.info(f" Added LYX-LY2 pair: atoms {lyx_atom[0]} - {ly2_atom[0]} (distance: {dist:.3f} Å)")
         
         # Find LYX SC5 - LY3 SC1 pairs
         for lyx_atom in lyx_sc5_atoms:
@@ -280,10 +295,21 @@ def set_crosslink_bonded(self, cnt_model: Optional[int] = None,
                     ])
                     connections_found += 1
                     LOG.info(f"Added LYX-LY2 crosslink between {clx[0]} and {cly[0]} (distance: {dist:.3f} Å)")
+                
+                # LYX SC5 - LY2 SC1 crosslinks
+                elif (clx[1] == 'LYX' and clx[2] == 'SC5' and 
+                      cly[1] == 'LY2' and cly[2] == 'SC1'):
                     
+                    self.crosslink_bonded['bonds'].append([
+                        clx[0], cly[0], '1', self.dlyx5ly2, f"{self.klyx5ly2}\n"
+                    ])
+                    #angles check again
+                    connections_found += 1
+                    LOG.info(f"Added LYX-LY2 (SC5) crosslink between {clx[0]} and {cly[0]} (distance: {dist:.3f} Å)")
+
                 # LYX SC5 - LY3 SC1 crosslinks  
                 elif (clx[1] == 'LYX' and clx[2] == 'SC5' and 
-                      cly[1] == 'LY3' and cly[2] == 'SC1'):
+                      cly[1] == 'LY3' and cly[2] == 'SC1')
                     
                     self.crosslink_bonded['bonds'].append([
                         clx[0], cly[0], '1', self.dlyxly3, f"{self.klyxly3}\n"
@@ -335,6 +361,15 @@ def set_crosslink_bonded(self, cnt_model: Optional[int] = None,
                     connections_found += 1
                     LOG.info(f" Added LYX-LY2 crosslink between {cly[0]} and {clx[0]} (distance: {dist:.3f} Å)")
                     
+                elif (cly[1] == 'LYX' and cly[2] == 'SC5' and 
+                      clx[1] == 'LY2' and clx[2] == 'SC1'):
+                    
+                    self.crosslink_bonded['bonds'].append([
+                        cly[0], clx[0], '1', self.dlyx5ly2, f"{self.klyx5ly2}\n"
+                    ])
+                    connections_found += 1
+                    LOG.info(f" Added LYX-LY2 (SC5) crosslink between {cly[0]} and {clx[0]} (distance: {dist:.3f} Å)")
+
                 elif (cly[1] == 'LYX' and cly[2] == 'SC5' and 
                       clx[1] == 'LY3' and clx[2] == 'SC1'):
                     
diff --git a/src/colbuilder/core/topology/itp.py b/src/colbuilder/core/topology/itp.py
index 774e3e8..f00b438 100644
--- a/src/colbuilder/core/topology/itp.py
+++ b/src/colbuilder/core/topology/itp.py
@@ -257,6 +257,13 @@ def read_itp(
                         elif bonded_type == "dihedrals":
                             self.dihedrals[cnt_con].append(tokens)
 
+            # Atom-count offset: derive the connection's last atom index
+            # from the parsed atoms section, so merge offsets stay correct even if
+            # martinize2 emitted no [ position_restraints ] block. Equivalent to the
+            # posres-derived value when posres is present.
+            if cnt_con is not None and self.atoms[cnt_con]:
+                self.mol_ends[cnt_con] = [int(self.atoms[cnt_con][-1][0])]
+
         except FileNotFoundError:
             LOG.error(f"ITP file not found: {itp_path}")
             raise
diff --git a/src/colbuilder/core/topology/main_topology.py b/src/colbuilder/core/topology/main_topology.py
index d35e444..20a3317 100644
--- a/src/colbuilder/core/topology/main_topology.py
+++ b/src/colbuilder/core/topology/main_topology.py
@@ -35,7 +35,8 @@
     'go-table.itp',
     'col_go-sites.itp',
     '*.CG.pdb',
-    'D'
+    # Intermediate per-type caps subdirs copied into the topology working dir
+    'D', 'T', 'NC', 'DT', 'TD', 'M',
 }
 
 def cleanup_temporary_files(ff_name: str, temp_patterns: Set[str], search_dirs: Optional[List[Path]] = None) -> None:
@@ -329,11 +330,12 @@ async def build_topology(system: System, config: ColbuilderConfig, file_manager:
             # Always return to original directory
             os.chdir(original_dir)
             
-            # Clean up temporary files unless in debug mode
+            # Clean up temporary files unless in debug mode.
+            # Only clean the working topology_dir, NOT output_topology_dir: the
+            # latter holds the deliverables and some temp-file globs could match
+            # and delete them.
             search_dirs = [topology_dir]
-            if output_topology_dir is not None:
-                search_dirs.append(output_topology_dir)
-            
+
             if not config.topology_debug:
                 cleanup_temporary_files(
                     config.force_field or "unknown_force_field", 
diff --git a/src/colbuilder/core/topology/martini.py b/src/colbuilder/core/topology/martini.py
index d119e50..e3d7a96 100644
--- a/src/colbuilder/core/topology/martini.py
+++ b/src/colbuilder/core/topology/martini.py
@@ -15,6 +15,7 @@
 
 import cmd
 import os
+import sys
 import subprocess
 import shutil
 from pathlib import Path
@@ -33,6 +34,7 @@
     get_active_conda_env,
     find_and_install_custom_force_field,
     get_conda_command_with_path,
+    find_martinize2_executable,
 )
 from colbuilder.core.utils.logger import setup_logger
 from colbuilder.core.utils.files import FileManager
@@ -324,17 +326,26 @@ def cap_pdb(self, pdb: Optional[List[str]] = None) -> Tuple[List[str], str, str]
             firsts = set(first_res.values()) if first_res else set()
             lasts  = {name for _, name in last_res.values()} if last_res else set()
 
-            # Anything here means "don't ask martinize2 to apply a terminal mod":
+            # Residues meaning "this terminus is already capped / is a crosslink
+            # block", so martinize2 must NOT add another terminal modification.
             special_first = {"ACE", "CLA", "LY2", "LY3", "L4Y", "L5Y", "LYX"}
             special_last  = {"ACE", "CLA", "LY2", "LY3", "L4Y", "L5Y", "LYX", "NME"}
 
-            # N-terminus: conservative (ACE often causes issues on GLN etc. in some FF)
-            nter_flag = "none" if (not firsts or (firsts & special_first)) else "none"
+            # N-terminus: the geometry stage (caps.py) pre-caps every chain with an
+            # ACE residue, so the first residue is normally ACE and needs no -nter
+            # modification. We always use 'none' (martinize2's ACE modification is
+            # also known to fail on some first residues, e.g. GLN). Warn if a chain
+            # is unexpectedly NOT pre-capped so it isn't silently left uncapped.
+            if firsts and not (firsts & special_first):
+                LOG.warning(
+                    f"N-terminus not pre-capped (first residues={sorted(firsts)}); "
+                    f"using -nter none. Check the geometry capping stage if unexpected."
+                )
+            nter_flag = "none"
 
-            # C-terminus: allow NME only if all chains end on standard residues (not special, not NME)
-            cter_flag = "NME"
-            if not lasts or (lasts & special_last):
-                cter_flag = "none"
+            # C-terminus: apply the NME cap unless the chain already ends in a
+            # cap/crosslink block, in which case use 'none'.
+            cter_flag = "none" if (not lasts or (lasts & special_last)) else "NME"
 
             # LOG.debug(f"cap_pdb decided: -nter {nter_flag}, -cter {cter_flag} (first={firsts}, last={lasts})")
             return pdb, cter_flag, nter_flag
@@ -447,7 +458,10 @@ def get_system_pdb(self, size: Optional[int] = None) -> List[str]:
             return []
 
     def write_system_topology(
-        self, topology_file: str = "system.top", size: Optional[int] = None
+        self,
+        topology_file: str = "system.top",
+        size: Optional[int] = None,
+        processed_ids: Optional[List[int]] = None,
     ) -> None:
         """
         Write final topology file for the system.
@@ -457,10 +471,18 @@ def write_system_topology(
         topology_file : str
             Path to the output topology file
         size : Optional[int]
-            Number of models to include
+            Number of models considered (fallback range when processed_ids is None).
+        processed_ids : Optional[List[int]]
+            col_{id} indices that actually produced an .itp. When given, only these
+            are #included, preventing GROMACS errors from missing files when some
+            models failed or were skipped.
         """
-        if not size:
-            LOG.warning("No size provided to write_system_topology")
+        if processed_ids is not None:
+            ids_for_includes = sorted({int(m) for m in processed_ids})
+        elif size:
+            ids_for_includes = list(range(size))
+        else:
+            LOG.warning("No size or processed_ids provided to write_system_topology")
             return
 
         try:
@@ -470,9 +492,15 @@ def write_system_topology(
                 f.write('#include "martini_v3.0.0.itp"\n')
                 f.write('#include "go-sites.itp"\n\n')
 
-                for m in range(size):
-                    f.write(f'#include "col_{m}.itp"\n')
-                    f.write(f'#include "col_{m}_go-excl.itp"\n')
+                included = []
+                for m in ids_for_includes:
+                    # Belt-and-suspenders: never include a file that isn't on disk.
+                    if not Path(f"col_{int(m)}.itp").exists():
+                        LOG.warning(f"Skipping missing topology include: col_{int(m)}.itp")
+                        continue
+                    f.write(f'#include "col_{int(m)}.itp"\n')
+                    f.write(f'#include "col_{int(m)}_go-excl.itp"\n')
+                    included.append(int(m))
 
                 f.write('\n#include "martini_v3.0.0_solvents_v1.itp"\n')
                 f.write('#include "martini_v3.0.0_ions_v1.itp"\n')
@@ -481,8 +509,8 @@ def write_system_topology(
                 f.write("Collagen, Martini 3 and Go-Potentials \n")
                 f.write("\n[ molecules ]\n")
 
-                for t in range(size):
-                    f.write(f"col_{t}     1\n")
+                for t in included:
+                    f.write(f"col_{int(t)}     1\n")
 
             self.write_go_topology(name_type="sites.itp")
 
@@ -527,7 +555,10 @@ def translate_pdb(self, pdb: Optional[List[str]] = None) -> List[str]:
 
         translated = []
         for line in pdb:
-            if line[0:6] in self.is_line:
+            # Only ATOM/HETATM carry coordinates. TER/ANISOU lines have no
+            # parseable x/y/z, so don't try (and don't emit a spurious
+            # "Bad coord formatting" warning for every TER record).
+            if line[0:6] in ("ATOM  ", "HETATM"):
                 try:
                     # Columns: x[30:38], y[38:46], z[46:54]
                     x = float(line[30:38])
@@ -561,6 +592,47 @@ def write_gro(
         return None
 
 
+def _build_connected_groups(system: System) -> List[List[Any]]:
+    """
+    Collapse crosslink-connected models into transitive groups.
+
+    Mirrors Amber.get_connected_groups so that each physically connected set
+    of triple helices is emitted exactly once. model.connect is symmetric and
+    self-inclusive (e.g. {self} + direct neighbors); a BFS gives the full group.
+    """
+    all_models = list(system.get_models())
+    processed: set = set()
+    groups: List[List[Any]] = []
+
+    for model_id in all_models:
+        if model_id in processed:
+            continue
+
+        model = system.get_model(model_id=model_id)
+        if not model or not getattr(model, "connect", None):
+            groups.append([model_id])
+            processed.add(model_id)
+            continue
+
+        group: set = set()
+        stack = [model_id]
+        while stack:
+            cur = stack.pop()
+            if cur in group:
+                continue
+            group.add(cur)
+            cur_model = system.get_model(model_id=cur)
+            if cur_model and getattr(cur_model, "connect", None):
+                for nb in cur_model.connect:
+                    if nb not in group:
+                        stack.append(nb)
+
+        groups.append(sorted(group))
+        processed.update(group)
+
+    return groups
+
+
 def check_output_files(topology_dir: Path, expected_models: int):
     """Diagnostic function to check which files were created"""
     cg_files = list(topology_dir.glob("*.CG.pdb"))
@@ -689,7 +761,9 @@ async def build_martini3(
             )
 
         LOG.info(f"Step 3/{steps} Processing models with Martinize2")
-        processed_models = []
+        processed_models = []             # fibril model_ids that succeeded — for counts/logging ONLY
+                                          # (col_N.itp file indices live in written_itp_ids)
+        written_itp_ids: List[int] = []   # col_{cnt_model} indices actually written
 
         try:
             martinize2_command = getattr(config, "martinize2_command", None)
@@ -715,27 +789,38 @@ async def build_martini3(
             type_dir.mkdir(exist_ok=True, parents=True)
 
         models_list = [model_id for model_id in system.get_models()]
+
+        # Group crosslink-connected models so each physical triple helix is
+        # written exactly once. Without this, every model re-merges its whole
+        # neighborhood and connected helices are emitted multiple times,
+        # producing duplicate beads (0 A overlaps) and inflated particle counts.
+        connected_groups = _build_connected_groups(system)
+        LOG.debug(
+            f"Martini: collapsed {len(models_list)} models into "
+            f"{len(connected_groups)} connected group(s)"
+        )
+
         model_status = {}
         failed_martinize, failed_contact, failed_go, failed_itp = [], [], [], []
         processed_in_topology: set = set()
 
-        for model_id in tqdm(models_list, desc="Building topology", unit="%"):
+        for group in tqdm(connected_groups, desc="Building topology", unit="grp"):
+            # Representative model id drives all file naming for this group.
+            model_id = group[0]
             model = system.get_model(model_id=model_id)
             if model is None:
-                LOG.warning(f"Skipping model {model_id}: model not found in System")
+                LOG.warning(f"Skipping group {group}: representative model not found in System")
                 model_status[model_id] = "no_model"
                 continue
 
-            # build connect_ids fallback
-            sys_connect = getattr(model, "connect", None)
-            if sys_connect:
-                connect_ids = list(sys_connect)
-            else:
-                connect_ids = [model_id]  # self-connection
-                LOG.debug(f"Model {model_id}: no connections in System; processing as self-connection")
+            # Process every member of the group exactly once, and make sure the
+            # downstream helpers that read connectivity from the System
+            # (merge_pdbs, Itp.allocate/read_model) see the full group.
+            connect_ids = list(group)
+            model.connect = list(group)
 
             if not connect_ids:
-                LOG.warning(f"Skipping model {model_id}: no connections and no fallback")
+                LOG.warning(f"Skipping group {group}: no members")
                 model_status[model_id] = "no_connections"
                 continue
 
@@ -762,7 +847,30 @@ def _martinize_cmd(nter_flag: str, cter_flag: str) -> str:
                                 f"-nter {nter_flag} -cter {cter_flag} "
                                 f"-govs-include -govs-moltype col_{int(model_id)}.{int(connect_id)} -maxwarn 4"
                             )
-                            return get_conda_command_with_path("martinize2", args)
+                            # An explicit user override wins.
+                            configured = getattr(config, "martinize2_command", None)
+                            if configured and configured not in ("martinize2", "auto"):
+                                return f"{configured} {args}"
+                            # Prefer the martinize2 that lives in the SAME environment
+                            # as the running interpreter: that is the vermouth into
+                            # which the custom martini300C force field was just
+                            # installed. This avoids picking a martinize2 from another
+                            # Python (e.g. a pyenv shim ahead on PATH) whose vermouth
+                            # does not know the custom force field ("Unknown force
+                            # field martini300C").
+                            local_exe = Path(sys.executable).parent / "martinize2"
+                            if local_exe.exists():
+                                return f"{local_exe} {args}"
+                            # Otherwise fall back to a direct martinize2 on PATH, and
+                            # only wrap in `conda run` when it is reachable solely via
+                            # a conda env.
+                            try:
+                                exe, use_conda, _env = find_martinize2_executable()
+                            except Exception:
+                                exe, use_conda = "martinize2", True
+                            if use_conda:
+                                return get_conda_command_with_path("martinize2", args)
+                            return f"{exe} {args}"
 
                         cmd = _martinize_cmd(nter, cter)
                         process = await asyncio.create_subprocess_shell(
@@ -799,7 +907,7 @@ def _martinize_cmd(nter_flag: str, cter_flag: str) -> str:
                             continue
 
                         go_cmd = (
-                            f"python create_goVirt.py -s {int(model_id)}.{int(connect_id)}.CG.pdb "
+                            f"{sys.executable} create_goVirt.py -s {int(model_id)}.{int(connect_id)}.CG.pdb "
                             f"-f map.out --moltype col_{int(model_id)}.{int(connect_id)} --go_eps {go_epsilon}"
                         )
                         go_process = await asyncio.create_subprocess_shell(
@@ -826,6 +934,7 @@ def _martinize_cmd(nter_flag: str, cter_flag: str) -> str:
                         itp_.go_to_pairs(model_id=int(model_id))
                         itp_.make_topology(model_id=int(model_id), cnt_model=cnt_model)
                         processed_models.append(model_id)
+                        written_itp_ids.append(cnt_model)   # this col_{cnt_model}.itp now exists                        
 
                         # Track what we processed
                         for connect_id in connect_ids:
@@ -883,7 +992,11 @@ def _martinize_cmd(nter_flag: str, cter_flag: str) -> str:
 
         try:
             final_topology_file = f"collagen_fibril_{config.species}.top"
-            martini.write_system_topology(topology_file=final_topology_file, size=cnt_model)
+            martini.write_system_topology(
+                topology_file=final_topology_file,
+                size=cnt_model,
+                processed_ids=written_itp_ids,
+            )
 
             topology_file_path = Path(final_topology_file)
             if topology_file_path.exists():
@@ -914,7 +1027,9 @@ def _martinize_cmd(nter_flag: str, cter_flag: str) -> str:
             )
 
         try:
-            if not config.debug:
+            # Gate intermediate-file cleanup on topology_debug (the flag meant for
+            # keeping topology intermediates), not the general logging `debug` flag.
+            if not getattr(config, "topology_debug", False):
                 subprocess.run(r"rm \#*", shell=True, check=False)
         except Exception as e:
             LOG.warning(f"Error cleaning up temporary files: {str(e)}")
diff --git a/src/colbuilder/core/utils/config.py b/src/colbuilder/core/utils/config.py
index ec432d1..75eb897 100644
--- a/src/colbuilder/core/utils/config.py
+++ b/src/colbuilder/core/utils/config.py
@@ -259,7 +259,7 @@ class ColbuilderConfig(BaseModel):
         description="Automatically detect unpaired enzymatic crosslinks and replace them",
     )
     ratio_replace_scope: Literal["enzymatic", "non_enzymatic", "all"] = Field(
-        default="non_enzymatic",
+        default="enzymatic",
         description="Scope of residues considered for ratio-based replacement",
     )
     ratio_replace: Optional[float] = Field(
@@ -387,8 +387,8 @@ def __init__(self, **data):
             data["working_directory"] = Path.cwd().resolve()
 
         super().__init__(**data)
-        if self.mix_bool and self.ratio_mix is not None:
-            self.ratio_mix = self._convert_ratio_mix(self.ratio_mix)
+        # ratio_mix is converted/validated (incl. sum-to-100) by the
+        # validate_ratio_mix field validator; no extra conversion needed here.
         self.solution_space = self._convert_to_tuple(self.solution_space)
         self.files_mix = tuple(self.files_mix) if self.files_mix else None
         self.set_mode()
@@ -483,8 +483,12 @@ def validate_contact_distance(cls, value: Optional[float]) -> Optional[float]:
         return value
 
     @field_validator("fibril_length")
-    def validate_fibril_length(cls, v: float) -> float:
-        """Validate fibril length."""
+    def validate_fibril_length(cls, v: Optional[float]) -> Optional[float]:
+        """Validate fibril length. None is allowed here; required-ness is enforced
+        per-mode by validate_fibril_length_required (avoids a confusing TypeError
+        when the value is simply omitted)."""
+        if v is None:
+            return None
         if v <= 0:
             raise ValueError("fibril_length must be greater than 0")
         LOG.debug(f"Validating fibril_length: {v}")
@@ -631,16 +635,6 @@ def validate_files_mix(cls, value):
         """Validate files mix format."""
         return tuple(value) if value else None
 
-    @property
-    def ratio_mix(self) -> Dict[str, int]:
-        """Get ratio mix property."""
-        return self._ratio_mix
-
-    @ratio_mix.setter
-    def ratio_mix(self, value: Union[str, Dict[str, int]]):
-        """Set ratio mix property."""
-        self._ratio_mix = self._convert_ratio_mix(value)
-
     @field_validator("manual_replacements", mode="before")
     def validate_manual_replacements(
         cls, value: Optional[Union[str, List[str], Tuple[str, ...]]]
@@ -793,12 +787,31 @@ def validate_mix_config(cls, values):
                     error_code="CFG_ERR_004",
                 )
             if values.fibril_length is None:
-                values.fibril_length = values.get(
-                    "fibril_length", 40.0
-                )  # Default to 40 for mixing
+                values.fibril_length = 40.0  # Default to 40 nm for mixing
             LOG.debug(f"Validated fibril_length for mixing: {values.fibril_length}")
         return values
 
+    @model_validator(mode="after")
+    def validate_fibril_length_required(self) -> "ColbuilderConfig":
+        """fibril_length is required for geometry generation (no sensible default)."""
+        if self.geometry_generator and self.fibril_length is None:
+            raise ConfigurationError(
+                "fibril_length (nm) is required for geometry generation",
+                error_code="CFG_ERR_006",
+            )
+        return self
+
+    @model_validator(mode="after")
+    def validate_topology_requirements(self) -> "ColbuilderConfig":
+        """force_field is required when generating topology."""
+        if self.topology_generator and not self.force_field:
+            raise ConfigurationError(
+                "force_field is required for topology generation "
+                "(supported: 'amber99' or 'martini3')",
+                error_code="CFG_ERR_006",
+            )
+        return self
+
     @model_validator(mode="after")
     def validate_replace_config(self) -> "ColbuilderConfig":
         """Validate replacement configuration."""
diff --git a/src/colbuilder/core/utils/crosslink_detector.py b/src/colbuilder/core/utils/crosslink_detector.py
index 001d2e5..e417eea 100644
--- a/src/colbuilder/core/utils/crosslink_detector.py
+++ b/src/colbuilder/core/utils/crosslink_detector.py
@@ -41,7 +41,22 @@ class CrosslinkDetector:
     }
     
     ALL_CROSSLINK_RESIDUES = DIVALENT_RESIDUES | TRIVALENT_RESIDUES
-    
+
+    # Map user-facing crosslink type names (as specified in the config via
+    # n_term_type / c_term_type) to the expected structure category.
+    # 'D' = divalent (two participating residues), 'T' = trivalent (three).
+    DIVALENT_TYPES = {
+        'HLKNL', 'LKNL', 'deHLNL', 'deHHLNL',
+        'Pentosidine', 'Glucosepane', 'MOLD',
+    }
+    TRIVALENT_TYPES = {
+        'PYD', 'DPD', 'PYL', 'DPL',
+    }
+
+    # Type names that explicitly mean "no crosslink" and should be skipped
+    # when validating against the structure.
+    NO_CROSSLINK_TYPES = {'NONE', 'NOCROSS'}
+
     @classmethod
     def detect_structure_type(cls, pdb_path: Path) -> str:
         """
@@ -78,7 +93,136 @@ def detect_structure_type(cls, pdb_path: Path) -> str:
             # No crosslinks found - could be a non-crosslinked structure
             LOG.info("No crosslinks detected, using default type D")
             return 'D'  # Default to divalent
-    
+
+    @classmethod
+    def categorize_type(cls, type_name: Optional[str]) -> Optional[str]:
+        """
+        Map a user-facing crosslink type name to a structure category.
+
+        Args:
+            type_name: Crosslink type as specified in the config (e.g. "HLKNL",
+                "PYD"). May be None or a "no crosslink" placeholder.
+
+        Returns:
+            'D' for divalent types, 'T' for trivalent types, or None when the
+            type is empty, a "no crosslink" placeholder, or unrecognized.
+        """
+        if not type_name:
+            return None
+        name = str(type_name).strip()
+        if name.upper() in cls.NO_CROSSLINK_TYPES:
+            return None
+        if name in cls.DIVALENT_TYPES:
+            return 'D'
+        if name in cls.TRIVALENT_TYPES:
+            return 'T'
+        return None
+
+    @classmethod
+    def validate_against_specified_types(
+        cls,
+        pdb_path: Path,
+        specified_types: List[Optional[str]],
+    ) -> None:
+        """
+        Check that the crosslinks present in a PDB match the specified types.
+
+        This guards against a common mistake where a user supplies a PDB that
+        contains, for example, trivalent (PYD) crosslinks while specifying a
+        divalent crosslink type (e.g. HLKNL) in the configuration, or vice
+        versa.
+
+        Validation is skipped (no error raised) when:
+          - no crosslink types are specified (or all are "NONE"/"NOCROSS"),
+          - none of the specified types are recognized,
+          - the PDB contains no crosslink residues at all.
+
+        Args:
+            pdb_path: Path to the input PDB file.
+            specified_types: Crosslink type names from the config, e.g.
+                ``[config.n_term_type, config.c_term_type]``.
+
+        Raises:
+            GeometryGenerationError: If the structure contains crosslink
+                residues whose category was not requested by the specified
+                types (error code ``GEO_ERR_008``).
+        """
+        # Import here to avoid a circular import at module load time.
+        from colbuilder.core.utils.exceptions import GeometryGenerationError
+
+        expected_categories: Set[str] = set()
+        recognized_types: List[str] = []
+        for type_name in specified_types:
+            category = cls.categorize_type(type_name)
+            if category is not None:
+                expected_categories.add(category)
+                recognized_types.append(str(type_name).strip())
+
+        # Nothing meaningful to validate against.
+        if not expected_categories:
+            LOG.debug(
+                "Skipping crosslink type validation: no recognized crosslink "
+                "types were specified"
+            )
+            return
+
+        residues = cls.get_all_residues(pdb_path)
+        divalent_found = sorted(residues & cls.DIVALENT_RESIDUES)
+        trivalent_found = sorted(residues & cls.TRIVALENT_RESIDUES)
+
+        detected_categories: Set[str] = set()
+        if divalent_found:
+            detected_categories.add('D')
+        if trivalent_found:
+            detected_categories.add('T')
+
+        # No crosslinks in the structure: cannot (and need not) validate types.
+        if not detected_categories:
+            LOG.debug(
+                "Skipping crosslink type validation: no crosslink residues "
+                "found in %s", pdb_path
+            )
+            return
+
+        # The PDB contains a category of crosslink that was not requested.
+        if not detected_categories.issubset(expected_categories):
+            category_labels = {'D': 'divalent', 'T': 'trivalent'}
+            detected_label = ", ".join(
+                category_labels[c] for c in sorted(detected_categories)
+            )
+            expected_label = ", ".join(
+                category_labels[c] for c in sorted(expected_categories)
+            )
+            detected_residues = sorted(set(divalent_found) | set(trivalent_found))
+
+            message = (
+                f"The crosslinks found in '{pdb_path}' do not match the "
+                f"specified crosslink types.\n"
+                f"  Specified types: {', '.join(recognized_types)} "
+                f"(expected {expected_label} crosslinks)\n"
+                f"  Detected in PDB: {detected_label} crosslink residues "
+                f"{detected_residues}\n"
+                f"Update n_term_type/c_term_type to match the structure, or "
+                f"provide a PDB whose crosslinks match the specified types."
+            )
+            LOG.error(message)
+            raise GeometryGenerationError(
+                message=message,
+                error_code="GEO_ERR_008",
+                context={
+                    "pdb_file": str(pdb_path),
+                    "specified_types": recognized_types,
+                    "expected_categories": sorted(expected_categories),
+                    "detected_categories": sorted(detected_categories),
+                    "detected_residues": detected_residues,
+                },
+            )
+
+        LOG.debug(
+            "Crosslink type validation passed: specified %s, detected %s",
+            sorted(expected_categories), sorted(detected_categories),
+        )
+
     @classmethod
     def get_all_residues(cls, pdb_path: Path) -> Set[str]:
         """
diff --git a/src/colbuilder/core/utils/crosslinks.py b/src/colbuilder/core/utils/crosslinks.py
index 04931c7..54eaebb 100644
--- a/src/colbuilder/core/utils/crosslinks.py
+++ b/src/colbuilder/core/utils/crosslinks.py
@@ -87,7 +87,7 @@
 
 
 def parse_crosslink_position(
-    position_str: str, residue_type: str, atom_name: str
+    position_str: str, residue_type: str, atom_name: str, atom_name2: Optional[str] = None
 ) -> Optional[CrosslinkPosition]:
     """
     Parse a position string into a CrosslinkPosition object.
@@ -116,6 +116,7 @@ def parse_crosslink_position(
             chain_id=parts[1],
             residue_type=residue_type,
             atom_name=atom_name,
+            atom_name2=(atom_name2 if atom_name2 and atom_name2 != "NONE" else None),
         )
     except (ValueError, IndexError) as e:
         raise SequenceGenerationError(
@@ -174,7 +175,9 @@ def extract_crosslinks_from_dataframe(
 
             pos3 = None
             if "P3" in row and row["P3"] != "NONE":
-                pos3 = parse_crosslink_position(row["P3"], row["R3"], row["A31"])
+                pos3 = parse_crosslink_position(
+                    row["P3"], row["R3"], row["A31"], row.get("A32")
+                )
 
             crosslinks.append(
                 CrosslinkPair(
@@ -429,7 +432,13 @@ def _prepare_crosslink_info(self) -> List[Dict[str, str]]:
                     pair.position3.residue_type if pair.position3 else "NONE"
                 ),
                 "atom31": pair.position3.atom_name if pair.position3 else "NONE",
-                "atom32": pair.position3.atom_name if pair.position3 else "NONE",
+                # Second bond of the trivalent third residue uses A32 (atom_name2);
+                # fall back to A31 only if A32 is absent.
+                "atom32": (
+                    (pair.position3.atom_name2 or pair.position3.atom_name)
+                    if pair.position3
+                    else "NONE"
+                ),
             }
             for pair in self.crosslink_pairs
         ]
diff --git a/src/colbuilder/core/utils/data_structures.py b/src/colbuilder/core/utils/data_structures.py
index fe256c7..59286f9 100644
--- a/src/colbuilder/core/utils/data_structures.py
+++ b/src/colbuilder/core/utils/data_structures.py
@@ -102,6 +102,7 @@ class CrosslinkPosition:
     chain_id: str
     residue_type: str
     atom_name: str
+    atom_name2: Optional[str] = None  # second atom (CSV A32) for the third residue of a trivalent crosslink
 
     def __post_init__(self):
         if self.chain_id not in {"A", "B", "C"}:
diff --git a/src/colbuilder/core/utils/error_codes.py b/src/colbuilder/core/utils/error_codes.py
index 312ce9d..0e910b1 100644
--- a/src/colbuilder/core/utils/error_codes.py
+++ b/src/colbuilder/core/utils/error_codes.py
@@ -152,7 +152,7 @@ class ErrorInfo(NamedTuple):
             "Contact distance must be a positive number",
             "Fibril length must be a positive number less than 334",
             "Terminal combinations must follow format 'ResidueNumber.Chain - ResidueNumber.Chain'",
-            "Force field must be either 'amber99' or 'martini'",
+            "Force field must be either 'amber99' or 'martini3'",
         ],
     ),
 }
@@ -276,6 +276,17 @@ class ErrorInfo(NamedTuple):
             "Verify chain terminations match chain count",
         ],
     ),
+    "GEO_ERR_008": ErrorInfo(
+        code="GEO_ERR_008",
+        message="Crosslinks in PDB do not match the specified crosslink types",
+        suggestions=[
+            "Ensure n_term_type/c_term_type match the crosslinks present in the PDB",
+            "Divalent types (HLKNL, LKNL, deHLNL, deHHLNL) require a divalent PDB",
+            "Trivalent types (PYD, DPD, PYL, DPL) require a trivalent PDB",
+            "Set the crosslink types to match the input structure, or provide a matching PDB",
+        ],
+        docs_url="https://colbuilder.readthedocs.io/en/latest/geometry.html#input-requirements",
+    ),
 }
 
 # Topology-related errors
@@ -284,7 +295,7 @@ class ErrorInfo(NamedTuple):
         code="TOP_ERR_001",
         message="Invalid force field specification",
         suggestions=[
-            "Use a supported force field (currently only amber99)",
+            "Use a supported force field ('amber99' or 'martini3')",
             "Check force field spelling",
             "Verify force field is properly configured",
             "Ensure force field files are available in the expected location",
diff --git a/src/colbuilder/core/utils/files.py b/src/colbuilder/core/utils/files.py
index ec709e9..f7f568c 100644
--- a/src/colbuilder/core/utils/files.py
+++ b/src/colbuilder/core/utils/files.py
@@ -531,6 +531,38 @@ def ensure_mixing_dir(self) -> Path:
 
         return self.mixing_dir
 
+    def clear_generation_dirs(self) -> None:
+        """
+        Remove stale per-model PDBs, caps files and type subdirectories from the
+        geometry, mixing and replacement temp directories at the start of a run.
+
+        Without this, re-running with a different fibril length / contact distance
+        leaves old ``{id}.caps.pdb`` files behind; since the final PDB is assembled
+        by selecting caps per model id, a model whose caps were not regenerated this
+        run would silently be written from the previous run's geometry.
+
+        Deliberately does NOT touch topology_gen or sequence_gen (those are consumed
+        in later phases and may legitimately persist within a run).
+        """
+        type_subdirs = {"NC", "D", "T", "TD", "DT", "M", "DY"}
+        for base in (self.geometry_dir, self.mixing_dir, self.replacement_dir):
+            if not base.exists():
+                continue
+            try:
+                for entry in base.iterdir():
+                    if entry.is_dir() and entry.name in type_subdirs:
+                        shutil.rmtree(entry, ignore_errors=True)
+                    elif entry.is_file() and (
+                        entry.name.endswith(".caps.pdb") or entry.suffix == ".pdb"
+                    ):
+                        try:
+                            entry.unlink()
+                        except Exception:
+                            pass
+            except Exception as e:
+                LOG.debug(f"Could not clear generation dir {base}: {e}")
+        LOG.debug("Cleared stale caps/type subdirs from geometry/mixing/replacement temp dirs")
+
     def cleanup(self, category: Optional[str] = None, force: bool = False) -> None:
         """
         Clean up temporary files and directories.