From e6ffcaf001270f7dd9d47b28fb4bb3e9382ab5a0 Mon Sep 17 00:00:00 2001
From: Thomas Duigou <thomas.duigou@inrae.fr>
Date: Wed, 29 Oct 2025 23:22:08 +0100
Subject: [PATCH] fix(rp2erxn.py): skip transformations having dot-containing
 products instead of failing

The "Product SMILES" column of the RP2 output file is supposed to contain only one molecule per row (i.e. SMILES without dots)
---
 rp2paths/rp2erxn.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/rp2paths/rp2erxn.py b/rp2paths/rp2erxn.py
index 08807a3..1493c65 100644
--- a/rp2paths/rp2erxn.py
+++ b/rp2paths/rp2erxn.py
@@ -201,6 +201,7 @@ def compute(infile, cmpdfile='compounds.txt', rxnfile='reactions.txt',
 
     # 1) Check consistency and 2) Populate compounds
     all_cmpds = dict()
+    banished_trs = set()
     for tid in sorted(content.keys()):  # Order determine CMPD IDs
         first = True
         for row in content[tid]:
@@ -228,20 +229,42 @@ def compute(infile, cmpdfile='compounds.txt', rxnfile='reactions.txt',
             sys.exit(0)
         try:
             assert prods_from_rxn == prods_from_cmpd
-        except BaseException:
-            print('Assertion error: differences in products')
-            print(tid)
-            print(prods_from_rxn, prods_from_cmpd)
-            sys.exit(0)
+        except AssertionError:
+            if any("." in smi for smi in prods_from_cmpd):
+                print(
+                    "Unexpected multiple products detected in 'Product "
+                    "SMILES' column from RetroPath2.0 result file. This "
+                    "is likely due to a sanitization issue from "
+                    "RetroPath2.0 result file."
+                )
+                print(f" L Transformation skipped: {tid}")
+                print(f" L Products from reaction: {prods_from_rxn}")
+                print(f" L Products from 'Product SMILES' column: {prods_from_cmpd}")
+                banished_trs.add(tid)
+                continue
+            else:
+                print('Assertion error: differences in products')
+                print(tid)
+                print(prods_from_rxn, prods_from_cmpd)
+                sys.exit(0)
         # Populate
         for smi in sorted(list(subs_from_rxn | prods_from_rxn)):
             if smi not in all_cmpds.keys():
                 cmpd = Compound(smi)
                 all_cmpds[smi] = cmpd
 
+    # Debug info
+    if len(banished_trs) > 0:
+        print(f"Total transformations skipped: {len(banished_trs)}")
+        for tid in sorted(banished_trs):
+            print(f" - {tid}")
+
     # Populate transformations
     all_trs = dict()
     for tid in content.keys():
+        # Skip transformations that were marked as banished
+        if tid in banished_trs:
+            continue
         first = True
         for row in content[tid]:
             if first: