From 9b01420ce2f791a7ab362f9f37bee5dbe8efbfe1 Mon Sep 17 00:00:00 2001
From: Franz Waibl <waiblfranz@gmail.com>
Date: Wed, 5 Feb 2025 15:15:58 +0100
Subject: [PATCH 1/5] Fix the scaling of the hydrophobicity output, and add a
 unit test

* There is now a unit test to check that the scale of the written
  surface roughly matches that of the atomic coordinates. This is to
  avoid unit conversion errors in the future.
* Updated the README with some information on PLY output and units.
---
 README.md                                   |  5 +++
 surface_analyses/commandline_hydrophobic.py |  2 +-
 surface_analyses/surface.py                 | 30 +++++++++++----
 test/commandline_electrostatic_test.py      | 41 +++++++++++++--------
 test/commandline_hydrophobic_test.py        | 31 +++++++++++++---
 5 files changed, 80 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index fb2d6b0..5924234 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,11 @@ pre-defined. See Eisenberg and McLachlan (1986) or Wildman and Crippen (1999)
 for references. Additionally, using `rdkit-crippen` will use a SMILES input to 
 assign Crippen parameters using the RdKit (useful for small molecules).
 
+## Output formats
+Both the electrostatics and hydrophobicity script allow writing surfaces to .ply files, using the plyfile library (https://github.com/dranjan/python-plyfile). The surfaces is colored using either the patches (where each patch gets a different color), or using the raw data (using the electrostatic potential at the vertices, or the hydrohobic potential approach of Heiden et al. https://doi.org/10.1007/BF00124359). The raw data will also be written to PLY properties, and the coordinate units are chosen to match Pymol (units of $10^-8$m).
+
+Additionally, the hydrophobicity script supports output in the numpy npz format, which is more efficient when writing many surfaces.
+
 ## Examples
 Visualize the hydrophobic potential on a non-protein molecule:
 ```
diff --git a/surface_analyses/commandline_hydrophobic.py b/surface_analyses/commandline_hydrophobic.py
index c486641..441dc84 100644
--- a/surface_analyses/commandline_hydrophobic.py
+++ b/surface_analyses/commandline_hydrophobic.py
@@ -244,7 +244,7 @@ def run_hydrophobic(
         if ply_out:
             fnames = ply_filenames(ply_out, len(surfs))
             for surf, fname in zip(surfs, fnames):
-                surf.write_ply(fname, coordinate_scaling=10)
+                surf.write_ply(fname)
         output.update(surfaces_to_dict(surfs, basename="hydrophobic_potential"))
     if out is not None:
         np.savez(out, **output)
diff --git a/surface_analyses/surface.py b/surface_analyses/surface.py
index e8483b1..bf29357 100644
--- a/surface_analyses/surface.py
+++ b/surface_analyses/surface.py
@@ -90,10 +90,10 @@ def vertex_areas(self):
     def __repr__(self):
         return f"{self.__class__.__name__}({self.n_vertices} vertices, {self.n_faces} faces)"
 
-    def as_plydata(self, text=True, units_per_angstrom=0.1):
+    def as_plydata(self, text=True, units_per_nm=0.1):
         """Convert to a plyfile.PlyData object, while scaling coordinates
 
-        the default scaling units_per_angstrom=0.1 matches the PyMol CGO
+        the default scaling units_per_nm=0.1 matches the PyMol CGO
         scaling factor of 1:100 and the nm -> A conversion.
         """
         vertex_dtype = [("x", "f4"), ("y", "f4"), ("z", "f4")]
@@ -107,7 +107,7 @@ def as_plydata(self, text=True, units_per_angstrom=0.1):
             vertex_dtype.append((k, PLY_DTYPES[dtype.kind]))
         vertex_data = []
         for i in range(self.n_vertices):
-            vert = list(self.vertices[i] * units_per_angstrom)
+            vert = list(self.vertices[i] * units_per_nm)
             for k in keys:
                 vert.append(self.data[k][i])
             vertex_data.append(tuple(vert))
@@ -121,14 +121,28 @@ def as_plydata(self, text=True, units_per_angstrom=0.1):
         out = plyfile.PlyData([vertex, face], text=text)
         return out
 
-    def write_ply(self, fname, coordinate_scaling=1.):
-        "Write a PLY file, scaling is relative to the default of 0.1."
+    def write_ply(self, fname, units_per_nm=0.1):
+        "Write a PLY file, scaling of 0.1 matches nm -> PyMol [10^-8 m]."
         with open(fname, mode="wb") as f:
-            self.as_plydata(units_per_angstrom=coordinate_scaling*0.1).write(f)
+            self.as_plydata(units_per_nm=units_per_nm).write(f)
 
     @classmethod
-    def from_plydata(cls, plydata):
-        pass
+    def from_plydata(cls, plydata, units_per_nm=0.1):
+        vert = plydata['vertex']
+        xyz = np.stack([vert['x'], vert['y'], vert['z']], axis=-1) / units_per_nm
+        faces = np.array(list(plydata['face']['vertex_indices']))
+        surf = Surface(xyz, faces)
+        props = set([p.name for p in vert.properties]) - {'x', 'y', 'z'}
+        for prop in props:
+            surf[prop] = vert[prop]
+        return surf
+
+    @classmethod
+    def read_ply(cls, fname, units_per_nm=0.1):
+        return cls.from_plydata(
+            plyfile.PlyData.read(fname),
+            units_per_nm=units_per_nm,
+        )
 
     @classmethod
     def isosurface(cls, grid, values, isovalue, gradient_direction='descent'):
diff --git a/test/commandline_electrostatic_test.py b/test/commandline_electrostatic_test.py
index 07b60c1..14ba198 100644
--- a/test/commandline_electrostatic_test.py
+++ b/test/commandline_electrostatic_test.py
@@ -2,6 +2,8 @@
 # -*- coding: utf-8 -*-
 
 from surface_analyses.commandline_electrostatic import main, biggest_residue_contribution, parse_args, run_electrostatics
+from surface_analyses.surface import Surface
+
 from contextlib import redirect_stdout
 import inspect
 import io
@@ -9,7 +11,9 @@
 import os
 import shutil
 from unittest.mock import patch
+from tempfile import TemporaryDirectory
 
+import mdtraj as md
 import pandas as pd
 from pandas.testing import assert_frame_equal
 import numpy as np
@@ -115,21 +119,28 @@ def get_nth_line(file, n):
 
 
 def test_trastuzumab_ply_out():
-    args = [
-        TRASTUZUMAB_PATH / 'apbs-input.pdb',
-        TRASTUZUMAB_PATH / 'apbs-potential.dx',
-        '--out',
-        str(TRASTUZUMAB_PATH / 'apbs-patches-ply.csv'),
-        '--ply_out',
-        str(TRASTUZUMAB_PATH / 'apbs'),
-    ]
-    run_commandline(*args, surface_type="sas")
-    # check the number of vertices in the output
-    with open(TRASTUZUMAB_PATH / 'apbs-potential.ply') as f:
-        if msms.msms_available():
-            assert get_nth_line(f, 2).strip() == "element vertex 40549"
-        else:
-            assert get_nth_line(f, 2).strip() == "element vertex 67252"
+    with TemporaryDirectory() as tmp_str:
+        tmp = Path(tmp_str)
+        pdb_name = str(TRASTUZUMAB_PATH / 'apbs-input.pdb')
+        pot_name = str(TRASTUZUMAB_PATH / 'apbs-potential.dx')
+        ply_name = str(tmp / 'apbs')
+        csv_name = str(tmp / 'apbs-patches-ply.csv')
+        args = [pdb_name, pot_name, '--out', csv_name, '--ply_out', ply_name]
+        run_commandline(*args, surface_type='sas')
+        # check the number of vertices in the output
+        with open(ply_name + '-potential.ply') as f:
+            if msms.msms_available():
+                assert get_nth_line(f, 2).strip() == 'element vertex 40549'
+            else:
+                assert get_nth_line(f, 2).strip() == 'element vertex 67252'
+        surf = Surface.read_ply(ply_name + '-potential.ply')
+        crd = md.load(pdb_name).xyz[0]
+        surf_extent = surf.vertices.max(axis=0) - surf.vertices.min(axis=0)
+        crd_extent = crd.max(axis=0) - crd.min(axis=0)
+        scale_ratios = surf_extent / crd_extent
+        assert np.all(scale_ratios > 1)
+        assert np.all(scale_ratios < 1.2)
+
 
 
 def test_biggest_residue_contribution():
diff --git a/test/commandline_hydrophobic_test.py b/test/commandline_hydrophobic_test.py
index 0082d5c..56bd2c6 100644
--- a/test/commandline_hydrophobic_test.py
+++ b/test/commandline_hydrophobic_test.py
@@ -5,12 +5,14 @@
 import inspect
 import os.path
 import shutil
-import tempfile
+from tempfile import TemporaryDirectory
 
 import pytest
 import numpy as np
 
 from surface_analyses.commandline_hydrophobic import main, parse_args, run_hydrophobic
+from surface_analyses.surface import Surface
+import mdtraj as md
 
 TrastuzumabRun = namedtuple('TrastuzumabRun', 'scale method expected_data')
 
@@ -60,7 +62,8 @@ def trastuzumab_run(request):
     return
 
 def run_commandline(parm, traj, scale, outfile, *args):
-    main([str(parm), str(traj), '--scale', str(scale), '--out', str(outfile)] + list(args))
+    args_s = [str(a) for a in args]
+    main([str(parm), str(traj), '--scale', str(scale), '--out', str(outfile)] + args_s)
 
 def test_output_consistent(trastuzumab_run):
     runtype = trastuzumab_run.method
@@ -86,7 +89,7 @@ def test_output_consistent(trastuzumab_run):
 
     if scale == 'wimley-white':
         scale = TRASTUZUMAB_PATH / 'wimley-white-scaled.csv'
-    with tempfile.TemporaryDirectory() as tmp:
+    with TemporaryDirectory() as tmp:
         out = Path(tmp) / 'out.npz'
         run_commandline(parm7, rst7, scale, out, *args)
         with np.load(out, allow_pickle=True) as npz:
@@ -96,18 +99,36 @@ def test_output_consistent(trastuzumab_run):
 def test_output_with_sc_norm():
     scale = TRASTUZUMAB_PATH / 'glmnet.csv'
     args = ['--surfscore', '--surftype', 'sc_norm']
-    parm7 = TRASTUZUMAB_PATH / 'input.parm7'
+    parm7 = TRASTUZUMAB_PATH / 'input.parm9'
     rst7 = TRASTUZUMAB_PATH / 'input.rst7'
     expected_fname = TRASTUZUMAB_PATH / 'jain-surfscore-sc-norm.npz'
     with np.load(expected_fname, allow_pickle=True) as npz:
         expected = dict(npz)
-    with tempfile.TemporaryDirectory() as tmp:
+    with TemporaryDirectory() as tmp:
         out = Path(tmp) / "out.npz"
         run_commandline(parm7, rst7, scale, out, *args)
         with np.load(out, allow_pickle=True) as npz:
             assert_outputs_equal(npz, expected)
 
 
+def test_surface_size():
+    scale = TRASTUZUMAB_PATH / 'glmnet.csv'
+    parm7 = TRASTUZUMAB_PATH / 'input.parm7'
+    rst7 = TRASTUZUMAB_PATH / 'input.rst7'
+    with TemporaryDirectory() as tmp:
+        out = Path(tmp) / "out.npz"
+        ply_out = Path(tmp) / "out.ply"
+        args = ['--potential', '--ply_out', ply_out]
+        run_commandline(parm7, rst7, scale, out, *args)
+        surf = Surface.read_ply(ply_out)
+        crd = md.load(rst7, top=parm7).xyz[0]
+        surf_extent = surf.vertices.max(axis=0) - surf.vertices.min(axis=0)
+        crd_extent = crd.max(axis=0) - crd.min(axis=0)
+        scale_ratios = surf_extent / crd_extent
+        assert np.all(scale_ratios > 1)
+        assert np.all(scale_ratios < 1.2)
+
+
 def test_rdkit_crippen():
     pdb = TESTS_PATH / '1csa-model1.pdb'
     args = ['--surfscore', '--smiles', CsA_SMILES]

From bef08cbbb642ff426d3973b54619db89eddddaa9 Mon Sep 17 00:00:00 2001
From: Franz Waibl <44898997+fwaibl@users.noreply.github.com>
Date: Wed, 5 Feb 2025 15:19:43 +0100
Subject: [PATCH 2/5] Update README.md formatting

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5924234..e9a7101 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ for references. Additionally, using `rdkit-crippen` will use a SMILES input to
 assign Crippen parameters using the RdKit (useful for small molecules).
 
 ## Output formats
-Both the electrostatics and hydrophobicity script allow writing surfaces to .ply files, using the plyfile library (https://github.com/dranjan/python-plyfile). The surfaces is colored using either the patches (where each patch gets a different color), or using the raw data (using the electrostatic potential at the vertices, or the hydrohobic potential approach of Heiden et al. https://doi.org/10.1007/BF00124359). The raw data will also be written to PLY properties, and the coordinate units are chosen to match Pymol (units of $10^-8$m).
+Both the electrostatics and hydrophobicity script allow writing surfaces to .ply files, using the [plyfile](https://github.com/dranjan/python-plyfile) library. The surfaces is colored using either the patches (where each patch gets a different color), or using the raw data (using the electrostatic potential at the vertices, or the hydrohobic potential approach of [Heiden et al.](https://doi.org/10.1007/BF00124359)). The raw data will also be written to PLY properties, and the coordinate units are chosen to match Pymol (units of 10^(-8)m).
 
 Additionally, the hydrophobicity script supports output in the numpy npz format, which is more efficient when writing many surfaces.
 

From dba10b07c89667e36dde46933236a1786067427b Mon Sep 17 00:00:00 2001
From: Franz Waibl <waiblfranz@gmail.com>
Date: Wed, 5 Feb 2025 15:22:47 +0100
Subject: [PATCH 3/5] Fix surface_test.py

---
 test/surface_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/surface_test.py b/test/surface_test.py
index be82cb7..50ea511 100644
--- a/test/surface_test.py
+++ b/test/surface_test.py
@@ -17,7 +17,7 @@ def test_repr_no_error(minimal):
     repr(minimal)
 
 def test_convert_to_ply(minimal):
-    plydat = minimal.as_plydata(units_per_angstrom=1.)
+    plydat = minimal.as_plydata(units_per_nm=1.)
     assert len(plydat['vertex']) == minimal.n_vertices
     for i_dim, dim in enumerate('xyz'):
         assert np.allclose(plydat['vertex'][dim], minimal.vertices[:, i_dim])

From 5f8c93063610a4307f132e00f545895d90d5aebc Mon Sep 17 00:00:00 2001
From: Franz Waibl <waiblfranz@gmail.com>
Date: Wed, 5 Feb 2025 15:25:28 +0100
Subject: [PATCH 4/5] Fix commandline_hydrophobic_test.py

---
 test/commandline_hydrophobic_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/commandline_hydrophobic_test.py b/test/commandline_hydrophobic_test.py
index 56bd2c6..44970ae 100644
--- a/test/commandline_hydrophobic_test.py
+++ b/test/commandline_hydrophobic_test.py
@@ -132,7 +132,7 @@ def test_surface_size():
 def test_rdkit_crippen():
     pdb = TESTS_PATH / '1csa-model1.pdb'
     args = ['--surfscore', '--smiles', CsA_SMILES]
-    with tempfile.TemporaryDirectory() as tmp:
+    with TemporaryDirectory() as tmp:
         out = Path(tmp) / "out.npz"
         run_commandline(pdb, pdb, 'rdkit-crippen', out, *args)
         with np.load(out, allow_pickle=True) as npz:

From 89ff494ce5c48d02662b2a64cc8d9a885ee3acf0 Mon Sep 17 00:00:00 2001
From: Franz Waibl <waiblfranz@gmail.com>
Date: Wed, 5 Feb 2025 15:27:42 +0100
Subject: [PATCH 5/5] Fix commandline_hydrophobic_test.py

---
 test/commandline_hydrophobic_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/commandline_hydrophobic_test.py b/test/commandline_hydrophobic_test.py
index 44970ae..ade1c2f 100644
--- a/test/commandline_hydrophobic_test.py
+++ b/test/commandline_hydrophobic_test.py
@@ -99,7 +99,7 @@ def test_output_consistent(trastuzumab_run):
 def test_output_with_sc_norm():
     scale = TRASTUZUMAB_PATH / 'glmnet.csv'
     args = ['--surfscore', '--surftype', 'sc_norm']
-    parm7 = TRASTUZUMAB_PATH / 'input.parm9'
+    parm7 = TRASTUZUMAB_PATH / 'input.parm7'
     rst7 = TRASTUZUMAB_PATH / 'input.rst7'
     expected_fname = TRASTUZUMAB_PATH / 'jain-surfscore-sc-norm.npz'
     with np.load(expected_fname, allow_pickle=True) as npz: