liedllab · fwaibl · Feb 5, 2025 · Feb 5, 2025 · Feb 5, 2025 · Feb 5, 2025
diff --git a/README.md b/README.md
@@ -42,6 +42,11 @@ pre-defined. See Eisenberg and McLachlan (1986) or Wildman and Crippen (1999)
 for references. Additionally, using `rdkit-crippen` will use a SMILES input to 
 assign Crippen parameters using the RdKit (useful for small molecules).
 
+## Output formats
+Both the electrostatics and hydrophobicity script allow writing surfaces to .ply files, using the [plyfile](https://github.com/dranjan/python-plyfile) library. The surfaces is colored using either the patches (where each patch gets a different color), or using the raw data (using the electrostatic potential at the vertices, or the hydrohobic potential approach of [Heiden et al.](https://doi.org/10.1007/BF00124359)). The raw data will also be written to PLY properties, and the coordinate units are chosen to match Pymol (units of 10^(-8)m).
+
+Additionally, the hydrophobicity script supports output in the numpy npz format, which is more efficient when writing many surfaces.
+
 ## Examples
 Visualize the hydrophobic potential on a non-protein molecule:
 ```

diff --git a/surface_analyses/commandline_hydrophobic.py b/surface_analyses/commandline_hydrophobic.py
@@ -244,7 +244,7 @@ def run_hydrophobic(
         if ply_out:
             fnames = ply_filenames(ply_out, len(surfs))
             for surf, fname in zip(surfs, fnames):
-                surf.write_ply(fname, coordinate_scaling=10)
+                surf.write_ply(fname)
         output.update(surfaces_to_dict(surfs, basename="hydrophobic_potential"))
     if out is not None:
         np.savez(out, **output)

diff --git a/surface_analyses/surface.py b/surface_analyses/surface.py
@@ -90,10 +90,10 @@ def vertex_areas(self):
     def __repr__(self):
         return f"{self.__class__.__name__}({self.n_vertices} vertices, {self.n_faces} faces)"
 
-    def as_plydata(self, text=True, units_per_angstrom=0.1):
+    def as_plydata(self, text=True, units_per_nm=0.1):
         """Convert to a plyfile.PlyData object, while scaling coordinates
 
-        the default scaling units_per_angstrom=0.1 matches the PyMol CGO
+        the default scaling units_per_nm=0.1 matches the PyMol CGO
         scaling factor of 1:100 and the nm -> A conversion.
         """
         vertex_dtype = [("x", "f4"), ("y", "f4"), ("z", "f4")]
@@ -107,7 +107,7 @@ def as_plydata(self, text=True, units_per_angstrom=0.1):
             vertex_dtype.append((k, PLY_DTYPES[dtype.kind]))
         vertex_data = []
         for i in range(self.n_vertices):
-            vert = list(self.vertices[i] * units_per_angstrom)
+            vert = list(self.vertices[i] * units_per_nm)
             for k in keys:
                 vert.append(self.data[k][i])
             vertex_data.append(tuple(vert))
@@ -121,14 +121,28 @@ def as_plydata(self, text=True, units_per_angstrom=0.1):
         out = plyfile.PlyData([vertex, face], text=text)
         return out
 
-    def write_ply(self, fname, coordinate_scaling=1.):
-        "Write a PLY file, scaling is relative to the default of 0.1."
+    def write_ply(self, fname, units_per_nm=0.1):
+        "Write a PLY file, scaling of 0.1 matches nm -> PyMol [10^-8 m]."
         with open(fname, mode="wb") as f:
-            self.as_plydata(units_per_angstrom=coordinate_scaling*0.1).write(f)
+            self.as_plydata(units_per_nm=units_per_nm).write(f)
 
     @classmethod
-    def from_plydata(cls, plydata):
-        pass
+    def from_plydata(cls, plydata, units_per_nm=0.1):
+        vert = plydata['vertex']
+        xyz = np.stack([vert['x'], vert['y'], vert['z']], axis=-1) / units_per_nm
+        faces = np.array(list(plydata['face']['vertex_indices']))
+        surf = Surface(xyz, faces)
+        props = set([p.name for p in vert.properties]) - {'x', 'y', 'z'}
+        for prop in props:
+            surf[prop] = vert[prop]
+        return surf
+
+    @classmethod
+    def read_ply(cls, fname, units_per_nm=0.1):
+        return cls.from_plydata(
+            plyfile.PlyData.read(fname),
+            units_per_nm=units_per_nm,
+        )
 
     @classmethod
     def isosurface(cls, grid, values, isovalue, gradient_direction='descent'):

diff --git a/test/commandline_electrostatic_test.py b/test/commandline_electrostatic_test.py
@@ -2,14 +2,18 @@
 # -*- coding: utf-8 -*-
 
 from surface_analyses.commandline_electrostatic import main, biggest_residue_contribution, parse_args, run_electrostatics
+from surface_analyses.surface import Surface
+
 from contextlib import redirect_stdout
 import inspect
 import io
 from pathlib import Path
 import os
 import shutil
 from unittest.mock import patch
+from tempfile import TemporaryDirectory
 
+import mdtraj as md
 import pandas as pd
 from pandas.testing import assert_frame_equal
 import numpy as np
@@ -115,21 +119,28 @@ def get_nth_line(file, n):
 
 
 def test_trastuzumab_ply_out():
-    args = [
-        TRASTUZUMAB_PATH / 'apbs-input.pdb',
-        TRASTUZUMAB_PATH / 'apbs-potential.dx',
-        '--out',
-        str(TRASTUZUMAB_PATH / 'apbs-patches-ply.csv'),
-        '--ply_out',
-        str(TRASTUZUMAB_PATH / 'apbs'),
-    ]
-    run_commandline(*args, surface_type="sas")
-    # check the number of vertices in the output
-    with open(TRASTUZUMAB_PATH / 'apbs-potential.ply') as f:
-        if msms.msms_available():
-            assert get_nth_line(f, 2).strip() == "element vertex 40549"
-        else:
-            assert get_nth_line(f, 2).strip() == "element vertex 67252"
+    with TemporaryDirectory() as tmp_str:
+        tmp = Path(tmp_str)
+        pdb_name = str(TRASTUZUMAB_PATH / 'apbs-input.pdb')
+        pot_name = str(TRASTUZUMAB_PATH / 'apbs-potential.dx')
+        ply_name = str(tmp / 'apbs')
+        csv_name = str(tmp / 'apbs-patches-ply.csv')
+        args = [pdb_name, pot_name, '--out', csv_name, '--ply_out', ply_name]
+        run_commandline(*args, surface_type='sas')
+        # check the number of vertices in the output
+        with open(ply_name + '-potential.ply') as f:
+            if msms.msms_available():
+                assert get_nth_line(f, 2).strip() == 'element vertex 40549'
+            else:
+                assert get_nth_line(f, 2).strip() == 'element vertex 67252'
+        surf = Surface.read_ply(ply_name + '-potential.ply')
+        crd = md.load(pdb_name).xyz[0]
+        surf_extent = surf.vertices.max(axis=0) - surf.vertices.min(axis=0)
+        crd_extent = crd.max(axis=0) - crd.min(axis=0)
+        scale_ratios = surf_extent / crd_extent
+        assert np.all(scale_ratios > 1)
+        assert np.all(scale_ratios < 1.2)
+
 
 
 def test_biggest_residue_contribution():

diff --git a/test/commandline_hydrophobic_test.py b/test/commandline_hydrophobic_test.py
@@ -5,12 +5,14 @@
 import inspect
 import os.path
 import shutil
-import tempfile
+from tempfile import TemporaryDirectory
 
 import pytest
 import numpy as np
 
 from surface_analyses.commandline_hydrophobic import main, parse_args, run_hydrophobic
+from surface_analyses.surface import Surface
+import mdtraj as md
 
 TrastuzumabRun = namedtuple('TrastuzumabRun', 'scale method expected_data')
 
@@ -60,7 +62,8 @@ def trastuzumab_run(request):
     return
 
 def run_commandline(parm, traj, scale, outfile, *args):
-    main([str(parm), str(traj), '--scale', str(scale), '--out', str(outfile)] + list(args))
+    args_s = [str(a) for a in args]
+    main([str(parm), str(traj), '--scale', str(scale), '--out', str(outfile)] + args_s)
 
 def test_output_consistent(trastuzumab_run):
     runtype = trastuzumab_run.method
@@ -86,7 +89,7 @@ def test_output_consistent(trastuzumab_run):
 
     if scale == 'wimley-white':
         scale = TRASTUZUMAB_PATH / 'wimley-white-scaled.csv'
-    with tempfile.TemporaryDirectory() as tmp:
+    with TemporaryDirectory() as tmp:
         out = Path(tmp) / 'out.npz'
         run_commandline(parm7, rst7, scale, out, *args)
         with np.load(out, allow_pickle=True) as npz:
@@ -101,17 +104,35 @@ def test_output_with_sc_norm():
     expected_fname = TRASTUZUMAB_PATH / 'jain-surfscore-sc-norm.npz'
     with np.load(expected_fname, allow_pickle=True) as npz:
         expected = dict(npz)
-    with tempfile.TemporaryDirectory() as tmp:
+    with TemporaryDirectory() as tmp:
         out = Path(tmp) / "out.npz"
         run_commandline(parm7, rst7, scale, out, *args)
         with np.load(out, allow_pickle=True) as npz:
             assert_outputs_equal(npz, expected)
 
 
+def test_surface_size():
+    scale = TRASTUZUMAB_PATH / 'glmnet.csv'
+    parm7 = TRASTUZUMAB_PATH / 'input.parm7'
+    rst7 = TRASTUZUMAB_PATH / 'input.rst7'
+    with TemporaryDirectory() as tmp:
+        out = Path(tmp) / "out.npz"
+        ply_out = Path(tmp) / "out.ply"
+        args = ['--potential', '--ply_out', ply_out]
+        run_commandline(parm7, rst7, scale, out, *args)
+        surf = Surface.read_ply(ply_out)
+        crd = md.load(rst7, top=parm7).xyz[0]
+        surf_extent = surf.vertices.max(axis=0) - surf.vertices.min(axis=0)
+        crd_extent = crd.max(axis=0) - crd.min(axis=0)
+        scale_ratios = surf_extent / crd_extent
+        assert np.all(scale_ratios > 1)
+        assert np.all(scale_ratios < 1.2)
+
+
 def test_rdkit_crippen():
     pdb = TESTS_PATH / '1csa-model1.pdb'
     args = ['--surfscore', '--smiles', CsA_SMILES]
-    with tempfile.TemporaryDirectory() as tmp:
+    with TemporaryDirectory() as tmp:
         out = Path(tmp) / "out.npz"
         run_commandline(pdb, pdb, 'rdkit-crippen', out, *args)
         with np.load(out, allow_pickle=True) as npz:

diff --git a/test/surface_test.py b/test/surface_test.py
@@ -17,7 +17,7 @@ def test_repr_no_error(minimal):
     repr(minimal)
 
 def test_convert_to_ply(minimal):
-    plydat = minimal.as_plydata(units_per_angstrom=1.)
+    plydat = minimal.as_plydata(units_per_nm=1.)
     assert len(plydat['vertex']) == minimal.n_vertices
     for i_dim, dim in enumerate('xyz'):
         assert np.allclose(plydat['vertex'][dim], minimal.vertices[:, i_dim])