Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ pre-defined. See Eisenberg and McLachlan (1986) or Wildman and Crippen (1999)
for references. Additionally, using `rdkit-crippen` will use a SMILES input to
assign Crippen parameters using the RdKit (useful for small molecules).

## Output formats
Both the electrostatics and hydrophobicity script allow writing surfaces to .ply files, using the [plyfile](https://github.com/dranjan/python-plyfile) library. The surfaces is colored using either the patches (where each patch gets a different color), or using the raw data (using the electrostatic potential at the vertices, or the hydrohobic potential approach of [Heiden et al.](https://doi.org/10.1007/BF00124359)). The raw data will also be written to PLY properties, and the coordinate units are chosen to match Pymol (units of 10^(-8)m).

Additionally, the hydrophobicity script supports output in the numpy npz format, which is more efficient when writing many surfaces.

## Examples
Visualize the hydrophobic potential on a non-protein molecule:
```
Expand Down
2 changes: 1 addition & 1 deletion surface_analyses/commandline_hydrophobic.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def run_hydrophobic(
if ply_out:
fnames = ply_filenames(ply_out, len(surfs))
for surf, fname in zip(surfs, fnames):
surf.write_ply(fname, coordinate_scaling=10)
surf.write_ply(fname)
output.update(surfaces_to_dict(surfs, basename="hydrophobic_potential"))
if out is not None:
np.savez(out, **output)
Expand Down
30 changes: 22 additions & 8 deletions surface_analyses/surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ def vertex_areas(self):
def __repr__(self):
return f"{self.__class__.__name__}({self.n_vertices} vertices, {self.n_faces} faces)"

def as_plydata(self, text=True, units_per_angstrom=0.1):
def as_plydata(self, text=True, units_per_nm=0.1):
"""Convert to a plyfile.PlyData object, while scaling coordinates

the default scaling units_per_angstrom=0.1 matches the PyMol CGO
the default scaling units_per_nm=0.1 matches the PyMol CGO
scaling factor of 1:100 and the nm -> A conversion.
"""
vertex_dtype = [("x", "f4"), ("y", "f4"), ("z", "f4")]
Expand All @@ -107,7 +107,7 @@ def as_plydata(self, text=True, units_per_angstrom=0.1):
vertex_dtype.append((k, PLY_DTYPES[dtype.kind]))
vertex_data = []
for i in range(self.n_vertices):
vert = list(self.vertices[i] * units_per_angstrom)
vert = list(self.vertices[i] * units_per_nm)
for k in keys:
vert.append(self.data[k][i])
vertex_data.append(tuple(vert))
Expand All @@ -121,14 +121,28 @@ def as_plydata(self, text=True, units_per_angstrom=0.1):
out = plyfile.PlyData([vertex, face], text=text)
return out

def write_ply(self, fname, coordinate_scaling=1.):
"Write a PLY file, scaling is relative to the default of 0.1."
def write_ply(self, fname, units_per_nm=0.1):
"Write a PLY file, scaling of 0.1 matches nm -> PyMol [10^-8 m]."
with open(fname, mode="wb") as f:
self.as_plydata(units_per_angstrom=coordinate_scaling*0.1).write(f)
self.as_plydata(units_per_nm=units_per_nm).write(f)

@classmethod
def from_plydata(cls, plydata):
pass
def from_plydata(cls, plydata, units_per_nm=0.1):
vert = plydata['vertex']
xyz = np.stack([vert['x'], vert['y'], vert['z']], axis=-1) / units_per_nm
faces = np.array(list(plydata['face']['vertex_indices']))
surf = Surface(xyz, faces)
props = set([p.name for p in vert.properties]) - {'x', 'y', 'z'}
for prop in props:
surf[prop] = vert[prop]
return surf

@classmethod
def read_ply(cls, fname, units_per_nm=0.1):
return cls.from_plydata(
plyfile.PlyData.read(fname),
units_per_nm=units_per_nm,
)

@classmethod
def isosurface(cls, grid, values, isovalue, gradient_direction='descent'):
Expand Down
41 changes: 26 additions & 15 deletions test/commandline_electrostatic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
# -*- coding: utf-8 -*-

from surface_analyses.commandline_electrostatic import main, biggest_residue_contribution, parse_args, run_electrostatics
from surface_analyses.surface import Surface

from contextlib import redirect_stdout
import inspect
import io
from pathlib import Path
import os
import shutil
from unittest.mock import patch
from tempfile import TemporaryDirectory

import mdtraj as md
import pandas as pd
from pandas.testing import assert_frame_equal
import numpy as np
Expand Down Expand Up @@ -115,21 +119,28 @@ def get_nth_line(file, n):


def test_trastuzumab_ply_out():
args = [
TRASTUZUMAB_PATH / 'apbs-input.pdb',
TRASTUZUMAB_PATH / 'apbs-potential.dx',
'--out',
str(TRASTUZUMAB_PATH / 'apbs-patches-ply.csv'),
'--ply_out',
str(TRASTUZUMAB_PATH / 'apbs'),
]
run_commandline(*args, surface_type="sas")
# check the number of vertices in the output
with open(TRASTUZUMAB_PATH / 'apbs-potential.ply') as f:
if msms.msms_available():
assert get_nth_line(f, 2).strip() == "element vertex 40549"
else:
assert get_nth_line(f, 2).strip() == "element vertex 67252"
with TemporaryDirectory() as tmp_str:
tmp = Path(tmp_str)
pdb_name = str(TRASTUZUMAB_PATH / 'apbs-input.pdb')
pot_name = str(TRASTUZUMAB_PATH / 'apbs-potential.dx')
ply_name = str(tmp / 'apbs')
csv_name = str(tmp / 'apbs-patches-ply.csv')
args = [pdb_name, pot_name, '--out', csv_name, '--ply_out', ply_name]
run_commandline(*args, surface_type='sas')
# check the number of vertices in the output
with open(ply_name + '-potential.ply') as f:
if msms.msms_available():
assert get_nth_line(f, 2).strip() == 'element vertex 40549'
else:
assert get_nth_line(f, 2).strip() == 'element vertex 67252'
surf = Surface.read_ply(ply_name + '-potential.ply')
crd = md.load(pdb_name).xyz[0]
surf_extent = surf.vertices.max(axis=0) - surf.vertices.min(axis=0)
crd_extent = crd.max(axis=0) - crd.min(axis=0)
scale_ratios = surf_extent / crd_extent
assert np.all(scale_ratios > 1)
assert np.all(scale_ratios < 1.2)



def test_biggest_residue_contribution():
Expand Down
31 changes: 26 additions & 5 deletions test/commandline_hydrophobic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import inspect
import os.path
import shutil
import tempfile
from tempfile import TemporaryDirectory

import pytest
import numpy as np

from surface_analyses.commandline_hydrophobic import main, parse_args, run_hydrophobic
from surface_analyses.surface import Surface
import mdtraj as md

TrastuzumabRun = namedtuple('TrastuzumabRun', 'scale method expected_data')

Expand Down Expand Up @@ -60,7 +62,8 @@ def trastuzumab_run(request):
return

def run_commandline(parm, traj, scale, outfile, *args):
main([str(parm), str(traj), '--scale', str(scale), '--out', str(outfile)] + list(args))
args_s = [str(a) for a in args]
main([str(parm), str(traj), '--scale', str(scale), '--out', str(outfile)] + args_s)

def test_output_consistent(trastuzumab_run):
runtype = trastuzumab_run.method
Expand All @@ -86,7 +89,7 @@ def test_output_consistent(trastuzumab_run):

if scale == 'wimley-white':
scale = TRASTUZUMAB_PATH / 'wimley-white-scaled.csv'
with tempfile.TemporaryDirectory() as tmp:
with TemporaryDirectory() as tmp:
out = Path(tmp) / 'out.npz'
run_commandline(parm7, rst7, scale, out, *args)
with np.load(out, allow_pickle=True) as npz:
Expand All @@ -101,17 +104,35 @@ def test_output_with_sc_norm():
expected_fname = TRASTUZUMAB_PATH / 'jain-surfscore-sc-norm.npz'
with np.load(expected_fname, allow_pickle=True) as npz:
expected = dict(npz)
with tempfile.TemporaryDirectory() as tmp:
with TemporaryDirectory() as tmp:
out = Path(tmp) / "out.npz"
run_commandline(parm7, rst7, scale, out, *args)
with np.load(out, allow_pickle=True) as npz:
assert_outputs_equal(npz, expected)


def test_surface_size():
scale = TRASTUZUMAB_PATH / 'glmnet.csv'
parm7 = TRASTUZUMAB_PATH / 'input.parm7'
rst7 = TRASTUZUMAB_PATH / 'input.rst7'
with TemporaryDirectory() as tmp:
out = Path(tmp) / "out.npz"
ply_out = Path(tmp) / "out.ply"
args = ['--potential', '--ply_out', ply_out]
run_commandline(parm7, rst7, scale, out, *args)
surf = Surface.read_ply(ply_out)
crd = md.load(rst7, top=parm7).xyz[0]
surf_extent = surf.vertices.max(axis=0) - surf.vertices.min(axis=0)
crd_extent = crd.max(axis=0) - crd.min(axis=0)
scale_ratios = surf_extent / crd_extent
assert np.all(scale_ratios > 1)
assert np.all(scale_ratios < 1.2)


def test_rdkit_crippen():
pdb = TESTS_PATH / '1csa-model1.pdb'
args = ['--surfscore', '--smiles', CsA_SMILES]
with tempfile.TemporaryDirectory() as tmp:
with TemporaryDirectory() as tmp:
out = Path(tmp) / "out.npz"
run_commandline(pdb, pdb, 'rdkit-crippen', out, *args)
with np.load(out, allow_pickle=True) as npz:
Expand Down
2 changes: 1 addition & 1 deletion test/surface_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_repr_no_error(minimal):
repr(minimal)

def test_convert_to_ply(minimal):
plydat = minimal.as_plydata(units_per_angstrom=1.)
plydat = minimal.as_plydata(units_per_nm=1.)
assert len(plydat['vertex']) == minimal.n_vertices
for i_dim, dim in enumerate('xyz'):
assert np.allclose(plydat['vertex'][dim], minimal.vertices[:, i_dim])
Expand Down
Loading