Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 64 additions & 32 deletions surface_analyses/commandline_electrostatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def parse_args(argv=None):
parser.add_argument('--apbs_dir', help="Directory in which intermediate files are stored when running APBS. Will be created if it does not exist.", type=str, default=None)
parser.add_argument('--probe_radius', type=float, help='Probe radius in nm', default=0.14)
parser.add_argument('-o', '--out', default=None, type=str, help='Output csv file.')
parser.add_argument('-ro', '--resout', default=None, type=str, help='Residue csv file.')
parser.add_argument(
'-c', '--patch_cutoff',
type=float,
Expand Down Expand Up @@ -148,6 +149,7 @@ def run_electrostatics(
apbs_dir: str = None,
probe_radius: float = 0.14,
out: str = None,
resout: str = None,
patch_cutoff: tuple = (2., -2.),
integral_cutoff: tuple = (0.3, -0.3),
surface_type: str = "sas",
Expand All @@ -174,6 +176,11 @@ def run_electrostatics(
else:
csv_outfile = open(out, "w")

if resout is None:
res_outfile = sys.stdout
else:
res_outfile = open(resout, "w")

ion_species = get_ion_species(ion_species)
# Renumber residues, takes care of insertion codes in PDB residue numbers
for i, res in enumerate(traj.top.residues,start=1):
Expand Down Expand Up @@ -237,27 +244,28 @@ def run_electrostatics(

# The patch searching
print('Finding patches')
# the griddata object is in Angstroms
values = griddata.interpolate(columns, surf.vertices * 10)[columns[0]]
patches = pd.DataFrame({
'positive': assign_patches(surf.faces, values > patch_cutoff[0]),
'negative': assign_patches(surf.faces, values < patch_cutoff[1]),
'area': vert_areas,
'atom': closest_atom,
'residue': residues[closest_atom],
'cdr': np.isin(residues, cdrs)[closest_atom],
'value': values,
})

values = griddata.interpolate(columns, surf.vertices * 10)[columns[0]]

# save values and atom in surf for consistency with commandline_hydrophobic
surf['values'] = values
surf['positive'] = assign_patches(surf.faces, values > patch_cutoff[0])
neg_patches = assign_patches(surf.faces, values < patch_cutoff[1])
# Give numbers higher than every positive patch to the negative patches
neg_patches[neg_patches != -1] += max(surf['positive'])
surf['negative'] = neg_patches
surf['value'] = values
surf['atom'] = closest_atom
surf['area'] = vert_areas
surf['residue'] = np.array(residues[closest_atom])
surf['cdr'] = np.isin(residues, cdrs)[closest_atom]

patches = surf.vertices_to_df()
#keep args.n_patches largest patches (n > 0) or smallest patches (n < 0) or patches with an area over the size cutoff
if n_patches != 0 or size_cutoff != 0:
#interesting interaction: setting a -n cutoff and size cutoff should yield the n smallest patches with an area over the size cutoff
replace_vals = {}
for patch_type in ('negative', 'positive'):
max_previous = 0
for patch_type in ('positive', 'negative'):
# sort patches by area and filter top n patches (or bottom n patches for n < 0)
# also we apply the size cutoff here. It defaults to 0, so should not do anything if not explicitly set as all areas should be > 0.
area = patches.query(f'{patch_type} != -1').groupby(f'{patch_type}').sum(numeric_only=True)['area']
Expand All @@ -267,16 +275,22 @@ def run_electrostatics(
filtered = order[:n_patches] if n_patches > 0 else order[n_patches:]
# set patches not in filtered to -1
patches.loc[~patches[patch_type].isin(filtered), patch_type] = -1

# build replacement dict to renumber patches in df according to size
order_map = {elem: i for i, elem in enumerate(order[:n_patches])}
order_map = {elem: i for i, elem in enumerate(filtered, start=max_previous)}
max_previous = max(order_map, key=order_map.get)
replace_vals[patch_type] = order_map
patches.replace(replace_vals, inplace=True)

output = csv.writer(csv_outfile)
output.writerow(['type', 'npoints', 'area', 'cdr', 'main_residue'])
write_patches(patches, output, 'positive')
write_patches(patches, output, 'negative')
# output patches information
patch_summ = summarize_patches(patches)
if not check_cdrs:
patch_summ = patch_summ.drop(columns='cdr')
patch_summ.to_csv(csv_outfile, index=False)

# output residues involved in each patch
residue_output = csv.writer(res_outfile)
residue_output.writerow(['nr', 'residues'])
write_residues(patches, residue_output)

# Compute the total solvent-accessible potential.
within_range, closest_atom, distance = grid.distance_to_spheres(centers=traj.xyz[0], rmax=1., radii=radii)
Expand Down Expand Up @@ -313,6 +327,8 @@ def run_electrostatics(
# close user output file, but not stdout
if out is not None:
csv_outfile.close()
if resout is not None:
res_outfile.close()

return {
'surface': surf,
Expand Down Expand Up @@ -388,18 +404,35 @@ def get_apbs_potential_from_mdtraj(traj, apbs_dir, pH, ion_species):
griddata = load_dx(dxfile, colname='DX')
return griddata

def write_patches(df, out, column):
groups = dict(list(df[df[column] != -1].groupby(column)))
for patch in sorted(groups.values(), key=lambda df: -df['area'].sum()):
out.writerow([
column,
len(patch),
patch['area'].sum(),
np.any(patch['cdr']),
biggest_residue_contribution(patch),
])


def summarize_patches(df, cols=['positive','negative']):
ix = 1
Row = namedtuple('Row', 'nr type npoints area value cdr main_residue')
output = []
for column in cols:
groups = dict(list(df[df[column] != -1].groupby(column)))
for patch in sorted(groups.values(), key=lambda df: -df['area'].sum()):
output.append(Row(
ix,
column,
len(patch),
patch['area'].sum(),
patch['value'].sum(),
np.any(patch['cdr']),
biggest_residue_contribution(patch)
))
ix += 1
return pd.DataFrame(output)

def write_residues(df, out, cols=['positive','negative']):
ix = 1
for column in cols:
groups = dict(list(df[df[column] != -1].groupby(column)))
for patch in sorted(groups.values(), key=lambda df: -df['area'].sum()):
out.writerow([
ix, ' '.join(sorted(patch['residue'].unique(), key=lambda x: int(x[3:])))
])
ix += 1

def biggest_residue_contribution(df):
"""Find the element in df['residue'] with the highest total contribution in df['area']."""
return (
Expand All @@ -411,7 +444,6 @@ def biggest_residue_contribution(df):
.index[-1]
)


def run_pdb2pqr(pdbfile, cwd=".", ff="AMBER", name_base="apbs", pH=None):
if not isinstance(cwd, pathlib.Path):
cwd = pathlib.Path(cwd)
Expand Down
10 changes: 10 additions & 0 deletions surface_analyses/surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import matplotlib.colors
import numpy as np
import plyfile
import pandas as pd

import msms.wrapper as msms

Expand Down Expand Up @@ -188,6 +189,15 @@ def to_dict(self, basename="surface"):
out[f"{basename}:data:{element}"] = self.data[element]
return out

def vertices_to_df(self):
"""
Output surface vertices and data as pandas DataFrame
"""
out = {}
out['x'], out['y'], out['z'] = self.vertices.T
for element in self.data:
out[f"{element}"] = self.data[element]
return pd.DataFrame(out)

def surfaces_from_dict(dictionary, basename="surfaces"):
"""Load several surfaces from a dictionary.
Expand Down
27 changes: 17 additions & 10 deletions test/commandline_electrostatic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ def run_commandline(pdb, dx, *args, **kwargs):
kwargs_list = []
for k, v in kwargs.items():
kwargs_list.extend(["--" + str(k), str(v)])
print(kwargs_list)
args_list = [str(arg) for arg in args]
with redirect_stdout(output):
# using the pdb as "topology" and "trajectory"
main([str(pdb), str(pdb), "--dx", str(dx)] + list(args) + kwargs_list)
main([str(pdb), str(pdb), "--dx", str(dx)] + args_list + kwargs_list)
return output.getvalue()


Expand All @@ -58,7 +58,8 @@ def test_parser_options_match_python_interface():
for opt in parser_options:
assert parser_options[opt] == python_options[opt].default

@pytest.fixture(params=['without', 'with'])

@pytest.fixture(params=[[], ['--check_cdrs']])
def with_or_without_cdrs(request):
yield request.param

Expand All @@ -73,14 +74,13 @@ def test_trastuzumab_sas_integrals(with_or_without_cdrs):
-1.86765861091,
]
)
if with_or_without_cdrs == 'with':
resout_fname = TRASTUZUMAB_PATH / 'resout.csv'
args = ['--resout', resout_fname] + with_or_without_cdrs
if "--check_cdrs" in args:
if shutil.which('hmmscan') is None:
pytest.skip('hmmscan was not found')
if not HAS_ANARCI:
pytest.skip('ANARCI is not available')
args = ['--check_cdrs']
else:
args = []
stdout = run_commandline(
TRASTUZUMAB_PATH / 'apbs-input.pdb',
TRASTUZUMAB_PATH / 'apbs-potential.dx',
Expand All @@ -95,9 +95,16 @@ def test_trastuzumab_sas_integrals(with_or_without_cdrs):
patches = pd.read_csv(str(TRASTUZUMAB_PATH / 'apbs-patches.csv'))
exp_fname = 'apbs-patches-msms.save' if msms.msms_available() else 'apbs-patches.save'
expected_patches = pd.read_csv(str(TRASTUZUMAB_PATH / exp_fname))
if with_or_without_cdrs == 'without':
expected_patches['cdr'] = False
if "--check_cdrs" not in args:
del expected_patches['cdr']
assert_frame_equal(patches, expected_patches)
resout_df = pd.read_csv(resout_fname)
expected_n_patches = 32 if msms.msms_available() else 36
assert len(resout_df) == expected_n_patches
assert resout_df.iloc[0]["residues"].startswith(
"TYR33 ARG50 ASN55 TYR57 THR58 ARG59 TYR60 ALA61 ASP62 LYS65 GLY66 TRP99 "
"ASP122 ILE123 GLN148 HIS212 TYR213 THR214 THR215 PRO216" # PRO217 is missing w/o msms
)


def get_nth_line(file, n):
Expand All @@ -112,7 +119,7 @@ def test_trastuzumab_ply_out():
TRASTUZUMAB_PATH / 'apbs-input.pdb',
TRASTUZUMAB_PATH / 'apbs-potential.dx',
'--out',
str(TRASTUZUMAB_PATH / 'apbs-patches.csv'),
str(TRASTUZUMAB_PATH / 'apbs-patches-ply.csv'),
'--ply_out',
str(TRASTUZUMAB_PATH / 'apbs'),
]
Expand Down
66 changes: 33 additions & 33 deletions test/trastuzumab/apbs-patches-msms.save
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
type,npoints,area,cdr,main_residue
positive,1725,4.182278508729034,True,ARG59
positive,1345,3.3018772409740245,True,LYS166
positive,474,1.1227447014770573,False,LYS224
positive,212,0.4780537234772543,False,PRO14
positive,196,0.42350239444585136,False,GLN82
positive,153,0.26352678455568534,True,THR74
positive,131,0.2623972855174914,True,ARG187
positive,113,0.2239198468055985,True,GLU1
positive,91,0.18047135288680483,False,ARG182
positive,82,0.17243666156532797,False,ARG187
positive,85,0.11763809617857156,True,TYR170
positive,24,0.04135049908954891,False,THR195
positive,10,0.027526301024288966,False,SER85
positive,12,0.02167615367273435,False,SER198
positive,12,0.019511131671895172,False,THR69
positive,7,0.015586532451777404,False,ARG87
positive,18,0.01556431292971975,True,PHE27
positive,8,0.006207044981287775,True,PHE219
positive,5,0.0058994152378983485,False,SER197
positive,3,0.005203025153503235,False,ARG182
positive,6,0.00519997560517816,False,ARG182
positive,3,0.003164115750466868,True,ARG145
positive,2,0.0025863522808405913,True,LYS30
negative,241,0.6363343407004047,True,ASP31
negative,268,0.6162607965834226,False,GLU89
negative,27,0.07296024063999074,False,ASP138
negative,25,0.04096139412400949,False,GLU226
negative,23,0.022811577815085794,True,THR32
negative,17,0.016368615194651954,True,LEU175
negative,14,0.015925282524936708,False,GLN13
negative,7,0.011160988364316836,True,GLY100
negative,6,0.010733508585959266,False,ALA40
nr,type,npoints,area,value,cdr,main_residue
1,positive,1725,4.182278508729034,5837.588495521277,True,ARG59
2,positive,1345,3.3018772409740245,4457.250193616104,True,LYS166
3,positive,474,1.1227447014770573,1615.377352677039,False,LYS224
4,positive,212,0.4780537234772543,551.4412348285368,False,PRO14
5,positive,196,0.42350239444585136,675.6514743166699,False,GLN82
6,positive,153,0.26352678455568534,540.226705945201,True,THR74
7,positive,131,0.2623972855174914,318.4519895515599,True,ARG187
8,positive,113,0.2239198468055985,265.7312049146501,True,GLU1
9,positive,91,0.18047135288680483,242.82859600619585,False,ARG182
10,positive,82,0.17243666156532797,188.3076970571323,False,ARG187
11,positive,85,0.11763809617857156,217.4192353502932,True,TYR170
12,positive,24,0.04135049908954891,70.28936614643578,False,THR195
13,positive,10,0.027526301024288966,20.7597361666323,False,SER85
14,positive,12,0.02167615367273435,25.6625185537019,False,SER198
15,positive,12,0.019511131671895172,25.213701850502137,False,THR69
16,positive,7,0.015586532451777404,14.131560782100696,False,ARG87
17,positive,18,0.01556431292971975,37.40732945242014,True,PHE27
18,positive,8,0.006207044981287775,17.677805714979243,True,PHE219
19,positive,5,0.0058994152378983485,10.40878235947409,False,SER197
20,positive,3,0.005203025153503235,6.0562606150358524,False,ARG182
21,positive,6,0.00519997560517816,12.244182295189493,False,ARG182
22,positive,3,0.003164115750466868,6.137556290116853,True,ARG145
23,positive,2,0.0025863522808405913,4.018059952417971,True,LYS30
24,negative,241,0.6363343407004047,-648.0796775146749,True,ASP31
25,negative,268,0.6162607965834226,-626.4097689813246,False,GLU89
26,negative,27,0.07296024063999074,-60.37267556867309,False,ASP138
27,negative,25,0.04096139412400949,-55.94862378146032,False,GLU226
28,negative,23,0.022811577815085794,-52.56402908640423,True,THR32
29,negative,17,0.016368615194651954,-37.110362416262035,True,LEU175
30,negative,14,0.015925282524936708,-31.015360920890828,False,GLN13
31,negative,7,0.011160988364316836,-14.59490620019469,True,GLY100
32,negative,6,0.010733508585959266,-13.172523925351646,False,ALA40
74 changes: 37 additions & 37 deletions test/trastuzumab/apbs-patches.save
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
type,npoints,area,cdr,main_residue
positive,2706,4.0555237192411315,True,ARG59
positive,2050,3.1358180610606072,True,LYS166
positive,714,1.0785564850219775,False,LYS224
positive,269,0.4349030355542176,False,PRO14
positive,269,0.4006016732560038,False,GLN82
positive,164,0.24193543018723437,True,ARG187
positive,146,0.23264465711205656,True,THR74
positive,138,0.2145502520054379,True,GLU1
positive,120,0.1716530068706561,False,ARG187
positive,119,0.1706755639411952,False,ARG182
positive,66,0.10186810414633858,True,TYR170
positive,22,0.033606791475904174,False,THR195
positive,30,0.030287938567198577,True,ALA79
positive,15,0.023222786999516153,False,SER85
positive,9,0.015245096806514388,False,TYR60
positive,14,0.011946238389706801,False,SER198
positive,6,0.007587021401074405,True,PHE27
positive,3,0.00633904083467011,False,ARG87
positive,2,0.004148372012423352,False,ARG139
positive,1,0.001185125942962865,True,THR218
positive,6,0.0010143504841835236,False,LYS166
positive,6,0.0006065153211238794,False,ILE34
positive,6,0.00025960786297218874,True,LYS30
positive,6,6.43316927835258e-05,False,LYS166
positive,6,6.1429446418515e-08,True,TRP99
negative,379,0.6080976480519841,True,ASP31
negative,375,0.5670296154251784,False,GLU89
negative,44,0.06279480835187692,False,ASP138
negative,18,0.03518602688154715,False,GLU226
negative,20,0.016015456201785128,True,THR32
negative,5,0.00913510504324222,False,LEU11
negative,5,0.0078237506319662,False,ALA40
negative,3,0.004150241366005503,True,ASP108
negative,5,0.003132833357085474,True,LEU175
negative,4,0.0016012810156098565,False,SER181
negative,6,0.001596131180122029,False,ALA40
nr,type,npoints,area,value,cdr,main_residue
1,positive,2706,4.0555237192411315,8906.732408338641,True,ARG59
2,positive,2050,3.1358180610606072,6378.02794764717,True,LYS166
3,positive,714,1.0785564850219775,2366.2263496670826,False,LYS224
4,positive,269,0.4349030355542176,663.5351588924126,False,PRO14
5,positive,269,0.4006016732560038,820.109078686167,False,GLN82
6,positive,164,0.24193543018723437,384.8357004032635,True,ARG187
7,positive,146,0.23264465711205656,442.7531807008791,True,THR74
8,positive,138,0.2145502520054379,308.98002828476854,True,GLU1
9,positive,120,0.1716530068706561,266.72787666433067,False,ARG187
10,positive,119,0.1706755639411952,307.27723743167127,False,ARG182
11,positive,66,0.10186810414633858,163.63140165864482,True,TYR170
12,positive,22,0.033606791475904174,54.60327632139041,False,THR195
13,positive,30,0.030287938567198577,232.70025916176803,True,ALA79
14,positive,15,0.023222786999516153,30.820802103676705,False,SER85
15,positive,9,0.015245096806514388,18.922786593196502,False,TYR60
16,positive,14,0.011946238389706801,30.10462995764804,False,SER198
17,positive,6,0.007587021401074405,12.370451415593246,True,PHE27
18,positive,3,0.00633904083467011,6.04540847008538,False,ARG87
19,positive,2,0.004148372012423352,4.1426211246149105,False,ARG139
20,positive,1,0.001185125942962865,2.110696531605022,True,THR218
21,positive,6,0.0010143504841835236,14.350174996542579,False,LYS166
22,positive,6,0.0006065153211238794,34.48725109895096,False,ILE34
23,positive,6,0.00025960786297218874,34.820354009268065,True,LYS30
24,positive,6,6.43316927835258e-05,17.98491281121401,False,LYS166
25,positive,6,6.1429446418515e-08,49.21569313975243,True,TRP99
26,negative,379,0.6080976480519841,-990.0929767279357,True,ASP31
27,negative,375,0.5670296154251784,-869.4265885055943,False,GLU89
28,negative,44,0.06279480835187692,-97.02849908589296,False,ASP138
29,negative,18,0.03518602688154715,-38.661829480754555,False,GLU226
30,negative,20,0.016015456201785128,-43.77045246436204,True,THR32
31,negative,5,0.00913510504324222,-10.87626482898136,False,LEU11
32,negative,5,0.0078237506319662,-10.770950367116772,False,ALA40
33,negative,3,0.004150241366005503,-6.058838802786218,True,ASP108
34,negative,5,0.003132833357085474,-10.606429320031229,True,LEU175
35,negative,4,0.0016012810156098565,-8.446501503306715,False,SER181
36,negative,6,0.001596131180122029,-15.624449931069849,False,ALA40
Loading