diff --git a/finches/epsilon_calculation.py b/finches/epsilon_calculation.py index 3d87e09..3f5cfad 100644 --- a/finches/epsilon_calculation.py +++ b/finches/epsilon_calculation.py @@ -14,6 +14,38 @@ from finches.utils import matrix_manipulation from finches import epsilon_stateless + +def get_attractive_repulsive_matrixes(matrix, null_interaction_baseline): + return epsilon_stateless.get_attractive_repulsive_matrices(matrix, null_interaction_baseline) + + +def mask_matrix(matrix, column_mask): + return epsilon_stateless.mask_matrix(matrix, column_mask) + + +def masked_matrix(matrix, column_mask): + return epsilon_stateless.mask_matrix(matrix, column_mask) + + +def flatten_matrix_to_vector(matrix, orientation=0): + return np.mean(matrix, axis=orientation) + + +def get_sequence_epsilon_vectors(sequence1, + sequence2, + X, + prefactor=None, + null_interaction_baseline=None, + use_charge_weighting=True, + use_aliphatic_weighting=True): + return epsilon_stateless.get_sequence_epsilon_vectors(sequence1, + sequence2, + X, + charge_prefactor=prefactor, + null_interaction_baseline=null_interaction_baseline, + use_charge_weighting=use_charge_weighting, + use_aliphatic_weighting=use_aliphatic_weighting) + # ------------------------------------------------------------------------------------------------- class InteractionMatrixConstructor: @@ -691,14 +723,14 @@ def calculate_epsilon_value(self, """ - # note this runs charge_prefactor and null_interaction_baseline - # as null which means the default values associated with this - # object will be used - return epsilon_stateless.get_sequence_epsilon_value(sequence1, - sequence2, - self, - use_charge_weighting=use_charge_weighting, - use_aliphatic_weighting=use_aliphatic_weighting) + w_matrix = self.calculate_weighted_pairwise_matrix(sequence1, + sequence2, + use_charge_weighting=use_charge_weighting, + use_aliphatic_weighting=use_aliphatic_weighting) + + transformed = w_matrix - (2.0 * self.null_interaction_baseline) + transformed[w_matrix == self.null_interaction_baseline] -= self.null_interaction_baseline + return float(np.sum(transformed) / transformed.shape[1]) ## ------------------------------------------------------------------------------ ## @@ -871,3 +903,5 @@ def __matrix2eps(in_matrix): return (everything, seq2_indices, seq1_indices) + +Interaction_Matrix_Constructor = InteractionMatrixConstructor diff --git a/finches/forcefields/mpipi.py b/finches/forcefields/mpipi.py index 4ea07c3..3ab8184 100644 --- a/finches/forcefields/mpipi.py +++ b/finches/forcefields/mpipi.py @@ -600,8 +600,12 @@ def wang_frenkel(r, sigma_ij, epsilon_ij, mu_ij=2, nu_ij=1): main_term3 = np.power(np.power(R_ij/r, 2*mu_ij) - 1, 2*nu_ij) return main_term1*main_term2*main_term3 - - +def mpipi_model(version='Mpipi_GGv1', *args, **kwargs): + version_aliases = { + 'mPiPi_GGv1': 'Mpipi_GGv1', + 'mPiPi_original': 'Mpipi_original', + } + return Mpipi_model(version=version_aliases.get(version, version), *args, **kwargs) diff --git a/finches/parsing_aminoacid_sequences.py b/finches/parsing_aminoacid_sequences.py index 16483c0..2e58625 100644 --- a/finches/parsing_aminoacid_sequences.py +++ b/finches/parsing_aminoacid_sequences.py @@ -15,6 +15,7 @@ """ import numpy as np from finches import sequence_tools +from finches.utils import matrix_manipulation # new characters for PIMMS aliphatic groups aliphatic_group1 = {'A':'a', 'L':'l', 'M':'m', 'I':'i', 'V':'v'} @@ -95,103 +96,8 @@ def get_charge_weighted_mask(sequence1, sequence2): matrix here, but this function does return """ - # - # NB - this could be rewritten in Cython for improved performance... - # - - # nb - hardcoded for now but could and probably should be altered to enable - # pH-dependent effects in the future - charges = ['R','K','E','D'] - - attractive_matrix = [] - repulsive_matrix = [] - - n2 = len(sequence2) - - # cycle through each residue - for i,r1 in enumerate(sequence1): - tmp_attractive = [] - tmp_repulsive = [] - - # if r1 is charged - if r1 in charges: - - # cycle through each residue in sequence 2 - for j,r2 in enumerate(sequence2): - - # initialize - w_attractive = 0 - w_repulsive = 0 - - # if the second residue is charged - if r2 in charges: - - # this generates a string of max 6 residues (for terminal residues 5 or 4 residues) - # which is basically a concatenated fragment - l_resis = sequence_tools.get_neighbors_window_of3(i,sequence1) + sequence_tools.get_neighbors_window_of3(j,sequence2) - - # for that fragment, calculate the local fcr and ncpr - [local_fcr, local_ncpr] = sequence_tools.calculate_FCR_and_NCPR(l_resis) - - # calculate the charge weight as |NCPR/FRC|. This means in one limit charge_weight goes - # to 1 if the fragment is all the same type of charged residues, and goes to 0 if the - # if the fragment is neutral, regardless of the fraction of charged residues. - chrg_weight = np.abs(local_ncpr / local_fcr) - - w_repulsive = chrg_weight - - - # alternative implementation - to move elsewhere at some point, but TL/DR was worse - # but ALSO SLOWER! Win win! - """ - - frag1 = sequence_tools.get_neighbors_window_of3(i, sequence1) - frag2 = sequence_tools.get_neighbors_window_of3(j, sequence2) - - [f1_fcr, f1_ncpr] = sequence_tools.calculate_FCR_and_NCPR(frag1) - [f2_fcr, f2_ncpr] = sequence_tools.calculate_FCR_and_NCPR(frag2) - - # if both fragments contain only one type of charged residue - if abs(f1_ncpr) == f1_fcr and abs(f2_ncpr) == f2_fcr: - - # calculate charge weight - q1q2 = f1_ncpr*f2_ncpr - - # opposite charge clusters - if q1q2 < 0: - - # negative value (attractive) - max value = 1 (|q1| and |q2| <= 1) - w_attractive = abs(q1q2) - - else: - - # positive value (repulsive) - max value = 1 (|q1| and |q1| <= 1) - w_repulsive = q1q2 - """ - - # w_attractive and w_repulsive are 0 unless both fragements only possess the same - # type of charged residues - tmp_attractive.append(w_attractive) - tmp_repulsive.append(w_repulsive) - - - else: - - # if r1 was not charged, create an empty vector - tmp_attractive = [0]*n2 - tmp_repulsive = [0]*n2 - - attractive_matrix.append(tmp_attractive) - repulsive_matrix.append(tmp_repulsive) - - # Assert matrices are the right shape - attractive_matrix = np.array(attractive_matrix) - repulsive_matrix = np.array(repulsive_matrix) - - assert attractive_matrix.shape == (len(sequence1), len(sequence2)) - assert repulsive_matrix.shape == (len(sequence1), len(sequence2)) - - return attractive_matrix, repulsive_matrix + attractive_matrix, repulsive_matrix = matrix_manipulation.charge_weighted_mask(sequence1, sequence2) + return np.asarray(attractive_matrix), np.asarray(repulsive_matrix) @@ -281,29 +187,15 @@ def get_aliphatic_weighted_mask(sequence1, sequence2): returns a 2D MATRIX mask the same shape of (len(sequence1), len(sequence2)) """ - multiplier_weighting = {'1_1':1, '1_2':1, '1_3':1, '2_1':1, '3_1':1, - '2_2':1.5, '2_3':1.5, '3_2':1.5, - '3_3':3} - - - ali_mask1 = get_aliphatic_groups(sequence1) - ali_mask2 = get_aliphatic_groups(sequence2) - n2 = len(sequence2) - matrix = [] - for i,v1 in enumerate(ali_mask1): - tmp = [] - if v1 > 0: - for j,v2 in enumerate(ali_mask2): - if v2 > 0: - tmp.append(multiplier_weighting[f'{v1}_{v2}']) - else: - tmp.append(1) - else: - tmp = [1]*n2 - - matrix.append(tmp) - - return np.array(matrix) + multiplier_weighting = np.ones((4, 4), dtype=float) + multiplier_weighting[2, 2] = 1.5 + multiplier_weighting[2, 3] = 1.5 + multiplier_weighting[3, 2] = 1.5 + multiplier_weighting[3, 3] = 3.0 + + ali_mask1 = np.asarray(get_aliphatic_groups(sequence1), dtype=np.int8) + ali_mask2 = np.asarray(get_aliphatic_groups(sequence2), dtype=np.int8) + return multiplier_weighting[ali_mask1[:, None], ali_mask2[None, :]] ## --------------------------------------------------------------------------- diff --git a/finches/tests/test_FH_diagrams.py b/finches/tests/test_FH_diagrams.py index a4e4b49..8349090 100644 --- a/finches/tests/test_FH_diagrams.py +++ b/finches/tests/test_FH_diagrams.py @@ -9,10 +9,10 @@ from finches.forcefields.calvados import calvados_model from finches import epsilon_calculation -from ..test_data.test_sequences import test_sequences, t0 +from .test_data.test_sequences import test_sequences, t0 # test are done in the context with the mPiPi_GGv1 model -L_model = mPiPi_model('mPiPi_GGv1') +L_model = mpipi_model('mPiPi_GGv1') X_local = epsilon_calculation.Interaction_Matrix_Constructor(L_model) ############################################################################################ @@ -28,4 +28,3 @@ # def build_DIELECTRIC_dependent_phase_diagrams(): pass - diff --git a/finches/tests/test_data/mPiPi_GGv1_seq_epsilon_and_vectors.npz b/finches/tests/test_data/mPiPi_GGv1_seq_epsilon_and_vectors.npz new file mode 100644 index 0000000..c8aa9aa Binary files /dev/null and b/finches/tests/test_data/mPiPi_GGv1_seq_epsilon_and_vectors.npz differ diff --git a/finches/tests/test_data/test_mPiPi_GGv1_heterotypic_matrix.npz b/finches/tests/test_data/test_mPiPi_GGv1_heterotypic_matrix.npz new file mode 100644 index 0000000..f6aec7d Binary files /dev/null and b/finches/tests/test_data/test_mPiPi_GGv1_heterotypic_matrix.npz differ diff --git a/finches/tests/test_data/test_mPiPi_GGv1_homotypic_matrix.npz b/finches/tests/test_data/test_mPiPi_GGv1_homotypic_matrix.npz new file mode 100644 index 0000000..342f63c Binary files /dev/null and b/finches/tests/test_data/test_mPiPi_GGv1_homotypic_matrix.npz differ diff --git a/finches/tests/test_data/test_mPiPi_GGv1_weighted_matrix.npz b/finches/tests/test_data/test_mPiPi_GGv1_weighted_matrix.npz new file mode 100644 index 0000000..fe0fecc Binary files /dev/null and b/finches/tests/test_data/test_mPiPi_GGv1_weighted_matrix.npz differ diff --git a/finches/tests/test_data/test_matrix_manipulation.npz b/finches/tests/test_data/test_matrix_manipulation.npz new file mode 100644 index 0000000..7c9e187 Binary files /dev/null and b/finches/tests/test_data/test_matrix_manipulation.npz differ diff --git a/finches/tests/test_data/update_test_data.py b/finches/tests/test_data/update_test_data.py index 8f49ea6..eb902df 100644 --- a/finches/tests/test_data/update_test_data.py +++ b/finches/tests/test_data/update_test_data.py @@ -11,8 +11,9 @@ import os import finches +from finches import epsilon_calculation -from test_sequences import test_sequences, test_condition_dict +from .test_sequences import test_sequences, test_condition_dict, t0 # .......................................................................................... # @@ -78,7 +79,7 @@ def write_test_weighted_matrix(filepath, model, model_name): otl2.append(ot) print(ot.shape) - np.savez(data_file, DEFAULT=otl, NOCHARGE=otl1, NOuse_aliphatic_weighting=otl2) + np.savez(data_file, DEFAULT=otl, NOCHARGE=otl1, NOALIPHATICS=otl2) #.......................................................................................... # @@ -101,7 +102,8 @@ def write_test_matrix_manipulation(filepath, model): otla, otlr = get_attractive_repulsive_matrixes(all_manipulated['test_matrix'],-0.15) all_manipulated['attractive_repulsive_matrixes'] = otla, otlr - mask = np.random.choice([0, 1], size=test_matrix.shape) + rng = np.random.default_rng(0) + mask = rng.choice([0, 1], size=test_matrix.shape) all_manipulated['bionary_mask'] = mask out_masked = epsilon_calculation.mask_matrix(test_matrix, mask) @@ -200,4 +202,3 @@ def write_FH_out_data(filepath, model, model_name): - diff --git a/finches/tests/test_epsilon_calculation.py b/finches/tests/test_epsilon_calculation.py index a5b8c45..e2c5734 100644 --- a/finches/tests/test_epsilon_calculation.py +++ b/finches/tests/test_epsilon_calculation.py @@ -1,5 +1,6 @@ import pytest #import un +from pathlib import Path import pandas as pd @@ -17,6 +18,9 @@ import numpy as np + +TEST_DATA_DIR = Path(__file__).resolve().parent / "test_data" + ############################################################################################ ## ## ## ## @@ -35,7 +39,7 @@ def test_Interaction_Matrix_Constructor(): # # def test_calculate_pairwise_homotypic_matrix(): - TRUE_matrixes = np.load('test_data/test_mPiPi_GGv1_homotypic_matrix.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'test_mPiPi_GGv1_homotypic_matrix.npz', allow_pickle=True) for i, t in enumerate(test_sequences): test_array = X_local.calculate_pairwise_homotypic_matrix(t) @@ -47,9 +51,9 @@ def test_calculate_pairwise_homotypic_matrix(): # # def test_calculate_pairwise_heterotypic_matrix(): - TRUE_matrixes = np.load('test_data/test_mPiPi_GGv1_heterotypic_matrix.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'test_mPiPi_GGv1_heterotypic_matrix.npz', allow_pickle=True) - for i, t in test_sequences: + for i, t in enumerate(test_sequences): test_array = X_local.calculate_pairwise_heterotypic_matrix(t,t0) # expect this file to match precomputed heterotypic matrix @@ -59,9 +63,9 @@ def test_calculate_pairwise_heterotypic_matrix(): # # def test_calculate_weighted_pairwise_matrix(): - TRUE_matrixes = np.load('test_data/test_mPiPi_GGv1_weighted_matrix.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'test_mPiPi_GGv1_weighted_matrix.npz', allow_pickle=True) - for i, t in test_sequences: + for i, t in enumerate(test_sequences): # test defaults test_array = X_local.calculate_weighted_pairwise_matrix(t,t0) @@ -88,12 +92,12 @@ def test_calculate_weighted_pairwise_matrix(): # # def test_get_attractive_repulsive_matrixes(): - TRUE_matrixes = np.load('test_data/test_matrix_manipulation.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'test_matrix_manipulation.npz', allow_pickle=True) # compare the heterotypic DEFAULT matrix of test1:t0 test_matrix = TRUE_matrixes['test_matrix'] - TRUEattractive_matrix = TRUE_matrixes['attractive_repulsive_matrixes'][0] - TRUErepulsive_matrix = TRUE_matrixes['attractive_repulsive_matrixes'][1] + TRUEattractive_matrix = np.asarray(TRUE_matrixes['attractive_repulsive_matrixes'][0], dtype=float) + TRUErepulsive_matrix = np.asarray(TRUE_matrixes['attractive_repulsive_matrixes'][1], dtype=float) attractive_matrix, repulsive_matrix = epsilon_calculation.get_attractive_repulsive_matrixes(test_matrix,-0.15) @@ -104,7 +108,7 @@ def test_get_attractive_repulsive_matrixes(): # # def test_mask_matrix(): - TRUE_matrixes = np.load('test_data/test_matrix_manipulation.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'test_matrix_manipulation.npz', allow_pickle=True) # compare the heterotypic DEFAULT matrix of test1:t0 test_matrix = TRUE_matrixes['test_matrix'] @@ -123,7 +127,7 @@ def test_mask_matrix(): # # def test_flatten_matrix_to_vector(): - TRUE_matrixes = np.load('test_data/test_matrix_manipulation.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'test_matrix_manipulation.npz', allow_pickle=True) # compare vectors to truth test_matrix = TRUE_matrixes['test_matrix'] @@ -148,7 +152,7 @@ def test_flatten_matrix_to_vector(): # # def test_get_sequence_epsilon_vectors(): - TRUE_matrixes = np.load('test_data/mPiPi_GGv1_seq_epsilon_and_vectors.npz', allow_pickle=True) + TRUE_matrixes = np.load(TEST_DATA_DIR / 'mPiPi_GGv1_seq_epsilon_and_vectors.npz', allow_pickle=True) # test t0 with test1 and t0 names = ['t', 't0'] @@ -157,13 +161,13 @@ def test_get_sequence_epsilon_vectors(): n = names[i] # compare vectors to truth default - [attractive_vector, repulsive_vector] = all_manipulated[f'{n}_t0_NOWEIGHTING'] + [attractive_vector, repulsive_vector] = TRUE_matrixes[f'{n}_t0_DEFAULT'] TESTattractive_vector, TESTrepulsive_vector = epsilon_calculation.get_sequence_epsilon_vectors(t,t0,X_local) assert np.allclose(TESTattractive_vector, attractive_vector) assert np.allclose(TESTrepulsive_vector, repulsive_vector) # compare vectors instance with passed baseline - [attractive_vector, repulsive_vector] = all_manipulated[f'{n}_t0_prefactor_baseline'] + [attractive_vector, repulsive_vector] = TRUE_matrixes[f'{n}_t0_prefactor_baseline'] TESTattractive_vector, TESTrepulsive_vector = epsilon_calculation.get_sequence_epsilon_vectors(t,t0,X_local, prefactor=0.25, null_interaction_baseline=-0.15) @@ -172,7 +176,7 @@ def test_get_sequence_epsilon_vectors(): # compare vectors instance with no weighting - [attractive_vector, repulsive_vector] = all_manipulated[f'{n}_t0_NOWEIGHTING'] + [attractive_vector, repulsive_vector] = TRUE_matrixes[f'{n}_t0_NOWEIGHTING'] TESTattractive_vector, TESTrepulsive_vector = epsilon_calculation.get_sequence_epsilon_vectors(t,t0,X_local, use_charge_weighting=False, use_aliphatic_weighting=False) @@ -183,4 +187,41 @@ def test_get_sequence_epsilon_vectors(): pass +# .......................................................................................... +# +# +def test_calculate_sliding_epsilon_matches_reference_window_scores(): + test_matrix = np.array( + [ + [-0.20, -0.10, 0.05, -0.15], + [0.10, -0.25, -0.05, 0.20], + [-0.30, 0.15, -0.12, -0.18], + [0.08, -0.22, 0.25, -0.05], + ], + dtype=float, + ) + baseline = -0.15 + window_size = 3 + + def _reference_window_score(submatrix): + attractive_matrix = np.where(submatrix < baseline, submatrix - baseline, -baseline) + repulsive_matrix = np.where(submatrix > baseline, submatrix - baseline, -baseline) + return np.sum(np.mean(attractive_matrix, axis=1)) + np.sum(np.mean(repulsive_matrix, axis=1)) + + expected = np.array( + [ + [_reference_window_score(test_matrix[0:3, 0:3]), _reference_window_score(test_matrix[0:3, 1:4])], + [_reference_window_score(test_matrix[1:4, 0:3]), _reference_window_score(test_matrix[1:4, 1:4])], + ] + ) + + observed, seq1_indices, seq2_indices = epsilon_calculation.matrix_manipulation.matrix_scan( + test_matrix, + window_size, + baseline, + ) + + assert np.allclose(observed, expected) + assert np.array_equal(seq1_indices, np.array([2, 3])) + assert np.array_equal(seq2_indices, np.array([2, 3])) diff --git a/finches/utils/matrix_manipulation.pyx b/finches/utils/matrix_manipulation.pyx index f3f39c2..9bdb4c4 100644 --- a/finches/utils/matrix_manipulation.pyx +++ b/finches/utils/matrix_manipulation.pyx @@ -8,6 +8,18 @@ import array from libc.stdlib cimport rand, srand, RAND_MAX +cdef inline bint _is_charged(char residue) nogil: + return residue == 'R' or residue == 'K' or residue == 'E' or residue == 'D' + + +cdef inline double _charge_value(char residue) nogil: + if residue == 'R' or residue == 'K': + return 1.0 + if residue == 'E' or residue == 'D': + return -1.0 + return 0.0 + + @cython.boundscheck(False) @cython.cdivision(True) @@ -28,6 +40,80 @@ def dict2matrix(str seq1, str seq2, dict lookup): return matrix +@cython.boundscheck(False) +@cython.cdivision(True) +def charge_weighted_mask(str seq1, str seq2): + cdef int i, j, start, end, idx, l1, l2 + cdef double total_charge + cdef int total_count + + l1 = len(seq1) + l2 = len(seq2) + + cdef cnp.ndarray[cnp.float64_t, ndim=2] attractive_matrix = np.zeros((l1, l2), dtype=np.float64) + cdef cnp.ndarray[cnp.float64_t, ndim=2] repulsive_matrix = np.zeros((l1, l2), dtype=np.float64) + + cdef cnp.ndarray[cnp.float64_t, ndim=1] charge_sum_1 = np.zeros(l1, dtype=np.float64) + cdef cnp.ndarray[cnp.int32_t, ndim=1] charge_count_1 = np.zeros(l1, dtype=np.int32) + cdef cnp.ndarray[cnp.uint8_t, ndim=1] charged_1 = np.zeros(l1, dtype=np.uint8) + + cdef cnp.ndarray[cnp.float64_t, ndim=1] charge_sum_2 = np.zeros(l2, dtype=np.float64) + cdef cnp.ndarray[cnp.int32_t, ndim=1] charge_count_2 = np.zeros(l2, dtype=np.int32) + cdef cnp.ndarray[cnp.uint8_t, ndim=1] charged_2 = np.zeros(l2, dtype=np.uint8) + + for i in range(l1): + if _is_charged(seq1[i]): + charged_1[i] = 1 + + start = i - 1 + if start < 0: + start = 0 + end = i + 2 + if end > l1: + end = l1 + + total_charge = 0.0 + total_count = 0 + for idx in range(start, end): + if _is_charged(seq1[idx]): + total_charge += _charge_value(seq1[idx]) + total_count += 1 + charge_sum_1[i] = total_charge + charge_count_1[i] = total_count + + for j in range(l2): + if _is_charged(seq2[j]): + charged_2[j] = 1 + + start = j - 1 + if start < 0: + start = 0 + end = j + 2 + if end > l2: + end = l2 + + total_charge = 0.0 + total_count = 0 + for idx in range(start, end): + if _is_charged(seq2[idx]): + total_charge += _charge_value(seq2[idx]) + total_count += 1 + charge_sum_2[j] = total_charge + charge_count_2[j] = total_count + + for i in range(l1): + if charged_1[i] == 0: + continue + for j in range(l2): + if charged_2[j] == 0: + continue + total_charge = charge_sum_1[i] + charge_sum_2[j] + total_count = charge_count_1[i] + charge_count_2[j] + repulsive_matrix[i, j] = abs(total_charge / total_count) + + return attractive_matrix, repulsive_matrix + + @cython.boundscheck(False) @cython.cdivision(True) def matrix_scan(double[:,:] w_matrix, int window_size, double null_interaction_baseline): @@ -69,8 +155,11 @@ def matrix_scan(double[:,:] w_matrix, int window_size, double null_interaction_b # define the variables - cdef int l1, l2, i, j, start, end, r1, r2; - cdef double row_sum, total_mean_sum; + cdef int l1, l2, start, end + cdef cnp.ndarray[cnp.float64_t, ndim=2] matrix = np.asarray(w_matrix, dtype=np.float64) + cdef cnp.ndarray[cnp.float64_t, ndim=2] transformed + cdef cnp.ndarray[cnp.float64_t, ndim=2] integral + cdef cnp.ndarray[cnp.float64_t, ndim=2] everything # get dimensions of matrix l1 = w_matrix.shape[0] @@ -81,55 +170,16 @@ def matrix_scan(double[:,:] w_matrix, int window_size, double null_interaction_b raise Exception('Window size is larger than matrix size, cannot calculate sliding epsilon') - # preallocate the various matrices being used - cdef cnp.ndarray[cnp.float64_t, ndim=2] everything = np.empty( [(l1-window_size)+1,(l2-window_size)+1], dtype=np.float64) - cdef cnp.ndarray[double, ndim=2] attractive_matrix = np.empty([window_size, window_size], dtype=np.double) - cdef cnp.ndarray[double, ndim=2] repulsive_matrix = np.empty([window_size, window_size], dtype=np.double) - cdef cnp.ndarray[double, ndim=2] sub = np.empty([window_size, window_size], dtype=np.double) - - - # calculate sliding epsilon for all possible intermolecular windows. - for i in range(0,(l1-window_size)+1): - - for j in range(0, (l2-window_size)+1): - - - # copy the memoryview into a numpy array - for r1 in range(i, i+window_size): - for r2 in range(j, j+window_size): - sub[r1-i,r2-j] = w_matrix[r1,r2] - - # construct the attractive and repulsive matrices, and then sum them - note we do this - # so brutally manually because it will then compile down to pure C which buys us all - # the speed! - for r1 in range(window_size): - for r2 in range(window_size): - - if sub[r1,r2] < null_interaction_baseline: - attractive_matrix[r1,r2] = sub[r1,r2] - null_interaction_baseline - else: - attractive_matrix[r1,r2] = - null_interaction_baseline - - if sub[r1,r2] > null_interaction_baseline: - repulsive_matrix[r1,r2] = sub[r1,r2] - null_interaction_baseline - else: - repulsive_matrix[r1,r2] = -null_interaction_baseline - - # here we sum and average the attractive and repulsive means - total_mean_sum = 0.0 - for r1 in range(window_size): - row_sum = 0.0 - for r2 in range(window_size): - row_sum += attractive_matrix[r1, r2] - total_mean_sum += row_sum / window_size # Add the mean of the current row to the total sum - - for r1 in range(window_size): - row_sum = 0.0 - for r2 in range(window_size): - row_sum += repulsive_matrix[r1, r2] - total_mean_sum += row_sum / window_size # Add the mean of the current row to the total sum - - everything[i,j] = total_mean_sum + transformed = matrix - (2.0 * null_interaction_baseline) + transformed[matrix == null_interaction_baseline] -= null_interaction_baseline + + integral = np.pad(transformed, ((1, 0), (1, 0)), mode="constant").cumsum(axis=0).cumsum(axis=1) + everything = ( + integral[window_size:, window_size:] + - integral[:-window_size, window_size:] + - integral[window_size:, :-window_size] + + integral[:-window_size, :-window_size] + ) / float(window_size) # finally, determine indices for sequence1 - note need +1 for indexing to move from Python