-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest.py
More file actions
54 lines (46 loc) · 1.74 KB
/
test.py
File metadata and controls
54 lines (46 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tool import model as md
from tool import config as cfg
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from keras.models import load_model
from tool.att import Attention
def preprocess_data(dataset):
    """
    Attach the precomputed feature table to a dataset and encode its labels.

    Args:
        dataset (pd.DataFrame): Input table. Columns named 'Entry' and
            'Sequence' are renamed to 'uniprot_id' and 'seq'. After the
            rename the table must contain 'uniprot_id' (the join key) and
            'label'.

    Returns:
        pd.DataFrame: The input rows left-joined on 'uniprot_id' with the
        table read from ``feature_esm2.feather``, keeping only rows whose
        'f1' column is not null. The 'label' column is overwritten with the
        integer codes produced by a LabelEncoder fitted on the labels of
        the remaining rows; the original label values are not kept.
    """
    feature = pd.read_feather(f'{cfg.FEATURE_PATH}feature_esm2.feather')
    dataset = dataset.rename(columns={'Entry': 'uniprot_id', 'Sequence': 'seq'})
    data_df = dataset.merge(feature, on='uniprot_id', how='left')

    # Rows without a match in the feature table have a null 'f1' after the
    # left join; drop them. The explicit copy makes the result an independent
    # frame so the column assignment below is not a chained assignment on a
    # filtered view (avoids pandas' SettingWithCopyWarning).
    data_df = data_df[data_df['f1'].notna()].copy()

    # NOTE(review): the encoder is fitted on this dataset only, so the integer
    # codes depend on which labels are present here — confirm they line up
    # with the class indices the downstream model was trained with.
    data_df['label'] = LabelEncoder().fit_transform(data_df['label'])
    return data_df
def reshape_features(data):
    """
    Reshape input data to have 3 dimensions.

    Args:
        data (array-like): Input data to be reshaped; anything accepted by
            ``np.array`` (ndarray, DataFrame, nested list, ...). The first
            axis is treated as the sample axis.

    Returns:
        np.ndarray: New array with shape (n_samples, 1, n_features), where
        n_features is the product of all trailing dimensions of the input
        (1 for one-dimensional input).
    """
    # Convert first so that inputs without a ``.shape`` attribute (e.g. plain
    # lists) are supported as well.
    arr = np.array(data)
    n_samples = arr.shape[0]
    # Compute the feature width explicitly instead of passing -1: NumPy cannot
    # infer a -1 dimension for zero-size arrays, which made empty inputs fail.
    n_features = int(np.prod(arr.shape[1:], dtype=np.int64))
    return arr.reshape(n_samples, 1, n_features)
def run(dataset, model_path="./model/deepsub.h5"):
    """
    Preprocess a dataset, run the saved model on it and return the labels.

    Args:
        dataset (pd.DataFrame): Raw input table, passed to preprocess_data.
        model_path (str): Path of the saved Keras model to load. Defaults to
            "./model/deepsub.h5".

    Returns:
        tuple: ``(labels, predicted_labels)`` where ``labels`` is the 'label'
        column of the preprocessed dataset as an array and
        ``predicted_labels`` is the argmax over axis 1 of the model output.
    """
    # Load and preprocess the dataset.
    dataset = preprocess_data(dataset)

    # NOTE(review): features are selected by position — everything from the
    # fourth column on. This presumably assumes the input table has exactly
    # three columns (id, sequence, label) ahead of the merged feature
    # columns; confirm against the callers.
    X_val = reshape_features(dataset.iloc[:, 3:])

    # compile=False: the model is loaded without compiling it, which is
    # enough for predict(). The custom Attention layer has to be registered
    # so the saved architecture can be deserialized.
    loaded_model = load_model(
        model_path,
        custom_objects={"Attention": Attention},
        compile=False,
    )
    predicted = loaded_model.predict(X_val)
    predicted_labels = np.argmax(predicted, axis=1)
    return dataset.label.values, predicted_labels