-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_features.py
More file actions
89 lines (74 loc) · 3.44 KB
/
extract_features.py
File metadata and controls
89 lines (74 loc) · 3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# This script is responsible for extracting the feature vectors from the dataset
# and storing them for later classification training.
# Usage: python extract_features.py
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.applications import VGG16
from sklearn.preprocessing import LabelEncoder
from deeplearning import config
from imutils import paths
from tqdm import tqdm
import numpy as np
import pickle
import random
import os
# Load VGG16 with ImageNet weights and without its fully-connected head, so
# predict() returns convolutional feature maps rather than class scores.
model = VGG16(weights="imagenet", include_top=False)
le = None

# Create the output directory if it doesn't exist. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(config.CSV_PATH, exist_ok=True)

# Keras' load_img expects target_size as (height, width).
target_size = (config.IMAGE_HEIGHT, config.IMAGE_WIDTH)

# Loop over the dataset splits, extracting features for every image in each.
for split in (config.TRAIN_PATH, config.VAL_PATH, config.TEST_PATH):
    print('[INFO] extracting features from {} images'.format(split))

    # Collect the image paths inside the split directory.
    split_dir = os.path.sep.join([config.DATASET_PATH, split])
    image_paths = list(paths.list_images(split_dir))

    # Shuffle with a fixed seed so runs are reproducible, then read each
    # image's class label from the name of its parent directory.
    random.seed(42)
    random.shuffle(image_paths)
    labels = [p.split(os.path.sep)[-2] for p in image_paths]

    # The encoder is fitted once, on the labels of the first split processed,
    # and reused for the remaining splits so label ids stay consistent.
    if le is None:
        le = LabelEncoder()
        le.fit(labels)

    # One CSV file per split; the context managers guarantee that the file
    # and the progress bar are closed even if a batch fails midway.
    csv_path = os.path.sep.join([config.CSV_PATH, '{}.csv'.format(split)])
    with open(csv_path, 'w') as csv_file, tqdm(total=len(image_paths)) as progress:
        # Walk the image paths in batches of config.BATCH_SIZE.
        for start in range(0, len(image_paths), config.BATCH_SIZE):
            stop = start + config.BATCH_SIZE
            paths_batch = image_paths[start:stop]
            labels_batch = le.transform(labels[start:stop])

            # Load every image of the batch at the configured size and stack
            # them into a single array.
            images_batch = np.array([
                img_to_array(load_img(path, target_size=target_size))
                for path in paths_batch
            ])
            # Apply the ImageNet preprocessing to the whole batch at once
            # (the per-pixel result is the same as preprocessing one by one).
            images_batch = imagenet_utils.preprocess_input(images_batch)

            # Pass the images through the network and flatten each image's
            # feature maps into a single vector.
            features = model.predict(images_batch)
            features = features.reshape((features.shape[0], -1))

            # Write one "<label>, <features>" row per image.
            csv_file.writelines(
                '{}, {}\n'.format(label, ','.join(str(v) for v in vector))
                for label, vector in zip(labels_batch, features)
            )

            # Advance by the real batch length: the last batch may be
            # smaller than config.BATCH_SIZE.
            progress.update(len(paths_batch))

# Save the fitted label encoder to disk so the class-id mapping can be
# restored later.
with open(config.LE_PATH, 'wb') as le_file:
    le_file.write(pickle.dumps(le))