-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathdataloader.py
95 lines (85 loc) · 3.58 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import json
import cv2
import numpy as np
class _StandardScaler:
    """Minimal NumPy stand-in for sklearn's ``preprocessing.StandardScaler``.

    Exposes ``mean_``, ``scale_`` and ``transform`` so callers that held on
    to the scaler returned by ``dataloader.scaled_data`` keep working.
    """

    def __init__(self, mean, scale):
        self.mean_ = mean      # per-feature mean fitted on the training data
        self.scale_ = scale    # per-feature std (population, ddof=0)

    def transform(self, data):
        """Return ``(data - mean_) / scale_`` as a float64 numpy array."""
        return (np.asarray(data, dtype=np.float64) - self.mean_) / self.scale_


class dataloader:
    """Loads a hand-image dataset (``training/`` and ``testing/`` folders
    plus JSON label files) and offers helpers for feature scaling and for
    taking a per-class subset of the data.

    NOTE(review): lowercase class name kept to preserve the public interface;
    PEP 8 would prefer ``DataLoader``.
    """

    def __init__(self, path, label_file, test_label):
        """Load all images and labels eagerly.

        Args:
            path: dataset root; expected to end with a path separator,
                because paths are built by plain string concatenation.
            label_file: name of the training-label JSON file under *path*.
            test_label: name of the testing-label JSON file under *path*.

        Raises:
            AssertionError: if either label file does not exist.
        """
        self.train_path = path + "training/"
        self.test_path = path + "testing/"
        self.label_path = path + label_file
        self.test_label_path = path + test_label
        self.train_data = []        # flattened (1-D) training images
        self.train_images = []      # resized training images (H x W x 3)
        self.train_file_name = []   # filenames of successfully loaded train images
        self.test_data = []
        self.test_images = []
        self.test_file_name = []
        self.labels_train = []
        self.labels_test = []
        # Number of consecutive samples per class in the (sorted) dataset;
        # smaller_dataset() uses this as the stride between classes.
        self.TOTAL_SAMPLES_PER_CLASS = 100
        self.load_hand_dataset(self.train_path, self.test_path)
        # load_labels() asserts existence itself, so no duplicate checks here.
        self.load_labels(self.label_path, self.test_label_path)

    def _assert_exist(self, label_path):
        """Assert that *label_path* exists on disk.

        NOTE: ``assert`` is stripped under ``python -O``; kept (rather than
        raising FileNotFoundError) so callers still see AssertionError.
        """
        # Fixed typo in the message: "availble" -> "available".
        assert os.path.exists(label_path), 'File is not available: %s' % label_path

    def load_labels(self, label_path, test_label):
        """Load training and testing labels into ``self.labels_train`` /
        ``self.labels_test``.

        Each file must be a JSON object with a ``"labels"`` key.
        """
        self._assert_exist(label_path)
        self._assert_exist(test_label)
        with open(label_path, 'r') as f:
            self.labels_train = json.load(f)["labels"]
        # Do not shadow the `test_label` parameter with the loaded dict.
        with open(test_label, 'r') as f:
            self.labels_test = json.load(f)["labels"]

    def scaled_data(self, train_data, test_data):
        """Standardize features to zero mean / unit variance, fit on the
        training data only.

        BUGFIX: the previous body called ``preprocessing.StandardScaler``
        but sklearn was never imported, so this method always raised
        ``NameError``. Reimplemented with NumPy using the same semantics
        (population std, ddof=0; zero-variance features left unscaled).

        Returns:
            (scaled_train, scaled_test, scaler) where ``scaler`` exposes
            ``mean_``, ``scale_`` and ``transform()`` like sklearn's
            StandardScaler.
        """
        train_arr = np.asarray(train_data, dtype=np.float64)
        mean = train_arr.mean(axis=0)
        scale = train_arr.std(axis=0)                   # ddof=0, as StandardScaler
        scale = np.where(scale == 0.0, 1.0, scale)      # constant features pass through
        scaler = _StandardScaler(mean, scale)
        return scaler.transform(train_arr), scaler.transform(test_data), scaler

    def load_hand_dataset(self, train_path, test_path):
        """Load, resize and flatten every readable image in the training
        and testing directories (sorted by filename for a deterministic,
        label-aligned order).
        """
        self._load_split(train_path, self.train_file_name,
                         self.train_images, self.train_data)
        self._load_split(test_path, self.test_file_name,
                         self.test_images, self.test_data)

    def _load_split(self, directory, names, images, flat):
        """Read one directory of images into the given output lists."""
        WIDTH = 256
        HEIGHT = 256
        for filename in sorted(os.listdir(directory)):
            image = cv2.imread(directory + filename)
            if image is None:
                # cv2.imread returns None for unreadable / non-image files;
                # skip them instead of crashing inside cv2.resize.
                continue
            image = cv2.resize(image, (WIDTH, HEIGHT),
                               interpolation=cv2.INTER_AREA)
            names.append(filename)   # record only successfully loaded files
            images.append(image)
            # Flatten to WIDTH * HEIGHT * 3 channels (was hard-coded 196608).
            flat.append(np.asarray(image).reshape(WIDTH * HEIGHT * 3))

    def smaller_dataset(self, dataset, no_samples_per_class, no_of_classes):
        """Return the first *no_samples_per_class* samples of each of the
        first *no_of_classes* classes, plus matching 1-based labels.

        Assumes *dataset* holds ``TOTAL_SAMPLES_PER_CLASS`` consecutive
        samples per class, in class order (as produced by the sorted load).
        """
        new_dataset = []
        labels = []
        for class_idx in range(no_of_classes):
            start = class_idx * self.TOTAL_SAMPLES_PER_CLASS
            new_dataset.extend(dataset[start:start + no_samples_per_class])
            labels.extend([class_idx + 1] * no_samples_per_class)
        return new_dataset, labels