# 0) data import and setup as a PyTorch Dataset
import numpy as np
import pandas as pd
import torch
from sklearn import datasets
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler


class IrisDataset(Dataset):
    # data loading
    def __init__(self):
        iris = datasets.load_iris()
        feature = pd.DataFrame(iris.data, columns=iris.feature_names)
        target = pd.DataFrame(iris.target, columns=['target'])
        iris_data = pd.concat([target, feature], axis=1)
        # keep only the Iris-Setosa and Iris-Versicolour classes
        iris_data = iris_data[iris_data.target <= 1]
        # cast to float32 so the tensors match PyTorch's default dtype
        self.x = torch.from_numpy(np.array(iris_data, dtype=np.float32)[:, 1:])
        self.y = torch.from_numpy(np.array(iris_data, dtype=np.float32)[:, [0]])
        self.n_samples = self.x.shape[0]

    # indexing support: dataset[i] returns a (features, label) pair
    def __getitem__(self, index):
        return self.x[index], self.y[index]

    # return the number of samples in the dataset
    def __len__(self):
        return self.n_samples
dataset = IrisDataset()
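# (optional sanity check, not part of the original flow: with only the two
#  kept classes the dataset should hold 100 samples of 4 features each)
print(len(dataset))        # expected: 100
x0, y0 = dataset[0]
print(x0.shape, y0.shape)  # expected: torch.Size([4]) torch.Size([1])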
# helper that splits the dataset into train/validation samplers
def dataSplit(dataset, val_split=0.25, shuffle=False, random_seed=0):
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(val_split * dataset_size))
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    # SubsetRandomSampler draws only from the given indices, without replacement
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    return train_sampler, valid_sampler
# base data parameters
batch_size = 16
val_split = 0.25
shuffle_dataset = True
random_seed = 42

train_sampler, valid_sampler = dataSplit(
    dataset=dataset, val_split=val_split, shuffle=shuffle_dataset, random_seed=random_seed)
train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)
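# (optional sanity check, an assumption on my part rather than part of the
#  original code: pull one batch to confirm the loaders yield (batch, 4)
#  features and (batch, 1) targets, with 75 rows for training and 25 held out)
features, targets = next(iter(train_loader))
print(features.shape, targets.shape)           # expected: torch.Size([16, 4]) torch.Size([16, 1])
print(len(train_sampler), len(valid_sampler))  # expected: 75 25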
{%gist 2035d6a1eb4cf873427535f0f61a01df%}