The detailed description of task and the dataset, including the report and source code please see in Github repository
Some Utils Functions Definition
# import useful packages
import time
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from scipy.io import loadmat
def generate_k_folders(dataset, k):
"""
Generate K-folders
Input: dataset and k
Output: a list contains k dictionary, each dictionary contains training set, validation set and testing set
"""
x = dataset["x_train"]
y = dataset["y_train"]
x_test = dataset["x_test"]
y_test = dataset["y_test"]
k_folders = []
for i in range(k):
if i < (k-1):
a = i*int(x.shape[0]/k)
b = (i+1)*int(x.shape[0]/k)
k_folders.append({
"x_train": torch.cat((x[:a], x[b:]), dim=0),
"y_train": torch.cat((y[:a], y[b:])),
"x_val": x[a:b],
"y_val": y[a:b],
"x_test": x_test,
"y_test": y_test
})
else:
a = i*int(x.shape[0]/k)
k_folders.append({
"x_train": x[:a],
"y_train": y[:a],
"x_val": x[a:],
"y_val": y[a:],
"x_test": x_test,
"y_test": y_test
})
return k_folders
def unzip_dataset(dataset):
"""
upzip dataset
"""
x_train = dataset["x_train"]
y_train = dataset["y_train"]
x_val = dataset["x_val"]
y_val = dataset["y_val"]
x_test = dataset["x_test"]
y_test = dataset["y_test"]
return x_train, y_train, x_val, y_val, x_test, y_test
def confusion_mat_evaluate(y_test, y_pred):
"""
Evaluate the model performance by confusion matrix
Input: y_predict and the truth label y_test
Output: accuracy, precision, recall, f1
"""
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average="macro")
recall = metrics.recall_score(y_test, y_pred, average="macro")
f1 = metrics.f1_score(y_test, y_pred, average="macro")
return accuracy, precision, recall, f1
def read_bi_data(dataset):
"""
Read binary-class dataset
Input: (numpy.array) dataset
Output: a list consists of x_train, y_train, x_test and y_test
"""
x_train = torch.from_numpy(dataset['train_X']).type(torch.FloatTensor).cuda()
y_train = torch.from_numpy(dataset['train_Y']).type(torch.FloatTensor).cuda()
x_test = torch.from_numpy(dataset['test_X']).type(torch.FloatTensor).cuda()
y_test = torch.from_numpy(dataset['test_Y']).type(torch.FloatTensor).cuda()
dataset = {
'x_train' : x_train,
'y_train' : y_train,
'x_test' : x_test,
'y_test' : y_test
}
return dataset
def read_multi_data():
"""
Read multi-class dataset
Output: a list consists of x_train, y_train, x_test and y_test
"""
x_train = torch.from_numpy(loadmat("datasets/multi-class/train_images.mat")["train_images"]).type(torch.FloatTensor).cuda()
y_train = torch.from_numpy(loadmat("datasets/multi-class/train_labels.mat")["train_labels"]).type(torch.LongTensor).cuda()
y_train = y_train.t().squeeze(dim=-1)
x_test = torch.from_numpy(loadmat("datasets/multi-class/test_images.mat")["test_images"]).type(torch.FloatTensor).cuda()
y_test = torch.from_numpy(loadmat("datasets/multi-class/test_labels.mat")["test_labels"]).type(torch.LongTensor).cuda()
y_test = y_test.t().squeeze(dim=-1)
dataset = {
'x_train' : x_train,
'y_train' : y_train,
'x_test' : x_test,
'y_test' : y_test
}
return dataset
Train Pytorch NN on Five Classifcation Data Sets
# Load data
breast_cancer_data = np.load("datasets/bi-class/breast-cancer.npz")
diabetes_data = np.load("datasets/bi-class/diabetes.npz")
digit_data = np.load("datasets/bi-class/digit.npz")
iris_data = np.load("datasets/bi-class/iris.npz")
wine_data = np.load("datasets/bi-class/wine.npz")
def train_bi_nn_model(dataset, H_list, device, learning_rate=1e-2, iteration=5000):
x_train, y_train, x_val, y_val, x_test, y_test = unzip_dataset(dataset)
N = x_train.shape[0]
D_in = x_train.shape[1]
D_out = 1
# Binary Cross Entropy Loss
loss_fn = torch.nn.BCELoss()
# The result table
# Each row preserves the related result of corresponding H
# 8 means we have 8 items to save => [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, training_time]
res_table = np.zeros((len(H_list),8))
res_table_ind = 0
for H in H_list:
model = torch.nn.Sequential(
torch.nn.Linear(D_in, H),
torch.nn.ReLU(),
torch.nn.Linear(H, D_out),
torch.nn.Sigmoid()
).to(device)
loss_history = []
accuracy_val = []
correct = 0.0
best_accuracy_val = 0.0
best_iteration = 0.0
best_model = None
# SGD optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
t0 = time.time()
for t in range(iteration):
y_train_pred = model(x_train).squeeze(dim=-1) # squeeze 2D of shape(x,1) to 1D of shape(x,)
loss = loss_fn(y_train_pred, y_train)
loss_history.append(loss.item())
# Transfer the prediction result from probability to [0,1] label
y_val_pred = model(x_val).squeeze(dim=-1)
y_val_pred[y_val_pred >= 0.5] = 1.0
y_val_pred[y_val_pred < 0.5] = 0.0
# Calculate accuracy on the validation set
correct = (y_val_pred == y_val).sum().item()
accuracy = correct / y_val.shape[0]
accuracy_val.append(accuracy)
# Save the best model and best accuracy
if accuracy > best_accuracy_val:
best_accuracy_val = accuracy
best_iteration = t
best_model = model
if t % 1000 == 0:
print("iteration: %s/%s" % (t, iteration))
optimizer.zero_grad()
# Backward pass
loss.backward()
# Update parameters
optimizer.step()
# Compute training time
t1 = time.time()
print("training time = %s(s)" % (t1 - t0))
# Use the best model to predict on testing dataset
y_test_pred = best_model(x_test).squeeze(dim=-1)
# Detach data from the graph and transfer to numpy array
yt = y_test.cpu().detach().numpy()
yp = y_test_pred.cpu().detach().numpy()
# Compute AUC (Area Under ROC Curve)
auc = metrics.roc_auc_score(yt, yp)
# Compute accuracy, precision, recall and f1
yp[yp >= 0.5] = 1.0 # probalility >= 0.5 atached to positive label
yp[yp < 0.5] = 0.0 # probability < 0.5 atached to negative label
accuracy_test, precision, recall, f1 = confusion_mat_evaluate(yt, yp)
# Save all the results in res_table
res_table[res_table_ind] += np.array([H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, t1-t0])
res_table_ind += 1
# Output the result
print("The best model: iteration = %s" % (best_iteration))
print("On validation dataset: accuracy = %s" % (best_accuracy_val))
print("On testing dataset: accuracy = %s, auc = %s, precision = %s, recall = %s, f1 = %s" \
% (accuracy_test, auc, precision, recall, f1))
# Plot the loss curve and varlidation accuracy curve
fig = plt.figure(figsize=(15,4))
ax1 = plt.subplot(1,2,1)
ax2 = plt.subplot(1,2,2)
plt.sca(ax1)
plt.title("H = %s" % H)
plt.xlabel("iteration")
plt.ylabel("loss")
plt.plot(range(iteration), loss_history)
plt.sca(ax2)
plt.title("H = %s" % H)
plt.xlabel("iteration")
plt.ylabel("accuracy_val")
plt.plot(range(iteration), accuracy_val)
plt.show()
# Return the result tables
return res_table
def framework_run(data, device, k, H_list, lr, iteration):
# Generate K-folders
k_folders = generate_k_folders(read_bi_data(breast_cancer_data), k)
# The result table
# Each row preserves the related result of corresponding H
# 8 means we have 8 items to save => [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, training_time]
res_table = np.zeros((len(H_list), 8))
# Traversal all folders dataset
for index in range(k):
print("K-Folder index = %s" % index)
# Sum up the result table
res_table += train_bi_nn_model(k_folders[index], H_list, device, lr, iteration)
# Return the average table
return np.round(res_table / k, decimals=3)
Testing part
If this part running successfully, it means that all functions works well.
device = torch.device('cuda')
framework_run(breast_cancer_data, device, k=5, H_list=[5,6], lr=5e-3, iteration=500)
Training on five binary classification data sets
# breast_data features = 10 => H* = 3, lr = 1e-2
framework_run(breast_cancer_data, device, k=5, H_list=[1,2,3,4,5,6,7], lr=1e-2, iteration=2500)
H_breastNN | Val_Accuracy | Test_Accuracy | AUC | Precision | Recall | F1 | Training Time |
---|
1 | 0.892 | 0.901 | 0.892 | 0.838 | 0.866 | 0.848 | 9.782(s) |
2 | 0.973 | 0.963 | 0.997 | 0.962 | 0.957 | 0.96 | 9.643(s) |
3 | 0.973 | 0.969 | 0.997 | 0.967 | 0.966 | 0.966 | 9.4(s) |
4 | 0.965 | 0.966 | 0.998 | 0.964 | 0.962 | 0.963 | 10.327(s) |
5 | 0.963 | 0.968 | 0.997 | 0.965 | 0.964 | 0.965 | 11.125(s) |
6 | 0.965 | 0.968 | 0.997 | 0.965 | 0.964 | 0.965 | 11.011(s) |
7 | 0.973 | 0.968 | 0.998 | 0.965 | 0.964 | 0.965 | 10.401(s) |
# digit_data features = 64 => H* = 10, lr = 2e-3
framework_run(digit_data, device, k=5, H_list=[5,6,7,8,9,10], lr=2e-3, iteration=2500)
H_digitNN | Val_Accuracy | Test_Accuracy | AUC | Precision | Recall | F1 | Training Time |
---|
5 | 0.914 | 0.859 | 0.987 | 0.818 | 0.8 | 0.793 | 9.569(s) |
6 | 0.947 | 0.931 | 0.998 | 0.951 | 0.903 | 0.92 | 9.327(s) |
7 | 0.927 | 0.935 | 0.997 | 0.956 | 0.908 | 0.924 | 9.11 (s) |
8 | 0.941 | 0.929 | 0.998 | 0.951 | 0.9 | 0.918 | 11.058(s) |
9 | 0.919 | 0.922 | 0.997 | 0.948 | 0.89 | 0.907 | 11.388(s) |
10 | 0.956 | 0.94 | 0.997 | 0.958 | 0.915 | 0.93 | 10.615(s) |
# diabetes_data features = 8 => H* = 4, lr = 5e-2
framework_run(diabetes_data, device, k=5, H_list=[1,2,3,4,5,6,7,8], lr=5e-2, iteration=2500)
H_diabetesNN | Val_Accuracy | Test_Accuracy | AUC | Precision | Recall | F1 | Training Time |
---|
1 | 0.926 | 0.9 | 0.897 | 0.833 | 0.867 | 0.846 | 9.412(s) |
2 | 0.973 | 0.965 | 0.997 | 0.961 | 0.961 | 0.961 | 9.183(s) |
3 | 0.967 | 0.968 | 0.997 | 0.964 | 0.966 | 0.965 | 9.584(s) |
4 | 0.971 | 0.969 | 0.997 | 0.966 | 0.967 | 0.966 | 9.897(s) |
5 | 0.973 | 0.968 | 0.997 | 0.965 | 0.965 | 0.965 | 9.909(s) |
6 | 0.976 | 0.965 | 0.997 | 0.961 | 0.962 | 0.961 | 10.188(s) |
7 | 0.971 | 0.966 | 0.997 | 0.963 | 0.963 | 0.963 | 10.127(s) |
8 | 0.971 | 0.968 | 0.997 | 0.963 | 0.966 | 0.965 | 9.489(s) |
# iris_data features = 4 => H* = 3, lr = 1e-2
framework_run(iris_data, device, k=5, H_list=[1,2,3,4], lr=1e-2, iteration=2500)
H_irisNN | Val_Accuracy | Test_Accuracy | AUC | Precision | Recall | F1 | Training Time |
---|
1 | 0.886 | 0.903 | 0.891 | 0.837 | 0.87 | 0.85 | 9.465(s) |
2 | 0.962 | 0.965 | 0.997 | 0.963 | 0.959 | 0.961 | 9.192(s) |
3 | 0.963 | 0.966 | 0.997 | 0.964 | 0.962 | 0.963 | 9.107(s) |
4 | 0.965 | 0.963 | 0.998 | 0.962 | 0.957 | 0.96 | 9.198(s) |
# wine_data 13 features => H* = 6, lr = 1e-3
framework_run(wine_data, device, k=5, H_list=list(range(1,11)), lr=1e-3, iteration=2500)
H_wineNN | Val_Accuracy | Test_Accuracy | AUC | Precision | Recall | F1 | Training Time |
---|
1 | 0.715 | 0.709 | 0.827 | 0.643 | 0.674 | 0.6 | 9.564(s) |
2 | 0.822 | 0.781 | 0.979 | 0.678 | 0.691 | 0.66 | 9.17(s) |
3 | 0.758 | 0.744 | 0.869 | 0.662 | 0.638 | 0.601 | 9.814(s) |
4 | 0.864 | 0.841 | 0.979 | 0.901 | 0.778 | 0.791 | 10.202(s) |
5 | 0.734 | 0.751 | 0.929 | 0.668 | 0.648 | 0.604 | 9.864(s) |
6 | 0.861 | 0.834 | 0.997 | 0.902 | 0.765 | 0.773 | 9.523(s) |
7 | 0.824 | 0.807 | 0.964 | 0.787 | 0.728 | 0.723 | 9.795(s) |
8 | 0.87 | 0.815 | 0.996 | 0.892 | 0.737 | 0.748 | 9.733(s) |
9 | 0.83 | 0.803 | 0.997 | 0.886 | 0.721 | 0.73 | 9.45(s) |
10 | 0.813 | 0.799 | 0.997 | 0.885 | 0.715 | 0.716 | 10.286(s) |
Train Pytorch NN for Multi-class Data Sets
def train_multi_nn_model(dataset, L1_list, L2_list, device, learning_rate=1e-2, iteration=5000):
x_train, y_train, x_val, y_val, x_test, y_test = unzip_dataset(dataset)
# Cross Entropy Loss: combines nn.LogSoftmax() and nn.NLLLoss()
loss_fn = torch.nn.CrossEntropyLoss()
# The result table
# Each row preserves the related result of corresponding combination of L1 and L2
# 5 means we have 5 items to save => [L1, L2, best_accuracy_val, accuracy_test, training_time]
res_table = np.zeros((len(L1_list)*len(L2_list),5))
res_table_ind = 0
for L1 in L1_list:
for L2 in L2_list:
t0 = time.time()
model = torch.nn.Sequential(
torch.nn.Linear(784, L1), # input dimension = 784, hidden layer1 = L1
torch.nn.ReLU(),
torch.nn.Linear(L1, L2), # hidden layer2 = L2
torch.nn.ReLU(),
torch.nn.Linear(L2, 10), # output probability on 10 classes
).to(device)
loss_history = []
accuracy_val = []
correct = 0.0
best_accuracy_val = 0.0
best_model = None
# SGD optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(iteration):
y_train_pred = model(x_train).squeeze(dim=-1) # squeeze 2D of shape(x,1) to 1D of shape(x,)
loss = loss_fn(y_train_pred, y_train)
loss_history.append(loss.item())
# Choose the max possibility index as the prediction class
y_val_pred = model(x_val)
y_val_pred_label = torch.argmax(y_val_pred, dim=1)
# Calculate accuracy on the validation set
correct = (y_val_pred_label == y_val).sum().item()
accuracy = correct / y_val.shape[0]
accuracy_val.append(accuracy)
# Save the best model and best accuracy
if accuracy > best_accuracy_val:
best_accuracy_val = accuracy
best_model = model
if t % 200 == 0:
print("iteration: %s/%s" % (t, iteration))
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Compute training time
t1 = time.time()
print("training time = %s(s)" % (t1 - t0))
# Use the best model to predict on testing dataset
y_test_pred = best_model(x_test)
y_test_pred_label = torch.argmax(y_test_pred, dim=1)
# Calculate accuracy on the testing dataset
correct = (y_test_pred_label == y_test).sum().item()
accuracy_test = correct / y_val.shape[0]
# Save in result table
res_table[res_table_ind] += np.array([L1, L2, best_accuracy_val, accuracy_test, t1-t0])
res_table_ind += 1
print("best_accuracy_val = %s, accuracy_test = %s" % (best_accuracy_val, accuracy_test))
# Plot the loss curve and varlidation accuracy curve
fig = plt.figure(figsize=(15,4))
ax1 = plt.subplot(1,2,1)
ax2 = plt.subplot(1,2,2)
plt.sca(ax1)
plt.title("L1 = %s, L2 = %s" % (L1, L2))
plt.xlabel("iteration")
plt.ylabel("loss")
plt.plot(range(iteration), loss_history)
plt.sca(ax2)
plt.title("L1 = %s, L2 = %s" % (L1, L2))
plt.xlabel("iteration")
plt.ylabel("accuracy_val")
plt.plot(range(iteration), accuracy_val)
fig.tight_layout(pad=0.4, w_pad=3.0, h_pad=3.0)
plt.show()
# Return the result tables
return res_table
# Generate 5-folders
k_folders = generate_k_folders(read_multi_data(), 5)
# The trying parameters of L1 and L2
L1_list = [50, 75, 100]
L2_list = [10, 15, 20]
# The result table
# Each row preserves the related result of corresponding combination of L1 and L2
# 5 means we have 5 items to save => [L1, L2, best_accuracy_val, accuracy_test, training_time]
res_table = np.zeros((len(L1_list)*len(L2_list),5))
for index in range(5):
print("K-Folder index = %s" % index)
# Sum up the result table
res_table += train_multi_nn_model(k_folders[index], L1_list, L2_list, device, learning_rate=1e-4, iteration=1000)
# Compute the average table
np.round(res_table / 5, decimals=3)
L1 | L2 | Val Accuracy | Test Accuracy | Training Time |
---|
50 | 10 | 0.454 | 0.221 | 11.726(s) |
50 | 15 | 0.601 | 0.298 | 11.976(s) |
50 | 20 | 0.704 | 0.352 | 12.125(s) |
75 | 10 | 0.413 | 0.208 | 12.466(s) |
75 | 15 | 0.615 | 0.304 | 12.493(s) |
75 | 20 | 0.691 | 0.346 | 12.867(s) |
100 | 10 | 0.529 | 0.256 | 13.829(s) |
100 | 15 | 0.585 | 0.282 | 13.956(s) |
100 | 20 | 0.651 | 0.32 | 13.946(s) |
本博客文章除特别声明外,均可自由转载与引用,转载请标注原文出处:http://www.yelbee.top/index.php/archives/167/