For a detailed description of the task and the datasets, including the report and source code, please see the GitHub repository.

Utility Function Definitions

# import useful packages
import time
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from scipy.io import loadmat

def generate_k_folders(dataset, k):
    """
    Split the training data into k cross-validation folds.

    Input: dataset (a dictionary with "x_train", "y_train", "x_test" and
           "y_test" entries) and the number of folds k
    Output: a list of k dictionaries; each one contains a training split, a
            validation split and the (shared, unchanged) testing set
    """
    features, labels = dataset["x_train"], dataset["y_train"]
    test_features, test_labels = dataset["x_test"], dataset["y_test"]

    folds = []
    last_fold = k - 1

    for fold_idx in range(k):
        fold_size = int(features.shape[0] / k)
        start = fold_idx * fold_size

        if fold_idx == last_fold:
            # The final fold validates on everything from `start` to the end,
            # so it also absorbs the leftover samples when the sample count
            # is not a multiple of k.
            split = {
                "x_train": features[:start],
                "y_train": labels[:start],
                "x_val": features[start:],
                "y_val": labels[start:],
                "x_test": test_features,
                "y_test": test_labels,
            }
        else:
            stop = start + fold_size
            # Train on everything outside the [start, stop) validation window.
            split = {
                "x_train": torch.cat((features[:start], features[stop:]), dim=0),
                "y_train": torch.cat((labels[:start], labels[stop:])),
                "x_val": features[start:stop],
                "y_val": labels[start:stop],
                "x_test": test_features,
                "y_test": test_labels,
            }

        folds.append(split)

    return folds

def unzip_dataset(dataset):
    """
    Unpack a fold dictionary into its six splits.

    Input: a dictionary with the keys "x_train", "y_train", "x_val", "y_val",
           "x_test" and "y_test"
    Output: the tuple (x_train, y_train, x_val, y_val, x_test, y_test)
    """
    split_names = ("x_train", "y_train", "x_val", "y_val", "x_test", "y_test")
    return tuple(dataset[name] for name in split_names)

def confusion_mat_evaluate(y_test, y_pred):
    """
    Score predictions against the ground-truth labels.

    Input: the truth labels y_test and the predicted labels y_pred
    Output: (accuracy, precision, recall, f1); precision, recall and f1 are
            macro-averaged over the classes
    """
    scores = [metrics.accuracy_score(y_test, y_pred)]

    # The remaining three metrics all take the same macro-averaging argument.
    for macro_metric in (metrics.precision_score, metrics.recall_score, metrics.f1_score):
        scores.append(macro_metric(y_test, y_pred, average="macro"))

    return tuple(scores)

def read_bi_data(dataset, device=None):
    """
    Read a binary-class dataset into float32 tensors.

    Input: dataset - a mapping of numpy arrays with the entries 'train_X',
                     'train_Y', 'test_X' and 'test_Y'
           device  - (optional) torch device to place the tensors on; when
                     omitted, CUDA is used if it is available and the CPU
                     otherwise
    Output: a dictionary with the keys 'x_train', 'y_train', 'x_test' and
            'y_test'
    """
    if device is None:
        # Same placement as before on GPU machines, but no crash without CUDA.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def to_float_tensor(key):
        # Labels are converted to float as well as features.
        return torch.from_numpy(dataset[key]).to(device=device, dtype=torch.float32)

    return {
        'x_train': to_float_tensor('train_X'),
        'y_train': to_float_tensor('train_Y'),
        'x_test': to_float_tensor('test_X'),
        'y_test': to_float_tensor('test_Y'),
    }

def read_multi_data(data_dir="datasets/multi-class", device=None):
    """
    Read the multi-class dataset from four .mat files.

    The files <data_dir>/train_images.mat, train_labels.mat, test_images.mat
    and test_labels.mat are loaded; each one is expected to store its array
    under a variable named like the file.

    Input: data_dir - (optional) directory that contains the four .mat files
           device   - (optional) torch device to place the tensors on; when
                      omitted, CUDA is used if it is available and the CPU
                      otherwise
    Output: a dictionary with the keys 'x_train', 'y_train', 'x_test' and
            'y_test'; images are float32 tensors, labels are int64 tensors
    """
    if device is None:
        # Same placement as before on GPU machines, but no crash without CUDA.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def load_tensor(name, dtype):
        array = loadmat("%s/%s.mat" % (data_dir, name))[name]
        return torch.from_numpy(array).to(device=device, dtype=dtype)

    def load_labels(name):
        # Transpose, then drop a trailing size-1 dimension
        # (assumes the labels are stored as a single row - TODO confirm).
        return load_tensor(name, torch.int64).t().squeeze(dim=-1)

    x_train = load_tensor("train_images", torch.float32)
    y_train = load_labels("train_labels")
    x_test = load_tensor("test_images", torch.float32)
    y_test = load_labels("test_labels")

    return {
        'x_train': x_train,
        'y_train': y_train,
        'x_test': x_test,
        'y_test': y_test,
    }

Train PyTorch NN on Five Classification Data Sets

# Load the five binary-class datasets, one .npz archive per dataset.
# read_bi_data (defined above) reads the 'train_X', 'train_Y', 'test_X' and
# 'test_Y' entries from the archive it is given.
breast_cancer_data = np.load("datasets/bi-class/breast-cancer.npz")
diabetes_data = np.load("datasets/bi-class/diabetes.npz")
digit_data = np.load("datasets/bi-class/digit.npz")
iris_data = np.load("datasets/bi-class/iris.npz")
wine_data = np.load("datasets/bi-class/wine.npz")

def train_bi_nn_model(dataset, H_list, device, learning_rate=1e-2, iteration=5000):
    """
    Train a one-hidden-layer binary classifier for every hidden size in H_list.

    For each H a fresh Linear -> ReLU -> Linear -> Sigmoid network is trained
    with full-batch SGD and binary cross-entropy. Validation accuracy is
    measured before every parameter update, and the parameters that gave the
    best validation accuracy are the ones evaluated on the testing set.

    Input: dataset       - fold dictionary with the keys x_train, y_train,
                           x_val, y_val, x_test, y_test (labels are assumed
                           to be 0/1 float tensors - TODO confirm)
           H_list        - hidden layer sizes to try
           device        - torch device the model is moved to
           learning_rate - SGD learning rate
           iteration     - number of full-batch training iterations
    Output: numpy array of shape (len(H_list), 8); each row is
            [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1,
             training_time]
    """
    x_train, y_train, x_val, y_val, x_test, y_test = unzip_dataset(dataset)

    D_in = x_train.shape[1]
    D_out = 1

    # Binary Cross Entropy Loss
    loss_fn = torch.nn.BCELoss()

    # The result table: one row per H (see the docstring for the columns)
    res_table = np.zeros((len(H_list), 8))

    for res_table_ind, H in enumerate(H_list):

        model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, D_out),
            torch.nn.Sigmoid()
        ).to(device)

        loss_history = []
        accuracy_val = []
        best_accuracy_val = 0.0
        best_iteration = 0
        # Snapshot of the best parameters. Keeping a reference to `model`
        # itself would not work: the optimizer keeps updating it in place.
        best_state = None

        # SGD optimizer
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

        t0 = time.time()
        for t in range(iteration):
            y_train_pred = model(x_train).squeeze(dim=-1)  # shape (n, 1) -> (n,)
            loss = loss_fn(y_train_pred, y_train)
            loss_history.append(loss.item())

            # Validation needs no gradients; threshold probabilities at 0.5
            with torch.no_grad():
                y_val_label = (model(x_val).squeeze(dim=-1) >= 0.5).float()
                correct = (y_val_label == y_val).sum().item()
            accuracy = correct / y_val.shape[0]
            accuracy_val.append(accuracy)

            # Save the best parameters and best accuracy. The first iteration
            # always stores a snapshot, so one exists even if the validation
            # accuracy never rises above 0.
            if best_state is None or accuracy > best_accuracy_val:
                best_accuracy_val = accuracy
                best_iteration = t
                best_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }

            if t % 1000 == 0:
                print("iteration: %s/%s" % (t, iteration))

            optimizer.zero_grad()

            # Backward pass
            loss.backward()

            # Update parameters
            optimizer.step()

        # Compute training time (includes the per-iteration validation passes)
        t1 = time.time()
        print("training time = %s(s)" % (t1 - t0))

        # Restore the best parameters and predict on the testing dataset
        if best_state is not None:
            model.load_state_dict(best_state)
        with torch.no_grad():
            y_test_pred = model(x_test).squeeze(dim=-1)

        # Transfer to numpy arrays for scikit-learn
        yt = y_test.cpu().detach().numpy()
        yp = y_test_pred.cpu().detach().numpy()

        # Compute AUC (Area Under ROC Curve) from the predicted probabilities
        auc = metrics.roc_auc_score(yt, yp)

        # Compute accuracy, precision, recall and f1 from the hard labels
        yp[yp >= 0.5] = 1.0  # probability >= 0.5 maps to the positive label
        yp[yp < 0.5] = 0.0  # probability < 0.5 maps to the negative label
        accuracy_test, precision, recall, f1 = confusion_mat_evaluate(yt, yp)

        # Save all the results in res_table
        res_table[res_table_ind] = np.array(
            [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, t1 - t0]
        )

        # Output the result
        print("The best model: iteration = %s" % (best_iteration))
        print("On validation dataset: accuracy = %s" % (best_accuracy_val))
        print("On testing dataset: accuracy = %s, auc = %s, precision = %s, recall = %s, f1 = %s" \
              % (accuracy_test, auc, precision, recall, f1))

        # Plot the loss curve and validation accuracy curve
        plt.figure(figsize=(15, 4))
        ax1 = plt.subplot(1, 2, 1)
        ax2 = plt.subplot(1, 2, 2)
        plt.sca(ax1)
        plt.title("H = %s" % H)
        plt.xlabel("iteration")
        plt.ylabel("loss")
        plt.plot(range(iteration), loss_history)
        plt.sca(ax2)
        plt.title("H = %s" % H)
        plt.xlabel("iteration")
        plt.ylabel("accuracy_val")
        plt.plot(range(iteration), accuracy_val)
        plt.show()

    # Return the result table
    return res_table

def framework_run(data, device, k, H_list, lr, iteration):
    """
    Run k-fold cross-validation of the binary NN on one dataset.

    Input: data      - binary-class dataset as accepted by read_bi_data
           device    - torch device passed on to train_bi_nn_model
           k         - number of folds
           H_list    - hidden layer sizes to try
           lr        - SGD learning rate
           iteration - number of training iterations per model
    Output: numpy array of shape (len(H_list), 8) holding the per-H results
            averaged over the k folds and rounded to 3 decimals; columns are
            [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1,
             training_time]
    """
    # Generate K-folders from the dataset that was passed in
    # (previously breast_cancer_data was used regardless of `data`).
    k_folders = generate_k_folders(read_bi_data(data), k)

    # Accumulator for the per-fold result tables
    res_table = np.zeros((len(H_list), 8))

    # Traverse all folds and sum up their result tables
    for index, fold in enumerate(k_folders):
        print("K-Folder index = %s" % index)
        res_table += train_bi_nn_model(fold, H_list, device, lr, iteration)

    # Return the average table
    return np.round(res_table / k, decimals=3)

Testing part

If this part runs successfully, all of the functions above work correctly.

# Use the GPU when CUDA is available; otherwise select the CPU device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Short 5-fold run with two hidden sizes, used as a quick end-to-end check.
framework_run(
    data=breast_cancer_data,
    device=device,
    k=5,
    H_list=[5, 6],
    lr=5e-3,
    iteration=500,
)

Training on five binary classification data sets

# Breast-cancer data (10 features): try H = 1..7; chosen H* = 3, lr = 1e-2
framework_run(
    data=breast_cancer_data,
    device=device,
    k=5,
    H_list=[1, 2, 3, 4, 5, 6, 7],
    lr=1e-2,
    iteration=2500,
)

H_breastNN	Val_Accuracy	Test_Accuracy	AUC	Precision	Recall	F1	Training Time
1	0.892	0.901	0.892	0.838	0.866	0.848	9.782(s)
2	0.973	0.963	0.997	0.962	0.957	0.96	9.643(s)
3	0.973	0.969	0.997	0.967	0.966	0.966	9.4(s)
4	0.965	0.966	0.998	0.964	0.962	0.963	10.327(s)
5	0.963	0.968	0.997	0.965	0.964	0.965	11.125(s)
6	0.965	0.968	0.997	0.965	0.964	0.965	11.011(s)
7	0.973	0.968	0.998	0.965	0.964	0.965	10.401(s)

# Digit data (64 features): try H = 5..10; chosen H* = 10, lr = 2e-3
framework_run(
    data=digit_data,
    device=device,
    k=5,
    H_list=[5, 6, 7, 8, 9, 10],
    lr=2e-3,
    iteration=2500,
)

H_digitNN	Val_Accuracy	Test_Accuracy	AUC	Precision	Recall	F1	Training Time
5	0.914	0.859	0.987	0.818	0.8	0.793	9.569(s)
6	0.947	0.931	0.998	0.951	0.903	0.92	9.327(s)
7	0.927	0.935	0.997	0.956	0.908	0.924	9.11(s)
8	0.941	0.929	0.998	0.951	0.9	0.918	11.058(s)
9	0.919	0.922	0.997	0.948	0.89	0.907	11.388(s)
10	0.956	0.94	0.997	0.958	0.915	0.93	10.615(s)

# Diabetes data (8 features): try H = 1..8; chosen H* = 4, lr = 5e-2
framework_run(
    data=diabetes_data,
    device=device,
    k=5,
    H_list=[1, 2, 3, 4, 5, 6, 7, 8],
    lr=5e-2,
    iteration=2500,
)

H_diabetesNN	Val_Accuracy	Test_Accuracy	AUC	Precision	Recall	F1	Training Time
1	0.926	0.9	0.897	0.833	0.867	0.846	9.412(s)
2	0.973	0.965	0.997	0.961	0.961	0.961	9.183(s)
3	0.967	0.968	0.997	0.964	0.966	0.965	9.584(s)
4	0.971	0.969	0.997	0.966	0.967	0.966	9.897(s)
5	0.973	0.968	0.997	0.965	0.965	0.965	9.909(s)
6	0.976	0.965	0.997	0.961	0.962	0.961	10.188(s)
7	0.971	0.966	0.997	0.963	0.963	0.963	10.127(s)
8	0.971	0.968	0.997	0.963	0.966	0.965	9.489(s)

# Iris data (4 features): try H = 1..4; chosen H* = 3, lr = 1e-2
framework_run(
    data=iris_data,
    device=device,
    k=5,
    H_list=[1, 2, 3, 4],
    lr=1e-2,
    iteration=2500,
)

H_irisNN	Val_Accuracy	Test_Accuracy	AUC	Precision	Recall	F1	Training Time
1	0.886	0.903	0.891	0.837	0.87	0.85	9.465(s)
2	0.962	0.965	0.997	0.963	0.959	0.961	9.192(s)
3	0.963	0.966	0.997	0.964	0.962	0.963	9.107(s)
4	0.965	0.963	0.998	0.962	0.957	0.96	9.198(s)

# Wine data (13 features): try H = 1..10; chosen H* = 6, lr = 1e-3
framework_run(
    data=wine_data,
    device=device,
    k=5,
    H_list=list(range(1, 11)),
    lr=1e-3,
    iteration=2500,
)

H_wineNN	Val_Accuracy	Test_Accuracy	AUC	Precision	Recall	F1	Training Time
1	0.715	0.709	0.827	0.643	0.674	0.6	9.564(s)
2	0.822	0.781	0.979	0.678	0.691	0.66	9.17(s)
3	0.758	0.744	0.869	0.662	0.638	0.601	9.814(s)
4	0.864	0.841	0.979	0.901	0.778	0.791	10.202(s)
5	0.734	0.751	0.929	0.668	0.648	0.604	9.864(s)
6	0.861	0.834	0.997	0.902	0.765	0.773	9.523(s)
7	0.824	0.807	0.964	0.787	0.728	0.723	9.795(s)
8	0.87	0.815	0.996	0.892	0.737	0.748	9.733(s)
9	0.83	0.803	0.997	0.886	0.721	0.73	9.45(s)
10	0.813	0.799	0.997	0.885	0.715	0.716	10.286(s)

Train PyTorch NN for Multi-class Data Sets

def train_multi_nn_model(dataset, L1_list, L2_list, device, learning_rate=1e-2, iteration=5000):
    """
    Train a two-hidden-layer 10-class classifier for every (L1, L2) pair.

    For each combination a fresh Linear -> ReLU -> Linear -> ReLU -> Linear
    network is trained with full-batch SGD and cross-entropy. Validation
    accuracy is measured before every parameter update, and the parameters
    that gave the best validation accuracy are the ones evaluated on the
    testing set.

    Input: dataset       - fold dictionary with the keys x_train, y_train,
                           x_val, y_val, x_test, y_test (labels are assumed
                           to be integer class indices in 0..9 - TODO confirm)
           L1_list       - sizes to try for the first hidden layer
           L2_list       - sizes to try for the second hidden layer
           device        - torch device the model is moved to
           learning_rate - SGD learning rate
           iteration     - number of full-batch training iterations
    Output: numpy array of shape (len(L1_list) * len(L2_list), 5); each row is
            [L1, L2, best_accuracy_val, accuracy_test, training_time]
    """
    x_train, y_train, x_val, y_val, x_test, y_test = unzip_dataset(dataset)

    # Input width is taken from the data (784 for the dataset used here)
    D_in = x_train.shape[-1]
    n_classes = 10

    # Cross Entropy Loss: combines nn.LogSoftmax() and nn.NLLLoss()
    loss_fn = torch.nn.CrossEntropyLoss()

    # The result table: one row per (L1, L2) pair (see the docstring)
    res_table = np.zeros((len(L1_list) * len(L2_list), 5))
    res_table_ind = 0

    for L1 in L1_list:
        for L2 in L2_list:
            t0 = time.time()
            model = torch.nn.Sequential(
                torch.nn.Linear(D_in, L1),  # hidden layer1 = L1
                torch.nn.ReLU(),
                torch.nn.Linear(L1, L2),  # hidden layer2 = L2
                torch.nn.ReLU(),
                torch.nn.Linear(L2, n_classes),  # one logit per class
            ).to(device)

            loss_history = []
            accuracy_val = []
            best_accuracy_val = 0.0
            # Snapshot of the best parameters. Keeping a reference to `model`
            # itself would not work: the optimizer keeps updating it in place.
            best_state = None

            # SGD optimizer
            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

            for t in range(iteration):
                # Logits of shape (n, n_classes), as CrossEntropyLoss expects
                y_train_pred = model(x_train)
                loss = loss_fn(y_train_pred, y_train)
                loss_history.append(loss.item())

                # Validation needs no gradients; the class with the largest
                # logit is the prediction
                with torch.no_grad():
                    y_val_pred_label = torch.argmax(model(x_val), dim=1)
                    correct = (y_val_pred_label == y_val).sum().item()
                accuracy = correct / y_val.shape[0]
                accuracy_val.append(accuracy)

                # Save the best parameters and best accuracy. The first
                # iteration always stores a snapshot, so one exists even if
                # the validation accuracy never rises above 0.
                if best_state is None or accuracy > best_accuracy_val:
                    best_accuracy_val = accuracy
                    best_state = {
                        name: tensor.detach().clone()
                        for name, tensor in model.state_dict().items()
                    }

                if t % 200 == 0:
                    print("iteration: %s/%s" % (t, iteration))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Compute training time (includes model construction and the
            # per-iteration validation passes)
            t1 = time.time()
            print("training time = %s(s)" % (t1 - t0))

            # Restore the best parameters and predict on the testing dataset
            if best_state is not None:
                model.load_state_dict(best_state)
            with torch.no_grad():
                y_test_pred_label = torch.argmax(model(x_test), dim=1)

            # Accuracy on the testing dataset: divide by the number of TEST
            # samples (the validation set size was used here before)
            correct = (y_test_pred_label == y_test).sum().item()
            accuracy_test = correct / y_test.shape[0]

            # Save in result table
            res_table[res_table_ind] = np.array([L1, L2, best_accuracy_val, accuracy_test, t1 - t0])
            res_table_ind += 1

            print("best_accuracy_val = %s, accuracy_test = %s" % (best_accuracy_val, accuracy_test))

            # Plot the loss curve and validation accuracy curve
            fig = plt.figure(figsize=(15, 4))
            ax1 = plt.subplot(1, 2, 1)
            ax2 = plt.subplot(1, 2, 2)
            plt.sca(ax1)
            plt.title("L1 = %s, L2 = %s" % (L1, L2))
            plt.xlabel("iteration")
            plt.ylabel("loss")
            plt.plot(range(iteration), loss_history)
            plt.sca(ax2)
            plt.title("L1 = %s, L2 = %s" % (L1, L2))
            plt.xlabel("iteration")
            plt.ylabel("accuracy_val")
            plt.plot(range(iteration), accuracy_val)
            fig.tight_layout(pad=0.4, w_pad=3.0, h_pad=3.0)
            plt.show()

    # Return the result table
    return res_table

# Build the 5 cross-validation folds from the multi-class data
k_folders = generate_k_folders(read_multi_data(), 5)

# Candidate sizes for the first (L1) and second (L2) hidden layers
L1_list = [50, 75, 100]
L2_list = [10, 15, 20]

# Accumulator for the per-fold result tables: one row per (L1, L2) pair,
# columns = [L1, L2, best_accuracy_val, accuracy_test, training_time]
n_combinations = len(L1_list) * len(L2_list)
res_table = np.zeros((n_combinations, 5))

for index in range(5):
    print("K-Folder index = %s" % index)
    # Add this fold's result table to the running sum
    res_table += train_multi_nn_model(
        dataset=k_folders[index],
        L1_list=L1_list,
        L2_list=L2_list,
        device=device,
        learning_rate=1e-4,
        iteration=1000,
    )

# Average over the 5 folds
np.round(res_table / 5, decimals=3)

L1	L2	Val Accuracy	Test Accuracy	Training Time
50	10	0.454	0.221	11.726(s)
50	15	0.601	0.298	11.976(s)
50	20	0.704	0.352	12.125(s)
75	10	0.413	0.208	12.466(s)
75	15	0.615	0.304	12.493(s)
75	20	0.691	0.346	12.867(s)
100	10	0.529	0.256	13.829(s)
100	15	0.585	0.282	13.956(s)
100	20	0.651	0.32	13.946(s)