Homework no.2#

Student name: Mohammad Amin Dadgar
Student Id: 4003624016

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.metrics import roc_curve

Q1#

Using the training set below, answer these questions. \begin{equation} \text{Training Set} = \{(0.5, 2), (0.7, 3), (1, 8), (1.5, 10), (2, 10.3), (2.7, 5.5), (3, 8), (3.3, 11), (4, 10.2), (4.3, 12)\} \end{equation} Each data point is \((x_i, y_i)\).

P1#

Fit the model \(y=ax^2+bx+c\) on the given dataset and name the model model_1. This means \begin{equation} \phi_1(x) = \begin{bmatrix} 1 \\ x \\ x^2 \end{bmatrix} \end{equation} And the weights matrix is \begin{equation} W_1 = \begin{bmatrix} c \\ b \\ a \end{bmatrix} \end{equation}

def model_1(training_set):
    """
    Linear Regression using quadratic function as the kernel function

    
    Parameters:
    -----------
    training_set : array of tuples
        array of `x_i` and `y_i`
        `x` the input variables
        `y` are the output variables
        
    Returns:
    ----------
    w : matrix_like
        the matrix of learned weights
    """
    ## Extract the X values and convert them using the kernel function
    X = np.array(training_set)[:, 0]
    X = np.matrix(X).T
    
    ## apply the quadratic function to input data
    X_phi = kernel_Quadratic(X)
    ## extract the outputs
    Y = np.array(training_set)[:, 1]
    Y = np.matrix(Y)

    ## the `A` and `b` parameters of a Linear Regression function
    A = X_phi @ X_phi.T
    b = np.multiply(Y, X_phi)
    b = np.sum(b, axis=1)
    
    w = np.dot(np.linalg.inv(A), b)
    return w
    
    
def kernel_Quadratic(X):
    """
    The quadratic kernel function
    
    Parameters:
    -----------
    X : array_like
        the variables to be converted
   
    Returns:
    --------
    phi : array_like
        the quadratic function applied to `X`
    """
    ## first convert to a numpy array and copy it
    X_new = np.array(X).copy()
    
    ones_arr = np.ones(len(X))
    ones_arr = np.matrix(ones_arr)
    
    X_power_2 = np.power(X_new, 2)

    phi = np.vstack((ones_arr, X_new.T, X_power_2.T))
    
    return phi
train = [(0.5,2),(0.7,3),(1,8),(1.5,10),(2,10.3),(2.7,5.5),(3,8),(3.3,11),(4,10.2),(4.3,12)]
train = np.array(train)
model1_weight = model_1(train)
model1_weight
matrix([[ 1.94132333],
        [ 4.13814418],
        [-0.49656258]])

P2#

Fit the model \(y=ax^3+bx^2+cx+d\) on the given dataset and name the model model_2. This means \begin{equation} \phi_2(x) = \begin{bmatrix} 1 \\ x \\ x^2 \\ x^3 \end{bmatrix} \end{equation} And the weights matrix is \begin{equation} W_2 = \begin{bmatrix} d \\ c \\ b \\ a \end{bmatrix} \end{equation}

def model_2(training_set):
    """
    Linear Regression using ```ax^3 + bx^2 + cx + d``` as the kernel function
    
    Parameters:
    -----------
    training_set : array of tuples
        array of `x_i` and `y_i`
        `x` the input variables
        `y` are the output variables
        
    Returns:
    ----------
    w : matrix_like
        the matrix of learned weights
    """

    ## Extract the X values and convert them using the kernel function
    X = np.array(training_set)[:, 0]
    X = np.matrix(X).T
    
    ## apply the cubic function to input data
    X_phi = kernel_cubic(X)
    ## extract the outputs
    Y = np.array(training_set)[:, 1]
    Y = np.matrix(Y)

    ## the `A` and `b` parameters of a Linear Regression function
    A = X_phi @ X_phi.T
    b = np.multiply(Y, X_phi)
    b = np.sum(b, axis=1)
    
    w = np.dot(np.linalg.inv(A), b)
    return w

def kernel_cubic(X):
    """
    The cubic function
    
    
    Parameters:
    -----------
    X : array_like
        the variables to be converted
        
    Returns:
    --------
    phi : array_like
        the cubic function applied to `X`
    """
    
    ## first convert to a numpy array and copy it
    X_new = np.array(X).copy()
    
    ones_arr = np.ones(len(X))
    ones_arr = np.matrix(ones_arr)
    
    X_power_2 = np.power(X_new, 2)
    X_power_3 = np.power(X_new, 3)

    phi = np.vstack((ones_arr, X_new.T, X_power_2.T, X_power_3.T))
    
    return phi
model2_weight = model_2(train)
model2_weight
matrix([[-7.33397387],
        [22.63114975],
        [-9.53106286],
        [ 1.24181752]])
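As a quick sanity check, numpy's polyfit solves the same least-squares problems; it returns the coefficients from the highest degree down, so its output should match the weights above in reverse order.

## degree-2 and degree-3 least-squares fits for comparison
## (np.polyfit returns coefficients from highest degree to lowest)
print(np.polyfit(train[:, 0], train[:, 1], deg=2))
print(np.polyfit(train[:, 0], train[:, 1], deg=3))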

P3#

Calculate the MSE for \(\text{model}_1\) and \(\text{model}_2\) using the dataset below \begin{equation} \text{Test set} = \{(3.8, 12.9), (1, 9), (2, 8), (1.9, 9), (1.5, 7), (1.6, 9.5), (1.2, 10), (3.3, 11), (1.5, 11), (4, 6)\} \end{equation}

test = np.array(((3.8, 12.9), (1, 9),(2, 8), (1.9, 9), 
                    (1.5, 7), (1.6, 9.5), (1.2, 10), (3.3, 11),
                    (1.5, 11), (4, 6)))
test
array([[ 3.8, 12.9],
       [ 1. ,  9. ],
       [ 2. ,  8. ],
       [ 1.9,  9. ],
       [ 1.5,  7. ],
       [ 1.6,  9.5],
       [ 1.2, 10. ],
       [ 3.3, 11. ],
       [ 1.5, 11. ],
       [ 4. ,  6. ]])
## applying model_1
def model_predict(weights, data, **kwargs):
    """
    predict the output of a model in this question 
    using the trained weights for the data
    
    the desired model can be selected using the quadratic or the cubic parameter
    
    Parameters:
    ------------
    weights : array_like
        the learned weights of the chosen model
    data : array or scalar
        the data to run the prediction on
    kwargs :
        quadratics : use model_1 of this question
        cubic : use model_2 of this question
        
    **NOTE:** exactly one of the two keyword arguments must be chosen!
    **NOTE:** any value can be passed to the chosen keyword; the value itself is never used.
    
    Returns:
    --------
    prediction : array or scalar
        the predicted output value for the input data
    """
    
    ## first we need to apply the transformation for the data 
    ## as the learned parameters are trained using the transformation
    if 'quadratics' in kwargs and 'cubic' in kwargs:
        raise ValueError("Both parameters cannot be chosen! They have different behaviour")
    elif 'quadratics' in kwargs:
        data_phi = kernel_Quadratic(data) 
    elif 'cubic' in kwargs:
        data_phi = kernel_cubic(data) 
    else:
        raise ValueError("no parameters cubic or quadratics was chosen!")
        
    prediction = np.matrix(weights).T @ data_phi
    
    return prediction
def calculate_MSE(true_outputs, predicted_outputs):
    """
    calculate the Mean Squared Error for the predicted data
    
    Parameters:
    ------------
    true_outputs : array_like 
        the actual output of the data
    predicted_outputs : array_like
        the predicted outputs for the data
        
    Returns:
    ---------
    mse_value : float
        the mean squared error value
    """
    
    ## the difference 
    diff = predicted_outputs - true_outputs
    ## square the differences
    diff = np.power(diff, 2)
    
    ## find the mean value
    mse_value = np.mean(diff)
    
    return mse_value
    
pred_model1_test = model_predict(model1_weight, test[:, 0], quadratics='True')
pred_model2_test = model_predict(model2_weight, test[:, 0], cubic='True')
mse_model1 = calculate_MSE(test[:, 1], pred_model1_test)
mse_model2 = calculate_MSE(test[:, 1], pred_model2_test)
print(f'MSE for model1 is {mse_model1}')
print(f'MSE for model2 is {mse_model2}')
MSE for model1 is 7.496789449073051
MSE for model2 is 5.812331603917104
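The same values can be cross-checked with scikit-learn (assuming it is available); the predictions are flattened first because model_predict returns a \(1 \times n\) matrix.

from sklearn.metrics import mean_squared_error

## cross-check of the manual MSE computation
print(mean_squared_error(test[:, 1], np.asarray(pred_model1_test).ravel()))
print(mean_squared_error(test[:, 1], np.asarray(pred_model2_test).ravel()))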

P4#

To find the T-value for our datasets we need to use the equation below \begin{equation} T = \frac{mean_1 - mean_2}{\frac{std(\text{differences})}{\sqrt{n}}} \end{equation} Reference: https://www.investopedia.com/terms/t/t-test.asp

def calculate_T_value(dataset1, dataset2):
    """
    Find the T-value for two datasets
    
    Parameters:
    ------------
    dataset1 : array_of_Tuples
        (`x_i`, `y_i`) data, `x` representing the data and `y` representing the label
    dataset2 : array_of_Tuples
        (`x_i`, `y_i`) data, `x` representing the data and `y` representing the label

    Returns:
    ---------
    T_value : float
        the T value for datasets
    """
    ## convert to numpy array for ease of use
    set1 = np.array(dataset1)
    set2 = np.array(dataset2)
    
    ## calculate the means
    m1 = np.mean(set1[:, 1])
    m2 = np.mean(set2[:, 1])
    
    ## find the differences
    diff = set1[:, 1] - set2[:, 1]
    
    ## denominator of the T statistic (standard error of the differences)
    below = np.std(diff) / np.sqrt(len(diff))
    
    above = m1 - m2
    
    T_value = above / below
    
    return T_value
calculate_T_value(train, test)
-0.9350212787977713
z = (train[:, 1] - np.mean(train[:, 1]- test[:, 1]) ) / np.std(train[:, 1] - test[:, 1])
np.mean(z)
2.0609315717988514

As we can see, the T-test value is about -0.935, which lies inside the interval [-1.96, 1.96], so the difference between the two datasets is not statistically significant at the 5% level.
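As a sanity check (assuming scipy is available), the paired t-test from scipy.stats should give a value close to the manual computation; note that scipy uses the sample standard deviation (ddof=1) while np.std defaults to ddof=0, so small differences are expected.

from scipy import stats

## paired t-test on the y-values of the two datasets
t_stat, p_value = stats.ttest_rel(train[:, 1], test[:, 1])
print(f'scipy paired t-test: T = {t_stat:.3f}, p = {p_value:.3f}')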

Q2#

Explain the Friedman test.

The Friedman test is another statistical test used to compare measurements taken under several related conditions (for example, the same subjects measured at different time points). It relies on three assumptions: \((1)\) the data in each condition must be continuous, \((2)\) the data in each condition must be collected randomly, for example choosing random patients in order to analyze a drug effect; if the data were not randomly selected, the results would be biased and could not be trusted, and \((3)\) each group must contain enough data, since having too few samples can bias the results.

To explain this test in more detail, it can be seen as a non-parametric version of the analysis of variance (ANOVA). Where ANOVA compares the measured values across the dependent conditions, the Friedman test compares the ranks of the data, and from the ranks a chi-square statistic and a p-value are computed. Comparing the computed chi-square with the critical value from the chi-squared distribution table tells us whether the null hypothesis is rejected or not: if the computed chi-square is less than the critical value, the null hypothesis is not rejected and there is no significant difference in responsiveness between the different time points.
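As a small illustration (with made-up measurements of six subjects at three time points), the test can be run with scipy.stats.friedmanchisquare:

from scipy import stats
import numpy as np

## hypothetical measurements of 6 subjects at 3 time points
t1 = np.array([7.0, 9.9, 8.5, 5.1, 10.3, 8.9])
t2 = np.array([5.3, 5.7, 4.7, 3.5,  7.7, 6.0])
t3 = np.array([4.9, 7.6, 5.5, 2.8,  8.4, 6.1])

chi2, p_value = stats.friedmanchisquare(t1, t2, t3)
## a small p-value would reject the null hypothesis of equal responsiveness
print(f'chi-square = {chi2:.3f}, p = {p_value:.3f}')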

Q3#

Use the housing data from Homework no.1 and the linear regression model. The kernel function is \begin{equation} \phi_j(x) = \exp\big[-\frac{1}{2} \beta (x-m_j)^T(x-m_j)\big] \end{equation} Find the weight parameters \(w\).

columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD','TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

df_housing = pd.read_csv('../HW1/hw1_data/housing/housing.txt', delimiter=' +'
            , engine='python', names=columns, index_col=False)
df_housing.head()
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV
0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222.0 18.7 396.90 5.33 36.2
df_housing.shape
(506, 14)
## removing label
df_X = df_housing[columns[:-1]]
df_X.head()
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296.0 15.3 396.90 4.98
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242.0 17.8 396.90 9.14
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242.0 17.8 392.83 4.03
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222.0 18.7 394.63 2.94
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222.0 18.7 396.90 5.33

P1#

Using \begin{equation} \phi_j(x) = \exp\big[-\frac{1}{2} \beta (x-m_j)^T(x-m_j)\big] \end{equation}
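The actual training is done in scripts/main2_4_1.py and is not shown here; the sketch below shows one way the Gaussian kernel features and the closed-form solution could be implemented, assuming the centers \(m_j\) are random training rows and \(\beta\) is the small value discussed at the end of this question.

import numpy as np

def phi_gaussian(X, centers, beta):
    """Gaussian kernel features: phi_j(x) = exp(-0.5 * beta * ||x - m_j||^2)."""
    ## squared distances between every sample and every center -> (n_samples, n_centers)
    d2 = ((X[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    return np.exp(-0.5 * beta * d2)

def fit_closed_form(X, y, centers, beta):
    """Closed-form least squares on the kernel features: w = (Phi^T Phi)^-1 Phi^T y."""
    Phi = phi_gaussian(X, centers, beta)
    return np.linalg.pinv(Phi.T @ Phi) @ Phi.T @ y

## example usage on the housing data (10 random rows as centers, beta = 1e-5)
rng = np.random.default_rng(0)
X = df_X.values
y = df_housing['MEDV'].values
centers = X[rng.choice(len(X), size=10, replace=False)]
w = fit_closed_form(X, y, centers, beta=1e-5)
print(w)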

!python3 scripts/main2_4_1.py learning_rate=0 iterations=500
Linear Regression on classification dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
[[ 1.01282826  4.01649081  1.65647289 13.2619483   1.15354131  1.06494048
   1.07111885  1.00458072  4.40342042  1.07956694]
 [ 1.06031624  6.10072219  1.83830247 20.13251952  1.33604887  1.0121326
   1.08259325  1.02583593  6.68167274  1.22564921]]
Test set head :
[[ 1.1035626   3.90739883  1.1927901   6.89767518  1.18865428  1.18089521
   1.22723299  1.08127911  3.65352341  1.19721517]
 [ 1.03688092  3.65448769  1.4859806  10.54206949  1.11699903  1.09818608
   1.14145285  1.00633977  3.86608794  1.12006533]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error:
 [61.89315891]
 Test Error: 
  5184.793971193906
 Final Weights of the closed form training
[[ 55.43584553]
 [-33.28978507]
 [ -3.62285238]
 [ -3.05717393]
 [ 24.25689617]
 [ 16.36590991]
 [-47.56391815]
 [-10.09092706]
 [ 38.31770048]
 [ -3.69446901]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[30.25776804]
  Test Error:
   MSE:4502459.01443338

 Final wights of Incremental Learning
[[ -2.93593411]
 [-24.61501897]
 [ 17.80302619]
 [ 37.11704268]
 [  6.64056529]
 [ -1.38367835]
 [ -3.17517381]
 [ -0.0943037 ]
 [ -4.28366212]
 [ -7.53687668]]
----------------------------------------------------------------------

P2#

And using the other kernel function \begin{equation} \phi_j(x) = \sigma\big(a\,||x-m_j||\big) \end{equation}
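A minimal sketch of this second basis, assuming \(\sigma\) is the logistic sigmoid and reusing X, y, centers and the closed-form solution from the sketch above:

def phi_sigmoid(X, centers, a):
    """Sigmoid kernel features: phi_j(x) = sigma(a * ||x - m_j||)."""
    ## Euclidean distances between every sample and every center
    d = np.sqrt(((X[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2))
    return 1.0 / (1.0 + np.exp(-a * d))

## same closed-form fit as before, with alpha = 1e-5
Phi = phi_sigmoid(X, centers, a=1e-5)
w_sigmoid = np.linalg.pinv(Phi.T @ Phi) @ Phi.T @ y
print(w_sigmoid)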

!python3 scripts/main2_4_1.py learning_rate=0 iterations=500
Linear Regression on classification dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
[[ 1.15354131  4.40342042  1.00458072 13.2619483   1.06494048  4.01649081
   1.01282826  1.65647289  1.07111885  1.07956694]
 [ 1.33604887  6.68167274  1.02583593 20.13251952  1.0121326   6.10072219
   1.06031624  1.83830247  1.08259325  1.22564921]]
Test set head :
[[ 1.18865428  3.65352341  1.08127911  6.89767518  1.18089521  3.90739883
   1.1035626   1.1927901   1.22723299  1.19721517]
 [ 1.11699903  3.86608794  1.00633977 10.54206949  1.09818608  3.65448769
   1.03688092  1.4859806   1.14145285  1.12006533]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error:
 [61.89315891]
 Test Error: 
  5184.79397154851
 Final Weights of the closed form training
[[ 24.25689617]
 [ 38.31770048]
 [-10.09092705]
 [ -3.05717393]
 [ 16.36590991]
 [-33.28978507]
 [ 55.43584553]
 [ -3.62285238]
 [-47.56391815]
 [ -3.69446901]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[30.25776804]
  Test Error:
   MSE:4502459.014433393

 Final wights of Incremental Learning
[[  6.64056529]
 [ -4.28366212]
 [ -0.0943037 ]
 [ 37.11704268]
 [ -1.38367835]
 [-24.61501897]
 [ -2.93593411]
 [ 17.80302619]
 [ -3.17517381]
 [ -7.53687668]]
----------------------------------------------------------------------

We have set the \(\beta\) and \(\alpha\) values to \(0.00001\). Setting these hyperparameters to values near or above \(1\) can cause numerical overflow because the kernel arguments become very large, so the best values are around \(10^{-4}\) or smaller.

Q4#

P1#

Plot the training and the test data and answer the question: are the data linearly separable?

cols = ['X', 'Y', 'label']
train_ds = pd.read_csv('./hw2_data/classification_trn.txt',
                       delimiter=' +',
                      names=cols,
                      index_col=False,
                      engine='python')
train_ds.shape
(250, 3)
train_ds.head()
X Y label
0 0.462766 0.339111 1
1 0.779089 0.894436 1
2 0.067501 0.846257 1
3 -0.048451 0.142165 1
4 -0.529524 0.792441 1
cols = ['X', 'Y', 'label']
test_ds = pd.read_csv('./hw2_data/classification_tst.txt',
                       delimiter=' +',
                      names=cols,
                      index_col=False,
                      engine='python')
test_ds.shape
(100, 3)
## plotting the training data
plt.figure(figsize=(8,8))
sns.scatterplot(data=train_ds, x='X', y='Y',
                style='label',
                hue='label')
plt.text(y=2.3, x=-2, s='Training Data', size=16, bbox=dict(boxstyle='round',
                                                          color='c'))
plt.show()
[Figure: scatter plot of the training data]
## plotting the test data
plt.figure(figsize=(8,8))
sns.scatterplot(data=test_ds, x='X', y='Y',
                style='label',
                hue='label')
plt.text(y=2, x=-0.5, s='Test Data', size=16, bbox=dict(boxstyle='round',
                                                          color='c'))
plt.show()
[Figure: scatter plot of the test data]

From the scatter plot it is clear that the training data are not linearly separable.

P2, P3, P4#

Question: Find the gradient descent formulas that maximize the likelihood of the regression, use initial weights of \(1\), and apply the learning rate \(\frac{1}{t}\) (\(t\) is the time step \(1, 2, ..., T\)).

\begin{equation} W^{new} \leftarrow W^{old} + \alpha(t)\sum_{i=1}^{n} \big( y_i - f(x_i, W^{old})\big) x_{i}, \quad \text{where } \alpha(t) = \frac{1}{t} \text{ for } t=1, 2, \dots \end{equation}

The learning_rate=0 corresponds to \(\frac{2}{t}\) and the learning_rate=1 corresponds to \(\frac{1}{\sqrt{t}}\). Also the iterations can be adjusted using the argument iterations.

The errors are plotted in images that are saved within the HW2 folder.
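The implementation lives in scripts/main2_5.py and is not shown here; the following is only a minimal sketch of the batch update rule above, assuming the model is the logistic sigmoid \(f(x, W) = \sigma(W^T x)\) and the learning rate decays as \(\frac{1}{t}\).

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def fit_logistic_gd(X, y, iterations=500):
    """Batch gradient ascent on the log-likelihood with learning rate 1/t."""
    w = np.ones(X.shape[1])          # initial weights set to 1
    for t in range(1, iterations + 1):
        alpha = 1.0 / t              # decaying learning rate
        error = y - sigmoid(X @ w)   # (y_i - f(x_i, w)) for every sample
        w = w + alpha * (X.T @ error)
    return w

## example usage on the classification training set loaded above
w_hat = fit_logistic_gd(train_ds[['X', 'Y']].values, train_ds['label'].values)
print(w_hat)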

!python3 scripts/main2_5.py learning_rate=0 iterations=500
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=500 iterations
 Train Errors: 
  MSE: [21.179730570075883]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[2.66828095]
 [2.44634553]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[0.7290789712448449, 0.07]]
  Test Error:
   MSE:0.07

 Final wights of Incremental Learning
[4.0564108  3.72600682]
----------------------------------------------------------------------

P5#

The learning rate is \(\frac{1}{t}\) in the cell below

!python3 scripts/main2_5.py learning_rate=1 iterations=500
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=500 iterations
 Train Errors: 
  MSE: [21.514205312276683]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[3.81875707]
 [3.33814397]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[0.7268892378981953, 0.07]]
  Test Error:
   MSE:0.07

 Final wights of Incremental Learning
[13.60834642 14.01209199]
----------------------------------------------------------------------

And using a constant learning rate of 0.5 below

!python3 scripts/main2_5.py learning_rate=0.5 iterations=500
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=500 iterations
 Train Errors: 
  MSE: [22.45222254661859]
 Test Error: 
  MSE: 8.0

 Final wights of Offline Gradient Descent Learning
[[6.0130267 ]
 [4.98060682]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[0.7357933632670419, 0.07]]
  Test Error:
   MSE:0.07

 Final wights of Incremental Learning
[75.46032218 77.3744382 ]
----------------------------------------------------------------------

With a high iteration count there is no obvious difference between the learning rates (although the \(\frac{2}{t}\) learning rate seems to do slightly better). To make the differences visible we use 50 iterations below.

!python3 scripts/main2_5.py learning_rate=0 iterations=50
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=50 iterations
 Train Errors: 
  MSE: [21.321777405908414]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[2.34004787]
 [2.17965517]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[0.743316177460033, 0.07]]
  Test Error:
   MSE:0.07

 Final wights of Incremental Learning
[2.70364651 2.3125385 ]
----------------------------------------------------------------------
!python3 scripts/main2_5.py learning_rate=1 iterations=50
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=50 iterations
 Train Errors: 
  MSE: [21.222015553883374]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[2.59108759]
 [2.37625462]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[0.7267833093158856, 0.09]]
  Test Error:
   MSE:0.09

 Final wights of Incremental Learning
[4.70243878 5.20285994]
----------------------------------------------------------------------
## And static learning rate
!python3 scripts/main2_5.py learning_rate=0.5 iterations=500
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=500 iterations
 Train Errors: 
  MSE: [22.45222254661859]
 Test Error: 
  MSE: 8.0

 Final wights of Offline Gradient Descent Learning
[[6.0130267 ]
 [4.98060682]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[0.7357933632670419, 0.07]]
  Test Error:
   MSE:0.07

 Final wights of Incremental Learning
[75.46032218 77.3744382 ]
----------------------------------------------------------------------

We lower the iteration count further to see the differences more clearly.

!python3 scripts/main2_5.py learning_rate=0 iterations=5
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=5 iterations
 Train Errors: 
  MSE: [21.954185815907675]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[1.90462057]
 [1.81567883]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[1.0534808105827163, 0.32000000000000006]]
  Test Error:
   MSE:0.32000000000000006

 Final wights of Incremental Learning
[ 0.82746549 -0.11811742]
----------------------------------------------------------------------
!python3 scripts/main2_5.py learning_rate=1 iterations=5
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=5 iterations
 Train Errors: 
  MSE: [22.70260229673039]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[1.6710624 ]
 [1.60624242]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[1.0748037987641088, 0.28]]
  Test Error:
   MSE:0.28

 Final wights of Incremental Learning
[ 0.5997547  -0.00995924]
----------------------------------------------------------------------
!python3 scripts/main2_5.py learning_rate=0.5 iterations=5
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
          X         Y
0  0.462766  0.339111
1  0.779089  0.894436
2  0.067501  0.846257
3 -0.048451  0.142165
4 -0.529524  0.792441
Test set head :
          X         Y
0 -0.521659  0.363769
1 -0.993098 -0.295949
2  2.083345  1.483410
3 -0.866462  0.166879
4 -1.182918 -0.075403
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.18533821040260134
 Test Error: 
  0.28
 Final Weights of the closed form training
[-0.10041674  0.74624728]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=5 iterations
 Train Errors: 
  MSE: [23.278982955755232]
 Test Error: 
  MSE: 7.0

 Final wights of Offline Gradient Descent Learning
[[1.54112266]
 [1.4904238 ]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[1.1067532327276703, 0.23]]
  Test Error:
   MSE:0.23

 Final wights of Incremental Learning
[0.42850402 0.02784965]
----------------------------------------------------------------------

In these results we can see a small difference: the constant learning rate of 0.5 gives the best test error for the incremental (online) gradient descent.

P6#

Comparing the incremental version of logistic regression with the batch version shows that the incremental version achieves better results when the iteration count is high.

Q5#

The data from question 4 is reused and the Naive Bayes method is applied.

def probability_normal_distribution(X, mu, sigma):
    """
    The probability value for normal distribution function
    
    Parameters:
    ------------
    X : array_like
        the input data
    mu : float
        the given mean value
    sigma : float
        the given standard deviation
        
    Returns:
    --------
    probability : float
        the probability value for the x input values 
    """
    ## we've divided the equation in two parts
    p1 = 1 / (np.sqrt(np.pi * 2) * sigma)
    p2 = np.exp(-0.5 * ((X-mu) / sigma)**2 )
    
    probability = p1 * p2
    
    return probability
    
def find_MLE_Normal_distro(X):
    """
    the maximum likelihood estimation of the parameters of a univariate normal distribution
    the parameters are the mean and the standard deviation
    
    Parameters:
    ------------
    X : array_like
        the input data values
    
    Returns:
    ---------
    mu : float
        the estimated mean
    variance : float
        the estimated standard deviation (square root of the ML variance)

    """
    X = np.array(X)
    
    mu = (1 / len(X)) * np.sum(X)
    
    ## the square root of the ML variance is returned
    ## so it can be used directly as the standard deviation
    variance = np.sqrt((1 / len(X)) * np.sum((X - mu)**2))    
    
    return mu, variance
cols = ['X', 'Y', 'label']
train_ds = pd.read_csv('./hw2_data/classification_trn.txt',
                       delimiter=' +',
                      names=cols,
                      index_col=False,
                      engine='python')

test_ds = pd.read_csv('./hw2_data/classification_tst.txt',
                       delimiter=' +',
                      names=cols,
                      index_col=False,
                      engine='python')

In Naive Bayes the class conditional density can be computed by \begin{equation} p(x|c=1) = \prod_{i=1}^{d} p(x_i | c=1), \qquad p(x|c=0) = \prod_{i=1}^{d} p(x_i | c=0) \end{equation}

## divide the dataset into 0 and 1 labels
def estimate_MLE_NB(X, Y, features_arr):
    """
    estimate the Maximum likelihood parameters for naive bayes method
    in detail: in naive bayes we have a parameter for each dimension and each class

    Parameters:
    ------------
    X : array_like
        the input data (a pandas dataframe is preferred)
    Y : array_like
        the labels for each `X` inputs
    features_arr : array_like
        the string array with the name of each feature (dimension) of the training data

    Returns:
    --------
    MLE_estimates : dictionary 
        the estimated parameters as a dictionary
    """
    ## dictionary of maximum likelihood estimations
    MLE_estimates = {}
    for feature in features_arr:
        for label in [0, 1]:
            mu, var = find_MLE_Normal_distro(X[Y == label][feature])
            ## each feature of class estimation
            MLE_estimates[f'{feature},{label}'] = [mu, var]
    return MLE_estimates
##################### Code is Implemented in the function above #####################

# ## dictionary of maximum likelihood estimations
# MLE_estimates = {}
# for feature in ['X', 'Y']:
#     for label in [0, 1]:
#         mu, var = find_MLE_Normal_distro(train_ds[train_ds.label == label][feature])
#         ## each feature of class estimation
#         MLE_estimates[f'{feature},{label}'] = [mu, var]


MLE_estimates = estimate_MLE_NB(train_ds[['X', 'Y']], train_ds[['label']].values, ['X', 'Y'])
MLE_estimates
{'X,0': [-0.9766294551726316, 0.4043628347745132],
 'X,1': [0.01564038684774194, 0.574343819845795],
 'Y,0': [-0.04355836269157895, 0.42688761701060846],
 'Y,1': [0.9415618305584516, 0.5324787748723679]}
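As a quick cross-check, the per-class means can also be reproduced with a pandas groupby (note that DataFrame.std uses ddof=1 while the ML estimate above divides by n, so the spreads differ slightly):

## per-class mean and standard deviation of each feature
print(train_ds.groupby('label')[['X', 'Y']].agg(['mean', 'std']))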
def predict_NB(X, MLE_estimations, features_arr):
    """
    predict the class Using Naive bayes algorithm
    
    Parameters:
    ------------
    X : pandas dataframe
        Input data, `X` and `Y` should be the features
    MLE_estimations : dictionary
        Maximum likelihood estimations corresponding to each dimension and class, as a dictionary with keys like `X,0`
        meaning X as the feature and 0 as the class
    features_arr : array_like
        the names of the feature columns of `X` used for prediction
        
    Returns:
    ---------
    prediction : array_like
        the array representing the probability of each class for data
    """
    ## the predicted value for each data
    prediction = []
    for idx in range(len(X)):
        ## initialize Class probability array
        class_p_arr = []
        for i in [0, 1]:
            ## multiply probability for each dimension
            p = 1
            for feature in features_arr:
                mu, var = MLE_estimations[f'{feature},{i}']
                p = p * probability_normal_distribution(X.iloc[idx][feature],mu, var)
            class_p_arr.append(p)
        ## save each class probability of each data
        prediction.append(class_p_arr)
    
    ## for ease of use convert to numpy
    prediction = np.array(prediction)
    return prediction
results  = predict_NB(train_ds[['X', 'Y']], MLE_estimates, ['X', 'Y'])
results.shape
(250, 2)
results[:4]
array([[1.09327234e-03, 2.02663171e-01],
       [6.64630522e-06, 2.14271447e-01],
       [3.74476058e-03, 5.10057518e-01],
       [6.01875199e-02, 1.67582766e-01]])
y_pred = (results[:, 0] < results[:, 1]).astype(int)
## Accuracy
(y_pred == train_ds[['label']].T.values).sum() / len(y_pred)
0.9
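For reference (assuming scikit-learn is available), sklearn's GaussianNB implements the same model and should reach a comparable training accuracy:

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(train_ds[['X', 'Y']], train_ds['label'])
## training accuracy of the sklearn implementation
print(gnb.score(train_ds[['X', 'Y']], train_ds['label']))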
def probability_multivariate_normal_distribution(X,mu, sigma):
    """
    The probability value for multivariate normal distribution function
    
    Parameters:
    ------------
    x : array_like
        the input data
    mu : array_like
        the means vector
    sigma : matrix_like
        the matrix representing the covariance
        
    Returns:
    --------
    probability : float
        the probability value for the x input values 
    """
    dimension = len(mu)
    
    ## divide the formula into 2 parts
    p1 = 1 / np.sqrt(((2*np.pi)**dimension) * np.linalg.det(sigma))
    
    ## some changes were made to the equation
    ## because of the dataset shape
    p2 = np.exp(-1/2 * (np.dot(X-mu, np.linalg.inv(sigma) @ (X-mu).T)))
    
    
    probability = p1 * p2
    
    return probability

def roc_curve(y, y_pred):
    """
    draw the ROC curve
    (note: this definition shadows the `roc_curve` imported from sklearn.metrics above)
    
    Parameters:
    ------------
    y : array_like
        the actual output for the data
    y_pred : array_like
        the predicted output for data

    """

    scores = []
    ## find the values for each threshold
    ## TP -> True Positive
    ## TN -> True Negative
    ## FP -> False Positive
    ## FN -> False Negative
    for threshold in np.linspace(0, 1, 101):
        TP = ((y_pred >= threshold) & (y == 1)).sum()
        TN = ((y_pred <= threshold) & (y == 0)).sum()
        FP = ((y_pred > threshold) & (y == 0)).sum()
        FN = ((y_pred < threshold) & (y == 1)).sum()

        scores.append([threshold, TP, TN, FP, FN])

    df_cols = ['threshold', 'TP', 'TN', 'FP', 'FN']
    df_scores = pd.DataFrame(scores, columns=df_cols)

    ## sensitivity and specificity
    ## True Positive rate = sensitivity
    df_scores['sens'] = df_scores.TP / (df_scores.TP + df_scores.FN)
    ## False Positive rate = 1 - specificity
    df_scores['1-spec'] = df_scores.FP / (df_scores.FP + df_scores.TN)
    
    ## ROC convention: false positive rate (1-spec) on the x axis, sensitivity on the y axis
    plt.plot(df_scores['1-spec'], df_scores['sens'])
    plt.xlabel('1-spec')
    plt.ylabel('sens')
    plt.title('ROC Curve')
    plt.show()
roc_curve(train_ds[['label']].values, y_pred)
[Figure: ROC curve of Naive Bayes on the training set]

Now we apply the estimates to the test set.

## test set
results = predict_NB(test_ds[['X', 'Y']], MLE_estimates, ['X', 'Y'])
y_pred = (results[:, 0] < results[:, 1]).astype(int)
## test accuracy
(y_pred == test_ds[['label']].T.values).sum() / len(y_pred)
0.91
roc_curve(test_ds[['label']].values, y_pred)
[Figure: ROC curve of Naive Bayes on the test set]

Q6#

Use Quadratic Discriminant Analysis on the classification data. The functions from question 5 can be reused.

## we need to find the mean and covariance of each class

## divide into class 0 and class 1
train_ds_C0 = train_ds[train_ds.label == 0]
train_ds_C1 = train_ds[train_ds.label == 1]

train_ds_C0_X = train_ds_C0[['X', 'Y']]
train_ds_C0_Y = train_ds_C0[['label']]

train_ds_C1_X = train_ds_C1[['X', 'Y']]
train_ds_C1_Y = train_ds_C1[['label']]

mu0, sigma0 = find_MLE_Normal_distro(train_ds_C0_X.T)
mu1, sigma1 = find_MLE_Normal_distro(train_ds_C1_X.T)
## take a look at properties found
print("Class 0")
print(f'Covariance:\n{sigma0}\nMean vector:\n{mu0}\n')
print('--'*10)
print("Class 1")
print(f'Covariance:\n{sigma1}\nMean vector:\n{mu1}\n')
Class 0
Covariance:
467.38705057468275
Mean vector:
-48.45892134854999

--------------------
Class 1
Covariance:
917.6585102015431
Mean vector:
74.18317184898

The univariate find_MLE_Normal_distro above does not return a proper mean vector and covariance matrix for this two-dimensional data, so the functions above, together with a multivariate MLE, are implemented in a class below.

class QDA():
    """
    Quadratic Discriminant Analysis Class
    """
    def __init__(self):
        self.mu0 = None
        self.mu1 = None
        self.sigma0 = None
        self.sigma1 = None
    
    def predict(self, X):
        """
        Predict the output for the X input
        
        Parameters:
        ------------
        X : array_like
            the data to predict the class labels for
        """
        mu0 = self.mu0
        mu1 = self.mu1
        sigma0 = self.sigma0
        sigma1 = self.sigma1
        
        ## check that the model has been fitted and the parameters are set
        ## checking one parameter is enough 
        ## because all of them are assigned in the learning phase 
        if sigma0 is None:
            raise ValueError("Error! First fit the model on a dataset, then try to predict the values!")
        


        ## Find the probabilities for class 0
        ## save them in an array for furthur comparisons
        prediction = []
        for i in range(len(X)):
            ## Find the predicted Class of each data in class 0
            P_Class0 = self.__probability_multivariate_normal_distribution(X.iloc[i]
                                                             ,mu0, sigma0)

            P_Class1 = self.__probability_multivariate_normal_distribution(X.iloc[i]
                                                             ,mu1, sigma1)
            ## Compare and set the class to highest probability
            P = P_Class1 >= P_Class0

            ## Append the number of Class
            prediction.append(int(P))
        
        return prediction
    def fit(self, X):
        """
        Learning the parameters of the model (Binary Classification model!)
        
        Parameters:
        -----------
        X : array_like (pandas dataframe is preferred)
            the input data to learn from, including the output column named `label`
            (this is a binary classification task)
        """
        ## we need to find the mean and covariance of each class

        features_arr = X.columns[:-1]

        ## divide into class 0 and class 1
        train_ds_C0 = X[X.label == 0]
        train_ds_C1 = X[X.label == 1]

        # train_ds_C0_X = train_ds_C0[['X', 'Y']]
        train_ds_C0_X = train_ds_C0[features_arr]

        # train_ds_C0_Y = train_ds_C0[['label']]

        train_ds_C1_X = train_ds_C1[features_arr]
        # train_ds_C1_Y = train_ds_C1[['label']]

        mu0, sigma0 = self.__find_MLE_Normal_distro(train_ds_C0_X.T)
        mu1, sigma1 = self.__find_MLE_Normal_distro(train_ds_C1_X.T)
        
        ## Save the parameters of the model
        self.mu0 = mu0
        self.mu1 = mu1
        self.sigma0 = sigma0
        self.sigma1 = sigma1
            
    def __probability_multivariate_normal_distribution(self, X, mu, sigma):
        """
        The probability value for multivariate normal distribution function

        Parameters:
        ------------
        x : array_like
            the input data
        mu : array_like
            the means vector
        sigma : matrix_like
            the matrix representing the covariance

        Returns:
        --------
        probability : float
            the probability value for the x input values 
        """
        dimension = len(mu)

        ## divide the formula into 2 parts
        p1 = 1 / np.sqrt(((2*np.pi)**dimension) * np.linalg.det(sigma))

        ## some changes were made to the equation
        ## because of the dataset shape
        p2 = np.exp(-1/2 * (np.dot(X-mu, np.linalg.inv(sigma) @ (X-mu).T)))


        probability = p1 * p2

        return probability

    def __find_MLE_Normal_distro(self, X):
        """
        the maximum likelihood estimation for the parameters of a multivariate normal distribution
        the parameters are the mean vector and the covariance matrix

        Parameters:
        ------------
        X : array_like
            the X input data vectors

        Returns:
        ---------
        mu : array_like
            the means vector
        covariance : matrix_like
            the matrix representing the covariance

        """

        mu = (1 / len(X.T)) * np.sum(X, axis=1)

        ## some changes were made to the ML estimation of the covariance
        ## because of the dataset shape
        covariance = (1 / len(X.T)) * ((X.T-mu).T @ (X.T-mu))    

        return mu, covariance
   
## instantiate the QDA model and run it on the classification data
model_QDA = QDA()

train_ds_X = train_ds[['X', 'Y']]
train_ds_Y = train_ds[['label']]

model_QDA.fit(train_ds)
test_Y_pred = model_QDA.predict(test_ds[['X','Y']])
train_Y_pred = model_QDA.predict(train_ds_X)
print(f'Mu0:\n{model_QDA.mu0}\n')
print(f'Mu1:\n{model_QDA.mu1}')
Mu0:
X   -0.976629
Y   -0.043558
dtype: float64

Mu1:
X    0.015640
Y    0.941562
dtype: float64
print(f'Covariance_0:\n {model_QDA.sigma0}\n')
print(f'Covariance_1:\n {model_QDA.sigma1}\n')
Covariance_0:
           X         Y
X  0.163509  0.040219
Y  0.040219  0.182233

Covariance_1:
           X         Y
X  0.329871  0.092079
Y  0.092079  0.283534
## test accuracy
test_Y = test_ds[['label']].values.flatten()

acc_test = (test_Y == test_Y_pred).sum() / len(test_Y_pred)
acc_test
0.9
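For comparison (assuming scikit-learn is available), sklearn's QuadraticDiscriminantAnalysis fits the same per-class Gaussians and should reach a similar test accuracy:

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

sk_qda = QuadraticDiscriminantAnalysis()
sk_qda.fit(train_ds[['X', 'Y']], train_ds['label'])
## accuracy on the test set of the sklearn implementation
print(sk_qda.score(test_ds[['X', 'Y']], test_ds['label']))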
def find_ROC(y_pred, y_true, thresholds = 101):
    """
    Find the confusion matrix for each threshold (ROC)
    
    Parameters:
    ------------
    y_true : array_like
        the actual output of inputs
    y_pred : array_like
        the predicted values of inputs
    thresholds : positive integer
        the count of thresholds to be evaluated between 0 and 1
        default is 101

    Returns:
    ---------
    df_scores : pandas dataframe
        dataframe contains the confusion matrix for different thresholds
    """

    assert thresholds >= 5, "Error: Thresholds must be at least 5!" 
    
    scores = []

    ## find the values for each threshold
    ## TP -> True Positive
    ## TN -> True Negative
    ## FP -> False Positive
    ## FN -> False Negative
    for threshold in np.linspace(0, 1, thresholds):
        TP = ((y_pred >= threshold) & (y_true == 1)).sum()
        TN = ((y_pred <= threshold) & (y_true == 0)).sum()
        FP = ((y_pred > threshold) & (y_true == 0)).sum()
        FN = ((y_pred < threshold) & (y_true == 1)).sum()
        

        scores.append([threshold, TP, TN, FP, FN])

    df_cols = ['threshold', 'TP', 'TN', 'FP', 'FN']
    df_scores = pd.DataFrame(scores, columns=df_cols)

    ## sensitivity and specificity
    ## True Positive rate = sensitivity
    df_scores['sens'] = df_scores.TP / (df_scores.TP + df_scores.FN)
    ## False Positive rate = 1 - specificity
    df_scores['1-spec'] = df_scores.FP / (df_scores.FP + df_scores.TN)

    return df_scores
def export_ROC_Curve(df_scores, description):
    """
    save the roc curve using the dataframe scores
    
    Parameters:
    ------------
    df_scores : array_like
        the results computed for different thresholds
    description : string
        the name of the saved file
    """

    ## ROC convention: false positive rate (1-spec) on the x axis, sensitivity on the y axis
    plt.plot(df_scores['1-spec'], df_scores['sens'])
    plt.title(f'ROC Curve for {description}')
    plt.savefig(f'Q6_QDA_{description}.png')
    plt.close()
roc_scores = find_ROC(test_Y_pred, test_Y)
export_ROC_Curve(roc_scores, 'MLE')
roc_scores.iloc[np.linspace(0, 100, 10, dtype=int)]
threshold TP TN FP FN sens 1-spec
0 0.00 62 35 3 0 1.000000 0.078947
11 0.11 55 35 3 7 0.887097 0.078947
22 0.22 55 35 3 7 0.887097 0.078947
33 0.33 55 35 3 7 0.887097 0.078947
44 0.44 55 35 3 7 0.887097 0.078947
55 0.55 55 35 3 7 0.887097 0.078947
66 0.66 55 35 3 7 0.887097 0.078947
77 0.77 55 35 3 7 0.887097 0.078947
88 0.88 55 35 3 7 0.887097 0.078947
100 1.00 55 38 0 7 0.887097 0.000000

Q7#

## column names are used from ```pima-indians-diabetes.name``` file
cols = ['pregnancy_count', 'glucose_test', 'blood_pressure', 'triceps_thickness', '2h_insulin', 'mass', 'pedi', 'age', 'label']

df_pima = pd.read_csv('../HW1/hw1_data/pima/pima-indians-diabetes.data', index_col=False ,names=cols)
df_pima.head()
pregnancy_count glucose_test blood_pressure triceps_thickness 2h_insulin mass pedi age label
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1
##################### From HW1 #####################
def divideset2(df, fraction = 0.66):
    """
    Divide the dataset into train and test with fixed size every run

    INPUTS:
    ---------
    df: pandas dataframe, the dataset that is going to be split
    fraction: the fraction of the data used for training, default is 0.66

    OUTPUTS:
    ---------
    train: pandas dataframe, the portion of the dataset used for training
    test: pandas dataframe, the portion of the dataset used for testing
    """

    train = df.sample(frac=fraction).copy()
    test = df.drop(train.index)

    return train, test
df_pima_trn, df_pima_tst = divideset2(df_pima)
df_pima_trn.shape
(507, 9)
df_pima_tst.shape
(261, 9)

We have modified the script for question 4 and run it here.

!python3 scripts/main_2_7.py
Logistic Regression on housing dataset!
----------------------------------------------------------------------
----------------------------------------------------------------------
Loading Training and Test sets
datasets loaded!
Train set head:
   pregnancy_count  glucose_test  blood_pressure  ...  mass   pedi  age
0                6           148              72  ...  33.6  0.627   50
1                1            85              66  ...  26.6  0.351   31
2                8           183              64  ...  23.3  0.672   32
3                1            89              66  ...  28.1  0.167   21
4                0           137              40  ...  43.1  2.288   33

[5 rows x 8 columns]
----------------------------------------------------------------------
----------------------------------------------------------------------
Closed Form Training: 
 Train Error: 
  Mean Squared Error: 0.1791001059784589
 Test Error: 
  0.6471354166666666
 Final Weights of the closed form training
[ 2.46554784e-02  3.86639820e-03 -4.91162585e-03  5.75161548e-05
  4.78331990e-05  4.18631699e-03  9.80129310e-02 -1.04267995e-03]
----------------------------------------------------------------------
----------------------------------------------------------------------
Gradient descent For Logistic Regression with T=50 iterations
 Train Errors: 
scripts/main_2_7.py:148: RuntimeWarning: overflow encountered in exp
  y = 1 / (1 + np.exp(-w.T @ x))
  MSE: [395.00014894191975]
 Test Error: 
  MSE: 395.0

 Final wights of Offline Gradient Descent Learning
[[ -12.22417034]
 [  50.06305025]
 [ -46.92348329]
 [  77.92385392]
 [  17.51867599]
 [-122.31863815]
 [   7.4775007 ]
 [  -0.72949004]]
----------------------------------------------------------------------
----------------------------------------------------------------------
Incremental Learning (Online GD) 
  Training Error:
   MSE:[[1.0178211994736963, 0.6510416666666666]]
  Test Error:
   MSE:0.6510416666666666

 Final wights of Incremental Learning
[0.80728208 3.19264994 0.06047742 2.39362356 0.54607482 0.85250314
 1.88412344 2.44401855]
----------------------------------------------------------------------

Q8#

Use the Naive Bayes implementation from question 5 on the pima dataset.

pima_cols_X = df_pima_trn.columns[:-1]
MLE_estimates_Q8 = estimate_MLE_NB(df_pima_trn[pima_cols_X], df_pima_trn[['label']].values, pima_cols_X )
MLE_estimates_Q8
{'pregnancy_count,0': [3.3827893175074184, 3.075518425354172],
 'pregnancy_count,1': [4.947058823529412, 3.5616614839076943],
 'glucose_test,0': [109.9406528189911, 26.209089248154815],
 'glucose_test,1': [140.91764705882352, 33.11715807809526],
 'blood_pressure,0': [68.31750741839762, 18.76743713077681],
 'blood_pressure,1': [69.69411764705882, 21.696312253420487],
 'triceps_thickness,0': [19.762611275964392, 15.091512038708125],
 'triceps_thickness,1': [22.605882352941176, 17.416455051886835],
 '2h_insulin,0': [65.35311572700297, 91.42289286036038],
 '2h_insulin,1': [103.74705882352941, 139.6904166052542],
 'mass,0': [30.332640949554893, 7.0884075860170555],
 'mass,1': [34.94529411764706, 6.658175529684017],
 'pedi,0': [0.4379050445103858, 0.30617648254103347],
 'pedi,1': [0.5496176470588235, 0.3501867917274237],
 'age,0': [31.42433234421365, 11.96313829923815],
 'age,1': [36.67058823529412, 10.740126229178365]}
results_Q8 = predict_NB(df_pima_trn[pima_cols_X], MLE_estimates_Q8, pima_cols_X)
results_Q8.shape
(507, 2)

Training set accuracy

y_pred = (results_Q8[:, 0] < results_Q8[:, 1]).astype(int)
## Accuracy
(y_pred == df_pima_trn[['label']].T.values).sum() / len(y_pred)
0.757396449704142

Training ROC

roc_curve(df_pima_trn[['label']].values, y_pred)
[Figure: ROC curve of Naive Bayes on the pima training set]

Test accuracy and ROC curve

results_Q8_tst = predict_NB(df_pima_tst[pima_cols_X], MLE_estimates_Q8, pima_cols_X)
y_pred = (results_Q8_tst[:, 0] < results_Q8_tst[:, 1]).astype(int)
## Accuracy
(y_pred == df_pima_tst[['label']].T.values).sum() / len(y_pred)
0.7624521072796935
roc_curve(df_pima_tst[['label']].values, y_pred)
[Figure: ROC curve of Naive Bayes on the pima test set]

Q9#

Use the QDA implementation from question 6 on the pima dataset.

mode_QDA_pima = QDA()
## the dataframe with the output value will be sent
## the output column is named 'label'
mode_QDA_pima.fit(df_pima_trn)
results_Q9 = mode_QDA_pima.predict(df_pima_trn[pima_cols_X])
## accuracy
(results_Q9 == df_pima_trn[['label']].T.values).sum() / len(results_Q9)
0.7554240631163708
results_Q9 = np.array(results_Q9)

roc_curve(df_pima_trn[['label']].values.flatten(), results_Q9 )
[Figure: ROC curve of QDA on the pima training set]

Let’s take a look at the test set.

results_Q9_tst = mode_QDA_pima.predict(df_pima_tst[pima_cols_X])
## accuracy
(results_Q9_tst == df_pima_tst[['label']].T.values).sum() / len(results_Q9_tst)
0.7586206896551724
## ROC curve
results_Q9_tst = np.array(results_Q9_tst)

roc_curve(df_pima_tst[['label']].values.flatten(), results_Q9_tst )
[Figure: ROC curve of QDA on the pima test set]

Q10#

Use the SVM method to predict the outputs of the pima dataset.

from sklearn.svm import SVC
model_svm = SVC()

model_svm.fit(df_pima_trn[pima_cols_X], df_pima_trn[['label']].values.flatten())
SVC()
results_Q10 = model_svm.predict(df_pima_trn[pima_cols_X])
## Training Accuracy
(results_Q10 == df_pima_trn[['label']].values.flatten()).sum() / len(results_Q10)
0.7633136094674556
results_Q10_tst = model_svm.predict(df_pima_tst[pima_cols_X])
## Test accuracy
(results_Q10_tst == df_pima_tst[['label']].values.flatten()).sum() / len(results_Q10_tst)
0.7509578544061303
## Train ROC curve
roc_curve(df_pima_trn[['label']].values.flatten(), results_Q10)
[Figure: ROC curve of SVM on the pima training set]
## Test ROC curve
roc_curve(df_pima_tst[['label']].values.flatten(), results_Q10_tst)
[Figure: ROC curve of SVM on the pima test set]
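The ROC curves above are computed from hard 0/1 predictions, so they effectively contain a single operating point. A smoother curve can be obtained from the continuous SVM scores; the sketch below uses decision_function together with the roc_curve from sklearn.metrics (imported under another name because the custom roc_curve function defined earlier shadows it).

from sklearn.metrics import roc_curve as sk_roc_curve

## continuous decision scores instead of hard labels
scores_tst = model_svm.decision_function(df_pima_tst[pima_cols_X])
fpr, tpr, _ = sk_roc_curve(df_pima_tst['label'].values, scores_tst)

plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for SVM (decision_function scores)')
plt.show()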