library(NetDA)
We demonstrate a standard classification and prediction analysis using the two functions in the NetDA package.
In this study, we take the wine dataset as an example, which is available at https://archive.ics.uci.edu/ml/datasets/wine. These data were obtained from a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The dataset contains three types of wine and 13 constituents: Alcohol (Alcohol), Malic acid (Malic acid), Ash (Ash), Alcalinity of ash (Alcalinity), Magnesium (Magnesium), Total phenols (phenols), Flavanoids (Flavanoids), Nonflavanoid phenols (Nonflavanoid), Proanthocyanins (Proanthocyanins), Color intensity (Color), Hue (Hue), OD280/OD315 of diluted wines (OD280), and Proline (Proline).
# Load the wine data shipped with the package and separate the response
# (column 1, the wine type) from the 13 constituent predictors (columns 2-14).
data(WineData)
Y <- WineData[, 1]     # the response
X <- WineData[, 2:14]  # the predictors
In the following analysis, the response is the type of wine, labeled 1, 2, or 3, and the constituents are treated as continuous predictors. The goal is to use the constituent information to construct predictive models, and then use them to classify the type of wine for a given sample.
# Split the full data by wine type so that each class can be sampled separately.
D1 <- WineData[which(Y == 1), ]
D2 <- WineData[which(Y == 2), ]
D3 <- WineData[which(Y == 3), ]
To demonstrate the functions and perform classification and prediction, we first split the full data into the training data and the validation data. In our example, we take the first 45 samples in each class to obtain the training data, and use the remaining samples in each class to form the validation data.
# Training data: the first 45 rows of each class (user-specific choice).
Train <- rbind(D1[1:45, ], D2[1:45, ], D3[1:45, ])

# Testing (validation) data: rows 45 to the end of each class (user-specific).
# NOTE(review): these ranges start at row 45, which is also the last training
# row of each class, so one sample per class appears in both Train and Test.
# The accompanying text describes the validation set as the *remaining*
# samples, which would start at row 46. The ranges are left unchanged here
# because the printed results further down appear to have been produced with
# this split (46 predicted values); confirm before changing.
Test <- rbind(
  D1[45:nrow(D1), ],
  D2[45:nrow(D2), ],
  D3[45:nrow(D3), ]
)
The response (Y) and predictors (X) in the training data:
# Predictors (columns 2-14) and response (column 1) of the training data.
# These overwrite the full-data X and Y defined earlier.
X <- Train[, 2:14]
Y <- Train[, 1]
The response (Y_test) and predictors (X_test) in the validation data:
# Predictors (columns 2-14) and response (column 1) of the validation data.
X_test <- Test[, 2:14]
Y_test <- Test[, 1]
Once the training data and the validation data are determined, we employ the function NetDA to perform classification. We pass X, Y, and X_test to the function NetDA; the argument method=1 selects “NetLDA” and method=2 selects “NetQDA”. The resulting vector of predicted classes and the estimated precision matrices are given by “$yhat” and “$Network”, respectively.
# Fit NetLDA (method = 1) on the training data and classify the validation
# predictors.
NetLDA <- NetDA(X, Y, method = 1, X_test)
yhat_lda <- NetLDA$yhat    # predicted classes for X_test
Net_lda <- NetLDA$Network  # estimated precision matrix
# Fit NetQDA (method = 2) on the training data and classify the validation
# predictors.
NetQDA <- NetDA(X, Y, method = 2, X_test)
yhat_qda <- NetQDA$yhat    # predicted classes for X_test
Net_qda <- NetQDA$Network  # list of estimated precision matrices (indexed with [[ ]] below)
# Predicted classes for the validation samples under NetLDA.
yhat_lda
#> [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2
#> [39] 2 2 2 2 3 3 3 3

# Estimated NetLDA precision matrix, rounded to three decimals.
round(Net_lda, digits = 3)
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 3.739 -0.415 -0.665 0.107 0.004 0.000 -0.444 0.000 0.000 -0.508
#> [2,] -0.415 1.247 -0.511 -0.035 0.000 0.109 0.324 -0.225 0.065 0.000
#> [3,] -0.665 -0.511 19.463 -0.936 -0.059 -0.055 -0.619 0.000 0.000 -0.087
#> [4,] 0.107 -0.035 -0.936 0.167 0.001 -0.048 0.152 -0.160 -0.038 -0.023
#> [5,] 0.004 0.000 -0.059 0.001 0.006 0.000 0.007 0.087 -0.036 0.003
#> [6,] 0.000 0.109 -0.055 -0.048 0.000 8.774 -3.847 0.000 -0.364 -0.249
#> [7,] -0.444 0.324 -0.619 0.152 0.007 -3.847 6.127 2.104 -1.800 0.016
#> [8,] 0.000 -0.225 0.000 -0.160 0.087 0.000 2.104 46.879 0.000 -0.084
#> [9,] 0.000 0.065 0.000 -0.038 -0.036 -0.365 -1.800 0.000 5.582 -0.216
#> [10,] -0.508 0.000 -0.087 -0.023 0.003 -0.249 0.016 -0.084 -0.216 0.467
#> [11,] 0.000 1.413 0.000 -0.035 0.007 0.000 -2.845 0.000 0.000 1.662
#> [12,] -0.291 0.017 0.000 -0.013 0.013 -1.274 -1.877 0.762 -0.391 0.606
#> [13,] -0.004 0.000 -0.005 0.000 0.000 -0.001 -0.002 -0.002 0.001 0.000
#> [,11] [,12] [,13]
#> [1,] 0.000 -0.291 -0.004
#> [2,] 1.413 0.017 0.000
#> [3,] 0.000 0.000 -0.005
#> [4,] -0.035 -0.013 0.000
#> [5,] 0.007 0.013 0.000
#> [6,] 0.000 -1.274 -0.001
#> [7,] -2.845 -1.877 -0.002
#> [8,] 0.000 0.762 -0.002
#> [9,] 0.000 -0.391 0.001
#> [10,] 1.663 0.606 0.000
#> [11,] 32.850 0.000 -0.002
#> [12,] 0.000 5.667 -0.001
#> [13,] -0.002 -0.001 0.000
# Predicted classes for the validation samples under NetQDA.
yhat_qda
#> [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [39] 2 2 2 2 3 3 3 3

# First element of the NetQDA precision-matrix list (presumably class 1 --
# confirm against the package documentation), rounded to three decimals.
round(Net_qda[[1]], digits = 3)
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 7.176 0.011 0.000 0.470 -0.073 0.000 -0.404 0.000 -1.754 -0.175
#> [2,] 0.012 2.721 0.000 0.005 -0.007 0.000 0.000 0.051 -0.171 0.000
#> [3,] 0.000 0.000 23.194 -0.959 -0.161 0.000 0.000 0.000 0.000 0.000
#> [4,] 0.470 0.005 -0.959 0.236 -0.011 0.138 0.076 -0.397 -0.101 0.046
#> [5,] -0.073 -0.007 -0.161 -0.011 0.013 -0.097 0.008 -0.085 0.049 -0.042
#> [6,] 0.000 0.000 0.000 0.138 -0.097 22.439 -7.472 0.000 -0.680 -2.326
#> [7,] -0.405 0.000 0.000 0.076 0.008 -7.471 16.520 0.000 -3.594 -2.131
#> [8,] 0.000 0.051 0.000 -0.397 -0.085 0.000 0.000 69.356 0.000 0.056
#> [9,] -1.754 -0.171 0.000 -0.101 0.049 -0.680 -3.594 0.000 8.449 -0.246
#> [10,] -0.175 0.000 0.000 0.046 -0.042 -2.326 -2.131 0.056 -0.246 2.509
#> [11,] 0.000 2.039 0.000 -0.244 -0.029 0.000 0.000 0.000 0.000 0.563
#> [12,] -0.158 0.000 0.000 0.170 -0.023 -0.328 0.000 0.159 0.000 0.000
#> [13,] -0.005 0.003 0.000 0.000 0.000 0.002 0.001 0.000 0.003 -0.006
#> [,11] [,12] [,13]
#> [1,] 0.000 -0.159 -0.005
#> [2,] 2.039 0.000 0.003
#> [3,] 0.000 0.000 0.000
#> [4,] -0.244 0.170 0.000
#> [5,] -0.029 -0.023 0.000
#> [6,] 0.000 -0.330 0.002
#> [7,] 0.000 0.000 0.001
#> [8,] 0.000 0.159 0.000
#> [9,] 0.000 0.000 0.003
#> [10,] 0.566 0.000 -0.006
#> [11,] 48.139 0.000 -0.008
#> [12,] 0.000 7.213 0.004
#> [13,] -0.008 0.004 0.000
# Second element of the NetQDA precision-matrix list (presumably class 2 --
# confirm against the package documentation), rounded to three decimals.
round(Net_qda[[2]], digits = 3)
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 4.060 0.019 0.710 -0.128 -0.005 0.000 -0.527 0.000 0.432 -0.783
#> [2,] 0.021 2.357 0.000 -0.173 -0.017 0.018 -0.015 -1.439 -0.359 0.138
#> [3,] 0.708 0.000 18.423 -1.213 -0.044 0.000 -0.006 0.000 0.000 0.000
#> [4,] -0.128 -0.173 -1.213 0.208 0.007 0.082 -0.043 -0.214 0.059 0.056
#> [5,] -0.005 -0.017 -0.044 0.007 0.005 -0.033 0.063 0.090 -0.066 0.000
#> [6,] 0.000 0.017 0.000 0.082 -0.033 7.713 -5.999 0.866 1.732 0.443
#> [7,] -0.527 -0.015 -0.006 -0.043 0.063 -5.999 10.017 0.181 -3.932 -1.565
#> [8,] 0.000 -1.440 0.000 -0.214 0.090 0.869 0.173 46.367 0.000 0.000
#> [9,] 0.432 -0.359 0.000 0.059 -0.066 1.733 -3.932 0.000 5.282 0.506
#> [10,] -0.783 0.137 0.000 0.056 0.000 0.443 -1.565 0.000 0.506 1.733
#> [11,] 0.000 0.912 0.000 -0.160 -0.049 0.000 -1.634 0.000 0.000 0.542
#> [12,] 0.170 0.121 0.826 -0.393 0.014 -0.822 -1.902 5.077 -0.648 0.763
#> [13,] -0.001 0.004 0.004 -0.001 0.000 -0.002 0.002 -0.003 -0.001 -0.001
#> [,11] [,12] [,13]
#> [1,] 0.000 0.169 -0.001
#> [2,] 0.912 0.122 0.004
#> [3,] 0.000 0.826 0.004
#> [4,] -0.160 -0.393 -0.001
#> [5,] -0.049 0.014 0.000
#> [6,] 0.000 -0.822 -0.002
#> [7,] -1.637 -1.902 0.002
#> [8,] 0.000 5.077 -0.003
#> [9,] 0.000 -0.648 -0.001
#> [10,] 0.542 0.763 -0.001
#> [11,] 29.109 0.000 0.008
#> [12,] 0.000 7.293 0.002
#> [13,] 0.008 0.002 0.000
# Third element of the NetQDA precision-matrix list (presumably class 3 --
# confirm against the package documentation), rounded to three decimals.
round(Net_qda[[3]], digits = 3)
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 4.261 -0.254 0.000 -0.072 0.020 0.000 0.000 0.000 -1.200 -0.202
#> [2,] -0.254 0.951 0.000 -0.096 0.017 0.350 0.336 0.000 0.302 0.050
#> [3,] 0.000 0.000 39.923 -2.076 -0.119 -1.713 0.000 0.000 0.000 0.000
#> [4,] -0.072 -0.096 -2.076 0.359 -0.010 -0.326 -0.073 -0.193 -0.115 -0.010
#> [5,] 0.020 0.017 -0.119 -0.010 0.022 0.060 -0.431 0.371 -0.011 0.021
#> [6,] 0.000 0.350 -1.713 -0.326 0.060 10.790 -0.498 -0.740 -4.461 0.055
#> [7,] 0.000 0.336 0.000 -0.073 -0.431 -0.498 24.098 0.000 -1.503 -0.930
#> [8,] 0.000 0.000 0.000 -0.193 0.371 -0.740 0.000 50.622 0.000 0.000
#> [9,] -1.200 0.302 0.000 -0.115 -0.011 -4.461 -1.503 0.000 12.612 -1.032
#> [10,] -0.202 0.050 0.000 -0.010 0.021 0.055 -0.930 0.000 -1.032 0.410
#> [11,] -0.098 0.000 0.000 -0.178 -0.034 0.000 0.000 0.000 0.000 1.353
#> [12,] -0.832 0.146 -1.640 -0.055 0.040 -1.173 2.400 0.000 0.846 0.070
#> [13,] 0.003 0.000 0.006 0.000 -0.001 0.000 0.018 -0.014 -0.008 -0.001
#> [,11] [,12] [,13]
#> [1,] -0.098 -0.832 0.003
#> [2,] 0.000 0.146 0.000
#> [3,] 0.000 -1.640 0.006
#> [4,] -0.178 -0.055 0.000
#> [5,] -0.034 0.040 -0.001
#> [6,] 0.000 -1.173 0.000
#> [7,] 0.000 2.400 0.018
#> [8,] 0.000 0.000 -0.014
#> [9,] 0.000 0.846 -0.008
#> [10,] 1.353 0.070 -0.001
#> [11,] 50.699 0.000 -0.005
#> [12,] 0.000 13.312 -0.008
#> [13,] -0.005 -0.008 0.000
Finally, to assess the performance of prediction, we input predicted values (yhat_lda or yhat_qda) and responses in the validation data (Y_test) to the function Metrics, and the resulting values are displayed below.
# Compare the NetLDA predictions with the true validation labels; the printed
# result holds the confusion matrix, (PRE, REC, F-score), and ARI.
Metrics(yhat_lda, Y_test)
#> $`Confusion matrix`
#> [,1] [,2] [,3]
#> [1,] 15 0 0
#> [2,] 0 26 0
#> [3,] 0 1 4
#>
#> $`(PRE,REC,F-score)`
#> [1] 0.9782609 0.9782609 0.9782609
#>
#> $ARI
#> [1] 0.9410827
# Compare the NetQDA predictions with the true validation labels; the printed
# result holds the confusion matrix, (PRE, REC, F-score), and ARI.
Metrics(yhat_qda, Y_test)
#> $`Confusion matrix`
#> [,1] [,2] [,3]
#> [1,] 15 0 0
#> [2,] 0 27 0
#> [3,] 0 0 4
#>
#> $`(PRE,REC,F-score)`
#> [1] 1 1 1
#>
#> $ARI
#> [1] 1