-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDiscriminant_Analysis.R
More file actions
58 lines (37 loc) · 1.88 KB
/
Discriminant_Analysis.R
File metadata and controls
58 lines (37 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#CLASSIFICATION ON LINEAR DISCRIMINAT ANALYSIS-A classifier suitable for small data sets
#having low dimenstions and less predictors in input space and also stable for multiclass
#classification K>2(class labels > 2)
#Uses Bayes theoram as a Base Model
require(MASS)
#using LDA on Smarket dataset
#Test Data Frame
Test.2005<-subset(Smarket,Year==2005 , select = c(Lag1,Lag2,Direction))
#Using Previous 2 Days Returns to predict the Direction of Market on The Particular day
#Model trained on Training Data = Inductive Learning
fit1<-lda(Direction ~ Lag1 + Lag2 , data = Smarket, subset = Year < 2005)
summary(fit1)
#Predictions on TEST DATA SET
fit1.pred<-predict(fit1,newdata = Test.2005)
#returns a list with Classified Label for that data point ,
# Probabilities of each class -Here 'Up' and 'Down' & Discriminant Score
#creating a Data frame of predictions
df<-data.frame(fit1.pred)
#If predictors are Quantitative variables then we classify test points to the class label
#having higher Densities(Prior Probability) | Higher Conditional Probability(Pr(Y|X=xi))
head(df)
#Confusion Matrix for Model's Perfomance
table(predicted=df$class,True=Test.2005$Direction)
#Accuracy rate of 56% , same as that of Logistic Regression Model3
#56% correct classifications , and 44% misclassifications
#MODEL2 - QUADRATIC DISCRIMINANT ANALYSIS(More Complex Due to different Covariance
#matrix for each class label K)
fit2<-qda(Direction ~ Lag1 + Lag2 , data = Smarket, subset = Year < 2005)
fit2
#Predictions on TEST DATA
fit2.pred<-predict(fit2,newdata = Test.2005)
df2<-data.frame(fit2.pred)
table(Predicted = df2$class,True=Test.2005$Direction)
mean(df2$class==Test.2005$Direction)
#Hence The overall accuracy on TEST Data has increased to 60%-better Than LDA Model(fit1)
#Hence Overall accuracy of QDA is higher than LDA
#As QDA is much more complex and complicated than LDA due to the quadratic terms