Train Analysis

Learning Goal: I’m working on a data analytics exercise and need the explanation and answer to help me learn.

Please answer all questions in the Word file and provide all related R code.

HELP: Reading the data

# Bringing the data
# Read the predictors (x) and the class labels (y) for the training and test
# sets. The files are read without a header row.
train.x <- read.table(file = "train.txt", header = FALSE)
train.y <- read.table(file = "train_id.txt", header = FALSE)
test.x <- read.table(file = "test.txt", header = FALSE)
test.y <- read.table(file = "test_id.txt", header = FALSE)

# Combining the data: label column first, predictors after it
train.dt <- cbind(train.y, train.x)
test.dt <- cbind(test.y, test.x)

# Assign names for the columns: Y for the label, X1, X2, ... for the
# predictors. The number of predictor names follows the data instead of being
# hard-coded, so the assignment cannot fail on a column-count mismatch.
colnames(train.dt) <- c("Y", paste0("X", seq_len(ncol(train.x))))
colnames(test.dt) <- c("Y", paste0("X", seq_len(ncol(test.x))))

# Converting target Y to (0, 1) standard look (this is optional):
# label 1 becomes 0, every other label becomes 1
train.dt$Y <- ifelse(train.dt$Y == 1, 0, 1)
test.dt$Y <- ifelse(test.dt$Y == 1, 0, 1)

Sample Code:

Illustration with Stock Market data

Attached Files:

Illustration with Stock Market data.pdf (820.749 KB)

### Discriminant Analysis
### Comparing Logistic Regression, LDA and QDA
### Package ISLR (source of the Smarket data used below)
# Install the package only when it is missing instead of on every run
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}
library("ISLR")

head(Smarket)

# verify the format of data is data.frame
class(Smarket)

# NOTE: attach(Smarket) was removed. Columns are referenced explicitly through
# Smarket$..., and the models below receive their data via `data =`, so no
# variable is looked up on the search path.

# checking dependence of observations: plot each Lag1 value against the
# value of the following observation (all rows but the last vs. all rows but
# the first, instead of the hard-coded 1:1249 and 2:1250)
n.obs <- nrow(Smarket)
plot(
  Smarket$Lag1[-n.obs] ~ Smarket$Lag1[-1],
  xlab = "Lag1 (observation t + 1)",
  ylab = "Lag1 (observation t)"
)

# splitting the data: years before 2005 for training, 2005 for testing
train <- subset(Smarket, Year < 2005)
test <- subset(Smarket, Year == 2005)

### Logistic Regression (full model)
# Fit Direction on the five lags and Volume using the training data
log.reg <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = train,
  family = "binomial"
)
summary(log.reg)

# predictions: fitted probabilities for the test set
pred.log.reg <- predict(log.reg, test, type = "response")

# predicted class: "Up" when the fitted probability exceeds 0.5, otherwise
# "Down". The levels are pinned to those of test$Direction so the confusion
# matrix keeps both columns even if only one class is ever predicted.
class.log.reg <- factor(
  ifelse(pred.log.reg > 0.5, "Up", "Down"),
  levels = levels(test$Direction)
)

# confusion matrix
table(Actual = test$Direction, Predicted = class.log.reg)

# accuracy rate
accu.log <- mean(class.log.reg == test$Direction)
accu.log

# misclassification rate
misc.log <- 1 - accu.log
misc.log

# another way to check the error rate (i.e. misc rate)
# mean(class.log.reg != test$Direction)

#### LDA model full
library(MASS)
lda.model <- lda(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = train
)
lda.model

# predictions; the result carries the predicted class in $class.
# The former `type = "response"` argument was dropped: predict() for lda fits
# has no `type` parameter, so it was silently ignored.
pred.lda <- predict(lda.model, test)

# confusion matrix
table(Actual = test$Direction, Predicted = pred.lda$class)

# accuracy rate
accu.lda <- mean(pred.lda$class == test$Direction)
accu.lda

# misclassification rate
misc.lda <- 1 - accu.lda
misc.lda

#### QDA model full
# qda() comes from MASS, which is loaded in the LDA section of this script
qda.model <- qda(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = train
)
qda.model

# predictions; the result carries the predicted class in $class.
# The former `type = "response"` argument was dropped: predict() for qda fits
# has no `type` parameter, so it was silently ignored.
pred.qda <- predict(qda.model, test)

# confusion matrix
table(Actual = test$Direction, Predicted = pred.qda$class)

# accuracy rate
accu.qda <- mean(pred.qda$class == test$Direction)
accu.qda

# misclassification rate
misc.qda <- 1 - accu.qda
misc.qda

###########################
###### Reduced Models #######
###########################

### Logistic Regression (reduced)
# Same model family as the full fit, but with Lag1 and Lag2 only
log.reg.reduced <- glm(
  Direction ~ Lag1 + Lag2,
  data = train,
  family = "binomial"
)
summary(log.reg.reduced)

# predictions: fitted probabilities for the test set
pred.log.reg.reduced <- predict(log.reg.reduced, test, type = "response")

# predicted class: "Up" when the fitted probability exceeds 0.5, otherwise
# "Down". The levels are pinned to those of test$Direction so the confusion
# matrix keeps both columns even if only one class is ever predicted.
class.log.reg.reduced <- factor(
  ifelse(pred.log.reg.reduced > 0.5, "Up", "Down"),
  levels = levels(test$Direction)
)

# confusion matrix
table(Actual = test$Direction, Predicted = class.log.reg.reduced)

# accuracy rate
# NOTE: accu.log and misc.log are reused here, so the values stored by the
# full-model section are overwritten.
accu.log <- mean(class.log.reg.reduced == test$Direction)
accu.log

# misclassification rate
misc.log <- 1 - accu.log
misc.log

#### LDA model (reduced)
# lda() comes from MASS, which is loaded in the full LDA section of this script
lda.reduced <- lda(Direction ~ Lag1 + Lag2, data = train)
lda.reduced

# predictions; the result carries the predicted class in $class and the
# class posterior probabilities in $posterior.
# The former `type = "response"` argument was dropped: predict() for lda fits
# has no `type` parameter, so it was silently ignored.
pred.lda.reduced <- predict(lda.reduced, test)

# confusion matrix
table(Actual = test$Direction, Predicted = pred.lda.reduced$class)

# accuracy rate
# NOTE: accu.lda and misc.lda are reused here, so the values stored by the
# full-model section are overwritten.
accu.lda <- mean(pred.lda.reduced$class == test$Direction)
accu.lda

# misclassification rate
misc.lda <- 1 - accu.lda
misc.lda

#### QDA model (reduced)
# qda() comes from MASS, which is loaded in the full LDA section of this script
qda.reduced <- qda(Direction ~ Lag1 + Lag2, data = train)
qda.reduced

# predictions; the result carries the predicted class in $class and the
# class posterior probabilities in $posterior.
# The former `type = "response"` argument was dropped: predict() for qda fits
# has no `type` parameter, so it was silently ignored.
pred.qda.reduced <- predict(qda.reduced, test)

# confusion matrix
table(Actual = test$Direction, Predicted = pred.qda.reduced$class)

# accuracy rate
# NOTE: accu.qda and misc.qda are reused here, so the values stored by the
# full-model section are overwritten.
accu.qda <- mean(pred.qda.reduced$class == test$Direction)
accu.qda

# misclassification rate
misc.qda <- 1 - accu.qda
misc.qda

#### ROC plots
# Install the package only when it is missing instead of on every run
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
library("ROCR")

# Each curve is built from a score for the "Up" class on the test set and the
# true test labels: the fitted probability for logistic regression, and the
# "Up" posterior column for LDA and QDA (selected by name rather than by
# position).
pred_LM <- prediction(pred.log.reg.reduced, test$Direction)
LM <- performance(pred_LM, measure = "tpr", x.measure = "fpr")

pred_LDA <- prediction(pred.lda.reduced$posterior[, "Up"], test$Direction)
LDA <- performance(pred_LDA, measure = "tpr", x.measure = "fpr")

pred_QDA <- prediction(pred.qda.reduced$posterior[, "Up"], test$Direction)
QDA <- performance(pred_QDA, measure = "tpr", x.measure = "fpr")

# Overlay the three ROC curves (true positive rate against false positive
# rate) on a single plot
plot(LM, col = "black")
plot(LDA, add = TRUE, col = "orange")
plot(QDA, add = TRUE, col = "blue")

# Identify the curves; without a legend the colours are not self-explanatory
legend(
  "bottomright",
  legend = c("Logistic regression", "LDA", "QDA"),
  col = c("black", "orange", "blue"),
  lty = 1
)

Train Analysis

Illustration with Stock Market data

Do you need help with this paper? 🏆 - Let us help you write it!

Why Choose Our Essay Writing Service?

🎓 Why wait? Let us help you succeed! Our Writers are waiting..

Get started

Recent Customer Feedback

See more customer feedback..

How our paper writing service works

Quick Links

Legal

Other