ML
Advanced ML Journal Index
Practical No.01
Aim: Simple linear regression on the Boston data set using ggplot2, predicting house price (medv) from lstat.
Code:
# Load required libraries
library(MASS)     # provides the Boston housing data
library(ggplot2)  # plotting

# Attach the Boston data set
data("Boston")

# Simple linear regression: predict medv (median house value)
# from lstat (% lower-status population)
fit <- lm(medv ~ lstat, data = Boston)

# Inspect coefficients, significance and R-squared
summary(fit)

# Scatter plot with the fitted least-squares line overlaid
scatter <- ggplot(Boston, aes(x = lstat, y = medv)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Linear Regression: medv vs lstat",
       x = "LSTAT (%)",
       y = "MEDV (House Price)")
print(scatter)
Output:
Graph:
Practical No.02
Aim:
To implement a multiple linear regression model on a standard dataset and plot the least squares regression fit using R
Code:
# MASS provides the Boston housing data
library(MASS)
data(Boston)
str(Boston)

# Multiple linear regression: medv modeled on lstat and rm
model <- lm(medv ~ lstat + rm, data = Boston)
summary(model)

# Compare fitted values against the observed response
predicted_medv <- predict(model, Boston)
plot(Boston$medv, predicted_medv,
     xlab = "Actual Median House Value (MEDV)",
     ylab = "Predicted Median House Value",
     main = "Least Squares Regression Fit",
     pch = 19,
     col = "blue")
# 45-degree reference line: perfect predictions would lie on it
abline(0, 1, col = "red", lwd = 2)

# Standard lm diagnostic plots in a 2x2 grid; save and restore the
# previous par() settings so later plots are not silently affected
op <- par(mfrow = c(2, 2))
plot(model)
par(op)
Output:
Graph:
Conclusion: Hence, we have successfully implemented multiple linear regression on the Boston data set (MASS package) and plotted the least squares regression fit.
Practical NO. 3
Aim: To fit a classification model using Quadratic Discriminant Analysis (QDA) on a standard dataset (Weekly dataset) and compare its classification performance with Linear Discriminant Analysis (LDA) using a confusion matrix and accuracy measure.
Code:
# ISLR supplies the Weekly data; MASS supplies lda() and qda()
library(ISLR)
library(MASS)

data(Weekly)
str(Weekly)
summary(Weekly)

set.seed(1)

# Train on observations before 2009, evaluate on the remaining years
is_train <- Weekly$Year < 2009
is_test <- !is_train

# Same lag predictors for both classifiers
lag_formula <- Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5

# ---- Quadratic Discriminant Analysis ----
fit_qda <- qda(lag_formula, data = Weekly, subset = is_train)
pred_qda <- predict(fit_qda, Weekly[is_test, ])
cm_qda <- table(Predicted = pred_qda$class,
                Actual = Weekly$Direction[is_test])
print(cm_qda)
acc_qda <- mean(pred_qda$class == Weekly$Direction[is_test])
print(paste("QDA Classification Accuracy:", round(acc_qda, 4)))

# ---- Linear Discriminant Analysis ----
fit_lda <- lda(lag_formula, data = Weekly, subset = is_train)
pred_lda <- predict(fit_lda, Weekly[is_test, ])
cm_lda <- table(Predicted = pred_lda$class,
                Actual = Weekly$Direction[is_test])
print(cm_lda)
acc_lda <- mean(pred_lda$class == Weekly$Direction[is_test])
print(paste("LDA Classification Accuracy:", round(acc_lda, 4)))
Data + values :
Output:
Conclusion: Hence, we have successfully implemented QDA and LDA and measured their performance using a confusion matrix and an accuracy measure, obtaining QDA: 0.4615 and LDA: 0.5481.
Practical no: 04
Aim: Fit a classification model using K Nearest Neighbour (KNN) Algorithm on a given data set. [One may use data sets like Caravan, Smarket, Weekly, Auto and Boston]
CODE:
# Install packages (run once if not installed)
install.packages("ISLR")
install.packages("class")
install.packages("caret")
# Load libraries
library(ISLR)   # Weekly data set
library(class)  # knn()
library(caret)  # createDataPartition(), confusionMatrix()

# Load dataset
data(Weekly)

# Convert target variable to numeric (Up = 1, Down = 0)
Weekly$Direction <- ifelse(Weekly$Direction == "Up", 1, 0)

# Select predictors and target
X <- Weekly[, c("Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume")]
Y <- Weekly$Direction

# Split data into training and testing (70/30, stratified on Y)
set.seed(123)
trainIndex <- createDataPartition(Y, p = 0.7, list = FALSE)
X_train <- X[trainIndex, ]
X_test <- X[-trainIndex, ]
Y_train <- Y[trainIndex]
Y_test <- Y[-trainIndex]

# Standardize features using TRAINING statistics only.
# Scaling the test set with its own mean/sd (as scale(X_test) would)
# leaks test information and puts train and test on different scales,
# which distorts KNN's distance computations.
X_train <- scale(X_train)
X_test <- scale(X_test,
                center = attr(X_train, "scaled:center"),
                scale = attr(X_train, "scaled:scale"))

# Apply KNN model with k nearest neighbours
k <- 5
knn_pred <- knn(
  train = X_train,
  test = X_test,
  cl = Y_train,
  k = k
)

# Evaluate model: confusion matrix and overall accuracy
conf_mat <- confusionMatrix(as.factor(knn_pred), as.factor(Y_test))
accuracy <- mean(knn_pred == Y_test)

# Print results
print(conf_mat)
print(paste("Accuracy:", round(accuracy * 100, 2), "%"))
OUTPUT:
Practical no: 05
Aim: Use the bootstrap to estimate a given statistic. [Data sets like Auto, Boston etc. may be used for the purpose.]
# Load necessary libraries
library(MASS)  # Boston data set
library(boot)  # boot(), boot.ci()

# Load the data and peek at the first rows
data("Boston")
head(Boston)

# Statistic to bootstrap: the mean of a resample.
# boot() calls this with the data vector and an index vector
# identifying which observations belong to the current replicate.
mean_medv <- function(data, indices) mean(data[indices])

# Run 1000 bootstrap replicates of the mean of medv
set.seed(123)  # reproducibility
bootstrap_results <- boot(data = Boston$medv,
                          statistic = mean_medv,
                          R = 1000)

# Point estimate, bias and standard error
print(bootstrap_results)

# 95% confidence interval via the percentile method
boot.ci(bootstrap_results, type = "perc")
Output:
> boot.ci(bootstrap_results, type = "perc")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
Intervals :
Level Percentile
95% (21.74 , 23.34)
Practical no: 06
Aim: For a given data set, split the data into two training and testing and fit the
following on the training set:
(i) PCR model
(ii) PLS model
Report test errors obtained in each case and compare the results. [Data sets
like College, Boston etc may be used for the purpose].
CODE:
# ----------------------------------------
# Practical: PCR vs PLS on Boston Dataset
# ----------------------------------------
# Load required libraries
library(MASS)  # Boston data
library(pls)   # pcr(), plsr(), RMSEP()

# Set seed for reproducibility
set.seed(1)

# Load Boston dataset
data(Boston)

# -----------------------------
# Train-Test Split (50-50)
# -----------------------------
n <- nrow(Boston)
train_index <- sample(1:n, n / 2)
train_data <- Boston[train_index, ]
test_data <- Boston[-train_index, ]

# =====================================================
# PCR MODEL
# =====================================================
# Fit PCR model with Cross-Validation
pcr_fit <- pcr(medv ~ .,
               data = train_data,
               scale = TRUE,
               validation = "CV")

# Find optimal number of components.
# RMSEP()$val[1, , ] holds the CV error for 0, 1, ..., p components,
# so index 1 corresponds to the intercept-only model; subtract 1 to
# convert the array index into a component count, and keep at least
# one component so predict(..., ncomp = ...) is valid.
pcr_rmse <- RMSEP(pcr_fit)
opt_pcr_comp <- max(which.min(pcr_rmse$val[1, , ]) - 1, 1)

# Predict on test data
pcr_pred <- predict(pcr_fit,
                    newdata = test_data,
                    ncomp = opt_pcr_comp)

# Calculate Test MSE
pcr_test_mse <- mean((pcr_pred - test_data$medv)^2)

# =====================================================
# PLS MODEL
# =====================================================
# Fit PLS model with Cross-Validation
pls_fit <- plsr(medv ~ .,
                data = train_data,
                scale = TRUE,
                validation = "CV")

# Find optimal number of components (same index adjustment as PCR)
pls_rmse <- RMSEP(pls_fit)
opt_pls_comp <- max(which.min(pls_rmse$val[1, , ]) - 1, 1)

# Predict on test data
pls_pred <- predict(pls_fit,
                    newdata = test_data,
                    ncomp = opt_pls_comp)

# Calculate Test MSE
pls_test_mse <- mean((pls_pred - test_data$medv)^2)

# =====================================================
# RESULTS
# =====================================================
cat("Optimal PCR Components :", opt_pcr_comp, "\n")
cat("PCR Test MSE :", pcr_test_mse, "\n\n")
cat("Optimal PLS Components :", opt_pls_comp, "\n")
cat("PLS Test MSE :", pls_test_mse, "\n")
OUTPUT:
CONCLUSION: Hence, we have successfully fitted PCR and PLS models on a train/test split and compared their test errors, obtaining a PCR test MSE of 26.86123.
Practical 07 :
Aim: For a given data set, fit a regression tree. [One may choose data sets like Carseats, Boston etc. for the purpose]
Code:
# Install packages if needed
# install.packages("MASS")
# install.packages("tree")
library(MASS)  # Boston data
library(tree)  # tree(), cv.tree(), prune.tree()

# Load data
data(Boston)

# Fit regression tree (predict median house value 'medv')
set.seed(123)
tree_model <- tree(medv ~ ., data = Boston)

# View summary: variables used, number of leaves, residual deviance
summary(tree_model)

# Plot tree with split labels
plot(tree_model)
text(tree_model, pretty = 0)

# Optional: prune tree using cross-validation.
# Pick the tree size with the lowest CV deviance.
cv_model <- cv.tree(tree_model)
best_size <- cv_model$size[which.min(cv_model$dev)]
pruned_tree <- prune.tree(tree_model, best = best_size)

# Plot pruned tree
plot(pruned_tree)
text(pruned_tree, pretty = 0)
Output:
Conclusion: Hence, we have successfully fitted a regression tree for the given data set.
Comments
Post a Comment