Probability and Statistics Lab 7
Prerequisites
Explanatory code
# Load necessary libraries
library(ggplot2)
library(GGally)
library(corrplot)
# Explanatory example: correlation analysis on the built-in mtcars data.
# Assumes ggplot2, GGally and corrplot are already attached.
data(mtcars)

# Pearson correlation between fuel economy (mpg) and horsepower (hp).
correlation <- with(mtcars, cor(mpg, hp, method = "pearson"))
cat("Correlation between mpg and hp:", correlation, "\n")

# Scatter plot with hp on the x axis and mpg on the y axis.
ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
  geom_point(color = "red") +
  ggtitle("Scatter Plot of HP vs MPG") +
  theme_minimal()

# Pairwise plot matrix across every column of mtcars.
ggpairs(mtcars)

# Columns 1, 4, 6 and 7 of mtcars are mpg, hp, wt and qsec; draw them first
# with the base scatterplot matrix, then with GGally.
pairs(mtcars[, c("mpg", "hp", "wt", "qsec")])
ggpairs(mtcars[, c("mpg", "hp", "wt", "qsec")])

# Full correlation matrix: print it, then visualise it with circles.
mat <- cor(mtcars)
print(mat)
corrplot(mat, method = "circle")
Question 2
# Q1
# Rankings that judges A and B assigned to the same eight plays.
judgeA <- c(8, 7, 6, 3, 2, 1, 5, 4)
judgeB <- c(7, 5, 4, 1, 3, 2, 6, 8)

# Pearson correlation coefficient between the two sets of rankings.
rescorr <- cor(judgeA, judgeB, method = "pearson")
rescorr # Show the coefficient

# One row per play, with each judge's ranking in its own column.
judge_data <- data.frame(Play = 1:8, judgeA = judgeA, judgeB = judgeB)

# Scatter plot of Judge B's rankings against Judge A's.
ggplot(data = judge_data, mapping = aes(x = judgeA, y = judgeB)) +
  geom_point(size = 3, color = "blue") +
  theme_minimal() +
  labs(
    x = "Judge A Rankings",
    y = "Judge B Rankings",
    title = "Scatter plot of Judge A vs Judge B Rankings"
  )
# Q1
> # Define rankings given by two judges
> judgeA = c(8,7,6,3,2,1,5,4)
> judgeB = c(7,5,4,1,3,2,6,8)
> # Compute Pearson correlation coefficient between Judge A and Judge B
> rescorr <- cor(judgeA, judgeB, method = "pearson")
> rescorr # Print the correlation value
[1] 0.6190476
Question 3
# Paired observations: x and y hold eight matching measurements.
x <- c(62, 64, 65, 69, 70, 71, 72, 74)
y <- c(126, 125, 139, 145, 165, 152, 180, 208)

# Pearson correlation coefficient between x and y.
corr <- cor(x, y, method = "pearson")
corr

# Scatter plot of y against x.
ggplot(data = data.frame(x, y), mapping = aes(x = x, y = y)) +
  geom_point(size = 3, color = "green") +
  theme_minimal() +
  labs(
    x = "X values",
    y = "Y values",
    title = "Scatter plot of X vs Y"
  )
> corr <- cor(x, y, method = "pearson")
> corr
[1] 0.9031822
> ggplot(data.frame(x,y), aes(x,y))+
+ geom_point(color = "green", size = 3) +
+ labs(title = "Scatter plot of X vs Y",
+ x = "X values",
+ y = "Y values") +
+ theme_minimal()
Question 4
# Paired observations: x and y hold eight matching measurements.
x <- c(62, 64, 65, 69, 70, 71, 72, 74)
y <- c(126, 125, 139, 145, 165, 152, 180, 208)

# Pearson correlation coefficient between x and y.
corr <- cor(x, y, method = "pearson")
corr

# Scatter plot of y against x; the title carries the "K071" prefix.
ggplot(data = data.frame(x, y), mapping = aes(x = x, y = y)) +
  geom_point(size = 3, color = "green") +
  theme_minimal() +
  labs(
    x = "X values",
    y = "Y values",
    title = "K071 Scatter plot of X vs Y"
  )
# Correlation matrices and pairwise plots for four built-in datasets.
data("airquality")
data("faithful")
data("trees")
data("longley")

# airquality: the Ozone and Solar.R columns contain missing values, so the
# default cor() returns NA for every pair involving them. Using
# "pairwise.complete.obs" computes each coefficient from the rows where both
# variables are observed, so every cell of the matrix is a number.
airquality_corr <- cor(airquality, use = "pairwise.complete.obs")
airquality_corr
ggpairs(airquality)

# faithful
faithful_corr <- cor(faithful)
faithful_corr
ggpairs(faithful)

# trees
trees_corr <- cor(trees)
trees_corr
ggpairs(trees)

# longley
longley_corr <- cor(longley)
longley_corr
ggpairs(longley)
> #Q3
> data("airquality")
> data("faithful")
> data("trees")
> data("longley")
> airquality_corr <- cor(airquality)
> airquality_corr
Ozone Solar.R Wind Temp Month Day
Ozone 1 NA NA NA NA NA
Solar.R NA 1 NA NA NA NA
Wind NA NA 1.0000000 -0.4579879 -0.178292579 0.027180903
Temp NA NA -0.4579879 1.0000000 0.420947252 -0.130593175
Month NA NA -0.1782926 0.4209473 1.000000000 -0.007961763
Day NA NA 0.0271809 -0.1305932 -0.007961763 1.000000000
Question 4
# Q2
# Paired observations: x and y hold eight matching measurements.
x <- c(62, 64, 65, 69, 70, 71, 72, 74)
y <- c(126, 125, 139, 145, 165, 152, 180, 208)

# Pearson correlation coefficient between x and y.
corr <- cor(x, y, method = "pearson")
corr

# Scatter plot of y against x.
ggplot(data = data.frame(x, y), mapping = aes(x = x, y = y)) +
  geom_point(size = 3, color = "green") +
  theme_minimal() +
  labs(
    x = "X values",
    y = "Y values",
    title = "Scatter plot of X vs Y"
  )
# Q3
# Correlation matrices and pairwise plots for four built-in datasets.
data("airquality")
data("faithful")
data("trees")
data("longley")

# airquality: the Ozone and Solar.R columns contain missing values, so the
# default cor() returns NA for every pair involving them. Using
# "pairwise.complete.obs" computes each coefficient from the rows where both
# variables are observed, so every cell of the matrix is a number.
airquality_corr <- cor(airquality, use = "pairwise.complete.obs")
airquality_corr
ggpairs(airquality)

# faithful
faithful_corr <- cor(faithful)
faithful_corr
ggpairs(faithful)

# trees
trees_corr <- cor(trees)
trees_corr
ggpairs(trees)

# longley
longley_corr <- cor(longley)
longley_corr
ggpairs(longley)
# Q4
# Compare the 2023 daily returns of Tesla (TSLA) and Ford (F).
# getSymbols(), Cl() and dailyReturn() are quantmod functions; the package
# must be attached before they can be called.
library(quantmod)

# auto.assign = FALSE returns each price series instead of creating globals.
# With the default, the Ford download would create an object called `F`,
# masking R's shorthand for FALSE.
tsla_prices <- getSymbols("TSLA", from = "2023-01-01", to = "2023-12-31",
                          auto.assign = FALSE)
ford_prices <- getSymbols("F", from = "2023-01-01", to = "2023-12-31",
                          auto.assign = FALSE)

# Daily returns computed from the closing prices.
TSLA_returns <- dailyReturn(Cl(tsla_prices))
F_returns <- dailyReturn(Cl(ford_prices))

# Keep only dates present in both series, then drop to plain numeric vectors
# so cor(), plot() and lm() work on matched pairs of observations.
paired_returns <- merge(TSLA_returns, F_returns, join = "inner")
tsla_ret <- as.numeric(paired_returns[, 1])
ford_ret <- as.numeric(paired_returns[, 2])

# Calculate the correlation between Tesla and Ford daily returns
correlation <- cor(tsla_ret, ford_ret)
cat("Correlation between TSLA and F daily returns:", correlation, "\n")

# Two stacked panels (2 rows, 1 column), one time series per panel.
old_par <- par(mfrow = c(2, 1))
plot(TSLA_returns, main = "Tesla Daily Returns", col = "blue", type = "l")
plot(F_returns, main = "Ford Daily Returns", col = "red", type = "l")
# Restore the previous layout so the scatter plot gets a full page.
par(old_par)

# Scatter plot of Ford returns against Tesla returns with a fitted line.
plot(tsla_ret, ford_ret, main = "Correlation between TSLA and F Returns",
     xlab = "TSLA Returns", ylab = "Ford Returns", pch = 19, col = "purple")
abline(lm(ford_ret ~ tsla_ret), col = "red")
Output
- date: 2025.03.04
- time: 13:33