Â
library(MASS)
library(mvtnorm)
# Define parameters for the bivariate normal distribution
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) # Covariance matrix (not diagonal)
# Generate grid points for the contours
x <- seq(-3, 3, length.out = 100)
y <- seq(-3, 3, length.out = 100)
grid <- expand.grid(x = x, y = y)
# Compute density values for the contours
z <- mapply(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma), grid$x, grid$y)
z_matrix <- matrix(z, nrow = length(x), ncol = length(y))
# Plot contours with a 1:1 aspect ratio
plot(1, type = "n", xlab = "X_1", ylab = "X_2", main = "Bivariate Normal Distribution",
xlim = c(-3, 3), ylim = c(-3, 3), asp = 1)
contour(x, y, z_matrix, add = TRUE)
# Compute eigenvectors and eigenvalues of the covariance matrix
eig <- eigen(sigma)
eig_vectors <- eig$vectors
eig_values <- eig$values
# Scale eigenvectors for visualization
scale_factor <- sqrt(eig_values) * 2
eig_scaled <- t(t(eig_vectors) * scale_factor)
# Overlay eigenvectors as arrows
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 1], mu[2] + eig_scaled[2, 1], col = "blue", lwd = 2, length = 0.1)
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 2], mu[2] + eig_scaled[2, 2], col = "green", lwd = 2, length = 0.1)
# Add a legend outside the plot
legend("topright", inset = c(0, 0), legend = c("Eigenvector 1", "Eigenvector 2"),
col = c("blue", "green"), lty = c(1, 1, 1, 2), lwd = 2, xpd = TRUE)
Â
Regressing \(X_{2}\) onto \(X_1\), the line of best fit is given by:
\[ X_{2\cdot 1} = X_2 - \Sigma_{21}\Sigma_{11}^{-1}X_1. \]
Â
# Define parameters for the bivariate normal distribution
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) # Covariance matrix (not diagonal)
# Generate grid points for the contours
x <- seq(-3, 3, length.out = 100)
y <- seq(-3, 3, length.out = 100)
grid <- expand.grid(x = x, y = y)
# Compute density values for the contours
z <- mapply(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma), grid$x, grid$y)
z_matrix <- matrix(z, nrow = length(x), ncol = length(y))
# Plot contours with a 1:1 aspect ratio
plot(1, type = "n", xlab = "X_1", ylab = "X_2", main = "Bivariate Normal Distribution",
xlim = c(-3, 3), ylim = c(-3, 3), asp = 1)
contour(x, y, z_matrix, add = TRUE)
# Define the values of x for the conditional distributions
x_values <- c(-2, 0, 2)
# Conditional distribution parameters
sigma_xx <- sigma[1, 1]
sigma_xy <- sigma[1, 2]
sigma_yy <- sigma[2, 2]
mu_x <- mu[1]
mu_y <- mu[2]
# Compute eigenvectors and eigenvalues of the covariance matrix
eig <- eigen(sigma)
eig_vectors <- eig$vectors
eig_values <- eig$values
# Scale eigenvectors for visualization
scale_factor <- sqrt(eig_values) * 2
eig_scaled <- t(t(eig_vectors) * scale_factor)
# Overlay eigenvectors as arrows
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 1], mu[2] + eig_scaled[2, 1], col = "blue", lwd = 2, length = 0.1)
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 2], mu[2] + eig_scaled[2, 2], col = "green", lwd = 2, length = 0.1)
# Compute regression line parameters
beta_1 <- sigma_xy / sigma_xx
beta_0 <- mu_y - beta_1 * mu_x
# Generate points for the regression line
x_reg <- seq(-3, 3, length.out = 100)
y_reg <- beta_0 + beta_1 * x_reg
# Overlay regression line
lines(x_reg, y_reg, col = "purple", lwd = 2, lty = 2)
# Add a legend outside the plot
legend("topright", inset = c(-0.05, 0), legend = c("Eigenvector 1", "Eigenvector 2", "Regression"),
col = c("blue", "green", "purple"), lty = c(1, 1, 1, 2), lwd = 2, xpd = TRUE)
Â
The variance of the conditional distribution \(X_2\,|\,X_1\) does not depend on the value of \(X_1\):
\[ {\rm Var}(X_2\,|\,X_1) = \Sigma_{22} - \Sigma_{21}\Sigma_{11}^{-1}\Sigma_{12}. \]
Â
# Define parameters for the bivariate normal distribution
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) # Covariance matrix (not diagonal)
# Generate grid points for the contours
x <- seq(-3, 3, length.out = 100)
y <- seq(-3, 3, length.out = 100)
grid <- expand.grid(x = x, y = y)
# Compute density values for the contours
z <- mapply(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma), grid$x, grid$y)
z_matrix <- matrix(z, nrow = length(x), ncol = length(y))
# Plot contours with a 1:1 aspect ratio
plot(1, type = "n", xlab = "X_1", ylab = "X_2", main = "Bivariate Normal Distribution",
xlim = c(-3, 3), ylim = c(-3, 3), asp = 1)
contour(x, y, z_matrix, add = TRUE)
# Define the values of x for the conditional distributions
x_values <- c(-2, 0, 2)
# Conditional distribution parameters
sigma_xx <- sigma[1, 1]
sigma_xy <- sigma[1, 2]
sigma_yy <- sigma[2, 2]
mu_x <- mu[1]
mu_y <- mu[2]
# Loop over x_values to plot the conditional distributions
for (x_val in x_values) {
# Conditional mean and variance of Y given X = x_val
cond_mean <- mu_y + sigma_xy / sigma_xx * (x_val - mu_x)
cond_var <- sigma_yy - sigma_xy^2 / sigma_xx
# Generate points for the conditional distribution
y_cond <- seq(-3, 3, length.out = 100)
density_cond <- dnorm(y_cond, mean = cond_mean, sd = sqrt(cond_var))
# Scale the density for better visualization
density_cond <- density_cond / max(density_cond) * 0.5
# Add the conditional distribution as a line to the plot
lines(density_cond + x_val, y_cond, col = "red", lwd = 2)
}
# Compute eigenvectors and eigenvalues of the covariance matrix
eig <- eigen(sigma)
eig_vectors <- eig$vectors
eig_values <- eig$values
# Scale eigenvectors for visualization
scale_factor <- sqrt(eig_values) * 2
eig_scaled <- t(t(eig_vectors) * scale_factor)
# Overlay eigenvectors as arrows
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 1], mu[2] + eig_scaled[2, 1], col = "blue", lwd = 2, length = 0.1)
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 2], mu[2] + eig_scaled[2, 2], col = "green", lwd = 2, length = 0.1)
# Compute regression line parameters
beta_1 <- sigma_xy / sigma_xx
beta_0 <- mu_y - beta_1 * mu_x
# Generate points for the regression line
x_reg <- seq(-3, 3, length.out = 100)
y_reg <- beta_0 + beta_1 * x_reg
# Overlay regression line
lines(x_reg, y_reg, col = "purple", lwd = 2, lty = 2)
# Add a legend outside the plot
legend("topright", inset = c(-0.05, 0), legend = c("Eigenvector 1", "Eigenvector 2", "Conditionals", "Regression"),
col = c("blue", "green", "red", "purple"), lty = c(1, 1, 1, 2), lwd = 2, xpd = TRUE)