Multivariate Normal Distribution Contours

 

library(MASS)
library(mvtnorm)

# Define parameters for the bivariate normal distribution
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) # Covariance matrix (not diagonal)

# Generate grid points for the contours
x <- seq(-3, 3, length.out = 100)
y <- seq(-3, 3, length.out = 100)
grid <- expand.grid(x = x, y = y)

# Compute density values for the contours
z <- mapply(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma), grid$x, grid$y)
z_matrix <- matrix(z, nrow = length(x), ncol = length(y))

# Plot contours with a 1:1 aspect ratio
plot(1, type = "n", xlab = "X_1", ylab = "X_2", main = "Bivariate Normal Distribution",
     xlim = c(-3, 3), ylim = c(-3, 3), asp = 1)
contour(x, y, z_matrix, add = TRUE)

# Compute eigenvectors and eigenvalues of the covariance matrix
eig <- eigen(sigma)
eig_vectors <- eig$vectors
eig_values <- eig$values

# Scale eigenvectors for visualization
scale_factor <- sqrt(eig_values) * 2
eig_scaled <- t(t(eig_vectors) * scale_factor)

# Overlay eigenvectors as arrows
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 1], mu[2] + eig_scaled[2, 1], col = "blue", lwd = 2, length = 0.1)
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 2], mu[2] + eig_scaled[2, 2], col = "green", lwd = 2, length = 0.1)

# Add a legend outside the plot
legend("topright", inset = c(0, 0), legend = c("Eigenvector 1", "Eigenvector 2"),
       col = c("blue", "green"), lty = c(1, 1, 1, 2), lwd = 2, xpd = TRUE)

 

Regression:

Regressing \(X_{2}\) onto \(X_1\), the line of best fit is given by:

\[ X_{2\cdot 1} = X_2 - \Sigma_{21}\Sigma_{11}^{-1}X_1. \]

 

# Define parameters for the bivariate normal distribution
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) # Covariance matrix (not diagonal)

# Generate grid points for the contours
x <- seq(-3, 3, length.out = 100)
y <- seq(-3, 3, length.out = 100)
grid <- expand.grid(x = x, y = y)

# Compute density values for the contours
z <- mapply(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma), grid$x, grid$y)
z_matrix <- matrix(z, nrow = length(x), ncol = length(y))

# Plot contours with a 1:1 aspect ratio
plot(1, type = "n", xlab = "X_1", ylab = "X_2", main = "Bivariate Normal Distribution",
     xlim = c(-3, 3), ylim = c(-3, 3), asp = 1)
contour(x, y, z_matrix, add = TRUE)

# Define the values of x for the conditional distributions
x_values <- c(-2, 0, 2)

# Conditional distribution parameters
sigma_xx <- sigma[1, 1]
sigma_xy <- sigma[1, 2]
sigma_yy <- sigma[2, 2]
mu_x <- mu[1]
mu_y <- mu[2]

# Compute eigenvectors and eigenvalues of the covariance matrix
eig <- eigen(sigma)
eig_vectors <- eig$vectors
eig_values <- eig$values

# Scale eigenvectors for visualization
scale_factor <- sqrt(eig_values) * 2
eig_scaled <- t(t(eig_vectors) * scale_factor)

# Overlay eigenvectors as arrows
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 1], mu[2] + eig_scaled[2, 1], col = "blue", lwd = 2, length = 0.1)
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 2], mu[2] + eig_scaled[2, 2], col = "green", lwd = 2, length = 0.1)

# Compute regression line parameters
beta_1 <- sigma_xy / sigma_xx
beta_0 <- mu_y - beta_1 * mu_x

# Generate points for the regression line
x_reg <- seq(-3, 3, length.out = 100)
y_reg <- beta_0 + beta_1 * x_reg

# Overlay regression line
lines(x_reg, y_reg, col = "purple", lwd = 2, lty = 2)

# Add a legend outside the plot
legend("topright", inset = c(-0.05, 0), legend = c("Eigenvector 1", "Eigenvector 2", "Regression"),
       col = c("blue", "green", "purple"), lty = c(1, 1, 1, 2), lwd = 2, xpd = TRUE)

 

Conditional distributions:

The variance of the conditional distribution \(X_2\,|\,X_1\) does not depend on the value of \(X_1\):

\[ {\rm Var}(X_2\,|\,X_1) = \Sigma_{22} - \Sigma_{21}\Sigma_{11}^{-1}\Sigma_{12}. \]

 

# Define parameters for the bivariate normal distribution
mu <- c(0, 0) # Mean vector
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2) # Covariance matrix (not diagonal)

# Generate grid points for the contours
x <- seq(-3, 3, length.out = 100)
y <- seq(-3, 3, length.out = 100)
grid <- expand.grid(x = x, y = y)

# Compute density values for the contours
z <- mapply(function(x, y) dmvnorm(c(x, y), mean = mu, sigma = sigma), grid$x, grid$y)
z_matrix <- matrix(z, nrow = length(x), ncol = length(y))

# Plot contours with a 1:1 aspect ratio
plot(1, type = "n", xlab = "X_1", ylab = "X_2", main = "Bivariate Normal Distribution",
     xlim = c(-3, 3), ylim = c(-3, 3), asp = 1)
contour(x, y, z_matrix, add = TRUE)

# Define the values of x for the conditional distributions
x_values <- c(-2, 0, 2)

# Conditional distribution parameters
sigma_xx <- sigma[1, 1]
sigma_xy <- sigma[1, 2]
sigma_yy <- sigma[2, 2]
mu_x <- mu[1]
mu_y <- mu[2]

# Loop over x_values to plot the conditional distributions
for (x_val in x_values) {
  # Conditional mean and variance of Y given X = x_val
  cond_mean <- mu_y + sigma_xy / sigma_xx * (x_val - mu_x)
  cond_var <- sigma_yy - sigma_xy^2 / sigma_xx
  
  # Generate points for the conditional distribution
  y_cond <- seq(-3, 3, length.out = 100)
  density_cond <- dnorm(y_cond, mean = cond_mean, sd = sqrt(cond_var))
  
  # Scale the density for better visualization
  density_cond <- density_cond / max(density_cond) * 0.5
  
  # Add the conditional distribution as a line to the plot
  lines(density_cond + x_val, y_cond, col = "red", lwd = 2)
}

# Compute eigenvectors and eigenvalues of the covariance matrix
eig <- eigen(sigma)
eig_vectors <- eig$vectors
eig_values <- eig$values

# Scale eigenvectors for visualization
scale_factor <- sqrt(eig_values) * 2
eig_scaled <- t(t(eig_vectors) * scale_factor)

# Overlay eigenvectors as arrows
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 1], mu[2] + eig_scaled[2, 1], col = "blue", lwd = 2, length = 0.1)
arrows(mu[1], mu[2], mu[1] + eig_scaled[1, 2], mu[2] + eig_scaled[2, 2], col = "green", lwd = 2, length = 0.1)

# Compute regression line parameters
beta_1 <- sigma_xy / sigma_xx
beta_0 <- mu_y - beta_1 * mu_x

# Generate points for the regression line
x_reg <- seq(-3, 3, length.out = 100)
y_reg <- beta_0 + beta_1 * x_reg

# Overlay regression line
lines(x_reg, y_reg, col = "purple", lwd = 2, lty = 2)

# Add a legend outside the plot
legend("topright", inset = c(-0.05, 0), legend = c("Eigenvector 1", "Eigenvector 2", "Conditionals", "Regression"),
       col = c("blue", "green", "red", "purple"), lty = c(1, 1, 1, 2), lwd = 2, xpd = TRUE)