SVD Algorithm

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

SVD Algorithm

SVD.pls <- function(x, y, n.components = NULL) {
  # Step 1: Center and scale X and Y
  X <- scale(x, center = TRUE, scale = TRUE)
  Y <- scale(y, center = TRUE, scale = TRUE)

  x.mean <- attr(X, "scaled:center")
  x.sd <- attr(X, "scaled:scale")
  y.mean <- attr(Y, "scaled:center")
  y.sd <- attr(Y, "scaled:scale")

  n <- nrow(X)
  p <- ncol(X)
  q <- ncol(Y)

  # Determine number of components
  rank_X <- qr(X)$rank
  if (is.null(n.components)) {
    n.components <- rank_X
  } else {
    n.components <- min(n.components, rank_X)
  }

  # Preallocate matrices
  T <- matrix(0, n, n.components)  # X scores
  U <- matrix(0, n, n.components)  # Y scores
  P_loadings <- matrix(0, p, n.components)  # X loadings
  W <- matrix(0, p, n.components)  # X weights
  Q_loadings <- matrix(0, q, n.components)  # Y loadings (reference)
  B_vector <- numeric(n.components)

  # Initial total sum of squares
  SSX_total <- sum(X^2)
  SSY_total <- sum(Y^2)

  X_explained <- numeric(n.components)
  Y_explained <- numeric(n.components)

  # Store initial X and Y
  E <- X
  F <- Y

  for (h in seq_len(n.components)) {
    # Step 1: Cross-covariance matrix
    R <- t(E) %*% F

    # Step 2: SVD of R
    svd_R <- svd(R)
    w <- svd_R$u[, 1, drop = FALSE]
    c <- svd_R$v[, 1, drop = FALSE]

    # Step 3: Scores
    t <- E %*% w
    t <- t / sqrt(sum(t^2))  # normalize t
    u <- F %*% c

    # Step 4: Loadings
    p <- t(E) %*% t

    # Step 5: Regression scalar b
    b <- drop(t(t) %*% u)

    # Step 6: Deflation
    E <- E - t %*% t(p)
    F <- F - b * t %*% t(c)

    # Store results
    T[, h] <- t
    U[, h] <- u
    P_loadings[, h] <- p
    W[, h] <- w
    Q_loadings[, h] <- c
    B_vector[h] <- b

    # Explained variance
    X_explained[h] <- sum(p^2) / SSX_total * 100
    Y_explained[h] <- (b^2) / SSY_total * 100
  }

  # Cumulative variance explained
  X_cum_explained <- cumsum(X_explained)
  Y_cum_explained <- cumsum(Y_explained)

  # Clean up effective components
  effective_components <- sum(B_vector != 0)

  P_loadings <- P_loadings[, seq_len(effective_components), drop = FALSE]
  Q_loadings <- Q_loadings[, seq_len(effective_components), drop = FALSE]
  W <- W[, seq_len(effective_components), drop = FALSE]
  T <- T[, seq_len(effective_components), drop = FALSE]
  U <- U[, seq_len(effective_components), drop = FALSE]
  B_vector <- B_vector[seq_len(effective_components)]
  X_explained <- X_explained[seq_len(effective_components)]
  Y_explained <- Y_explained[seq_len(effective_components)]
  X_cum_explained <- X_cum_explained[seq_len(effective_components)]
  Y_cum_explained <- Y_cum_explained[seq_len(effective_components)]

  # Normalize C (Y weights)
  C <- apply(Q_loadings, 2, function(c) c / sqrt(sum(c^2)))

  # Pseudo-inverse of P_loadings
  svd_P <- svd(P_loadings)
  d_inv <- ifelse(svd_P$d > .Machine$double.eps, 1 / svd_P$d, 0)
  P_pinv <- t(svd_P$v %*% diag(d_inv) %*% t(svd_P$u))
  P_pinv <- P_pinv[, seq_len(effective_components), drop = FALSE]

  # Final scaled coefficients
  B_scaled <- P_pinv %*% diag(B_vector) %*% t(Q_loadings)

  # Rescale to original units
  B_original <- sweep(B_scaled, 2, y.sd, "*")
  B_original <- sweep(B_original, 1, x.sd, "/")

  rownames(B_original) <- colnames(x)
  colnames(B_original) <- colnames(y)

  intercept <- rep(0, length(y.mean))
  names(intercept) <- colnames(y)

  list(
    T = T,  # X scores
    U = U,  # Y scores
    W = W,  # X weights
    C = C,  # Y weights (normalized)
    P_loadings = P_loadings,  # X loadings (reference)
    Q_loadings = Q_loadings,  # Y loadings (reference)
    B_vector = B_vector,
    coefficients = B_original,
    intercept = intercept,
    X_explained = X_explained,
    Y_explained = Y_explained,
    X_cum_explained = X_cum_explained,
    Y_cum_explained = Y_cum_explained
  )
}

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.