The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
library(SmoothPLS)
library(pls)
#> Warning: le package 'pls' a été compilé avec la version R 4.4.3
#>
#> Attachement du package : 'pls'
#> L'objet suivant est masqué depuis 'package:stats':
#>
#> loadingsThis notebook show the Smooth PLS algorithm for a multi-states Categorical Functional Data (MS-CFD).
N_states = 3
nind = 100 # number of individuals (train set)
start = 0 # First time
end = 100 # end time
curve_type = 'cat'
TTRatio = 0.2 # Train Test Ratio means we have floor(nind*TTRatio/(1-TTRatio))
NotS_ratio = 0.2 # noise variance over total variance for Y
beta_0_real=65.4321 # Intercept value for the link between X(t) and Y
nbasis = 10 # number of basis functions
norder = 4 # 4 for cubic splines basis
regul_time = seq(start, end, 5) # regularisation time sequence
regul_time_0 = seq(start, end, 1)
int_mode = 1 # in case of integration errors.All the states will share the same basis.
basis = create_bspline_basis(start, end, nbasis, norder)
#basis = fda::create.fourier.basis(c(start,end), nbasis = nbasis)
# All the states will share the same basis.
basis_list = obj_list_creation(N_rep = N_states, obj = basis)
plot(basis, main=paste0(nbasis, " ", basis$type," functions basis"))We have to prepare the data before the FPLS method. For each state, we build its indicator function.
df_processed = cat_data_to_indicator(df_cfd)
length(df_processed)
#> [1] 3
names(df_processed)
#> [1] "state_1" "state_2" "state_3"head(df_processed$state_3, 20)
#> id time state
#> 1 1 0.0000000 1
#> 2 1 0.4182081 0
#> 3 1 1.5448560 1
#> 4 1 5.7064580 0
#> 6 1 44.2917255 1
#> 7 1 49.3986723 0
#> 12 1 94.5885722 1
#> 13 1 100.0000000 1
#> 14 2 0.0000000 0
#> 16 2 23.5972614 1
#> 17 2 35.2188747 0
#> 19 2 62.3399729 1
#> 20 2 69.0495525 0
#> 23 2 100.0000000 0
#> 24 3 0.0000000 1
#> 25 3 7.7964939 0
#> 27 3 15.5383881 1
#> 28 3 26.7390645 0
#> 29 3 38.7901396 1
#> 30 3 60.9841073 0for(i in 1:length(beta_func_list)){
plot(0:end, beta_func_list[[i]](0:end, end_time = end),
ylab=paste0("Beta(t) n°=", i), type = 'l')
title(paste0("Beta(t) n°=", i))
}Y generation is based on the following equation : $Y = 0 + {i=1}^K _0^T _i(t) ind_i(t) dt $ with \(ind_i(t) = \{0, 1\}_{t \in [0, T]}\) the indicator function of the state \(i\).
We link \(\beta_i\) with the \(state_i\).
Y_df = generate_Y_df(df = df_processed, curve_type = curve_type,
beta_real_func_or_list = beta_func_list,
beta_0_real = beta_0_real,
NotS_ratio = NotS_ratio)
Y = Y_df$Y_noised
names(Y_df)
#> [1] "id" "Y_beta1" "Y_beta2" "Y_beta3" "Y_real" "Y_noised"head(Y_df)
#> id Y_beta1 Y_beta2 Y_beta3 Y_real Y_noised
#> 1 1 65.806580 -8.252748 -7.4320011 115.55393 117.36995
#> 2 2 26.027793 3.651140 0.2430665 95.35410 98.68405
#> 3 3 18.239666 -6.248977 -19.0326323 58.39016 102.56355
#> 4 4 5.205643 22.113474 -19.5805494 73.17067 85.04207
#> 5 5 -12.639639 38.985054 -9.9986223 81.77889 49.19584
#> 6 6 -13.391616 -20.803539 -20.0241998 11.21275 -6.47791naivePLS_obj = naivePLS(df_list = df_cfd, Y = Y, regul_time_obj = regul_time,
curve_type_obj = 'cat',
id_col_obj = 'id', time_col_obj = 'time',
print_steps = TRUE, plot_rmsep = TRUE,
print_nbComp = TRUE,plot_reg_curves = TRUE)
#> ### Naive PLS ###
#> => Input format assertions.
#> => Input format assertions OK.
#> => Data formatting.
#> => PLS model.#> [1] "Optimal number of PLS components : 2"
fpls_obj = funcPLS(df_list = df_cfd, Y = Y_df$Y_noised,
basis_obj = basis,
curve_type_obj = 'cat',
regul_time_obj = regul_time,
id_col_obj = 'id', time_col_obj = 'time',
print_steps = TRUE, plot_rmsep = TRUE,
print_nbComp = TRUE, plot_reg_curves = TRUE)
#> ### Functional PLS ###
#> => Input format assertions.
#> => Input format assertions OK.
#> => Building alpha matrix.
#> => Building curve names.
#> ==> Evaluating alpha for : CatFD_1_1.
#> ==> Evaluating alpha for : CatFD_1_2.
#> ==> Evaluating alpha for : CatFD_1_3.
#> => Evaluate metrix and root_metric.
#> => plsr(Y ~ trans_alphas).#> Optimal number of PLS components : 2 .
#> => Build Intercept and regression curves for optimal number of components.
#> ==> Build regression curve for : CatFD_1_1
#> ==> Build regression curve for : CatFD_1_2
#> ==> Build regression curve for : CatFD_1_3
spls_obj = smoothPLS(df_list = df_cfd, Y = Y_df$Y_noised, basis_obj = basis,
orth_obj = TRUE, curve_type_obj = 'cat',
int_mode = 1,
id_col_obj ='id', time_col_obj = 'time',
print_steps = TRUE, plot_rmsep = TRUE,
print_nbComp = TRUE, plot_reg_curves = TRUE)
#> ### Smooth PLS ###
#> ## Use parallelization in case of heavy computational load. ##
#> ## Threshold can be manualy adjusted : (default 2500) ##
#> ## >options(SmoothPLS.parallel_threshold = 500) ##
#> => Input format assertions.
#> => Input format assertions OK.
#> => Orthonormalize basis.
#> => Data objects formatting.
#> => Evaluate Lambda matrix.
#> ==> Lambda for : CatFD_1_state_1.
#> ==> Lambda for : CatFD_1_state_2.
#> ==> Lambda for : CatFD_1_state_3.
#> => PLSR model.
#> => Optimal number of PLS components : 2#> => Evaluate SmoothPLS functions and <w_i, p_j> coef.
#> => Build regression functions and intercept.
#> ==> Build regression curve for : CatFD_1_state_1
#> ==> Build regression curve for : CatFD_1_state_2
#> ==> Build regression curve for : CatFD_1_state_3
# Warning ms_spls_obj$delta_ms_list[[1]] is the intercept!
cat("curve_1 : smooth PLS regression curve.\n")
#> curve_1 : smooth PLS regression curve.
cat("curve_2 : functional PLS regression curve.\n")
#> curve_2 : functional PLS regression curve.
cat("curve_3 : naive PLS regression coefficients\n")
#> curve_3 : naive PLS regression coefficients
for(i in 1:N_states){
start = 0
print(paste0("State_", (i)))
evaluate_curves_distances(real_f = beta_func_list[[i]],
regul_time = regul_time,
fun_fd_list = list(spls_obj$reg_obj[[i+1]],
fpls_obj$reg_obj[[i+1]],
approxfun(
x = regul_time,
y = naivePLS_obj$opti_reg_coef[
start:(start+length(regul_time)
)])
)
)
start = start + length(regul_time)
}
#> [1] "State_1"
#> [1] "real_f -> curve_1 / INPROD : 47.4572000558795 / DIST : 13.2905098633588"
#> [1] "real_f -> curve_2 / INPROD : 43.6591352214833 / DIST : 13.6320162598076"
#> [1] "real_f -> curve_3 / INPROD : 13.4690073074092 / DIST : 27.8941820901434"
#> [1] "State_2"
#> [1] "real_f -> curve_1 / INPROD : 22.9684162701822 / DIST : 14.1561671305604"
#> [1] "real_f -> curve_2 / INPROD : 25.7051668331251 / DIST : 15.742963397086"
#> [1] "real_f -> curve_3 / INPROD : 31.4665194389073 / DIST : 38.1269198992419"
#> [1] "State_3"
#> [1] "real_f -> curve_1 / INPROD : 25.9590236019394 / DIST : 15.1535445344829"
#> [1] "real_f -> curve_2 / INPROD : 27.0203400870638 / DIST : 11.7454038364192"
#> [1] "real_f -> curve_3 / INPROD : -64.0495576976778 / DIST : 37.3104708298011"for(i in 1:N_states){
start = 0
y_lim = eval_max_min_y(f_list = list(spls_obj$reg_ob[[i+1]],
fpls_obj$reg_ob[[i+1]],
approxfun(
x = regul_time,
y = naivePLS_obj$opti_reg_coef[
start:(start+length(regul_time))]),
beta_func_list[[i]]
),
regul_time = regul_time_0)
plot(regul_time_0, beta_func_list[[i]](regul_time_0), col = 'black',
ylim = y_lim, xlab = 'Time', ylab = 'Value', type = 'l')
lines(regul_time_0, approxfun(x = regul_time,
y = naivePLS_obj$opti_reg_coef[
start:(start+
length(regul_time))])(regul_time_0),
col = 'green')
title(paste0(names(spls_obj$reg_obj)[i+1], " regression curves"))
plot(spls_obj$reg_obj[[i+1]], col = 'blue', add = TRUE)
plot(fpls_obj$reg_obj[[i+1]], col = 'red', add = TRUE)
legend("topleft",
legend = c("Real curve", "NaivePLS coef",
"SmoothPLS reg curve", "FunctionalPLS reg curve"),
col = c("black", "green", "blue", "red"),
lty = 1,
lwd = 1)
start = start + length(regul_time)
}train_results = data.frame(matrix(ncol = 5, nrow = 3))
colnames(train_results) = c("PRESS", "RMSE", "MAE", "R2", "var_Y")
rownames(train_results) = c("NaivePLS", "FPLS", "SmoothPLS")
test_results = train_resultsY_train = Y_df$Y_noised
# Naive
Y_hat = predict(naivePLS_obj$plsr_model,
ncomp = naivePLS_obj$nbCP_opti,
newdata = naivePLS_obj$plsr_model$model$`as.matrix(df_mod_wide)`)
train_results["NaivePLS", ] = evaluate_results(Y_train, Y_hat)
# FPLS
Y_hat_fpls = (predict(fpls_obj$plsr_model, ncomp = fpls_obj$nbCP_opti,
newdata = fpls_obj$trans_alphas)
+ fpls_obj$reg_obj$Intercept
+ mean(Y))
Y_hat_fpls = smoothPLS_predict(df_predict_list = df_cfd,
delta_list = fpls_obj$reg_obj,
curve_type_obj = curve_type,
int_mode = int_mode,
regul_time_obj = regul_time)
train_results["FPLS", ] = evaluate_results(Y_train, Y_hat_fpls)
# Smooth PLS
Y_hat_spls = smoothPLS_predict(df_predict_list = df_cfd,
delta_list = spls_obj$reg_obj,
curve_type_obj = curve_type,
int_mode = int_mode,
regul_time_obj = regul_time)
train_results["SmoothPLS", ] = evaluate_results(Y_train, Y_hat_spls)
train_results["NaivePLS", "nb_cp"] = naivePLS_obj$nbCP_opti
train_results["FPLS", "nb_cp"] = fpls_obj$nbCP_opti
train_results["SmoothPLS", "nb_cp"] = spls_obj$nbCP_optiY_test = Y_df_test$Y_noised
# Naive
df_test_wide = naivePLS_formatting(df_list = df_test,
regul_time_obj = regul_time,
curve_type_obj = curve_type,
id_col_obj = 'id', time_col_obj = 'time')
Y_hat = predict(naivePLS_obj$plsr_model,
ncomp = naivePLS_obj$nbCP_opti,
newdata = as.matrix(df_test_wide))
test_results["NaivePLS", ] = evaluate_results(Y_test, Y_hat)
# FPLS
Y_hat_fpls = smoothPLS_predict(df_predict_list = df_test,
delta_list = fpls_obj$reg_obj,
curve_type_obj = curve_type,
int_mode = int_mode,
regul_time_obj = regul_time)
test_results["FPLS", ] = evaluate_results(Y_test, Y_hat_fpls)
# Smooth PLS
Y_hat_spls = smoothPLS_predict(df_predict_list = df_test,
delta_list = spls_obj$reg_obj,
curve_type_obj = curve_type,
int_mode = int_mode,
regul_time_obj = regul_time)
test_results["SmoothPLS", ] = evaluate_results(Y_test, Y_hat_spls)
test_results["NaivePLS", "nb_cp"] = naivePLS_obj$nbCP_opti
test_results["FPLS", "nb_cp"] = fpls_obj$nbCP_opti
test_results["SmoothPLS", "nb_cp"] = spls_obj$nbCP_optitrain_results
#> PRESS RMSE MAE R2 var_Y nb_cp
#> NaivePLS 56851.26 23.84350 18.74728 0.8052915 80.52915 2
#> FPLS 51006.48 22.58462 17.48461 0.8253091 77.95544 2
#> SmoothPLS 54449.52 23.33442 18.54678 0.8135171 111.55514 2
test_results
#> PRESS RMSE MAE R2 var_Y nb_cp
#> NaivePLS 12668.95 22.51128 17.71806 0.7918992 96.46796 2
#> FPLS 13999.53 23.66392 17.75687 0.7700429 91.67738 2
#> SmoothPLS 17130.76 26.17691 20.60632 0.7186092 135.21697 2These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.