##Initial Setup
Only run the first time you load flimo. Installing Julia packages.
#julia_setup()
##Example 1 : Poisson Distribution
Five Poisson variables with parameter = 100 are drawn. We try to find this value by comparing mean of 10 simulated Poisson variables with the observed data. The summary statistic is :
\[sumstats(\theta) =\left( \widehat{\mathbb{E}[X|\theta]}-\overline{X^{data}}\right)^2=\left(\frac{1}{n_{sim}}\sum_{i=1}^{n_{sim}}X_i^\theta - \frac{1}{n_{data}}\sum_{i=1}^{n_{data}}X_i^{data}\right)^2\]
With the Normal approximation, Poisson distribution is replaced by a Normal distribution with same mean and variance :
\[\mathcal{P}(\theta) \leftarrow \mathcal{N}(\mu = \theta, \sigma^2 = \theta)\]
#Setup
set.seed(1)
#Create data
<- 100 #data parameter
Theta_true1 <- 5 #data size
n1
<- function(Theta, n){
simulator1 #classical random simulator
rpois(n, lambda = Theta)
}
<- simulator1(Theta_true1, n1)
data1
#Simulations with quantiles
#See README to know how to build this simulator
<- n1 #number of random draws for one simulation
ndraw1
check_simulator(simulator1, ndraw1, 0, 200)
#> [1] FALSE
<- function(Theta, quantiles){
simulatorQ1 qpois(quantiles, lambda = Theta)
}check_simulator(simulatorQ1, ndraw1, 0, 200)
#> [1] TRUE
#With Normal approximation
<- function(Theta, quantiles){
simulatorQ1N qnorm(quantiles, mean = Theta, sd = sqrt(Theta))
}
check_simulator(simulatorQ1N, ndraw1, 0, 200)
#> [1] TRUE
#Quantile-simulator with Normal approximation
#First simulations
<- 50
Theta11 <- 200
Theta21
<- 10
nsim1
<- matrix(runif(ndraw1*nsim1), nrow = nsim1)
quantiles1
#just one simulation
simulatorQ1(Theta11, quantiles1[1,])
#> [1] 49 43 52 45 49
#Same value :
simulatorQ1(Theta11, quantiles1[1,])
#> [1] 49 43 52 45 49
#independent values :
simulatorQ1(Theta11, quantiles1[2,])
#> [1] 43 55 61 54 50
#Matrix of nsim simulations
<- simulatorQ1(Theta11, quantiles1)
simu11 <- simulatorQ1(Theta21, quantiles1)
simu21
#Sample Comparison : summary statistics
<- function(simulations, data){
sumstats1 #simulations : 2D array
#data : 1D array
<- mean(rowMeans(simulations))
mean_simu <- mean(data)
mean_data -mean_data)^2
(mean_simu }
#Plot objective function
#Objective by parameter :
plot_objective(ndraw1, nsim1, data1, sumstats1, simulatorQ1, lower = 0, upper = 200)
#Objective with Normal approximation :
plot_objective(ndraw1, nsim1, data1, sumstats1, simulatorQ1N, lower = 0, upper = 200)
#both plots
#We use same quantiles for both
<- matrix(runif(ndraw1*nsim1), nrow = nsim1)
quantiles1
<- plot_objective(NULL, NULL, data1, sumstats1, simulatorQ1,
p lower = 0, upper = 200, quantiles = quantiles1)
plot_objective(NULL, NULL,
data1, sumstats1, simulatorQ1N,lower = 0, upper = 200,
visualize_min = FALSE,
add_to_plot = p, quantiles = quantiles1)
#Locally, Poisson quantiles and then objective function are constant by pieces
#To compare with normal approximation
<- plot_objective(NULL, NULL, data1, sumstats1,
p lower = 71, upper = 72,
simulatorQ1, visualize_min = FALSE, quantiles = quantiles1)
plot_objective(ndraw1, nsim1, data1, sumstats1, simulatorQ1N,
lower = 71, upper = 72, visualize_min = FALSE, add_to_plot = p,
quantiles = quantiles1)
<- 5
nsimplot <- matrix(runif(ndraw1*nsimplot), nrow = nsimplot)
quantilesplot
<- function(Theta) flimobjective(Theta, quantilesplot, data1, sumstats1, simulatorQ1)
intern_obj
<- function(Theta) flimobjective(Theta, quantilesplot, data1, sumstats1, simulatorQ1N)
intern_objN
<- function(Theta){
intern_objRand <- rpois(nsim1, Theta)
sim mean(sim)-mean(data1))^2
(
}
<- c(seq(0, 200, length.out = 1e4), seq(70.5, 72.5, length.out = 1e4))
x <- sapply(x, intern_obj)
y <- sapply(x, intern_objN)
yN
<- seq(0, 200, length.out = 2e3)
xR <- sapply(xR, intern_objRand)
yR
<- data.frame(x = x, y = y, Method = "Fixed Landscape")
df <- rbind(df, data.frame(x = x, y = yR, Method = "Naive computation"))
df <- rbind(df, data.frame(x = x, y = yN, Method = "Fixed Landscape\nwith Normal Approximation"))
df
ggplot()+
geom_line(aes(xR,yR), color = "grey")+
geom_line(aes(x,y), color = "blue")+
ggtitle("Inference for a Poisson distribution : value of objective")+
labs(x = "Theta", y = "J(Theta)")+
theme(legend.position='none',
plot.title = element_text(hjust = 0.5))
# ggsave("../../manuscript/Figures/poisson.png", dpi = 350, units = "mm", width = 180, height = 150)
<- 71
x1 <- x1+1
x2 <- x[which(x<=x1)[length(which(x<=x1))]]
xin <- max(y[which(x<=x1)[length(which(x<=x1))]], yN[which(x<=x1)[length(which(x<=x1))]])
yax <- x[which(x>=x2)[1]]
xax <- min(y[which(x>=x2)[1]], yN[which(x>=x2)[1]])
yin
ggplot()+
geom_line(aes(x,y), color = "blue")+
geom_line(aes(x,yN), color = "red")+
ggtitle("Inference for a Poisson distribution : value of objective")+
labs(x = "Theta", y = "J(Theta)")+
theme(legend.position='none',
plot.title = element_text(hjust = 0.5))+
coord_cartesian(xlim = c(xin, xax), ylim = c(yin, yax))
# ggsave("../../manuscript/Figures/poisson_zoom.png", dpi = 350, units = "mm", width = 180, height = 150)
Warning : optimization issues in R for normalized process and Theta close to 0. Lower bound set to 1.
#Optimization with normal approximation of Poisson distribution
#First mode : full R
#default mode
system.time(optim1R <- flimoptim(data1, ndraw1, sumstats1, simulatorQ1N,
ninfer = 10,
nsim = nsim1,
lower = 1, upper = 1000,
randomTheta0 = TRUE))
#> utilisateur système écoulé
#> 0.019 0.001 0.020
optim1R#> Optimisation Result
#> Mode : R
#> method : L-BFGS-B
#> Number of inferences : 10
#> Convergence : 10/10
#> Mean of minimizer :
#> 101.105782785824
#> Best minimizer :
#> 101.358828149557
#> Reached minima : from 3.79036936450385e-24 to 1.49222794632798e-19
#> Median time by inference 0.00167191028594971
#> Optimisation Result
#> Mode : R
#> method : L-BFGS-B
#> Number of inferences : 10
#> Convergence : 10/10
#> Mean of minimizer :
#> 101.105782785824
#> Best minimizer :
#> 101.358828149557
#> Reached minima : from 3.79036936450385e-24 to 1.49222794632798e-19
#> Median time by inference 0.00167191028594971
summary(optim1R)
#> $Mode
#> [1] "R"
#>
#> $method
#> [1] "L-BFGS-B"
#>
#> $number_inferences
#> [1] 10
#>
#> $number_converged
#> [1] 10
#>
#> $minimizer
#> par1
#> Min. : 99.11
#> 1st Qu.: 99.95
#> Median :101.52
#> Mean :101.11
#> 3rd Qu.:102.01
#> Max. :102.62
#>
#> $minimum
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 3.790e-24 2.638e-22 2.882e-22 1.559e-20 6.127e-22 1.492e-19
#>
#> $median_time_inference
#> [1] 0.00167191
attributes(optim1R)
#> $names
#> [1] "mode" "method" "AD" "minimizer" "minimum" "converged"
#> [7] "initial_x" "f_calls" "g_calls" "message" "time_run"
#>
#> $class
#> [1] "flimo_result"
plot(optim1R)
#Second mode : full J
#Optimization with Automatic Differentiation
#Warning : you need to translate sumstats and simulatorQ to Julia
#Both of these names for the Julia functions are mandatory !
<-"
julia_simulatorQ1N function simulatorQ(Theta, quantiles)
quantile.(Normal.(Theta, sqrt.(Theta)), quantiles)
end
"
<-"
julia_sumstats1 function sumstats(simulations, data)
(mean(mean(simulations, dims = 2))-mean(data))^2
end
"
#Most accurate config for complex problems : IPNewton with AD
if (juliaSetupOk()){
system.time(optim1JAD <- flimoptim(data1, ndraw1, julia_sumstats1, julia_simulatorQ1N,
ninfer = 10,
nsim = nsim1,
lower = 0,
upper = 1000,
randomTheta0 = TRUE,
mode = "Julia",
load_julia = TRUE))
optim1JADsummary(optim1JAD)
attributes(optim1JAD)
plot(optim1JAD)
#IPNewton without AD
system.time(optim1J <- flimoptim(data1, ndraw1, julia_sumstats1, julia_simulatorQ1N,
ninfer = 10, nsim = nsim1, lower = 0, upper = 1000,
randomTheta0 = TRUE, mode = "Julia", AD = FALSE))
optim1Jsummary(optim1J)
attributes(optim1J)
plot(optim1J)
#Brent
system.time(
<- flimoptim(data1,
optim1JBrent
ndraw1,
julia_sumstats1,
julia_simulatorQ1N,ninfer = 10,
nsim = nsim1,
lower = 0,
upper = 1000,
randomTheta0 = TRUE,
mode = "Julia",
method = "Brent")
)
optim1JBrentsummary(optim1JBrent)
attributes(optim1JBrent)
plot(optim1JBrent)
}#> Starting Julia ...
##Example 2 : Normal Distribution
Five normal variables with mean = 0 and sd = 1 are drawn. We try to find these mean/sd values by comparing mean and sd of 10 simulated normal variables with the observed data. The summary statistic is :
\[sumstats(\theta) =\left( \widehat{\mathbb{E}[X|\theta]}-\overline{X^{data}}\right)^2 + \left( \widehat{\sigma(X|\theta)}-\sigma(X^{data})\right)^2\]
#Setup
set.seed(1)
#Create data
<- c(3, 2) #data parameter
Theta_true2 <- 5 #data size
n2
<- function(Theta, n){
simulator2 #classical random simulator
rnorm(n, mean = Theta[1], sd = Theta[2])
}
<- simulator2(Theta_true2, n2)
data2
#Simulations with quantiles
#See README to know how to build this simulator
<- function(Theta, quantiles){
simulatorQ2 qnorm(quantiles, mean = Theta[1], sd = Theta[2])
}
<- 5
ndraw2
check_simulator(simulatorQ2, ndraw2, c(0, 0), c(10, 10))
#> [1] TRUE
check_simulator(simulator2, ndraw2, c(0, 0), c(10, 10))
#> [1] FALSE
<-function(simulations, data){
sumstats2 <- mean(rowMeans(simulations))
mean_simu <- mean(data)
mean_data <-mean(apply(simulations, 1, sd))
sd_simu <- sd(data)
sd_data -mean_data)^2+(sd_simu-sd_data)^2
(mean_simu
}
<- 10 nsim2
plot_objective(ndraw2, nsim2, data2, sumstats2, simulatorQ2, index = 1, other_param = c(1, 2, 10),
lower = -5, upper = 10)
#Optimization
#First mode : full R
#default mode
<- flimoptim(data2, ndraw2, sumstats2, simulatorQ2,
optim2R ninfer = 10, nsim = nsim2,
lower = c(-5, 0), upper = c(10, 10),
randomTheta0 = TRUE)
optim2R#> Optimisation Result
#> Mode : R
#> method : L-BFGS-B
#> Number of inferences : 10
#> Convergence : 10/10
#> Mean of minimizer :
#> 3.20567648680977 2.11520187608635
#> Best minimizer :
#> 3.14574674163785 1.97643436033205
#> Reached minima : from 0 to 3.8290401805314e-14
#> Median time by inference 0.00823163986206055
#> Optimisation Result
#> Mode : R
#> method : L-BFGS-B
#> Number of inferences : 10
#> Convergence : 10/10
#> Mean of minimizer :
#> 3.20567648680977 2.11520187608635
#> Best minimizer :
#> 3.14574674163785 1.97643436033205
#> Reached minima : from 0 to 3.8290401805314e-14
#> Median time by inference 0.00823163986206055
summary(optim2R)
#> $Mode
#> [1] "R"
#>
#> $method
#> [1] "L-BFGS-B"
#>
#> $number_inferences
#> [1] 10
#>
#> $number_converged
#> [1] 10
#>
#> $minimizer
#> par1 par2
#> Min. :2.975 Min. :1.681
#> 1st Qu.:3.044 1st Qu.:1.948
#> Median :3.130 Median :2.116
#> Mean :3.206 Mean :2.115
#> 3rd Qu.:3.211 3rd Qu.:2.276
#> Max. :3.820 Max. :2.536
#>
#> $minimum
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.000e+00 0.000e+00 0.000e+00 3.830e-15 1.000e-18 3.829e-14
#>
#> $median_time_inference
#> [1] 0.00823164
plot(optim2R, pairwise_par = TRUE, hist = TRUE, par_minimum = TRUE)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Warning: Transformation introduced infinite values in continuous y-axis
#Second mode : full Julia
#Optimization with Automatic Differentiation
#Warning : you need to translate sumstats and simulatorQ to Julia
#Both of these names for the Julia functions are mandatory !
if (juliaSetupOk()){
<-"
julia_simulatorQ2 function simulatorQ(Theta, quantiles)
quantile.(Normal.(Theta[1], Theta[2]), quantiles)
end
"
<-"
julia_sumstats2 function sumstats(simulations, data)
(mean(mean(simulations, dims = 2))-mean(data))^2+
(mean(std(simulations, dims = 2))-std(data))^2
end
"
#Most accurate config for complex problems : IPNewton with AD
<- flimoptim(data2, ndraw2, julia_sumstats2, julia_simulatorQ2,
optim2JAD ninfer = 10, nsim = nsim2,
lower = c(-5, 0), upper = c(10, 10),
randomTheta0 = TRUE,
mode = "Julia")
optim2JADsummary(optim2JAD)
plot(optim2JAD)
#IPNewton without AD
<- flimoptim(data2, ndraw2, julia_sumstats2, julia_simulatorQ2,
optim2J ninfer = 10, nsim = nsim2,
lower = c(-5, 0), upper = c(10, 10),
randomTheta0 = TRUE,
mode = "Julia")
optim2Jsummary(optim2J)
plot(optim2J)
}