The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Here we apply the penetrance package to simulated families where the data-generating penetrance function is known and illustrate the estimation of the parameters of the Weibull distribution.
The data used for the simulation is available as sim_fam.Rdata
. Recall the parameterization of the Weibull distribution.For the function \(f(x; \alpha, \beta, \delta, \gamma)\) we define:
\[ f(x; \alpha, \beta, \delta, \gamma) = \begin{cases} \gamma \left( \frac{\alpha}{\beta} \left( \frac{x - \delta}{\beta} \right)^{\alpha - 1} e^{-\left( \frac{x - \delta}{\beta} \right)^\alpha } \right), & x \geq \delta \\ 0, & x < \delta \end{cases} \]
And for the cumulative distribution function \(F(x; \alpha, \beta, \delta, \gamma)\):
\[ F(x; \alpha, \beta, \delta, \gamma) = \begin{cases} \gamma \left( 1 - e^{-\left( \frac{x - \delta}{\beta} \right)^\alpha } \right), & x \geq \delta \\ 0, & x < \delta \end{cases} \]
The data-generating penetrance was constructed using the following parameters for the Weibull distribution.
# Create generating_penetrance data frame
<- 1:94
age
# Calculate Weibull distribution for Females
<- 2
alpha <- 50
beta <- 0.6
gamma <- 15
delta <- dweibull(age - delta, alpha, beta) * gamma
penetrance.mod.f
# Calculate Weibull distribution for Males
<- 2
alpha <- 50
beta <- 0.6
gamma <- 30
delta <- dweibull(age - delta, alpha, beta) * gamma
penetrance.mod.m
<- data.frame(
generating_penetrance Age = age,
Female = penetrance.mod.f,
Male = penetrance.mod.m
)
The families were simulated using the PedUtils Rpackage.
<- simulated_families dat
Then we run the estimation using the default settings.
# Set the random seed
set.seed(2024)
# Set the prior
<- list(
prior_params asymptote = list(g1 = 1, g2 = 1),
threshold = list(min = 5, max = 40),
median = list(m1 = 2, m2 = 2),
first_quartile = list(q1 = 6, q2 = 3)
)
# Set the prevalence
<- 0.001
prevMLH1
# We use the default baseline (non-carrier) penetrance
print(baseline_data_default)
# We run the estimation procedure with one chain and 20k iterations
<- penetrance(
out_sim pedigree = dat, twins = NULL, n_chains = 1, n_iter_per_chain = 20000,
ncores = 2, baseline_data = baseline_data_default , prev = prevMLH1,
prior_params = prior_params, burn_in = 0.1, median_max = TRUE,
ageImputation = FALSE, removeProband = FALSE
)
We then plot the respective penetrance curves from the data-generating distribution defined above and the penetrance curves (including credible intervals) from the estimation procedure.
# Function to calculate Weibull cumulative density
<- function(x, alpha, beta, threshold, asymptote) {
weibull_cumulative pweibull(x - threshold, shape = alpha, scale = beta) * asymptote
}
# Function to plot the penetrance and compare with simulated data
<- function(data, generating_penetrance, prob, max_age, sex) {
plot_penetrance_comparison if (prob <= 0 || prob >= 1) {
stop("prob must be between 0 and 1")
}
# Calculate Weibull parameters for the given sex
<- if (sex == "Male") {
params calculate_weibull_parameters(
$median_male_results,
data$first_quartile_male_results,
data$threshold_male_results
data
)else if (sex == "Female") {
} calculate_weibull_parameters(
$median_female_results,
data$first_quartile_female_results,
data$threshold_female_results
data
)else {
} stop("Invalid sex. Please choose 'Male' or 'Female'.")
}
<- params$alpha
alphas <- params$beta
betas <- if (sex == "Male") data$threshold_male_results else data$threshold_female_results
thresholds <- if (sex == "Male") data$asymptote_male_results else data$asymptote_female_results
asymptotes
<- seq(1, max_age)
x_values
# Calculate cumulative densities for the specified sex
<- mapply(function(alpha, beta, threshold, asymptote) {
cumulative_density pweibull(x_values - threshold, shape = alpha, scale = beta) * asymptote
SIMPLIFY = FALSE)
}, alphas, betas, thresholds, asymptotes,
<- matrix(unlist(cumulative_density), nrow = length(x_values), byrow = FALSE)
distributions_matrix <- rowMeans(distributions_matrix, na.rm = TRUE)
mean_density
# Calculate credible intervals
<- (1 - prob) / 2
lower_prob <- 1 - lower_prob
upper_prob <- apply(distributions_matrix, 1, quantile, probs = lower_prob)
lower_ci <- apply(distributions_matrix, 1, quantile, probs = upper_prob)
upper_ci
# Recover the data-generating penetrance
<- cumsum(generating_penetrance[[sex]])
cumulative_generating_penetrance
# Create data frame for plotting
<- seq_along(cumulative_generating_penetrance)
age_values <- min(length(cumulative_generating_penetrance), length(mean_density))
min_length
<- data.frame(
plot_df age = age_values[1:min_length],
cumulative_generating_penetrance = cumulative_generating_penetrance[1:min_length],
mean_density = mean_density[1:min_length],
lower_ci = lower_ci[1:min_length],
upper_ci = upper_ci[1:min_length]
)
# Plot the cumulative densities with credible intervals
<- ggplot(plot_df, aes(x = age)) +
p geom_line(aes(y = cumulative_generating_penetrance, color = "Data-generating penetrance"), linewidth = 1, linetype = "solid", na.rm = TRUE) +
geom_line(aes(y = mean_density, color = "Estimated penetrance"), linewidth = 1, linetype = "dotted", na.rm = TRUE) +
geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci), alpha = 0.2, fill = "red", na.rm = TRUE) +
labs(title = paste("Cumulative Density Comparison for", sex),
x = "Age",
y = "Cumulative Density") +
theme_minimal() +
scale_color_manual(values = c("Data-generating penetrance" = "blue",
"Estimated penetrance" = "red")) +
scale_y_continuous(labels = scales::percent) +
theme(legend.title = element_blank())
print(p)
# Calculate Mean Squared Error (MSE)
<- mean((plot_df$cumulative_generating_penetrance - plot_df$mean_density)^2, na.rm = TRUE)
mse cat("Mean Squared Error (MSE):", mse, "\n")
# Calculate Confidence Interval Coverage
<- mean((plot_df$cumulative_generating_penetrance >= plot_df$lower_ci) &
coverage $cumulative_generating_penetrance <= plot_df$upper_ci), na.rm = TRUE)
(plot_dfcat("Confidence Interval Coverage:", coverage, "\n")
}
# Plot for Female
plot_penetrance_comparison(
data = out_sim$combined_chains,
generating_penetrance = generating_penetrance,
prob = 0.95,
max_age = 94,
sex = "Female"
)
## Mean Squared Error (MSE): 0.002250424
## Confidence Interval Coverage: 0.6702128
# Plot for Male
plot_penetrance_comparison(
data = out_sim$combined_chains,
generating_penetrance = generating_penetrance,
prob = 0.95,
max_age = 94,
sex = "Male"
)
## Mean Squared Error (MSE): 0.000929737
## Confidence Interval Coverage: 1
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.