diff --git a/apps/mcmc/Main.hs b/apps/mcmc/Main.hs new file mode 100644 index 0000000000000000000000000000000000000000..4ea27008882f40cb3e05c807ec94ff410bdcf233 --- /dev/null +++ b/apps/mcmc/Main.hs @@ -0,0 +1,100 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE RecordWildCards #-} + +module Main where + +import BDSCOD.Llhd (initLlhdState, llhdAndNB) +import BDSCOD.Types (Observation) +import BDSCOD.Types (LogLikelihood (..), + MCMCConfiguration (..), MWCSeed, + NegativeBinomial (..), NumLineages, + Observation (..), ObservedEvent (..), + PDESolution (..), Parameters (..), + packParameters, putLambda, putMu, + putNus, putOmega, putPsi, putRhos, + scheduledTimes, unpackParameters) +import qualified Data.Aeson as Json +import Data.Either.Combinators (fromRight, maybeToRight) +import Data.List (intersperse) +import Data.Maybe (fromJust) +import qualified Data.Vector.Unboxed as Unboxed +import Epidemic.Types.Parameter (AbsoluteTime (..)) +import GHC.Generics (Generic) +import GHC.Word (Word32 (..)) +import Numeric.MCMC.Metropolis (Chain (..), chain') +import System.Environment (getArgs) +import System.Random.MWC (initialize) + + + + + +data MCMCInput = MI { mcmcObservations :: [Observation] + , mcmcNumSamples :: Int + , mcmcSampleCSV :: FilePath + , mcmcStepSD :: Double + , mcmcInit :: [Double] + , mcmcSeed :: [Word32] + , mcmcParameterisation :: String + , mcmcKnownMu :: Maybe Double + , mcmcSimDuration :: Maybe Double + , mcmcPrior :: String } deriving (Show, Generic) + +instance Json.FromJSON MCMCInput + +main :: IO () +main = do + configFile <- head <$> getArgs + eitherMI <- Json.eitherDecodeFileStrict configFile :: IO (Either String MCMCInput) + case eitherMI of + Right MI {..} -> + do putStrLn $ "read MCMC configuration from " ++ configFile + + -- report some of the details of the mcmc so you can tell if the + -- configuration has been read correctly + putStrLn $ + "\n\tprior: WARNING NOT IMPLEMENTED" ++ + "\n\tparameterisation name: " ++ mcmcParameterisation ++ + "\n\toutput file: " ++ mcmcSampleCSV ++ + "\n\tnumber observations: " ++ show (length mcmcObservations) ++ + "\n\tnumber samples: " ++ show mcmcNumSamples + + -- construct the target density and a summary function if necessary. + let asParam = paramConstructor mcmcParameterisation mcmcKnownMu mcmcSimDuration + llhd x = fromRight (-1e6) $ fst <$> do params <- maybeToRight mempty $ asParam x + llhdAndNB mcmcObservations params initLlhdState + -- TODO Implement the prior functionality + prior :: [Double] -> Double + prior = undefined + target x = llhd x + tunable = Nothing + + -- run the sampler and write the results to file. + gen <- initialize $ Unboxed.fromList mcmcSeed + ch <- chain' mcmcNumSamples mcmcStepSD mcmcInit target tunable gen + writeChainToCSV ch mcmcSampleCSV + Left msg -> + do putStrLn $ "failed to read MCMC configuration from " ++ configFile + putStrLn msg + +-- | A function for constructing parameters that can be used by the likelihood. +paramConstructor :: String -> Maybe Double -> Maybe Double -> ([Double] -> Maybe Parameters) +paramConstructor p maybeMu maybeDuration + | p == "identity-mu1-lambda-psi-noRho-omega-noNu" = + \[λ, ψ, ω] -> Just $ packParameters (λ, 0.026, ψ, [], ω, []) + | p == "identity-muKnown-lambda-psi-noRho-omega-noNu" = + let μ = fromJust maybeMu + in \[λ, ψ, ω] -> Just $ packParameters (λ, μ, ψ, [], ω, []) + | p == "identity-muKnown-lambda-psi-rhoAtDuration-omega-noNu" = + let μ = fromJust maybeMu + dur = fromJust maybeDuration + in \[λ, ψ, Ï, ω] -> if minimum [λ, ψ, Ï, ω] > 0.0 + then Just $ packParameters (λ, μ, ψ, [(AbsoluteTime dur, Ï)], ω, []) + else Nothing + | otherwise = error $ "do not recognise parameterisation: " ++ p + +-- | Write the samples to file. +writeChainToCSV :: [Chain [Double] b] -> FilePath -> IO () +writeChainToCSV samples csv = writeFile csv $ samples2CSV samples ++ "\n" + where samples2CSV = mconcat . intersperse "\n" . map sample2Row + sample2Row s = mconcat . intersperse "," . map show $ (chainScore s) : (chainPosition s) diff --git a/bdscod.cabal b/bdscod.cabal index 27e9f3a84cd148b4cc1c30d347085aa2be488bb7..a32a14a364a98191754f2d1ececee9e71a2488d4 100644 --- a/bdscod.cabal +++ b/bdscod.cabal @@ -27,7 +27,7 @@ library src build-depends: aeson - , either + , either >= 5.0 , generic-random , QuickCheck >= 2.13.2 , base >= 4.7 && < 5 @@ -38,10 +38,29 @@ library , hspec-core >= 2.7.4 , math-functions , statistics - , vector + , vector >= 0.12 default-language: Haskell2010 ghc-options: -fwarn-tabs -Wmissing-signatures -O2 -fwarn-incomplete-patterns +executable mcmc + main-is: Main.hs + other-modules: + Paths_bdscod + hs-source-dirs: + apps/mcmc + ghc-options: -O2 + + build-depends: + base + , aeson + , bdscod + , either + , epi-sim + , mighty-metropolis >= 2.0 + , mwc-random >= 0.14 + , vector + + default-language: Haskell2010 executable simulation-study main-is: Main.hs diff --git a/examples/ape-simulation/README.org b/examples/ape-simulation/README.org index d20446109faad4ccca9582470772bef96010d345..5c8910b6311ae6f9151e203749e197ae894bfc0b 100644 --- a/examples/ape-simulation/README.org +++ b/examples/ape-simulation/README.org @@ -2,12 +2,7 @@ There is a =run.sh= script to carry out this computation. It uses the [[https://cran.r-project.org/web/packages/ape/index.html][=ape=]] package in R to simulate a birth-death process and then randomly annotate the -tips to produce a sample from the BDSCOD process. +tips to produce a sample from the BDSCOD process. The results of this can be +viewed by opening =index.html= in a browser. -* Simulation script -The following demonstrates how to use the =ape-sim.R= script - -#+begin_src sh -./ape-sim.R --seed 1 -p ../example-parameters.json -o out --duration 30.0 -#+end_src diff --git a/examples/ape-simulation/ape-sim.R b/examples/ape-simulation/ape-sim.R index 5f4952caa2e6f7f534462fffcdfbeee6bfbae414..4ec8a1e939095066aaf300913f21abe1c98cfcc2 100755 --- a/examples/ape-simulation/ape-sim.R +++ b/examples/ape-simulation/ape-sim.R @@ -7,11 +7,17 @@ #' interactive mode this will substitute some parameters otherwise it expected #' to get everything from the command line. #' +#' By default this avoids a population sample at the end of the simulation but +#' there is a command line argument if you want to include this. +#' #' Usage #' ----- #' #' $ ./ape-sim.R --seed 1 -p ../example-parameters.json -o out --duration 30.0 #' +#' or if you want to generate a figure showing the simulated data append the +#' --make-plots flag. +#' #' Help #' ---- #' @@ -67,15 +73,28 @@ parser$add_argument( default = FALSE, help = "Generate plots" ) +parser$add_argument( + "-r", + "--rho", + type = "double", + default = -1.0, + help = "Scheduled sample probability") - -read_parameters <- function(parameter_filepath, sim_duration, is_verbose) { +read_parameters <- function(parameter_filepath, sim_duration, maybe_rho, is_verbose) { if (!file.exists(parameter_filepath)) { stop("Cannot find parameter file: ", parameter_filepath) } else if (sim_duration <= 0.0) { stop("Need a positive duration") + } else if (!(is.null(maybe_rho) | is.numeric(maybe_rho))) { + stop("Need a maybe numeric value for maybe_rho") } else { params <- jsonlite::read_json(parameter_filepath) + expected_names <- + c("birthRate", + "deathRate", + "samplingRate", + "occurrenceRate") + stopifnot(setequal(names(params), expected_names)) ## it will be useful to have some other ways to talk about the parameters so ## we compute a couple of other views. params$net_rem_rate <- @@ -87,7 +106,18 @@ read_parameters <- function(parameter_filepath, sim_duration, is_verbose) { params$prob_sampled_given_observed <- params$sampling_prob / params$prob_observed params$duration <- sim_duration - return(params) + ## we need to handle the possibility that there is a valid rho. + if (is.null(maybe_rho)) { + params$rho <- maybe_rho + return(params) + } else { + if (0 < maybe_rho && maybe_rho < 1) { + params$rho <- maybe_rho + return(params) + } else { + stop("invalid rho argument: ", maybe_rho) + } + } } } @@ -116,9 +146,27 @@ run_simulation <- function(params, is_verbose) { tip_times <- head(ape::node.depth.edgelength(phy), num_tips) ## TODO we can find the tips that are still extant in the simulation but it is ## unclear if we can use strict equality here of if we need to account for - ## potential error in the branch lengths. + ## potential error in the branch lengths. It seems like this is safe... extant_mask <- tip_times + tmrca == params$duration extant_labels <- tip_labels[extant_mask] + num_extant <- length(extant_labels) + ## We need to do a rho sample at the end of the duration if one has been + ## requested, otherwise we need to propagate the null values. + if (is.null(params$rho)) { + num_rho_sampled <- NULL + rho_sampled_labels <- NULL + } else { + num_rho_sampled <- rbinom( + n = 1, + size = num_extant, + prob = params$rho + ) + rho_sampled_labels <- sample( + x = extant_labels, + size = num_rho_sampled, + replace = FALSE + ) + } extinct_labels <- tip_labels[!extant_mask] num_extinct <- length(extinct_labels) ## We can select which extinctions are deaths, samples and occurrences by @@ -156,7 +204,13 @@ run_simulation <- function(params, is_verbose) { tl <- tip_labels[ix] outcome[ix] <- if (is.element(tl, extant_labels)) { - "extant" + if (is.null(rho_sampled_labels)) { + "extant" + } else if (is.element(tl, rho_sampled_labels)) { + "rho" + } else { + "extant" + } } else if (is.element(tl, unobserved_labels)) { "death" } else if (is.element(tl, sampling_labels)) { @@ -169,13 +223,19 @@ run_simulation <- function(params, is_verbose) { } ## We then drop the lineages that did not result in a sampled lineage to get ## the reconstructed tree. - rt <- ape::keep.tip(phy, sampling_labels) + rt_tip_labels <- c(sampling_labels, rho_sampled_labels) + num_rt_tips <- length(rt_tip_labels) + if (num_rt_tips < 2) { + stop(paste0(c("\n\n", rep("-", 60), "\n\n\tSIMULATION HAS LESS THAN TWO SEQUENCED SAMPLES!\n\n", rep("-", 60), "\n"), collapse="")) + } + rt <- ape::keep.tip(phy, rt_tip_labels) rt_tip_and_node_depths <- ape::node.depth.edgelength(rt) - rt_tip_depths <- head(rt_tip_and_node_depths, num_sampled) - rt_node_depths <- tail(rt_tip_and_node_depths, -num_sampled) + rt_tip_depths <- head(rt_tip_and_node_depths, num_rt_tips) + rt_node_depths <- tail(rt_tip_and_node_depths, -num_rt_tips) ## The depths of the nodes and tips is relative to the TMRCA so we need to ## adjust for this when computing the extact times that they occurred at. - true_first_sample_time <- min(tip_times[outcome == "sampling"]) + true_first_sample_time <- + min(tip_times[outcome == "sampling" | outcome == "rho"]) depth_first_in_rt <- min(rt_tip_depths) rt_offset <- true_first_sample_time - depth_first_in_rt ## Finally we can put all of this information into a single dataframe. @@ -183,12 +243,16 @@ run_simulation <- function(params, is_verbose) { time = c(-tmrca, tip_times[outcome == "occurrence"], tip_times[outcome == "sampling"], + tip_times[outcome == "rho"], rt_node_depths + rt_offset ), event = c("origin", rep( - c("occurrence", "sampling", "birth"), - times = c(num_occurrences, num_sampled, num_sampled - 1) + c("occurrence", "sampling", "rho", "birth"), + times = c(num_occurrences, + num_sampled, + ifelse(is.null(num_rho_sampled), 0, num_rho_sampled), + ifelse(is.null(num_rho_sampled), num_sampled - 1, num_rt_tips - 1)) ) ) ) @@ -198,7 +262,7 @@ run_simulation <- function(params, is_verbose) { ## We can do a quick check of the results to make sure that some invariants ## have been preserved. Note that one of the checks here will fail a small ## percent of the time because it is not exact. - dur_1 <- max(tip_times[outcome == "sampling"]) + tmrca + dur_1 <- max(tip_times[outcome == "sampling" | outcome == "rho"]) + tmrca dur_2 <- max(rt_tip_depths) + rt_offset + tmrca ## these should be equal up to numerical error. stopifnot(abs(dur_1 - dur_2) < 1e-10 * params$duration) @@ -210,7 +274,9 @@ run_simulation <- function(params, is_verbose) { outcome = outcome, tip_ix = tip_ix, num_extinct = num_extinct, + num_extant = num_extant, num_sampled = num_sampled, + num_rho_sampled = num_rho_sampled, num_observed = num_observed, num_occurrences = num_occurrences)) } @@ -252,6 +318,23 @@ write_plot <- function(simulation_results, parameters, output_directory, is_verb prob = parameters$occurrence_prob)) ) + if (!is.null(parameters$rho)) { + rho_row <- data.frame( + outcome = "rho", + empirical = simulation_results$num_rho_sampled, + theory = qbinom(p = 0.5, + size = simulation_results$num_extant, + prob = parameters$rho), + theory_min = qbinom(p = 0.025, + size = simulation_results$num_extant, + prob = parameters$rho), + theory_max = qbinom(p = 0.975, + size = simulation_results$num_extant, + prob = parameters$rho) + ) + hist_plt_df <- rbind(hist_plt_df, rho_row) + } + plt5 <- ggplot(hist_plt_df) + geom_col(mapping = aes(x = outcome, y = empirical)) + geom_point(mapping = aes(x = outcome, y = theory)) + @@ -259,7 +342,7 @@ write_plot <- function(simulation_results, parameters, output_directory, is_verb labs(y = "Count", x = NULL) + theme_classic() - is_observed <- is.element(simulation_results$outcome, c("sampling", "occurrence")) + is_observed <- is.element(simulation_results$outcome, c("sampling", "occurrence", "rho")) tip_annotations <- tibble( node = simulation_results$tip_ix, @@ -269,7 +352,7 @@ write_plot <- function(simulation_results, parameters, output_directory, is_verb ## TODO does this come from tidytree??? tr <- treedata(phylo = simulation_results$phylo, data = tip_annotations) - + plt4 <- ggplot(tr, mapping = aes(x, y)) + geom_tippoint(mapping = aes(colour = outcome, shape = is_observed), size = 3) + @@ -298,12 +381,30 @@ write_plot <- function(simulation_results, parameters, output_directory, is_verb main <- function(args) { if (args$verbose) { - cat("Reading parameters from", args$parameters, "\n") + cat("reading parameters from", args$parameters, "\n") } set.seed(args$seed) - params <- read_parameters(args$parameters, args$duration, args$verbose) + ## Awkward because you cannot assign NULL via ifelse. + if (args$rho == -1.0) { + maybe_rho <- NULL + } else { + maybe_rho <- args$rho + } + + if (args$verbose) { + if (is.null(maybe_rho)) { + cat("without rho sampling at the end of the simulation\n") + } else { + cat("with rho sampling at the end of the simulation\n") + } + } + params <- read_parameters( + args$parameters, + args$duration, + maybe_rho, + args$verbose) sim_result <- run_simulation(params, args$verbose) @@ -339,7 +440,8 @@ if (!interactive()) { parameters = "../example-parameters.json", duration = 30.0, output_directory = "out", - make_plots = TRUE + make_plots = TRUE, + rho = 0.1 ) main(args) } diff --git a/examples/ape-simulation/clean.sh b/examples/ape-simulation/clean.sh new file mode 100755 index 0000000000000000000000000000000000000000..9f5c4034a0fefd15bae9cea112ae7b0798882cdd --- /dev/null +++ b/examples/ape-simulation/clean.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + + +rm -f out/ape-sim-event-times.csv +rm -f out/ape-simulation-figure.png +rm -f out/mcmc-diagnostics.json +rm -f out/mcmc-samples.csv +rm -f out/mcmc-traceplot-*.png +rm -f out/simulation-data.json + +rm -f index.html diff --git a/examples/ape-simulation/run.sh b/examples/ape-simulation/run.sh old mode 100644 new mode 100755 index dbc96f377e94972880ab0c3bb122ed028a7fb411..5f28e22cee86940c6745f0ca3a7228f103b39e8f --- a/examples/ape-simulation/run.sh +++ b/examples/ape-simulation/run.sh @@ -1,7 +1,15 @@ #!/usr/bin/env bash -./ape-sim.R --seed 1 -p ../example-parameters.json -o out --duration 30.0 +SEED=2 +DURATION=25.0 +PARAM_JSON=../example-parameters.json -# Rscript src/prep-mcmc-data.R +./ape-sim.R --seed $SEED -p $PARAM_JSON -o out --duration $DURATION --make-plots -v +Rscript src/prep-mcmc-data.R +stack exec -- mcmc out/simulation-data.json + +Rscript src/mcmc-plots.R + +Rscript src/make-viewer.R diff --git a/examples/ape-simulation/src/make-viewer.R b/examples/ape-simulation/src/make-viewer.R new file mode 100644 index 0000000000000000000000000000000000000000..b0e5435987b8a5672b1fbd31ad7a917d59e03bff --- /dev/null +++ b/examples/ape-simulation/src/make-viewer.R @@ -0,0 +1,52 @@ +library(htmltools) + +mcmc_diagnostics <- jsonlite::read_json("out/mcmc-diagnostics.json") +out_html <- "index.html" + +foo <- lapply(mcmc_diagnostics, unlist) |> as.data.frame() +bar <- c() +for (ix in seq.int(nrow(foo))) { + if (ncol(foo) == 4) { + bar <- c(bar, sprintf("%s, %f, %f, %f", foo[ix,1], foo[ix,2], foo[ix,3], foo[ix, 4])) + } else if (ncol(foo) == 3) { + bar <- c(bar, sprintf("%s, %f, %f", foo[ix,1], foo[ix,2], foo[ix,3])) + } else { + stop("") + } +} + + +html <- + tags$html( + tags$head(tags$title("ape simulation example")), + tags$body( + tags$h1("ape simulation example"), + tags$div( + tags$h3("Simulated data"), + tags$img(src = "out/ape-simulation-figure.png", + style = "width: 1000px;") + ), + tags$div( + tags$h3("MCMC summary"), + tags$div( + tags$p(paste(names(foo), collapse = ", ")), + tags$ul(purrr::map(bar, tags$li)) + ), + tags$div( + tags$h5("Posterior distribution of parameters"), + tags$img(src = "out/posterior-marginals.png", + style = "width: 1000px;"), + tags$h5("Posterior distribution of R-naught"), + tags$img(src = "out/posterior-r-naught.png", + style = "width: 600px;") + ) + ), + tags$div( + tags$h3("MCMC traceplot"), + tags$img(src = "out/mcmc-traceplot-1.png"), + tags$img(src = "out/mcmc-traceplot-2.png") + ) + ) + ) + +save_html(doRenderTags(html), file = out_html) diff --git a/examples/ape-simulation/src/mcmc-plots.R b/examples/ape-simulation/src/mcmc-plots.R new file mode 100644 index 0000000000000000000000000000000000000000..d804a41b8d7ccbf12bc77ab685945ee04f1180c8 --- /dev/null +++ b/examples/ape-simulation/src/mcmc-plots.R @@ -0,0 +1,197 @@ +library(mcmc) +library(coda) +library(dplyr) +library(ggplot2) +library(cowplot) +library(magrittr) +library(reshape2) +library(purrr) +library(latex2exp) + +green_hex_colour <- "#7fc97f" +purple_hex_colour <- "#beaed4" + +true_params <- jsonlite::read_json("../example-parameters.json") + +mcmc_input <- jsonlite::read_json("out/simulation-data.json") +mcmc_csv <- "out/mcmc-samples.csv" +trace_png <- function(n) sprintf("out/mcmc-traceplot-%d.png", n) +mcmc_diagnostics_json <- "out/mcmc-diagnostics.json" + +marginal_plot_filepath <- function(fmt, extra="") sprintf("out/posterior-marginals%s.%s", extra, fmt) +r_naught_plot_filepath <- function(fmt, extra="") sprintf("out/posterior-r-naught%s.%s", extra, fmt) + +post_samples_df <- read.csv(mcmc_csv, header = F) +if (length(mcmc_input$mcmcInit) == 3) { + names(post_samples_df) <- c("llhd", "birth_rate", "sampling_rate", "omega_rate") +} else if (length(mcmc_input$mcmcInit) == 4) { + names(post_samples_df) <- c("llhd", "birth_rate", "sampling_rate", "rho_prob", "omega_rate") +} else { + stop("") +} + +post_samples <- as.mcmc(post_samples_df) + +jx <- 1 +fig_n <- 1 +while (jx < ncol(post_samples)) { + png(filename=trace_png(fig_n), width=700, height=1500) + plot(post_samples[,c(jx, jx+1)]) + dev.off() + jx <- jx + 2 + fig_n <- fig_n + 1 +} +if (jx == ncol(post_samples)) { + png(filename=trace_png(fig_n), width=700, height=1500) + plot(post_samples[,c(jx)]) + dev.off() +} + +summary_stats <- as.data.frame(summary(post_samples)$statistics) + +diagnostics <- list( + varnames = varnames(post_samples), + mean = summary_stats$Mean, + sd = summary_stats$SD, + effective_size = effectiveSize(post_samples) +) + +jsonlite::write_json( + x = diagnostics, + path = mcmc_diagnostics_json, + auto_unbox = T + ) + + +## ====================================================== +## Make the plots of the marginal posterior distribution. +## ====================================================== + + +marginal_plot_summary <- function(samples, varname) { + density_df <- + samples |> + density() |> + extract(c("x", "y")) |> + data.frame() |> + mutate(variable = varname) + + qs <- quantile(x = samples, probs = c(0.025, 0.5, 0.975)) + list(df = density_df, point_est = qs[2], ci = qs[c(1, 3)]) +} + +birth_rate_marg <- marginal_plot_summary(post_samples_df$birth_rate, "birth_rate") + +birth_rate_fig <- ggplot(mapping = aes(x = x, y = y)) + + geom_line( + data = birth_rate_marg$df, + colour = green_hex_colour + ) + + geom_area( + data = filter(birth_rate_marg$df, birth_rate_marg$ci[1] < x, x < birth_rate_marg$ci[2]), + fill = green_hex_colour, + alpha = 0.3 + ) + + geom_vline( + xintercept = true_params$birthRate, + linetype = "dashed" + ) + + labs(y = "Posterior density", x = TeX("Birth rate ($\\lambda$)")) + + theme_classic() + + theme() + +sampling_rate_marg <- marginal_plot_summary(post_samples_df$sampling_rate, "sampling_rate") + +sampling_rate_fig <- ggplot(mapping = aes(x = x, y = y)) + + geom_line( + data = sampling_rate_marg$df, + colour = green_hex_colour + ) + + geom_area( + data = filter(sampling_rate_marg$df, sampling_rate_marg$ci[1] < x, x < sampling_rate_marg$ci[2]), + fill = green_hex_colour, + alpha = 0.3 + ) + + geom_vline( + xintercept = true_params$samplingRate, + linetype = "dashed" + ) + + labs(y = NULL, x = TeX("Sampling rate ($\\psi$)")) + + theme_classic() + + theme() + +omega_rate_marg <- marginal_plot_summary(post_samples_df$omega_rate, "omega_rate") + +omega_rate_fig <- ggplot(mapping = aes(x = x, y = y)) + + geom_line( + data = omega_rate_marg$df, + colour = green_hex_colour + ) + + geom_area( + data = filter(omega_rate_marg$df, omega_rate_marg$ci[1] < x, x < omega_rate_marg$ci[2]), + fill = green_hex_colour, + alpha = 0.3 + ) + + geom_vline( + xintercept = true_params$occurrenceRate, + linetype = "dashed" + ) + + labs(y = NULL, x = TeX("Occurrence rate ($\\omega$)")) + + theme_classic() + + theme() + +marginals_fig <- plot_grid(birth_rate_fig, sampling_rate_fig, omega_rate_fig, ncol = 3) + +ggsave(filename = marginal_plot_filepath("png"), + plot = marginals_fig, + height = 10, + width = 2.8 * 10, + units = "cm") +## ggsave(filename = marginal_plot_filepath("pdf"), +## plot = marginals_fig, +## height = 10, +## width = 2.8 * 10, +## units = "cm") + +## ======================================================= +## Make the plot of the posterior distribution of r-naught +## ======================================================= + +r_naught_samples <- + post_samples_df |> + mutate(r_naught = birth_rate / (sampling_rate + true_params$deathRate + omega_rate)) |> + extract2("r_naught") + +true_r_naught <- true_params$birthRate / (true_params$deathRate + true_params$samplingRate + true_params$occurrenceRate) + +foo <- marginal_plot_summary(r_naught_samples, "r_naught") + +foo_fig <- ggplot(mapping = aes(x = x, y = y)) + + geom_line( + data = foo$df, + colour = green_hex_colour + ) + + geom_area( + data = filter(foo$df, foo$ci[1] < x, x < foo$ci[2]), + fill = green_hex_colour, + alpha = 0.3 + ) + + geom_vline( + xintercept = true_r_naught, + linetype = "dashed" + ) + + labs(y = "Posterior density", x = TeX("Basic reproduction number ($R_{0}$)")) + + theme_classic() + + theme() + + +ggsave(filename = r_naught_plot_filepath("png"), + plot = foo_fig, + height = 10, + width = 1.3 * 10, + units = "cm") +## ggsave(filename = r_naught_plot_filepath("pdf"), +## plot = foo_fig, +## height = 10, +## width = 2.8 * 10, +## units = "cm") diff --git a/examples/ape-simulation/src/prep-mcmc-data.R b/examples/ape-simulation/src/prep-mcmc-data.R new file mode 100644 index 0000000000000000000000000000000000000000..465af08e6c670cc01a0976965c406ced120cda99 --- /dev/null +++ b/examples/ape-simulation/src/prep-mcmc-data.R @@ -0,0 +1,69 @@ + +params <- jsonlite::read_json("../example-parameters.json") + +input_csv <- "out/ape-sim-event-times.csv" + +sim_events <- read.csv(input_csv) +sim_events <- sim_events[order(sim_events$time), ] + +if (is.element("rho", sim_events$event)) { + maybe_sim_dur <- diff(range(sim_events$time)) +} else { + maybe_sim_dur <- NULL +} + +num_rho_sampled <- sum(sim_events$event == "rho") +## we only support a single rho event at the moment +rho_times <- unique(sim_events[sim_events$event == "rho", "time"]) +stopifnot(length(rho_times) <= 1) +sim_events <- sim_events[sim_events$event != "rho", ] + +last_non_rho_time <- max(sim_events$time) +delays <- diff(sim_events$time) +events <- tail(sim_events$event, -1) + +observation <- function(d, e) { + list( + d, + list( + tag = switch( + e, + birth = "OBirth", + occurrence = "OOccurrence", + sampling = "ObsUnscheduledSequenced" + ) + ) + ) +} + +observations_list <- purrr::map2(delays, events, observation) + +if (length(rho_times) == 1) { + rho_sample <- list(rho_times[1] - last_non_rho_time, + list(tag = "OCatastrophe", + contents = num_rho_sampled)) + observations_list <- c(observations_list, list(rho_sample)) +} + +mcmc_input <- list( + mcmcObservations = observations_list, + mcmcNumSamples = 1e6, + mcmcSampleCSV= "out/mcmc-samples.csv", + mcmcStepSD = 1e-3, + ## mcmcInit = c(0.228, 0.048, 0.5, 0.026), + mcmcInit = c(0.228, 0.048, 0.026), + mcmcSeed = c(1, 2), + ## mcmcParameterisation = "identity-muKnown-lambda-psi-rhoAtDuration-omega-noNu", + mcmcParameterisation = "identity-muKnown-lambda-psi-noRho-omega-noNu", + mcmcKnownMu = params$deathRate, + ## mcmcSimDuration = maybe_sim_dur, + mcmcPrior= "foobar" +) + + +jsonlite::write_json( + x = mcmc_input, + path = "out/simulation-data.json", + auto_unbox = T + ) + diff --git a/src/BDSCOD/Utility.hs b/src/BDSCOD/Utility.hs index 847c042b1f817b9d994df85f76017ddf5c9c526a..e632725d384aa883aded7047ad9138e419193aa3 100644 --- a/src/BDSCOD/Utility.hs +++ b/src/BDSCOD/Utility.hs @@ -17,7 +17,7 @@ instance Csv.ToField TimeDelta where -- | Check if two absolute times differ by such a small amount that they are -- likely identical. timesWithinEpsilon :: AbsoluteTime -> AbsoluteTime -> Bool -timesWithinEpsilon (AbsoluteTime a) (AbsoluteTime b) = abs (a - b) < 1e-13 +timesWithinEpsilon (AbsoluteTime a) (AbsoluteTime b) = abs (a - b) < 1e-3 -- | Convert simulation events to observation events, this assumes that the -- epidemic events have already been filtered by to only include the observable