Broad technical terms | |
Object | Description |
argset | A named list containing a set of arguments. |
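analysis | These are the fundamental units that are scheduled in a plan: each analysis pairs one argset with one action function that takes (at least) the arguments data and argset. |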
plan | This is the overarching “scheduler”: it holds the data sources and the list of analyses to be run. |
Different types of plans | |
Plan Type | Description |
Single-function plan | Same action function applied multiple times with different argsets applied to the same datasets. |
Multi-function plan | Different action functions applied to the same datasets. |
Plan Examples | |
Plan Type | Example |
Single-function plan | Multiple strata (e.g. locations, age groups) that you need to apply the same function to (e.g. outbreak detection, trend detection, graphing). |
Single-function plan | Multiple variables (e.g. multiple outcomes, multiple exposures) that you need to apply the same statistical methods to (e.g. regression models, correlation plots). |
Multi-function plan | Creating the output for a report (e.g. multiple different tables and graphs). |
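This approach is generally used when you have multiple strata (e.g. locations, age groups) or multiple variables (e.g. multiple outcomes, multiple exposures) that you need to apply the same function to.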
When we apply the same function multiple times, it is preferable to add the argsets first, and then apply the analysis function just before running the analyses.
In this example, we loop through multiple geographical locations and apply a graphing function to the data from each of these geographical locations.
library(ggplot2)
library(data.table)
library(magrittr)
# We begin by defining a new plan
p <- plnr::Plan$new()
# Data function
# Takes no arguments and returns the dataset shipped with plnr. It is
# registered with the plan by name ("data_fn") via p$add_data().
data_fn <- function() {
  plnr::nor_covid19_cases_by_time_location
}
# We add sources of data
# We can add data directly
p$add_data(
  name = "covid19_cases",
  fn_name = "data_fn"
)

# Inspect the data as the plan will hand it to the action functions
p$get_data()
## $covid19_cases
## granularity_time granularity_geo country_iso3 location_code border age
## 1: day county nor county_nor03 2020 total
## 2: day county nor county_nor03 2020 total
## 3: day county nor county_nor03 2020 total
## 4: day county nor county_nor03 2020 total
## 5: day county nor county_nor03 2020 total
## ---
## 11024: isoweek nation nor nation_nor 2020 total
## 11025: isoweek nation nor nation_nor 2020 total
## 11026: isoweek nation nor nation_nor 2020 total
## 11027: isoweek nation nor nation_nor 2020 total
## 11028: isoweek nation nor nation_nor 2020 total
## sex isoyear isoweek isoyearweek season seasonweek calyear calmonth
## 1: total 2020 8 2020-08 2019/2020 31 2020 2
## 2: total 2020 8 2020-08 2019/2020 31 2020 2
## 3: total 2020 8 2020-08 2019/2020 31 2020 2
## 4: total 2020 9 2020-09 2019/2020 32 2020 2
## 5: total 2020 9 2020-09 2019/2020 32 2020 2
## ---
## 11024: total 2022 14 2022-14 2021/2022 37 NA NA
## 11025: total 2022 15 2022-15 2021/2022 38 NA NA
## 11026: total 2022 16 2022-16 2021/2022 39 NA NA
## 11027: total 2022 17 2022-17 2021/2022 40 NA NA
## 11028: total 2022 18 2022-18 2021/2022 41 NA NA
## calyearmonth date covid19_cases_testdate_n
## 1: 2020-M02 2020-02-21 0
## 2: 2020-M02 2020-02-22 0
## 3: 2020-M02 2020-02-23 0
## 4: 2020-M02 2020-02-24 0
## 5: 2020-M02 2020-02-25 0
## ---
## 11024: <NA> 2022-04-10 6888
## 11025: <NA> 2022-04-17 3635
## 11026: <NA> 2022-04-24 3764
## 11027: <NA> 2022-05-01 2243
## 11028: <NA> 2022-05-08 502
## covid19_cases_testdate_pr100000
## 1: 0.000000
## 2: 0.000000
## 3: 0.000000
## 4: 0.000000
## 5: 0.000000
## ---
## 11024: 126.961423
## 11025: 67.001274
## 11026: 69.379036
## 11027: 41.343564
## 11028: 9.252996
##
## $hash
## $hash$current
## [1] "cbb5d442160f26df4c2d9a4fec794fd7"
##
## $hash$current_elements
## $hash$current_elements$covid19_cases
## [1] "7f1b0a581386e75e907bffd94938a3a7"
# Collect the distinct location codes; print() shows them and passes the
# vector through, so it is still what gets assigned.
location_codes <- p$get_data()$covid19_cases$location_code %>%
  unique() %>%
  print()
## [1] "county_nor03" "county_nor11" "county_nor15" "county_nor18" "county_nor30"
## [6] "county_nor34" "county_nor38" "county_nor42" "county_nor46" "county_nor50"
## [11] "county_nor54" "nation_nor"
# One argset per location code, all at weekly granularity
p$add_argset_from_list(
  plnr::expand_list(
    location_code = location_codes,
    granularity_time = "isoweek"
  )
)

# Examine the argsets that are available
p$get_argsets_as_dt()
## name_analysis index_analysis location_code
## 1: 66e8e88b-4bf4-4158-a43c-ba894a7f36bd 1 county_nor03
## 2: e755cfa7-7f80-4a6a-b418-f1810d556711 2 county_nor11
## 3: 98b1e86c-08a8-4e3e-a9dc-05ed0932ac11 3 county_nor15
## 4: 5315e66d-d229-4d0e-963b-7c2ec9263ca2 4 county_nor18
## 5: d4fb2115-095f-4346-8113-6bd4a807cbff 5 county_nor30
## 6: fe2cf3bf-f0e2-45bb-ab29-060f151ca647 6 county_nor34
## 7: 654741ea-3192-4617-b71f-d94f95c9ec6f 7 county_nor38
## 8: 871f9501-c9c4-4004-96e7-c7e4d1fc534b 8 county_nor42
## 9: 0938f565-1a4a-46c3-9ab9-34cbfb0bc66e 9 county_nor46
## 10: ca15c576-e187-4297-a64d-a5fffb48c30e 10 county_nor50
## 11: fea0c004-e483-4834-b7f7-1efa6c5362c1 11 county_nor54
## 12: f85ac0c1-baa2-4df3-bb52-b89350388925 12 nation_nor
## granularity_time
## 1: isoweek
## 2: isoweek
## 3: isoweek
## 4: isoweek
## 5: isoweek
## 6: isoweek
## 7: isoweek
## 8: isoweek
## 9: isoweek
## 10: isoweek
## 11: isoweek
## 12: isoweek
# We can then add a simple analysis that returns a figure:
# To do this, we first need to create an action function
# (takes two arguments -- data and argset)
#
# data:   named list of datasets; this function reads data$covid19_cases.
# argset: named list; this function reads location_code and granularity_time.
# Returns a ggplot line chart of covid19_cases_testdate_n over date.
action_fn <- function(data, argset) {
  # Development aid: presumably TRUE only when the body is stepped through
  # interactively (TODO confirm against plnr docs). In that case pull the
  # inputs for the first analysis straight from the plan.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset(1)
  }

  # Keep only the rows matching this argset's location and time granularity
  pd <- data$covid19_cases[
    location_code == argset$location_code &
      granularity_time == argset$granularity_time
  ]

  q <- ggplot(pd, aes(x = date, y = covid19_cases_testdate_n))
  q <- q + geom_line()
  q <- q + labs(title = argset$location_code)
  q
}
# Attach the action function to every argset added above
p$apply_action_fn_to_all_argsets(fn_name = "action_fn")

# Run a single analysis, then all of them, and look at the first two results
p$run_one(1)
q <- p$run_all()
q[[1]]
q[[2]]
In this example, we loop through multiple variable combinations (1. raw numbers of Covid-19 cases vs Covid-19 cases per 100 000 population, and 2. aggregating over isoweek vs day) and apply a graphing function to the data according to each of these variable combinations.
library(ggplot2)
library(data.table)
library(magrittr)
# We begin by defining a new plan
p <- plnr::Plan$new()
# Data function
# Takes no arguments and returns only the national-level rows
# (location_code == "nation_nor") of the dataset shipped with plnr.
data_fn <- function() {
  plnr::nor_covid19_cases_by_time_location[location_code == "nation_nor"]
}
# We add sources of data
# We can add data directly
p$add_data(
  name = "covid19_cases",
  fn_name = "data_fn"
)

# Inspect the data as the plan will hand it to the action functions
p$get_data()
## $covid19_cases
## granularity_time granularity_geo country_iso3 location_code border age
## 1: day nation nor nation_nor 2020 total
## 2: day nation nor nation_nor 2020 total
## 3: day nation nor nation_nor 2020 total
## 4: day nation nor nation_nor 2020 total
## 5: day nation nor nation_nor 2020 total
## ---
## 915: isoweek nation nor nation_nor 2020 total
## 916: isoweek nation nor nation_nor 2020 total
## 917: isoweek nation nor nation_nor 2020 total
## 918: isoweek nation nor nation_nor 2020 total
## 919: isoweek nation nor nation_nor 2020 total
## sex isoyear isoweek isoyearweek season seasonweek calyear calmonth
## 1: total 2020 8 2020-08 2019/2020 31 2020 2
## 2: total 2020 8 2020-08 2019/2020 31 2020 2
## 3: total 2020 8 2020-08 2019/2020 31 2020 2
## 4: total 2020 9 2020-09 2019/2020 32 2020 2
## 5: total 2020 9 2020-09 2019/2020 32 2020 2
## ---
## 915: total 2022 14 2022-14 2021/2022 37 NA NA
## 916: total 2022 15 2022-15 2021/2022 38 NA NA
## 917: total 2022 16 2022-16 2021/2022 39 NA NA
## 918: total 2022 17 2022-17 2021/2022 40 NA NA
## 919: total 2022 18 2022-18 2021/2022 41 NA NA
## calyearmonth date covid19_cases_testdate_n
## 1: 2020-M02 2020-02-21 1
## 2: 2020-M02 2020-02-22 0
## 3: 2020-M02 2020-02-23 0
## 4: 2020-M02 2020-02-24 0
## 5: 2020-M02 2020-02-25 0
## ---
## 915: <NA> 2022-04-10 6888
## 916: <NA> 2022-04-17 3635
## 917: <NA> 2022-04-24 3764
## 918: <NA> 2022-05-01 2243
## 919: <NA> 2022-05-08 502
## covid19_cases_testdate_pr100000
## 1: 0.01863037
## 2: 0.00000000
## 3: 0.00000000
## 4: 0.00000000
## 5: 0.00000000
## ---
## 915: 126.96142312
## 916: 67.00127367
## 917: 69.37903551
## 918: 41.34356447
## 919: 9.25299570
##
## $hash
## $hash$current
## [1] "0ad573d37712f0a8ab666846d1b721a1"
##
## $hash$current_elements
## $hash$current_elements$covid19_cases
## [1] "07cc51795bccaf2afebe48619ce87227"
# One argset per combination of outcome variable and time granularity
p$add_argset_from_list(
  plnr::expand_list(
    variable = c("covid19_cases_testdate_n", "covid19_cases_testdate_pr100000"),
    granularity_time = c("isoweek", "day")
  )
)

# Examine the argsets that are available
p$get_argsets_as_dt()
## name_analysis index_analysis
## 1: 8e6c0d10-356c-456b-9c58-6e28f47164fc 1
## 2: f521b0a2-b352-45e6-8d5a-d7ee34a88bc4 2
## 3: f06a6b06-29a9-4d05-b66e-772709a4eaa0 3
## 4: 46ed0a2f-e549-4ade-aaa7-77ab12344dc1 4
## variable granularity_time
## 1: covid19_cases_testdate_n isoweek
## 2: covid19_cases_testdate_pr100000 isoweek
## 3: covid19_cases_testdate_n day
## 4: covid19_cases_testdate_pr100000 day
# We can then add a simple analysis that returns a figure:
# To do this, we first need to create an action function
# (takes two arguments -- data and argset)
#
# data:   named list of datasets; this function reads data$covid19_cases.
# argset: named list; this function reads variable (name of the column to
#         plot on the y axis) and granularity_time.
# Returns a ggplot line chart of the chosen variable over date.
action_fn <- function(data, argset) {
  # Development aid: presumably TRUE only when the body is stepped through
  # interactively (TODO confirm against plnr docs). In that case pull the
  # inputs for the first analysis straight from the plan.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset(1)
  }

  # Keep only the rows at this argset's time granularity
  pd <- data$covid19_cases[
    granularity_time == argset$granularity_time
  ]

  # The y column is chosen at run time, so look it up through the .data
  # pronoun; aes_string() is deprecated in ggplot2.
  q <- ggplot(pd, aes(x = date, y = .data[[argset$variable]]))
  q <- q + geom_line()
  # Set the y label explicitly so it is the column name, as it was with
  # aes_string(), regardless of how ggplot2 labels .data[[...]] mappings.
  q <- q + labs(title = argset$granularity_time, y = argset$variable)
  q
}
# Attach the action function to every argset added above
p$apply_action_fn_to_all_argsets(fn_name = "action_fn")

# Run each of the four analyses in turn
p$run_one(1)
p$run_one(2)
p$run_one(3)
p$run_one(4)
This approach is generally used when you are creating the output for a report, and you need multiple different tables and graphs.
library(ggplot2)
library(data.table)
library(magrittr)
# We begin by defining a new plan
p <- plnr::Plan$new()
# Data function
# Takes no arguments and returns the dataset shipped with plnr. It is
# registered with the plan by name ("data_fn") via p$add_data().
data_fn <- function() {
  plnr::nor_covid19_cases_by_time_location
}
# We add sources of data
# We can add data directly
p$add_data(
  name = "covid19_cases",
  fn_name = "data_fn"
)

# Inspect the data as the plan will hand it to the action functions
p$get_data()
## $covid19_cases
## granularity_time granularity_geo country_iso3 location_code border age
## 1: day county nor county_nor03 2020 total
## 2: day county nor county_nor03 2020 total
## 3: day county nor county_nor03 2020 total
## 4: day county nor county_nor03 2020 total
## 5: day county nor county_nor03 2020 total
## ---
## 11024: isoweek nation nor nation_nor 2020 total
## 11025: isoweek nation nor nation_nor 2020 total
## 11026: isoweek nation nor nation_nor 2020 total
## 11027: isoweek nation nor nation_nor 2020 total
## 11028: isoweek nation nor nation_nor 2020 total
## sex isoyear isoweek isoyearweek season seasonweek calyear calmonth
## 1: total 2020 8 2020-08 2019/2020 31 2020 2
## 2: total 2020 8 2020-08 2019/2020 31 2020 2
## 3: total 2020 8 2020-08 2019/2020 31 2020 2
## 4: total 2020 9 2020-09 2019/2020 32 2020 2
## 5: total 2020 9 2020-09 2019/2020 32 2020 2
## ---
## 11024: total 2022 14 2022-14 2021/2022 37 NA NA
## 11025: total 2022 15 2022-15 2021/2022 38 NA NA
## 11026: total 2022 16 2022-16 2021/2022 39 NA NA
## 11027: total 2022 17 2022-17 2021/2022 40 NA NA
## 11028: total 2022 18 2022-18 2021/2022 41 NA NA
## calyearmonth date covid19_cases_testdate_n
## 1: 2020-M02 2020-02-21 0
## 2: 2020-M02 2020-02-22 0
## 3: 2020-M02 2020-02-23 0
## 4: 2020-M02 2020-02-24 0
## 5: 2020-M02 2020-02-25 0
## ---
## 11024: <NA> 2022-04-10 6888
## 11025: <NA> 2022-04-17 3635
## 11026: <NA> 2022-04-24 3764
## 11027: <NA> 2022-05-01 2243
## 11028: <NA> 2022-05-08 502
## covid19_cases_testdate_pr100000
## 1: 0.000000
## 2: 0.000000
## 3: 0.000000
## 4: 0.000000
## 5: 0.000000
## ---
## 11024: 126.961423
## 11025: 67.001274
## 11026: 69.379036
## 11027: 41.343564
## 11028: 9.252996
##
## $hash
## $hash$current
## [1] "0306cac791d5f990073167e17ed15f9b"
##
## $hash$current_elements
## $hash$current_elements$covid19_cases
## [1] "bad75e8e213b3de3eee2b4ecbf157f46"
# Completely unique function for figure 1
# (the function is referenced by name, so it can be defined afterwards)
p$add_analysis(
  name = "figure_1",
  fn_name = "figure_1"
)
# Action function for the analysis named "figure_1".
#
# data:   named list of datasets; this function reads data$covid19_cases.
# argset: named list; not read by the plotting code below.
# Returns a ggplot of weekly cases per 100 000 population, one panel per
# location_code.
figure_1 <- function(data, argset) {
  # Development aid: presumably TRUE only when the body is stepped through
  # interactively (TODO confirm against plnr docs). In that case pull the
  # inputs for this analysis straight from the plan.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset("figure_1")
  }

  # Weekly rows only
  pd <- data$covid19_cases[
    granularity_time == "isoweek"
  ]

  # Column names are fixed, so plain aes() replaces the deprecated
  # aes_string()
  q <- ggplot(pd, aes(x = date, y = covid19_cases_testdate_pr100000))
  q <- q + geom_line()
  q <- q + facet_wrap(~location_code)
  q <- q + labs(title = "Weekly covid-19 cases per 100 000 population")
  q
}
# Reusing a function for figures 2 and 3
p$add_analysis(
  name = "figure_2",
  fn_name = "plot_epicurve_by_location",
  location_code = "nation_nor"
)

# Reusing a function for figures 2 and 3
p$add_analysis(
  name = "figure_3",
  fn_name = "plot_epicurve_by_location",
  location_code = "county_nor03"
)
# Action function shared by the analyses named "figure_2" and "figure_3".
#
# data:   named list of datasets; this function reads data$covid19_cases.
# argset: named list; this function reads location_code.
# Returns a ggplot line chart of weekly covid19_cases_testdate_n over date
# for the requested location.
plot_epicurve_by_location <- function(data, argset) {
  # Development aid: presumably TRUE only when the body is stepped through
  # interactively (TODO confirm against plnr docs). Both argsets are listed
  # so either can be loaded while developing; run top to bottom, the second
  # assignment ("figure_3") is the one that sticks.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset("figure_2")
    argset <- p$get_argset("figure_3")
  }

  # Weekly rows for the requested location only
  pd <- data$covid19_cases[
    granularity_time == "isoweek" &
      location_code == argset$location_code
  ]

  # Column names are fixed, so plain aes() replaces the deprecated
  # aes_string()
  q <- ggplot(pd, aes(x = date, y = covid19_cases_testdate_n))
  q <- q + geom_line()
  q <- q + labs(title = argset$location_code)
  q
}
# Run each named analysis
p$run_one("figure_1")
p$run_one("figure_2")
p$run_one("figure_3")