# Example workflow for the rcprd package, laid out as knitr-style code chunks
# (each `## ----` line below is a chunk header). The script:
#   1. lists the example data files shipped under "aurum_data" in rcprd,
#   2. builds a cohort with extract_cohort(),
#   3. loads observation and drugissue files into an SQLite database, first
#      file by file with add_to_database() and then with cprd_extract(),
#   4. derives variables with extract_ho(), extract_time_until() and
#      extract_test_data() and merges them by patid,
#   5. queries the database directly with db_query() / combine_query*(),
#   6. repeats an extraction using the directory layout created by
#      create_directory_system(), then cleans up.

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----echo = FALSE, out.width = "100%", fig.cap = "Figure 1: Flowchart for recommended data extraction process. Blue boxes denote data files. Orange boxes denote application of rcprd functions."----
#![Figure 1: Flowchart for recommended data extraction process](){width = 100%}
knitr::include_graphics("FlowChart.png")

## ----echo = FALSE, out.width = "100%", fig.cap = "Table 1: Table of rcprd functions"----
#![Table 1: Table of rcprd functions](FunctionTable20240906.png){width = 100%}
knitr::include_graphics("FunctionTable20240906.png")

## -----------------------------------------------------------------------------
#devtools::install_github("alexpate30/rcprd")
#install.packages("rcprd") NOT YET ON CRAN
library(rcprd)
#devtools::load_all()
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in ".txt" are listed.
list.files(system.file("aurum_data", package = "rcprd"), pattern = "\\.txt$")

## -----------------------------------------------------------------------------
pat <- extract_cohort(
  filepath = system.file("aurum_data", package = "rcprd"),
  patids = as.character(c(1, 3, 4, 6))
)
str(pat)

## -----------------------------------------------------------------------------
pat <- extract_cohort(filepath = system.file("aurum_data", package = "rcprd"))
str(pat)

## -----------------------------------------------------------------------------
pat <- subset(pat, patid %in% c(1, 3, 4, 6))

## -----------------------------------------------------------------------------
aurum_extract <- connect_database(file.path(tempdir(), "temp.sqlite"))

## -----------------------------------------------------------------------------
# First file is added with overwrite = TRUE, the remaining files with
# append = TRUE.
add_to_database(
  filepath = system.file(
    "aurum_data",
    "aurum_allpatid_set1_extract_observation_001.txt",
    package = "rcprd"
  ),
  filetype = "observation",
  subset_patids = c(1, 3, 4, 6),
  db = aurum_extract,
  overwrite = TRUE
)
add_to_database(
  filepath = system.file(
    "aurum_data",
    "aurum_allpatid_set1_extract_observation_002.txt",
    package = "rcprd"
  ),
  filetype = "observation",
  subset_patids = c(1, 3, 4, 6),
  db = aurum_extract,
  append = TRUE
)
add_to_database(
  filepath = system.file(
    "aurum_data",
    "aurum_allpatid_set1_extract_observation_003.txt",
    package = "rcprd"
  ),
  filetype = "observation",
  subset_patids = c(1, 3, 4, 6),
  db = aurum_extract,
  append = TRUE
)

## -----------------------------------------------------------------------------
RSQLite::dbGetQuery(aurum_extract, "SELECT * FROM observation", n = 3)

## -----------------------------------------------------------------------------
add_to_database(
  filepath = system.file(
    "aurum_data",
    "aurum_allpatid_set1_extract_drugissue_001.txt",
    package = "rcprd"
  ),
  filetype = "drugissue",
  subset_patids = c(1, 3, 4, 6),
  db = aurum_extract,
  overwrite = TRUE
)
add_to_database(
  filepath = system.file(
    "aurum_data",
    "aurum_allpatid_set1_extract_drugissue_002.txt",
    package = "rcprd"
  ),
  filetype = "drugissue",
  subset_patids = c(1, 3, 4, 6),
  db = aurum_extract,
  append = TRUE
)
add_to_database(
  filepath = system.file(
    "aurum_data",
    "aurum_allpatid_set1_extract_drugissue_003.txt",
    package = "rcprd"
  ),
  filetype = "drugissue",
  subset_patids = c(1, 3, 4, 6),
  db = aurum_extract,
  append = TRUE
)

## -----------------------------------------------------------------------------
RSQLite::dbGetQuery(aurum_extract, "SELECT * FROM drugissue", n = 3)

## -----------------------------------------------------------------------------
RSQLite::dbListTables(aurum_extract)

## -----------------------------------------------------------------------------
RSQLite::dbDisconnect(aurum_extract)

## -----------------------------------------------------------------------------
aurum_extract <- connect_database(file.path(tempdir(), "temp.sqlite"))

## -----------------------------------------------------------------------------
### Extract data
cprd_extract(
  db = aurum_extract,
  filepath = system.file("aurum_data", package = "rcprd"),
  filetype = "observation",
  subset_patids = c(1, 3, 4, 6),
  use_set = FALSE
)

### Query first three rows
RSQLite::dbGetQuery(aurum_extract, "SELECT * FROM observation", n = 3)

## -----------------------------------------------------------------------------
### Extract data
cprd_extract(
  db = aurum_extract,
  filepath = system.file("aurum_data", package = "rcprd"),
  filetype = "drugissue",
  subset_patids = c(1, 3, 4, 6),
  use_set = FALSE
)

### List tables
RSQLite::dbListTables(aurum_extract)

### Query first three rows
RSQLite::dbGetQuery(aurum_extract, "SELECT * FROM drugissue", n = 3)

### Disconnect
RSQLite::dbDisconnect(aurum_extract)

## -----------------------------------------------------------------------------
pat <- extract_cohort(
  filepath = system.file("aurum_data", package = "rcprd"),
  patids = as.character(c(1, 3, 4, 6)),
  set = TRUE
)
str(pat)

## -----------------------------------------------------------------------------
### Create connection to SQLite database
aurum_extract <- connect_database(file.path(tempdir(), "temp.sqlite"))

### Add observation files
cprd_extract(
  db = aurum_extract,
  filepath = system.file("aurum_data", package = "rcprd"),
  filetype = "observation",
  subset_patids = pat,
  use_set = TRUE
)

### Add drugissue files
cprd_extract(
  db = aurum_extract,
  filepath = system.file("aurum_data", package = "rcprd"),
  filetype = "drugissue",
  subset_patids = pat,
  use_set = TRUE
)

### Query first three rows of each table
RSQLite::dbGetQuery(aurum_extract, "SELECT * FROM observation", n = 3)
RSQLite::dbGetQuery(aurum_extract, "SELECT * FROM drugissue", n = 3)

## -----------------------------------------------------------------------------
### Define codelist
codelist <- "187341000000114"

### Add an index date to cohort
pat$fup_start <- as.Date("01/01/2020", format = "%d/%m/%Y")

### Extract a history of type variable using extract_ho
ho <- extract_ho(
  cohort = pat,
  codelist_vector = codelist,
  indexdt = "fup_start",
  db_open = aurum_extract,
  tab = "observation",
  return_output = TRUE
)
str(ho)

## -----------------------------------------------------------------------------
### Add a censoring date to cohort
pat$fup_end <- as.Date("01/01/2024", format = "%d/%m/%Y")

### Extract a time until variable using extract_time_until
time_until <- extract_time_until(
  cohort = pat,
  codelist_vector = codelist,
  indexdt = "fup_start",
  censdt = "fup_end",
  db_open = aurum_extract,
  tab = "observation",
  return_output = TRUE
)
str(time_until)

## -----------------------------------------------------------------------------
### Extract test data using extract_test_data
test_data <- extract_test_data(
  cohort = pat,
  codelist_vector = codelist,
  indexdt = "fup_start",
  db_open = aurum_extract,
  time_post = 0,
  time_prev = Inf,
  return_output = TRUE
)
str(test_data)

## -----------------------------------------------------------------------------
### Recursive merge
# Left-join each extracted variable onto the cohort columns by patid.
analysis.ready.pat <- Reduce(
  function(df1, df2) merge(df1, df2, by = "patid", all.x = TRUE),
  list(pat[, c("patid", "gender", "yob")], ho, time_until, test_data)
)
analysis.ready.pat

## -----------------------------------------------------------------------------
# Note: the result is stored under the same name as the db_query() function.
db_query <- db_query(
  db_open = aurum_extract,
  tab = "observation",
  codelist_vector = "187341000000114"
)
db_query

## -----------------------------------------------------------------------------
### Add an index date to pat
pat$indexdt <- as.Date("01/01/2020", format = "%d/%m/%Y")

### Combine query with cohort creating a boolean variable denoting 'history of'
combine.query.boolean <- combine_query_boolean(
  cohort = pat,
  db_query = db_query,
  query_type = "med"
)
combine.query.boolean

## -----------------------------------------------------------------------------
### Combine query with cohort retaining most recent three records
combine.query <- combine_query(
  cohort = pat,
  db_query = db_query,
  query_type = "med",
  numobs = 3
)
combine.query

## -----------------------------------------------------------------------------
### Combine query with cohort retaining most recent three records,
### this time with query_type = "test"
combine.query <- combine_query(
  cohort = pat,
  db_query = db_query,
  query_type = "test",
  numobs = 3
)
combine.query

### Disconnect
RSQLite::dbDisconnect(aurum_extract)

## ----include = FALSE----------------------------------------------------------
### Hide clean up of filespaces prior to moving onto next session (clean up required by CRAN)
unlink(file.path(tempdir(), "temp.sqlite"))

## ----include = FALSE----------------------------------------------------------
## Save working directory location, so can revert at end of vignette (required by CRAN)
oldwd <- getwd()

## -----------------------------------------------------------------------------
## Set working directory
knitr::opts_knit$set(root.dir = tempdir())

## -----------------------------------------------------------------------------
suppressMessages(
  create_directory_system()
)
file.exists(file.path(tempdir(), "data"))
file.exists(file.path(tempdir(), "codelists"))
file.exists(file.path(tempdir(), "code"))

## -----------------------------------------------------------------------------
## Open connection
aurum_extract <- connect_database("data/sql/mydb.sqlite")

## Add data to SQLite database using cprd_extract
cprd_extract(
  db = aurum_extract,
  filepath = system.file("aurum_data", package = "rcprd"),
  filetype = "observation",
  use_set = FALSE
)

## Disconnect
RSQLite::dbDisconnect(aurum_extract)

## -----------------------------------------------------------------------------
### Define codelist
codelist <- data.frame(medcodeid = "187341000000114")

### Save codelist
write.csv(codelist, "codelists/analysis/mylist.csv")

## -----------------------------------------------------------------------------
# Here the codelist and database are referred to by name ("mylist", "mydb")
# rather than passed as objects, and the output is saved to disk instead of
# returned.
extract_ho(
  cohort = pat,
  codelist = "mylist",
  indexdt = "fup_start",
  db = "mydb",
  tab = "observation",
  return_output = FALSE,
  out_save_disk = TRUE
)

## -----------------------------------------------------------------------------
readRDS("data/extraction/var_ho.rds")

## ----include = FALSE----------------------------------------------------------
### Return filespace to how it was prior to example
delete_directory_system()

## ----include = FALSE----------------------------------------------------------
## Return root directory to how it was prior to example
knitr::opts_knit$set(root.dir = oldwd)