Setting up

Below is an example of a more complicated workflow that starts from multiple input files. It is intended to demonstrate a full rtrackr workflow from start to finish.

We will define and log simple data frames for this example. For more information, please see “Getting started” and “Summarising data”.

trackr_dir <- '~/Documents/Personal/trackr_dev/trackr_dir'

df1 <- data.frame(a = c('a', 'b', 'c'), b = c(1, 2, 3))
df2 <- data.frame(a = c('a', 'b', 'c'), b = c(4, 5, 6))
Details 1: df1
a b
a 1
b 2
c 3
Details 2: df2
a b
a 4
b 5
c 6
df1 <- trackr_new(df1, trackr_dir = trackr_dir, timepoint_message = 'Start', suppress_success = TRUE)
df2 <- trackr_new(df2, trackr_dir = trackr_dir, timepoint_message = 'Start', suppress_success = TRUE)
Details 1: df1
a b trackr_id
a 1 7833e8fc049ebc8f3e8c5224ba8f99bffa972b94_4751028c3d830cf93f7d1e64d5e4d58c9d01ee32
b 2 7833e8fc049ebc8f3e8c5224ba8f99bffa972b94_d6543aeb67806714fa9e9567dc5c46b2106ae843
c 3 7833e8fc049ebc8f3e8c5224ba8f99bffa972b94_acecf5a5dfb445683737c2f7f135b18c5eaee1a7
Details 2: df2
a b trackr_id
a 4 7c4429f5b1b63c89fa23c59af6c2bff395286bc3_47ed5196677a528d0267158fa4aecf92372cb285
b 5 7c4429f5b1b63c89fa23c59af6c2bff395286bc3_b845cb31be54e58d8719cbb88ea27d51f1a2df1c
c 6 7c4429f5b1b63c89fa23c59af6c2bff395286bc3_d407e3ee6c77faa4ed4ffe31fa434f326ff9ddbe

Workflow

df <- rbind(df1, df2)

df <- df %>% dplyr::group_by(a) %>%
  trackr_summarise(n = dplyr::n())
Details
a n trackr_id
a 2 7833e8fc049ebc8f3e8c5224ba8f99bffa972b94_4751028c3d830cf93f7d1e64d5e4d58c9d01ee32, 7c4429f5b1b63c89fa23c59af6c2bff395286bc3_47ed5196677a528d0267158fa4aecf92372cb285
b 2 7833e8fc049ebc8f3e8c5224ba8f99bffa972b94_d6543aeb67806714fa9e9567dc5c46b2106ae843, 7c4429f5b1b63c89fa23c59af6c2bff395286bc3_b845cb31be54e58d8719cbb88ea27d51f1a2df1c
c 2 7833e8fc049ebc8f3e8c5224ba8f99bffa972b94_acecf5a5dfb445683737c2f7f135b18c5eaee1a7, 7c4429f5b1b63c89fa23c59af6c2bff395286bc3_d407e3ee6c77faa4ed4ffe31fa434f326ff9ddbe
df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'processing 1', suppress_success = TRUE)
Details
a n trackr_id
a 2 b705390d268b5baccbd116b28abe11fd93fb73e9_9b298ab2c9539c0d7f187ce26315f459fa58e78e
b 2 b705390d268b5baccbd116b28abe11fd93fb73e9_d6543aeb67806714fa9e9567dc5c46b2106ae843
c 2 b705390d268b5baccbd116b28abe11fd93fb73e9_c7a74dd8ad5c5743a5983d3102600cc7f9df9370
df <- df %>%
  dplyr::mutate(n = n + 20)

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'processing 1', suppress_success = TRUE)
Details
a n trackr_id
a 22 c105cd4b4ddc31d50ac78b1000d7ccc61f56e206_68c5ca73812a80a8ef11cbd4150ca48d1f956ec1
b 22 c105cd4b4ddc31d50ac78b1000d7ccc61f56e206_15286b997ca4ef9b34614bc77265b1f529295a5e
c 22 c105cd4b4ddc31d50ac78b1000d7ccc61f56e206_6e894e5f35b10a1e3b36eec46f595b65f28c8905

Results

Inspect the full processing chain of a single record with trackr_lineage() and trackr_network(). For more information, please see “Getting started”. Note that this record has two “root” records: it descends from two starting points in the processing chain, one in each input data frame.

target_id <- df$trackr_id[1]
trackr_lineage(target_id, trackr_dir)
## [1] "Successfully written c105cd4b4ddc31d50ac78b1000d7ccc61f56e206_68c5ca73812a80a8ef11cbd4150ca48d1f956ec1_lineage.json"
lineage_fn <- paste0(trackr_dir, '/', target_id, '_lineage.json')

trackr_network(lineage_fn)

Clean up

clean_trackr_dir(trackr_dir)

Article by Hamish Gibbs 2020-06-18 15:47:25. To report a problem with this package, please create an issue on GitHub.