Setting up

Below is an example of a more complicated, end-to-end rtrackr workflow, starting from a simple input data frame.

We will define and log a simple data frame for this example. For more information, please see the "Getting started" and "Summarising data" articles.

df <- data.frame(a = c('a', 'b', 'c'), b = c(1, 2, 3))
Details
a b
a 1
b 2
c 3
trackr_dir <- '~/Documents/Personal/trackr_dev/trackr_dir'

# set up a new trackr
df <- trackr_new(df, trackr_dir = trackr_dir, timepoint_message = 'Start', suppress_success = TRUE)

Workflow

#make some change to the data 
df <- df %>% dplyr::mutate(b = b + 1)

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'Change point #1', suppress_success = TRUE)
Details
a b trackr_id
a 2 db392d21268eeefc9771d84c1928eae1e20dfd1b_9b298ab2c9539c0d7f187ce26315f459fa58e78e
b 3 db392d21268eeefc9771d84c1928eae1e20dfd1b_0d62e87ad3877b0c51934dd0c38cba3467375b09
c 4 db392d21268eeefc9771d84c1928eae1e20dfd1b_b1ba9407cf19da9a3625e46f5378484323799a05
#repeat for subsequent processing steps
df <- df %>% dplyr::mutate(b = b + 300)

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'Change point #2', suppress_success = TRUE)
Details
a b trackr_id
a 302 a1428b580b6ad9c612d7c73dddc0b3b941f7cc9e_df6c36b931b3c85b98b33b3be98d70c6c3569d95
b 303 a1428b580b6ad9c612d7c73dddc0b3b941f7cc9e_ad3a917f44d6a3f9fbc121c14cf631784c721fc4
c 304 a1428b580b6ad9c612d7c73dddc0b3b941f7cc9e_20e8c8352858485ba2abdc2a151f5cc2dbcf4503
#splitting a single record into multiples
df <- rbind(df, df %>% dplyr::mutate(b = b -300))

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'Dividing rows', suppress_success = TRUE)
Details
a b trackr_id
a 302 9e17b3a0b0cd998fcd22cd7de557350eeda7c9ab_df6c36b931b3c85b98b33b3be98d70c6c3569d95
b 303 9e17b3a0b0cd998fcd22cd7de557350eeda7c9ab_ad3a917f44d6a3f9fbc121c14cf631784c721fc4
c 304 9e17b3a0b0cd998fcd22cd7de557350eeda7c9ab_20e8c8352858485ba2abdc2a151f5cc2dbcf4503
a 2 9e17b3a0b0cd998fcd22cd7de557350eeda7c9ab_9b298ab2c9539c0d7f187ce26315f459fa58e78e
b 3 9e17b3a0b0cd998fcd22cd7de557350eeda7c9ab_0d62e87ad3877b0c51934dd0c38cba3467375b09
c 4 9e17b3a0b0cd998fcd22cd7de557350eeda7c9ab_b1ba9407cf19da9a3625e46f5378484323799a05
#summarising data
df <- df %>% dplyr::group_by(a)

df <- trackr_summarise(df, n = dplyr::n())

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'Summarising rows', suppress_success = TRUE)
Details
a n trackr_id
a 2 b2f039a58124708092becc302805eff4ecf04f6f_9b298ab2c9539c0d7f187ce26315f459fa58e78e
b 2 b2f039a58124708092becc302805eff4ecf04f6f_d6543aeb67806714fa9e9567dc5c46b2106ae843
c 2 b2f039a58124708092becc302805eff4ecf04f6f_c7a74dd8ad5c5743a5983d3102600cc7f9df9370
df <- df %>% dplyr::mutate(n = n**runif(3))

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'Further Processing', suppress_success = TRUE)
Details
a n trackr_id
a 1.016543 c56210178736508ca0b43865bea6e0ace305f873_be5f5bb5d274daba25da42b1c33f426213919db4
b 1.563559 c56210178736508ca0b43865bea6e0ace305f873_858af3dce73222db702423f6f16355d0d8783522
c 1.646108 c56210178736508ca0b43865bea6e0ace305f873_f5660b11ae1a5eba313054758f2b80f8bbdecbe5
df <- rbind(df, df %>% dplyr::mutate(n = n -60), df %>% dplyr::mutate(n = n * 2), df %>% dplyr::mutate(n = n * 56))

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'Really messing with things', suppress_success = TRUE)
Details
a n trackr_id
a 1.016543 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_be5f5bb5d274daba25da42b1c33f426213919db4
b 1.563559 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_858af3dce73222db702423f6f16355d0d8783522
c 1.646108 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_f5660b11ae1a5eba313054758f2b80f8bbdecbe5
a -58.983457 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_c0bc3b62de7d6679d03090660af2cb82fc5e071b
b -58.436441 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_d5c83636d77ede413dd6826ece55230b761231b8
c -58.353892 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_1995c90e2ec23098fc8f89bd0dacdec31cd76927
a 2.033085 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_3bab44e6e5b9d40b3f5ec62f724340a1323577c7
b 3.127117 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_8c725a6441a63d68a8e3d612ead73af457574782
c 3.292215 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_23b95527f4e506946d77662cd22c565219361877
a 56.926395 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_71e61b198fd471ba604109ebeea61ca2003a2606
b 87.559286 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_9b1af3a14a0fb31d581be74d3eeddf5b69972f6d
c 92.182024 003a7b5cdb6cd22dcaa8ca63967bb94b824ca905_e198dc7d23d67c26d3eb17e1f6ea1c7da1121d8d
df <- df %>% dplyr::mutate(n = n + 1)

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'added 1', suppress_success = TRUE)
Details
a n trackr_id
a 2.016543 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_c812d4a3a21379c47f3417124de8377122517697
b 2.563559 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_bae106ac3f8ed7baf680f53294cdc8e5c6defa3b
c 2.646108 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_3e761f1fe2973635f7ec6cc16bbdae9c89adc58b
a -57.983457 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_7ab1c0f5038cd25f6cf4c9b46b10c0d5e74edf5d
b -57.436441 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_25910a223c7f9a2b3f89f6f4f3ff111b263e5ab0
c -57.353892 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_84ec2a3ba79aa3354e2632e39b5ef0458f7452c9
a 3.033085 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_8cd117f6846311c4b93c0e2a302b7a7bb94cc57a
b 4.127117 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_4f10684c334b2da0cd82a511eaf8cba898322696
c 4.292215 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_c4e1b4cbe0a0e0735f221a21eb35cebf7cce0024
a 57.926395 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_a41e224c6df203c61236ad10570caf56962cf936
b 88.559286 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_93f608ddcc884941e2e106ffcd7da3cfab0f952e
c 93.182024 b1d98cdf5bc2ee3ee6e8a7fc52671ff30dce7803_a0657d892eee7fcb8b1adf444f82ac8b57d958d6
df <- df %>% dplyr::group_by(a) %>%
  trackr_summarise(n =  dplyr::n())

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'grouped', suppress_success = TRUE)
Details
a n trackr_id
a 4 d9cf35fd27483b14b1592dcb7ecd83860b7ca68a_47ed5196677a528d0267158fa4aecf92372cb285
b 4 d9cf35fd27483b14b1592dcb7ecd83860b7ca68a_3edbbf5495c6c72354eb25125e5b718cf570d596
c 4 d9cf35fd27483b14b1592dcb7ecd83860b7ca68a_b1ba9407cf19da9a3625e46f5378484323799a05
df <- df %>% dplyr::mutate(n = n + 2000)

df <- trackr_timepoint(df, trackr_dir = trackr_dir, timepoint_message = 'added 2000', suppress_success = TRUE)
Details
a n trackr_id
a 2004 d68f8d4df8ca397665029b5620b1613d8b45cf7e_2a8b275852b4085d374094c98a3f95bc0e22ec3b
b 2004 d68f8d4df8ca397665029b5620b1613d8b45cf7e_8fe5b9603973e432094f4e7b81bd411b11d9107b
c 2004 d68f8d4df8ca397665029b5620b1613d8b45cf7e_3474d281b937403b9c2e9f283b09268bc6f2833a

Results

Inspect the full processing chain of a single record with trackr_lineage() and trackr_network(). For more information, please see the "Getting started" article.

target_id <- df$trackr_id[1]
trackr_lineage(target_id, trackr_dir)
## [1] "Successfully written d68f8d4df8ca397665029b5620b1613d8b45cf7e_2a8b275852b4085d374094c98a3f95bc0e22ec3b_lineage.json"
lineage_fn <- paste0(trackr_dir, '/', target_id, '_lineage.json')

trackr_network(lineage_fn)

Clean up

clean_trackr_dir(trackr_dir)

Article by Hamish Gibbs 2020-06-18 15:47:37. To report a problem with this package, please create an issue on GitHub.