Arun Srinivasan is the man! Once he saw that his data.table solution to the TidyR Challenge had an issue, he fixed it!
His solution is below along with a quick equivalence test to my original solution, and check out this stackOverflow question for a more engaging discussion of the strengths and weaknesses of both dplyr/tidyr and data.table.
Fake Data
library(wakefield)
library(tidyr)
library(dplyr)
d <- r_data_frame(
n=100,
id,
r_series(date_stamp,15,name='foo_date'),
r_series(level,15,name='foo_supply'),
r_series(date_stamp,10,name='bar_date'),
r_series(level,10,name='bar_supply'),
r_series(date_stamp,3,name='baz_date'),
r_series(level,3,name='baz_supply')
)
Test Function for Equivalence
# Create a true ordered data frame and drop any extraneous classes for each column
true_ordered_df <- function(x){
x$ID <- as.character(x$ID); class(x$ID) <- 'character'
x$med_date <- as.Date(x$med_date); class(x$med_date) <- 'Date'
x$med_supply <- as.integer(x$med_supply); class(x$med_supply) <- 'integer'
x$med_name <- as.character(x$med_name); class(x$med_name) <- 'character'
x <- data.frame(
ID=x$ID,
med_date=x$med_date,
med_supply=x$med_supply,
med_name=x$med_name,
stringsAsFactors=FALSE
)
x <- x[with(x,order(ID,med_date,med_supply,med_name)),]
row.names(x) <- NULL
x
}
Data.Table Solution, thanks to Arun Srinivasan
require(data.table) # v1.9.5
dt = as.data.table(d)
pattern = c("date", "supply")
mcols = lapply(pattern, grep, names(dt), value=TRUE)
dt.m = melt(dt, id="ID", measure=mcols, variable.name="med_name",
value.name = paste("med", pattern, sep="_"))
setattr(dt.m$med_name, 'levels', gsub("_.*$", "", mcols[[1L]]))
scripts2 <- true_ordered_df(dt.m)
My Original Solution
# foo
med_dates <- d %>%
select(ID,foo_date_1:foo_date_15) %>%
gather(med_seq, med_date, foo_date_1:foo_date_15)
med_dates$med_seq <- as.integer(sub('^foo_date_','',med_dates$med_seq))
med_supply <- d %>%
select(ID,foo_supply_1:foo_supply_15) %>%
gather(med_seq, med_supply, foo_supply_1:foo_supply_15)
med_supply$med_seq <- as.integer(sub('^foo_supply_','',med_supply$med_seq))
foo <- left_join(med_dates,med_supply, by=c('ID','med_seq')) %>%
select(ID,med_date,med_supply)
foo$med_name <- 'foo'
# bar
med_dates <- d %>%
select(ID,bar_date_1:bar_date_10) %>%
gather(med_seq, med_date, bar_date_1:bar_date_10)
med_dates$med_seq <- as.integer(sub('^bar_date_','',med_dates$med_seq))
med_supply <- d %>%
select(ID,bar_supply_1:bar_supply_10) %>%
gather(med_seq, med_supply, bar_supply_1:bar_supply_10)
med_supply$med_seq <- as.integer(sub('^bar_supply_','',med_supply$med_seq))
bar <- left_join(med_dates,med_supply, by=c('ID','med_seq')) %>%
select(ID,med_date,med_supply)
bar$med_name <- 'bar'
# baz
med_dates <- d %>%
select(ID,baz_date_1:baz_date_3) %>%
gather(med_seq, med_date, baz_date_1:baz_date_3)
med_dates$med_seq <- as.integer(sub('^baz_date_','',med_dates$med_seq))
med_supply <- d %>%
select(ID,baz_supply_1:baz_supply_3) %>%
gather(med_seq, med_supply, baz_supply_1:baz_supply_3)
med_supply$med_seq <- as.integer(sub('^baz_supply_','',med_supply$med_seq))
baz <- left_join(med_dates,med_supply, by=c('ID','med_seq')) %>%
select(ID,med_date,med_supply)
baz$med_name <- 'baz'
scripts <- true_ordered_df(rbind(foo,bar,baz))
all.equal(scripts,scripts2)
## [1] TRUE