[R] Extract complete rows by group and maximum

Sandy Small sandy.small at nhs.net
Thu Apr 28 13:31:43 CEST 2011


Hi

I'm trying to extract complete rows from a dataframe by group, based on
the maximum of a column within each group.
For example, I have a dataframe:

cvd_basestudy ... es_time ...
_____________
study1        ... 0.3091667
study2        ... 0.3091667
study2        ... 0.2625000
study3        ... 0.3033333
study3        ... 0.2625000
__________
etc

I can extract cvd_basestudy and the max(es_time) per group using ddply (from the plyr package):
ddply(datares_sinus_variable, .(cvd_basestudy),
function(x){max(x[['es_time']])})
or by
by(datares_sinus_variable$es_time, datares_sinus_variable$cvd_basestudy,
max)

but how do I extract the whole row, so that I get a dataframe containing
all the columns for the row with the maximum es_time in each group?

(dput output from the first 5 rows of my actual dataframe follows)

Any help would be much appreciated. Thanks in advance.
Sandy Small

structure(list(cvd_basestudy = c("study1", "study2", "study2",
"study3", "study3"), ecd_rhythm = structure(c(5L, 5L, 5L, 5L,
5L), .Label = c("AF", "FLUTTER", "PACED AF", "SCRAP", "SINUS",
"UNSURE"), class = "factor"), cvd_frame_mode = structure(c(2L,
2L, 2L, 2L, 2L), .Label = c("fixed_time", "variable_time"), class =
"factor"),
    cvd_part_fmt = structure(c(4L, 4L, 4L, 4L, 4L), .Label = c("first",
    "last", "mid", "whole"), class = "factor"), cvd_prev_fmt =
structure(c(1L,
    2L, 1L, 3L, 2L), .Label = c("All", "Best", "Q1", "Q2", "Q3",
    "Q4"), class = "factor"), cvd_cur_fmt = structure(c(5L, 5L,
    1L, 4L, 4L), .Label = c("All", "Best", "Q1", "Q2", "Q3",
    "Q4"), class = "factor"), ps_pt = c(1, 1, 2, 1, 2), es_pt = c(8,
    8, 8, 8, 8), ed_pt = c(21, 21, 18, 17, 18), cvd_median_limit = c(1.057,
    1.057, 1.048, 1.037, 1.05), cvd_average_beat = c(1.06, 1.06,
    1.05, 1.04, 1.05), limit = c(0.9, 0.9, 0.9, 0.9, 0.9), sstd_mi =
c(FALSE,
    FALSE, FALSE, FALSE, FALSE), sstd_hbp = c(FALSE, FALSE, FALSE,
    FALSE, FALSE), sstd_ptca = c(FALSE, FALSE, FALSE, FALSE,
    FALSE), sstd_cabg = c(TRUE, TRUE, TRUE, TRUE, TRUE), sstd_norm_perf
= c(FALSE,
    FALSE, FALSE, FALSE, FALSE), sstd_posnegett = structure(c(NA_integer_,
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("-",
    "+"), class = "factor"), sstd_function = structure(c(NA_integer_,
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label =
c("MODERATE",
    "NORMAL", "POOR", "VERY POOR"), class = "factor"), cvd_cur_fmt_n = c(3,
    3, NA, 2, 2), cvd_prev_fmt_n = c(NA, NA, NA, 1, NA), cvd_cur_fmt2 =
structure(c(3L,
    3L, 1L, 3L, 3L), .Label = c("All", "Best", "Quartiles"), class =
"factor"),
    cvd_prev_fmt2 = structure(c(1L, 2L, 1L, 3L, 2L), .Label = c("All",
    "Best", "Quartiles"), class = "factor"), es_time = c(0.309166666666667,
    0.309166666666667, 0.2625, 0.303333333333333, 0.2625), es_time_err =
c(0.0441666666666667,
    0.0441666666666667, 0.04375, 0.0433333333333333, 0.04375),
    ed_time = c(0.574166666666667, 0.574166666666667, 0.4375,
    0.39, 0.4375)), .Names = c("cvd_basestudy", "ecd_rhythm",
"cvd_frame_mode", "cvd_part_fmt", "cvd_prev_fmt", "cvd_cur_fmt",
"ps_pt", "es_pt", "ed_pt", "cvd_median_limit", "cvd_average_beat",
"limit", "sstd_mi", "sstd_hbp", "sstd_ptca", "sstd_cabg", "sstd_norm_perf",
"sstd_posnegett", "sstd_function", "cvd_cur_fmt_n", "cvd_prev_fmt_n",
"cvd_cur_fmt2", "cvd_prev_fmt2", "es_time", "es_time_err", "ed_time"
), row.names = c("651", "655", "656", "661", "663"), class = "data.frame")


********************************************************************************************************************

This message may contain confidential information. If yo...{{dropped:21}}



More information about the R-help mailing list