[R] How to create a new data.frame based on calculation of subsets of an existing data.frame

Thu Dec 19 03:04:47 CET 2019

Hi Ioanna,
I looked at the problem this morning and tried to work out what you
wanted. With a problem like this, it is often easy when you have
someone point to the data and say "I want this added to that and this
multiplied by that". I have probably made the wrong guesses, but I
hope that you can correct my guesses and I can get the calculations
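correct for you. For example, I have assumed that, within each IM.type
and Taxonomy group, you want VC_1, VC_2 and VC_3 to be the weighted sum
of the differences in the Prob.of.exceedance_* values between successive
damage states, and VC_4 to be the sum of those values for DS4.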

# Example data: one row per Damage.state within each IM.type/Taxonomy group.
D <- data.frame(
  Ref.No = c(1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629),
  Region = rep("South America", times = 8),
  IM.type = c("PGA", "PGA", "PGA", "PGA", "Sa", "Sa", "Sa", "Sa"),
  Damage.state = c("DS1", "DS2", "DS3", "DS4", "DS1", "DS2", "DS3", "DS4"),
  Taxonomy = c(
    "ER+ETR_H1", "ER+ETR_H1", "ER+ETR_H1", "ER+ETR_H1",
    "ER+ETR_H2", "ER+ETR_H2", "ER+ETR_H2", "ER+ETR_H2"
  ),
  IM_1 = c(0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00),
  IM_2 = c(0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08, 0.08),
  IM_3 = c(0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16),
  IM_4 = c(0.24, 0.24, 0.24, 0.24, 0.24, 0.24, 0.24, 0.24),
  Prob.of.exceedance_1 = c(0, 0, 0, 0, 0, 0, 0, 0),
  Prob.of.exceedance_2 = c(0, 0, 0, 0, 0, 0, 0, 0),
  Prob.of.exceedance_3 =
    c(0.26, 0.001, 0.00019, 0.000000573, 0.04, 0.00017, 0.000215, 0.000472),
  Prob.of.exceedance_4 =
    c(0.72, 0.03, 0.008, 0.000061, 0.475, 0.0007, 0.00435, 0.000405),
  stringsAsFactors = FALSE
)
# assume the above has been read in

# names of the variables used in the calculations
calc_vars <- paste("Prob.of.exceedance", 1:4, sep = "_")
# damage states, in the order in which successive differences are taken
damage_states <- c("DS1", "DS2", "DS3", "DS4")
# Weight applied to each result column. VC_k is the weighted sum of
# (state k - state k + 1); the last state has no successor, so VC_4 is the
# plain sum of the DS4 values (weight 1).
vc_weights <- c(VC_1 = 0.01, VC_2 = 0.02, VC_3 = 0.43, VC_4 = 1)

# Add the result columns as numeric NA, one at a time so they appear in the
# order VC_1..VC_4 (a chained `a <- b <- NA` assignment would create them in
# reverse order).
for (vc_name in names(vc_weights)) {
  D[[vc_name]] <- NA_real_
}

# step through all possible values of IM.type and Taxonomy
for (IM in unique(D$IM.type)) {
  for (Tax in unique(D$Taxonomy)) {
    # logical vector of the rows belonging to this IM.type/Taxonomy group
    calc_rows <- D$IM.type == IM & D$Taxonomy == Tax
    # not every combination of the two variables occurs in the data
    if (!any(calc_rows)) {
      next
    }
    # exceedance probabilities of this group, one data frame per damage state
    ds_probs <- lapply(damage_states, function(ds) {
      D[calc_rows & D$Damage.state == ds, calc_vars]
    })
    # Subtracting data frames needs equal dimensions; report which group is
    # inconsistent instead of failing with the generic Ops.data.frame error.
    n_per_state <- vapply(ds_probs, nrow, integer(1))
    if (length(unique(n_per_state)) > 1) {
      stop(
        "Unequal number of rows per damage state for IM.type '", IM,
        "' and Taxonomy '", Tax, "': ",
        paste(damage_states, n_per_state, sep = "=", collapse = ", "),
        call. = FALSE
      )
    }
    # fill in the four values for the rows of this group
    for (k in seq_along(vc_weights)) {
      if (k < length(damage_states)) {
        step <- ds_probs[[k]] - ds_probs[[k + 1]]
      } else {
        step <- ds_probs[[k]]
      }
      D[[names(vc_weights)[k]]][calc_rows] <- sum(vc_weights[[k]] * step)
    }
  }
}

Jim