[R] Help on reducing multiple loops

Sumanta Basak sumanta24 at gmail.com
Wed May 17 08:22:23 CEST 2017


Hi All,

I've a data-set on product sub-product matrix on which I'm doing multiple
calculation, but unfortunately using nested loops, the programme is taking
long time to execute. Can anyone help me how to get rid of the following
jungle? Any direction would be helpful.

GA <- "India"
verticle <- "Prod1"

prod_data <- readRDS(paste0("/Prod_ladder_",GA,"_",verticle,".rds"))
setDF(prod_data)

Final_data <-
subset(prod_data[,c("P_KEY","Active_Prod_Id","Active_Prod_Nm")],!duplicated(prod_data[,c("P_KEY","Active_Prod_Id")]))

proximity_prod_mapping <- readRDS("Proximity_prod_mapping.rds")
dst_prod <- subset(prod_data[,c("P_KEY")],!duplicated(prod_data$P_KEY))


output_data <- c()
data_merge_final <- c()

system.time({
  for(i in 1 : length(dst_prod)){

    prod_data <- subset(prod_data,prod_data$P_KEY == dst_prod[i]) #
Subsetting data at prod level
    dst_prod <-
subset(prod_data[,c("Active_Prod_Id")],!duplicated(prod_data$Active_Prod_Id))
# Finding distinct prods of active prodloyee

    for(j in 1 : length(dst_prod)){
      # Subsetting data at prod level for active prod
      # Fetiching data for Anchor prod
      prod_data1 <-
subset(prod_data[,c("P_KEY","Active_Prod_Id","Active_Prod_Nm","Start_Date_1","End_Date_1")],prod_data$Active_Prod_Id
== dst_prod[j])
      prod_data1$Anchor_prod <- 1
      anc_max_End_Date_1 <- as.Date(max(prod_data1$End_Date_1),origin =
"1970-01-01")
      anc_prod_count <- sum(prod_data1$Anchor_prod)

      # Fetiching data for Proximate prod
      prox_prod_data <-
subset(proximity_prod_mapping[,c("Proximate_prod_ID")],proximity_prod_mapping$Anchor_prod_ID
== dst_prod[j])
      prod_data2 <-
subset(prod_data[,c("P_KEY","Active_Prod_Id","Active_Prod_Nm","Start_Date_1","End_Date_1")],prod_data$Active_Prod_Id
%in% c(prox_prod_data))
      prox_sill_count <- 0
      if(nrow(prod_data2) > 0){
        prod_data2$Proximity_prod <- 1
        prox_max_End_Date_1 <- as.Date(max(prod_data2$End_Date_1),origin =
"1970-01-01")
        prox_sill_count <- sum(prod_data2$Proximity_prod)
      }
      # library(plyr)
      prod_data <-rbind.fill(prod_data1,prod_data2)
      prod_data$exclude <- 0
      prod_data$Anchor_Active_Prod_Id <- dst_prod[j]

      prod_data$Start_Date_1 <- as.Date(prod_data$Start_Date_1,origin =
"1970-01-01")
      prod_data$End_Date_1 <- as.Date(prod_data$End_Date_1,origin =
"1970-01-01")

      if(prox_sill_count > 0){
        if(nrow(prod_data) > 1){
          # Trimming end date of proximity prods where end data of
proximity prod is greater that Anchor prod
          if((prox_max_End_Date_1 - anc_max_End_Date_1) > 0){
            prod_data$End_Date_1 <- ifelse(prod_data$Proximity_prod == 1 &
(prod_data$End_Date_1 - anc_max_End_Date_1) > 0,
anc_max_End_Date_1,prod_data$End_Date_1)
            prod_data$End_Date_1 <- as.Date(prod_data$End_Date_1,origin =
"1970-01-01")
          }
          prod_data$exclude <- ifelse(prod_data$Proximity_prod == 1 &
(as.Date(prod_data$Start_Date_1,origin = "1970-01-01") -
anc_max_End_Date_1) > 0,1,0)
          prod_data <- subset(prod_data,prod_data$exclude == 0)

          prod_data <-
arrange(prod_data,prod_data$Anchor_prod,desc(prod_data$End_Date_1),prod_data$Start_Date_1)

          prod_data$Anchor_prod <- ifelse(is.na
(prod_data$Anchor_prod),0,prod_data$Anchor_prod)
          prod_data$Proximity_prod <- ifelse(is.na
(prod_data$Proximity_prod),0,prod_data$Proximity_prod)
          prod_data$new_rec <- 0

          tot_loop <- nrow(prod_data)
          k=1
          # Looping to map start date and end date of each row with other
rows
          while(k <= tot_loop){
            excl_flag <- prod_data[k,c("exclude")]
            if(excl_flag == 0){
              st_dt1 <- as.Date(prod_data[k,c("Start_Date_1")])
              end_dt1 <- as.Date(prod_data[k,c("End_Date_1")])
              prod_flag1 <- prod_data[k,c("Anchor_prod")]

              if(k != nrow(prod_data)){
                tot_row <- nrow(prod_data)

                for(m in 1 : (tot_row -k)){
                  l = k+m
                  if(l != k){
                    st_dt2 <- as.Date(prod_data[l,c("Start_Date_1")])
                    end_dt2 <- as.Date(prod_data[l,c("End_Date_1")])
                    prod_flag2 <- prod_data[l,c("Anchor_prod")]

                    flag_excl <- prod_data[l,c("exclude")]
                    if(flag_excl ==0){
                      rec_check <- prod_data[l,c("new_rec")]
                      # if(rec_check == 0){
                      prod_data$Start_date2 <- NA
                      prod_data$End_date2 <- NA

                      new_start_date <- as.Date(ifelse(prod_flag1 == 1 &
prod_flag2 == 1,NA,
                                                       ifelse(prod_flag1 ==
1 & prod_flag2 == 0 & end_dt2 > end_dt1 & st_dt2 < end_dt1,end_dt1,

ifelse(prod_flag1 == 0 & prod_flag2 == 1 & end_dt1 > end_dt2 & st_dt1 <
end_dt2,end_dt2,NA))),origin = "1970-01-01")
                      message(paste0("new_start_date = ",new_start_date))
                      new_start_date <- as.Date(new_start_date,origin =
"1970-01-01")
                      message(paste0("new_start_date = ",new_start_date))
                      new_end_date <- as.Date(ifelse(prod_flag1 == 1 &
prod_flag2 == 1,NA,
                                                     ifelse(prod_flag1 == 1
& prod_flag2 == 0 & end_dt2 > end_dt1 & st_dt2 < end_dt1,end_dt2,

ifelse(prod_flag1 == 0 & prod_flag2 == 1 & end_dt1 > end_dt2 & st_dt1 <
end_dt2,end_dt1,NA))),origin = "1970-01-01")
                      message(paste0("new_end_date = ",new_end_date))
                      new_end_date <- as.Date(new_end_date,origin =
"1970-01-01")
                      message(paste0("new_end_date = ",new_end_date))
                      prod_data[l,c("Start_date2")] <-
as.Date(new_start_date,origin = "1970-01-01")
                      prod_data[l,c("End_date2")] <-
as.Date(new_end_date,origin = "1970-01-01")

                      tmp_data <- subset(prod_data,!is.na
(prod_data$Start_date2))
                      tmp_data$Start_Date_1 <-
as.Date(tmp_data$Start_date2,origin = "1970-01-01")
                      tmp_data$End_Date_1 <-
as.Date(tmp_data$End_date2,origin = "1970-01-01")
                      if(nrow(tmp_data)){
                        tmp_data$new_rec <- 1
                        prod_data[l,c("End_Date_1")] <-
as.Date(end_dt1,origin = "1970-01-01")
                      }
                      prod_data <- rbind(prod_data,tmp_data)
                      tot_row <- tot_row + nrow(tmp_data)
                      tot_loop <- tot_loop + nrow(tmp_data)
                      prod_data$Start_date2 <- NULL
                      prod_data$End_date2 <- NULL
                      # }
                    }
                    # Condition to identify true subset

                    # overlap <- ifelse((st_dt1 >= st_dt2 & st_dt1 <=
end_dt2) & (end_dt1 >= st_dt2 & end_dt1 <= end_dt2),1,
                    #            ifelse((st_dt2 >= st_dt1 & st_dt2 <=
end_dt1) & (end_dt2 >= st_dt1 & end_dt2 <= end_dt1),1,0))



                    if((end_dt1 - st_dt2) >= 0){
                      if((end_dt2 - st_dt1) >= 0){
                        if((st_dt2 - st_dt1) >=0){
                          prod_data[k,c("exclude")] <- ifelse(prod_flag1 ==
1 & prod_flag2 == 1,9999, #if Anchor prods have overlapping

 ifelse(prod_flag1 == 1 & prod_flag2 == 0,0,

ifelse(prod_flag1 == 0 & prod_flag2 == 1,1,

 ifelse(prod_flag1 == 0 & prod_flag2 == 0,0,1))))
                          prod_data[l,c("exclude")] <- ifelse(prod_flag1 ==
1 & prod_flag2 == 1,9999,

 ifelse(prod_flag1 == 0 & prod_flag2 == 1,0,

ifelse(prod_flag1 == 1 & prod_flag2 == 0,1,

 ifelse(prod_flag1 == 0 & prod_flag2 == 0,1,0))))
                        }
                      }
                    }
                    # Condition to trim the dates as to make dates in each
observation mutually exclusive to exch other
                    flag_excl <- prod_data[l,c("exclude")]
                    if(flag_excl == 0){
                      if(end_dt1 > st_dt2){
                        if(st_dt1 >= st_dt2){
                          new_date <- ifelse(end_dt2 >
st_dt1,as.Date(st_dt1,origin = "1970-01-01"),as.Date(end_dt2,origin =
"1970-01-01"))
                          new_date <- as.Date(new_date,origin =
"1970-01-01")
                          old_date <-
as.Date(prod_data[l,c("End_Date_1")],origin = "1970-01-01")
                          old_date <- as.Date(old_date,origin =
"1970-01-01")

                          # prod_data[j,c("End_Date_1")] <-
ifelse(prod_flag1 == 1 & prod_flag2 == 1,as.Date(old_date,origin =
"1970-01-01"),
                          #
ifelse(prod_flag1 == 0 & prod_flag2 == 1,as.date(old_date, origin =
"1970-01-01"),as.Date(new_date,origin = "1970-01-01")))

                          prod_data[l,c("End_Date_1")] <-
as.Date(ifelse(prod_flag1 == 1 & prod_flag2 == 1,old_date,ifelse(prod_flag1
== 0 & prod_flag2 == 1,old_date,new_date)),origin = "1970-01-01")

                        }
                      }
                    }
                  }
                }

              }
            }
            k=k+1
          }
        }
      }
      # excluding non required observations
      prod_data <- subset(prod_data,prod_data$exclude == 0)

      prod_data$multiply_factor <- ifelse(prod_data$Anchor_prod == 1,1,
                                           ifelse(prod_data$Proximity_prod
== 1,0.5,9999))

      prod_data$recency_in_months <- (as.Date("2017-01-31") -
prod_data$End_Date_1)/30

      prod_data$recency_factor <- ifelse(prod_data$recency_in_months <=
12,1,

ifelse(prod_data$recency_in_months > 12 & prod_data$recency_in_months <=
24,0.9,

 ifelse(prod_data$recency_in_months > 24 & prod_data$recency_in_months <=
36,0.8,

ifelse(prod_data$recency_in_months > 36 & prod_data$recency_in_months <=
48,0.7,

 ifelse(prod_data$recency_in_months > 48,0.6,9999)))))

      prod_data$duration_in_months <- (prod_data$End_Date_1 -
prod_data$Start_Date_1)/30

      prod_data$weight <-
prod_data$duration_in_months*prod_data$multiply_factor*prod_data$recency_factor

      prod <- prod_data[1,c("Anchor_Active_Prod_Id")]
      if(nrow(prod_data) > 1){
        data_merge <-with(prod_data,aggregate(weight ~ P_KEY, FUN =
function(x) c(Proficiency_Score = sum(x))))
      }else{
        data_merge <- prod_data[1,c("P_KEY","weight")]
      }


      data_merge$prod <- prod_data[1,c("Anchor_Active_Prod_Id")]

      data_merge_final <- rbind(data_merge_final,data_merge)


      # Recency and Duration calculation goes here and final score will be
added in final data
      output_data <- rbind.fill(output_data,prod_data)

    }
  }


  Final_data <- merge(Final_data,data_merge_final,by.x= c("P_KEY",
"Active_Prod_Id"),by.y = c("P_KEY", "prod"),all.x=TRUE)

  names(Final_data)[names(Final_data) == "weight"] <- "Proficiency_Score"

  emerging_prod_mapping <- readRDS("5.Emerging_prod_Lookup.rds")

  emerging_prod_list <-
subset(emerging_prod_mapping[,c("prod_ID")],!duplicated(emerging_prod_mapping$prod_ID))


  Final_data$Emerging_Traditional <- ifelse(Final_data$Active_Prod_Id %in%
c(emerging_prod_list),"Emerging","Traditional")

  Final_data$Final_Proficiency <- ifelse(Final_data$Emerging_Traditional ==
"Traditional",

 ifelse(Final_data$Proficiency_Score < 12, "P0",

ifelse(Final_data$Proficiency_Score >=12 & Final_data$Proficiency_Score <
24,"P1",

 ifelse(Final_data$Proficiency_Score >=24 & Final_data$Proficiency_Score <
48,"P2",

ifelse(Final_data$Proficiency_Score >=48 & Final_data$Proficiency_Score <
60,"P3",

 ifelse(Final_data$Proficiency_Score >=60,"P4",NA))))),

 ifelse(Final_data$Emerging_Traditional == "Emerging",

ifelse(Final_data$Proficiency_Score < 6, "P0",

 ifelse(Final_data$Proficiency_Score >=6 & Final_data$Proficiency_Score <
12,"P1",

ifelse(Final_data$Proficiency_Score >=12 & Final_data$Proficiency_Score <
24,"P2",

 ifelse(Final_data$Proficiency_Score >=24 & Final_data$Proficiency_Score <
30,"P3",

ifelse(Final_data$Proficiency_Score >=30,"P4",NA))))),NA))

  tst <- prod_data[,c("P_KEY", "Id")]
  tst <- subset(tst,!duplicated(tst))

  Final_data <-
merge(Final_data,tst[,c("P_KEY","Id")],by="P_KEY",all.x=TRUE)
})

*SUMANTA BASAK*

	[[alternative HTML version deleted]]



More information about the R-help mailing list