message ("Big For loop starts...\n\n") for (VID in vid) { #VID <- 64490 if (debugindex > 0) cat(VID, ":") #browser() ## get the relevant columns from Postgres DB and order by the trip ids: (so no need to sort in R) pgsqlcmd <- paste("SELECT ", SELECTVARS," FROM ", t.trp$table, " WHERE vehicleid =", VID, " ORDER BY ", t.trp$id) mydata <- try(dbGetQuery(con,pgsqlcmd),TRUE) #origdate <- as.character(as.POSIXct(dbGetQuery(con, paste("SELECT datestamp FROM ", t.trp$table, "WHERE vehicleid =", VID, " ORDER BY ", t.trp$id))[,1])) ## On VID reading error go to the next VID... if (debugindex > 1) cat("class(mydata): ", class(mydata), "\n") if (class(mydata)=="try-error") {cat("\tDatabase query error.\n"); next} colnames(mydata) <- tolower(colnames(mydata)) ## convert NA to "" (empty strings) and [[:space:]] to ""... if (debugindex > 1) cat("\tcleaning.out.cols: geofencename, geofencereason\n") mydata[,"geofencename"] <- clean.out.cols(mydata[,"geofencename"]) mydata[,"geofencereason"] <- clean.out.cols(mydata[,"geofencereason"]) ## Bail out if either the geofencename or the geofencereason field is empty in the whole file... u.gfn <- unique(mydata[["geofencename"]]) if (debugindex > 1) cat("\tprod(u.gfn == \"\"):",prod(u.gfn == "")," \n") if (prod(u.gfn == "")) {cat("Empty, bailing.\n"); next} u.gfr <- unique(mydata[["geofencereason"]]) if (debugindex > 1) cat("\tprod(u.gfr == \"\"):",prod(u.gfr == "")," \n") if (prod(u.gfr == "")) {cat("Empty, bailing.\n"); next} ## Remove delaytime form datestamp: Why do we want to do this? ## Ans: well, this is just something that Jinjian suggested; so trying it out. if (remove.delaytime) { ########### CHECK BEOFRE USING ME ############ ## subtract the delaytime seconds from datestamp tmp.ds <- as.POSIXct(mydata[["datestamp"]]) tmp.dt <- mydata[["delaytime"]] if (!is.numeric(tmp.dt)) tmp.dt <- as.numeric(tmp.dt) na.dt <- which(is.na(tmp.dt)) if (length(na.dt)>0) tmp.dt[na.dt] <- 0 ## is this the best we can do? ## the following subtraction works, since tmp.dt is in seconds. see examples in ?difftime tmp.ds <- tmp.ds - tmp.dt } ## Sort: ## now that we are using RPostgreSQL, we don't need to sort, unless removing DELAYTIME if (remove.delaytime) { ########### CHECK BEOFRE USING ME ############ ## clean up the GEOFENCEREASON field to be able to sort on that: `empty' < "out" < "in" if (debugindex > 1) cat("\tpre-sort state: as.character(GEOFENCENAME), =0, out=1, in=2\n") tmp.gfr <- as.character(my.csv[,"GEOFENCEREASON"]) ## see [NOTE A] tmp.gfr[(tmp.gfr=="")] <- 0; tmp.gfr[(tmp.gfr=="out")] <- 1; tmp.gfr[(tmp.gfr=="in")] <- 2; if (debugindex > 1) cat("\tsort: odometer, datestamp-delaytime, tmp.gfr\n") mydata <- cbind(mydata, datestamp_nodelay=tmp.ds, stringsAsFactors=FALSE) ## include the tmp.ds before sorting mydata <- mydata[order(mydata[["odometer"]], tmp.ds, tmp.gfr),] } ## initializations... n <- dim(mydata)[1] hourflag <- odoflag <- stateflag <- numeric(n) if (debugindex > 1) cat("\tdim(mydata):",dim(mydata),"\n") if (n==1) { ## some vehicles have only one record if (debugindex > 0) cat("\tOnly one record, so bailing out.\n\n") next } ## create hourflag, odoflag, stateflag (when n >= 2) if (debugindex > 2) cat("\tCreating hourflag, odoflag, stateflag...") hourflag[1] <- odoflag[1] <- 1; ## otherwise the "first" few records will always be missed out ## populate hourflag : if consecutive datestamps differences (in hours) > hour.thres: if (remove.delaytime) { ## see Note B hourflag[2:n] <- as.numeric( as.numeric(as.POSIXct(tmp.ds[2:n]) - as.POSIXct(tmp.ds[1:(n-1)]), units="hours") > hour.thres) } else { ## see Note B hourflag[2:n] <- as.numeric( as.numeric(as.POSIXct(mydata[2:n, "datestamp"]) - as.POSIXct(mydata[1:(n-1), "datestamp"]), units="hours") > hour.thres) } ## populate odoflag : if consecutive odometer differences (in km) > odo.km.thres odoflag[2:n] <- as.numeric ((mydata[2:n, "odometer"] - mydata[1:(n-1), "odometer"])/1000 > odo.km.thres) ## populate stateflag : if consecutive state_abbr don't match stateflag[2:n] <- as.numeric(mydata[2:n, "state_abbr"] != mydata[1:(n-1), "state_abbr"]) if (debugindex > 2) cat("done.\n\tAppending them to the data.frame...") mydata <- cbind(mydata, bridge_new=rep("",n), tripscore=rep(0,n), stflag=stateflag, hrflag=hourflag, odflag=odoflag, stringsAsFactors=FALSE) if (debugindex > 2) cat("done.\n") ## logical AND of hourflag and odoflag: (ie both hour.thres and odo.thres are violated) hrodoflag <- hourflag * odoflag hrodoflag.index <- which (hrodoflag==1) if (debugindex > 1) cat("\tSum:: hourflag:",sum(hourflag),", odoflag:", sum(odoflag),", stateflag:",sum(stateflag),", hrOdoflag:",sum(hrodoflag),"\n") if ((length(hrodoflag.index)==0) || (sum(stateflag)==0)) { ## ie, either all the differences are within the (at least one of the) two (hr, odo) thresholds ## OR, there is no chagne in "state_abbr" if (debugindex > 0) cat("\tEither 'hourflag * odoflag' or 'stateflag' are all zeros, so bailing out.\n\n") next } ## else: ## ie, at least one change in "state_abbr" ## AND at least one instance where both (hr.diff > 1) and (odo.diff > 10) between consecutive records occur. if (debugindex > 1) cat("\tStarting hrodoflag.index loop...\n") for (i in 1:length(hrodoflag.index)) { if (debugindex > 6) cat("\t\ti:",i,", hrodoflag.index[i]:",hrodoflag.index[i],"\n") ## demarcate the two ENDS of a "plaussible" trip, using hourodoflag if (i < length(hrodoflag.index)) { plaussible.trip.index <- hrodoflag.index[i]:(hrodoflag.index[i+1]-1) } else { if (debugindex > 6) cat("\t\t----- i == length(hrodoflag.index)\n") plaussible.trip.index <- hrodoflag.index[i]:n } ## number of records in this "plaussible" trip: all types, not just 'Crossing' ones... n.records <- length(plaussible.trip.index) if (debugindex > 6) cat("\t\tn.records:",n.records,"\n") ## ckh for length(pla...) == 1/2/3.. if (n.records < 4) { ## a valid trip should've at least four records. if not then move to the next "plaussible" trip... if (debugindex > 6) cat("\t\t--loop-next","\n") next } ## needed b/c stateflag[1] is NA always and we've eliminated those cases where n.records < 4!! chk.state.flag <- plaussible.trip.index[-1] if (sum(stateflag[chk.state.flag])==1) { ## THIS IS THE MOST IMPORTANT ONE... if (debugindex > 6) cat("\t\tOnly one state change in this loop\n") change.state <- mydata[chk.state.flag[as.logical(stateflag[chk.state.flag])],"state_abbr"] if (change.state == "ON") { direction <- "MI-ON" } else if (change.state == "MI") { direction <- "ON-MI" } else direction <- NULL ## AMB/BWB bridge identification: use the BRIDGE string obtained from PostgreSQL if (all(mydata[plaussible.trip.index,"bridge"] %in% "atAMB")) { bridge <- "AMB" } else if (all(mydata[plaussible.trip.index,"bridge"] %in% "atBWB")) { bridge <- "BWB" } else bridge <- NULL ## {"", AMB, BWB}:{"", ON-MI, MI-ON} --- "" for the NULL cases (which should not arise anyway) final.string <- paste(bridge,direction,sep=":") if (inout.only.tagging) { ## ie tag only the strict sequence of in/out records j <- plaussible.trip.index[1] BRIDGE.index <- NULL while ( j < plaussible.trip.index[n.records]) { ## j "strictly"-less-than ... so j+1 is fine a.gfn <- mydata[j,"geofencename"]; b.gfn <- mydata[j+1,"geofencename"] a.gfr <- mydata[j,"geofencereason"]; b.gfr <- mydata[j+1,"geofencereason"] if ((a.gfn==b.gfn) && (a.gfr=="in") && (b.gfr=="out")) { BRIDGE.index <- c(BRIDGE.index, j, j+1) j <- j+2 } else j <- j+1 } } else { ## tag all reocrds of this plaussible trip BRIDGE.index <- plaussible.trip.index } ntrips <- ntrips+1 mydata[BRIDGE.index,"bridge_new"] <- final.string mydata[BRIDGE.index,"tripscore"] <- ntrips } else if (sum(stateflag[chk.state.flag]) > 1) { ## something is wrong here: ## more than one change of "STATE" inside this seuqence of records ## where no two consecutive records differ by '>1-hr & >10 km' thresholds... ## ## this case encompases trips/trucks which are sent back to the originating country ## from the destination country's secondary customs... right? if (debugindex > 6) cat("\t\tMore than one state change in this loop\n") ## do nothing, as of now. DO not have any idea :(( } else { ## ie, sum == 0 => same STATE... if (debugindex > 6) cat("\t\tZero state change in this loop\n") ## do nothing, as of now. ## But later try to use the DIRECTION to infer the direction of crossing. false.ntrips <- false.ntrips + 1 mydata[plaussible.trip.index,"tripscore"] <- -false.ntrips } } ## for (i in 1:length(hrodoflag.index)) if (debugindex > 1) cat("\throdoflag.index loop finished.\n") ###################################################### ###################################################### ###################################################### message("Delete 'public.tmptable'") if (dbExistsTable(con,tname.tmp$table)) dbRemoveTable(con, tname.tmp$table) message("Finish deleting") dbWriteTable(con,tname.tmp$table, mydata[,c(t.trp$id,newVariables)], overwrite=TRUE, row.names=FALSE) ## see Note C for (varname in newVariables) { dbGetQuery (con, paste ("UPDATE ", t.trp$table, " SET ", varname, " = ", paste(tname.tmp$table, varname, sep="."), "FROM ", tname.tmp$table, " \n WHERE ", paste(t.trp$table, t.trp$id, sep="."), " = ", paste(tname.tmp$table, t.trp$id, sep="."), ";") ) } } ## for (VID in vid) message ("Big For loop ends.")