[R] How can I get this function to work?

Paul Miller pjmiller_57 at yahoo.com
Thu May 31 19:38:05 CEST 2012


Hello All,

Can anyone tell help me understand why the function below doesn't work and how I can fix it? Below are some sample data, some code that works on individual rows of the data, and my attempt to translate that code into a function. My hope is to get the function working and then to apply it to the larger data frame using ddply() from the plyr package or possibly some other approach.

As yet, I don't have much experience writing anonymous functions. I imagine I'm doing something that is obviously wrong, but I don't know what it is. 

Thanks,

Paul

#### Read in test data ####

testData <-
structure(list(profile_key = structure(c(1L, 1L, 2L, 2L, 2L, 
3L, 3L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 7L, 7L), .Label = c("001-001 ", 
"001-002 ", "001-003 ", "001-004 ", "001-005 ", "001-006 ", "001-007 "
), class = "factor"), encounter_date = structure(c(9L, 10L, 11L, 
12L, 13L, 5L, 6L, 7L, 8L, 1L, 2L, 3L, 4L, 4L, 7L, 7L), .Label = c(" 2009-03-01 ", 
" 2009-03-22 ", " 2009-04-01 ", " 2010-03-01 ", " 2010-10-15 ", 
" 2010-11-15 ", " 2011-03-01 ", " 2011-03-14 ", " 2011-10-10 ", 
" 2011-10-24 ", " 2012-09-15 ", " 2012-10-05 ", " 2012-10-17 "
), class = "factor"), raw = c(" ordered kras testing on 10102010 results not yet available if patient has a mutation will start erbitux ", 
" received kras results on 10202010 test results indicate tumor is wild type ua protein positve erpr positive her2neu positve ", 
" will conduct kras mutation testing prior to initiation of therapy with erbitux ", 
" still need to order kras mutation testing ", " ordered kras testing waiting for results ", 
" kras test results pending note that patient was negative for lynch mutation ", 
" kras results still pending note that patient was negative for lynch mutation ", 
" kras mutated will not prescribe erbitux due to mutation ", 
" kras mutated therefore did not prescribe erbitux ", " kras wild ", 
" tumor is negative for mutation ", " tumor is wild type patient is eligible to receive eribtux ", 
" if patient kras result is wild type they will start erbitux several lines of material ordered kras mutation test 11112011 results are still not available ", 
" kras results are in patient has the mutation ", " ordered kras mutation testing on 02152011 results came back negative several lines of material patient kras mutation test is negative will start erbitux ", 
" patient is kras negative started erbitux on 03012011 ")), .Names = c("profile_key", 
"encounter_date", "raw"), row.names = c(NA, -16L), class = "data.frame")

#### Convert text record to lowercase ####

testData$raw <- tolower(testData$raw)

#### Remove punctuation and any multiple spaces ####

testData$raw <- gsub("[[:punct:]]", "", testData$raw)
testData$raw <- gsub(" +", " ", testData$raw)

#### Select test row ####

testRow <- testData[13,]
testRow

#### Select terms +/- a specified number of words from "kras" ####

Text <- unlist(strsplit(testRow$raw, " ")) 
Target <- grep("kras", Text)

if (length(Target) == 0) {testRow$reduced <- ""} else{ 

Length <- length(Text)
Keep <- rep(NA, Length)
Lower <- ifelse(Target - 6 > 0, Target - 6, 1)
Upper <- ifelse(Target + 6 < Length, Target + 6, Length)

for(i in 1:length(Keep)){
for(j in 1:length(Lower)){
	Keep[i][i %in% seq(Lower[j], Upper[j])] <- i
}}

testRow$reduced <- paste(Text[!is.na(Keep)], collapse=" ")

}

testRow

length(Text)
length(Text[!is.na(Keep)])

#### Function for selecting words within specified range of a target term ####

nearTerms <- function(df, text, target, before, after, outvar){

   Text <- with(df, strsplit(text, " ")) 
   Target <- grep(target, Text)

   if (length(Target) == 0) {df$reduced <- ""} else{ 

   Length <- length(Text)
   Keep <- rep(NA, Length)
   Lower <- ifelse(Target - before > 0, Target - before, 1)
   Upper <- ifelse(Target + after < Length, Target + after, Length)

   for(i in 1:length(Keep)){
   for(j in 1:length(Lower)){
      Keep[i][i %in% seq(Lower[j], Upper[j])] <- i
   }}

   df <- transform(df, outvar = paste(Text[!is.na(Keep)], collapse=" "))

   }

}

nearTerms(testRow, raw, "kras", 6, 6)

nearTerms(df = testRow, text = raw, target = "kras", before = 6, after = 6)



More information about the R-help mailing list