[R] Tables: Invitation to make a collective package

Gabor Grothendieck ggrothendieck at gmail.com
Thu Jul 7 14:28:35 CEST 2005


If the functionality you are thinking of already exists across multiple
packages an alternative to creating a new package would be to create
a task view as in:
   http://cran.r-project.org/src/contrib/Views/
as explained in the ctv package and the article in R News 5/1.

On 7/7/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
> Hi All,
> 
> I would like to make an invitation to make a collective package with all
> functions related to TABLES.
> 
> I know that there are many packages with these functions, the original idea is
> collect all this functions and to make a single package, because is arduous for
> the user know all this functions broadcast in many packages.
> 
> So, I think that the original packages can continue with its original functions,
> but, is very good to know that exist one package with many (I dream all) the
> functions related to tables.
> 
> I've been working with these functions (while I am learning R programming):
> 
> #######################
> #   Tables - Package  #
> #######################
> 
> #
> # 1. Tables
> #
> 
> #
> # Common function
> #
> tb.make.table.I <- function(x,
>                             start,
>                             end,
>                             h,
>                             right)
> {
>   f    <- table(cut(x, br=seq(start, end, h), right=right)) # Absolut freq
>   fr   <- f/length(x)                                       # Relative freq
>   frP  <- 100*(f/length(x))                                 # Relative freq, %
>   fac  <- cumsum(f)                                         # Cumulative freq
>   facP <- 100*(cumsum(f/length(x)))                         # Cumulative freq, %
>   fi   <- round(f, 2)
>   fr   <- round(as.numeric(fr), 2)
>   frP  <- round(as.numeric(frP), 2)
>   fac  <- round(as.numeric(fac), 2)
>   facP <- round(as.numeric(facP),2)
>   res  <- data.frame(fi, fr, frP, fac, facP)                # Make final table
>   names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
>   return(res)
> }
> 
> #
> # Common function
> #
> tb.make.table.II <- function (x,
>                               k,
>                               breaks=c('Sturges', 'Scott', 'FD'),
>                               right=FALSE)
> {
>   x <- na.omit(x)
> 
>   # User defines only x and/or 'breaks'
>   # (x, {k,}[breaks, right])
>   if (missing(k)) {
>     brk   <- match.arg(breaks)
>     switch(brk,
>            Sturges = k <- nclass.Sturges(x),
>            Scott   = k <- nclass.scott(x),
>            FD      = k <- nclass.FD(x))
>     tmp   <- range(x)
>     start <- tmp[1] - abs(tmp[2])/100
>     end   <- tmp[2] + abs(tmp[2])/100
>     R     <- end-start
>     h     <- R/k
>   }
> 
>   # User defines 'x' and 'k'
>   # (x, k,[breaks, right])
>   else {
>     tmp   <- range(x)
>     start <- tmp[1] - abs(tmp[2])/100
>     end   <- tmp[2] + abs(tmp[2])/100
>     R     <- end-start
>     h     <- R/abs(k)
>   }
>   tbl     <- tb.make.table.I(x, start, end, h, right)
>   return(tbl)
> }
> 
> #
> # With Gabor Grotendieck suggestions (thanks Gabor, very much!)
> #
> tb.table <- function(x, ...) UseMethod("tb.table")
> 
> #
> # Table form vectors
> #
> tb.table.default <- function(x,
>                              k,
>                              start,
>                              end,
>                              h, breaks=c('Sturges', 'Scott', 'FD'),
>                              right=FALSE)
> {
>   # User defines nothing or not 'x' isn't numeric -> stop
>   stopifnot(is.numeric(x))
>   x <- na.omit(x)
> 
>   # User defines only 'x'
>   # (x, {k, start, end, h}, [breaks, right])
>   if (missing(k) && missing(start) && missing(end) && missing(h) ) {
>     brk   <- match.arg(breaks)
>     switch(brk,
>            Sturges = k <- nclass.Sturges(x),
>            Scott   = k <- nclass.scott(x),
>            FD      = k <- nclass.FD(x))
>     tmp   <- range(x)
>     start <- tmp[1] - abs(tmp[2])/100
>     end   <- tmp[2] + abs(tmp[2])/100
>     R     <- end-start
>     h     <- R/k
>   }
> 
>   # User defines 'x' and 'k'
>   # (x, k, {start, end, h}, [breaks, right])
>   else if (missing(start) && missing(end) && missing(h)) {
>     stopifnot(length(k) >= 1)
>     tmp   <- range(x)
>     start <- tmp[1] - abs(tmp[2])/100
>     end   <- tmp[2] + abs(tmp[2])/100
>     R     <- end-start
>     h     <- R/abs(k)
>   }
> 
>   # User defines 'x', 'start' and 'end'
>   # (x, {k,} start, end, {h,} [breaks, right])
>   else if (missing(k) && missing(h)) {
>     stopifnot(length(start) >= 1, length(end) >=1)
>     tmp <- range(x)
>     R   <- end-start
>     k   <- sqrt(abs(R))
>     if (k < 5)  k <- 5 # min value of k
>     h   <- R/k
>   }
> 
>   # User defines 'x', 'start', 'end' and 'h'
>   # (x, {k,} start, end, h, [breaks, right])
>   else if (missing(k)) {
>     stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1)
>   }
> 
>   else stop('Error, please, see the function sintax!')
>   tbl <- tb.make.table.I(x, start, end, h, right)
>   return(tbl)
> }
> 
> 
> #
> # Table form data.frame
> #
> tb.table.data.frame <- function(df,
>                                 k,
>                                 by,
>                                 breaks=c('Sturges', 'Scott', 'FD'),
>                                 right=FALSE)
> {
>   stopifnot(is.data.frame(df))
>   tmpList <- list()
>   nameF   <- character()
>   nameY   <- character()
> 
>   # User didn't defines a factor
>   if (missing(by)) {
>     logCol  <- sapply(df, is.numeric)
>     for (i in 1:ncol(df)) {
>       if (logCol[i]) {
>         x       <- as.matrix(df[ ,i])
>         tbl     <- tb.make.table.II(x, k, breaks, right)
>         tmpList <- c(tmpList, list(tbl))
>       }
>     }
>     valCol <- logCol[logCol]
>     names(tmpList) <- names(valCol)
>     return(tmpList)
>   }
> 
>   # User defines one factor
>   else {
>     namesdf <- names(df)
>     pos     <- which(namesdf == by)
>     stopifnot(is.factor((df[[pos]])))
>     numF    <- table(df[[pos]])
>     for(i in 1:length(numF)) {
>       tmpdf  <- subset(df, df[[pos]] == names(numF[i]))
>       logCol <- sapply(tmpdf, is.numeric)
>       for (j in 1:ncol(tmpdf)) {
>         if (logCol[j]) {
>           x            <- as.matrix(tmpdf[ ,j])
>           tbl          <- tb.make.table.II(x, k, breaks, right)
>           newFY        <- list(tbl)
>           nameF        <- names(numF[i])
>           nameY        <- names(logCol[j])
>           nameFY       <- paste(nameF,'.', nameY, sep="")
>           names(newFY) <- sub(' +$', '', nameFY)
>           tmpList      <- c(tmpList, newFY)
>         }
>       }
>     }
>   }
>   return(tmpList)
> }
> 
> ############################
> #     Tables package       #
> #         to try           #
> ############################
> 
> # 1.Tables
> # 1.1. Tables from vectors
> 
> # Making a vector
> set.seed(1)
> x=rnorm(100, 5, 1)
> #x=as.factor(rep(1:10, 10))  # to try
> 
> tbl <- tb.table(x)
> print(tbl); cat('\n')
> 
> # Equal to above
> tbl <- tb.table(x, breaks='Sturges')
> print(tbl); cat('\n')
> 
> tbl <- tb.table(x, breaks='Scott')
> print(tbl); cat('\n')
> 
> tbl <- tb.table(x, breaks='FD')
> print(tbl); cat('\n')
> 
> tbl <- tb.table(x, breaks='F', right=T)
> print(tbl); cat('\n')
> 
> tbl <- tb.table(x, k=4)
> print(tbl); cat('\n')
> 
> tbl <- tb.table(x, k=20)
> print(tbl); cat('\n')
> 
> # Partial
> tbl <- tb.table(x, start=4, end=6)
> print(tbl); cat('\n')
> 
> # Partial
> tbl <- tb.table(x, start=4.5, end=5.5)
> print(tbl); cat('\n')
> 
> # Nonsense
> tbl <- tb.table(x, start=0, end=10, h=.5)
> print(tbl); cat('\n')
> 
> # First and last class forced (fi=0)
> tbl <- tb.table(x, start=1, end=9, h=1)
> print(tbl); cat('\n')
> 
> tbl <- tb.table(x, start=1, end=10, h=2)
> print(tbl); cat('\n')
> 
> 
> # 1.2. Tables from data.frame
> 
> # 1.2.1. Making a data.frame
> mdf=data.frame(X1=rep(LETTERS[1:4], 25),
>                X2=as.factor(rep(1:10, 10)),
>                Y1=c(NA, NA, rnorm(96, 10, 1), NA, NA),
>                Y2=rnorm(100, 58, 4),
>                Y3=c(NA, NA, rnorm(98, -20, 2)))
> 
> tbl <- tb.table(mdf)
> print(tbl)
> 
> # Equal to above
> tbl <- tb.table(mdf, breaks='Sturges')
> print(tbl)
> 
> tbl <- tb.table(mdf, breaks='Scott')
> print(tbl)
> 
> tbl <- tb.table(mdf, breaks='FD')
> print(tbl)
> 
> tbl <- tb.table(mdf, k=4)
> print(tbl)
> 
> tbl <- tb.table(mdf, k=10)
> print(tbl)
> 
> levels(mdf$X1)
> tbl=tb.table(mdf, k=5, by='X1')
> length(tbl)
> names(tbl)
> print(tbl)
> 
> tbl=tb.table(mdf, breaks='FD', by='X1')
> print(tbl)
> 
> # A 'big' result: X2 is a factor with 10 levels!
> tbl=tb.table(mdf, breaks='FD', by='X2')
> print(tbl)
> 
> # 1.2.2. Using 'iris'
> tbl=tb.table(iris, k=5)
> print(tbl)
> 
> levels(iris$Species)
> tbl=tb.table(iris, k=5, by='Species')
> length(tbl)
> names(tbl)
> print(tbl)
> 
> tbl=tb.table(iris, k=5, by='Species', right=T)
> print(tbl)
> 
> tbl=tb.table(iris, breaks='FD', by='Species')
> print(tbl)
> 
> library(MASS)
> levels(Cars93$Origin)
> tbl=tb.table(Cars93, k=5, by='Origin')
> names(tbl)
> print(tbl)
> 
> tbl=tb.table(Cars93, breaks='FD', by='Origin')
> print(tbl)
> 
> I find that this package would be very useful and would like to hear the opinion
> of the interested parties in participating.
> 
> Best regards,
> --
> Jose Claudio Faria
> Brasil/Bahia/UESC/DCET
> Estatistica Experimental/Prof. Adjunto
> mails:
>  joseclaudio.faria at terra.com.br
>  jc_faria at uesc.br
>  jc_faria at uol.com.br
> tel: 73-3634.2779
> 
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
>




More information about the R-help mailing list