[R] Tables: Invitation to make a collective package
Gabor Grothendieck
ggrothendieck at gmail.com
Thu Jul 7 14:28:35 CEST 2005
If the functionality you are thinking of already exists across multiple
packages an alternative to creating a new package would be to create
a task view as in:
http://cran.r-project.org/src/contrib/Views/
as explained in the ctv package and the article in R News 5/1.
On 7/7/05, Jose Claudio Faria <joseclaudio.faria at terra.com.br> wrote:
> Hi All,
>
> I would like to make an invitation to make a collective package with all
> functions related to TABLES.
>
> I know that there are many packages with these functions, the original idea is
> collect all this functions and to make a single package, because is arduous for
> the user know all this functions broadcast in many packages.
>
> So, I think that the original packages can continue with its original functions,
> but, is very good to know that exist one package with many (I dream all) the
> functions related to tables.
>
> I've been working with these functions (while I am learning R programming):
>
> #######################
> # Tables - Package #
> #######################
>
> #
> # 1. Tables
> #
>
> #
> # Common function
> #
> tb.make.table.I <- function(x,
> start,
> end,
> h,
> right)
> {
> f <- table(cut(x, br=seq(start, end, h), right=right)) # Absolut freq
> fr <- f/length(x) # Relative freq
> frP <- 100*(f/length(x)) # Relative freq, %
> fac <- cumsum(f) # Cumulative freq
> facP <- 100*(cumsum(f/length(x))) # Cumulative freq, %
> fi <- round(f, 2)
> fr <- round(as.numeric(fr), 2)
> frP <- round(as.numeric(frP), 2)
> fac <- round(as.numeric(fac), 2)
> facP <- round(as.numeric(facP),2)
> res <- data.frame(fi, fr, frP, fac, facP) # Make final table
> names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
> return(res)
> }
>
> #
> # Common function
> #
> tb.make.table.II <- function (x,
> k,
> breaks=c('Sturges', 'Scott', 'FD'),
> right=FALSE)
> {
> x <- na.omit(x)
>
> # User defines only x and/or 'breaks'
> # (x, {k,}[breaks, right])
> if (missing(k)) {
> brk <- match.arg(breaks)
> switch(brk,
> Sturges = k <- nclass.Sturges(x),
> Scott = k <- nclass.scott(x),
> FD = k <- nclass.FD(x))
> tmp <- range(x)
> start <- tmp[1] - abs(tmp[2])/100
> end <- tmp[2] + abs(tmp[2])/100
> R <- end-start
> h <- R/k
> }
>
> # User defines 'x' and 'k'
> # (x, k,[breaks, right])
> else {
> tmp <- range(x)
> start <- tmp[1] - abs(tmp[2])/100
> end <- tmp[2] + abs(tmp[2])/100
> R <- end-start
> h <- R/abs(k)
> }
> tbl <- tb.make.table.I(x, start, end, h, right)
> return(tbl)
> }
>
> #
> # With Gabor Grotendieck suggestions (thanks Gabor, very much!)
> #
> tb.table <- function(x, ...) UseMethod("tb.table")
>
> #
> # Table form vectors
> #
> tb.table.default <- function(x,
> k,
> start,
> end,
> h, breaks=c('Sturges', 'Scott', 'FD'),
> right=FALSE)
> {
> # User defines nothing or not 'x' isn't numeric -> stop
> stopifnot(is.numeric(x))
> x <- na.omit(x)
>
> # User defines only 'x'
> # (x, {k, start, end, h}, [breaks, right])
> if (missing(k) && missing(start) && missing(end) && missing(h) ) {
> brk <- match.arg(breaks)
> switch(brk,
> Sturges = k <- nclass.Sturges(x),
> Scott = k <- nclass.scott(x),
> FD = k <- nclass.FD(x))
> tmp <- range(x)
> start <- tmp[1] - abs(tmp[2])/100
> end <- tmp[2] + abs(tmp[2])/100
> R <- end-start
> h <- R/k
> }
>
> # User defines 'x' and 'k'
> # (x, k, {start, end, h}, [breaks, right])
> else if (missing(start) && missing(end) && missing(h)) {
> stopifnot(length(k) >= 1)
> tmp <- range(x)
> start <- tmp[1] - abs(tmp[2])/100
> end <- tmp[2] + abs(tmp[2])/100
> R <- end-start
> h <- R/abs(k)
> }
>
> # User defines 'x', 'start' and 'end'
> # (x, {k,} start, end, {h,} [breaks, right])
> else if (missing(k) && missing(h)) {
> stopifnot(length(start) >= 1, length(end) >=1)
> tmp <- range(x)
> R <- end-start
> k <- sqrt(abs(R))
> if (k < 5) k <- 5 # min value of k
> h <- R/k
> }
>
> # User defines 'x', 'start', 'end' and 'h'
> # (x, {k,} start, end, h, [breaks, right])
> else if (missing(k)) {
> stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1)
> }
>
> else stop('Error, please, see the function sintax!')
> tbl <- tb.make.table.I(x, start, end, h, right)
> return(tbl)
> }
>
>
> #
> # Table form data.frame
> #
> tb.table.data.frame <- function(df,
> k,
> by,
> breaks=c('Sturges', 'Scott', 'FD'),
> right=FALSE)
> {
> stopifnot(is.data.frame(df))
> tmpList <- list()
> nameF <- character()
> nameY <- character()
>
> # User didn't defines a factor
> if (missing(by)) {
> logCol <- sapply(df, is.numeric)
> for (i in 1:ncol(df)) {
> if (logCol[i]) {
> x <- as.matrix(df[ ,i])
> tbl <- tb.make.table.II(x, k, breaks, right)
> tmpList <- c(tmpList, list(tbl))
> }
> }
> valCol <- logCol[logCol]
> names(tmpList) <- names(valCol)
> return(tmpList)
> }
>
> # User defines one factor
> else {
> namesdf <- names(df)
> pos <- which(namesdf == by)
> stopifnot(is.factor((df[[pos]])))
> numF <- table(df[[pos]])
> for(i in 1:length(numF)) {
> tmpdf <- subset(df, df[[pos]] == names(numF[i]))
> logCol <- sapply(tmpdf, is.numeric)
> for (j in 1:ncol(tmpdf)) {
> if (logCol[j]) {
> x <- as.matrix(tmpdf[ ,j])
> tbl <- tb.make.table.II(x, k, breaks, right)
> newFY <- list(tbl)
> nameF <- names(numF[i])
> nameY <- names(logCol[j])
> nameFY <- paste(nameF,'.', nameY, sep="")
> names(newFY) <- sub(' +$', '', nameFY)
> tmpList <- c(tmpList, newFY)
> }
> }
> }
> }
> return(tmpList)
> }
>
> ############################
> # Tables package #
> # to try #
> ############################
>
> # 1.Tables
> # 1.1. Tables from vectors
>
> # Making a vector
> set.seed(1)
> x=rnorm(100, 5, 1)
> #x=as.factor(rep(1:10, 10)) # to try
>
> tbl <- tb.table(x)
> print(tbl); cat('\n')
>
> # Equal to above
> tbl <- tb.table(x, breaks='Sturges')
> print(tbl); cat('\n')
>
> tbl <- tb.table(x, breaks='Scott')
> print(tbl); cat('\n')
>
> tbl <- tb.table(x, breaks='FD')
> print(tbl); cat('\n')
>
> tbl <- tb.table(x, breaks='F', right=T)
> print(tbl); cat('\n')
>
> tbl <- tb.table(x, k=4)
> print(tbl); cat('\n')
>
> tbl <- tb.table(x, k=20)
> print(tbl); cat('\n')
>
> # Partial
> tbl <- tb.table(x, start=4, end=6)
> print(tbl); cat('\n')
>
> # Partial
> tbl <- tb.table(x, start=4.5, end=5.5)
> print(tbl); cat('\n')
>
> # Nonsense
> tbl <- tb.table(x, start=0, end=10, h=.5)
> print(tbl); cat('\n')
>
> # First and last class forced (fi=0)
> tbl <- tb.table(x, start=1, end=9, h=1)
> print(tbl); cat('\n')
>
> tbl <- tb.table(x, start=1, end=10, h=2)
> print(tbl); cat('\n')
>
>
> # 1.2. Tables from data.frame
>
> # 1.2.1. Making a data.frame
> mdf=data.frame(X1=rep(LETTERS[1:4], 25),
> X2=as.factor(rep(1:10, 10)),
> Y1=c(NA, NA, rnorm(96, 10, 1), NA, NA),
> Y2=rnorm(100, 58, 4),
> Y3=c(NA, NA, rnorm(98, -20, 2)))
>
> tbl <- tb.table(mdf)
> print(tbl)
>
> # Equal to above
> tbl <- tb.table(mdf, breaks='Sturges')
> print(tbl)
>
> tbl <- tb.table(mdf, breaks='Scott')
> print(tbl)
>
> tbl <- tb.table(mdf, breaks='FD')
> print(tbl)
>
> tbl <- tb.table(mdf, k=4)
> print(tbl)
>
> tbl <- tb.table(mdf, k=10)
> print(tbl)
>
> levels(mdf$X1)
> tbl=tb.table(mdf, k=5, by='X1')
> length(tbl)
> names(tbl)
> print(tbl)
>
> tbl=tb.table(mdf, breaks='FD', by='X1')
> print(tbl)
>
> # A 'big' result: X2 is a factor with 10 levels!
> tbl=tb.table(mdf, breaks='FD', by='X2')
> print(tbl)
>
> # 1.2.2. Using 'iris'
> tbl=tb.table(iris, k=5)
> print(tbl)
>
> levels(iris$Species)
> tbl=tb.table(iris, k=5, by='Species')
> length(tbl)
> names(tbl)
> print(tbl)
>
> tbl=tb.table(iris, k=5, by='Species', right=T)
> print(tbl)
>
> tbl=tb.table(iris, breaks='FD', by='Species')
> print(tbl)
>
> library(MASS)
> levels(Cars93$Origin)
> tbl=tb.table(Cars93, k=5, by='Origin')
> names(tbl)
> print(tbl)
>
> tbl=tb.table(Cars93, breaks='FD', by='Origin')
> print(tbl)
>
> I find that this package would be very useful and would like to hear the opinion
> of the interested parties in participating.
>
> Best regards,
> --
> Jose Claudio Faria
> Brasil/Bahia/UESC/DCET
> Estatistica Experimental/Prof. Adjunto
> mails:
> joseclaudio.faria at terra.com.br
> jc_faria at uesc.br
> jc_faria at uol.com.br
> tel: 73-3634.2779
>
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
>
More information about the R-help
mailing list