[R] Tables: Invitation to make a collective package
Jose Claudio Faria
joseclaudio.faria at terra.com.br
Thu Jul 7 13:57:01 CEST 2005
Hi All,
I would like to make an invitation to make a collective package with all
functions related to TABLES.
I know that there are many packages with these functions, the original idea is
collect all this functions and to make a single package, because is arduous for
the user know all this functions broadcast in many packages.
So, I think that the original packages can continue with its original functions,
but, is very good to know that exist one package with many (I dream all) the
functions related to tables.
I've been working with these functions (while I am learning R programming):
#######################
# Tables - Package #
#######################
#
# 1. Tables
#
#
# Common function
#
tb.make.table.I <- function(x,
start,
end,
h,
right)
{
f <- table(cut(x, br=seq(start, end, h), right=right)) # Absolut freq
fr <- f/length(x) # Relative freq
frP <- 100*(f/length(x)) # Relative freq, %
fac <- cumsum(f) # Cumulative freq
facP <- 100*(cumsum(f/length(x))) # Cumulative freq, %
fi <- round(f, 2)
fr <- round(as.numeric(fr), 2)
frP <- round(as.numeric(frP), 2)
fac <- round(as.numeric(fac), 2)
facP <- round(as.numeric(facP),2)
res <- data.frame(fi, fr, frP, fac, facP) # Make final table
names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
return(res)
}
#
# Common function
#
tb.make.table.II <- function (x,
k,
breaks=c('Sturges', 'Scott', 'FD'),
right=FALSE)
{
x <- na.omit(x)
# User defines only x and/or 'breaks'
# (x, {k,}[breaks, right])
if (missing(k)) {
brk <- match.arg(breaks)
switch(brk,
Sturges = k <- nclass.Sturges(x),
Scott = k <- nclass.scott(x),
FD = k <- nclass.FD(x))
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/k
}
# User defines 'x' and 'k'
# (x, k,[breaks, right])
else {
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/abs(k)
}
tbl <- tb.make.table.I(x, start, end, h, right)
return(tbl)
}
#
# With Gabor Grotendieck suggestions (thanks Gabor, very much!)
#
tb.table <- function(x, ...) UseMethod("tb.table")
#
# Table form vectors
#
tb.table.default <- function(x,
k,
start,
end,
h, breaks=c('Sturges', 'Scott', 'FD'),
right=FALSE)
{
# User defines nothing or not 'x' isn't numeric -> stop
stopifnot(is.numeric(x))
x <- na.omit(x)
# User defines only 'x'
# (x, {k, start, end, h}, [breaks, right])
if (missing(k) && missing(start) && missing(end) && missing(h) ) {
brk <- match.arg(breaks)
switch(brk,
Sturges = k <- nclass.Sturges(x),
Scott = k <- nclass.scott(x),
FD = k <- nclass.FD(x))
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/k
}
# User defines 'x' and 'k'
# (x, k, {start, end, h}, [breaks, right])
else if (missing(start) && missing(end) && missing(h)) {
stopifnot(length(k) >= 1)
tmp <- range(x)
start <- tmp[1] - abs(tmp[2])/100
end <- tmp[2] + abs(tmp[2])/100
R <- end-start
h <- R/abs(k)
}
# User defines 'x', 'start' and 'end'
# (x, {k,} start, end, {h,} [breaks, right])
else if (missing(k) && missing(h)) {
stopifnot(length(start) >= 1, length(end) >=1)
tmp <- range(x)
R <- end-start
k <- sqrt(abs(R))
if (k < 5) k <- 5 # min value of k
h <- R/k
}
# User defines 'x', 'start', 'end' and 'h'
# (x, {k,} start, end, h, [breaks, right])
else if (missing(k)) {
stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1)
}
else stop('Error, please, see the function sintax!')
tbl <- tb.make.table.I(x, start, end, h, right)
return(tbl)
}
#
# Table form data.frame
#
tb.table.data.frame <- function(df,
k,
by,
breaks=c('Sturges', 'Scott', 'FD'),
right=FALSE)
{
stopifnot(is.data.frame(df))
tmpList <- list()
nameF <- character()
nameY <- character()
# User didn't defines a factor
if (missing(by)) {
logCol <- sapply(df, is.numeric)
for (i in 1:ncol(df)) {
if (logCol[i]) {
x <- as.matrix(df[ ,i])
tbl <- tb.make.table.II(x, k, breaks, right)
tmpList <- c(tmpList, list(tbl))
}
}
valCol <- logCol[logCol]
names(tmpList) <- names(valCol)
return(tmpList)
}
# User defines one factor
else {
namesdf <- names(df)
pos <- which(namesdf == by)
stopifnot(is.factor((df[[pos]])))
numF <- table(df[[pos]])
for(i in 1:length(numF)) {
tmpdf <- subset(df, df[[pos]] == names(numF[i]))
logCol <- sapply(tmpdf, is.numeric)
for (j in 1:ncol(tmpdf)) {
if (logCol[j]) {
x <- as.matrix(tmpdf[ ,j])
tbl <- tb.make.table.II(x, k, breaks, right)
newFY <- list(tbl)
nameF <- names(numF[i])
nameY <- names(logCol[j])
nameFY <- paste(nameF,'.', nameY, sep="")
names(newFY) <- sub(' +$', '', nameFY)
tmpList <- c(tmpList, newFY)
}
}
}
}
return(tmpList)
}
############################
# Tables package #
# to try #
############################
# 1.Tables
# 1.1. Tables from vectors
# Making a vector
set.seed(1)
x=rnorm(100, 5, 1)
#x=as.factor(rep(1:10, 10)) # to try
tbl <- tb.table(x)
print(tbl); cat('\n')
# Equal to above
tbl <- tb.table(x, breaks='Sturges')
print(tbl); cat('\n')
tbl <- tb.table(x, breaks='Scott')
print(tbl); cat('\n')
tbl <- tb.table(x, breaks='FD')
print(tbl); cat('\n')
tbl <- tb.table(x, breaks='F', right=T)
print(tbl); cat('\n')
tbl <- tb.table(x, k=4)
print(tbl); cat('\n')
tbl <- tb.table(x, k=20)
print(tbl); cat('\n')
# Partial
tbl <- tb.table(x, start=4, end=6)
print(tbl); cat('\n')
# Partial
tbl <- tb.table(x, start=4.5, end=5.5)
print(tbl); cat('\n')
# Nonsense
tbl <- tb.table(x, start=0, end=10, h=.5)
print(tbl); cat('\n')
# First and last class forced (fi=0)
tbl <- tb.table(x, start=1, end=9, h=1)
print(tbl); cat('\n')
tbl <- tb.table(x, start=1, end=10, h=2)
print(tbl); cat('\n')
# 1.2. Tables from data.frame
# 1.2.1. Making a data.frame
mdf=data.frame(X1=rep(LETTERS[1:4], 25),
X2=as.factor(rep(1:10, 10)),
Y1=c(NA, NA, rnorm(96, 10, 1), NA, NA),
Y2=rnorm(100, 58, 4),
Y3=c(NA, NA, rnorm(98, -20, 2)))
tbl <- tb.table(mdf)
print(tbl)
# Equal to above
tbl <- tb.table(mdf, breaks='Sturges')
print(tbl)
tbl <- tb.table(mdf, breaks='Scott')
print(tbl)
tbl <- tb.table(mdf, breaks='FD')
print(tbl)
tbl <- tb.table(mdf, k=4)
print(tbl)
tbl <- tb.table(mdf, k=10)
print(tbl)
levels(mdf$X1)
tbl=tb.table(mdf, k=5, by='X1')
length(tbl)
names(tbl)
print(tbl)
tbl=tb.table(mdf, breaks='FD', by='X1')
print(tbl)
# A 'big' result: X2 is a factor with 10 levels!
tbl=tb.table(mdf, breaks='FD', by='X2')
print(tbl)
# 1.2.2. Using 'iris'
tbl=tb.table(iris, k=5)
print(tbl)
levels(iris$Species)
tbl=tb.table(iris, k=5, by='Species')
length(tbl)
names(tbl)
print(tbl)
tbl=tb.table(iris, k=5, by='Species', right=T)
print(tbl)
tbl=tb.table(iris, breaks='FD', by='Species')
print(tbl)
library(MASS)
levels(Cars93$Origin)
tbl=tb.table(Cars93, k=5, by='Origin')
names(tbl)
print(tbl)
tbl=tb.table(Cars93, breaks='FD', by='Origin')
print(tbl)
I find that this package would be very useful and would like to hear the opinion
of the interested parties in participating.
Best regards,
--
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
joseclaudio.faria at terra.com.br
jc_faria at uesc.br
jc_faria at uol.com.br
tel: 73-3634.2779
More information about the R-help
mailing list