[R] Improving effeciency - better table()?
Simon Cullen
cullens at tcd.ie
Tue Jul 6 18:21:44 CEST 2004
It seems that attachments are stripped off so I'll include the code and
the output of Rprof below:
#apologies for disgusting wrapping
f.powertest <-
function(path=paste(substr(getwd(),1,3),"R/",sep=""),iter=5000,sample=25,trun.prop=0,m,s){
rej.2.5 <- 0
for (i in 1:iter){
z<-c()
z.trun<-c()
zeros<- 0
total<-0
while(length(z.trun)< sample){
trun<-qnorm(trun.prop,m,s)
z<-rnorm(sample,m,s)
ind <- z>trun
zeros <- zeros + sum(!ind)
total<- total + sample
z.trun <- c(z.trun,z[ind])
}
z.trun<-z.trun[1:sample]
tau<-ifelse(qnorm(zeros/total)==-Inf,-3,qnorm(zeros/total))
#estimate the true mean
m.tr <- mean(z.trun)
s.tr <- sd(z.trun)
s.untr <- s.tr/sqrt(1-f.lambda(tau)*(f.lambda(tau)-tau))
m.untr <- m.tr - s.untr*f.lambda(tau)
#cacluate the breakpoints
#number of breakpoints first:
br <- ceiling(log2(length(z.trun)) + 1)
z.trun<- sort(z.trun)
br.len <- (z.trun[sample - 6] - z.trun[1])/(br-1)
breaks <- seq(from=z.trun[1],by=br.len,length=br)
top <- 1000#top limit of the range - I'm using N(0,1) so for the moment
this is ok
breaks <- c(breaks, top)
tab <- hist(z.trun,br=breaks, plot=F,right=F)$counts
expect<-f.expect(breaks,m.untr,s.untr,tau)*sample
stat <- sum((tab-expect)^2/expect)
if(qchisq(0.975,br-3) < stat){
rej.2.5 <-rej.2.5 + 1
}
}
return(100*rej.2.5/iter)
}
f.expect <- function(breaks,m,sigma,alpha){
res<-sapply(breaks,f.tr.pnorm,m=m,sigma=sigma,alpha=alpha)
return(res[-1] - res[-length(res)])
}
f.tr.dnorm <- function(x,m=0,sigma=1,alpha){
dnorm((x-m)/sigma)/(sigma*(1-pnorm(alpha)))
}
f.tr.pnorm <- function(q,m=0,sigma=1,alpha){
integrate(f.tr.dnorm,alpha,q,m=m,sigma=sigma,alpha=alpha)$value#rel.tol=.Machine$double.eps^0.5
}
f.test <- function(){
Rprof()
f.powertest(iter=1000,m=0,s=1,sample=200,trun.prop=0.25)
Rprof(NULL)
print(summaryRprof())
}
f.lambda <- function(x) dnorm(x)/(1 - pnorm(x))
************************************************
> f.test()
$by.self
self.time self.pct total.time total.pct
integrate 0.92 15.2 3.28 51.7
as.double 0.56 9.3 0.68 10.7
dnorm 0.44 7.3 0.52 8.2
rnorm 0.30 5.0 0.30 4.7
pnorm 0.26 4.3 0.26 4.1
> 0.18 3.0 0.18 2.8
hist.default 0.18 3.0 1.24 19.6
f 0.16 2.6 0.90 14.2
names 0.16 2.6 0.18 2.8
f.powertest 0.14 2.3 6.34 100.0
switch 0.14 2.3 0.14 2.2
== 0.12 2.0 0.12 1.9
as.double.default 0.12 2.0 0.12 1.9
as.integer 0.12 2.0 0.16 2.5
diff.default 0.12 2.0 0.32 5.0
- 0.10 1.7 0.10 1.6
paste 0.10 1.7 0.20 3.2
seq 0.10 1.7 0.20 3.2
...
$by.total
total.time total.pct self.time self.pct
f.powertest 6.34 100.0 0.14 2.3
f.test 6.34 100.0 0.00 0.0
f.expect 3.64 57.4 0.02 0.3
sapply 3.62 57.1 0.04 0.7
lapply 3.52 55.5 0.04 0.7
FUN 3.38 53.3 0.02 0.3
integrate 3.28 51.7 0.92 15.2
hist 1.30 20.5 0.06 1.0
hist.default 1.24 19.6 0.18 3.0
<Anonymous> 0.94 14.8 0.04 0.7
f 0.90 14.2 0.16 2.6
as.double 0.68 10.7 0.56 9.3
dnorm 0.52 8.2 0.44 7.3
sort 0.40 6.3 0.04 0.7
diff 0.36 5.7 0.00 0.0
storage.mode<- 0.34 5.4 0.02 0.3
diff.default 0.32 5.0 0.12 2.0
rnorm 0.30 4.7 0.30 5.0
pnorm 0.26 4.1 0.26 4.3
ifelse 0.22 3.5 0.04 0.7
eval 0.20 3.2 0.06 1.0
paste 0.20 3.2 0.10 1.7
seq 0.20 3.2 0.10 1.7
...
$sampling.time
[1] 6.34
--
SC
Simon Cullen
Room 3030
Dept. Of Economics
Trinity College Dublin
Ph. (608)3477
Email cullens at tcd.ie
More information about the R-help
mailing list