[R] Improving effeciency - better table()?

Tue Jul 6 18:21:44 CEST 2004

It seems that attachments are stripped off so I'll include the code and  
the output of Rprof below:

#apologies for disgusting wrapping

f.powertest <-  
function(path=paste(substr(getwd(),1,3),"R/",sep=""),iter=5000,sample=25,trun.prop=0,m,s){
	rej.2.5 <- 0

	for (i in 1:iter){
		z<-c()
		z.trun<-c()
		zeros<- 0
		total<-0
		while(length(z.trun)< sample){

			trun<-qnorm(trun.prop,m,s)
			z<-rnorm(sample,m,s)
			ind <- z>trun
			zeros <- zeros + sum(!ind)
			total<- total + sample
			z.trun <- c(z.trun,z[ind])

		}

		z.trun<-z.trun[1:sample]
		tau<-ifelse(qnorm(zeros/total)==-Inf,-3,qnorm(zeros/total))

		#estimate the true mean
		m.tr <- mean(z.trun)
		s.tr <- sd(z.trun)
		s.untr <- s.tr/sqrt(1-f.lambda(tau)*(f.lambda(tau)-tau))
		m.untr <- m.tr - s.untr*f.lambda(tau)

		#cacluate the breakpoints

		#number of breakpoints first:
		br <- ceiling(log2(length(z.trun)) + 1)
		z.trun<- sort(z.trun)

		br.len <- (z.trun[sample - 6] - z.trun[1])/(br-1)

		breaks <- seq(from=z.trun[1],by=br.len,length=br)
		top <- 1000#top limit of the range - I'm using N(0,1) so for the moment  
this is ok
		breaks <- c(breaks, top)
		tab <- hist(z.trun,br=breaks, plot=F,right=F)$counts
		expect<-f.expect(breaks,m.untr,s.untr,tau)*sample

		stat <- sum((tab-expect)^2/expect)

		if(qchisq(0.975,br-3) < stat){
			rej.2.5 <-rej.2.5 + 1
		}

	}

	return(100*rej.2.5/iter)
}
f.expect <- function(breaks,m,sigma,alpha){
	res<-sapply(breaks,f.tr.pnorm,m=m,sigma=sigma,alpha=alpha)
	return(res[-1] - res[-length(res)])
}
f.tr.dnorm <- function(x,m=0,sigma=1,alpha){
	dnorm((x-m)/sigma)/(sigma*(1-pnorm(alpha)))
}
f.tr.pnorm <- function(q,m=0,sigma=1,alpha){
	integrate(f.tr.dnorm,alpha,q,m=m,sigma=sigma,alpha=alpha)$value#rel.tol=.Machine$double.eps^0.5
}

f.test <- function(){
	Rprof()
	f.powertest(iter=1000,m=0,s=1,sample=200,trun.prop=0.25)
	Rprof(NULL)
	print(summaryRprof())
}

f.lambda <- function(x) dnorm(x)/(1 - pnorm(x))

************************************************

> f.test()
$by.self
                    self.time self.pct total.time total.pct
integrate               0.92     15.2       3.28      51.7
as.double               0.56      9.3       0.68      10.7
dnorm                   0.44      7.3       0.52       8.2
rnorm                   0.30      5.0       0.30       4.7
pnorm                   0.26      4.3       0.26       4.1
>                       0.18      3.0       0.18       2.8
hist.default            0.18      3.0       1.24      19.6
f                       0.16      2.6       0.90      14.2
names                   0.16      2.6       0.18       2.8
f.powertest             0.14      2.3       6.34     100.0
switch                  0.14      2.3       0.14       2.2
==                      0.12      2.0       0.12       1.9
as.double.default       0.12      2.0       0.12       1.9
as.integer              0.12      2.0       0.16       2.5
diff.default            0.12      2.0       0.32       5.0
-                       0.10      1.7       0.10       1.6
paste                   0.10      1.7       0.20       3.2
seq                     0.10      1.7       0.20       3.2
...

$by.total
                    total.time total.pct self.time self.pct
f.powertest              6.34     100.0      0.14      2.3
f.test                   6.34     100.0      0.00      0.0
f.expect                 3.64      57.4      0.02      0.3
sapply                   3.62      57.1      0.04      0.7
lapply                   3.52      55.5      0.04      0.7
FUN                      3.38      53.3      0.02      0.3
integrate                3.28      51.7      0.92     15.2
hist                     1.30      20.5      0.06      1.0
hist.default             1.24      19.6      0.18      3.0
<Anonymous>              0.94      14.8      0.04      0.7
f                        0.90      14.2      0.16      2.6
as.double                0.68      10.7      0.56      9.3
dnorm                    0.52       8.2      0.44      7.3
sort                     0.40       6.3      0.04      0.7
diff                     0.36       5.7      0.00      0.0
storage.mode<-           0.34       5.4      0.02      0.3
diff.default             0.32       5.0      0.12      2.0
rnorm                    0.30       4.7      0.30      5.0
pnorm                    0.26       4.1      0.26      4.3
ifelse                   0.22       3.5      0.04      0.7
eval                     0.20       3.2      0.06      1.0
paste                    0.20       3.2      0.10      1.7
seq                      0.20       3.2      0.10      1.7
...
$sampling.time
[1] 6.34

-- 
SC

Simon Cullen
Room 3030
Dept. Of Economics
Trinity College Dublin

Ph. (608)3477
Email cullens at tcd.ie