[R] exces return by mktcap decile for each year
jim holtman
jholtman at gmail.com
Wed Jul 11 18:20:49 CEST 2007
here is one way of doing it using 'ave':
> dat <- read.table(textConnection(" mc yr ret
+ 32902.233 01/01/1995 0.426
+ 15793.691 01/01/1995 0.024
+ 2375.868 01/01/1995 0.660
+ 54586.558 01/01/1996 0.497
+ 10674.900 01/01/1996 0.405
+ 859.656 01/01/1996 -0.033
+ 770.963 01/01/1995 -1.248
+ 423.480 01/01/1995 0.654
+ 2135.504 01/01/1995 0.394
+ 696.599 01/01/1995 -0.482
+ 5115.476 01/01/1995 0.352
+ 821.347 01/01/1995 0.869
+ 43329.695 01/01/1995 0.495
+ 7975.151 01/01/1995 0.112
+ 396.450 01/01/1995 0.956
+ 843.870 01/01/1995 0.172
+ 2727.037 01/01/1995 -0.358
+ 114.584 01/01/1995 -1.015
+ 1347.327 01/01/1995 -0.083
+ 4592.049 01/01/1995 -0.251
+ 674.305 01/01/1995 -0.327
+ 39424.887 01/01/1996 0.198
+ 4447.383 01/01/1996 -0.045
+ 1608.540 01/01/1996 -0.109
+ 217.151 01/01/1996 0.539
+ 1813.320 01/01/1996 0.754
+ 145.170 01/01/1996 0.249
+ 3176.298 01/01/1996 -0.202
+ 14379.686 01/01/1996 0.013
+ 3009.059 01/01/1996 -0.328
+ 1781.406 01/01/1996 -0.158
+ 2576.215 01/01/1996 0.514
+ 1236.317 01/01/1996 0.346
+ 3003.735 01/01/1996 0.151
+ 1544.003 01/01/1996 0.482
+ 7588.657 01/01/1996 0.306
+ 1516.625 01/01/1996 0.183
+ 1596.098 01/01/1996 0.674
+ 2792.192 01/01/1996 0.528
+ 1276.702 01/01/1996 0.010
+ 875.716 01/01/1996 0.189
+ 4858.450 01/01/1995 0.250
+ 2033.623 01/01/1995 -0.582
+ 2164.125 01/01/1995 0.631"), header=TRUE)
> # quantiles by year (need as grouping in next statement
> dat$qByYr <- ave(dat$mc, dat$yr, FUN=function(x){
+ cut(x, quantile(x, prob=seq(0, 1, .1)), include.lowest=TRUE)
+ })
> # compute the mean for year/quantile
> dat$dec.mean <- ave(dat$ret, dat$yr, dat$qByYr, FUN=mean)
> # mean adjusted return
> dat$mean.adjusted <- dat$ret - dat$dec.mean
> dat
mc yr ret qByYr dec.mean mean.adjusted
1 32902.233 01/01/1995 0.426 10 0.4605000 -0.034500000
2 15793.691 01/01/1995 0.024 9 0.0680000 -0.044000000
3 2375.868 01/01/1995 0.660 6 0.6455000 0.014500000
4 54586.558 01/01/1996 0.497 10 0.2360000 0.261000000
5 10674.900 01/01/1996 0.405 9 0.3555000 0.049500000
6 859.656 01/01/1996 -0.033 1 0.2516667 -0.284666667
7 770.963 01/01/1995 -1.248 3 -0.1895000 -1.058500000
8 423.480 01/01/1995 0.654 1 0.1983333 0.455666667
9 2135.504 01/01/1995 0.394 5 -0.0940000 0.488000000
10 696.599 01/01/1995 -0.482 2 -0.4045000 -0.077500000
11 5115.476 01/01/1995 0.352 8 0.3010000 0.051000000
12 821.347 01/01/1995 0.869 3 -0.1895000 1.058500000
13 43329.695 01/01/1995 0.495 10 0.4605000 0.034500000
14 7975.151 01/01/1995 0.112 9 0.0680000 0.044000000
15 396.450 01/01/1995 0.956 1 0.1983333 0.757666667
16 843.870 01/01/1995 0.172 4 0.0445000 0.127500000
17 2727.037 01/01/1995 -0.358 7 -0.3045000 -0.053500000
18 114.584 01/01/1995 -1.015 1 0.1983333 -1.213333333
19 1347.327 01/01/1995 -0.083 4 0.0445000 -0.127500000
20 4592.049 01/01/1995 -0.251 7 -0.3045000 0.053500000
21 674.305 01/01/1995 -0.327 2 -0.4045000 0.077500000
22 39424.887 01/01/1996 0.198 10 0.2360000 -0.038000000
23 4447.383 01/01/1996 -0.045 8 -0.1235000 0.078500000
24 1608.540 01/01/1996 -0.109 5 0.1623333 -0.271333333
25 217.151 01/01/1996 0.539 1 0.2516667 0.287333333
26 1813.320 01/01/1996 0.754 5 0.1623333 0.591666667
27 145.170 01/01/1996 0.249 1 0.2516667 -0.002666667
28 3176.298 01/01/1996 -0.202 8 -0.1235000 -0.078500000
29 14379.686 01/01/1996 0.013 10 0.2360000 -0.223000000
30 3009.059 01/01/1996 -0.328 7 -0.0885000 -0.239500000
31 1781.406 01/01/1996 -0.158 5 0.1623333 -0.320333333
32 2576.215 01/01/1996 0.514 6 0.5210000 -0.007000000
33 1236.317 01/01/1996 0.346 2 0.2675000 0.078500000
34 3003.735 01/01/1996 0.151 7 -0.0885000 0.239500000
35 1544.003 01/01/1996 0.482 4 0.5780000 -0.096000000
36 7588.657 01/01/1996 0.306 9 0.3555000 -0.049500000
37 1516.625 01/01/1996 0.183 3 0.0965000 0.086500000
38 1596.098 01/01/1996 0.674 4 0.5780000 0.096000000
39 2792.192 01/01/1996 0.528 6 0.5210000 0.007000000
40 1276.702 01/01/1996 0.010 3 0.0965000 -0.086500000
41 875.716 01/01/1996 0.189 2 0.2675000 -0.078500000
42 4858.450 01/01/1995 0.250 8 0.3010000 -0.051000000
43 2033.623 01/01/1995 -0.582 5 -0.0940000 -0.488000000
44 2164.125 01/01/1995 0.631 6 0.6455000 -0.014500000
>
>
>
>
On 7/11/07, Frank Hansen <hansenfrank at yahoo.com> wrote:
> Hi Jim,
>
> Thanks for getting back on this. I did not see your
> email on the help list. I or you can post this
> solution
>
> You are right I mis-stated about mc. mc is real, it is
> yr that is a factor.
>
> Here is a solution, which works, but it is clunky. I
> thought there might be a better/more R-like less
> for-loop way to do this.
>
> dat <- read.table("test.data", header=TRUE)
>
> if( "new.data" %in% ls()) {
> rm( new.data)
> }
> yrs <- as.character(unique( dat$yr))
> for (y in yrs) {
> bool <- as.character(dat$yr) == y
> tmp.dat <- dat[ bool,]
> breaks <- quantile(tmp.dat$mc,
> probs=seq(0,1,0.1),na.rm=TRUE)
> breaks[1] <- breaks[1]*.9
> # breaks >0, else 1st value not in (a,b] interval
> cuts <- cut(tmp.dat$mc, breaks)
> means.by.dec <- by( tmp.dat$ret, cuts, mean)
> for ( i in seq(1, dim( tmp.dat)[1])) {
> tmp.dat[i,"dec.mean"] <- means.by.dec[ cuts[i]]
> }
> if(! "new.data" %in% ls()) {
> new.data <- tmp.dat
> } else {
> new.data <- rbind( new.data, tmp.dat)
> }
> }
>
> Here is some test input data in the file test.data
> ----- test.data -----
> mc yr ret
> 32902.233 01/01/1995 0.426
> 15793.691 01/01/1995 0.024
> 2375.868 01/01/1995 0.660
> 54586.558 01/01/1996 0.497
> 10674.900 01/01/1996 0.405
> 859.656 01/01/1996 -0.033
> 770.963 01/01/1995 -1.248
> 423.480 01/01/1995 0.654
> 2135.504 01/01/1995 0.394
> 696.599 01/01/1995 -0.482
> 5115.476 01/01/1995 0.352
> 821.347 01/01/1995 0.869
> 43329.695 01/01/1995 0.495
> 7975.151 01/01/1995 0.112
> 396.450 01/01/1995 0.956
> 843.870 01/01/1995 0.172
> 2727.037 01/01/1995 -0.358
> 114.584 01/01/1995 -1.015
> 1347.327 01/01/1995 -0.083
> 4592.049 01/01/1995 -0.251
> 674.305 01/01/1995 -0.327
> 39424.887 01/01/1996 0.198
> 4447.383 01/01/1996 -0.045
> 1608.540 01/01/1996 -0.109
> 217.151 01/01/1996 0.539
> 1813.320 01/01/1996 0.754
> 145.170 01/01/1996 0.249
> 3176.298 01/01/1996 -0.202
> 14379.686 01/01/1996 0.013
> 3009.059 01/01/1996 -0.328
> 1781.406 01/01/1996 -0.158
> 2576.215 01/01/1996 0.514
> 1236.317 01/01/1996 0.346
> 3003.735 01/01/1996 0.151
> 1544.003 01/01/1996 0.482
> 7588.657 01/01/1996 0.306
> 1516.625 01/01/1996 0.183
> 1596.098 01/01/1996 0.674
> 2792.192 01/01/1996 0.528
> 1276.702 01/01/1996 0.010
> 875.716 01/01/1996 0.189
> 4858.450 01/01/1995 0.250
> 2033.623 01/01/1995 -0.582
> 2164.125 01/01/1995 0.631
>
> Here is the output which looks ok
>
> > new.data
> mc yr ret dec.mean
> 1 32902.233 01/01/1995 0.426 0.4605000
> 2 4858.450 01/01/1995 0.250 0.3010000
> 3 2033.623 01/01/1995 -0.582 -0.0940000
> 4 2164.125 01/01/1995 0.631 0.6455000
> 5 15793.691 01/01/1995 0.024 0.0680000
> 6 2375.868 01/01/1995 0.660 0.6455000
> 7 770.963 01/01/1995 -1.248 -0.1895000
> 8 423.480 01/01/1995 0.654 0.1983333
> 9 2135.504 01/01/1995 0.394 -0.0940000
> 10 696.599 01/01/1995 -0.482 -0.4045000
> 11 5115.476 01/01/1995 0.352 0.3010000
> 12 821.347 01/01/1995 0.869 -0.1895000
> 13 43329.695 01/01/1995 0.495 0.4605000
> 14 7975.151 01/01/1995 0.112 0.0680000
> 15 396.450 01/01/1995 0.956 0.1983333
> 16 843.870 01/01/1995 0.172 0.0445000
> 17 2727.037 01/01/1995 -0.358 -0.3045000
> 18 114.584 01/01/1995 -1.015 0.1983333
> 19 1347.327 01/01/1995 -0.083 0.0445000
> 20 4592.049 01/01/1995 -0.251 -0.3045000
> 21 674.305 01/01/1995 -0.327 -0.4045000
> 22 39424.887 01/01/1996 0.198 0.2360000
> 23 4447.383 01/01/1996 -0.045 -0.1235000
> 24 1608.540 01/01/1996 -0.109 0.1623333
> 25 217.151 01/01/1996 0.539 0.2516667
> 26 1813.320 01/01/1996 0.754 0.1623333
> 27 145.170 01/01/1996 0.249 0.2516667
> 28 3176.298 01/01/1996 -0.202 -0.1235000
> 29 14379.686 01/01/1996 0.013 0.2360000
> 30 3009.059 01/01/1996 -0.328 -0.0885000
> 31 1781.406 01/01/1996 -0.158 0.1623333
> 32 2576.215 01/01/1996 0.514 0.5210000
> 33 1236.317 01/01/1996 0.346 0.2675000
> 34 3003.735 01/01/1996 0.151 -0.0885000
> 35 1544.003 01/01/1996 0.482 0.5780000
> 36 7588.657 01/01/1996 0.306 0.3555000
> 37 1516.625 01/01/1996 0.183 0.0965000
> 38 54586.558 01/01/1996 0.497 0.2360000
> 39 10674.900 01/01/1996 0.405 0.3555000
> 40 859.656 01/01/1996 -0.033 0.2516667
> 41 1596.098 01/01/1996 0.674 0.5780000
> 42 2792.192 01/01/1996 0.528 0.5210000
> 43 1276.702 01/01/1996 0.010 0.0965000
> 44 875.716 01/01/1996 0.189 0.2675000
> >
>
> notice that records 1 and 13 fall into the same mc
> decile for the year 1995, and their ret mean is .4605
> and so forth for the other mc deciles in both years.
>
> I'd be interested to know if there is a cleaner way to
> do this. Thanks.
>
> Frank
>
>
>
>
> ____________________________________________________________________________________
> TV dinner still cooling?
> Check out "Tonight's Picks" on Yahoo! TV.
> http://tv.yahoo.com/
>
--
Jim Holtman
Cincinnati, OH
+1 513 646 9390
What is the problem you are trying to solve?
More information about the R-help
mailing list