[R] Aggregate matrix in a 2 by 2 manor
Anthoni, Peter (IMK)
peter.anthoni at kit.edu
Sun Jul 31 09:31:40 CEST 2016
Hi Jeff,
many thanks, that one is the Speedy Gonzalles out of all. Can also do some FUN stuff.
aggregate.nx.ny.array.aperm <- function( dta, nx = 2, ny = 2, FUN=colMeans, ... ) {
# number of rows in result
nnr <- nrow( dta ) %/% ny
# number of columns in result
nnc <- ncol( dta ) %/% nx
# number of values to take mean of
nxny <- nx * ny
# describe existing layout of values in dta as 4-d array
a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
# swap data in dimensions 2 and 3
a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
# treat first two dimensions as column vectors, remaining as columns
a3 <- matrix( a2, nrow = nxny )
# fast calculation of column means
v <- FUN( a3, ... )
# reframe result vector as a matrix
matrix( v, ncol = nnc )
}
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)})
user system elapsed
14.003 0.271 14.663
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)})
user system elapsed
32.686 1.175 35.012
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)})
user system elapsed
9.590 0.197 9.951
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)})
user system elapsed
8.391 0.174 8.737
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.aperm(tst,2,2,colMeans,na.rm=T)})
user system elapsed
0.195 0.019 0.216
> system.time( {for(i in 1:10) tst_2x2=aggregate.nx.ny.array.aperm(tst,2,2,colSums,na.rm=T)})
user system elapsed
0.169 0.017 0.188
> aggregate.nx.ny.array.aperm(tst.small,FUN=colMeans)
[,1] [,2] [,3] [,4]
[1,] 3.5 11.5 19.5 27.5
[2,] 5.5 13.5 21.5 29.5
> aggregate.nx.ny.array.aperm(tst.small,FUN=colSums)
[,1] [,2] [,3] [,4]
[1,] 14 46 78 110
[2,] 22 54 86 118
> aggregate.nx.ny.forloop(tst.small,FUN=sum)
[,1] [,2] [,3] [,4]
[1,] 14 46 78 110
[2,] 22 54 86 118
cheers
Peter
> On 31 Jul 2016, at 08:13, Jeff Newmiller <jdnewmil at dcn.davis.ca.us> wrote:
>
> If you don't need all that FUN flexibility, you can get this done way faster with the aperm and colMeans functions:
>
> tst <- matrix( seq.int( 1440 * 360 )
> , ncol = 1440
> , nrow = 360
> )
> tst.small <- matrix( seq.int( 8 * 4 )
> , ncol = 8
> , nrow = 4
> )
> aggregate.nx.ny.expand.grid <- function( dta, nx = 2, ny = 2, FUN = mean, ... )
> {
> ilon <- seq( 1, ncol( dta ), nx )
> ilat <- seq( 1, nrow( dta ), ny )
> cells <- as.matrix( expand.grid( ilat, ilon ) )
> blocks <- apply( cells
> , 1
> , function( x ) dta[ x[ 1 ]:( x[ 1 ] + 1 ), x[ 2 ]:( x[ 2 ] + 1 ) ] )
> block.means <- colMeans( blocks )
> matrix( block.means
> , nrow( dta ) / ny
> , ncol( dta ) / nx
> )
> }
>
> aggregate.nx.ny.array.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
> a <- array( dta
> , dim = c( ny
> , nrow( dta ) %/% ny
> , nx
> , ncol( dta ) %/% nx
> )
> )
> apply( a, c( 2, 4 ), FUN, ... )
> }
>
> aggregate.nx.ny.array.aperm.mean <- function( dta, nx = 2, ny = 2, ... ) {
> # number of rows in result
> nnr <- nrow( dta ) %/% ny
> # number of columns in result
> nnc <- ncol( dta ) %/% nx
> # number of values to take mean of
> nxny <- nx * ny
> # describe existing layout of values in dta as 4-d array
> a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
> # swap data in dimensions 2 and 3
> a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
> # treat first two dimensions as column vectors, remaining as columns
> a3 <- matrix( a2, nrow = nxny )
> # fast calculation of column means
> v <- colMeans( a3, ... )
> # reframe result vector as a matrix
> matrix( v, ncol = nnc )
> }
>
> aggregate.nx.ny.array.aperm.apply <- function( dta, nx = 2, ny = 2, FUN = mean, ... ) {
> # number of rows in result
> nnr <- nrow( dta ) %/% ny
> # number of columns in result
> nnc <- ncol( dta ) %/% nx
> # number of values to apply FUN to
> nxny <- nx * ny
> # describe existing layout of values in dta as 4-d array
> a1 <- array( dta, dim = c( ny, nnr, nx, nnc ) )
> # swap data in dimensions 2 and 3
> a2 <- aperm( a1, c( 1, 3, 2, 4 ) )
> # treat first two dimensions as column vectors, remaining as columns
> a3 <- matrix( a2, nrow = nxny )
> # apply FUN to column vectors
> v <- apply( a3, 2, FUN = FUN, ... )
> matrix( v, ncol = nnc )
> }
> test1 <- aggregate.nx.ny.expand.grid( tst )
> test2 <- aggregate.nx.ny.array.apply( tst )
> test3 <- aggregate.nx.ny.array.aperm.mean( tst )
> test4 <- aggregate.nx.ny.array.aperm.apply( tst )
> library(microbenchmark)
> microbenchmark(
> aggregate.nx.ny.expand.grid( tst, 2, 2, mean, na.rm = TRUE )
> , aggregate.nx.ny.array.apply( tst, 2, 2, mean, na.rm = TRUE )
> , aggregate.nx.ny.array.aperm.mean( tst, 2, 2, na.rm = TRUE )
> , aggregate.nx.ny.array.aperm.apply( tst, 2, 2, mean, na.rm = TRUE )
> )
> #Unit: milliseconds
> # expr min
> # aggregate.nx.ny.expand.grid(tst, 2, 2, mean, na.rm = TRUE) 628.528322
> # aggregate.nx.ny.array.apply(tst, 2, 2, mean, na.rm = TRUE) 846.883314
> # aggregate.nx.ny.array.aperm.mean(tst, 2, 2, na.rm = TRUE) 8.904369
> # aggregate.nx.ny.array.aperm.apply(tst, 2, 2, mean, na.rm = TRUE) 619.691851
> # lq mean median uq max neval cld
> # 675.470967 916.39630 778.54090 873.9754 2452.695 100 b
> # 920.831966 1126.94691 1000.33830 1094.9233 3412.639 100 c
> # 9.191747 21.98528 10.30099 15.9169 158.687 100 a
> # 733.246331 936.73359 757.58383 844.2016 2824.557 100 b
>
>
> On Sat, 30 Jul 2016, Jeff Newmiller wrote:
>
>> For the record, the array.apply code can be fixed as below, but then it is slower than the expand.grid version.
>>
>> aggregate.nx.ny.array.apply <- function(dta,nx=2,ny=2, FUN=mean,...)
>> {
>> a <- array(dta, dim = c(ny, nrow( dta ) %/% ny, nx, ncol( dta ) %/% nx))
>> apply( a, c(2, 4), FUN, ... )
>> }
>>
>> --
>> Sent from my phone. Please excuse my brevity.
>>
>> On July 30, 2016 11:06:16 AM PDT, "Anthoni, Peter (IMK)" <peter.anthoni at kit.edu> wrote:
>>> Hi all,
>>>
>>> thanks for the suggestions, I did some timing tests, see below.
>>> Unfortunately the aggregate.nx.ny.array.apply, does not produce the
>>> expected result.
>>> So the fastest seems to be the aggregate.nx.ny.expand.grid, though the
>>> double for loop is not that much slower.
>>>
>>> many thanks
>>> Peter
>>>
>>>> tst=matrix(1:(1440*360),ncol=1440,nrow=360)
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.forloop(tst,2,2,mean,na.rm=T)})
>>> user system elapsed
>>> 11.227 0.073 11.371
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.interaction(tst,2,2,mean,na.rm=T)})
>>> user system elapsed
>>> 26.354 0.475 26.880
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.expand.grid(tst,2,2,mean,na.rm=T)})
>>> user system elapsed
>>> 9.683 0.055 9.763
>>>> system.time( {for(i in 1:10)
>>> tst_2x2=aggregate.nx.ny.array.apply(tst,2,2,mean,na.rm=T)})
>>> user system elapsed
>>> 7.693 0.055 7.800
>>>
>>>> tst.small=matrix(1:(8*4),ncol=8,nrow=4)
>>>> aggregate.nx.ny.forloop = function(data,nx=2,ny=2, FUN=mean,...)
>>> + {
>>> + nlon=nrow(data)
>>> + nlat=ncol(data)
>>> + newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
>>> + dim(newdata)
>>> + for(ilon in seq(1,nlon,nx)) {
>>> + for(ilat in seq(1,nlat,ny)) {
>>> + ilon_new=1+(ilon-1)/nx
>>> + ilat_new=1+(ilat-1)/ny
>>> + newdata[ilon_new,ilat_new] = FUN(data[ilon+0:1,ilat+0:1],...)
>>> + }
>>> + }
>>> + newdata
>>> + }
>>>> aggregate.nx.ny.forloop(tst.small)
>>> [,1] [,2] [,3] [,4]
>>> [1,] 3.5 11.5 19.5 27.5
>>> [2,] 5.5 13.5 21.5 29.5
>>>>
>>>> aggregate.nx.ny.interaction = function(data,nx=2,ny=2, FUN=mean,...)
>>> + {
>>> +
>>> + nlon=nrow(data)
>>> + nlat=ncol(data)
>>> + newdata=matrix(NA,nrow=nlon/nx,ncol=nlat/ny)
>>> + newdata[] <- tapply( data, interaction( (row(data)+1) %/% 2,
>>> (col(data)+1) %/% 2 ), FUN, ...)
>>> + newdata
>>> + }
>>>> aggregate.nx.ny.interaction(tst.small)
>>> [,1] [,2] [,3] [,4]
>>> [1,] 3.5 11.5 19.5 27.5
>>> [2,] 5.5 13.5 21.5 29.5
>>>>
>>>> aggregate.nx.ny.expand.grid = function(data,nx=2,ny=2, FUN=mean,...)
>>> + {
>>> + ilon <- seq(1,ncol(data),nx)
>>> + ilat <- seq(1,nrow(data),ny)
>>> + cells <- as.matrix(expand.grid(ilat, ilon))
>>> + blocks <- apply(cells, 1, function(x)
>>> data[x[1]:(x[1]+1),x[2]:(x[2]+1)])
>>> + block.means <- colMeans(blocks)
>>> + matrix(block.means, nrow(data)/ny, ncol(data)/nx)
>>> + }
>>>> aggregate.nx.ny.expand.grid(tst.small)
>>> [,1] [,2] [,3] [,4]
>>> [1,] 3.5 11.5 19.5 27.5
>>> [2,] 5.5 13.5 21.5 29.5
>>>>
>>>> aggregate.nx.ny.array.apply = function(data,nx=2,ny=2, FUN=mean,...)
>>> {
>>> + a <- array(data, dim = c(ny, nrow( data ) %/% ny, ncol( data ) %/%
>>> nx))
>>> + apply( a, c(2, 3), FUN, ... )
>>> + }
>>>> aggregate.nx.ny.array.apply(tst.small)
>>> [,1] [,2] [,3] [,4]
>>> [1,] 1.5 5.5 9.5 13.5
>>> [2,] 3.5 7.5 11.5 15.5
>>>
>>>
>>>
>>>> On 28 Jul 2016, at 00:26, David Winsemius <dwinsemius at comcast.net>
>>> wrote:
>>>>
>>>>
>>>>> On Jul 27, 2016, at 12:02 PM, Jeff Newmiller
>>> <jdnewmil at dcn.davis.ca.us> wrote:
>>>>>
>>>>> An alternative (more compact, not necessarily faster, because apply
>>> is still a for loop inside):
>>>>>
>>>>> f <- function( m, nx, ny ) {
>>>>> # redefine the dimensions of my
>>>>> a <- array( m
>>>>> , dim = c( ny
>>>>> , nrow( m ) %/% ny
>>>>> , ncol( m ) %/% nx )
>>>>> )
>>>>> # apply mean over dim 1
>>>>> apply( a, c( 2, 3 ), FUN=mean )
>>>>> }
>>>>> f( tst, nx, ny )
>>>>
>>>> Here's an apparently loopless strategy, although I suspect the code
>>> for interaction (and maybe tapply as well?) uses a loop.
>>>>
>>>>
>>>> tst_2X2 <- matrix(NA, ,ncol=4,nrow=2)
>>>>
>>>> tst_2x2[] <- tapply( tst, interaction( (row(tst)+1) %/% 2,
>>> (col(tst)+1) %/% 2 ), mean)
>>>>
>>>> tst_2x2
>>>>
>>>> [,1] [,2] [,3] [,4]
>>>> [1,] 3.5 11.5 19.5 27.5
>>>> [2,] 5.5 13.5 21.5 29.5
>>>>
>>>> --
>>>> David.
>>>>
>>>>
>>>>>
>>>>> --
>>>>> Sent from my phone. Please excuse my brevity.
>>>>>
>>>>> On July 27, 2016 9:08:32 AM PDT, David L Carlson <dcarlson at tamu.edu>
>>> wrote:
>>>>>> This should be faster. It uses apply() across the blocks.
>>>>>>
>>>>>>> ilon <- seq(1,8,nx)
>>>>>>> ilat <- seq(1,4,ny)
>>>>>>> cells <- as.matrix(expand.grid(ilat, ilon))
>>>>>>> blocks <- apply(cells, 1, function(x) tst[x[1]:(x[1]+1),
>>>>>> x[2]:(x[2]+1)])
>>>>>>> block.means <- colMeans(blocks)
>>>>>>> tst_2x2 <- matrix(block.means, 2, 4)
>>>>>>> tst_2x2
>>>>>> [,1] [,2] [,3] [,4]
>>>>>> [1,] 3.5 11.5 19.5 27.5
>>>>>> [2,] 5.5 13.5 21.5 29.5
>>>>>>
>>>>>> -------------------------------------
>>>>>> David L Carlson
>>>>>> Department of Anthropology
>>>>>> Texas A&M University
>>>>>> College Station, TX 77840-4352
>>>>>>
>>>>>>
>>>>>>
>>>>>> -----Original Message-----
>>>>>> From: R-help [mailto:r-help-bounces at r-poject.org] On Behalf Of
>>> Anthoni,
>>>>>> Peter (IMK)
>>>>>> Sent: Wednesday, July 27, 2016 6:14 AM
>>>>>> To: r-help at r-project.org
>>>>>> Subject: [R] Aggregate matrix in a 2 by 2 manor
>>>>>>
>>>>>> Hi all,
>>>>>>
>>>>>> I need to aggregate some matrix data (1440x720) to a lower
>>> dimension
>>>>>> (720x360) for lots of years and variables
>>>>>>
>>>>>> I can do double for loop, but that will be slow. Anybody know a
>>> quicker
>>>>>> way?
>>>>>>
>>>>>> here an example with a smaller matrix size:
>>>>>>
>>>>>> tst=matrix(1:(8*4),ncol=8,nrow=4)
>>>>>> tst_2x2=matrix(NA,ncol=4,nrow=2)
>>>>>> nx=2
>>>>>> ny=2
>>>>>> for(ilon in seq(1,8,nx)) {
>>>>>> for (ilat in seq(1,4,ny)) {
>>>>>> ilon_2x2=1+(ilon-1)/nx
>>>>>> ilat_2x2=1+(ilat-1)/ny
>>>>>> tst_2x2[ilat_2x2,ilon_2x2] = mean(tst[ilat+0:1,ilon+0:1])
>>>>>> }
>>>>>> }
>>>>>>
>>>>>> tst
>>>>>> tst_2x2
>>>>>>
>>>>>>> tst
>>>>>> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
>>>>>> [1,] 1 5 9 13 17 21 25 29
>>>>>> [2,] 2 6 10 14 18 22 26 30
>>>>>> [3,] 3 7 11 15 19 23 27 31
>>>>>> [4,] 4 8 12 16 20 24 28 32
>>>>>>
>>>>>>> tst_2x2
>>>>>> [,1] [,2] [,3] [,4]
>>>>>> [1,] 3.5 11.5 19.5 27.5
>>>>>> [2,] 5.5 13.5 21.5 29.5
>>>>>>
>>>>>>
>>>>>> I though a cast to 3d-array might do the trick and apply over the
>>> new
>>>>>> dimension, but that does not work, since it casts the data along
>>> the
>>>>>> row.
>>>>>>> matrix(apply(array(tst,dim=c(nx,ny,8)),3,mean),nrow=nrow(tst)/ny)
>>>>>> [,1] [,2] [,3] [,4]
>>>>>> [1,] 2.5 10.5 18.5 26.5
>>>>>> [2,] 6.5 14.5 22.5 30.5
>>>>>>
>>>>>>
>>>>>> cheers
>>>>>> Peter
>>>>>>
>>>>>> ______________________________________________
>>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>>> PLEASE do read the posting guide
>>>>>> http://www.R-project.org/posting-guide.html
>>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>>>
>>>>>> ______________________________________________
>>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>>> PLEASE do read the posting guide
>>>>>> http://www.R-project.org/posting-guide.html
>>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>>
>>>>> ______________________________________________
>>>>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>>>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>>>> PLEASE do read the posting guide
>>> http://www.R-project.org/posting-guide.html
>>>>> and provide commented, minimal, self-contained, reproducible code.
>>>>
>>>> David Winsemius
>>>> Alameda, CA, USA
>>>>
>>
>> ______________________________________________
>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>>
>
> ---------------------------------------------------------------------------
> Jeff Newmiller The ..... ..... Go Live...
> DCN:<jdnewmil at dcn.davis.ca.us> Basics: ##.#. ##.#. Live Go...
> Live: OO#.. Dead: OO#.. Playing
> Research Engineer (Solar/Batteries O.O#. #.O#. with
> /Software/Embedded Controllers) .OO#. .OO#. rocks...1k
> ---------------------------------------------------------------------------
More information about the R-help
mailing list