[R] [netcdfgroup] [ncdf4] error converting GEIA data to netCDF
Tom Roche
Tom_Roche at pobox.com
Tue Aug 28 21:24:48 CEST 2012
Roy Mendelssohn Tue, 28 Aug 2012 09:07:32 -0700
> here is the relevant section from your code:
> > netcdf.file <- nc_create(
> > filename=netcdf.fn,
> > # vars=list(emis.var),
> > # verbose=TRUE)
> > vars=list(emis.var))
> > # Write data to data variable: gotta have file first.
> > # Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
> > # Do this before adding _FillValue to prevent:
> > # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or =
> _FillValue type mismatch
> > ## global.emis.arr <- global.emis.mx
> > ## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
> > ## global.emis.arr[1,,] <- global.emis.mx
> > # Note
> > # * global.emis.mx[lat,lon]
> > # * datavar needs [lon, lat, time] (with time *last*)
> > ncvar_put(
> > nc=netcdf.file,
> > varid=emis.var,
> > # vals=global.emis.arr,
> > vals=t(global.emis.mx),
> > # start=rep.int(1, length(dim(global.emis.arr))),
> > start=c(1, 1, 1),
> > # count=dim(global.emis.arr))
> > count=c(-1,-1, 1)) # -1 -> all data
> You can't write until all dimensions have been defined, and all
> variables defined.
But in fact, that's only a part of the code ... which omits the prior
dimension and variable definitions :-( See the current version @
https://github.com/TomRoche/GEIA_to_netCDF/blob/c380c0a28dc8c71dbf0c2ba18130a2439a4fe089/GEIA.to.netCDF.r
I've also attached that (quoted) following my .sig, with the top-most
constant and function declarations removed for brevity. The dimension
definitions are prefixed with '1', the variable definition is prefixed
with '2'.
HTH, Tom Roche <Tom_Roche at pobox.com>
current GEIA.to.netCDF.r code block follows to end of post------------
# code----------------------------------------------------------------
> # process input
> library(maps) # on tlrPanP5 as well as clusters
> # input file path
> GEIA.emis.txt.fp <- sprintf('%s/%s', GEIA.emis.txt.dir, GEIA.emis.txt.fn)
> # columns are grid#, mass
> GEIA.emis.mx <-
> as.matrix(read.table(GEIA.emis.txt.fp, skip=GEIA.emis.txt.n.header))
> # mask zeros? no, use NA for non-ocean areas
> # GEIA.emis.mx[GEIA.emis.mx == 0] <- NA
> # <simple input check>
> dim(GEIA.emis.mx) ## [1] 36143 2
> # start debug
> GEIA.emis.mx.rows <- dim(GEIA.emis.mx)[1]
> if (GEIA.emis.mx.rows > GEIA.emis.grids.dim) {
> cat(sprintf('ERROR: %s: GEIA.emis.mx.rows=%.0d > GEIA.emis.grids.dim=%.0d\n',
> this.fn, GEIA.emis.mx.rows, GEIA.emis.grids.dim))
> } else {
> cat(sprintf('debug: %s: GEIA.emis.mx.rows < GEIA.emis.grids.dim\n',
> this.fn))
> }
> # end debug
> # </simple input check>
> global.emis.vec <-
> create.global.emissions.vector(
> GEIA.emis.grids.dim, GEIA.emis.mx.rows, GEIA.emis.mx)
> # <visual input check>
> # Need sorted lat and lon vectors: we know what those are a priori
> # Add 0.5 since grid centers
> lon.vec <- 0.5 +
> seq(from=grid.lon.degree.start, by=grid.lon.degree.per.cell, length.out=GEIA.emis.lon.dim)
> lat.vec <- 0.5 +
> seq(from=grid.lat.degree.start, by=grid.lat.degree.per.cell, length.out=GEIA.emis.lat.dim)
> # Create emissions matrix corresponding to those dimensional vectors
> # (i.e., global.emis.mx is the "projection" of global.emis.vec)
> # First, create empty global.emis.mx? No, fill from global.emis.vec.
> # Fill using byrow=T? or "bycol" == byrow=FALSE? (row=lat)
> # I assigned (using lon.lat.vec.to.grid.index)
> # "grid indices" (global.emis.vec.index values)
> # "lon-majorly" (i.e., iterate over lats before incrementing lon),
> # so we want to fill byrow=FALSE ... BUT,
> # that will "fill from top" (i.e., starting @ 90N) and
> # we want to "fill from bottom" (i.e., starting @ 90S) ...
> # global.emis.mx <- matrix(
> # global.emis.vec, nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim,
> # # so flip/reverse rows/latitudes when done
> # byrow=FALSE)[GEIA.emis.lat.dim:1,]
> # NO: I cannot just fill global.emis.mx from global.emis.vec:
> # latter's/GEIA's grid numbering system ensures 1000 lons per lat!
> # Which overflows the "real-spatial" global.emis.mx :-(
> # So I need to fill global.emis.mx using a for loop to decode the grid indices :-(
> # (but at least I can fill in whatever direction I want :-)
> global.emis.mx <- matrix(
> rep(NA, GEIA.emis.grids.dim), nrow=GEIA.emis.lat.dim, ncol=GEIA.emis.lon.dim)
> # 1: works if subsequently transposed: TODO: FIXME
> for (i.lon in 1:GEIA.emis.lon.dim) {
> for (i.lat in 1:GEIA.emis.lat.dim) {
> # 2: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
> # for (i.lat in 1:GEIA.emis.lat.dim) {
> # for (i.lon in 1:GEIA.emis.lon.dim) {
> # 3: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
> # for (i.lon in GEIA.emis.lon.dim:1) {
> # for (i.lat in GEIA.emis.lat.dim:1) {
> # 4: fails with 'dimensions of z are not length(x)(-1) times length(y)(-1)'
> # for (i.lat in GEIA.emis.lat.dim:1) {
> # for (i.lon in GEIA.emis.lon.dim:1) {
> lon <- lon.vec[i.lon]
> lat <- lat.vec[i.lat]
> GEIA.emis.grid.val <-
> global.emis.vec[
> lon.lat.vec.to.grid.index(c(lon, lat),
> n.lon=GEIA.emis.lon.dim, n.lat=GEIA.emis.lat.dim)]
> if (!is.na(GEIA.emis.grid.val)) {
> if (is.na(global.emis.mx[i.lat, i.lon])) {
> global.emis.mx[i.lat, i.lon] <- GEIA.emis.grid.val
> # start debug
> # cat(sprintf(
> # 'debug: %s: writing val=%f to global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
> # this.fn, GEIA.emis.grid.val, i.lon, i.lat, lon, lat))
> # end debug
> } else {
> # error if overwriting presumably-previously-written non-NA!
> cat(sprintf(
> 'ERROR: %s: overwriting val=%f with val=%f at global.emis.mx[%.0f,%.0f] for grid center=[%f,%f]\n',
> this.fn, global.emis.mx[i.lat, i.lon], GEIA.emis.grid.val,
> i.lon, i.lat, lon, lat))
> return # TODO: abend
> } # end testing target != NA (thus not previously written)
> } # end testing source != NA (don't write if is.na(lookup)
> } # end for loop over lats
> } # end for loop over lons
> # Now draw the damn thing
> # 1: TODO: FIXME: why do I need to transpose global.emis.mx?
> image(lon.vec, lat.vec, t(global.emis.mx))
> # 2,3,4: how it should work ?!?
> # image(lon.vec, lat.vec, global.emis.mx)
> map(add=TRUE)
> # </visual input check>
> # write output to netCDF
> library(ncdf4)
> # output file path (not currently used by package=ncdf4)
> netcdf.fp <- sprintf('%s/%s', netcdf.dir, netcdf.fn)
1> # create dimensions and dimensional variables
1> time.vec <- c(0) # annual value, corresponding to no specific year
1> time.dim <- ncdim_def(
1> name=time.var.name,
1> units=time.var.units,
1> vals=time.vec,
1> unlim=TRUE,
1> create_dimvar=TRUE,
1> calendar=time.var.calendar,
1> longname=time.var.long_name)
1> lon.dim <- ncdim_def(
1> name=lon.var.name,
1> units=lon.var.units,
1> vals=lon.vec,
1> unlim=FALSE,
1> create_dimvar=TRUE,
1> longname=lon.var.long_name)
1> lat.dim <- ncdim_def(
1> name=lat.var.name,
1> units=lat.var.units,
1> vals=lat.vec,
1> unlim=FALSE,
1> create_dimvar=TRUE,
1> longname=lat.var.long_name)
2> # create data variable (as container--can't put data until we have a file)
2> emis.var <- ncvar_def(
2> name=emis.var.name,
2> units=emis.var.units,
2> # dim=c(time.dim, lat.dim, lon.dim),
2> # dim=list(time.dim, lat.dim, lon.dim),
2> # note dim order desired for result=var(time, lat, lon)
2> dim=list(lon.dim, lat.dim, time.dim),
2> missval=as.double(emis.var._FillValue),
2> longname=emis.var.long_name,
2> prec="double")
> # get current time for creation_date
> # system(intern=TRUE) -> return char vector, one member per output line)
> netcdf.timestamp <- system('date', intern=TRUE)
> # create netCDF file
> netcdf.file <- nc_create(
> filename=netcdf.fn,
> # vars=list(emis.var),
> # verbose=TRUE)
> vars=list(emis.var))
> # Write data to data variable: gotta have file first.
> # Gotta convert 2d global.emis.mx[lat,lon] to 3d global.emis.arr[time,lat,lon]
> # Do this before adding _FillValue to prevent:
> # > Error in R_nc4_put_vara_double: NetCDF: Not a valid data type or _FillValue type mismatch
> ## global.emis.arr <- global.emis.mx
> ## dim(global.emis.arr) <- c(1, dim(global.emis.mx))
> ## global.emis.arr[1,,] <- global.emis.mx
> # Note
> # * global.emis.mx[lat,lon]
> # * datavar needs [lon, lat, time] (with time *last*)
> ncvar_put(
> nc=netcdf.file,
> varid=emis.var,
> # vals=global.emis.arr,
> vals=t(global.emis.mx),
> # start=rep.int(1, length(dim(global.emis.arr))),
> start=c(1, 1, 1),
> # count=dim(global.emis.arr))
> count=c(-1,-1, 1)) # -1 -> all data
> # Write netCDF attributes
> # Note: can't pass *.dim as varid, even though these are coordinate vars :-(
> # add datavar attributes
> ncatt_put(
> nc=netcdf.file,
> # varid=lon.var,
> varid=lon.var.name,
> attname="comment",
> attval=lon.var.comment,
> prec="text")
> ncatt_put(
> nc=netcdf.file,
> # varid=lat.var,
> varid=lat.var.name,
> attname="comment",
> attval=lat.var.comment,
> prec="text")
> # put _FillValue after putting data!
> ncatt_put(
> nc=netcdf.file,
> varid=emis.var,
> attname="_FillValue",
> attval=emis.var._FillValue,
> prec="float") # why is "emi_n2o:missing_value = -999."?
> # add global attributes (varid=0)
> ncatt_put(
> nc=netcdf.file,
> varid=0,
> attname="creation_date",
> attval=netcdf.timestamp,
> prec="text")
> ncatt_put(
> nc=netcdf.file,
> varid=0,
> attname="source_file",
> attval=netcdf.source_file,
> prec="text")
> ncatt_put(
> nc=netcdf.file,
> varid=0,
> attname="Conventions",
> attval=netcdf.Conventions,
> prec="text")
> # flush to file (there may not be data on disk before this point)
> # nc_sync(netcdf.file) # so we don't hafta reopen the file, below
> # Nope: per David W. Pierce Mon, 27 Aug 2012 21:35:35 -0700, ncsync is not enough
> nc_close(netcdf.file)
> nc_open(netcdf.fn,
> write=FALSE, # will only read below
> readunlim=TRUE) # it's a small file
> # <simple output check>
> system(sprintf('ls -alth %s', netcdf.fp))
> system(sprintf('ncdump -h %s', netcdf.fp))
> # </simple output check>
> # <visual output check>
> # TODO: do plot-related refactoring! allow to work with projects={ioapi, this}
> # <copied from plotLayersForTimestep.r>
> library(fields)
> # double-sprintf-ing to set precision by constant: cool or brittle?
> stats.precision <- 3 # sigdigs to use for min, median, max of obs
> stat.str <- sprintf('%%.%ig', stats.precision)
> # use these in function=subtitle.stats as sprintf inputs
> max.str <- sprintf('max=%s', stat.str)
> med.str <- sprintf('med=%s', stat.str)
> min.str <- sprintf('min=%s', stat.str)
> # </copied from plotLayersForTimestep.r>
> # Get the data out of the datavar, to test reusability
> # target.data <- emis.var[,,1] # fails, with
> # > Error in emis.var[, , 1] : incorrect number of dimensions
> target.data <- ncvar_get(
> nc=netcdf.file,
> # varid=emis.var,
> varid=emis.var.name,
> # read all the data
> # start=rep(1, emis.var$ndims),
> start=c(1, 1, 1),
> # count=rep(-1, emis.var$ndims))
> count=c(-1, -1, 1))
> # MAJOR: all of the above fail with
> # > Error in if (nc$var[[li]]$hasAddOffset) addOffset = nc$var[[li]]$addOffset else addOffset = 0 :
> # > argument is of length zero
> # Note that, if just using the raw data, the following plot code works.
> target.data <- t(global.emis.mx)
> # <simple output check/>
> dim(target.data) # n.lon, n.lat
> # <copied from windowEmissions.r>
> palette.vec <- c("grey","purple","deepskyblue2","green","yellow","orange","red","brown")
> colors <- colorRampPalette(palette.vec)
> probabilities.vec <- seq(0, 1, 1.0/(length(palette.vec) - 1))
> # </copied from windowEmissions.r>
> # <copy/mod from plotLayersForTimestep.r>
> plot.layer(target.data,
> title=netcdf.title,
> subtitle=subtitle.stats(target.data),
> x.centers=lon.vec,
> y.centers=lat.vec,
> q.vec=probabilities.vec,
> colors=colors)
> # </copy/mod from plotLayersForTimestep.r>
> map(add=TRUE)
> # </visual output check>
> # teardown
> dev.off()
> nc_close(netcdf.file)
More information about the R-help
mailing list