[R-sig-Geo] Parse EWKT Polygon/MultiLineString/MultiPolygon Text

Josh O'Brien joshmobrien at gmail.com
Fri Nov 13 20:20:20 CET 2015


Here is a good start, at least:

library(rgl)

## Helper functions
isLeaf <- function(x) {
    length(gregexpr("(", x, fixed=TRUE)[[1]]) == 1
}

extractBranches <- function(x) {
    x <- gsub("^\\(|\\)$", "",  x)
    pattern <- "\\(((?>[^()]+|(?R))*)\\)"
    m <- gregexpr(pattern, x, perl=TRUE)
    regmatches(x,m)[[1]]
}

leafToMatrix <- function(s, isPoly=FALSE) {
    x <- gsub("[\\(\\),]", " ", s)
    v <- scan(textConnection(x), quiet=TRUE)
    m <- matrix(v, ncol=4, byrow=TRUE)[,1:3]
    if(isPoly & !identical(m[1,], m[nrow(m),])) m <- rbind(m, m[1,])
    m
}

## (A few simple function calls, to see what the helper functions do.)
isLeaf("(0 0)")
isLeaf("((0 0),(1,1)")
extractBranches("((0 0),((1 1),(2 2)))")
leafToMatrix("(0 0 0 0, 1 1 0 1, 1 2 2 2)")


## The parsing engine
parseBranches <- function(x, isPoly=FALSE) {
    lapply(x, function(X) {
               if (isLeaf(X)) {
                   leafToMatrix(X, isPoly = isPoly)
               } else {
                   parseBranches(extractBranches(X), isPoly = isPoly)
               }
           })
}
parseBranches("((0 0 0 0, 9 9 9 9),((1 1 1 1, 6 6 6 6),(2 2 2 2,5 5 5 5)))")


## Main function, to be passed a single WKT string
parseWKT <- function(x) {
    x <- gsub("\n", "", x)
    geometry <- gsub("(^[^\\(]*)(.*)", "\\1", x)
    isPoly <- grepl("POLYGON", geometry)
    x <- gsub(geometry, "", x)
    parseBranches(x, isPoly=isPoly)[[1]]
}



## Examples
A <- "SRID=32611;MULTIPOLYGON(((0 0 0 0,4 0 1 0,4 4 1 0,0 4 1 0,0 0 1 0),
(1 1 1 0,2 1 1 0,2 2 1 0,1 2 1 0,1 1 0 0)),
((-1 -1 1 0,-1 -2 1 0,-2 -2 1 0,-2 -1 1 0,-1 -1 1 0)))"
AA <- parseWKT(A)
colors <- c("red", "blue", "green")
for(i in seq_along(AA)) lapply(AA[[i]], lines3d, color=colors[i])

B <- "SRID=32611;MULTILINESTRING((0 0 0 0,1 1 0 1,1 2 2 2),
(2 3 2 0,3 2 2 1,5 4 3 3))"
BB <- parseWKT(B)
colors <- c("green", "gold", "black")
for(i in seq_along(BB)) lines3d(BB[[i]], color=colors[i])

C <- "SRID=32611;LINESTRING(0 0 0 0,1 1 1 1,1 2 2 2 )"
CC <- parseWKT(C)
lines3d(CC)



More information about the R-sig-Geo mailing list