## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) library(hexify) library(sf) library(ggplot2) ## ----one-grid-many-datasets--------------------------------------------------- # Step 1: Define the grid once # This is your shared spatial reference system grid <- hex_grid(area_km2 = 5000) print(grid) # Step 2: Create multiple datasets with different structures set.seed(123) # Dataset 1: Bird observations (note different column names) bird_obs <- data.frame( species = sample(c("Passer domesticus", "Turdus merula", "Parus major"), 50, replace = TRUE), longitude = runif(50, -10, 30), latitude = runif(50, 35, 60) ) # Dataset 2: Mammal records mammal_obs <- data.frame( species = sample(c("Vulpes vulpes", "Meles meles", "Sciurus vulgaris"), 40, replace = TRUE), lon = runif(40, -10, 30), lat = runif(40, 35, 60) ) # Dataset 3: Climate stations climate_data <- data.frame( station_id = paste0("WS", 1:20), x = runif(20, -10, 30), y = runif(20, 35, 60), temp_c = rnorm(20, 12, 5) ) # Step 3: Attach all datasets to the SAME grid birds <- hexify(bird_obs, lon = "longitude", lat = "latitude", grid = grid) mammals <- hexify(mammal_obs, lon = "lon", lat = "lat", grid = grid) climate <- hexify(climate_data, lon = "x", lat = "y", grid = grid) cat("Birds: ", nrow(as.data.frame(birds)), "observations in", n_cells(birds), "cells\n") cat("Mammals:", nrow(as.data.frame(mammals)), "observations in", n_cells(mammals), "cells\n") cat("Climate:", nrow(as.data.frame(climate)), "stations in", n_cells(climate), "cells\n") ## ----cell-level-analysis------------------------------------------------------ # Extract data frames with cell IDs birds_df <- as.data.frame(birds) birds_df$cell_id <- birds@cell_id mammals_df <- as.data.frame(mammals) mammals_df$cell_id <- mammals@cell_id climate_df <- as.data.frame(climate) climate_df$cell_id <- climate@cell_id # Aggregate each dataset by cell bird_richness <- aggregate( species ~ cell_id, data = birds_df, FUN = function(x) length(unique(x)) ) names(bird_richness)[2] <- "bird_species" mammal_richness <- aggregate( species ~ cell_id, data = mammals_df, FUN = function(x) length(unique(x)) ) names(mammal_richness)[2] <- "mammal_species" mean_temp <- aggregate( temp_c ~ cell_id, data = climate_df, FUN = mean ) names(mean_temp)[2] <- "mean_temp" # Join datasets by cell_id - guaranteed to align because same grid combined <- merge(bird_richness, mammal_richness, by = "cell_id", all = TRUE) combined <- merge(combined, mean_temp, by = "cell_id", all = TRUE) head(combined) ## ----grid-rect, fig.width=7, fig.height=5------------------------------------- # Create grid specification grid <- hex_grid(area_km2 = 5000) # Generate hexagons over Western Europe europe_hexes <- grid_rect(c(-10, 35, 25, 60), grid) # Get European countries for context europe <- hexify_world[hexify_world$continent == "Europe", ] ggplot() + geom_sf(data = europe, fill = "gray95", color = "gray60") + geom_sf(data = europe_hexes, fill = NA, color = "steelblue", linewidth = 0.4) + coord_sf(xlim = c(-10, 25), ylim = c(35, 60)) + labs(title = sprintf("Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) + theme_minimal() ## ----grid-polygon, fig.width=6, fig.height=6---------------------------------- # Get France boundary france <- hexify_world[hexify_world$name == "France", ] # Generate grid covering mainland France grid <- hex_grid(area_km2 = 2000) france_grid <- grid_rect(c(-5, 41, 10, 52), grid) # Clip grid to France boundary france_grid_clipped <- st_intersection(france_grid, st_geometry(france)) ggplot() + geom_sf(data = france, fill = "gray95", color = "gray40", linewidth = 0.5) + geom_sf(data = france_grid_clipped, fill = alpha("steelblue", 0.3), color = "steelblue", linewidth = 0.3) + coord_sf(xlim = c(-5, 10), ylim = c(41, 52)) + labs(title = sprintf("Hexagonal Grid Clipped to France (~%.0f km² cells)", grid@area_km2)) + theme_minimal() ## ----grid-global, fig.width=8, fig.height=4, warning=FALSE-------------------- # Coarse global grid (be careful with fine grids - many cells!) grid <- hex_grid(area_km2 = 500000) global_hexes <- grid_global(grid) ggplot() + geom_sf(data = hexify_world, fill = "gray90", color = "gray70", linewidth = 0.2) + geom_sf(data = global_hexes, fill = NA, color = "darkgreen", linewidth = 0.3) + labs(title = sprintf("Global Hexagonal Grid (~%.0f km² cells)", grid@area_km2)) + theme_minimal() + theme(axis.text = element_blank(), axis.ticks = element_blank()) ## ----multi-resolution--------------------------------------------------------- # Sample data set.seed(42) observations <- data.frame( species = sample(c("Species A", "Species B", "Species C"), 100, replace = TRUE), lon = runif(100, -10, 30), lat = runif(100, 35, 60) ) # Fine resolution (~1000 km² cells) grid_fine <- hex_grid(area_km2 = 1000) obs_fine <- hexify(observations, lon = "lon", lat = "lat", grid = grid_fine) # Coarse resolution (~10000 km² cells) grid_coarse <- hex_grid(area_km2 = 10000) obs_coarse <- hexify(observations, lon = "lon", lat = "lat", grid = grid_coarse) cat(sprintf("Fine resolution: %d unique cells (area: %.1f km²)\n", n_cells(obs_fine), grid_fine@area_km2)) cat(sprintf("Coarse resolution: %d unique cells (area: %.1f km²)\n", n_cells(obs_coarse), grid_coarse@area_km2)) ## ----multi-scale-aggregate---------------------------------------------------- # Extract data with cell IDs fine_df <- as.data.frame(obs_fine) fine_df$cell_id <- obs_fine@cell_id coarse_df <- as.data.frame(obs_coarse) coarse_df$cell_id <- obs_coarse@cell_id # Species richness at each scale richness_fine <- aggregate(species ~ cell_id, data = fine_df, FUN = function(x) length(unique(x))) richness_coarse <- aggregate(species ~ cell_id, data = coarse_df, FUN = function(x) length(unique(x))) cat(sprintf("Fine scale: mean %.2f species per cell\n", mean(richness_fine$species))) cat(sprintf("Coarse scale: mean %.2f species per cell\n", mean(richness_coarse$species))) ## ----spatial-joins------------------------------------------------------------ # Dataset 1: Weather stations stations <- data.frame( station_id = paste0("ST", 1:50), lon = runif(50, -10, 30), lat = runif(50, 35, 60), temperature = rnorm(50, 15, 5) ) # Dataset 2: Cities cities <- data.frame( city = c("Vienna", "Paris", "London", "Berlin", "Rome", "Madrid", "Prague", "Warsaw", "Budapest", "Amsterdam"), lon = c(16.37, 2.35, -0.12, 13.40, 12.50, -3.70, 14.42, 21.01, 19.04, 4.90), lat = c(48.21, 48.86, 51.51, 52.52, 41.90, 40.42, 50.08, 52.23, 47.50, 52.37) ) # Use a coarse grid for joining disparate points grid <- hex_grid(area_km2 = 50000) # Hexify both datasets with the same grid stations_hex <- hexify(stations, lon = "lon", lat = "lat", grid = grid) cities_hex <- hexify(cities, lon = "lon", lat = "lat", grid = grid) # Extract with cell IDs stations_df <- as.data.frame(stations_hex) stations_df$cell_id <- stations_hex@cell_id cities_df <- as.data.frame(cities_hex) cities_df$cell_id <- cities_hex@cell_id # Join by cell_id city_weather <- merge( cities_df[, c("city", "cell_id")], aggregate(temperature ~ cell_id, data = stations_df, FUN = mean), by = "cell_id", all.x = TRUE ) city_weather ## ----choosing-resolution------------------------------------------------------ # Target: 100 km² cells grid_100 <- hex_grid(area_km2 = 100, aperture = 3) cat(sprintf("Target ~100 km²: resolution %d (actual: %.1f km²)\n", grid_100@resolution, grid_100@area_km2)) # Target: 1000 km² cells grid_1000 <- hex_grid(area_km2 = 1000, aperture = 3) cat(sprintf("Target ~1000 km²: resolution %d (actual: %.1f km²)\n", grid_1000@resolution, grid_1000@area_km2)) # Target: 10000 km² cells grid_10000 <- hex_grid(area_km2 = 10000, aperture = 3) cat(sprintf("Target ~10000 km²: resolution %d (actual: %.1f km²)\n", grid_10000@resolution, grid_10000@area_km2)) ## ----resolution-table, echo=FALSE--------------------------------------------- comparison <- hexify_compare_resolutions(aperture = 3, res_range = 0:15) comparison$n_cells_fmt <- ifelse( comparison$n_cells > 1e6, sprintf("%.1fM", comparison$n_cells / 1e6), ifelse(comparison$n_cells > 1e3, sprintf("%.1fK", comparison$n_cells / 1e3), as.character(comparison$n_cells)) ) knitr::kable( comparison[, c("resolution", "n_cells_fmt", "cell_area_km2", "cell_spacing_km")], col.names = c("Resolution", "# Cells", "Cell Area (km²)", "Spacing (km)"), digits = 1 ) ## ----compare-apertures-------------------------------------------------------- target_area <- 1000 # km² cat(sprintf("Target: ~%d km² cells\n\n", target_area)) for (ap in c(3, 4, 7)) { grid <- hex_grid(area_km2 = target_area, aperture = ap) n_cells <- 10 * (ap^grid@resolution) + 2 cat(sprintf("Aperture %d: resolution %d -> %.1f km² (%s cells globally)\n", ap, grid@resolution, grid@area_km2, format(n_cells, big.mark = ","))) } ## ----sf-workflow-------------------------------------------------------------- # Hexify some data grid <- hex_grid(area_km2 = 20000) result <- hexify(cities, lon = "lon", lat = "lat", grid = grid) # Convert to sf points (uses cell centers) sf_points <- as_sf(result, geometry = "point") class(sf_points) # Convert to sf polygons (for choropleth maps) sf_polys <- as_sf(result, geometry = "polygon") class(sf_polys) # Or generate polygons directly from cell IDs unique_cells <- cells(result) cell_polys <- cell_to_sf(unique_cells, grid) ## ----sf-plot, fig.width=7, fig.height=5--------------------------------------- europe <- hexify_world[hexify_world$continent == "Europe", ] ggplot() + geom_sf(data = europe, fill = "ivory", color = "gray70") + geom_sf(data = cell_polys, fill = "steelblue", alpha = 0.5, color = "darkblue") + coord_sf(xlim = c(-10, 25), ylim = c(35, 58)) + labs(title = "European Cities - Hexagonal Grid") + theme_minimal() ## ----export-formats, eval=FALSE----------------------------------------------- # # Generate a grid # grid <- hex_grid(area_km2 = 10000) # europe <- grid_rect(c(-10, 35, 25, 60), grid) # # # Export to various formats # st_write(europe, "europe_grid.gpkg", layer = "hexgrid") # GeoPackage # st_write(europe, "europe_grid.shp") # Shapefile # st_write(europe, "europe_grid.geojson") # GeoJSON # st_write(europe, "europe_grid.kml", layer = "hexgrid") # KML (Google Earth) ## ----edge-cases--------------------------------------------------------------- data_with_na <- data.frame( lon = c(16.37, NA, 2.35, 13.40), lat = c(48.21, 48.86, NA, 52.52) ) grid <- hex_grid(area_km2 = 1000) result <- hexify(data_with_na, lon = "lon", lat = "lat", grid = grid) # Check which rows have valid cell assignments cat("Cell IDs:", result@cell_id, "\n") cat("NA indicates invalid coordinates\n") ## ----dateline, eval=FALSE----------------------------------------------------- # # For polygons crossing the date line # wrapped <- st_wrap_dateline( # polygons, # options = c("WRAPDATELINE=YES", "DATELINEOFFSET=180"), # quiet = TRUE # )