Advanced Features

2025-12-17

library(manureshed)
#> 
#> =================================================================
#> manureshed package loaded successfully!
#> =================================================================
#> 
#> Built-in Data (Downloaded on-demand from OSF):
#>   • NuGIS data: 1987 - 2016 (all spatial scales)
#>   • WWTP data: 2007 - 2016 (nitrogen and phosphorus)
#>   • Spatial boundaries: county, HUC8, HUC2
#>   • Texas supplemental data (automatic for HUC8)
#> 
#> Quick Start:
#>   check_builtin_data()           # Check data availability
#>   download_all_data()            # Download all datasets
#>   quick_analysis()               # Complete analysis + visuals
#>   ?run_builtin_analysis          # Main workflow function
#> 
#> Data Management:
#>   clear_data_cache()             # Clear downloaded data
#>   download_osf_data()            # Download specific dataset
#> 
#> Documentation:
#>   vignette('getting-started')    # Getting started guide
#>   ?manureshed                    # Package overview
#> =================================================================
#> 
#> Data Summary:
#>   OSF Repository: https://osf.io/g39xa/
#>   Available scales: county, huc8, huc2
#>   Years available: 1987 - 2016
#>   WWTP years: 2007 - 2016 (nitrogen, phosphorus)
#>   Methodology Paper: Akanbi, O.D., Gupta, A., Mandayam, V., Flynn, K.C.,
#>       Yarus, J.M., Barcelos, E.I., French, R.H., 2026. Towards circular nutrient economies: An
#>       integrated manureshed framework for agricultural and municipal resource management.
#>       Resources, Conservation and Recycling, https://doi.org/10.1016/j.resconrec.2025.108697
#> 
#>   Cached datasets: 12/10 downloaded
#> 

Overview

This vignette covers advanced features for power users:

Custom Thresholds

Understanding Thresholds

The package excludes areas with little cropland from analysis. The default is 500 hectares (1,234 acres), but you can change this:

# More restrictive (exclude more areas)
results_conservative <- run_builtin_analysis(
  scale = "huc8",
  year = 2016,
  nutrients = "nitrogen",
  cropland_threshold = 2000  # 2000 acres instead of 1234
)

# Less restrictive (include more areas)
results_liberal <- run_builtin_analysis(
  scale = "huc8", 
  year = 2016,
  nutrients = "nitrogen",
  cropland_threshold = 500   # 500 acres
)

# Compare the difference
conservative_excluded <- sum(results_conservative$agricultural$N_class == "Excluded")
liberal_excluded <- sum(results_liberal$agricultural$N_class == "Excluded")

print(paste("Conservative:", conservative_excluded, "excluded"))
print(paste("Liberal:", liberal_excluded, "excluded"))

Automatic Threshold Calculation

For HUC8 and HUC2 scales, the package calculates thresholds automatically based on county data:

# Load data for threshold calculation
county_data <- load_builtin_nugis("county", 2016)
huc8_data <- load_builtin_nugis("huc8", 2016)

# Calculate appropriate threshold for HUC8
custom_threshold <- get_cropland_threshold(
  scale = "huc8",
  county_data = county_data,
  target_data = huc8_data,
  baseline_ha = 750  # Use 750 ha instead of 500 ha baseline
)

print(paste("Custom threshold:", round(custom_threshold, 2), "acres"))

# Use in analysis
results <- run_builtin_analysis(
  scale = "huc8",
  year = 2016,
  nutrients = "nitrogen",
  cropland_threshold = custom_threshold
)

State-Specific Analysis

Single State Analysis

# Analyze specific states
iowa_results <- run_state_analysis(
  state = "IA",
  scale = "county", 
  year = 2016,
  nutrients = c("nitrogen", "phosphorus"),
  include_wwtp = TRUE
)

# Quick state analysis with maps
texas_results <- quick_state_analysis(
  state = "TX",
  scale = "huc8",
  year = 2015,
  nutrients = "nitrogen",
  create_maps = TRUE,
  create_networks = TRUE
)

Multi-State Comparison

# Compare agricultural states
corn_belt_states <- c("IA", "IL", "IN", "NE", "OH")
state_results <- list()

for (state in corn_belt_states) {
  state_results[[state]] <- run_state_analysis(
    state = state,
    scale = "county",
    year = 2016, 
    nutrients = "nitrogen",
    include_wwtp = TRUE,
    verbose = FALSE  # Reduce output
  )
}

# Compare state summaries
state_summary <- do.call(rbind, lapply(names(state_results), function(state) {
  result <- state_results[[state]]
  classes <- table(result$agricultural$N_class)
  data.frame(
    State = state,
    Total_Counties = sum(classes),
    Source_Counties = classes["Source"] %||% 0,
    Sink_Counties = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE),
    Source_Percent = round((classes["Source"] %||% 0) / sum(classes) * 100, 1)
  )
}))

print(state_summary)

Batch Processing

Multiple Years

# Analyze multiple years efficiently
batch_results <- batch_analysis_years(
  years = 2012:2016,
  scale = "huc8",
  nutrients = "nitrogen", 
  include_wwtp = TRUE,
  create_comparative_plots = TRUE
)

# Check which years succeeded
successful_years <- names(batch_results$results)
print(paste("Successful years:", paste(successful_years, collapse = ", ")))

Parallel Processing

For faster processing of multiple analyses:

# Use multiple CPU cores
parallel_results <- batch_analysis_parallel(
  years = 2014:2016,
  n_cores = 2,  # Use 2 cores
  scale = "county",
  nutrients = "nitrogen",
  include_wwtp = TRUE
)

# Check results
successful <- sum(!sapply(parallel_results, function(x) "error" %in% names(x)))
print(paste("Successful analyses:", successful, "out of", length(parallel_results)))

Enhanced Batch Analysis

For comprehensive batch analysis with full visualizations:

# Full batch analysis with all visualizations
enhanced_results <- batch_analysis_enhanced(
  years = 2015:2016,  # Use fewer years for demonstration
  scale = "county",
  nutrients = "nitrogen",
  create_all_visualizations = TRUE,
  create_comparative_plots = TRUE
)

Performance Optimization

Benchmarking

Test analysis performance:

# Benchmark different configurations
benchmark_results <- benchmark_analysis(
  scale = "county",
  year = 2016,
  nutrients = "nitrogen",
  n_runs = 3,
  include_wwtp = TRUE
)

print(benchmark_results)

# Compare scales
scales_to_test <- c("county", "huc8", "huc2")
benchmark_comparison <- list()

for (scale in scales_to_test) {
  benchmark_comparison[[scale]] <- benchmark_analysis(
    scale = scale,
    year = 2016,
    nutrients = "nitrogen", 
    n_runs = 2,
    include_wwtp = TRUE
  )
}

# Compare timing
timing_comparison <- data.frame(
  Scale = scales_to_test,
  Mean_Time_Seconds = sapply(benchmark_comparison, function(x) x$timing$mean),
  Memory_MB = sapply(benchmark_comparison, function(x) x$memory_mb$mean)
)

print(timing_comparison)

Memory Management

# Clear cache to free up space
clear_data_cache()

# Check package health
health_status <- health_check(verbose = TRUE)

# Test OSF connection
connection_ok <- test_osf_connection()
print(paste("OSF connection:", ifelse(connection_ok, "OK", "Failed")))

Advanced Spatial Analysis

Transition Probabilities

Analyze how nutrient classifications change across space:

# Run analysis
results <- run_builtin_analysis(
  scale = "huc8",
  year = 2016,
  nutrients = "nitrogen",
  include_wwtp = TRUE
)

# Calculate centroids for spatial analysis
centroids <- add_centroid_coordinates(results$integrated$nitrogen)

# Calculate transition probabilities
agri_transitions <- calculate_transition_probabilities(
  centroids, "N_class"
)

combined_transitions <- calculate_transition_probabilities(
  centroids, "combined_N_class"
)

# Compare transition matrices
print("Agricultural transitions:")
print(agri_transitions)

print("Combined (WWTP + Agricultural) transitions:")
print(combined_transitions)

# Create network plots
create_network_plot(
  agri_transitions, "nitrogen", "Agricultural",
  "network_agricultural.png"
)

create_network_plot(
  combined_transitions, "nitrogen", "Combined", 
  "network_combined.png"
)

Spatial Statistics

# Calculate spatial statistics
library(sf)

# Get results as spatial data
spatial_results <- results$integrated$nitrogen

# Calculate areas of different classifications
class_areas <- spatial_results %>%
  mutate(area_km2 = as.numeric(st_area(.)) / 1000000) %>%
  st_drop_geometry() %>%
  group_by(combined_N_class) %>%
  summarise(
    count = n(),
    total_area_km2 = sum(area_km2),
    mean_area_km2 = mean(area_km2),
    .groups = 'drop'
  )

print(class_areas)

# Find neighboring relationships
neighbors <- st_touches(spatial_results)
neighbor_summary <- data.frame(
  ID = spatial_results$ID,
  N_neighbors = lengths(neighbors),
  Class = spatial_results$combined_N_class
)

print("Average neighbors by classification:")
neighbor_summary %>%
  group_by(Class) %>%
  summarise(mean_neighbors = mean(N_neighbors), .groups = 'drop')

Custom Analysis Workflows

Research-Specific Analysis

# Example: Livestock-intensive regions analysis
analyze_livestock_regions <- function(states, year = 2016) {
  
  results <- list()
  
  for (state in states) {
    # Custom threshold for livestock regions
    county_data <- load_builtin_nugis("county", year)
    
    # Calculate state-specific threshold
    state_county <- county_data[substr(county_data$ID, 1, 2) == get_state_fips(state), ]
    custom_threshold <- quantile(state_county$cropland, 0.25)  # 25th percentile
    
    # Run analysis
    state_result <- run_state_analysis(
      state = state,
      scale = "county",
      year = year,
      nutrients = "nitrogen",
      include_wwtp = TRUE,
      cropland_threshold = custom_threshold,
      verbose = FALSE
    )
    
    results[[state]] <- state_result
  }
  
  return(results)
}

# Apply to livestock states
livestock_states <- c("IA", "NE", "NC", "MN")
livestock_results <- analyze_livestock_regions(livestock_states, 2016)

Time Series Analysis

# Custom time series analysis
analyze_trends <- function(scale, years, nutrient = "nitrogen") {
  
  trend_data <- list()
  
  for (year in years) {
    # Check if WWTP data available
    use_wwtp <- year >= 2007 && year <= 2016
    
    result <- run_builtin_analysis(
      scale = scale,
      year = year,
      nutrients = nutrient,
      include_wwtp = use_wwtp,
      save_outputs = FALSE,
      verbose = FALSE
    )
    
    # Extract classification summary
    if (use_wwtp && "integrated" %in% names(result)) {
      classes <- table(result$integrated[[nutrient]][[paste0("combined_", toupper(substr(nutrient, 1, 1)), "_class")]])
    } else {
      classes <- table(result$agricultural[[paste0(toupper(substr(nutrient, 1, 1)), "_class")]])
    }
    
    trend_data[[as.character(year)]] <- list(
      year = year,
      wwtp_included = use_wwtp,
      classes = classes,
      source_count = classes["Source"] %||% 0,
      sink_count = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE)
    )
  }
  
  return(trend_data)
}

# Analyze nitrogen trends
nitrogen_trends <- analyze_trends("huc8", 2008:2016, "nitrogen")

# Convert to data frame for plotting
trend_df <- do.call(rbind, lapply(nitrogen_trends, function(x) {
  data.frame(
    Year = x$year,
    WWTP_Included = x$wwtp_included,
    Source_Count = x$source_count,
    Sink_Count = x$sink_count,
    Total_Units = sum(x$classes)
  )
}))

print(trend_df)

Data Export and Integration

Export for Other Software

# Export for GIS software
gis_files <- export_for_gis(
  results,
  output_dir = "gis_export",
  formats = c("shapefile", "geojson")
)

# Export for publication
pub_files <- export_for_publication(
  results,
  output_dir = "publication_export",
  dpi = 600
)

# Export for policy makers
policy_files <- export_for_policy(
  results,
  output_dir = "policy_export"
)

print("Created files:")
print(c(gis_files, pub_files, policy_files))

Integration with Other Packages

# Example: Using with tigris for custom boundaries
if (requireNamespace("tigris", quietly = TRUE)) {
  # Get state boundary
  ohio_boundary <- tigris::states() %>%
    filter(STUSPS == "OH") %>%
    st_transform(5070)  # Transform to analysis CRS
  
  # Spatial filter results
  ohio_watersheds <- st_filter(results$integrated$nitrogen, ohio_boundary)
  
  print(paste("Ohio watersheds:", nrow(ohio_watersheds)))
}

# Example: Using with nhdplusTools for watershed delineation
if (requireNamespace("nhdplusTools", quietly = TRUE)) {
  # Find watershed for a specific point
  point <- st_sfc(st_point(c(-83.0458, 42.3314)), crs = 4326)  # Detroit
  watershed_info <- nhdplusTools::discover_nhdplus_id(point)
  
  # Filter results to this watershed
  if (!is.null(watershed_info$huc8)) {
    watershed_result <- results$integrated$nitrogen %>%
      filter(ID == watershed_info$huc8)
    print(watershed_result)
  }
}

Quality Control

Advanced Validation

# Comprehensive quality check
validation_report <- list()

# Check data completeness
validation_report$completeness <- list(
  total_units = nrow(results$agricultural),
  missing_n_class = sum(is.na(results$agricultural$N_class)),
  excluded_percent = sum(results$agricultural$N_class == "Excluded", na.rm = TRUE) / 
                    nrow(results$agricultural) * 100
)

# Check balance calculations
if ("integrated" %in% names(results)) {
  n_data <- results$integrated$nitrogen
  validation_report$balance_check <- list(
    impossible_sources = sum(n_data$combined_N_surplus <= 0 & 
                            n_data$combined_N_class == "Source", na.rm = TRUE),
    impossible_sinks = sum(n_data$combined_N_surplus > 0 & 
                          n_data$combined_N_class %in% c("Sink_Deficit", "Sink_Fertilizer"), na.rm = TRUE)
  )
}

# Check spatial validity
if (inherits(results$agricultural, "sf")) {
  validation_report$spatial_check <- list(
    invalid_geometries = sum(!st_is_valid(results$agricultural)),
    crs = st_crs(results$agricultural)$input
  )
}

print("Validation Report:")
print(str(validation_report))

Tips for Advanced Users

Performance Tips

# 1. Use appropriate scales
# County: ~3000 units, good for policy analysis
# HUC8: ~2000 units, good for watershed analysis  
# HUC2: ~18 units, good for regional analysis

# 2. Manage memory for large analyses
gc()  # Garbage collection
clear_data_cache()  # Clear package cache

# 3. Use parallel processing for multiple years
# But be careful not to use too many cores

# 4. Save intermediate results
save_spatial_data(results$agricultural, "intermediate_results.rds")

Reproducibility

# Always document your analysis parameters
analysis_params <- list(
  scale = "huc8",
  year = 2016,
  nutrients = "nitrogen", 
  cropland_threshold = 1234,
  include_wwtp = TRUE,
  analysis_date = Sys.Date(),
  package_version = packageVersion("manureshed")
)

# Save parameters with results
results$analysis_params <- analysis_params
save_analysis_summary(results, "analysis_summary.json", format = "json")

This covers the main advanced features. The package is designed to be flexible for power users while remaining accessible for basic analyses.