From 5dcb1f7f537e1719ee7ebb55266c26fb446b6034 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 8 Jan 2025 10:26:22 -0100 Subject: [PATCH 01/57] WIP: geometry lookup --- Project.toml | 8 +- src/Rasters.jl | 4 + src/geometry_lookup.jl | 226 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+), 2 deletions(-) create mode 100644 src/geometry_lookup.jl diff --git a/Project.toml b/Project.toml index 8ef210f98..c087c7b84 100644 --- a/Project.toml +++ b/Project.toml @@ -16,6 +16,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Flatten = "4c728ea3-d9ee-5c9a-9642-b6f7d7dc04fa" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +GeometryOps = "3251bfac-6a57-4b6d-aa61-ac1fef2975ab" GeometryOpsCore = "05efe853-fabf-41c8-927e-7063c8b9f013" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" @@ -25,6 +26,7 @@ ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" +SortTileRecursiveTree = "746ee33f-1797-42c2-866d-db2fce69d14d" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [weakdeps] @@ -69,8 +71,9 @@ GeoDataFrames = "0.3" GeoFormatTypes = "0.4" GeoInterface = "1.0" GeometryBasics = "0.4" -GeometryOpsCore = "0.1.1" -Makie = "0.20, 0.21" +GeometryOps = "0.1.12" +GeometryOpsCore = "0.1" +Makie = "0.20, 0.21, 0.22" Missings = "0.4, 1" NCDatasets = "0.13, 0.14" OffsetArrays = "1" @@ -83,6 +86,7 @@ Reexport = "0.2, 1.0" SafeTestsets = "0.1" Setfield = "0.6, 0.7, 0.8, 1" Shapefile = "0.10, 0.11" +SortTileRecursiveTree = "0.1.1" Statistics = "1" StatsBase = "0.34" Test = "1" diff --git a/src/Rasters.jl b/src/Rasters.jl index 94af83dfe..b433c7a38 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -21,6 +21,7 @@ import Adapt, FillArrays, Flatten, GeoInterface, + GeometryOps, GeometryOpsCore, OffsetArrays, ProgressMeter, @@ -29,6 +30,7 @@ import Adapt, RecipesBase, Reexport, Setfield, + SortTileRecursiveTree, Statistics Reexport.@reexport using DimensionalData, GeoFormatTypes @@ -76,6 +78,7 @@ export Extent, extent const DD = DimensionalData const DA = DiskArrays const GI = GeoInterface +const GO = GeometryOps const LA = Lookups # DimensionalData documentation urls @@ -108,6 +111,7 @@ include("array.jl") include("stack.jl") include("series.jl") include("crs.jl") +include("geometry_lookup.jl") const RasterStackOrArray = Union{AbstractRasterStack,AbstractRaster} const RasterSeriesOrStack = Union{AbstractRasterSeries,AbstractRasterStack} diff --git a/src/geometry_lookup.jl b/src/geometry_lookup.jl new file mode 100644 index 000000000..4e2a7bca5 --- /dev/null +++ b/src/geometry_lookup.jl @@ -0,0 +1,226 @@ +""" + GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) + + +The other thing I'm thinking of is that we could have a feature collection in `data` so that you can persist per geom attrs + +A lookup type for geometry dimensions in vector data cubes. + +`GeometryLookup` provides efficient spatial indexing and lookup for geometries using an STRtree (Sort-Tile-Recursive tree). +It is used as the lookup type for geometry dimensions in vector data cubes, enabling fast spatial queries and operations. + +It spans the dimensions given to it in `dims`, as well as the dimension it's wrapped in - you would construct a DimArray with a GeometryLookup +like `DimArray(data, Geometry(GeometryLookup(data, dims)))`. Here, `Geometry` is a dimension - but selectors in X and Y will also eventually work! + + +# Examples + +```julia +using Rasters + +using NaturalEarth +import GeometryOps as GO + +# construct the polygon lookup +polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry +polygon_lookup = GeometryLookup(polygons, (X(), Y())) + +# create a DimArray with the polygon lookup +dv = rand(Geometry(polygon_lookup)) + +# select the polygon with the centroid of the 88th polygon +dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true +``` + +""" +struct GeometryLookup{T, D} <: Lookups.Lookup{T, 1} + data::Vector{T} + tree::SortTileRecursiveTree.STRtree + dims::D +end + +function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) + # First, retrieve the geometries - from a table, vector of geometries, etc. + geometries = _get_geometries(data, geometrycolumn) + # Check that the geometries are actually geometries + if any(!GI.isgeometry, geometries) + throw(ArgumentError(""" + The collection passed in to `GeometryLookup` has some elements that are not geometries + (`GI.isgeometry(x) == false` for some `x` in `data`). + """)) + end + # Make sure there are only two dimensions + if length(dims) != 2 + throw(ArgumentError(""" + The `dims` argument to `GeometryLookup` must have two dimensions, but it has $(length(dims)) dimensions (`$(dims)`). + Please make sure that it has only two dimensions, like `(X(), Y())`. + """)) + end + # Build the lookup accelerator tree + tree = SortTileRecursiveTree.STRtree(geometries) + GeometryLookup(geometries, tree, dims) +end + +DD.dims(l::GeometryLookup) = l.dims +DD.dims(d::DD.Dimension{<: GeometryLookup}) = val(d).dims + +DD.order(::GeometryLookup) = Lookups.Unordered() + +DD.parent(lookup::GeometryLookup) = lookup.data + +DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l + +function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nothing, dims = lookup.dims) + new_tree = if data == lookup.data + lookup.tree + else + SortTileRecursiveTree.STRtree(data) + end + GeometryLookup(data, new_tree, dims) +end + +# Return an `Int` or Vector{Bool} +Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) = + selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) +function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K + dimsel = map(rebuild, map(name2dim, K), values(sel)) + selectindices(lookup, dimsel) +end +Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel +function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) + if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) + i = findfirst(x -> all(map(_matches, sel, x)), lookup) + isnothing(i) && _coord_not_found_error(sel) + return i + else + return [_matches(sel, x) for x in lookup] + end +end + +function _maybe_get_candidates(tree, selector_extent) + if Extents.disjoint(tree.rootnode.extent, selector_extent) + return error(""" + The geometry with extent $(GI.extent(val(sel))) is outside of the extent of the geometry lookup. + + The geometry lookup has extent $(GI.extent(tree.rootnode.extent)) + """) + end + potential_candidates = SortTileRecursiveTree.query(tree, selector_extent) + + isempty(potential_candidates) && return error(""" + The geometry with extent $(GI.extent(val(sel))) does not interact with any of the geometries in the lookup. + """) + + return potential_candidates +end + +function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) + lookup_ext = lookup.tree.rootnode.extent + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup.tree, sel_ext) + + for candidate in potential_candidates + if GO.contains(lookup.data[candidate], val(sel)) + return candidate + end + end + return error(""" + The geometry with extent $(GI.extent(val(sel))) is not contained by any of the geometries in the lookup. + """) +end + + +function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) + lookup_ext = lookup.tree.rootnode.extent + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup.tree, sel_ext) + + for candidate in potential_candidates + if GO.intersects(lookup.data[candidate], val(sel)) + return candidate + end + end + return error(""" + The geometry with extent $(GI.extent(val(sel))) does not touch any of the geometries in the lookup. + """) +end + +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains}, Union{<: At, <: Contains}}) + xval, yval = val(xs), val(ys) + + lookup_ext = lookup.tree.rootnode.extent + + if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] + potential_candidates = SortTileRecursiveTree.query(lookup.tree, (xval, yval)) + if isempty(potential_candidates) + return error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) # no geometry intersects with it + else + for candidate in potential_candidates + if GO.contains(lookup.data[candidate], (xval, yval)) + return candidate + end + end + return error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) # no geometry intersects with it + end + else + return error(""" + The point ($xval, $yval) is outside of the extent of the geometry lookup. + """) # outside of extent / geometrylookup bbox + end +end + +@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) + +function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) + print(io, " ") + show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) +end + +# Dimension methods + +@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = + rebuild(dim, [map(x -> zero(x), dim.val[1])]) + +function DD._format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) + checkaxis(dim, axis) + return dim +end + +# Local functions +_val_or_nothing(::Nothing) = nothing +_val_or_nothing(d::DD.Dimension) = val(d) + +#= + + + +DD.@dim Geometry + +using NaturalEarth +polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + +polygon_lookup = GeometryLookup(polygons, SortTileRecursiveTree.STRtree(polygons), (X(), Y())) + +dv = rand(Geometry(polygon_lookup)) + + +@test_throws ErrorException dv[Geometry=(X(At(1)), Y(At(2)))] +@test dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] + + + + + +Geometry(Where(GO.contains(geom2))) + +X(At(1)), Y(At(2)), Ti(Near(2010)) + + +# TODO: +# metadata for crs +# +=# \ No newline at end of file From 4bccf4092488317a6987d9e96c96b309f9fad373 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 8 Jan 2025 23:38:30 -0100 Subject: [PATCH 02/57] Multi/polygon writing --- src/geometry_lookup.jl | 122 +++++++++++++++++++++++++++++++++++------ 1 file changed, 104 insertions(+), 18 deletions(-) diff --git a/src/geometry_lookup.jl b/src/geometry_lookup.jl index 4e2a7bca5..98d017af1 100644 --- a/src/geometry_lookup.jl +++ b/src/geometry_lookup.jl @@ -2,8 +2,6 @@ GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) -The other thing I'm thinking of is that we could have a feature collection in `data` so that you can persist per geom attrs - A lookup type for geometry dimensions in vector data cubes. `GeometryLookup` provides efficient spatial indexing and lookup for geometries using an STRtree (Sort-Tile-Recursive tree). @@ -61,6 +59,14 @@ function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) GeometryLookup(geometries, tree, dims) end +#= + +## DD methods for the lookup + +Here we define DimensionalData's methods for the lookup. +This is broadly standard except for the `rebuild` method, which is used to update the tree accelerator when the data changes. +=# + DD.dims(l::GeometryLookup) = l.dims DD.dims(d::DD.Dimension{<: GeometryLookup}) = val(d).dims @@ -70,6 +76,7 @@ DD.parent(lookup::GeometryLookup) = lookup.data DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l +# Make sure that the tree is rebuilt if the data changes function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nothing, dims = lookup.dims) new_tree = if data == lookup.data lookup.tree @@ -79,6 +86,15 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nothing, GeometryLookup(data, new_tree, dims) end +#= +## Lookups methods + +Here we define the methods for the Lookups API. +The main entry point is `selectindices`, which is used to select the indices of the geometries that match the selector. + +We need to define methods that take selectors and convert them to extents, then GeometryOps needs +=# + # Return an `Int` or Vector{Bool} Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) = selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) @@ -97,6 +113,11 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) end end +""" + _maybe_get_candidates(tree, selector_extent) + +Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. +""" function _maybe_get_candidates(tree, selector_extent) if Extents.disjoint(tree.rootnode.extent, selector_extent) return error(""" @@ -115,8 +136,7 @@ function _maybe_get_candidates(tree, selector_extent) end function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) - lookup_ext = lookup.tree.rootnode.extent - sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup.tree, sel_ext) for candidate in potential_candidates @@ -194,33 +214,99 @@ end _val_or_nothing(::Nothing) = nothing _val_or_nothing(d::DD.Dimension) = val(d) -#= +# I/O utils +function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) + n_points_per_geom_vec = GI.npoint.(geoms) + total_n_points = sum(n_points_per_geom_vec) + # Create a vector of the total number of points + xs = fill(0.0, total_n_points) + ys = fill(0.0, total_n_points) -DD.@dim Geometry + ngeoms = length(geoms) + nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0) -using NaturalEarth -polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + node_count_vec = fill(0, ngeoms) + part_node_count_vec = fill(0, nrings) + interior_ring_vec = fill(0, nrings) -polygon_lookup = GeometryLookup(polygons, SortTileRecursiveTree.STRtree(polygons), (X(), Y())) + current_xy_index = 1 + current_ring_index = 1 -dv = rand(Geometry(polygon_lookup)) + for (i, geom) in enumerate(geoms) + + this_geom_npoints = GI.npoint(geom) + node_count_vec[i] = this_geom_npoints + + # push individual components of the ring + for poly in GO.flatten(GI.PolygonTrait, geom) + exterior_ring = GI.getexterior(poly) + for point in GI.getpoint(exterior_ring) + xs[current_xy_index] = GI.x(point) + ys[current_xy_index] = GI.y(point) + current_xy_index += 1 + end + part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring) + interior_ring_vec[current_ring_index] = 0 + current_ring_index += 1 + + if GI.nring(poly) == 1 + continue + else + for hole in GI.gethole(poly) + for point in GI.getpoint(hole) + xs[current_xy_index] = GI.x(point) + ys[current_xy_index] = GI.y(point) + current_xy_index += 1 + end + part_node_count_vec[current_ring_index] = GI.npoint(hole) + interior_ring_vec[current_ring_index] = 1 + current_ring_index += 1 + end + end + end + end + return xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec + +end -@test_throws ErrorException dv[Geometry=(X(At(1)), Y(At(2)))] -@test dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] +function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) + error("Not implemented yet") +end +function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) + error("Not implemented yet") +end +function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec) + current_xy_index = 1 + current_ring_index = 1 + current_geom_index = 1 + geoms = Vector{GO.GeometryBasics.MultiPolygon{2, Float64}}(undef, length(node_count_vec)) -Geometry(Where(GO.contains(geom2))) + for (i, npoints) in enumerate(node_count_vec) -X(At(1)), Y(At(2)), Ti(Near(2010)) + this_geom_npoints = npoints + # this_geom_nholes = + end +end -# TODO: -# metadata for crs -# -=# \ No newline at end of file +# total_area_of_intersection = 0.0 +# current_area_of_intersection = 0.0 +# last_point = nothing +# apply_with_signal(trait, geom) do subgeom, state +# if state == :start +# total_area_of_intersection += current_area_of_intersection +# current_area_of_intersection = 0.0 +# last_point = nothing +# elseif state == :continue +# # shoelace formula for this point +# elseif state == :end +# # finish off the shoelace formula +# end +# end \ No newline at end of file From 67179029d6884f31a358c76b63a6ba9436540a62 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Thu, 9 Jan 2025 00:00:36 -0100 Subject: [PATCH 03/57] Add crs, reproject --- src/geometry_lookup.jl | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/geometry_lookup.jl b/src/geometry_lookup.jl index 98d017af1..f070928fd 100644 --- a/src/geometry_lookup.jl +++ b/src/geometry_lookup.jl @@ -31,13 +31,14 @@ dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true ``` """ -struct GeometryLookup{T, D} <: Lookups.Lookup{T, 1} +struct GeometryLookup{T, D, CRS} <: Lookups.Lookup{T, 1} data::Vector{T} tree::SortTileRecursiveTree.STRtree dims::D + crs::CRS end -function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) +function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nothing) # First, retrieve the geometries - from a table, vector of geometries, etc. geometries = _get_geometries(data, geometrycolumn) # Check that the geometries are actually geometries @@ -56,7 +57,11 @@ function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) end # Build the lookup accelerator tree tree = SortTileRecursiveTree.STRtree(geometries) - GeometryLookup(geometries, tree, dims) + + true_crs = isnothing(crs) ? GI.crs(first(geometries)) : crs + + + GeometryLookup(geometries, tree, dims, crs) end #= @@ -68,6 +73,7 @@ This is broadly standard except for the `rebuild` method, which is used to updat =# DD.dims(l::GeometryLookup) = l.dims + DD.dims(d::DD.Dimension{<: GeometryLookup}) = val(d).dims DD.order(::GeometryLookup) = Lookups.Unordered() @@ -77,13 +83,19 @@ DD.parent(lookup::GeometryLookup) = lookup.data DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l # Make sure that the tree is rebuilt if the data changes -function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nothing, dims = lookup.dims) +function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dims = lookup.dims, crs = nokw) new_tree = if data == lookup.data lookup.tree else SortTileRecursiveTree.STRtree(data) end - GeometryLookup(data, new_tree, dims) + new_crs = if isnokw(crs) + GI.crs(first(data)) + else + crs + end + + GeometryLookup(data, new_tree, dims, new_crs) end #= @@ -215,6 +227,17 @@ _val_or_nothing(::Nothing) = nothing _val_or_nothing(d::DD.Dimension) = val(d) +#= +## Reproject + +Reproject just forwards to `GO.reproject`. +=# + +function reproject(target::GeoFormat, l::GeometryLookup) + source = crs(l) + return rebuild(l; data = GO.reproject(l.data; source_crs = source, target_crs = target, always_xy = true)) +end + # I/O utils function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) n_points_per_geom_vec = GI.npoint.(geoms) From 8d6f6705281bb085b240db801b2bdbb8695be414 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Fri, 10 Jan 2025 09:44:52 -0100 Subject: [PATCH 04/57] Hook up CRS, export GeometryLookup --- src/Rasters.jl | 2 +- src/geometry_lookup.jl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Rasters.jl b/src/Rasters.jl index b433c7a38..323e4b6fb 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -64,7 +64,7 @@ export Planar, Spherical export AbstractRaster, Raster export AbstractRasterStack, RasterStack export AbstractRasterSeries, RasterSeries -export Projected, Mapped +export Projected, Mapped, GeometryLookup export Band export missingval, boolmask, missingmask, replace_missing, replace_missing!, aggregate, aggregate!, disaggregate, disaggregate!, mask, mask!, diff --git a/src/geometry_lookup.jl b/src/geometry_lookup.jl index f070928fd..aa77fd28c 100644 --- a/src/geometry_lookup.jl +++ b/src/geometry_lookup.jl @@ -64,6 +64,8 @@ function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = GeometryLookup(geometries, tree, dims, crs) end +crs(l::GeometryLookup) = l.crs + #= ## DD methods for the lookup From b614376252165bf66b7bbf89f2935ebf069322be Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Fri, 10 Jan 2025 09:45:32 -0100 Subject: [PATCH 05/57] add super basic tests --- test/geometry_lookup.jl | 31 +++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 2 files changed, 32 insertions(+) create mode 100644 test/geometry_lookup.jl diff --git a/test/geometry_lookup.jl b/test/geometry_lookup.jl new file mode 100644 index 000000000..0107e54b0 --- /dev/null +++ b/test/geometry_lookup.jl @@ -0,0 +1,31 @@ +using NaturalEarth +using Test + +using Rasters, DimensionalData +import Proj + +@testset "construction" begin + # fetch land polygons from Natural Earth + land_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + # create a DimVector from the land polygons + gl = GeometryLookup(land_polygons) + @test crs(gl) == EPSG(4326) + @test all(GO.equals, zip(val(gl), land_polygons)) +end + +@testset "reprojecting a GeometryLookup" begin + # fetch land polygons from Natural Earth + land_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + # create a DimVector from the land polygons + dv = Raster(rand(Dim{:Geometry}(GeometryLookup(land_polygons)))) + # reproject the full vector data cube (vector data vector, in this case :D) + target_crs = ProjString("+proj=wintri +type=crs") + reprojected_via_rasters = val(dims(reproject(target_crs, dv), Dim{:Geometry})) + reprojected_via_geometryops = GO.reproject(land_polygons; source_crs = EPSG(4326), target_crs = target_crs) + # test that the reprojected geometries are the same + @test all(splat(GO.equals), zip( + reprojected_via_rasters, # reproject the vdc, get the geometries from it + reprojected_via_geometryops # reproject the geometries directly + ) + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index e4337545c..9f9dfe266 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,6 +22,7 @@ end @time @safetestset "reproject" begin include("reproject.jl") end @time @safetestset "warp" begin include("warp.jl") end @time @safetestset "cellarea" begin include("cellarea.jl") end +@time @safetestset "geometrylookups" begin include("geometry_lookups.jl") end # CommondataModel sources @time @safetestset "commondatamodel" begin include("sources/commondatamodel.jl") end From ab7d427fdc4cc4c9d18b6a479e0952cec85debdc Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 11:23:55 -0400 Subject: [PATCH 06/57] move geometry_lookup to a folder --- src/Rasters.jl | 3 +- src/{ => geometry_lookup}/geometry_lookup.jl | 39 ++++++++++++++------ 2 files changed, 30 insertions(+), 12 deletions(-) rename src/{ => geometry_lookup}/geometry_lookup.jl (91%) diff --git a/src/Rasters.jl b/src/Rasters.jl index 6926737f6..1b9c00a06 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -115,7 +115,6 @@ include("array.jl") include("stack.jl") include("series.jl") include("crs.jl") -include("geometry_lookup.jl") const RasterStackOrArray = Union{AbstractRasterStack,AbstractRaster} const RasterSeriesOrStack = Union{AbstractRasterSeries,AbstractRasterStack} @@ -123,6 +122,8 @@ const RasterSeriesOrStack = Union{AbstractRasterSeries,AbstractRasterStack} include("utils.jl") include("skipmissing.jl") +include("geometry_lookup/geometry_lookup.jl") + include("table_ops.jl") include("create.jl") include("read.jl") diff --git a/src/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl similarity index 91% rename from src/geometry_lookup.jl rename to src/geometry_lookup/geometry_lookup.jl index aa77fd28c..b47cb2649 100644 --- a/src/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -31,9 +31,10 @@ dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true ``` """ -struct GeometryLookup{T, D, CRS} <: Lookups.Lookup{T, 1} - data::Vector{T} - tree::SortTileRecursiveTree.STRtree +struct GeometryLookup{A <: AbstractVector, D, M <: GO.Manifold, Tree, CRS} <: Lookups.Lookup{A, 1} + manifold::M + data::A + tree::Tree dims::D crs::CRS end @@ -60,8 +61,8 @@ function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = true_crs = isnothing(crs) ? GI.crs(first(geometries)) : crs - - GeometryLookup(geometries, tree, dims, crs) + # TODO: auto manifold detection and best tree type for that manifold + GeometryLookup(GO.Planar(), geometries, tree, dims, crs) end crs(l::GeometryLookup) = l.crs @@ -85,9 +86,19 @@ DD.parent(lookup::GeometryLookup) = lookup.data DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l # Make sure that the tree is rebuilt if the data changes -function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dims = lookup.dims, crs = nokw) - new_tree = if data == lookup.data - lookup.tree +function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dims = lookup.dims, crs = nokw, manifold = nokw) + new_tree = if isnokw(tree) + if data == lookup.data + lookup.tree + else + SortTileRecursiveTree.STRtree(data) + end + elseif GO.SpatialTreeInterface.isspatialtree(tree) + if tree isa Type || + tree(data) + else + tree + end else SortTileRecursiveTree.STRtree(data) end @@ -97,7 +108,13 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dim crs end - GeometryLookup(data, new_tree, dims, new_crs) + new_manifold = if isnokw(manifold) + lookup.manifold + else + manifold + end + + GeometryLookup(new_manifold, data, new_tree, dims, new_crs) end #= @@ -140,7 +157,7 @@ function _maybe_get_candidates(tree, selector_extent) The geometry lookup has extent $(GI.extent(tree.rootnode.extent)) """) end - potential_candidates = SortTileRecursiveTree.query(tree, selector_extent) + potential_candidates = GO.SpatialTreeInterface.query(tree, Base.Fix1(Extents.intersects, selector_extent)) isempty(potential_candidates) && return error(""" The geometry with extent $(GI.extent(val(sel))) does not interact with any of the geometries in the lookup. @@ -185,7 +202,7 @@ function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: lookup_ext = lookup.tree.rootnode.extent if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] - potential_candidates = SortTileRecursiveTree.query(lookup.tree, (xval, yval)) + potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) if isempty(potential_candidates) return error(""" The point ($xval, $yval) is not within any of the geometries in the lookup. From 1c55155355d7485ef38b6f606ca90dcfa80f01af Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 11:24:26 -0400 Subject: [PATCH 07/57] add overrides for zonal when it's passed a GeometryLookup or dim that wraps that --- src/Rasters.jl | 1 + src/geometry_lookup/methods.jl | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 src/geometry_lookup/methods.jl diff --git a/src/Rasters.jl b/src/Rasters.jl index 1b9c00a06..835d867de 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -123,6 +123,7 @@ include("utils.jl") include("skipmissing.jl") include("geometry_lookup/geometry_lookup.jl") +include("geometry_lookup/methods.jl") include("table_ops.jl") include("create.jl") diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl new file mode 100644 index 000000000..13705525e --- /dev/null +++ b/src/geometry_lookup/methods.jl @@ -0,0 +1,28 @@ + +function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) + return _zonal(f, x, nothing, Dim{:Geometry}(data); kw...) +end +function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dim{Name, <: GeometryLookup}; + progress=true, threaded=true, geometrycolumn=nothing, kw... +) where Name + geoms = data.val.data + # TODO: deliberately filter geoms based on extent and tree + # so that we don't waste time calling `mask` on lots of geometries + # that are outside the extent of the raster + # but that is for later. + n = length(geoms) + n == 0 && return [] + zs, start_index = _alloc_zonal(f, x, geoms, n; kw...) + start_index == n + 1 && return zs + _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i + zs[i] = _zonal(f, x, geoms[i]; kw...) + end + @show typeof(zs) + if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} + println("Got a vector of DimArrays") + return cat(zs...; dims = data) + else + println("Got a vector of other things") + return Raster(zs, (data,)) + end +end From 55cb66da17df28ea35d6898e71c9f04f65f90b38 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 11:34:55 -0400 Subject: [PATCH 08/57] make zonal use the fast-path missing return only if skipmissing = true This way we can force skipmissing=false and manually do it, if we want e.g. vector data cubes. --- src/methods/zonal.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index f522a4ad2..c92ecc65a 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -81,7 +81,9 @@ _zonal(f, x::Raster, of::Extents.Extent; skipmissing) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing) cropped = crop(x; to=ext, touches=true) - length(cropped) > 0 || return missing + if length(cropped) == 0 && skipmissing == true + return map(_ -> missing, x) + end return maplayers(cropped) do A _maybe_skipmissing_call(f, A, skipmissing) end @@ -97,7 +99,9 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; skipmissing, kw... ) cropped = crop(x; to=geom, touches=true) - length(cropped) > 0 || return missing + if length(cropped) == 0 && skipmissing == true + return missing + end masked = mask(cropped; with=geom, kw...) return _maybe_skipmissing_call(f, masked, skipmissing) end @@ -105,10 +109,14 @@ function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; skipmissing, kw... ) cropped = crop(st; to=geom, touches=true) - length(cropped) > 0 || return map(_ -> missing, st) + if length(cropped) == 0 && skipmissing == true + return map(_ -> missing, st) + end masked = mask(cropped; with=geom, kw...) return maplayers(masked) do A - length(A) > 0 || return missing + if length(A) == 0 && skipmissing == true + return missing + end _maybe_skipmissing_call(f, A, skipmissing) end end From 5f6b3686ef3f16da0274a2decbf7a42be11ce555 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 15:13:40 -0400 Subject: [PATCH 09/57] implement automatic spatial slicing for datacubes with style and ease --- src/methods/zonal.jl | 63 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index c92ecc65a..e5f8a33e6 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -23,6 +23,11 @@ These can be used when `of` is or contains (a) GeoInterface.jl compatible object `f` will be passed an iterator over the values, which loses all spatial information. if `false` `f` will be passes a masked `Raster` or `RasterStack`, and will be responsible for handling missing values itself. The default value is `true`. +- `spatialslices`: if `true`, and the input Raster has more dimensions than `X` and `Y`, then we will + apply the function `f` to each spatial slice of the raster (literally, `mapslices(f, x; dims = (X, Y))`), + and return a vector data cube or stack of the results. + if `false`, then we will apply `f` to the full cropped raster, and return a vector of results (one per geometry) + as usual. # Example @@ -60,15 +65,15 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -function zonal(f, x::RasterStack; of, skipmissing=true, kw...) +function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=dims(x, (Val{DD.XDim}(), Val{DD.YDim}())), kw...) # TODO: open currently doesn't work so well for large rasterstacks, # we need to fix that before we can go back to this being a single method # on `RasterStackOrArray`. - _zonal(f, _prepare_for_burning(x), of; skipmissing, kw...) + _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, kw...) end -function zonal(f, x::Raster; of, skipmissing=true, kw...) +function zonal(f, x::Raster; of, skipmissing=true, spatialslices=dims(x, (Val{DD.XDim}(), Val{DD.YDim}())), kw...) open(x) do xo - _zonal(f, _prepare_for_burning(xo), of; skipmissing, kw...) + _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, kw...) end end @@ -77,15 +82,14 @@ _zonal(f, x::RasterStackOrArray, of::RasterStackOrArray; kw...) = _zonal(f, x::RasterStackOrArray, of::DimTuple; kw...) = _zonal(f, x, Extents.extent(of); kw...) # We don't need to `mask` with an extent, it's square so `crop` will do enough. -_zonal(f, x::Raster, of::Extents.Extent; skipmissing) = - _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) -function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing) +_zonal(f, x::Raster, of::Extents.Extent; skipmissing, spatialslices) = _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), crop(x; to=of, touches=true), skipmissing) +function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing, spatialslices) cropped = crop(x; to=ext, touches=true) if length(cropped) == 0 && skipmissing == true return map(_ -> missing, x) end return maplayers(cropped) do A - _maybe_skipmissing_call(f, A, skipmissing) + _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), A, skipmissing) end end # Otherwise of is a geom, table or vector @@ -96,17 +100,17 @@ _zonal(f, x, ::GI.AbstractFeatureCollectionTrait, fc; kw...) = _zonal(f, x::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) = _zonal(f, x, GI.geometry(feature); kw...) function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; - skipmissing, kw... + skipmissing, spatialslices, kw... ) cropped = crop(x; to=geom, touches=true) if length(cropped) == 0 && skipmissing == true return missing end masked = mask(cropped; with=geom, kw...) - return _maybe_skipmissing_call(f, masked, skipmissing) + return _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), masked, skipmissing) end function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; - skipmissing, kw... + skipmissing, spatialslices, kw... ) cropped = crop(st; to=geom, touches=true) if length(cropped) == 0 && skipmissing == true @@ -117,7 +121,7 @@ function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; if length(A) == 0 && skipmissing == true return missing end - _maybe_skipmissing_call(f, A, skipmissing) + _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), A, skipmissing) end end function _zonal(f, x::RasterStackOrArray, ::Nothing, data; @@ -156,3 +160,38 @@ function _alloc_zonal(f, x, geoms, n; kw...) end _maybe_skipmissing_call(f, A, sm) = sm ? f(skipmissing(A)) : f(A) + +function _mapspatialslices(f, x::AbstractArray{T, N}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) where {T, N} + iterator = DD.DimSlices(x; dims = DD.otherdims(x, spatialdims), drop = true) + return f.(iterator) +end + +function _mapspatialslices(f, s::SkipMissingVal; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) + x = s.x # get the raster out of the SkipMissingVal + iterator = DD.DimSlices(x; dims = DD.otherdims(x, spatialdims), drop = true) + return @. f(skipmissing(iterator)) +end + +_maybe_spatialsliceify(f, spatialslices::Bool) = spatialslices ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}())) : f +_maybe_spatialsliceify(f, spatialslices::DD.AllDims) = _SpatialSliceify(f, spatialslices) + +""" + _SpatialSliceify(f, dims) + +A callable struct that applies `mapslices(f, x; dims = spatialdims)` to the input array `x`, and removes empty dimensions. + +```jldoctest +data = ones(10, 10, 10, 10); +f = _SpatialSliceify(sum, (1, 2)) +size(f(data)) + +# output +(10, 10) +``` +""" +struct _SpatialSliceify{F, D} + f::F + dims::D +end + +(r::_SpatialSliceify{F, D})(x) where {F, D} = _mapspatialslices(r.f, x; spatialdims = r.dims) From 09d8ac3ce29c944d268007768dccf368f3d6e9fc Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 15:39:24 -0400 Subject: [PATCH 10/57] add comments, set `spatialslices` to `_True` initially for assured const --- src/methods/zonal.jl | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index e5f8a33e6..b73e0605d 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -65,13 +65,13 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=dims(x, (Val{DD.XDim}(), Val{DD.YDim}())), kw...) +function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_True(), kw...) # TODO: open currently doesn't work so well for large rasterstacks, # we need to fix that before we can go back to this being a single method # on `RasterStackOrArray`. _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, kw...) end -function zonal(f, x::Raster; of, skipmissing=true, spatialslices=dims(x, (Val{DD.XDim}(), Val{DD.YDim}())), kw...) +function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_True(), kw...) open(x) do xo _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, kw...) end @@ -159,20 +159,26 @@ function _alloc_zonal(f, x, geoms, n; kw...) return zs, n_missing + 1 end -_maybe_skipmissing_call(f, A, sm) = sm ? f(skipmissing(A)) : f(A) +# Optionally wrap the input argument in `skipmissing(A)` is `sm` is true. +_maybe_skipmissing_call(f, A, sm) = istrue(sm) ? f(skipmissing(A)) : f(A) -function _mapspatialslices(f, x::AbstractArray{T, N}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) where {T, N} +# the only reason we have AbstractDimArray here is to make sure that DD.otherdims is available. +# We could probably get away with just AbstractArray here otherwise. +# The reason this is not just mapslices is because this drops the sliced dimensions automatically, +# which is what we want. +function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) iterator = DD.DimSlices(x; dims = DD.otherdims(x, spatialdims), drop = true) return f.(iterator) end - -function _mapspatialslices(f, s::SkipMissingVal; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) +# SkipMissingVal and SkipMissing both store the initial value in the `x` property, +# so we can use the same thing to extract it. +function _mapspatialslices(f, s::Union{SkipMissingVal, Base.SkipMissing}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) x = s.x # get the raster out of the SkipMissingVal iterator = DD.DimSlices(x; dims = DD.otherdims(x, spatialdims), drop = true) return @. f(skipmissing(iterator)) end -_maybe_spatialsliceify(f, spatialslices::Bool) = spatialslices ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}())) : f +_maybe_spatialsliceify(f, spatialslices) = istrue(spatialslices) ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}())) : f _maybe_spatialsliceify(f, spatialslices::DD.AllDims) = _SpatialSliceify(f, spatialslices) """ From 3757a35c2e1e6d271d242e97018922c804281a6b Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 15:39:37 -0400 Subject: [PATCH 11/57] fix geomlookup method --- src/geometry_lookup/geometry_lookup.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index b47cb2649..fc810a4aa 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -94,7 +94,7 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dim SortTileRecursiveTree.STRtree(data) end elseif GO.SpatialTreeInterface.isspatialtree(tree) - if tree isa Type || + if tree isa DataType tree(data) else tree From aa1bcc5f8fbda2746e45d9de6ef5bb98cf439e37 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 15:51:35 -0400 Subject: [PATCH 12/57] wip fancy rebuild / dim detection --- src/geometry_lookup/methods.jl | 41 +++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index 13705525e..a094bd16b 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -1,28 +1,47 @@ -function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) +@noinline function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) return _zonal(f, x, nothing, Dim{:Geometry}(data); kw...) end -function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dim{Name, <: GeometryLookup}; - progress=true, threaded=true, geometrycolumn=nothing, kw... +@noinline function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dim{Name, <: GeometryLookup}; + progress=true, threaded=true, geometrycolumn=nothing, spatialslices, kw... ) where Name geoms = data.val.data # TODO: deliberately filter geoms based on extent and tree # so that we don't waste time calling `mask` on lots of geometries # that are outside the extent of the raster # but that is for later. + + if istrue(spatialslices) && data.val.dims != (X(), Y()) && dims(x, data.val.dims) != dims(x, (Val{DD.XDim}(), Val{DD.YDim}())) + spatialslices = dims(x, data.val.dims) # use the dimensions in the geometry lookup! + end + n = length(geoms) n == 0 && return [] - zs, start_index = _alloc_zonal(f, x, geoms, n; kw...) + zs, start_index = _alloc_zonal(f, x, geoms, n; spatialslices, kw...) start_index == n + 1 && return zs _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i zs[i] = _zonal(f, x, geoms[i]; kw...) end - @show typeof(zs) - if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} - println("Got a vector of DimArrays") - return cat(zs...; dims = data) + + return_lookup_dims = if istrue(spatialslices) + dims(data, (Val{DD.XDim}(), Val{DD.YDim}())) + elseif spatialslices isa DD.AllDims + dims(data, spatialslices) + else # fallback + (X(), Y()) + end + + return_lookup = rebuild(data.val; dims = return_lookup_dims) + + return_dimension = rebuild(data, return_lookup) + + if zs isa AbstractVector{<: Union{<: AbstractDimArray, <: AbstractDimStack}} + backing_array = __do_cat_with_last_dim(zs) + z_dims = dims(first(zs)) + new_dims = (z_dims..., return_dimension) + return rebuild(x; data = backing_array, dims = new_dims) else - println("Got a vector of other things") - return Raster(zs, (data,)) + # TODO: how should we reconstruct a rasterstack from a vector of named tuples? + return Raster(zs, (return_dimension,)) end -end +end \ No newline at end of file From debb290fd922ed9dcd3a54eeb337034cfe1199f0 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:01:22 -0400 Subject: [PATCH 13/57] implement isfalse so that we can check that the thing is explicitly false, this is quite useful for spatialslice because you can provide anything but false there, and it assumes true --- src/utils.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/utils.jl b/src/utils.jl index fb2329544..81b9fae3c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -553,6 +553,13 @@ istrue(::_True) = true istrue(::_False) = false istrue(::Type{_True}) = true istrue(::Type{_False}) = false +istrue(x) = x == true + +isfalse(::_True) = false +isfalse(::_False) = true +isfalse(::Type{_True}) = false +isfalse(::Type{_False}) = true +isfalse(x) = x == false # skipinvalid: can G and I be missing. skipmissing: can nametypes be missing _rowtype(x, g, args...; kw...) = _rowtype(x, typeof(g), args...; kw...) From 16ef85b8cd5ee1d8cc0557f84c10f6c36bc3fee7 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:15:58 -0400 Subject: [PATCH 14/57] add a missingval kwarg so we can fix things correctly --- src/methods/zonal.jl | 149 ++++++++++++++++++++++++++++++------------- 1 file changed, 105 insertions(+), 44 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index b73e0605d..4fb42161a 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -65,15 +65,15 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_True(), kw...) +function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_True(), missingval = missingval(x), kw...) # TODO: open currently doesn't work so well for large rasterstacks, # we need to fix that before we can go back to this being a single method # on `RasterStackOrArray`. - _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, kw...) + _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, missingval, kw...) end -function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_True(), kw...) +function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_True(), missingval = missingval(x), kw...) open(x) do xo - _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, kw...) + _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, missingval, kw...) end end @@ -82,14 +82,14 @@ _zonal(f, x::RasterStackOrArray, of::RasterStackOrArray; kw...) = _zonal(f, x::RasterStackOrArray, of::DimTuple; kw...) = _zonal(f, x, Extents.extent(of); kw...) # We don't need to `mask` with an extent, it's square so `crop` will do enough. -_zonal(f, x::Raster, of::Extents.Extent; skipmissing, spatialslices) = _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), crop(x; to=of, touches=true), skipmissing) -function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing, spatialslices) +_zonal(f, x::Raster, of::Extents.Extent; skipmissing, spatialslices, missingval) = _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), crop(x; to=of, touches=true), skipmissing) +function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing, spatialslices, missingval) cropped = crop(x; to=ext, touches=true) if length(cropped) == 0 && skipmissing == true - return map(_ -> missing, x) + return map(_ -> missingval, x) end return maplayers(cropped) do A - _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), A, skipmissing) + _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices, missingval), A, skipmissing) end end # Otherwise of is a geom, table or vector @@ -100,63 +100,97 @@ _zonal(f, x, ::GI.AbstractFeatureCollectionTrait, fc; kw...) = _zonal(f, x::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...) = _zonal(f, x, GI.geometry(feature); kw...) function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; - skipmissing, spatialslices, kw... + skipmissing, spatialslices, missingval, kw... ) cropped = crop(x; to=geom, touches=true) - if length(cropped) == 0 && skipmissing == true - return missing + masked = if length(cropped) == 0 + if istrue(skipmissing) && isfalse(spatialslices) + return missingval + end + cropped # don't mask if we know there is nothing to mask, otherwise it errors + else + mask(cropped; with=geom, kw...) end - masked = mask(cropped; with=geom, kw...) - return _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), masked, skipmissing) + return _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices, missingval), masked, skipmissing) end function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; - skipmissing, spatialslices, kw... + skipmissing, spatialslices, missingval, kw... ) cropped = crop(st; to=geom, touches=true) - if length(cropped) == 0 && skipmissing == true - return map(_ -> missing, st) + masked = if length(cropped) == 0 + if istrue(skipmissing) && isfalse(spatialslices) + return map(_ -> missingval, st) + end + cropped # don't mask if we know there is nothing to mask, otherwise it errors + else + mask(cropped; with=geom, kw...) end - masked = mask(cropped; with=geom, kw...) return maplayers(masked) do A - if length(A) == 0 && skipmissing == true - return missing + if length(A) == 0 && (istrue(skipmissing) && isfalse(spatialslices)) + return missingval end - _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices), A, skipmissing) + _maybe_skipmissing_call(_maybe_spatialsliceify(f, spatialslices, missingval), A, skipmissing) end end function _zonal(f, x::RasterStackOrArray, ::Nothing, data; - progress=true, threaded=true, geometrycolumn=nothing, kw... + progress=true, threaded=true, geometrycolumn=nothing, missingval, kw... ) geoms = _get_geometries(data, geometrycolumn) n = length(geoms) n == 0 && return [] - zs, start_index = _alloc_zonal(f, x, geoms, n; kw...) + zs, start_index = _alloc_zonal(f, x, geoms, n; missingval, kw...) start_index == n + 1 && return zs _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i - zs[i] = _zonal(f, x, geoms[i]; kw...) + zs[i] = _zonal(f, x, geoms[i]; missingval, kw...) + end + if zs isa Vector{<: Union{<: AbstractDimArray, <: AbstractDimStack}} + return end return zs end -function _alloc_zonal(f, x, geoms, n; kw...) - # Find first non-missing entry and count number of missing entries +function _alloc_zonal(f, x, geoms, n; spatialslices = _True(), missingval, kw...) n_missing::Int = 0 - z1 = _zonal(f, x, first(geoms); kw...) - for geom in geoms - z1 = _zonal(f, x, geom; kw...) - if !ismissing(z1) - break + if isfalse(spatialslices) + # Find first non-missing entry and count number of missing entries + z1 = _zonal(f, x, first(geoms); spatialslices, missingval, kw...) + for geom in geoms + z1 = _zonal(f, x, geom; spatialslices, missingval, kw...) + if !(ismissing(z1) || z1 == missingval) + break + end + n_missing += 1 end - n_missing += 1 - end - zs = Vector{Union{Missing,typeof(z1)}}(undef, n) - zs[1:n_missing] .= missing - # Exit early when all elements are missing - if n_missing == n + zs = Vector{Union{typeof(missingval), typeof(z1)}}(undef, n) + zs[1:n_missing] .= missingval + # Exit early when all elements are missing + if n_missing == n + return zs, n_missing + 1 + end + zs[n_missing + 1] = z1 + return zs, n_missing + 1 + else # spatialslices is true, we know we need an output raster + z1 = _zonal(f, x, first(geoms); spatialslices, missingval, kw...) + _missing_array = z1 + for geom in geoms + z1 = _zonal(f, x, geom; spatialslices, missingval, kw...) + if !all(ismissing, z1) # here, z1 is a raster, so it's fine - but maybe we should have a better check... + break + end + n_missing += 1 + end + # TODO: just bite the bullet and use map. alloc_zonal is a mess. + zs = Vector{Union{typeof(z1), typeof(_missing_array)}}(undef, n) + for i in 1:n + zs[i] = _missing_array + end + # Exit early when all elements are missing + if n_missing == n + return zs, n_missing + 1 + end + zs[n_missing + 1] = z1 return zs, n_missing + 1 end - zs[n_missing + 1] = z1 - return zs, n_missing + 1 end # Optionally wrap the input argument in `skipmissing(A)` is `sm` is true. @@ -166,17 +200,24 @@ _maybe_skipmissing_call(f, A, sm) = istrue(sm) ? f(skipmissing(A)) : f(A) # We could probably get away with just AbstractArray here otherwise. # The reason this is not just mapslices is because this drops the sliced dimensions automatically, # which is what we want. -function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) - iterator = DD.DimSlices(x; dims = DD.otherdims(x, spatialdims), drop = true) - return f.(iterator) +function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()), missingval = missingval(x)) + dimswewant = DD.otherdims(x, spatialdims) + slicedims = rebuild.(dims(x, dimswewant), axes.((x,), dimswewant)) + if any(isempty, DD.dims(x, spatialdims)) + # If any of the spatial dims are empty, we can just return a constant missing array + # this way we don't construct the dimslices at all... + missing_array = FillArray{Union{typeof(missingval), eltype(x)}, length(dimswewant)}(missingval, length.(dimswewant)) + return rebuild(x; data = missing_array, dims = dimswewant, refdims = ()) + end + iterator = (rebuild(x; data = d, dims = dims(d)) for d in DD.DimSlices(x; dims = slicedims, drop = true)) + return rebuild(x; data = f.(iterator), dims = dimswewant, refdims = ()) end # SkipMissingVal and SkipMissing both store the initial value in the `x` property, # so we can use the same thing to extract it. function _mapspatialslices(f, s::Union{SkipMissingVal, Base.SkipMissing}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) - x = s.x # get the raster out of the SkipMissingVal - iterator = DD.DimSlices(x; dims = DD.otherdims(x, spatialdims), drop = true) - return @. f(skipmissing(iterator)) + return _mapspatialslices(f ∘ skipmissing, s.x; spatialdims) end + _maybe_spatialsliceify(f, spatialslices) = istrue(spatialslices) ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}())) : f _maybe_spatialsliceify(f, spatialslices::DD.AllDims) = _SpatialSliceify(f, spatialslices) @@ -201,3 +242,23 @@ struct _SpatialSliceify{F, D} end (r::_SpatialSliceify{F, D})(x) where {F, D} = _mapspatialslices(r.f, x; spatialdims = r.dims) + +# This is a helper function that concatenates an array of arrays along their last dimension. +# and returns a ConcatDiskArray so that it doesn't allocate at all.\ +# Users can always rechunk later. But this saves us a lot of time when doing datacube ops. +# And the chunk pattern is available in the concat diskarray. +function __do_cat_with_last_dim(input_arrays) + As = Missings.disallowmissing(input_arrays) + dims = ndims(first(As)) + 1 + sz = map(ntuple(identity, dims)) do i + i == dims ? length(As) : 1 + end + cdas = reshape(As, sz) + backing_array = DiskArrays.ConcatDiskArray(cdas) + return backing_array +end + +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 1}},)) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 2}},)) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 3}},)) +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 4}},)) \ No newline at end of file From af2e447b1db41d474135da9c75af2c5b595f2a22 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:16:09 -0400 Subject: [PATCH 15/57] missingval support --- src/methods/zonal.jl | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 4fb42161a..4a5b16e99 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -144,7 +144,9 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data; zs[i] = _zonal(f, x, geoms[i]; missingval, kw...) end if zs isa Vector{<: Union{<: AbstractDimArray, <: AbstractDimStack}} - return + backing_array = __do_cat_with_last_dim(zs) + return_dimension = Dim{:Geometry}(axes(zs, 1)) + return rebuild(x; data = backing_array, dims = DD.format((dims(first(zs))..., return_dimension), backing_array), missingval = missingval) end return zs end @@ -206,7 +208,7 @@ function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}() if any(isempty, DD.dims(x, spatialdims)) # If any of the spatial dims are empty, we can just return a constant missing array # this way we don't construct the dimslices at all... - missing_array = FillArray{Union{typeof(missingval), eltype(x)}, length(dimswewant)}(missingval, length.(dimswewant)) + missing_array = FillArrays.Fill{Union{typeof(missingval), eltype(x)}, length(dimswewant)}(missingval, length.(dimswewant)) return rebuild(x; data = missing_array, dims = dimswewant, refdims = ()) end iterator = (rebuild(x; data = d, dims = dims(d)) for d in DD.DimSlices(x; dims = slicedims, drop = true)) @@ -214,13 +216,13 @@ function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}() end # SkipMissingVal and SkipMissing both store the initial value in the `x` property, # so we can use the same thing to extract it. -function _mapspatialslices(f, s::Union{SkipMissingVal, Base.SkipMissing}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}())) - return _mapspatialslices(f ∘ skipmissing, s.x; spatialdims) +function _mapspatialslices(f, s::Union{SkipMissingVal, Base.SkipMissing}; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()), missingval = missingval(s.x)) + return _mapspatialslices(f ∘ skipmissing, s.x; spatialdims, missingval) end -_maybe_spatialsliceify(f, spatialslices) = istrue(spatialslices) ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}())) : f -_maybe_spatialsliceify(f, spatialslices::DD.AllDims) = _SpatialSliceify(f, spatialslices) +_maybe_spatialsliceify(f, spatialslices, missingval = missing) = istrue(spatialslices) ? _SpatialSliceify(f, (Val{DD.XDim}(), Val{DD.YDim}()), missingval) : f +_maybe_spatialsliceify(f, spatialslices::DD.AllDims, missingval = missing) = _SpatialSliceify(f, spatialslices, missingval) """ _SpatialSliceify(f, dims) @@ -236,19 +238,20 @@ size(f(data)) (10, 10) ``` """ -struct _SpatialSliceify{F, D} +struct _SpatialSliceify{F, D, M} f::F dims::D + missingval::M end -(r::_SpatialSliceify{F, D})(x) where {F, D} = _mapspatialslices(r.f, x; spatialdims = r.dims) +(r::_SpatialSliceify{F, D, M})(x) where {F, D, M} = _mapspatialslices(r.f, x; spatialdims = r.dims, missingval = r.missingval) # This is a helper function that concatenates an array of arrays along their last dimension. # and returns a ConcatDiskArray so that it doesn't allocate at all.\ # Users can always rechunk later. But this saves us a lot of time when doing datacube ops. # And the chunk pattern is available in the concat diskarray. function __do_cat_with_last_dim(input_arrays) - As = Missings.disallowmissing(input_arrays) + As = Missings.disallowmissing(collect(input_arrays)) dims = ndims(first(As)) + 1 sz = map(ntuple(identity, dims)) do i i == dims ? length(As) : 1 From 233018f95711fbfa72fe07e05e732a3cc9add3c7 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:16:46 -0400 Subject: [PATCH 16/57] fix run: remove static, let threaded be anything for extensibility --- src/utils.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 81b9fae3c..ef6c4e081 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -459,15 +459,15 @@ _progress(args...; kw...) = ProgressMeter.Progress(args...; dt=0.1, color=:blue, @noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) # Run `f` threaded or not, w -function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) +function _run(f, range::OrdinalRange, threaded, progress::Bool, desc::String) p = progress ? _progress(length(range); desc) : nothing - if threaded - Threads.@threads :static for i in range + if isfalse(threaded) + for i in range f(i) isnothing(p) || ProgressMeter.next!(p) end else - for i in range + Threads.@threads for i in range f(i) isnothing(p) || ProgressMeter.next!(p) end From 41ef26e404f8e0c6ae5d74534cd88f7f5fe47f49 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:17:10 -0400 Subject: [PATCH 17/57] pass spatial slices through the geometrylookup zonal --- src/geometry_lookup/methods.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index a094bd16b..369209482 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -20,7 +20,7 @@ end zs, start_index = _alloc_zonal(f, x, geoms, n; spatialslices, kw...) start_index == n + 1 && return zs _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i - zs[i] = _zonal(f, x, geoms[i]; kw...) + zs[i] = _zonal(f, x, geoms[i]; spatialslices, kw...) end return_lookup_dims = if istrue(spatialslices) @@ -31,14 +31,14 @@ end (X(), Y()) end - return_lookup = rebuild(data.val; dims = return_lookup_dims) + return_lookup = rebuild(data.val; dims = rebuild.(return_lookup_dims, (:,))) return_dimension = rebuild(data, return_lookup) - if zs isa AbstractVector{<: Union{<: AbstractDimArray, <: AbstractDimStack}} + if zs isa AbstractVector{<: Union{<: AbstractDimArray, <: AbstractDimStack, Missing}} backing_array = __do_cat_with_last_dim(zs) z_dims = dims(first(zs)) - new_dims = (z_dims..., return_dimension) + new_dims = DD.format((z_dims..., return_dimension), backing_array) return rebuild(x; data = backing_array, dims = new_dims) else # TODO: how should we reconstruct a rasterstack from a vector of named tuples? From 1b7d26c62fa06de87aff28bfb9b848e949bb5186 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:17:24 -0400 Subject: [PATCH 18/57] disallow missings in geom lookup --- src/geometry_lookup/geometry_lookup.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index fc810a4aa..55950d0bd 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -42,6 +42,7 @@ end function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nothing) # First, retrieve the geometries - from a table, vector of geometries, etc. geometries = _get_geometries(data, geometrycolumn) + geometries = Missings.disallowmissing(geometries) # Check that the geometries are actually geometries if any(!GI.isgeometry, geometries) throw(ArgumentError(""" @@ -114,7 +115,7 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dim manifold end - GeometryLookup(new_manifold, data, new_tree, dims, new_crs) + GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) end #= From 19775dad7be8cfe696ed15d69354452dc9eb8e44 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:36:55 -0400 Subject: [PATCH 19/57] allow metadata kwarg to rebuild we need an interface / spec for what rebuild must accept in DD!! --- src/geometry_lookup/geometry_lookup.jl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 55950d0bd..94ca66539 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -77,17 +77,15 @@ This is broadly standard except for the `rebuild` method, which is used to updat =# DD.dims(l::GeometryLookup) = l.dims - DD.dims(d::DD.Dimension{<: GeometryLookup}) = val(d).dims - DD.order(::GeometryLookup) = Lookups.Unordered() - DD.parent(lookup::GeometryLookup) = lookup.data - +# TODO: format for geometry lookup DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l # Make sure that the tree is rebuilt if the data changes -function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dims = lookup.dims, crs = nokw, manifold = nokw) +function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dims = lookup.dims, crs = nokw, manifold = nokw, metadata = nokw) + # TODO: metadata support for geometry lookup new_tree = if isnokw(tree) if data == lookup.data lookup.tree From 97474fefdf4e23e6af72cdc0f6bd05726fedfbaf Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:37:16 -0400 Subject: [PATCH 20/57] plotting support for dim 1 vectordatacubes --- ext/RastersMakieExt/plotrecipes.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ext/RastersMakieExt/plotrecipes.jl b/ext/RastersMakieExt/plotrecipes.jl index fac962fb9..fd4fbf456 100644 --- a/ext/RastersMakieExt/plotrecipes.jl +++ b/ext/RastersMakieExt/plotrecipes.jl @@ -332,3 +332,10 @@ _prepare_dimarray(A) = DimArray(map(x -> _convert_with_missing(x, missingval(A)) _convert_with_missing(x::Real, missingval) = isequal(x, missingval) || ismissing(x) ? NaN32 : Float32(x) _convert_with_missing(x, missingval) = isequal(x, missingval) ? missing : x + +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dim{Name, <: Rasters.GeometryLookup}}}) where {T, Name} = nothing +Makie.expand_dimensions(::Makie.NoConversion, A::Raster{T, 1, <: Tuple{<: Dim{Name, <: Rasters.GeometryLookup}}}) where {T, Name} = nothing +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dim{Name, <: Rasters.GeometryLookup}}}) where {T, Name} + geometries = val(only(dims(A))).data + return Makie.SpecApi.Poly(geometries; color = replace_missing(A, NaN).data) +end \ No newline at end of file From e1ab26d41e75a37bd8d88aa4fd1c36d9ac797875 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 17:37:28 -0400 Subject: [PATCH 21/57] default missingval is missing not nothing --- src/methods/zonal.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 4a5b16e99..cd29995d6 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -65,13 +65,13 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_True(), missingval = missingval(x), kw...) +function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_True(), missingval = isnothing(missingval(x)) ? missing : missingval(x), kw...) # TODO: open currently doesn't work so well for large rasterstacks, # we need to fix that before we can go back to this being a single method # on `RasterStackOrArray`. _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, missingval, kw...) end -function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_True(), missingval = missingval(x), kw...) +function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_True(), missingval = isnothing(missingval(x)) ? missing : missingval(x), kw...) open(x) do xo _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, missingval, kw...) end From b0293caefbdac950f8731121ca9fb8d899f50d64 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:01:50 -0400 Subject: [PATCH 22/57] Geometry named dim --- src/dimensions.jl | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/dimensions.jl b/src/dimensions.jl index 56f2cc7fb..4fec2e82c 100644 --- a/src/dimensions.jl +++ b/src/dimensions.jl @@ -8,7 +8,7 @@ const SpatialDim = Union{XDim,YDim,ZDim} Band [`Dimension`]($DDdimdocs) for multi-band rasters. -## Example: +## Example ```julia banddim = Band(10:10:100) # Or @@ -18,3 +18,23 @@ mean(A; dims=Band) ``` """ @dim Band + +""" + Geometry <: Dimension + + Geometry(geoms) + +Geometry [`Dimension`]($DDdimdocs) for vector data cubes. + +## Example +```julia +geomdim = Geometry(GeometryLookup(polygons)) +# Or +val = A[Geometry(1)] +# Or +val = A[Geometry(Touches(other_geom))] # this is automatically accelerated by spatial trees! +# Or +mean(A; dims=Geometry) +``` +""" +@dim Geometry \ No newline at end of file From dba05f35c692406fd141c051bf4049c10b8bc822 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:02:10 -0400 Subject: [PATCH 23/57] harvest crs from collections --- src/geometry_lookup/geometry_lookup.jl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 94ca66539..d866e7183 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -39,10 +39,19 @@ struct GeometryLookup{A <: AbstractVector, D, M <: GO.Manifold, Tree, CRS} <: Lo crs::CRS end -function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nothing) +function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nokw) + # First, retrieve the geometries - from a table, vector of geometries, etc. geometries = _get_geometries(data, geometrycolumn) geometries = Missings.disallowmissing(geometries) + + if isnokw(crs) + crs = GI.crs(data) + if isnothing(crs) + crs = GI.crs(first(geometries)) + end + end + # Check that the geometries are actually geometries if any(!GI.isgeometry, geometries) throw(ArgumentError(""" From 1ba940bd01e8037f8a5596554bd2567e0e4e8b4e Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:02:29 -0400 Subject: [PATCH 24/57] move io to new file --- src/Rasters.jl | 3 +- src/geometry_lookup/geometry_lookup.jl | 84 ------------------------- src/geometry_lookup/io.jl | 86 ++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 85 deletions(-) create mode 100644 src/geometry_lookup/io.jl diff --git a/src/Rasters.jl b/src/Rasters.jl index 835d867de..1adf11dac 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -65,7 +65,7 @@ export AbstractRaster, Raster export AbstractRasterStack, RasterStack export AbstractRasterSeries, RasterSeries export Projected, Mapped, GeometryLookup -export Band +export Band, Geometry export missingval, boolmask, missingmask, replace_missing, replace_missing!, aggregate, aggregate!, disaggregate, disaggregate!, mask, mask!, resample, warp, zonal, crop, extend, trim, slice, combine, points, @@ -124,6 +124,7 @@ include("skipmissing.jl") include("geometry_lookup/geometry_lookup.jl") include("geometry_lookup/methods.jl") +include("geometry_lookup/io.jl") include("table_ops.jl") include("create.jl") diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index d866e7183..e8b8c65b8 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -68,8 +68,6 @@ function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = end # Build the lookup accelerator tree tree = SortTileRecursiveTree.STRtree(geometries) - - true_crs = isnothing(crs) ? GI.crs(first(geometries)) : crs # TODO: auto manifold detection and best tree type for that manifold GeometryLookup(GO.Planar(), geometries, tree, dims, crs) @@ -264,88 +262,6 @@ function reproject(target::GeoFormat, l::GeometryLookup) source = crs(l) return rebuild(l; data = GO.reproject(l.data; source_crs = source, target_crs = target, always_xy = true)) end - -# I/O utils -function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) - n_points_per_geom_vec = GI.npoint.(geoms) - total_n_points = sum(n_points_per_geom_vec) - - # Create a vector of the total number of points - xs = fill(0.0, total_n_points) - ys = fill(0.0, total_n_points) - - ngeoms = length(geoms) - nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0) - - node_count_vec = fill(0, ngeoms) - part_node_count_vec = fill(0, nrings) - interior_ring_vec = fill(0, nrings) - - current_xy_index = 1 - current_ring_index = 1 - - for (i, geom) in enumerate(geoms) - - this_geom_npoints = GI.npoint(geom) - node_count_vec[i] = this_geom_npoints - - # push individual components of the ring - for poly in GO.flatten(GI.PolygonTrait, geom) - exterior_ring = GI.getexterior(poly) - for point in GI.getpoint(exterior_ring) - xs[current_xy_index] = GI.x(point) - ys[current_xy_index] = GI.y(point) - current_xy_index += 1 - end - part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring) - interior_ring_vec[current_ring_index] = 0 - current_ring_index += 1 - - if GI.nring(poly) == 1 - continue - else - for hole in GI.gethole(poly) - for point in GI.getpoint(hole) - xs[current_xy_index] = GI.x(point) - ys[current_xy_index] = GI.y(point) - current_xy_index += 1 - end - part_node_count_vec[current_ring_index] = GI.npoint(hole) - interior_ring_vec[current_ring_index] = 1 - current_ring_index += 1 - end - end - end - end - - return xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec - -end - -function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) - error("Not implemented yet") -end - -function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) - error("Not implemented yet") -end - - -function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec) - current_xy_index = 1 - current_ring_index = 1 - current_geom_index = 1 - - geoms = Vector{GO.GeometryBasics.MultiPolygon{2, Float64}}(undef, length(node_count_vec)) - - for (i, npoints) in enumerate(node_count_vec) - - this_geom_npoints = npoints - # this_geom_nholes = - end -end - - # total_area_of_intersection = 0.0 # current_area_of_intersection = 0.0 # last_point = nothing diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl new file mode 100644 index 000000000..a29815727 --- /dev/null +++ b/src/geometry_lookup/io.jl @@ -0,0 +1,86 @@ + + + +function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) + error("Not implemented yet") +end + +function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) + error("Not implemented yet") +end + + + +function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) + n_points_per_geom_vec = GI.npoint.(geoms) + total_n_points = sum(n_points_per_geom_vec) + + # Create a vector of the total number of points + xs = fill(0.0, total_n_points) + ys = fill(0.0, total_n_points) + + ngeoms = length(geoms) + nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0) + + node_count_vec = fill(0, ngeoms) + part_node_count_vec = fill(0, nrings) + interior_ring_vec = fill(0, nrings) + + current_xy_index = 1 + current_ring_index = 1 + + for (i, geom) in enumerate(geoms) + + this_geom_npoints = GI.npoint(geom) + node_count_vec[i] = this_geom_npoints + + # push individual components of the ring + for poly in GO.flatten(GI.PolygonTrait, geom) + exterior_ring = GI.getexterior(poly) + for point in GI.getpoint(exterior_ring) + xs[current_xy_index] = GI.x(point) + ys[current_xy_index] = GI.y(point) + current_xy_index += 1 + end + part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring) + interior_ring_vec[current_ring_index] = 0 + current_ring_index += 1 + + if GI.nring(poly) == 1 + continue + else + for hole in GI.gethole(poly) + for point in GI.getpoint(hole) + xs[current_xy_index] = GI.x(point) + ys[current_xy_index] = GI.y(point) + current_xy_index += 1 + end + part_node_count_vec[current_ring_index] = GI.npoint(hole) + interior_ring_vec[current_ring_index] = 1 + current_ring_index += 1 + end + end + end + end + + return xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec + +end + + + + +function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec) + current_xy_index = 1 + current_ring_index = 1 + current_geom_index = 1 + + geoms = Vector{GO.GeometryBasics.MultiPolygon{2, Float64}}(undef, length(node_count_vec)) + + for (i, npoints) in enumerate(node_count_vec) + + this_geom_npoints = npoints + # this_geom_nholes = + end +end + From 02372288ee928e861fff0cd659ba9408dca65020 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:02:45 -0400 Subject: [PATCH 25/57] dispatch on Dimension not Dim --- ext/RastersMakieExt/plotrecipes.jl | 10 +++++----- src/geometry_lookup/methods.jl | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ext/RastersMakieExt/plotrecipes.jl b/ext/RastersMakieExt/plotrecipes.jl index fd4fbf456..e9624653d 100644 --- a/ext/RastersMakieExt/plotrecipes.jl +++ b/ext/RastersMakieExt/plotrecipes.jl @@ -333,9 +333,9 @@ _prepare_dimarray(A) = DimArray(map(x -> _convert_with_missing(x, missingval(A)) _convert_with_missing(x::Real, missingval) = isequal(x, missingval) || ismissing(x) ? NaN32 : Float32(x) _convert_with_missing(x, missingval) = isequal(x, missingval) ? missing : x -Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dim{Name, <: Rasters.GeometryLookup}}}) where {T, Name} = nothing -Makie.expand_dimensions(::Makie.NoConversion, A::Raster{T, 1, <: Tuple{<: Dim{Name, <: Rasters.GeometryLookup}}}) where {T, Name} = nothing -function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dim{Name, <: Rasters.GeometryLookup}}}) where {T, Name} - geometries = val(only(dims(A))).data - return Makie.SpecApi.Poly(geometries; color = replace_missing(A, NaN).data) +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} = nothing +Makie.expand_dimensions(::Makie.NoConversion, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} = nothing +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} + geometries = lookup(only(dims(A))).data + return Makie.SpecApi.Poly(geometries; color = replace_missing(A, NaN).data, label = string(Rasters.DD.name(only(dims(A))))) end \ No newline at end of file diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index 369209482..1bfcd2664 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -1,10 +1,10 @@ -@noinline function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) - return _zonal(f, x, nothing, Dim{:Geometry}(data); kw...) +function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) + return _zonal(f, x, nothing, Geometry(data); kw...) end -@noinline function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dim{Name, <: GeometryLookup}; +function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: GeometryLookup}; progress=true, threaded=true, geometrycolumn=nothing, spatialslices, kw... -) where Name +) geoms = data.val.data # TODO: deliberately filter geoms based on extent and tree # so that we don't waste time calling `mask` on lots of geometries From b69192e594386f6d8759cae490d5b7e735f6faee Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:34:43 -0400 Subject: [PATCH 26/57] plot for skip missing --- ext/RastersMakieExt/plotrecipes.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ext/RastersMakieExt/plotrecipes.jl b/ext/RastersMakieExt/plotrecipes.jl index e9624653d..a3bb97053 100644 --- a/ext/RastersMakieExt/plotrecipes.jl +++ b/ext/RastersMakieExt/plotrecipes.jl @@ -338,4 +338,18 @@ Makie.expand_dimensions(::Makie.NoConversion, A::Raster{T, 1, <: Tuple{<: Dimens function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} geometries = lookup(only(dims(A))).data return Makie.SpecApi.Poly(geometries; color = replace_missing(A, NaN).data, label = string(Rasters.DD.name(only(dims(A))))) +end + + +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing +Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing +Makie.expand_dimensions(::Makie.NoConversion, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing +Makie.expand_dimensions(::Makie.NoConversion, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing + +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} + return Makie.convert_arguments(Makie.Poly, A.x[ismissing.(A.x)]) +end + +function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} + return Makie.convert_arguments(Makie.Poly, A.x[ismissing.(A.x)]) end \ No newline at end of file From 0d31b48fddfc75ca2c8dd1648017472c5f94478a Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:34:59 -0400 Subject: [PATCH 27/57] fix crs --- src/geometry_lookup/geometry_lookup.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index e8b8c65b8..6bea2eea1 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -109,7 +109,12 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dim SortTileRecursiveTree.STRtree(data) end new_crs = if isnokw(crs) - GI.crs(first(data)) + data_crs = GI.crs(data) + if isnothing(data_crs) + lookup.crs + else + data_crs + end else crs end @@ -259,7 +264,7 @@ Reproject just forwards to `GO.reproject`. =# function reproject(target::GeoFormat, l::GeometryLookup) - source = crs(l) + source = l.crs return rebuild(l; data = GO.reproject(l.data; source_crs = source, target_crs = target, always_xy = true)) end # total_area_of_intersection = 0.0 From 9db69a16e6dac865aa53f708af5f721cf806eba4 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 22 Apr 2025 18:35:07 -0400 Subject: [PATCH 28/57] use traittarget in io --- src/geometry_lookup/io.jl | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index a29815727..db3800d59 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -1,17 +1,17 @@ -function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) +function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.LineStringTrait, <: GI.MultiLineStringTrait}}, geoms) error("Not implemented yet") end -function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) +function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.PointTrait, <: GI.MultiPointTrait}}, geoms) error("Not implemented yet") end -function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) +function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.PolygonTrait, <: GI.MultiPolygonTrait}}, geoms) n_points_per_geom_vec = GI.npoint.(geoms) total_n_points = sum(n_points_per_geom_vec) @@ -67,8 +67,19 @@ function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo end +#= +function _def_dim_var!(ds::AbstractDataset, dim::Dimension{<: GeometryLookup}) + dimname = lowercase(string(DD.name(dim))) + haskey(ds.dim, dimname) && return nothing + CDM.defDim(ds, dimname, length(dim)) + lookup(dim) isa NoLookup && return nothing + attribdict = _attribdict(NoMetadata()) + CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) + +end +=# function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec) current_xy_index = 1 From f123e704fe5e2d78fdb046b1047cafa52e4dd15d Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 10:19:31 -0400 Subject: [PATCH 29/57] move lookup methods to lookups.jl --- src/Rasters.jl | 1 + src/geometry_lookup/geometry_lookup.jl | 138 ----------------------- src/geometry_lookup/lookups.jl | 150 +++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 138 deletions(-) create mode 100644 src/geometry_lookup/lookups.jl diff --git a/src/Rasters.jl b/src/Rasters.jl index 1adf11dac..afd5db110 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -123,6 +123,7 @@ include("utils.jl") include("skipmissing.jl") include("geometry_lookup/geometry_lookup.jl") +include("geometry_lookup/lookups.jl") include("geometry_lookup/methods.jl") include("geometry_lookup/io.jl") diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 6bea2eea1..951e90365 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -128,145 +128,7 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dim GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) end -#= -## Lookups methods - -Here we define the methods for the Lookups API. -The main entry point is `selectindices`, which is used to select the indices of the geometries that match the selector. - -We need to define methods that take selectors and convert them to extents, then GeometryOps needs -=# - -# Return an `Int` or Vector{Bool} -Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) = - selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) -function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K - dimsel = map(rebuild, map(name2dim, K), values(sel)) - selectindices(lookup, dimsel) -end -Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel -function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) - if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) - i = findfirst(x -> all(map(_matches, sel, x)), lookup) - isnothing(i) && _coord_not_found_error(sel) - return i - else - return [_matches(sel, x) for x in lookup] - end -end - -""" - _maybe_get_candidates(tree, selector_extent) - -Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. -""" -function _maybe_get_candidates(tree, selector_extent) - if Extents.disjoint(tree.rootnode.extent, selector_extent) - return error(""" - The geometry with extent $(GI.extent(val(sel))) is outside of the extent of the geometry lookup. - - The geometry lookup has extent $(GI.extent(tree.rootnode.extent)) - """) - end - potential_candidates = GO.SpatialTreeInterface.query(tree, Base.Fix1(Extents.intersects, selector_extent)) - - isempty(potential_candidates) && return error(""" - The geometry with extent $(GI.extent(val(sel))) does not interact with any of the geometries in the lookup. - """) - - return potential_candidates -end - -function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) - - potential_candidates = _maybe_get_candidates(lookup.tree, sel_ext) - - for candidate in potential_candidates - if GO.contains(lookup.data[candidate], val(sel)) - return candidate - end - end - return error(""" - The geometry with extent $(GI.extent(val(sel))) is not contained by any of the geometries in the lookup. - """) -end - - -function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) - lookup_ext = lookup.tree.rootnode.extent - sel_ext = GI.extent(val(sel)) - potential_candidates = _maybe_get_candidates(lookup.tree, sel_ext) - for candidate in potential_candidates - if GO.intersects(lookup.data[candidate], val(sel)) - return candidate - end - end - return error(""" - The geometry with extent $(GI.extent(val(sel))) does not touch any of the geometries in the lookup. - """) -end - -function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains}, Union{<: At, <: Contains}}) - xval, yval = val(xs), val(ys) - - lookup_ext = lookup.tree.rootnode.extent - - if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] - potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) - if isempty(potential_candidates) - return error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) # no geometry intersects with it - else - for candidate in potential_candidates - if GO.contains(lookup.data[candidate], (xval, yval)) - return candidate - end - end - return error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) # no geometry intersects with it - end - else - return error(""" - The point ($xval, $yval) is outside of the extent of the geometry lookup. - """) # outside of extent / geometrylookup bbox - end -end - -@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) - -function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) - print(io, " ") - show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) -end - -# Dimension methods - -@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = - rebuild(dim, [map(x -> zero(x), dim.val[1])]) - -function DD._format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) - checkaxis(dim, axis) - return dim -end - -# Local functions -_val_or_nothing(::Nothing) = nothing -_val_or_nothing(d::DD.Dimension) = val(d) - - -#= -## Reproject - -Reproject just forwards to `GO.reproject`. -=# - -function reproject(target::GeoFormat, l::GeometryLookup) - source = l.crs - return rebuild(l; data = GO.reproject(l.data; source_crs = source, target_crs = target, always_xy = true)) -end # total_area_of_intersection = 0.0 # current_area_of_intersection = 0.0 # last_point = nothing diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl new file mode 100644 index 000000000..6692c3fc2 --- /dev/null +++ b/src/geometry_lookup/lookups.jl @@ -0,0 +1,150 @@ +#= +# Lookups methods + +Here we define the methods for the Lookups API. +The main entry point is `selectindices`, which is used to select the indices of the geometries that match the selector. + +We need to define methods that take selectors and convert them to extents, then GeometryOps needs +=# + + +# Utility functions first! + +# Bounds - get the bounds of the lookup +Lookups.bounds(lookup::GeometryLookup) = if isempty(lookup.data) + Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) +else + if isnothing(lookup.tree) + mapreduce(GI.extent, Extents.union, lookup.data) + else + GI.extent(lookup.tree) + end +end + +# Return an `Int` or Vector{Bool} +Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) = + selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) +function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K + dimsel = map(rebuild, map(name2dim, K), values(sel)) + selectindices(lookup, dimsel) +end +Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel +function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) + if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) + i = findfirst(x -> all(map(_matches, sel, x)), lookup) + isnothing(i) && _coord_not_found_error(sel) + return i + else + return [_matches(sel, x) for x in lookup] + end +end + +# Selector implementations that use geometry (like Contains, Touches, etc.) +""" + _maybe_get_candidates(lookup, selector_extent) + +Get the candidates for the selector extent. If the selector extent is disjoint from the tree rootnode extent, return an error. +""" +function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) + tree = lookup.tree + if isnothing(tree) + return 1:length(lookup) + end + + if Extents.disjoint(GI.extent(tree), selector_extent) + return Int[] #=error(""" + The geometry with extent $(GI.extent(val(sel))) is outside of the extent of the geometry lookup. + + The geometry lookup has extent $(GI.extent(tree)) + """)=# + end + + potential_candidates = GO.SpatialTreeInterface.query(tree, Base.Fix1(Extents.intersects, selector_extent)) + + isempty(potential_candidates) && return Int[] #=error(""" + The geometry with extent $(GI.extent(val(sel))) does not interact with any of the geometries in the lookup. + """)=# + + return potential_candidates +end + +function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) + + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + + for candidate in potential_candidates + if GO.contains(lookup.data[candidate], val(sel)) + return candidate + end + end + return error(""" + The geometry with extent $(GI.extent(val(sel))) is not contained by any of the geometries in the lookup. + """) +end + + +function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) + lookup_ext = lookup.tree.rootnode.extent + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + + for candidate in potential_candidates + if GO.intersects(lookup.data[candidate], val(sel)) + return candidate + end + end + return Int[] #=error(""" + The geometry with extent $(GI.extent(val(sel))) does not touch any of the geometries in the lookup. + """) =# +end + +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains}, Union{<: At, <: Contains}}) + xval, yval = val(xs), val(ys) + + lookup_ext = lookup.tree.rootnode.extent + + if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] + potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) + if isempty(potential_candidates) + return Int[] #=error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) =# # no geometry intersects with it + else + for candidate in potential_candidates + if GO.contains(lookup.data[candidate], (xval, yval)) + return candidate + end + end + return Int[] #=error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) =# # no geometry intersects with it + end + else + return Int[] #=error(""" + The point ($xval, $yval) is outside of the extent of the geometry lookup. + """) =# # outside of extent / geometrylookup bbox + end +end + +@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) + +function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) + print(io, " ") + show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) +end + +# Dimension methods + +@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = + rebuild(dim, [map(x -> zero(x), dim.val[1])]) + +function DD._format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) + checkaxis(dim, axis) + return dim +end + +# Local functions +_val_or_nothing(::Nothing) = nothing +_val_or_nothing(d::DD.Dimension) = val(d) + + From 645e9dcf95a3e16d41f1015f57041671908e5845 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 10:20:23 -0400 Subject: [PATCH 30/57] don't slice if the dims are already only x and y --- src/methods/zonal.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index cd29995d6..d6dbf4cac 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -204,6 +204,9 @@ _maybe_skipmissing_call(f, A, sm) = istrue(sm) ? f(skipmissing(A)) : f(A) # which is what we want. function _mapspatialslices(f, x::AbstractDimArray; spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()), missingval = missingval(x)) dimswewant = DD.otherdims(x, spatialdims) + if isempty(dimswewant) + return f(x) + end slicedims = rebuild.(dims(x, dimswewant), axes.((x,), dimswewant)) if any(isempty, DD.dims(x, spatialdims)) # If any of the spatial dims are empty, we can just return a constant missing array @@ -261,6 +264,15 @@ function __do_cat_with_last_dim(input_arrays) return backing_array end +# This is a wrapper around the helper function that performs the final cat and rebuild, but on +# a dimarray. +function _cat_and_rebuild_parent(parent, children, newdim) + backing_array = __do_cat_with_last_dim(children) # see zonal.jl for implementation + children_dims = dims(first(children)) + final_dims = DD.format((children_dims..., newdim), backing_array) + return rebuild(parent; data = backing_array, dims = final_dims) +end + precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 1}},)) precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 2}},)) precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 3}},)) From cee0dae08f5d76f6363c0d8336a4afaee6f2875b Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 10:36:20 -0400 Subject: [PATCH 31/57] add tests but GeometryOps is broken for this one --- test/geometry_lookup.jl | 57 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/test/geometry_lookup.jl b/test/geometry_lookup.jl index 0107e54b0..9b4f33314 100644 --- a/test/geometry_lookup.jl +++ b/test/geometry_lookup.jl @@ -2,26 +2,29 @@ using NaturalEarth using Test using Rasters, DimensionalData +using Rasters.Lookups import Proj +import GeometryOps as GO, GeoInterface as GI +using Extents @testset "construction" begin # fetch land polygons from Natural Earth - land_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry # create a DimVector from the land polygons - gl = GeometryLookup(land_polygons) + gl = GeometryLookup(country_polygons) @test crs(gl) == EPSG(4326) - @test all(GO.equals, zip(val(gl), land_polygons)) + @test all(GO.equals, zip(val(gl), country_polygons)) end @testset "reprojecting a GeometryLookup" begin # fetch land polygons from Natural Earth - land_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110) # create a DimVector from the land polygons - dv = Raster(rand(Dim{:Geometry}(GeometryLookup(land_polygons)))) + dv = Raster(rand(Dim{:Geometry}(GeometryLookup(country_polygons)))) # reproject the full vector data cube (vector data vector, in this case :D) target_crs = ProjString("+proj=wintri +type=crs") reprojected_via_rasters = val(dims(reproject(target_crs, dv), Dim{:Geometry})) - reprojected_via_geometryops = GO.reproject(land_polygons; source_crs = EPSG(4326), target_crs = target_crs) + reprojected_via_geometryops = GO.reproject(country_polygons; source_crs = EPSG(4326), target_crs = target_crs) # test that the reprojected geometries are the same @test all(splat(GO.equals), zip( reprojected_via_rasters, # reproject the vdc, get the geometries from it @@ -29,3 +32,45 @@ end ) ) end + +# @testset "Tree types" begin +ras2d = Raster( + rand( + X(Sampled(-180:179, sampling = Intervals(Start()))), + Y(Sampled(-90:89, sampling = Intervals(Start()))), + ); + crs = EPSG(4326) +) + +@testset "Different tree types" begin + country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110) + country_lookup = GeometryLookup(country_polygons) + country_lookup_notree = GeometryLookup(country_polygons; tree = nothing) + country_lookup_flat_notree = GeometryLookup(country_polygons; tree = GO.SpatialTreeInterface.FlatNoTree) + + results = zonal(sum, ras2d; of = country_polygons, threaded = false) + + results_regular = zonal(sum, ras2d; of = country_lookup) + results_notree = zonal(sum, ras2d; of = country_lookup_notree) + results_flat_notree = zonal(sum, ras2d; of = country_lookup_flat_notree) + + @testset "Zonal" begin + @test results_regular isa Raster + @test hasdim(results_regular, Geometry) + @test lookup(results_regular, Geometry) isa GeometryLookup + @test val(lookup(results_regular, Geometry)) == country_lookup + @test val(lookup(results_regular, Geometry)) == country_polygons.geometry + + @test results == results_regular + @test results_regular == results_notree + @test results_regular == results_flat_notree + end + + @testset "Selectors" begin + @testset "Touches that is contained in a geometry" begin + usa_extent = Extent(X = (103, 104), Y = (44, 45)) # squarely within the USA + # broken because of GeometryOps????? + @test_broken only(DimensionalData.dims2indices(results_regular, Geometry(Touches(usa_extent)))) == findfirst(==("USA"), country_polygons.ADM0_A3) + end + end +end \ No newline at end of file From e600bed05fd2b2677baec583134d6fdfd98f495e Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 10:36:30 -0400 Subject: [PATCH 32/57] better label in plot --- ext/RastersMakieExt/plotrecipes.jl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ext/RastersMakieExt/plotrecipes.jl b/ext/RastersMakieExt/plotrecipes.jl index a3bb97053..342b1c13b 100644 --- a/ext/RastersMakieExt/plotrecipes.jl +++ b/ext/RastersMakieExt/plotrecipes.jl @@ -335,14 +335,24 @@ _convert_with_missing(x, missingval) = isequal(x, missingval) ? missing : x Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} = nothing Makie.expand_dimensions(::Makie.NoConversion, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} = nothing + function Makie.convert_arguments(::Type{<: Makie.Poly}, A::Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}) where {T} geometries = lookup(only(dims(A))).data - return Makie.SpecApi.Poly(geometries; color = replace_missing(A, NaN).data, label = string(Rasters.DD.name(only(dims(A))))) + color = replace_missing(A, NaN).data + label = string(Rasters.DD.name(A)) + isempty(label) && (label = string(Rasters.DD.name(only(dims(A))))) + + return Makie.SpecApi.Poly( + geometries; + color = color, + label = label + ) end Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing Makie.expand_dimensions(::Type{<: Makie.Poly}, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing + Makie.expand_dimensions(::Makie.NoConversion, A::Base.SkipMissing{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing Makie.expand_dimensions(::Makie.NoConversion, A::Rasters.SkipMissingVal{<: Raster{T, 1, <: Tuple{<: Dimension{<:GeometryLookup}}}}) where {T} = nothing From aed90c83e808a70bec8490ba9675a29e75fd77b2 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 10:36:43 -0400 Subject: [PATCH 33/57] move reproject to methods --- src/geometry_lookup/methods.jl | 54 ++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index 1bfcd2664..017cb37ed 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -1,4 +1,21 @@ +#= +## Reproject +Reproject just forwards to `GO.reproject`. Since regular reproject here in Rasters and GO reproject both need Proj, +this is not too bad. +=# + +function reproject(target::GeoFormat, l::GeometryLookup) + source = l.crs + # TODO: allow GDAL reproject for its antimeridian cutting + return rebuild(l; data = GO.reproject(l.data; source_crs = source, target_crs = target, always_xy = true), crs = target) +end + +#= +## Zonal + +Zonal with a geometry lookup or a geometry dimension should return a vector data cube. +=# function _zonal(f, x::RasterStackOrArray, ::Nothing, data::GeometryLookup; kw...) return _zonal(f, x, nothing, Geometry(data); kw...) end @@ -7,8 +24,8 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: Geometry ) geoms = data.val.data # TODO: deliberately filter geoms based on extent and tree - # so that we don't waste time calling `mask` on lots of geometries - # that are outside the extent of the raster + # so that we don't waste time descending through the pipeline + # for geometries that are outside the extent of the raster # but that is for later. if istrue(spatialslices) && data.val.dims != (X(), Y()) && dims(x, data.val.dims) != dims(x, (Val{DD.XDim}(), Val{DD.YDim}())) @@ -23,25 +40,36 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: Geometry zs[i] = _zonal(f, x, geoms[i]; spatialslices, kw...) end - return_lookup_dims = if istrue(spatialslices) - dims(data, (Val{DD.XDim}(), Val{DD.YDim}())) - elseif spatialslices isa DD.AllDims + return_lookup_dims = if spatialslices isa DD.AllDims dims(data, spatialslices) + elseif istrue(spatialslices) + dims(data, (Val{DD.XDim}(), Val{DD.YDim}())) else # fallback (X(), Y()) end - - return_lookup = rebuild(data.val; dims = rebuild.(return_lookup_dims, (:,))) + # Note here that e.g. `X()` is actually `X(:)`. That's why we rebuild the dims with colons - + # so that we get a "neutral materialized dimension" out of it. + return_lookup = rebuild(lookup(data); dims = rebuild.(return_lookup_dims, (:,))) return_dimension = rebuild(data, return_lookup) - if zs isa AbstractVector{<: Union{<: AbstractDimArray, <: AbstractDimStack, Missing}} - backing_array = __do_cat_with_last_dim(zs) - z_dims = dims(first(zs)) - new_dims = DD.format((z_dims..., return_dimension), backing_array) - return rebuild(x; data = backing_array, dims = new_dims) + if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} + _cat_and_rebuild_parent(x, zs, return_dimension) + elseif zs isa AbstractVector{<: Union{<: AbstractDimStack, Missing}} + dimarrays = NamedTuple{names(st)}( + ntuple(length(names(st))) do i + _cat_and_rebuild_parent(layers(st)[i], (layers(z)[i] for z in zs), return_dimension) + end + ) + return rebuild(x; data = dimarrays, dims = (dims(first(zs))..., return_dimension)) else # TODO: how should we reconstruct a rasterstack from a vector of named tuples? return Raster(zs, (return_dimension,)) end -end \ No newline at end of file +end + +#= +## Crop + +Cropping a geometry lookup to either a geometry lookup, a geometry, or a bounding box should return a geometry lookup. +=# \ No newline at end of file From 5ecde8fafd39f7176fd6c04c91d9ef9e935baaab Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 10:36:58 -0400 Subject: [PATCH 34/57] handle empty data, do not create trees for it --- src/geometry_lookup/geometry_lookup.jl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index 951e90365..c15acb030 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -39,7 +39,7 @@ struct GeometryLookup{A <: AbstractVector, D, M <: GO.Manifold, Tree, CRS} <: Lo crs::CRS end -function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nokw) +function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nokw, tree = nokw) # First, retrieve the geometries - from a table, vector of geometries, etc. geometries = _get_geometries(data, geometrycolumn) @@ -67,8 +67,24 @@ function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = """)) end # Build the lookup accelerator tree - tree = SortTileRecursiveTree.STRtree(geometries) - + tree = if isnokw(tree) + SortTileRecursiveTree.STRtree(geometries) + elseif GO.SpatialTreeInterface.isspatialtree(tree) + if tree isa DataType + tree(geometries) + else + tree + end + elseif isnothing(tree) + nothing + else + throw(ArgumentError(""" + Got an argument for `tree` which is not a valid spatial tree (according to `GeometryOps.SpatialTreeInterface`) + nor `nokw` or `nothing` + + Type is $(typeof(tree)) + """)) + end # TODO: auto manifold detection and best tree type for that manifold GeometryLookup(GO.Planar(), geometries, tree, dims, crs) end @@ -96,6 +112,8 @@ function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dim new_tree = if isnokw(tree) if data == lookup.data lookup.tree + elseif isempty(data) + nothing else SortTileRecursiveTree.STRtree(data) end From 07ba496d396a14288e055c97aa915dcfa8c8e8ee Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 15:49:05 -0400 Subject: [PATCH 35/57] make geometry lookup an abstract merged --- src/geometry_lookup/geometry_lookup.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index c15acb030..c86be1300 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -31,7 +31,7 @@ dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true ``` """ -struct GeometryLookup{A <: AbstractVector, D, M <: GO.Manifold, Tree, CRS} <: Lookups.Lookup{A, 1} +struct GeometryLookup{T, A <: AbstractVector{T}, D, M <: GO.Manifold, Tree, CRS} <: DD.Dimensions.AbstractMergedLookup{T} manifold::M data::A tree::Tree From 4624c5aa70e00843a62800daeb214d35f2b46729 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 15:49:17 -0400 Subject: [PATCH 36/57] enable lookups for per-dim touches and intervals --- src/geometry_lookup/lookups.jl | 54 +++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl index 6692c3fc2..60320b039 100644 --- a/src/geometry_lookup/lookups.jl +++ b/src/geometry_lookup/lookups.jl @@ -7,9 +7,6 @@ The main entry point is `selectindices`, which is used to select the indices of We need to define methods that take selectors and convert them to extents, then GeometryOps needs =# - -# Utility functions first! - # Bounds - get the bounds of the lookup Lookups.bounds(lookup::GeometryLookup) = if isempty(lookup.data) Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) @@ -22,8 +19,10 @@ else end # Return an `Int` or Vector{Bool} -Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) = +function Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) + @show sel selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) +end function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K dimsel = map(rebuild, map(name2dim, K), values(sel)) selectindices(lookup, dimsel) @@ -31,11 +30,11 @@ end Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) - i = findfirst(x -> all(map(_matches, sel, x)), lookup) + i = findfirst(x -> all(map(Lookups._matches, sel, x)), lookup) isnothing(i) && _coord_not_found_error(sel) return i else - return [_matches(sel, x) for x in lookup] + return [Lookups._matches(sel, x) for x in lookup] end end @@ -98,10 +97,51 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) """) =# end +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{ <: Touches}, Union{ <: Touches}}) + target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) + + potential_candidates = _maybe_get_candidates(lookup, target_ext) + if isempty(potential_candidates) + return Int[] #=error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) =# # no geometry intersects with it + else + for candidate in potential_candidates + if GO.intersects(lookup.data[candidate], target_ext) + return candidate + end + end + return Int[] #=error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) =# # no geometry intersects with it + end +end + + +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: DD.IntervalSets.ClosedInterval}, Union{<: DD.IntervalSets.ClosedInterval}}) + target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) + + potential_candidates = _maybe_get_candidates(lookup, target_ext) + if isempty(potential_candidates) + return Int[] #=error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) =# # no geometry intersects with it + else + for candidate in potential_candidates + if GO.covers(target_ext, lookup.data[candidate]) + return candidate + end + end + return Int[] #=error(""" + The point ($xval, $yval) is not within any of the geometries in the lookup. + """) =# # no geometry intersects with it + end +end + function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains}, Union{<: At, <: Contains}}) xval, yval = val(xs), val(ys) - lookup_ext = lookup.tree.rootnode.extent + lookup_ext = Lookups.bounds(lookup) if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) From f13d0dbe1cf22e49c7534e2e58779d98d5b06d26 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 23:32:32 -0400 Subject: [PATCH 37/57] add sources --- Project.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Project.toml b/Project.toml index c88dc9f61..d25c8f06c 100644 --- a/Project.toml +++ b/Project.toml @@ -116,3 +116,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeoDataFrames", "GeometryBasics", "GRIBDatasets", "Makie", "NCDatasets", "Plots", "Proj", "RasterDataSources", "SafeTestsets", "Shapefile", "StableRNGs", "StatsBase", "Test", "ZarrDatasets"] + + +[sources] +DimensionalData = {url = "https://github.com/rafaqz/DimensionalData.jl", rev = "as/individualindexing"} +GeometryOps = {url = "https://github.com/JuliaGeo/GeometryOps.jl", rev = "as/extentforwarding_for_predicates"} \ No newline at end of file From d0c68f17159afc9673ec34a6bdc652bc37111aa3 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 23:32:44 -0400 Subject: [PATCH 38/57] rebuild stacks in regular zonal --- src/methods/zonal.jl | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index d6dbf4cac..2b911779d 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -143,11 +143,18 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data; _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i zs[i] = _zonal(f, x, geoms[i]; missingval, kw...) end - if zs isa Vector{<: Union{<: AbstractDimArray, <: AbstractDimStack}} - backing_array = __do_cat_with_last_dim(zs) - return_dimension = Dim{:Geometry}(axes(zs, 1)) - return rebuild(x; data = backing_array, dims = DD.format((dims(first(zs))..., return_dimension), backing_array), missingval = missingval) - end + + return_dimension = Dim{:Geometry}(axes(zs, 1)) + + if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} + _cat_and_rebuild_parent(x, zs, return_dimension) + elseif zs isa AbstractVector{<: Union{<: AbstractDimStack, Missing}} + dimarrays = NamedTuple{names(st)}( + ntuple(length(names(st))) do i + _cat_and_rebuild_parent(layers(st)[i], (layers(z)[i] for z in zs), return_dimension) + end + ) + return rebuild(x; data = dimarrays, dims = (dims(first(zs))..., return_dimension)) return zs end From cd711002e6f0520f58b3200cdf460cdaf0eb46f4 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 23 Apr 2025 23:33:00 -0400 Subject: [PATCH 39/57] updates for DD pr --- src/geometry_lookup/geometry_lookup.jl | 11 ++++++++--- src/geometry_lookup/methods.jl | 1 - 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index c86be1300..fc0dce63e 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -31,7 +31,7 @@ dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true ``` """ -struct GeometryLookup{T, A <: AbstractVector{T}, D, M <: GO.Manifold, Tree, CRS} <: DD.Dimensions.AbstractMergedLookup{T} +struct GeometryLookup{T, A <: AbstractVector{T}, D, M <: GO.Manifold, Tree, CRS} <: DD.Dimensions.MultiDimensionalLookup{T} manifold::M data::A tree::Tree @@ -39,7 +39,7 @@ struct GeometryLookup{T, A <: AbstractVector{T}, D, M <: GO.Manifold, Tree, CRS} crs::CRS end -function GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing, crs = nokw, tree = nokw) +function GeometryLookup(data, dims=(X(), Y()); geometrycolumn=nothing, crs=nokw, tree=nokw) # First, retrieve the geometries - from a table, vector of geometries, etc. geometries = _get_geometries(data, geometrycolumn) @@ -107,7 +107,12 @@ DD.parent(lookup::GeometryLookup) = lookup.data DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l # Make sure that the tree is rebuilt if the data changes -function DD.rebuild(lookup::GeometryLookup; data = lookup.data, tree = nokw, dims = lookup.dims, crs = nokw, manifold = nokw, metadata = nokw) +function DD.rebuild( + lookup::GeometryLookup; + data=lookup.data, tree=nokw, + dims=lookup.dims, crs=nokw, + manifold=nokw, metadata=nokw + ) # TODO: metadata support for geometry lookup new_tree = if isnokw(tree) if data == lookup.data diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index 017cb37ed..4057086de 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -63,7 +63,6 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: Geometry ) return rebuild(x; data = dimarrays, dims = (dims(first(zs))..., return_dimension)) else - # TODO: how should we reconstruct a rasterstack from a vector of named tuples? return Raster(zs, (return_dimension,)) end end From 3bd745135930548d607d07a68537d8a92d2233ed Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Thu, 24 Apr 2025 06:09:58 -0400 Subject: [PATCH 40/57] fix --- src/methods/zonal.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 2b911779d..8b2ccf5c3 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -155,6 +155,7 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data; end ) return rebuild(x; data = dimarrays, dims = (dims(first(zs))..., return_dimension)) + end return zs end From f89e23b6329a257777090464fa489027cec69cbd Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Thu, 24 Apr 2025 08:25:17 -0400 Subject: [PATCH 41/57] make crs better on rasters --- src/crs.jl | 24 +++++++++++++++++++----- src/geometry_lookup/geometry_lookup.jl | 3 ++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/crs.jl b/src/crs.jl index 9cd094596..d32d828d4 100644 --- a/src/crs.jl +++ b/src/crs.jl @@ -9,12 +9,26 @@ coordinate reference system at all. See [`setcrs`](@ref) to set it manually. """ function GeoInterface.crs(obj::Union{<:AbstractRaster,<:AbstractRasterStack,<:AbstractRasterSeries, <:DimTuple}) - if hasdim(obj, Y) - crs(dims(obj, Y)) - elseif hasdim(obj, X) - crs(dims(obj, X)) + each_dim_crs = map(crs, dims(obj)) + firstcrs = findfirst(!isnothing, each_dim_crs) + if isnothing(firstcrs) + return nothing else - nothing + for (dim, crs) in zip(dims(obj), each_dim_crs) + if !isnothing(crs) && crs !== each_dim_crs[firstcrs] + throw(ArgumentError(""" + All dimensions must have the same crs, but dims $(name(dim)) and $(name(dims(obj, firstcrs))) + have different CRS: + $(each_dim_crs[firstcrs]) + + and + + $(crs) + """ + )) + end + end + return each_dim_crs[firstcrs] end end GeoInterface.crs(dim::Dimension) = crs(lookup(dim)) diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl index fc0dce63e..762bf451c 100644 --- a/src/geometry_lookup/geometry_lookup.jl +++ b/src/geometry_lookup/geometry_lookup.jl @@ -89,7 +89,8 @@ function GeometryLookup(data, dims=(X(), Y()); geometrycolumn=nothing, crs=nokw, GeometryLookup(GO.Planar(), geometries, tree, dims, crs) end -crs(l::GeometryLookup) = l.crs +GeoInterface.crs(l::GeometryLookup) = l.crs +setcrs(l::GeometryLookup, crs) = rebuild(l; crs) #= From 2e44421b081832f1405ddcdbc916a44547858719 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Thu, 24 Apr 2025 10:12:30 -0400 Subject: [PATCH 42/57] allow Near on points --- src/geometry_lookup/lookups.jl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl index 60320b039..e039a3b5b 100644 --- a/src/geometry_lookup/lookups.jl +++ b/src/geometry_lookup/lookups.jl @@ -81,6 +81,24 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) """) end +function Lookups.selectindices(lookup::GeometryLookup, sel::Near) + geom = val(sel) + @assert GI.isgeometry(geom) + # TODO: temporary + @assert GI.trait(geom) isa GI.PointTrait "Only point geometries are supported for the near lookup at this point! We will add more geometry support in the future." + + # Get the nearest geometry + # TODO: this sucks! Use some branch and bound algorithm + # on the spatial tree instead. + # if pointtrait + return findmin(x -> GO.distance(geom, x), lookup.data)[2] + # else + # findmin(x -> GO.distance(GO.GEOS(), geom, x), lookup.data)[2] + # end + # this depends on LibGEOS being installed. + +end + function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) lookup_ext = lookup.tree.rootnode.extent From 2184b92a6f7b182b207003747c82eaade15f70b5 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Thu, 24 Apr 2025 10:15:42 -0400 Subject: [PATCH 43/57] enable at on points / forward for other things --- src/geometry_lookup/lookups.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl index e039a3b5b..6e6b264de 100644 --- a/src/geometry_lookup/lookups.jl +++ b/src/geometry_lookup/lookups.jl @@ -81,6 +81,14 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) """) end +function Lookups.selectindices(lookup::GeometryLookup, sel::At) + if GI.trait(val(sel)) isa GI.PointTrait + Lookups.selectindices(lookup, (At(GI.x(val(sel))), At(GI.y(val(sel))))) + else # invoke the default method + Lookups.at(lookup, sel) + end +end + function Lookups.selectindices(lookup::GeometryLookup, sel::Near) geom = val(sel) @assert GI.isgeometry(geom) From 88230bb3f4809e9be924df893c8bb7f20deb0c32 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 10:31:42 -0400 Subject: [PATCH 44/57] fix touches --- src/geometry_lookup/lookups.jl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl index 6e6b264de..f14a59e53 100644 --- a/src/geometry_lookup/lookups.jl +++ b/src/geometry_lookup/lookups.jl @@ -113,14 +113,9 @@ function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) sel_ext = GI.extent(val(sel)) potential_candidates = _maybe_get_candidates(lookup, sel_ext) - for candidate in potential_candidates - if GO.intersects(lookup.data[candidate], val(sel)) - return candidate - end + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], val(sel)) end - return Int[] #=error(""" - The geometry with extent $(GI.extent(val(sel))) does not touch any of the geometries in the lookup. - """) =# end function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{ <: Touches}, Union{ <: Touches}}) From 626b8ee4e9ec848fdcc9612fff0c100e6b30a715 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 10:32:09 -0400 Subject: [PATCH 45/57] minor fixes --- src/methods/zonal.jl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 8b2ccf5c3..2cd001886 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -166,7 +166,7 @@ function _alloc_zonal(f, x, geoms, n; spatialslices = _True(), missingval, kw... z1 = _zonal(f, x, first(geoms); spatialslices, missingval, kw...) for geom in geoms z1 = _zonal(f, x, geom; spatialslices, missingval, kw...) - if !(ismissing(z1) || z1 == missingval) + if !(ismissing(z1) || z1 === missingval) break end n_missing += 1 @@ -262,9 +262,10 @@ end # Users can always rechunk later. But this saves us a lot of time when doing datacube ops. # And the chunk pattern is available in the concat diskarray. function __do_cat_with_last_dim(input_arrays) + # This assumes that the input array is a vector of arrays. As = Missings.disallowmissing(collect(input_arrays)) dims = ndims(first(As)) + 1 - sz = map(ntuple(identity, dims)) do i + sz = ntuple(dims) do i i == dims ? length(As) : 1 end cdas = reshape(As, sz) @@ -272,6 +273,16 @@ function __do_cat_with_last_dim(input_arrays) return backing_array end +function __do_cat_with_last_dim_multidim_version(As) + # This CANNOT assume that the input array is a vector of arrays. + new_n_dims = ndims(As) + ndims(first(As)) + sz = ntuple(new_n_dims) do i + i <= ndims(first(As)) ? 1 : size(As, i-1) + end + cdas = reshape(As, sz) + backing_array = DiskArrays.ConcatDiskArray(cdas) + return backing_array +end # This is a wrapper around the helper function that performs the final cat and rebuild, but on # a dimarray. function _cat_and_rebuild_parent(parent, children, newdim) From 899b36ab55c44990f9fb013de79c188014056a69 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 10:32:32 -0400 Subject: [PATCH 46/57] make extract multidimensional --- src/methods/extract.jl | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/methods/extract.jl b/src/methods/extract.jl index d54fecbcd..3b120271f 100644 --- a/src/methods/extract.jl +++ b/src/methods/extract.jl @@ -170,10 +170,46 @@ Base.@constprop :aggressive @inline function extract(x::RasterStackOrArray, data gs = _get_geometries(data, geometrycolumn) gs, (isempty(gs) || all(ismissing, gs)) ? missing : first(Base.skipmissing(gs)) end + # Taken from zonal.jl and _mapspatialslices there. + spatialdims = (Val{DD.XDim}(), Val{DD.YDim}()) + dimswewant = DD.otherdims(x, spatialdims) + + if isempty(dimswewant) # the raster is 2 dimensional in spatial dims only + e = Extractor(x, g1; name, skipmissing, flatten, id, geometry, index) + id_init = 1 + return _extract(x, e, id_init, g; kw...) + else # the raster has other dims than spatial dims, so we need to slice through them + # and return...that's right...a VECTOR DATA CUBE! + # Taken from zonal.jl and _mapspatialslices there. + # just get the first index of the "other" dims. + # This assumes they are nonzero in length but that seems reasonable, for now. + slicedims = rebuild.(dims(x, dimswewant), 1) + ras_for_extractor_construction = view(x, slicedims...) + @assert isfalse(skipmissing) "skipmissing must be false for >2d data cubes" + # TODO: this is inefficient, because it's doing the burning once per slice + # instead, it should do it once total and then reuse, even if that allocates. + extractor = Extractor(ras_for_extractor_construction, g1; name, skipmissing = false, flatten, id, geometry, index) + id_init = 1 + result_for_each_spatial_slice = _mapspatialslices(x; spatialdims) do xy_ras + id_init = 1 + res1 = _extract(xy_ras, extractor, id_init, g; kw...) + # Remove the geometry from the result + if isa(res1, AbstractArray) + return Base.structdiff.(res1, ((; geometry=1,),)) + else + return Base.structdiff(res1, (; geometry=1,),) + end + end - e = Extractor(x, g1; name, skipmissing, flatten, id, geometry, index) - id_init = 1 - return _extract(x, e, id_init, g; kw...) + if GI.isgeometry(data) || GI.isfeature(data) + rebuild(result_for_each_spatial_slice; refdims = Geometry(data)) + else # featurecollection, table, vector. + backing_array = __do_cat_with_last_dim_multidim_version(result_for_each_spatial_slice) + backing_dim = Geometry(GeometryLookup(g)) + return rebuild(result_for_each_spatial_slice; data = backing_array, dims = (backing_dim, dims(result_for_each_spatial_slice)...)) + end + end + end # TODO use a GeometryOpsCore method like `applyreduce` here? From 26ce1a044f1efc44a9ce6c201982985aa0da6cc3 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 10:32:47 -0400 Subject: [PATCH 47/57] make the change non breaking --- src/methods/zonal.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 2cd001886..0a9ed4d3f 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -65,13 +65,13 @@ insertcols!(january_stats, 1, :country => first.(split.(countries.ADMIN, r"[^A-Z 3 columns and 243 rows omitted ``` """ -function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_True(), missingval = isnothing(missingval(x)) ? missing : missingval(x), kw...) +function zonal(f, x::RasterStack; of, skipmissing=true, spatialslices=_False(), missingval=isnothing(missingval(x)) ? missing : missingval(x), kw...) # TODO: open currently doesn't work so well for large rasterstacks, # we need to fix that before we can go back to this being a single method # on `RasterStackOrArray`. _zonal(f, _prepare_for_burning(x), of; skipmissing, spatialslices, missingval, kw...) end -function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_True(), missingval = isnothing(missingval(x)) ? missing : missingval(x), kw...) +function zonal(f, x::Raster; of, skipmissing=true, spatialslices=_False(), missingval=isnothing(missingval(x)) ? missing : missingval(x), kw...) open(x) do xo _zonal(f, _prepare_for_burning(xo), of; skipmissing, spatialslices, missingval, kw...) end From a97ae9f794a87f942871092ff26156d82ba97526 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 16:07:19 -0400 Subject: [PATCH 48/57] Clean up lookups --- src/geometry_lookup/lookups.jl | 100 ++++++++------------------------- 1 file changed, 23 insertions(+), 77 deletions(-) diff --git a/src/geometry_lookup/lookups.jl b/src/geometry_lookup/lookups.jl index f14a59e53..3d63ea93e 100644 --- a/src/geometry_lookup/lookups.jl +++ b/src/geometry_lookup/lookups.jl @@ -18,16 +18,18 @@ else end end -# Return an `Int` or Vector{Bool} +# Return an `Int` or Vector{Bool} +# Base case: got a standard index that can go into getindex on a base Array +Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel +# other cases: +# - decompose selectors function Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) - @show sel selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) end function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K dimsel = map(rebuild, map(name2dim, K), values(sel)) selectindices(lookup, dimsel) end -Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) i = findfirst(x -> all(map(Lookups._matches, sel, x)), lookup) @@ -46,39 +48,21 @@ Get the candidates for the selector extent. If the selector extent is disjoint """ function _maybe_get_candidates(lookup::GeometryLookup, selector_extent) tree = lookup.tree - if isnothing(tree) - return 1:length(lookup) - end - - if Extents.disjoint(GI.extent(tree), selector_extent) - return Int[] #=error(""" - The geometry with extent $(GI.extent(val(sel))) is outside of the extent of the geometry lookup. - - The geometry lookup has extent $(GI.extent(tree)) - """)=# - end - - potential_candidates = GO.SpatialTreeInterface.query(tree, Base.Fix1(Extents.intersects, selector_extent)) - - isempty(potential_candidates) && return Int[] #=error(""" - The geometry with extent $(GI.extent(val(sel))) does not interact with any of the geometries in the lookup. - """)=# - + isnothing(tree) && return 1:length(lookup) + Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] + potential_candidates = GO.SpatialTreeInterface.query( + tree, + Base.Fix1(Extents.intersects, selector_extent) + ) + isempty(potential_candidates) && return Int[] return potential_candidates end function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) - potential_candidates = _maybe_get_candidates(lookup, sel_ext) - - for candidate in potential_candidates - if GO.contains(lookup.data[candidate], val(sel)) - return candidate - end + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], val(sel)) end - return error(""" - The geometry with extent $(GI.extent(val(sel))) is not contained by any of the geometries in the lookup. - """) end function Lookups.selectindices(lookup::GeometryLookup, sel::At) @@ -109,10 +93,8 @@ end function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) - lookup_ext = lookup.tree.rootnode.extent sel_ext = GI.extent(val(sel)) potential_candidates = _maybe_get_candidates(lookup, sel_ext) - return filter(potential_candidates) do candidate GO.intersects(lookup.data[candidate], val(sel)) end @@ -120,70 +102,34 @@ end function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{ <: Touches}, Union{ <: Touches}}) target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) - potential_candidates = _maybe_get_candidates(lookup, target_ext) - if isempty(potential_candidates) - return Int[] #=error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) =# # no geometry intersects with it - else - for candidate in potential_candidates - if GO.intersects(lookup.data[candidate], target_ext) - return candidate - end - end - return Int[] #=error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) =# # no geometry intersects with it + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], target_ext) end end function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: DD.IntervalSets.ClosedInterval}, Union{<: DD.IntervalSets.ClosedInterval}}) target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) - potential_candidates = _maybe_get_candidates(lookup, target_ext) - if isempty(potential_candidates) - return Int[] #=error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) =# # no geometry intersects with it - else - for candidate in potential_candidates - if GO.covers(target_ext, lookup.data[candidate]) - return candidate - end - end - return Int[] #=error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) =# # no geometry intersects with it + filter(potential_candidates) do candidate + GO.covers(target_ext, lookup.data[candidate]) end end -function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains}, Union{<: At, <: Contains}}) +function Lookups.selectindices(lookup::GeometryLookup, (xs, ys)::Tuple{Union{<: At, <: Contains, <: Real}, Union{<: At, <: Contains, <: Real}}) xval, yval = val(xs), val(ys) lookup_ext = Lookups.bounds(lookup) if lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2] potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) - if isempty(potential_candidates) - return Int[] #=error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) =# # no geometry intersects with it - else - for candidate in potential_candidates - if GO.contains(lookup.data[candidate], (xval, yval)) - return candidate - end - end - return Int[] #=error(""" - The point ($xval, $yval) is not within any of the geometries in the lookup. - """) =# # no geometry intersects with it + isempty(potential_candidates) && return Int[] + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], (xval, yval)) end else - return Int[] #=error(""" - The point ($xval, $yval) is outside of the extent of the geometry lookup. - """) =# # outside of extent / geometrylookup bbox + return Int[] end end From 056cd0dd740f4e422a8093618149a847af580cbf Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 23:19:56 -0400 Subject: [PATCH 49/57] Update methods.jl --- src/geometry_lookup/methods.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index 4057086de..d8e26086b 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -54,7 +54,7 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: Geometry return_dimension = rebuild(data, return_lookup) if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} - _cat_and_rebuild_parent(x, zs, return_dimension) + return _cat_and_rebuild_parent(x, zs, return_dimension) elseif zs isa AbstractVector{<: Union{<: AbstractDimStack, Missing}} dimarrays = NamedTuple{names(st)}( ntuple(length(names(st))) do i @@ -71,4 +71,4 @@ end ## Crop Cropping a geometry lookup to either a geometry lookup, a geometry, or a bounding box should return a geometry lookup. -=# \ No newline at end of file +=# From 3b052b9efd1be4ba5700d13232f592f6c2d7e00f Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 26 Apr 2025 23:20:17 -0400 Subject: [PATCH 50/57] Update zonal.jl --- src/methods/zonal.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 0a9ed4d3f..1f91c5189 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -147,7 +147,7 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data; return_dimension = Dim{:Geometry}(axes(zs, 1)) if zs isa AbstractVector{<: Union{<: AbstractDimArray, Missing}} - _cat_and_rebuild_parent(x, zs, return_dimension) + return _cat_and_rebuild_parent(x, zs, return_dimension) elseif zs isa AbstractVector{<: Union{<: AbstractDimStack, Missing}} dimarrays = NamedTuple{names(st)}( ntuple(length(names(st))) do i @@ -295,4 +295,4 @@ end precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 1}},)) precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 2}},)) precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 3}},)) -precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 4}},)) \ No newline at end of file +precompile(__do_cat_with_last_dim, (Vector{Raster{<: Any, 4}},)) From 629cf6a6cc79cc705e8f7abfe280c1f3f3207042 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Mon, 28 Apr 2025 20:32:35 -0400 Subject: [PATCH 51/57] only run zonal if not on fast, all-is-missing path --- src/geometry_lookup/methods.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/geometry_lookup/methods.jl b/src/geometry_lookup/methods.jl index d8e26086b..d1a6cd79a 100644 --- a/src/geometry_lookup/methods.jl +++ b/src/geometry_lookup/methods.jl @@ -35,9 +35,10 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data::Dimension{<: Geometry n = length(geoms) n == 0 && return [] zs, start_index = _alloc_zonal(f, x, geoms, n; spatialslices, kw...) - start_index == n + 1 && return zs - _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i - zs[i] = _zonal(f, x, geoms[i]; spatialslices, kw...) + if start_index != n + 1 + _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i + zs[i] = _zonal(f, x, geoms[i]; spatialslices, kw...) + end end return_lookup_dims = if spatialslices isa DD.AllDims From 4a56b50f629eca3530c07620f4a99663ad406cba Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Mon, 28 Apr 2025 21:25:35 -0400 Subject: [PATCH 52/57] basic netcdf io, could be more optimized / streamlike --- src/geometry_lookup/nc_io.jl | 107 +++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/geometry_lookup/nc_io.jl diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl new file mode 100644 index 000000000..e9b685bd9 --- /dev/null +++ b/src/geometry_lookup/nc_io.jl @@ -0,0 +1,107 @@ +using NCDatasets +import NCDatasets.CommonDataModel as CDM + +import Rasters + + +ds = NCDataset("/Users/anshul/i.nc") +var = ds["someData"] + +knowndims = Rasters._dims(var) + +unknowndims_idxs = findall(Rasters.isnolookup ∘ Rasters.lookup, knowndims) + +if length(unknowndims_idxs) > 1 + throw(ArgumentError("Only one unknown dimension is supported")) +elseif length(unknowndims_idxs) == 0 + return knowndims +end + +u_idx = only(unknowndims_idxs) + +u_dim_name = CDM.dimnames(var)[u_idx] + +has_geometry = haskey(CDM.attribs(var), "geometry") +if !has_geometry + throw(ArgumentError("Variable $u_dim_name does not have a geometry attribute")) +end +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] + +geometry_container_attribs = CDM.attribs(geometry_container_var) + +haskey(geometry_container_attribs, "geometry_type") && +geometry_container_attribs["geometry_type"] == "polygon" || +throw(ArgumentError("We only support polygon geometry types at this time, got $geometry_type")) + +@assert haskey(ds, geometry_container_attribs["node_count"]) +node_count_var = ds[geometry_container_attribs["node_count"]] +only(CDM.dimnames(node_count_var)) != u_dim_name && throw(ArgumentError("node_count variable $u_dim_name does not match the unknown dimension $u_dim_name")) + +node_count = collect(node_count_var) +node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) +part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) +interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) + +current_xy_index = 1 +current_ring_index = 1 +current_geom_index = 1 + + +# Initialize variables for ring assembly +start = 1 +stop = part_node_count[1] +rings = [node_coordinates[start:stop]] + +# Assemble all rings +geoms = Vector{GI.MultiPolygon{2, Float64}}(undef, length(node_count)) + +for i in 2:length(part_node_count) + start = stop + 1 + stop = start + part_node_count[i] - 1 + push!(rings, node_coordinates[start:stop]) + # Ensure rings are closed by adding the first point at the end + push!(rings[end], rings[end][1]) +end + +# Assemble multipolygons +current_ring = 1 +for (geom_idx, total_nodes) in enumerate(node_count) + # Find all rings that belong to this polygon + polygon_rings = [] + accumulated_nodes = 0 + + while current_ring <= length(part_node_count) && accumulated_nodes < total_nodes + ring = rings[current_ring] + push!(polygon_rings, (GI.LinearRing(ring), interior_ring[current_ring])) + accumulated_nodes += part_node_count[current_ring] + current_ring += 1 + end + + # Create polygons from rings + polygons = GI.Polygon[] + current_exterior = nothing + current_holes = GI.LinearRing[] + + for (ring, is_interior) in polygon_rings + if is_interior == 0 + # If we have a previous exterior ring, create a polygon with it and its holes + if !isnothing(current_exterior) + push!(polygons, GI.Polygon([current_exterior, current_holes...])) + current_holes = GI.LinearRing[] + end + current_exterior = ring + else + push!(current_holes, ring) + end + end + + # Add the last polygon if we have an exterior ring + if !isnothing(current_exterior) + push!(polygons, GI.Polygon([current_exterior, current_holes...])) + end + + # Create multipolygon from all polygons + geoms[geom_idx] = GI.MultiPolygon(polygons) +end + From 3c1cc29bba9e6f230bb6f85b6a2f96ad5cf20223 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Mon, 28 Apr 2025 21:39:48 -0400 Subject: [PATCH 53/57] fix --- src/geometry_lookup/io.jl | 17 ++++++++++------- src/geometry_lookup/nc_io.jl | 6 +++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index db3800d59..c147b2ceb 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -12,16 +12,16 @@ end function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.PolygonTrait, <: GI.MultiPolygonTrait}}, geoms) + + ngeoms = length(geoms) + nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0, threaded = false) n_points_per_geom_vec = GI.npoint.(geoms) - total_n_points = sum(n_points_per_geom_vec) + total_n_points = sum(n_points_per_geom_vec) - nrings # Create a vector of the total number of points xs = fill(0.0, total_n_points) ys = fill(0.0, total_n_points) - ngeoms = length(geoms) - nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0) - node_count_vec = fill(0, ngeoms) part_node_count_vec = fill(0, nrings) interior_ring_vec = fill(0, nrings) @@ -37,12 +37,14 @@ function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.Polygo # push individual components of the ring for poly in GO.flatten(GI.PolygonTrait, geom) exterior_ring = GI.getexterior(poly) - for point in GI.getpoint(exterior_ring) + for point_idx in 1:GI.npoint(exterior_ring)-1 + point = GI.getpoint(exterior_ring, point_idx) xs[current_xy_index] = GI.x(point) ys[current_xy_index] = GI.y(point) current_xy_index += 1 end part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring) + part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring)-1 interior_ring_vec[current_ring_index] = 0 current_ring_index += 1 @@ -50,12 +52,13 @@ function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.Polygo continue else for hole in GI.gethole(poly) - for point in GI.getpoint(hole) + for point_idx in 1:GI.npoint(hole)-1 + point = GI.getpoint(hole, point_idx) xs[current_xy_index] = GI.x(point) ys[current_xy_index] = GI.y(point) current_xy_index += 1 end - part_node_count_vec[current_ring_index] = GI.npoint(hole) + part_node_count_vec[current_ring_index] = GI.npoint(hole)-1 interior_ring_vec[current_ring_index] = 1 current_ring_index += 1 end diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl index e9b685bd9..9c76ae88c 100644 --- a/src/geometry_lookup/nc_io.jl +++ b/src/geometry_lookup/nc_io.jl @@ -52,18 +52,18 @@ current_geom_index = 1 start = 1 stop = part_node_count[1] rings = [node_coordinates[start:stop]] +push!(rings[end], node_coordinates[start]) # Assemble all rings -geoms = Vector{GI.MultiPolygon{2, Float64}}(undef, length(node_count)) - for i in 2:length(part_node_count) start = stop + 1 stop = start + part_node_count[i] - 1 push!(rings, node_coordinates[start:stop]) # Ensure rings are closed by adding the first point at the end - push!(rings[end], rings[end][1]) + push!(rings[end], node_coordinates[start]) end +geoms = Vector{GI.MultiPolygon{false, false}}(undef, length(node_count)) # Assemble multipolygons current_ring = 1 for (geom_idx, total_nodes) in enumerate(node_count) From 71700f53b44a7615d34545273c92d71d56141151 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 3 May 2025 09:17:20 -0400 Subject: [PATCH 54/57] add some test files as text that can be fed into ncgen --- test/data/lines.ncgen | 51 +++++++++++++++++++++++++++++ test/data/multilines.ncgen | 60 ++++++++++++++++++++++++++++++++++ test/data/multipoints.ncgen | 49 ++++++++++++++++++++++++++++ test/data/multipolygons.ncgen | 61 +++++++++++++++++++++++++++++++++++ test/data/points.ncgen | 48 +++++++++++++++++++++++++++ 5 files changed, 269 insertions(+) create mode 100644 test/data/lines.ncgen create mode 100644 test/data/multilines.ncgen create mode 100644 test/data/multipoints.ncgen create mode 100644 test/data/multipolygons.ncgen create mode 100644 test/data/points.ncgen diff --git a/test/data/lines.ncgen b/test/data/lines.ncgen new file mode 100644 index 000000000..e277f32bc --- /dev/null +++ b/test/data/lines.ncgen @@ -0,0 +1,51 @@ +netcdf lines { +dimensions: + instance = 2 ; + node = 5 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int geometry_container ; + geometry_container:geometry_type = "line" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + int node_count(instance) ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + node_count = 3, 2 ; + x = 30, 10, 40, 50, 50 ; + y = 10, 30, 40, 60, 50 ; +} \ No newline at end of file diff --git a/test/data/multilines.ncgen b/test/data/multilines.ncgen new file mode 100644 index 000000000..4f1873185 --- /dev/null +++ b/test/data/multilines.ncgen @@ -0,0 +1,60 @@ +netcdf multilines { +dimensions: + node = 12 ; + instance = 2 ; + part = 4 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + float geometry_container ; + geometry_container:geometry_type = "line" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + geometry_container:grid_mapping = "datum" ; + geometry_container:coordinates = "lat lon" ; + geometry_container:part_node_count = "part_node_count" ; + geometry_container:interior_ring = "interior_ring" ; + int node_count(instance) ; + int part_node_count(part) ; + int interior_ring(part) ; + float datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:semi_major_axis = 6378137. ; + datum:inverse_flattening = 298.257223563 ; + datum:longitude_of_prime_meridian = 0. ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + x = 20, 10, 0, 5, 10, 15, 20, 10, 0, 50, 40, 30 ; + y = 0, 15, 0, 5, 10, 5, 20, 35, 20, 0, 15, 0 ; + lat = 25, 7 ; + lon = 10, 40 ; + node_count = 9, 3 ; + part_node_count = 3, 3, 3, 3 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + +} diff --git a/test/data/multipoints.ncgen b/test/data/multipoints.ncgen new file mode 100644 index 000000000..92ab1037c --- /dev/null +++ b/test/data/multipoints.ncgen @@ -0,0 +1,49 @@ +netcdf multipoints { +dimensions: + instance = 2 ; + node = 5 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int geometry_container ; + geometry_container:geometry_type = "point" ; + geometry_container:node_coordinates = "x y" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + node_count = 3, 2 ; + x = 30, 10, 40, 50, 50 ; + y = 10, 30, 40, 60, 50 ; +} \ No newline at end of file diff --git a/test/data/multipolygons.ncgen b/test/data/multipolygons.ncgen new file mode 100644 index 000000000..599f4371e --- /dev/null +++ b/test/data/multipolygons.ncgen @@ -0,0 +1,61 @@ +netcdf multipolygons { +dimensions: + node = 12 ; + instance = 2 ; + part = 4 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + float geometry_container ; + geometry_container:geometry_type = "polygon" ; + geometry_container:node_count = "node_count" ; + geometry_container:node_coordinates = "x y" ; + geometry_container:grid_mapping = "datum" ; + geometry_container:coordinates = "lat lon" ; + geometry_container:part_node_count = "part_node_count" ; + geometry_container:interior_ring = "interior_ring" ; + int node_count(instance) ; + int part_node_count(part) ; + int interior_ring(part) ; + float datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:semi_major_axis = 6378137. ; + datum:inverse_flattening = 298.257223563 ; + datum:longitude_of_prime_meridian = 0. ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + x = 20, 10, 0, 5, 10, 15, 20, 10, 0, 50, 40, 30 ; + y = 0, 15, 0, 5, 10, 5, 20, 35, 20, 0, 15, 0 ; + lat = 25, 7 ; + lon = 10, 40 ; + node_count = 9, 3 ; + part_node_count = 3, 3, 3, 3 ; + interior_ring = 0, 1, 0, 0 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + +} diff --git a/test/data/points.ncgen b/test/data/points.ncgen new file mode 100644 index 000000000..1b8e56c9f --- /dev/null +++ b/test/data/points.ncgen @@ -0,0 +1,48 @@ +netcdf points { +dimensions: + instance = 2 ; + node = 2 ; + time = 4 ; +variables: + int time(time) ; + time:units = "days since 2000-01-01" ; + double lat(instance) ; + lat:units = "degrees_north" ; + lat:standard_name = "latitude" ; + lat:nodes = "y" ; + double lon(instance) ; + lon:units = "degrees_east" ; + lon:standard_name = "longitude" ; + lon:nodes = "x" ; + int datum ; + datum:grid_mapping_name = "latitude_longitude" ; + datum:longitude_of_prime_meridian = 0.0 ; + datum:semi_major_axis = 6378137.0 ; + datum:inverse_flattening = 298.257223563 ; + int geometry_container ; + geometry_container:geometry_type = "point" ; + geometry_container:node_coordinates = "x y" ; + double x(node) ; + x:units = "degrees_east" ; + x:standard_name = "longitude" ; + x:axis = "X" ; + double y(node) ; + y:units = "degrees_north" ; + y:standard_name = "latitude" ; + y:axis = "Y" ; + double someData(instance, time) ; + someData:coordinates = "time lat lon" ; + someData:grid_mapping = "datum" ; + someData:geometry = "geometry_container" ; +// global attributes: + :featureType = "timeSeries" ; +data: + time = 1, 2, 3, 4 ; + lat = 30, 50 ; + lon = 10, 60 ; + someData = + 1, 2, 3, 4, + 1, 2, 3, 4 ; + x = 30, 50; + y = 10, 60; +} \ No newline at end of file From eb6fa755b5a9f52e0978488946054f5091f2ae44 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 3 May 2025 09:17:33 -0400 Subject: [PATCH 55/57] add decoders for all geomtraits --- src/geometry_lookup/io.jl | 206 ++++++++++++++++++++++++++++++++++---- 1 file changed, 187 insertions(+), 19 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index c147b2ceb..764a0fff4 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -1,17 +1,18 @@ +#= +# Geometry encoding and decoding from CF conventions - - -function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.LineStringTrait, <: GI.MultiLineStringTrait}}, geoms) +Encode functions will always return a named tuple with the standard +=# +function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) error("Not implemented yet") end -function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.PointTrait, <: GI.MultiPointTrait}}, geoms) + +function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) error("Not implemented yet") end - - -function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.PolygonTrait, <: GI.MultiPolygonTrait}}, geoms) +function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) ngeoms = length(geoms) nrings = GO.applyreduce(GI.nring, +, GI.PolygonTrait(), geoms; init = 0, threaded = false) @@ -43,7 +44,7 @@ function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.Polygo ys[current_xy_index] = GI.y(point) current_xy_index += 1 end - part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring) + # Main.@infiltrate part_node_count_vec[current_ring_index] = GI.npoint(exterior_ring)-1 interior_ring_vec[current_ring_index] = 0 current_ring_index += 1 @@ -65,9 +66,17 @@ function _geometry_cf_encode(::GeometryOpsCore.TraitTarget{<: Union{<: GI.Polygo end end end - - return xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec - + # Note: this does not encode the `lat` and `lon` variables that might hold a representative point of the polygon, like a centroid. + # The names in this named tuple are standard CF conventions. + # node_coordinates_x and node_coordinates_y are the coordinates of the nodes of the rings. + # but cf encodes them as a space separated string. That's the only difference. + return (; + node_coordinates_x = xs, + node_coordinates_y = ys, + node_count = node_count_vec, + part_node_count = part_node_count_vec, + interior_ring = interior_ring_vec + ) end #= @@ -84,17 +93,176 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension{<: GeometryLookup}) end =# -function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, xs, ys, node_count_vec, part_node_count_vec, interior_ring_vec) - current_xy_index = 1 - current_ring_index = 1 - current_geom_index = 1 - geoms = Vector{GO.GeometryBasics.MultiPolygon{2, Float64}}(undef, length(node_count_vec)) +function _geometry_cf_decode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, ds, geometry_container_attribs; crs = nothing) + # First of all, we assert certain things about the geometry container and what it has. + @assert haskey(ds, geometry_container_attribs["node_count"]) + node_count_var = ds[geometry_container_attribs["node_count"]] + # only(CDM.dimnames(node_count_var)) != u_dim_name && throw(ArgumentError("node_count variable $u_dim_name does not match the unknown dimension $u_dim_name")) + + # Load and create all the data we need. + node_count = collect(node_count_var) + node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) + + # We can take a fast path for polygons, if we know that there are no multipart polygons. + if !haskey(geometry_container_attribs, "part_node_count") + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return map(node_count_starts, node_count_stops) do start, stop + GI.Polygon([GI.LinearRing(node_coordinates[start:stop]; crs)]; crs) + end + end + + + part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) + interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) + + # First, we assemble all the rings. That's the slightly complex part. + # After rings are assembled, we assemble the polygons and multipolygons from the rings. + + # Initialize variables for ring assembly + start = 1 + stop = part_node_count[1] + rings = [node_coordinates[start:stop]] + push!(rings[end], node_coordinates[start]) + + # Assemble all rings + for i in 2:length(part_node_count) + start = stop + 1 + stop = start + part_node_count[i] - 1 + push!(rings, node_coordinates[start:stop]) + # Ensure rings are closed by adding the first point at the end + push!(rings[end], node_coordinates[start]) + end + + # Now, we proceed to assemble the polygons and multipolygons from the rings. + # TODO: no better way to get the tuple type, at least for now. + _lr = GI.LinearRing([(0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0)]; crs) + _p = GI.Polygon([_lr]; crs) + _mp = GI.MultiPolygon([_p]; crs) + geoms = Vector{typeof(_mp)}(undef, length(node_count)) + # Assemble multipolygons + current_ring = 1 + for (geom_idx, total_nodes) in enumerate(node_count) + # Find all rings that belong to this polygon + polygon_rings = Tuple{typeof(_lr), Int8}[] + accumulated_nodes = 0 + + while current_ring <= length(part_node_count) && accumulated_nodes < total_nodes + ring = rings[current_ring] + push!(polygon_rings, (GI.LinearRing(ring; crs), interior_ring[current_ring])) + accumulated_nodes += part_node_count[current_ring] + current_ring += 1 + end + + # Create polygons from rings + polygons = typeof(_p)[] + current_exterior = nothing + current_holes = typeof(_lr)[] + + for (ring, is_interior) in polygon_rings + if is_interior == 0 + # If we have a previous exterior ring, create a polygon with it and its holes + if !isnothing(current_exterior) + push!(polygons, GI.Polygon([current_exterior, current_holes...]; crs)) + current_holes = typeof(_lr)[] + end + current_exterior = ring + else + push!(current_holes, ring) + end + end + + # Add the last polygon if we have an exterior ring + if !isnothing(current_exterior) + push!(polygons, GI.Polygon([current_exterior, current_holes...]; crs)) + end + + # Create multipolygon from all polygons + geoms[geom_idx] = GI.MultiPolygon(polygons; crs) + end + + return geoms + +end - for (i, npoints) in enumerate(node_count_vec) - this_geom_npoints = npoints - # this_geom_nholes = + +function _geometry_cf_decode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, ds, geometry_container_attribs; crs = nothing) + @assert haskey(ds, geometry_container_attribs["node_count"]) + node_count_var = ds[geometry_container_attribs["node_count"]] + + # Load and create all the data we need. + node_count = collect(node_count_var) + node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) + + # we can use a fast path for lines, if we know that there are no multipart lines. + if !haskey(geometry_container_attribs, "part_node_count") + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return GI.LineString.(getindex.((node_coordinates,), (:).(node_count_starts, node_count_stops)); crs) end + + # otherwise, we need to decode the multipart lines. + part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) + + # Initialize variables for line assembly + start = 1 + stop = part_node_count[1] + lines = [node_coordinates[start:stop]] + + # Assemble all lines + for i in 2:length(part_node_count) + start = stop + 1 + stop = start + part_node_count[i] - 1 + push!(lines, node_coordinates[start:stop]) + end + + # Now assemble the multilinestrings + _ls = GI.LineString(node_coordinates[1:2]; crs) + _mls = GI.MultiLineString([_ls]; crs) + geoms = Vector{typeof(_mls)}(undef, length(node_count)) + + # Assemble multilinestrings + current_line = 1 + for (geom_idx, total_nodes) in enumerate(node_count) + # Find all lines that belong to this multilinestring + multilinestring_lines = typeof(_ls)[] + accumulated_nodes = 0 + + while current_line <= length(part_node_count) && accumulated_nodes < total_nodes + line = lines[current_line] + push!(multilinestring_lines, GI.LineString(line; crs)) + accumulated_nodes += part_node_count[current_line] + current_line += 1 + end + + # Create multilinestring from all lines + geoms[geom_idx] = GI.MultiLineString(multilinestring_lines; crs) + end + + return geoms end +function _geometry_cf_decode(::Union{GI.PointTrait, GI.MultiPointTrait}, ds, geometry_container_attribs; crs = nothing) + + node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) + # We can take a fast path for points, if we know that there are no multipoints + if haskey(geometry_container_attribs, "node_count") + @assert haskey(ds, geometry_container_attribs["node_count"]) + node_count_var = ds[geometry_container_attribs["node_count"]] + node_count = collect(node_count_var) + if !all(==(1), node_count) + # we have multipoints + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return map(node_count_starts, node_count_stops) do start, stop + GI.MultiPoint(node_coordinates[start:stop]; crs) + end + end + end + + # finally, if we have no node count, or all node counts are 1, we just return the points + return GI.Point.(node_coordinates; crs) + +end \ No newline at end of file From e9f5186fceaa3c83bdcee1a48b038fcfffaf7379 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 3 May 2025 09:17:47 -0400 Subject: [PATCH 56/57] use the test files in nc_io.jl which should now be a test file --- src/geometry_lookup/nc_io.jl | 140 +++++++++++++++++------------------ 1 file changed, 70 insertions(+), 70 deletions(-) diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl index 9c76ae88c..6fc16d50c 100644 --- a/src/geometry_lookup/nc_io.jl +++ b/src/geometry_lookup/nc_io.jl @@ -2,9 +2,19 @@ using NCDatasets import NCDatasets.CommonDataModel as CDM import Rasters +import GeoInterface as GI + + +import NCDatasets: NetCDF_jll + +# generate dataset +output_path = tempname() * ".nc" +testfile = "multipolygons.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + -ds = NCDataset("/Users/anshul/i.nc") var = ds["someData"] knowndims = Rasters._dims(var) @@ -34,74 +44,64 @@ haskey(geometry_container_attribs, "geometry_type") && geometry_container_attribs["geometry_type"] == "polygon" || throw(ArgumentError("We only support polygon geometry types at this time, got $geometry_type")) -@assert haskey(ds, geometry_container_attribs["node_count"]) -node_count_var = ds[geometry_container_attribs["node_count"]] -only(CDM.dimnames(node_count_var)) != u_dim_name && throw(ArgumentError("node_count variable $u_dim_name does not match the unknown dimension $u_dim_name")) - -node_count = collect(node_count_var) -node_coordinates = collect(zip(getindex.((ds,), split(geometry_container_attribs["node_coordinates"], " "))...)) -part_node_count = collect(ds[geometry_container_attribs["part_node_count"]]) -interior_ring = collect(ds[geometry_container_attribs["interior_ring"]]) - -current_xy_index = 1 -current_ring_index = 1 -current_geom_index = 1 - - -# Initialize variables for ring assembly -start = 1 -stop = part_node_count[1] -rings = [node_coordinates[start:stop]] -push!(rings[end], node_coordinates[start]) - -# Assemble all rings -for i in 2:length(part_node_count) - start = stop + 1 - stop = start + part_node_count[i] - 1 - push!(rings, node_coordinates[start:stop]) - # Ensure rings are closed by adding the first point at the end - push!(rings[end], node_coordinates[start]) -end +geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) -geoms = Vector{GI.MultiPolygon{false, false}}(undef, length(node_count)) -# Assemble multipolygons -current_ring = 1 -for (geom_idx, total_nodes) in enumerate(node_count) - # Find all rings that belong to this polygon - polygon_rings = [] - accumulated_nodes = 0 - - while current_ring <= length(part_node_count) && accumulated_nodes < total_nodes - ring = rings[current_ring] - push!(polygon_rings, (GI.LinearRing(ring), interior_ring[current_ring])) - accumulated_nodes += part_node_count[current_ring] - current_ring += 1 - end - - # Create polygons from rings - polygons = GI.Polygon[] - current_exterior = nothing - current_holes = GI.LinearRing[] - - for (ring, is_interior) in polygon_rings - if is_interior == 0 - # If we have a previous exterior ring, create a polygon with it and its holes - if !isnothing(current_exterior) - push!(polygons, GI.Polygon([current_exterior, current_holes...])) - current_holes = GI.LinearRing[] - end - current_exterior = ring - else - push!(current_holes, ring) - end - end - - # Add the last polygon if we have an exterior ring - if !isnothing(current_exterior) - push!(polygons, GI.Polygon([current_exterior, current_holes...])) - end - - # Create multipolygon from all polygons - geoms[geom_idx] = GI.MultiPolygon(polygons) -end + +# lines now + +output_path = tempname() * ".nc" +testfile = "lines.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] + +geometry_container_attribs = CDM.attribs(geometry_container_var) + +geoms = Rasters._geometry_cf_decode(GI.LineStringTrait(), ds, geometry_container_attribs) + + +output_path = tempname() * ".nc" +testfile = "multilines.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] + +geometry_container_attribs = CDM.attribs(geometry_container_var) + +geoms = Rasters._geometry_cf_decode(GI.MultiLineStringTrait(), ds, geometry_container_attribs) + + +output_path = tempname() * ".nc" +testfile = "multipoints.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] + +geometry_container_attribs = CDM.attribs(geometry_container_var) + +geoms = Rasters._geometry_cf_decode(GI.MultiPointTrait(), ds, geometry_container_attribs) + + + +output_path = tempname() * ".nc" +testfile = "points.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) + +var = ds["someData"] +geometry_container_varname = CDM.attribs(var)["geometry"] +geometry_container_var = ds[geometry_container_varname] + +geometry_container_attribs = CDM.attribs(geometry_container_var) + +geoms = Rasters._geometry_cf_decode(GI.PointTrait(), ds, geometry_container_attribs) \ No newline at end of file From e94b28bc8d4c2853f3151ca2d3e29e4bfef62ff6 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Sat, 3 May 2025 10:01:04 -0400 Subject: [PATCH 57/57] fully implement, and "test" all six geom types --- src/geometry_lookup/io.jl | 86 +++++++++++++++++++++++++++++++----- src/geometry_lookup/nc_io.jl | 65 ++++++++++++++++++--------- test/data/multipoints.ncgen | 2 + 3 files changed, 122 insertions(+), 31 deletions(-) diff --git a/src/geometry_lookup/io.jl b/src/geometry_lookup/io.jl index 764a0fff4..4986f99db 100644 --- a/src/geometry_lookup/io.jl +++ b/src/geometry_lookup/io.jl @@ -4,12 +4,69 @@ Encode functions will always return a named tuple with the standard =# function _geometry_cf_encode(::Union{GI.PointTrait, GI.MultiPointTrait}, geoms) - error("Not implemented yet") + if all(x -> GI.trait(x) isa GI.PointTrait, geoms) + return (; + node_coordinates_x = GI.x.(geoms), + node_coordinates_y = GI.y.(geoms), + ) + end + # else: we have some multipolygons in here + npoints = sum(GI.npoint, geoms) + flat_xs = Vector{Float64}(undef, npoints) + flat_ys = Vector{Float64}(undef, npoints) + + i::Int = 0 + # flatten is guaranteed to iterate in order. + flattener = GO.flatten(GI.PointTrait, geoms) do point + i += 1 + flat_xs[i] = GI.x(point) + flat_ys[i] = GI.y(point) + end + # iterate over flattener to populate the arrays, + # without allocating an extra array. + foreach(identity, flattener) + + return (; + node_coordinates_x = flat_xs, + node_coordinates_y = flat_ys, + node_count = GI.npoint.(geoms) + ) end function _geometry_cf_encode(::Union{GI.LineStringTrait, GI.MultiLineStringTrait}, geoms) - error("Not implemented yet") + # There is a fast path without encoding part_node_count if all geoms are linestrings. + npoints = sum(GI.npoint, geoms) + flat_xs = Vector{Float64}(undef, npoints) + flat_ys = Vector{Float64}(undef, npoints) + + i::Int = 0 + # flatten is guaranteed to iterate in order. + flattener = GO.flatten(GI.PointTrait, geoms) do point + i += 1 + flat_xs[i] = GI.x(point) + flat_ys[i] = GI.y(point) + end + # iterate over flattener to populate the arrays, + # without allocating an extra array. + foreach(identity, flattener) + + # If all geoms are linestrings, we can take a fast path. + if all(x -> GI.trait(x) isa GI.LineStringTrait, geoms) + return (; + node_coordinates_x = flat_xs, + node_coordinates_y = flat_ys, + node_count = GI.npoint.(geoms) + ) + end + + # Otherwise, we need to encode part_node_count for multilinestrings. + return (; + node_coordinates_x = flat_xs, + node_coordinates_y = flat_ys, + part_node_count = collect(GO.flatten(GI.npoint, GI.LineStringTrait, geoms)), + node_count = GI.npoint.(geoms) + ) end function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geoms) @@ -33,7 +90,10 @@ function _geometry_cf_encode(::Union{GI.PolygonTrait, GI.MultiPolygonTrait}, geo for (i, geom) in enumerate(geoms) this_geom_npoints = GI.npoint(geom) - node_count_vec[i] = this_geom_npoints + # Bear in mind, that the last point (which == first point) + # of the linear ring is removed when encoding, so not included + # in the node count. + node_count_vec[i] = this_geom_npoints - GI.nring(geom) # push individual components of the ring for poly in GO.flatten(GI.PolygonTrait, geom) @@ -252,13 +312,19 @@ function _geometry_cf_decode(::Union{GI.PointTrait, GI.MultiPointTrait}, ds, geo @assert haskey(ds, geometry_container_attribs["node_count"]) node_count_var = ds[geometry_container_attribs["node_count"]] node_count = collect(node_count_var) - if !all(==(1), node_count) - # we have multipoints - node_count_stops = cumsum(node_count) - node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] - return map(node_count_starts, node_count_stops) do start, stop - GI.MultiPoint(node_coordinates[start:stop]; crs) - end + # The code below could be a fast path, but we don't want + # to arbitrarily change the output type of the decoder. + # MultiPoints should always roundtrip and write as multipoints. + # if !all(==(1), node_count) + # do nothing + # else + # return a fast path + # end + # we have multipoints + node_count_stops = cumsum(node_count) + node_count_starts = [1, node_count_stops[1:end-1] .+ 1...] + return map(node_count_starts, node_count_stops) do start, stop + GI.MultiPoint(node_coordinates[start:stop]; crs) end end diff --git a/src/geometry_lookup/nc_io.jl b/src/geometry_lookup/nc_io.jl index 6fc16d50c..d5ebc7c20 100644 --- a/src/geometry_lookup/nc_io.jl +++ b/src/geometry_lookup/nc_io.jl @@ -1,20 +1,13 @@ using NCDatasets import NCDatasets.CommonDataModel as CDM +import NCDatasets.NetCDF_jll + +using Test import Rasters import GeoInterface as GI - -import NCDatasets: NetCDF_jll - -# generate dataset -output_path = tempname() * ".nc" -testfile = "multipolygons.ncgen" -run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) -ds = NCDataset(output_path) - - - +#= var = ds["someData"] knowndims = Rasters._dims(var) @@ -35,6 +28,13 @@ has_geometry = haskey(CDM.attribs(var), "geometry") if !has_geometry throw(ArgumentError("Variable $u_dim_name does not have a geometry attribute")) end +=# + +# generate dataset +output_path = tempname() * ".nc" +testfile = "multipolygons.ncgen" +run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) +ds = NCDataset(output_path) geometry_container_varname = CDM.attribs(var)["geometry"] geometry_container_var = ds[geometry_container_varname] @@ -46,7 +46,13 @@ throw(ArgumentError("We only support polygon geometry types at this time, got $g geoms = Rasters._geometry_cf_decode(GI.PolygonTrait(), ds, geometry_container_attribs) +encoded = Rasters._geometry_cf_encode(GI.PolygonTrait(), geoms) +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] +@test encoded.part_node_count == ds["part_node_count"] +@test encoded.interior_ring == ds["interior_ring"] # lines now @@ -54,29 +60,39 @@ output_path = tempname() * ".nc" testfile = "lines.ncgen" run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) ds = NCDataset(output_path) - var = ds["someData"] geometry_container_varname = CDM.attribs(var)["geometry"] geometry_container_var = ds[geometry_container_varname] - geometry_container_attribs = CDM.attribs(geometry_container_var) - geoms = Rasters._geometry_cf_decode(GI.LineStringTrait(), ds, geometry_container_attribs) +encoded = Rasters._geometry_cf_encode(GI.LineStringTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] +@test !hasproperty(encoded, :part_node_count) + output_path = tempname() * ".nc" testfile = "multilines.ncgen" run(`$(NetCDF_jll.ncgen) -k nc4 -b -o $output_path $(joinpath(dirname(dirname(Base.pathof(Rasters))), "test", "data", testfile))`) ds = NCDataset(output_path) - var = ds["someData"] geometry_container_varname = CDM.attribs(var)["geometry"] geometry_container_var = ds[geometry_container_varname] - geometry_container_attribs = CDM.attribs(geometry_container_var) - geoms = Rasters._geometry_cf_decode(GI.MultiLineStringTrait(), ds, geometry_container_attribs) +encoded = Rasters._geometry_cf_encode(GI.MultiLineStringTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.part_node_count == ds["part_node_count"] +@test encoded.node_count == ds["node_count"] + + + output_path = tempname() * ".nc" testfile = "multipoints.ncgen" @@ -86,11 +102,14 @@ ds = NCDataset(output_path) var = ds["someData"] geometry_container_varname = CDM.attribs(var)["geometry"] geometry_container_var = ds[geometry_container_varname] - geometry_container_attribs = CDM.attribs(geometry_container_var) - geoms = Rasters._geometry_cf_decode(GI.MultiPointTrait(), ds, geometry_container_attribs) +encoded = Rasters._geometry_cf_encode(GI.MultiPointTrait(), geoms) + +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test encoded.node_count == ds["node_count"] output_path = tempname() * ".nc" @@ -101,7 +120,11 @@ ds = NCDataset(output_path) var = ds["someData"] geometry_container_varname = CDM.attribs(var)["geometry"] geometry_container_var = ds[geometry_container_varname] - geometry_container_attribs = CDM.attribs(geometry_container_var) +geoms = Rasters._geometry_cf_decode(GI.PointTrait(), ds, geometry_container_attribs) + +encoded = Rasters._geometry_cf_encode(GI.PointTrait(), geoms) -geoms = Rasters._geometry_cf_decode(GI.PointTrait(), ds, geometry_container_attribs) \ No newline at end of file +@test encoded.node_coordinates_x == ds["x"] +@test encoded.node_coordinates_y == ds["y"] +@test !hasproperty(encoded, :node_count) \ No newline at end of file diff --git a/test/data/multipoints.ncgen b/test/data/multipoints.ncgen index 92ab1037c..5eff70acf 100644 --- a/test/data/multipoints.ncgen +++ b/test/data/multipoints.ncgen @@ -19,9 +19,11 @@ variables: datum:longitude_of_prime_meridian = 0.0 ; datum:semi_major_axis = 6378137.0 ; datum:inverse_flattening = 298.257223563 ; + int node_count(instance) ; int geometry_container ; geometry_container:geometry_type = "point" ; geometry_container:node_coordinates = "x y" ; + geometry_container:node_count = "node_count" ; double x(node) ; x:units = "degrees_east" ; x:standard_name = "longitude" ;