Skip to content

Breaking: more consistent (dis)aggregation of lookups #914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
62 changes: 33 additions & 29 deletions src/methods/aggregate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,18 @@
aggregate(method, l::Lookup, scale::Colon) = aggregate(method, l, length(l))
aggregate(method, l::Lookup, scale::Nothing) = aggregate(method, l, 1)
function aggregate(method, l::Lookup, scale::Int)
start, stop = _endpoints(method, l, scale)
if issampled(l) && isordered(l)
newl = l[start:scale:stop]
sp = aggregate(method, span(l), scale)
return rebuild(newl; span=sp)
if issampled(l) && isordered(l) && isregular(l)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't aggregating Irregular ok in some cases?

Like its a bit wrong to do with mean but fine with sum?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in lookup aggregation. How should we aggregate irregular lookups?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just like they were in the past... Start and end locus are especially simple. It's just dropping the other values.

Center is a bit more complicated, I guess the middle of the aggregated range?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an extra check for regular - so for irregular lookups it works basically like it used to.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we might want the old behaviour for point lookups as well - I'm not sure

start, stop = _endpoints(method, l, scale)
sp = aggregate(span(l), scale)
return rebuild(l; data = start:val(sp):stop, span=sp)
else
# Categorical and Unordered lookups are just broken
# by aggregate, so use NoLookup
return NoLookup(Base.OneTo(length(start:scale:stop)))
return NoLookup(Base.OneTo(length(l) ÷ scale))
end
end
aggregate(method, span::Span, scale) = span
aggregate(method, span::Regular, scale) = Regular(val(span) * scale)
aggregate(span::Span, scale) = span

Check warning on line 119 in src/methods/aggregate.jl

View check run for this annotation

Codecov / codecov/patch

src/methods/aggregate.jl#L119

Added line #L119 was not covered by tests
aggregate(span::Regular, scale) = Regular(val(span) * scale)

"""
aggregate!(method, dst::AbstractRaster, src::AbstractRaster, scale; skipmissing=false)
Expand Down Expand Up @@ -149,7 +148,9 @@
)
comparedims(dst, src; length=false)
intscale = _scale2int(Ag(), dims(src), scale; verbose)
offsets = _agoffset.(loci, intscale)
# offsets determines which cell within each window is used -
# if picking the center locus with uneven intscale just round up
offsets = ceil.(Int, _agoffset.(loci, (ForwardOrdered(),), intscale))
# Cache the source if its a disk array
src1 = isdisk(src) ? DiskArrays.cache(src) : src
# Broadcast will make the dest arrays chunks when needed
Expand Down Expand Up @@ -262,20 +263,20 @@
function disaggregate(dim::Dimension, scale)
rebuild(dim, disaggregate(locus, lookup(dim), scale))
end
function disaggregate(lookup::Lookup, scale)
intscale = _scale2int(DisAg(), lookup, scale)
intscale == 1 && return lookup
function disaggregate(l::Lookup, scale)
intscale = _scale2int(DisAg(), l, scale)
intscale == 1 && return l

len = length(lookup) * intscale
step_ = step(lookup) / intscale
start = lookup[1] - _agoffset(Start(), intscale) * step_
len = length(l) * intscale
step_ = step(l) / intscale
start = first(l) - _agoffset(l, intscale) * step_
stop = start + (len - 1) * step_
index = LinRange(start, stop, len)
if lookup isa AbstractSampled
sp = disaggregate(locus, span(lookup), intscale)
return rebuild(lookup; data=index, span=sp)
if l isa AbstractSampled
sp = disaggregate(locus, span(l), intscale)
rebuild(l; data=index, span=sp)
else
return rebuild(lookup; data=index)
rebuild(l; data=index)

Check warning on line 279 in src/methods/aggregate.jl

View check run for this annotation

Codecov / codecov/patch

src/methods/aggregate.jl#L279

Added line #L279 was not covered by tests
end
end

Expand Down Expand Up @@ -428,17 +429,20 @@
@inline _scale2int(::DisAg, l::Lookup, scale::Int) = scale

_agoffset(locus::Locus, l::Lookup, scale::Int) = _agoffset(locus, scale)
_agoffset(method, l::Lookup, scale::Int) = _agoffset(locus(l), scale)
_agoffset(method, l::Lookup, scale::Int) = _agoffset(l, scale)

Check warning on line 432 in src/methods/aggregate.jl

View check run for this annotation

Codecov / codecov/patch

src/methods/aggregate.jl#L432

Added line #L432 was not covered by tests
_agoffset(l::Lookup, scale::Int) = _agoffset(locus(l), order(l), scale)
_agoffset(x, scale::Colon) = 0
_agoffset(locus::Start, scale::Int) = 0
_agoffset(locus::End, scale::Int) = scale - 1
_agoffset(locus::Center, scale::Int) = scale ÷ 2

_endpoints(method, l::Lookup, scale::Colon) = firstindex(l), lastindex(l)
_endpoints(method, l::Lookup, scale::Nothing) = firstindex(l), lastindex(l)
function _endpoints(method, l::Lookup, scale::Int)
start = firstindex(l) + _agoffset(method, l, scale)
stop = (length(l) ÷ scale) * scale
_agoffset(locus::Start, ::ForwardOrdered, scale::Int) = 0

Check warning on line 435 in src/methods/aggregate.jl

View check run for this annotation

Codecov / codecov/patch

src/methods/aggregate.jl#L435

Added line #L435 was not covered by tests
_agoffset(locus::End, ::ForwardOrdered, scale::Int) = scale - 1
_agoffset(locus::Start, ::ReverseOrdered, scale::Int) = scale - 1
_agoffset(locus::End, ::ReverseOrdered, scale::Int) = 0

Check warning on line 438 in src/methods/aggregate.jl

View check run for this annotation

Codecov / codecov/patch

src/methods/aggregate.jl#L438

Added line #L438 was not covered by tests
_agoffset(locus::Center, ::Ordered, scale::Int) = (scale-1)/2

_endpoints(_, l::Lookup, scale::Int) = _endpoints(locus(l), l, scale)
function _endpoints(locus::Locus, l::Lookup, scale::Int)
offset = step(l)*_agoffset(locus, order(l), scale)
start = first(l) + offset
stop = l[(length(l) ÷ scale)*scale] + offset
return start, stop
end

Expand Down
34 changes: 26 additions & 8 deletions test/aggregate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,32 @@ series = RasterSeries([stack1, stack2], (Ti(dates),))
lat = Y(Sampled(LinRange(3, 13, 6), ForwardOrdered(), Regular(2.0), Intervals(Start()), NoMetadata()))
aglat = aggregate(Start(), lat, 3)
@test span(lookup(aglat)) == Regular(6.0)
@test disaggregate(Start(), aglat, 3) == lat
@test disaggregate(aglat, 3) == lat

aglon = aggregate(Start(), dimz[1], 3)
aglon = aggregate(Center(), dimz[1], 3)
@test step(lookup(aglon)) === 30.0
@test val(aglon) == [30.0]
disaglon = disaggregate(Start(), aglon, 3)
@test val(aglon) == [40.0]
disaglon = disaggregate(aglon, 3)
@test index(disaglon) == index(dimz[1])
@test span(disaglon) == span(dimz[1])
@test sampling(disaglon) == sampling(dimz[1])

aglat = aggregate(Start(), dimz[2], 3)
aglat = aggregate(Center(), dimz[2], 3)
@test step(lookup(aglat)) === 15.0
@test index(aglat) == LinRange(-10.0, 5.0, 2)
disaglat = disaggregate(Start(), aglat, 3)
@test index(aglat) == LinRange(-5.0, 10.0, 2)
disaglat = disaggregate(aglat, 3)
# The last item is lost due to rounding in `aggregate`
@test index(disaglat) != index(dimz[2])
@test index(disaglat) === LinRange(-10.0, 15.0, 6)
@test span(disaglat) == span(dimz[2])
@test sampling(disaglat) == sampling(dimz[2])

# Disaggregation preserves extent of the original dimension
lat2 = shiftlocus(Center(), lat)
disaglat2 = disaggregate(lat2, 2)
lat3 = shiftlocus(End(), lat)
disaglat3 = disaggregate(lat3, 2)
@test bounds(lat2) == bounds(disaglat2) == bounds(disaglat3)
end

@testset "aggregate a single dim" begin
Expand Down Expand Up @@ -207,7 +214,7 @@ end
end

@testset "Aggregate different index lookups" begin
dimz = Y([1, 3, 2]), Dim{:category}([:a, :b, :c]), X([10, 20, 30, 40])
dimz = Y([1, 3, 2]), Dim{:category}([:a, :b, :c]), X([10, 20, 30, 40]; span = Regular(10))
a1 = [1 2 3; 4 5 6; 7 8 9]
A = cat(a1, a1 .+ 10, a1 .+ 20, a1 .+ 30, dims=3)
da = Raster(A, dimz)
Expand Down Expand Up @@ -243,3 +250,14 @@ end
@test eager_disag_series == lazy_disag_series
@test all(x -> all(x -> x isa SubArray, parent(x)), lazy_disag_series)
end

@testset "(Dis)aggregating preserves extent" begin
for data in (5:10:115, 115:-10:5)
for interval in (Start(), Center(), End())
x = X(Sampled(data; sampling = Intervals(interval))) |> Rasters.format
xag = aggregate(sum, x, 3)
xdisag = disaggregate(xag, 3)
@test Rasters.bounds(x) == Rasters.bounds(xag) == Rasters.bounds(xdisag)
end
end
end
Loading