Skip to content

Commit 09115a8

Browse files
committed
Masks weren't being defined for short static length single loops.
1 parent 8679bf4 commit 09115a8

File tree

3 files changed

+13
-3
lines changed

3 files changed

+13
-3
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <elrodc@gmail.com>"]
4-
version = "0.8.18"
4+
version = "0.8.19"
55

66
[deps]
77
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"

src/lowering.jl

+2-1
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
322322
end
323323
function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask::Bool)
324324
UF = unrollfactor(us, n)
325-
UF == 1 && return lower_no_unroll(ls, us, n, inclmask)
325+
isone(UF) && return lower_no_unroll(ls, us, n, inclmask)
326326
@unpack u₁loopnum, vectorizedloopnum, u₁, u₂ = us
327327
order = names(ls)
328328
loopsym = order[n]
@@ -354,6 +354,7 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
354354
q = Expr(:while, tc, body)
355355
end
356356
remblock = Expr(:block)
357+
(nisvectorized && (UFt > 0) && isone(num_loops(ls))) && push!(remblock.args, definemask(loop))
357358
else
358359
remblock = init_remblock(loop, ls.lssm[], n)#loopsym)
359360
q = Expr(:while, tc, body)

test/copy.jl

+10-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,13 @@ using LoopVectorization, OffsetArrays, Test
108108
end
109109
B
110110
end
111-
111+
function copy3!(B, A)
112+
@assert (length(B) ≥ 3) && (length(A) ≥ 3)
113+
@avx for i in 1:3
114+
B[i] = A[i]
115+
end
116+
B
117+
end
112118

113119
for T ∈ (Float32, Float64, Int32, Int64)
114120
@show T, @__LINE__
@@ -179,5 +185,8 @@ using LoopVectorization, OffsetArrays, Test
179185

180186
@test reversecopy1!(zeros(T, 10), collect(1:10)) == reversecopy1avx!(zeros(T, 10), collect(1:10))
181187
@test reversecopy2!(zeros(T, 10), OffsetArray(collect(1:10), -10:-1)) == reversecopy2avx!(zeros(T, 10), OffsetArray(collect(1:10), -10:-1))
188+
189+
x = rand(R, 3); y = similar(x);
190+
@test copy3!(y, x) == x
182191
end
183192
end

0 commit comments

Comments
 (0)