Skip to content

Commit acf388a

Browse files
committed
Renamed benchmarks -> benchmark, and made tweaks, including finally fixing and testing LowDimArray.
1 parent 20544db commit acf388a

13 files changed

+68
-18
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <elrodc@gmail.com>"]
4-
version = "0.3.3"
4+
version = "0.3.4"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
File renamed without changes.

benchmark/benchmarks.jl

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
using BenchmarkTools
3+
4+
suite = BenchmarkGroup()
5+
suite["linalg"] = BenchmarkGroup(["matmul","dot"])
6+
7+
include(joinpath(@__DIR__, "looptests.jl"))
8+
9+
for n ∈ 1:256
10+
11+
end
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

src/broadcast.jl

+8-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,14 @@ end
8888
struct LowDimArray{D,T,N,A<:DenseArray{T,N}} <: DenseArray{T,N}
8989
data::A
9090
end
91-
@inline Base.pointer(A::LowDimArray) = pointer(A)
91+
@inline Base.pointer(A::LowDimArray) = pointer(A.data)
92+
Base.size(A::LowDimArray) = Base.size(A.data)
93+
@generated function VectorizationBase.stridedpointer(A::LowDimArray{D,T,N}) where {D,T,N}
94+
s = Expr(:tuple, [Expr(:ref, :strideA, n) for n ∈ 1+D[1]:N if D[n]]...)
95+
f = D[1] ? :PackedStridedPointer : :SparseStridedPointer
96+
Expr(:block, Expr(:meta,:inline), Expr(:(=), :strideA, Expr(:call, :strides, Expr(:(.), :A, QuoteNode(:data)))),
97+
Expr(:call, Expr(:(.), :VectorizationBase, QuoteNode(f)), Expr(:call, :pointer, Expr(:(.), :A, QuoteNode(:data))), s))
98+
end
9299
function LowDimArray{D}(data::A) where {D,T,N,A <: AbstractArray{T,N}}
93100
LowDimArray{D,T,N,A}(data)
94101
end

src/determinestrategy.jl

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ function unroll_no_reductions(ls, order, vectorized, Wshift, size_T)
124124
end
125125
end
126126
# heuristic guess
127-
round(Int, (compute_rt + load_rt + 1) / compute_rt)
127+
# @show compute_rt, load_rt
128+
min(2, round(Int, (compute_rt + load_rt + 1) / compute_rt))
128129
end
129130
function determine_unroll_factor(
130131
ls::LoopSet, order::Vector{Symbol}, unrolled::Symbol, vectorized::Symbol = first(order)

src/lowering.jl

+25-13
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ function lower_store_vectorized!(
278278
for u ∈ 0:U-1
279279
name, mo = name_mo(var, op, u, W, vecnotunrolled, unrolled)
280280
instrcall = Expr(:call,lv(:vstore!), ptr, name, mo)
281+
# @show mask, vecnotunrolled, u, U
281282
if mask !== nothing && (vecnotunrolled || u == U - 1)
282283
push!(instrcall.args, mask)
283284
end
@@ -500,6 +501,7 @@ function lower_nest(
500501
nisvectorized = loopsym === vectorized
501502
nisunrolled = false
502503
nistiled = false
504+
# @show n, mask
503505
if istiled
504506
if n == nloops
505507
loopsym = tiledsym(loopsym)
@@ -571,12 +573,6 @@ function add_vec_rem_iter(
571573
)
572574
loopq = lower_nest(ls, n, vectorized, U, T, loopqold, loopstart, W, nothing, Uexprtype)
573575
if order[n] === vectorized
574-
vecloop = ls.loops[vectorized]
575-
comparison = if vecloop.hintexact
576-
Expr(:call, :(!=), vectorized, vecloop.rangehint)
577-
else
578-
Expr(:call, :(!=), vectorized, vecloop.rangesym)
579-
end
580576
loopq = Expr(
581577
:block, loopq,
582578
lower_nest(ls, n, vectorized, U, T, loopqold, loopstart, W, Symbol("##mask##"), :if)
@@ -586,15 +582,28 @@ function add_vec_rem_iter(
586582
end
587583
function lower_set(ls::LoopSet, vectorized::Symbol, U::Int, T::Int, W::Symbol, ::Nothing, Uexprtype::Symbol)
588584
# @show U, T, W
585+
loopstart = 0
589586
istiled = T != -1
590587
order = names(ls)
591588
unrolled = order[end - istiled]
592589
unrolled === vectorized && return lower_set_unrolled_is_vectorized(ls, vectorized, U, T, W, nothing, Uexprtype)
590+
ns = 1
593591
nl = num_loops(ls) - istiled
594-
loopq = add_vec_rem_iter( ls, 1, vectorized, U, T, nothing, 0, W, nl == 1 ? Uexprtype : :while, order )
595-
for n ∈ 2:nl
592+
exprtype = nl == ns ? Uexprtype : :while
593+
nomaskq = lower_nest(ls, 1, vectorized, U, T, nothing, loopstart, W, nothing, exprtype)
594+
maskq = lower_nest(ls, 1, vectorized, U, T, nothing, loopstart, W, Symbol("##mask##"), order[ns] === vectorized ? :if : exprtype)
595+
while order[ns] !== vectorized
596+
ns += 1
597+
exprtype = nl == ns ? Uexprtype : :while
598+
nomaskq = lower_nest(ls, ns, vectorized, U, T, nomaskq, loopstart, W, nothing, exprtype)
599+
maskq = lower_nest(ls, ns, vectorized, U, T, maskq, loopstart, W, Symbol("##mask##"), order[ns] === vectorized ? :if : exprtype)
600+
end
601+
ns += 1
602+
loopq = Expr(:block, nomaskq, maskq)
603+
for n ∈ ns:nl
596604
exprtype = n == nl ? Uexprtype : :while
597-
loopq = add_vec_rem_iter( ls, n, vectorized, U, T, loopq, 0, W, exprtype, order )
605+
loopq = lower_nest(ls, n, vectorized, U, T, loopq, loopstart, W, nothing, exprtype)
606+
# loopq = add_vec_rem_iter( ls, n, vectorized, U, T, loopq, 0, W, exprtype, order )
598607
end
599608
loopq
600609
end
@@ -782,6 +791,7 @@ function lower_unrolled_dynamic!(
782791
Ureduct = -1
783792
end
784793
Ut = U
794+
vecisunrolled = unrolled === vectorized
785795
local remblock::Expr
786796
firstiter = true
787797
while true
@@ -792,13 +802,13 @@ function lower_unrolled_dynamic!(
792802
end
793803
push!(q.args, loopq)
794804
elseif U == 1 #
795-
if unrolled === vectorized
805+
if vecisunrolled
796806
push!(remblock.args, lower_set(ls, vectorized, Ut, T, W, Symbol("##mask##"), :block))
797807
else
798808
push!(remblock.args, lower_set(ls, vectorized, Ut, T, W, nothing, :block))
799809
end
800810
else
801-
remblocknew = if unrolled === vectorized
811+
remblocknew = if vecisunrolled
802812
itercount = if unrolledloop.hintexact
803813
Expr(:call, :-, unrolledloop.rangehint, Expr(:call, lv(:valmuladd), W, Ut, 1))
804814
else
@@ -813,12 +823,12 @@ function lower_unrolled_dynamic!(
813823
Expr(:call, :>, unrolled, Expr(:call, :-, unrolled_numitersym, Ut + 1))
814824
end
815825
Expr(Ut == 1 ? :if : :elseif, comparison, lower_set(ls, vectorized, Ut, T, W, nothing, :block))
826+
# Expr(Ut == 1 ? :if : :elseif, comparison, lower_set(ls, vectorized, Ut, T, W, Symbol("##mask##"), :block))
816827
end
817828
push!(remblock.args, remblocknew)
818829
remblock = remblocknew
819830
end
820-
if Ut == U || Ut == Ureduct
821-
firstiter || break
831+
if firstiter
822832
firstiter = false
823833
if manageouterreductions && Ureduct < U
824834
Udiff = U - Ureduct
@@ -834,6 +844,8 @@ function lower_unrolled_dynamic!(
834844
end
835845
remblock = Expr(:block)
836846
push!(q.args, Expr(:if, comparison, remblock))
847+
elseif !(Ut < U - 1 + vecisunrolled) || Ut == Ureduct
848+
break
837849
else
838850
Ut += 1
839851
end

test/runtests.jl

+21-2
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@ using LinearAlgebra
365365
B[j,i] = A[j,i] - x[j]
366366
end)
367367
lssubcol = LoopVectorization.LoopSet(subcolq);
368-
@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], :j, 4, -1)
368+
# @test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], :j, 4, -1)
369+
@test LoopVectorization.choose_order(lssubcol) == (Symbol[:j,:i], :j, 2, -1)
369370
## @avx is SLOWER!!!!
370371
## need to fix!
371372
function mysubcol!(B, A, x)
@@ -465,8 +466,25 @@ end
465466
M, N = 37, 47
466467
# M = 77;
467468
for T ∈ (Float32, Float64)
468-
a = rand(T, M); B = rand(T, M, N); c = rand(T, N); c′ = c';
469469

470+
a = rand(T,100,100,100);
471+
b = rand(T,100,100,1);
472+
bl = LowDimArray{(true,true,false)}(b);
473+
br = reshape(b, (100,100));
474+
c1 = a .+ b;
475+
c2 = @avx a .+ bl;
476+
@test c1 ≈ c2
477+
fill!(c2, 99999.9);
478+
@avx c2 .= a .+ br;
479+
@test c1 ≈ c2
480+
br = reshape(b, (100,1,100));
481+
bl = LowDimArray{(true,false,true)}(br);
482+
@. c1 = a + br;
483+
fill!(c2, 99999.9);
484+
@avx @. c2 = a + bl;
485+
@test c1 ≈ c2
486+
487+
a = rand(T, M); B = rand(T, M, N); c = rand(T, N); c′ = c';
470488
d1 = @. a + B * c′;
471489
d2 = @avx @. a + B * c′;
472490

@@ -534,6 +552,7 @@ end
534552
D1 = C .^ 0.3;
535553
D2 = @avx C .^ 0.3;
536554
@test D1 ≈ D2
555+
537556
end
538557
end
539558

0 commit comments

Comments
 (0)