@@ -278,6 +278,7 @@ function lower_store_vectorized!(
278
278
for u ∈ 0 : U- 1
279
279
name, mo = name_mo (var, op, u, W, vecnotunrolled, unrolled)
280
280
instrcall = Expr (:call ,lv (:vstore! ), ptr, name, mo)
281
+ # @show mask, vecnotunrolled, u, U
281
282
if mask != = nothing && (vecnotunrolled || u == U - 1 )
282
283
push! (instrcall. args, mask)
283
284
end
@@ -500,6 +501,7 @@ function lower_nest(
500
501
nisvectorized = loopsym === vectorized
501
502
nisunrolled = false
502
503
nistiled = false
504
+ # @show n, mask
503
505
if istiled
504
506
if n == nloops
505
507
loopsym = tiledsym (loopsym)
@@ -571,12 +573,6 @@ function add_vec_rem_iter(
571
573
)
572
574
loopq = lower_nest (ls, n, vectorized, U, T, loopqold, loopstart, W, nothing , Uexprtype)
573
575
if order[n] === vectorized
574
- vecloop = ls. loops[vectorized]
575
- comparison = if vecloop. hintexact
576
- Expr (:call , :(!= ), vectorized, vecloop. rangehint)
577
- else
578
- Expr (:call , :(!= ), vectorized, vecloop. rangesym)
579
- end
580
576
loopq = Expr (
581
577
:block , loopq,
582
578
lower_nest (ls, n, vectorized, U, T, loopqold, loopstart, W, Symbol (" ##mask##" ), :if )
@@ -586,15 +582,28 @@ function add_vec_rem_iter(
586
582
end
587
583
function lower_set (ls:: LoopSet , vectorized:: Symbol , U:: Int , T:: Int , W:: Symbol , :: Nothing , Uexprtype:: Symbol )
588
584
# @show U, T, W
585
+ loopstart = 0
589
586
istiled = T != - 1
590
587
order = names (ls)
591
588
unrolled = order[end - istiled]
592
589
unrolled === vectorized && return lower_set_unrolled_is_vectorized (ls, vectorized, U, T, W, nothing , Uexprtype)
590
+ ns = 1
593
591
nl = num_loops (ls) - istiled
594
- loopq = add_vec_rem_iter ( ls, 1 , vectorized, U, T, nothing , 0 , W, nl == 1 ? Uexprtype : :while , order )
595
- for n ∈ 2 : nl
592
+ exprtype = nl == ns ? Uexprtype : :while
593
+ nomaskq = lower_nest (ls, 1 , vectorized, U, T, nothing , loopstart, W, nothing , exprtype)
594
+ maskq = lower_nest (ls, 1 , vectorized, U, T, nothing , loopstart, W, Symbol (" ##mask##" ), order[ns] === vectorized ? :if : exprtype)
595
+ while order[ns] != = vectorized
596
+ ns += 1
597
+ exprtype = nl == ns ? Uexprtype : :while
598
+ nomaskq = lower_nest (ls, ns, vectorized, U, T, nomaskq, loopstart, W, nothing , exprtype)
599
+ maskq = lower_nest (ls, ns, vectorized, U, T, maskq, loopstart, W, Symbol (" ##mask##" ), order[ns] === vectorized ? :if : exprtype)
600
+ end
601
+ ns += 1
602
+ loopq = Expr (:block , nomaskq, maskq)
603
+ for n ∈ ns: nl
596
604
exprtype = n == nl ? Uexprtype : :while
597
- loopq = add_vec_rem_iter ( ls, n, vectorized, U, T, loopq, 0 , W, exprtype, order )
605
+ loopq = lower_nest (ls, n, vectorized, U, T, loopq, loopstart, W, nothing , exprtype)
606
+ # loopq = add_vec_rem_iter( ls, n, vectorized, U, T, loopq, 0, W, exprtype, order )
598
607
end
599
608
loopq
600
609
end
@@ -782,6 +791,7 @@ function lower_unrolled_dynamic!(
782
791
Ureduct = - 1
783
792
end
784
793
Ut = U
794
+ vecisunrolled = unrolled === vectorized
785
795
local remblock:: Expr
786
796
firstiter = true
787
797
while true
@@ -792,13 +802,13 @@ function lower_unrolled_dynamic!(
792
802
end
793
803
push! (q. args, loopq)
794
804
elseif U == 1 #
795
- if unrolled === vectorized
805
+ if vecisunrolled
796
806
push! (remblock. args, lower_set (ls, vectorized, Ut, T, W, Symbol (" ##mask##" ), :block ))
797
807
else
798
808
push! (remblock. args, lower_set (ls, vectorized, Ut, T, W, nothing , :block ))
799
809
end
800
810
else
801
- remblocknew = if unrolled === vectorized
811
+ remblocknew = if vecisunrolled
802
812
itercount = if unrolledloop. hintexact
803
813
Expr (:call , :- , unrolledloop. rangehint, Expr (:call , lv (:valmuladd ), W, Ut, 1 ))
804
814
else
@@ -813,12 +823,12 @@ function lower_unrolled_dynamic!(
813
823
Expr (:call , :> , unrolled, Expr (:call , :- , unrolled_numitersym, Ut + 1 ))
814
824
end
815
825
Expr (Ut == 1 ? :if : :elseif , comparison, lower_set (ls, vectorized, Ut, T, W, nothing , :block ))
826
+ # Expr(Ut == 1 ? :if : :elseif, comparison, lower_set(ls, vectorized, Ut, T, W, Symbol("##mask##"), :block))
816
827
end
817
828
push! (remblock. args, remblocknew)
818
829
remblock = remblocknew
819
830
end
820
- if Ut == U || Ut == Ureduct
821
- firstiter || break
831
+ if firstiter
822
832
firstiter = false
823
833
if manageouterreductions && Ureduct < U
824
834
Udiff = U - Ureduct
@@ -834,6 +844,8 @@ function lower_unrolled_dynamic!(
834
844
end
835
845
remblock = Expr (:block )
836
846
push! (q. args, Expr (:if , comparison, remblock))
847
+ elseif ! (Ut < U - 1 + vecisunrolled) || Ut == Ureduct
848
+ break
837
849
else
838
850
Ut += 1
839
851
end
0 commit comments