@@ -277,41 +277,46 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
277
277
tc = terminatecondition (ls, us, n, inclmask, 1 )
278
278
body = lower_block (ls, us, n, inclmask, 1 )
279
279
# align_loop = isone(n) & (ls.align_loops[] > 0)
280
- isstatic = isstaticloop (loop) # & (!align_loop )
281
- if ! isstatic && (usorig. u₁ == us. u₁) && (usorig. u₂ == us. u₂) && ! inclmask
280
+ loopisstatic = isstaticloop (loop)
281
+ if ! loopisstatic && (usorig. u₁ == us. u₁) && (usorig. u₂ == us. u₂) && ! inclmask
282
282
tc = expect (tc)
283
283
end
284
+ W = nisvectorized ? ls. vector_width[] : 1
285
+ loopisstatic &= (! iszero (W))
284
286
# q = if align_loop
285
287
# Expr(:block, align_inner_loop_expr(ls, us, loop), Expr(:while, tc, body))
286
288
# elseif nisvectorized
287
- q = if nisvectorized
289
+ if loopisstatic && length (loop) ≤ 8 W
290
+ q = Expr (:block )
291
+ foreach (_ -> push! (q. args, body), 1 : (length (loop) ÷ W))
292
+ elseif nisvectorized
288
293
# Expr(:block, loopiteratesatleastonce(loop, true), Expr(:while, expect(tc), body))
289
- Expr (:block , Expr (:while , tc, body))
290
- elseif isstatic && length (loop) ≤ 8
291
- bodyq = Expr (:block )
292
- foreach (_ -> push! (bodyq. args, body), 1 : length (loop))
293
- bodyq
294
+ q = Expr (:block , Expr (:while , tc, body))
294
295
else
295
296
termcond = gensym (:maybeterm )
296
297
push! (body. args, Expr (:(= ), termcond, tc))
297
- Expr (:block , Expr (:(= ), termcond, true ), Expr (:while , termcond, body))
298
+ q = Expr (:block , Expr (:(= ), termcond, true ), Expr (:while , termcond, body))
298
299
# Expr(:block, Expr(:while, expect(tc), body))
299
300
# Expr(:block, assume(tc), Expr(:while, tc, body))
300
301
# push!(body.args, Expr(:&&, expect(Expr(:call, :!, tc)), Expr(:break)))
301
302
# Expr(:block, assume(tc), Expr(:while, true, body))
302
303
# push!(body.args, Expr(:||, expect(tc), Expr(:break)))
303
304
# Expr(:block, Expr(:while, true, body))
304
305
end
305
- if nisvectorized
306
+ if nisvectorized && ! (loopisstatic && iszero ( length (loop) & (W - 1 )))
306
307
# tc = terminatecondition(loop, us, n, loopsym, true, 1)
307
- tc = terminatecondition (ls, us, n, true , 1 )
308
308
body = lower_block (ls, us, n, true , 1 )
309
309
if isone (num_loops (ls))
310
310
pushfirst! (body. args, definemask (loop))
311
311
# elseif align_loop
312
312
# pushfirst!(body.args, definemask_for_alignment_cleanup(loop))
313
313
end
314
- push! (q. args, Expr (:if , tc, body))
314
+ if loopisstatic
315
+ push! (q. args, body)
316
+ else
317
+ tc = terminatecondition (ls, us, n, true , 1 )
318
+ push! (q. args, Expr (:if , tc, body))
319
+ end
315
320
end
316
321
Expr (:block , Expr (:let , sl, q))
317
322
end
@@ -353,6 +358,7 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
353
358
remblock = init_remblock (loop, ls. lssm[], n)# loopsym)
354
359
q = Expr (:while , tc, body)
355
360
end
361
+ # @show loopsym, loopisstatic, UFW
356
362
q = if unsigned (Ureduct) < unsigned (UF) # unsigned(-1) == typemax(UInt); is logic relying on twos-complement bad?
357
363
UF_cleanup = UF - Ureduct
358
364
us_cleanup = nisunrolled ? UnrollSpecification (us, UF_cleanup, u₂) : UnrollSpecification (us, u₁, UF_cleanup)
0 commit comments