Skip to content

Commit 35cdf26

Browse files
committed
Fast Path StringCoding.countPositives and StringCoding.hasNegatives for Power
Fast path the StringCoding methods countPositives and hasNegatives on Power. Since their logic is similar, both can be implemented by a single intrinsic. Signed-off-by: Luke Li <luke.li@ibm.com>
1 parent 0c2bd78 commit 35cdf26

File tree

2 files changed

+213
-0
lines changed

2 files changed

+213
-0
lines changed

runtime/compiler/p/codegen/J9CodeGenerator.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,14 @@ J9::Power::CodeGenerator::initialize()
133133
cg->setEnableTLHPrefetching();
134134
}
135135

136+
if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8) &&
137+
comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX) &&
138+
!TR::Compiler->om.canGenerateArraylets() &&
139+
!TR::Compiler->om.isOffHeapAllocationEnabled())
140+
{
141+
cg->setSupportsInlineStringCodingCountPositives();
142+
}
143+
136144
//This env-var does 3 things:
137145
// 1. Prevents batch clear in frontend/j9/rossa.cpp
138146
// 2. Prevents all allocations to nonZeroTLH

runtime/compiler/p/codegen/J9TreeEvaluator.cpp

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11693,6 +11693,196 @@ static bool inlineIntrinsicInflate(TR::Node *node, TR::CodeGenerator *cg)
1169311693
return true;
1169411694
}
1169511695

11696+
/**
 * \brief
 *    Generates one inline instruction sequence that implements either
 *    StringCoding.hasNegatives or StringCoding.countPositives, selected by
 *    \p isCountPositives.
 *
 * \details
 *    The generated code has three stages:
 *      1. A scalar check of the length (<= 0) and of the first byte.
 *      2. A vector loop that loads 16 bytes per iteration and uses vcmpgtsb. to
 *         detect whether any byte in the block is negative.
 *      3. A serial byte loop that handles the residue and, for countPositives,
 *         rescans the 16-byte block in which the vector loop found a negative byte
 *         in order to locate its position.
 *
 * \param node
 *    The call node. Child 0 is the byte array, child 1 the starting offset and
 *    child 2 the length. All children have their reference counts decremented.
 *
 * \param cg
 *    The code generator used to allocate registers and emit instructions.
 *
 * \param isCountPositives
 *    true to generate countPositives (the result is the number of leading
 *    non-negative bytes, or the length if none is negative or the length is <= 0);
 *    false to generate hasNegatives (the result is 1 if a negative byte is found,
 *    0 otherwise).
 *
 * \return
 *    The register holding the result, which is also set as the register of \p node.
 */
static TR::Register *inlineStringCodingHasNegativesOrCountPositives(TR::Node *node,
                                                                    TR::CodeGenerator *cg,
                                                                    bool isCountPositives)
   {
   TR::Register *startReg = cg->gprClobberEvaluate(node->getChild(0)); // array
   TR::Register *indexReg = cg->gprClobberEvaluate(node->getChild(1)); // offset
   TR::Register *lengthReg = cg->evaluate(node->getChild(2)); // length

   TR::Register *tempReg = cg->allocateRegister();

   TR::Register *cr6 = cg->allocateRegister(TR_CCR);

   TR::Register *vconstant0Reg = cg->allocateRegister(TR_VRF);
   TR::Register *vtmp1Reg = cg->allocateRegister(TR_VRF);

   TR::LabelSymbol *VSXLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *serialLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *vecResultLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *resultLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *endLabel = generateLabelSymbol(cg);

   // check empty: a length <= 0 goes straight to the "nothing negative found" result
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpi4, node, cr6, lengthReg, 0);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::ble, node, resultLabel, cr6);

   // skip over or load the header
#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION)
   if (TR::Compiler->om.isOffHeapAllocationEnabled())
      {
      generateTrg1MemInstruction(
         cg, TR::InstOpCode::ld, node, startReg,
         TR::MemoryReference::createWithDisplacement(
            cg, startReg, TR::Compiler->om.offsetOfContiguousDataAddrField(), 8)
      );
      }
   else
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
      {
      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, startReg, startReg,
                                     TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
      }

   // get the starting address
   // NOTE(review): this is a full-width add of the int offset; presumably the upper
   // half of indexReg is clean on 64-bit targets -- confirm against the callers.
   generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, startReg, startReg, indexReg);
   // make the index 0 since everything we need is relative to the offset
   generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, indexReg, 0);

   // check the first byte
   generateTrg1MemInstruction(cg, TR::InstOpCode::lbzx, node, tempReg,
                              TR::MemoryReference::createWithIndexReg(cg, startReg, indexReg, 1));
   generateTrg1Src1Instruction(cg, TR::InstOpCode::extsb, node, tempReg, tempReg);
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpi4, node, cr6, tempReg, 0);
   // when seeking negatives, we need to return 1 (li does not disturb the compare result in cr6)
   if (!isCountPositives)
      generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, tempReg, 1);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, node, endLabel, cr6);
   // if we only have one byte end it here, and return 0 for hasNegatives
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, indexReg, indexReg, 1);
   generateTrg1Src2Instruction(cg, TR::InstOpCode::cmp4, node, cr6, indexReg, lengthReg);
   if (!isCountPositives)
      generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, tempReg, 0);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, node, endLabel, cr6);

   // ready the zero reg
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vxor, node, vconstant0Reg, vconstant0Reg, vconstant0Reg);
   // tempReg marks the end where we could use a 16-byte vector load.
   // lengthReg is the RA operand of this addi, so it must never be assigned gr0
   // (see the register dependencies at the end of this sequence).
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, tempReg, lengthReg, -15);

   // --- start of VSXLoop
   generateLabelInstruction(cg, TR::InstOpCode::label, node, VSXLabel);
   // go to residue if we don't have enough items to do one load
   generateTrg1Src2Instruction(cg, TR::InstOpCode::cmp4, node, cr6, indexReg, tempReg);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::bge, node, serialLabel, cr6);

   // load 16 items
   // No byte reordering is done on little-endian targets: the compare below is only
   // consumed through its any/none summary in cr6, and countPositives locates the
   // exact position by rescanning this block in the serial loop, so the order of the
   // bytes inside the vector register does not matter. If a positional vector scan
   // is ever added here, the byte order must be normalized first.
   generateTrg1Src2Instruction(cg, TR::InstOpCode::lxvw4x, node, vtmp1Reg, startReg, indexReg);
   // bit 2 of cr6 (ZERO) will not be set if any comparison is true
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vcmpgtsb_r, node, vtmp1Reg, vconstant0Reg, vtmp1Reg);
   // branch when the ZERO bit is not set; indexReg still points at the start of this block
   generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, vecResultLabel, cr6);

   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, indexReg, indexReg, 16);
   generateLabelInstruction(cg, TR::InstOpCode::b, node, VSXLabel);

   // --- this label is only used when we exit from the VSXLoop
   generateLabelInstruction(cg, TR::InstOpCode::label, node, vecResultLabel);
   if (isCountPositives) // jump to the serial label which should soon count to the value we want
      {
      generateLabelInstruction(cg, TR::InstOpCode::b, node, serialLabel);
      }
   else // just report 1
      {
      generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, tempReg, 1);
      generateLabelInstruction(cg, TR::InstOpCode::b, node, endLabel);
      }

   // --- serialLabel to deal with whatever remains
   generateLabelInstruction(cg, TR::InstOpCode::label, node, serialLabel);
   generateTrg1Src2Instruction(cg, TR::InstOpCode::cmp4, node, cr6, indexReg, lengthReg);
   // if we reach the end, indexReg is len already, so we don't need to do anything for countPositives
   if (isCountPositives)
      generateConditionalBranchInstruction(cg, TR::InstOpCode::bge, node, endLabel, cr6);
   else
      generateConditionalBranchInstruction(cg, TR::InstOpCode::bge, node, resultLabel, cr6);

   generateTrg1MemInstruction(cg, TR::InstOpCode::lbzx, node, tempReg,
                              TR::MemoryReference::createWithIndexReg(cg, startReg, indexReg, 1));
   generateTrg1Src1Instruction(cg, TR::InstOpCode::extsb, node, tempReg, tempReg);

   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpi4, node, cr6, tempReg, 0);
   // when seeking negatives, we need to return 1
   if (!isCountPositives)
      generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, tempReg, 1);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, node, endLabel, cr6);

   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, indexReg, indexReg, 1);
   generateLabelInstruction(cg, TR::InstOpCode::b, node, serialLabel);

   // --- load the length for countPositives; load 0 for hasNegatives
   generateLabelInstruction(cg, TR::InstOpCode::label, node, resultLabel);
   if (isCountPositives)
      generateTrg1Src1Instruction(cg, TR::InstOpCode::mr, node, indexReg, lengthReg);
   else
      generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, tempReg, 0);
   // end

   // One post condition per register used in the sequence above.
   TR::RegisterDependencyConditions *deps =
      new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg->trMemory());

   deps->addPostCondition(startReg, TR::RealRegister::NoReg);
   deps->getPostConditions()->getRegisterDependency(deps->getAddCursorForPost() - 1)->setExcludeGPR0();

   deps->addPostCondition(indexReg, TR::RealRegister::NoReg);
   deps->getPostConditions()->getRegisterDependency(deps->getAddCursorForPost() - 1)->setExcludeGPR0();

   // lengthReg is used as the RA operand of an addi above; with RA = gr0 the
   // instruction would read the literal 0 instead of the length.
   deps->addPostCondition(lengthReg, TR::RealRegister::NoReg);
   deps->getPostConditions()->getRegisterDependency(deps->getAddCursorForPost() - 1)->setExcludeGPR0();

   deps->addPostCondition(tempReg, TR::RealRegister::NoReg);
   deps->getPostConditions()->getRegisterDependency(deps->getAddCursorForPost() - 1)->setExcludeGPR0();

   // vcmpgtsb. reports its summary in the real cr6, so the virtual must be pinned to it
   deps->addPostCondition(cr6, TR::RealRegister::cr6);
   deps->addPostCondition(vconstant0Reg, TR::RealRegister::NoReg);
   deps->addPostCondition(vtmp1Reg, TR::RealRegister::NoReg);

   generateDepLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps);

   if (isCountPositives) // if countPositives, indexReg will contain the index of the first negative value
      {
      node->setRegister(indexReg);
      cg->stopUsingRegister(tempReg);
      }
   else // if hasNegatives, we will have a tempReg ready with zero or one
      {
      node->setRegister(tempReg);
      cg->stopUsingRegister(indexReg);
      }

   cg->stopUsingRegister(startReg);
   cg->stopUsingRegister(lengthReg);
   cg->stopUsingRegister(cr6);
   cg->stopUsingRegister(vconstant0Reg);
   cg->stopUsingRegister(vtmp1Reg);

   for (int32_t i = 0; i < node->getNumChildren(); i++)
      {
      cg->decReferenceCount(node->getChild(i));
      }

   if (isCountPositives) // if countPositives, indexReg will contain the index of the first negative value
      return indexReg;
   return tempReg; // if hasNegatives, we will have a tempReg ready with zero or one
   }
11885+
1169611886
/*
1169711887
* Arraycopy evaluator needs a version of inlineArrayCopy that can be used inside internal control flow. For this version of inlineArrayCopy, registers must
1169811888
* be allocated outside of this function so the dependency at the end of the control flow knows about them.
@@ -12261,6 +12451,21 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
1226112451
}
1226212452
break;
1226312453

12454+
case TR::java_lang_StringCoding_hasNegatives:
12455+
if (cg->getSupportsInlineStringCodingHasNegatives())
12456+
{
12457+
resultReg = inlineStringCodingHasNegativesOrCountPositives(node, cg, false);
12458+
return true;
12459+
}
12460+
break;
12461+
case TR::java_lang_StringCoding_countPositives:
12462+
if (cg->getSupportsInlineStringCodingCountPositives())
12463+
{
12464+
resultReg = inlineStringCodingHasNegativesOrCountPositives(node, cg, true);
12465+
return true;
12466+
}
12467+
break;
12468+
1226412469
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
1226512470
// In Java9 this can be either the jdk.internal JNI method or the sun.misc Java wrapper.
1226612471
// In Java8 it will be sun.misc which will contain the JNI directly.

0 commit comments

Comments
 (0)