Skip to content

Commit f32ce3a

Browse files
authored
Merge pull request #21360 from VermaSh/enable_stringLatin1_inflate
2 parents 0bbf967 + 4f96875 commit f32ce3a

File tree

1 file changed

+67
-22
lines changed

1 file changed

+67
-22
lines changed

runtime/compiler/z/codegen/J9TreeEvaluator.cpp

Lines changed: 67 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,30 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
292292
TR::Register *charArrayReferenceRegister = cg->gprClobberEvaluate(charArrayReferenceNode);
293293
TR::Register *dstOffRegister = cg->gprClobberEvaluate(dstOffNode);
294294

295+
// Offset to be added to array object pointer to get to the data elements
296+
int32_t offsetToDataElements = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
297+
#ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
298+
if (TR::Compiler->om.isOffHeapAllocationEnabled())
299+
{
300+
// Load first data element address for source array
301+
generateRXInstruction(cg,
302+
TR::InstOpCode::getLoadOpCode(),
303+
node,
304+
sourceArrayReferenceRegister,
305+
generateS390MemoryReference(sourceArrayReferenceRegister, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
306+
307+
// Load first data element address for char array
308+
generateRXInstruction(cg,
309+
TR::InstOpCode::getLoadOpCode(),
310+
node,
311+
charArrayReferenceRegister,
312+
generateS390MemoryReference(charArrayReferenceRegister, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
313+
314+
// The first data element address is loaded directly from the array header, so no additional offset is needed
315+
offsetToDataElements = 0;
316+
}
317+
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
318+
295319
// Adjust the array reference (source and destination) with offset in advance
296320
if (srcOffNode->getOpCodeValue() == TR::iconst)
297321
{
@@ -326,8 +350,8 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
326350
// charArrayReference is the destination array. Since the vector loop below processes 16 bytes into 16 chars per iteration, we will store 32 bytes per iteration.
327351
// We use the `VST` instruction twice to store 16 bytes at a time. Hence, we need a "low" and "high" memref for the char array in order to store all 32 bytes per iteration
328352
// of the vector loop.
329-
TR::MemoryReference *charArrayReferenceMemRefLow = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
330-
TR::MemoryReference *charArrayReferenceMemRefHigh = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 16, cg);
353+
TR::MemoryReference *charArrayReferenceMemRefLow = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements, cg);
354+
TR::MemoryReference *charArrayReferenceMemRefHigh = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 16, cg);
331355

332356
// numCharsMinusResidue is used as a scratch register to hold temporary values throughout the algorithm.
333357
TR::Register *numCharsMinusResidue = cg->allocateRegister();
@@ -354,7 +378,7 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
354378
// We keep executing the vector tight loop below until only the residual characters remain to process.
355379
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, srcOffRegister, numCharsMinusResidue, TR::InstOpCode::COND_BH, handleResidueLabel, false, false);
356380
TR::Register* registerV1 = cg->allocateRegister(TR_VRF);
357-
TR::MemoryReference *sourceArrayMemRef = generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
381+
TR::MemoryReference *sourceArrayMemRef = generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements, cg);
358382
// Do a vector load to batch process the characters.
359383
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerV1, sourceArrayMemRef);
360384
TR::Register* registerV2 = cg->allocateRegister(TR_VRF);
@@ -375,9 +399,9 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
375399
// Once we reach this label, only the residual characters need to be processed.
376400
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handleResidueLabel);
377401

378-
TR::MemoryReference *sourceArrayMemRef2 = generateS390MemoryReference(sourceArrayReferenceRegister2, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
379-
TR::MemoryReference *charArrayReferenceMemRefLow2 = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
380-
TR::MemoryReference *charArrayReferenceMemRefHigh2 = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 16, cg);
402+
TR::MemoryReference *sourceArrayMemRef2 = generateS390MemoryReference(sourceArrayReferenceRegister2, offsetToDataElements, cg);
403+
TR::MemoryReference *charArrayReferenceMemRefLow2 = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements, cg);
404+
TR::MemoryReference *charArrayReferenceMemRefHigh2 = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 16, cg);
381405

382406
TR::Register *quoRegister = cg->allocateRegister();
383407
// Do lenRegister / 16 to calculate remaining number of chars using the Divide Logical (DLR) instruction.
@@ -443,26 +467,26 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
443467
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);
444468

445469
// 7 chars left
446-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 6, cg));
447-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 12, cg));
470+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 6, cg));
471+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 12, cg));
448472
// 6 chars left
449-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 5, cg));
450-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 10, cg));
473+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 5, cg));
474+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 10, cg));
451475
// 5 chars left
452-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 4, cg));
453-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 8, cg));
476+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 4, cg));
477+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 8, cg));
454478
// 4 chars left
455-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 3, cg));
456-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 6, cg));
479+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 3, cg));
480+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 6, cg));
457481
// 3 chars left
458-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 2, cg));
459-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 4, cg));
482+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 2, cg));
483+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 4, cg));
460484
// 2 chars left
461-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 1, cg));
462-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 2, cg));
485+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 1, cg));
486+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 2, cg));
463487
// 1 char left
464-
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 0, cg));
465-
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 0, cg));
488+
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 0, cg));
489+
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 0, cg));
466490

467491
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 14, cg);
468492
dependencies->addPostConditionIfNotAlreadyInserted(sourceArrayReferenceRegister, TR::RealRegister::AssignAny);
@@ -917,6 +941,27 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *n
917941
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);
918942
}
919943

944+
#ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
945+
if (TR::Compiler->om.isOffHeapAllocationEnabled())
946+
{
947+
// Load first data element address for input array
948+
generateRXInstruction(cg,
949+
TR::InstOpCode::getLoadOpCode(),
950+
node,
951+
inputPtrReg,
952+
generateS390MemoryReference(inputPtrReg, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
953+
}
954+
else
955+
#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
956+
{
957+
// Add array header size to get address of first data element
958+
generateRXInstruction(cg,
959+
TR::InstOpCode::getLoadAddressOpCode(),
960+
node,
961+
inputPtrReg,
962+
generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
963+
}
964+
920965
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
921966
cFlowRegionStart->setStartInternalControlFlow();
922967
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
@@ -934,7 +979,7 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *n
934979
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);
935980

936981
// Load bytes and search for out of range character
937-
generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
982+
generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, 0, cg));
938983

939984
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
940985

@@ -956,7 +1001,7 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *n
9561001
// VLL and VSTL work on indices so we subtract 1
9571002
generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -1);
9581003
// Load residue bytes and check for out of range character
959-
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
1004+
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, 0, cg));
9601005

9611006
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
9621007
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, isCountPositives ? processCountPositivesOutOfRangeChar : processOutOfRangeChar);

0 commit comments

Comments
 (0)