@@ -292,6 +292,30 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
292
292
TR::Register *charArrayReferenceRegister = cg->gprClobberEvaluate(charArrayReferenceNode);
293
293
TR::Register *dstOffRegister = cg->gprClobberEvaluate(dstOffNode);
294
294
295
+ // Offset to be added to array object pointer to get to the data elements
296
+ int32_t offsetToDataElements = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
297
+ #ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
298
+ if (TR::Compiler->om.isOffHeapAllocationEnabled())
299
+ {
300
+ // Load first data element address for source array
301
+ generateRXInstruction(cg,
302
+ TR::InstOpCode::getLoadOpCode(),
303
+ node,
304
+ sourceArrayReferenceRegister,
305
+ generateS390MemoryReference(sourceArrayReferenceRegister, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
306
+
307
+ // Load first data element address for char array
308
+ generateRXInstruction(cg,
309
+ TR::InstOpCode::getLoadOpCode(),
310
+ node,
311
+ charArrayReferenceRegister,
312
+ generateS390MemoryReference(charArrayReferenceRegister, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
313
+
314
+ // The data address loaded from the array header above already points at the first element, so no header offset is needed
315
+ offsetToDataElements = 0;
316
+ }
317
+ #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
318
+
295
319
// Adjust the array reference (source and destination) with offset in advance
296
320
if (srcOffNode->getOpCodeValue() == TR::iconst)
297
321
{
@@ -326,8 +350,8 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
326
350
// charArrayReference is the destination array. Since the vector loop below processes 16 bytes into 16 chars per iteration, we will store 32 bytes per iteration.
327
351
// We use the `VST` instruction twice to store 16 bytes at a time. Hence, we need a "low" and "high" memref for the char array in order to store all 32 bytes per iteration
328
352
// of the vector loop.
329
- TR::MemoryReference *charArrayReferenceMemRefLow = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg);
330
- TR::MemoryReference *charArrayReferenceMemRefHigh = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 16, cg);
353
+ TR::MemoryReference *charArrayReferenceMemRefLow = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements , cg);
354
+ TR::MemoryReference *charArrayReferenceMemRefHigh = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 16, cg);
331
355
332
356
// numCharsMinusResidue is used as a scratch register to hold temporary values throughout the algorithm.
333
357
TR::Register *numCharsMinusResidue = cg->allocateRegister();
@@ -354,7 +378,7 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
354
378
// We keep executing the vector tight loop below until only the residual characters remain to process.
355
379
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, srcOffRegister, numCharsMinusResidue, TR::InstOpCode::COND_BH, handleResidueLabel, false, false);
356
380
TR::Register* registerV1 = cg->allocateRegister(TR_VRF);
357
- TR::MemoryReference *sourceArrayMemRef = generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg);
381
+ TR::MemoryReference *sourceArrayMemRef = generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements , cg);
358
382
// Do a vector load to batch process the characters.
359
383
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerV1, sourceArrayMemRef);
360
384
TR::Register* registerV2 = cg->allocateRegister(TR_VRF);
@@ -375,9 +399,9 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
375
399
// Once we reach this label, only the residual characters need to be processed.
376
400
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handleResidueLabel);
377
401
378
- TR::MemoryReference *sourceArrayMemRef2 = generateS390MemoryReference(sourceArrayReferenceRegister2, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg);
379
- TR::MemoryReference *charArrayReferenceMemRefLow2 = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg);
380
- TR::MemoryReference *charArrayReferenceMemRefHigh2 = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 16, cg);
402
+ TR::MemoryReference *sourceArrayMemRef2 = generateS390MemoryReference(sourceArrayReferenceRegister2, offsetToDataElements , cg);
403
+ TR::MemoryReference *charArrayReferenceMemRefLow2 = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements , cg);
404
+ TR::MemoryReference *charArrayReferenceMemRefHigh2 = generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 16, cg);
381
405
382
406
TR::Register *quoRegister = cg->allocateRegister();
383
407
// Do lenRegister / 16 to calculate remaining number of chars using the Divide Logical (DLR) instruction.
@@ -443,26 +467,26 @@ J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerato
443
467
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);
444
468
445
469
// 7 chars left
446
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 6, cg));
447
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 12, cg));
470
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 6, cg));
471
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 12, cg));
448
472
// 6 chars left
449
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 5, cg));
450
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 10, cg));
473
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 5, cg));
474
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 10, cg));
451
475
// 5 chars left
452
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 4, cg));
453
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 8, cg));
476
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 4, cg));
477
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 8, cg));
454
478
// 4 chars left
455
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 3, cg));
456
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 6, cg));
479
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 3, cg));
480
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 6, cg));
457
481
// 3 chars left
458
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 2, cg));
459
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 4, cg));
482
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 2, cg));
483
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 4, cg));
460
484
// 2 chars left
461
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 1, cg));
462
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 2, cg));
485
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 1, cg));
486
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 2, cg));
463
487
// 1 char left
464
- generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 0, cg));
465
- generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 0, cg));
488
+ generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, offsetToDataElements + 0, cg));
489
+ generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, offsetToDataElements + 0, cg));
466
490
467
491
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 14, cg);
468
492
dependencies->addPostConditionIfNotAlreadyInserted(sourceArrayReferenceRegister, TR::RealRegister::AssignAny);
@@ -917,6 +941,27 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *n
917
941
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0);
918
942
}
919
943
944
+ #ifdef J9VM_GC_SPARSE_HEAP_ALLOCATION
945
+ if (TR::Compiler->om.isOffHeapAllocationEnabled())
946
+ {
947
+ // Load first data element address for input array
948
+ generateRXInstruction(cg,
949
+ TR::InstOpCode::getLoadOpCode(),
950
+ node,
951
+ inputPtrReg,
952
+ generateS390MemoryReference(inputPtrReg, cg->comp()->fej9()->getOffsetOfContiguousDataAddrField(), cg));
953
+ }
954
+ else
955
+ #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */
956
+ {
957
+ // Add array header size to get address of first data element
958
+ generateRXInstruction(cg,
959
+ TR::InstOpCode::getLoadAddressOpCode(),
960
+ node,
961
+ inputPtrReg,
962
+ generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
963
+ }
964
+
920
965
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
921
966
cFlowRegionStart->setStartInternalControlFlow();
922
967
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
@@ -934,7 +979,7 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *n
934
979
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);
935
980
936
981
// Load bytes and search for out of range character
937
- generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg));
982
+ generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, 0 , cg));
938
983
939
984
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
940
985
@@ -956,7 +1001,7 @@ J9::Z::TreeEvaluator::inlineStringCodingHasNegativesOrCountPositives(TR::Node *n
956
1001
// VLL and VSTL work on indices so we subtract 1
957
1002
generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -1);
958
1003
// Load residue bytes and check for out of range character
959
- generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() , cg));
1004
+ generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, 0 , cg));
960
1005
961
1006
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0);
962
1007
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, isCountPositives ? processCountPositivesOutOfRangeChar : processOutOfRangeChar);
0 commit comments