@@ -165,48 +165,50 @@ <h1><a class="anchor" id="autotoc_md67"></a>
165
165
< tr class ="markdownTableRowOdd ">
166
166
< td class ="markdownTableBodyRight "> NVIDIA L40S </ td > < td class ="markdownTableBodyRight "> Single-precision GPU </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 1.7 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
167
167
< tr class ="markdownTableRowEven ">
168
- < td class ="markdownTableBodyRight "> NVIDIA P100 </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 2.4 </ td > < td class ="markdownTableBodyLeft "> NVHPC 23.5 </ td > < td class ="markdownTableBodyLeft "> GT CSE Internal </ td > </ tr >
168
+ < td class ="markdownTableBodyRight "> AMD EPYC 9654 </ td > < td class ="markdownTableBodyRight "> Genoa </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 96/96 cores </ td > < td class ="markdownTableBodyRight "> 1.7 </ td > < td class ="markdownTableBodyLeft "> Intel oneAPI 2021.9 </ td > < td class ="markdownTableBodyLeft "> DOD Carpenter </ td > </ tr >
169
169
< tr class ="markdownTableRowOdd ">
170
- < td class ="markdownTableBodyRight "> AMD EPYC 9534 </ td > < td class ="markdownTableBodyRight "> Genoa </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 64/64 cores </ td > < td class ="markdownTableBodyRight "> 2.7 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
170
+ < td class ="markdownTableBodyRight "> NVIDIA P100 </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 2.4 </ td > < td class ="markdownTableBodyLeft "> NVHPC 23.5 </ td > < td class ="markdownTableBodyLeft "> GT CSE Internal </ td > </ tr >
171
171
< tr class ="markdownTableRowEven ">
172
- < td class ="markdownTableBodyRight "> NVIDIA A40 </ td > < td class ="markdownTableBodyRight "> Single-precision GPU </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 3.3 </ td > < td class ="markdownTableBodyLeft "> NVHPC 22.11 </ td > < td class ="markdownTableBodyLeft "> NCSA Delta </ td > </ tr >
172
+ < td class ="markdownTableBodyRight "> AMD EPYC 9534 </ td > < td class ="markdownTableBodyRight "> Genoa </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 64/64 cores </ td > < td class ="markdownTableBodyRight "> 2.7 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
173
173
< tr class ="markdownTableRowOdd ">
174
- < td class ="markdownTableBodyRight "> NVIDIA Grace CPU </ td > < td class ="markdownTableBodyRight "> Arm, Neoverse V2 </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 72/72 cores </ td > < td class ="markdownTableBodyRight "> 3.7 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.1 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
174
+ < td class ="markdownTableBodyRight "> NVIDIA A40 </ td > < td class ="markdownTableBodyRight "> Single-precision GPU </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 3.3 </ td > < td class ="markdownTableBodyLeft "> NVHPC 22.11 </ td > < td class ="markdownTableBodyLeft "> NCSA Delta </ td > </ tr >
175
175
< tr class ="markdownTableRowEven ">
176
- < td class ="markdownTableBodyRight "> NVIDIA RTX6000 </ td > < td class ="markdownTableBodyRight "> Single-precision GPU </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 3.9 </ td > < td class ="markdownTableBodyLeft "> NVHPC 22.11 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
176
+ < td class ="markdownTableBodyRight "> NVIDIA Grace CPU </ td > < td class ="markdownTableBodyRight "> Arm, Neoverse V2 </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 72/72 cores </ td > < td class ="markdownTableBodyRight "> 3.7 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.1 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
177
177
< tr class ="markdownTableRowOdd ">
178
- < td class ="markdownTableBodyRight "> AMD EPYC 7763 </ td > < td class ="markdownTableBodyRight "> Milan </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 64/64 cores </ td > < td class ="markdownTableBodyRight "> 4.1 </ td > < td class ="markdownTableBodyLeft "> GNU 11.4.0 </ td > < td class ="markdownTableBodyLeft "> NCSA Delta </ td > </ tr >
178
+ < td class ="markdownTableBodyRight "> NVIDIA RTX6000 </ td > < td class ="markdownTableBodyRight "> Single-precision GPU </ td > < td class ="markdownTableBodyRight "> GPU </ td > < td class ="markdownTableBodyRight "> 1 GPU </ td > < td class ="markdownTableBodyRight "> 3.9 </ td > < td class ="markdownTableBodyLeft "> NVHPC 22.11 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
179
179
< tr class ="markdownTableRowEven ">
180
- < td class ="markdownTableBodyRight "> AMD EPYC 7713 </ td > < td class ="markdownTableBodyRight "> Milan </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 64/64 cores </ td > < td class ="markdownTableBodyRight "> 5.0 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
180
+ < td class ="markdownTableBodyRight "> AMD EPYC 7763 </ td > < td class ="markdownTableBodyRight "> Milan </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 64/64 cores </ td > < td class ="markdownTableBodyRight "> 4.1 </ td > < td class ="markdownTableBodyLeft "> GNU 11.4.0 </ td > < td class ="markdownTableBodyLeft "> NCSA Delta </ td > </ tr >
181
181
< tr class ="markdownTableRowOdd ">
182
- < td class ="markdownTableBodyRight "> Intel Xeon 8480CL </ td > < td class ="markdownTableBodyRight "> Platinum, Sapphire Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 56/56 cores </ td > < td class ="markdownTableBodyRight "> 5.0 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
182
+ < td class ="markdownTableBodyRight "> AMD EPYC 7713 </ td > < td class ="markdownTableBodyRight "> Milan </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 64/64 cores </ td > < td class ="markdownTableBodyRight "> 5.0 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
183
183
< tr class ="markdownTableRowEven ">
184
- < td class ="markdownTableBodyRight "> Intel Xeon 6454S </ td > < td class ="markdownTableBodyRight "> Gold, Sapphire Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 5.6 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
184
+ < td class ="markdownTableBodyRight "> Intel Xeon 8480CL </ td > < td class ="markdownTableBodyRight "> Platinum, Sapphire Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 56/56 cores </ td > < td class ="markdownTableBodyRight "> 5.0 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT Phoenix </ td > </ tr >
185
185
< tr class ="markdownTableRowOdd ">
186
- < td class ="markdownTableBodyRight "> Intel Xeon 8462Y+ </ td > < td class ="markdownTableBodyRight "> Platinum, Sapphire Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 6.2 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
186
+ < td class ="markdownTableBodyRight "> Intel Xeon 6454S </ td > < td class ="markdownTableBodyRight "> Gold, Sapphire Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 5.6 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
187
187
< tr class ="markdownTableRowEven ">
188
- < td class ="markdownTableBodyRight "> Intel Xeon 6548Y+ </ td > < td class ="markdownTableBodyRight "> Gold, Emerald Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 6.6 </ td > < td class ="markdownTableBodyLeft "> Intel oneAPI 2021.9 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
188
+ < td class ="markdownTableBodyRight "> Intel Xeon 8462Y+ </ td > < td class ="markdownTableBodyRight "> Platinum, Sapphire Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 6.2 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
189
189
< tr class ="markdownTableRowOdd ">
190
- < td class ="markdownTableBodyRight "> Intel Xeon 8352Y </ td > < td class ="markdownTableBodyRight "> Platinum, Ice Lake </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 6.6 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
190
+ < td class ="markdownTableBodyRight "> Intel Xeon 6548Y+ </ td > < td class ="markdownTableBodyRight "> Gold, Emerald Rapids </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 6.6 </ td > < td class ="markdownTableBodyLeft "> Intel oneAPI 2021.9 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
191
191
< tr class ="markdownTableRowEven ">
192
- < td class ="markdownTableBodyRight "> Ampere Altra Q80-28 </ td > < td class ="markdownTableBodyRight "> Arm, Neoverse-N1 </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 80/80 cores </ td > < td class ="markdownTableBodyRight "> 6.8 </ td > < td class ="markdownTableBodyLeft "> GNU 12.2.0 </ td > < td class ="markdownTableBodyLeft "> OLCF Wombat </ td > </ tr >
192
+ < td class ="markdownTableBodyRight "> Intel Xeon 8352Y </ td > < td class ="markdownTableBodyRight "> Platinum, Ice Lake </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 6.6 </ td > < td class ="markdownTableBodyLeft "> NVHPC 24.5 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
193
193
< tr class ="markdownTableRowOdd ">
194
- < td class ="markdownTableBodyRight "> AMD EPYC 7513 </ td > < td class ="markdownTableBodyRight "> Milan </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 7.4 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
194
+ < td class ="markdownTableBodyRight "> Ampere Altra Q80-28 </ td > < td class ="markdownTableBodyRight "> Arm, Neoverse-N1 </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 80/80 cores </ td > < td class ="markdownTableBodyRight "> 6.8 </ td > < td class ="markdownTableBodyLeft "> GNU 12.2.0 </ td > < td class ="markdownTableBodyLeft "> OLCF Wombat </ td > </ tr >
195
195
< tr class ="markdownTableRowEven ">
196
- < td class ="markdownTableBodyRight "> AMD EPYC 7452 </ td > < td class ="markdownTableBodyRight "> Rome </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 8.4 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
196
+ < td class ="markdownTableBodyRight "> AMD EPYC 7513 </ td > < td class ="markdownTableBodyRight "> Milan </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 7.4 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
197
197
< tr class ="markdownTableRowOdd ">
198
- < td class ="markdownTableBodyRight "> IBM Power10 </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 24/24 cores </ td > < td class ="markdownTableBodyRight "> 10 </ td > < td class ="markdownTableBodyLeft "> GNU 13.3.1 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
198
+ < td class ="markdownTableBodyRight "> AMD EPYC 7452 </ td > < td class ="markdownTableBodyRight "> Rome </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 32/32 cores </ td > < td class ="markdownTableBodyRight "> 8.4 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
199
199
< tr class ="markdownTableRowEven ">
200
- < td class ="markdownTableBodyRight "> AMD EPYC 7401 </ td > < td class ="markdownTableBodyRight "> Naples </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 24/24 cores </ td > < td class ="markdownTableBodyRight "> 10 </ td > < td class ="markdownTableBodyLeft "> GNU 10.3.1 </ td > < td class ="markdownTableBodyLeft "> LLNL Corona </ td > </ tr >
200
+ < td class ="markdownTableBodyRight "> IBM Power10 </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 24/24 cores </ td > < td class ="markdownTableBodyRight "> 10 </ td > < td class ="markdownTableBodyLeft "> GNU 13.3.1 </ td > < td class ="markdownTableBodyLeft "> GT Rogues Gallery </ td > </ tr >
201
201
< tr class ="markdownTableRowOdd ">
202
- < td class ="markdownTableBodyRight "> Apple M1 Pro </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 8/10 cores </ td > < td class ="markdownTableBodyRight "> 14 </ td > < td class ="markdownTableBodyLeft "> GNU 13.2.0 </ td > < td class ="markdownTableBodyLeft "> N/A </ td > </ tr >
202
+ < td class ="markdownTableBodyRight "> AMD EPYC 7401 </ td > < td class ="markdownTableBodyRight "> Naples </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 24/24 cores </ td > < td class ="markdownTableBodyRight "> 10 </ td > < td class ="markdownTableBodyLeft "> GNU 10.3.1 </ td > < td class ="markdownTableBodyLeft "> LLNL Corona </ td > </ tr >
203
203
< tr class ="markdownTableRowEven ">
204
- < td class ="markdownTableBodyRight "> Intel Xeon Gold 6226 </ td > < td class ="markdownTableBodyRight "> Cascade Lake </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 12/12 cores </ td > < td class ="markdownTableBodyRight "> 17 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
204
+ < td class ="markdownTableBodyRight "> Apple M1 Pro </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 8/10 cores </ td > < td class ="markdownTableBodyRight "> 14 </ td > < td class ="markdownTableBodyLeft "> GNU 13.2.0 </ td > < td class ="markdownTableBodyLeft "> N/A </ td > </ tr >
205
205
< tr class ="markdownTableRowOdd ">
206
- < td class ="markdownTableBodyRight "> Apple M1 Max </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 8/10 cores </ td > < td class ="markdownTableBodyRight "> 18 </ td > < td class ="markdownTableBodyLeft "> GNU 14.1.0 </ td > < td class ="markdownTableBodyLeft "> N/A </ td > </ tr >
206
+ < td class ="markdownTableBodyRight "> Intel Xeon Gold 6226 </ td > < td class ="markdownTableBodyRight "> Cascade Lake </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 12/12 cores </ td > < td class ="markdownTableBodyRight "> 17 </ td > < td class ="markdownTableBodyLeft "> GNU 12.3.0 </ td > < td class ="markdownTableBodyLeft "> GT ICE </ td > </ tr >
207
207
< tr class ="markdownTableRowEven ">
208
- < td class ="markdownTableBodyRight "> IBM Power9 </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 20/21 cores </ td > < td class ="markdownTableBodyRight "> 21 </ td > < td class ="markdownTableBodyLeft "> GNU 9.1.0 </ td > < td class ="markdownTableBodyLeft "> OLCF Summit </ td > </ tr >
208
+ < td class ="markdownTableBodyRight "> Apple M1 Max </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 8/10 cores </ td > < td class ="markdownTableBodyRight "> 18 </ td > < td class ="markdownTableBodyLeft "> GNU 14.1.0 </ td > < td class ="markdownTableBodyLeft "> N/A </ td > </ tr >
209
209
< tr class ="markdownTableRowOdd ">
210
+ < td class ="markdownTableBodyRight "> IBM Power9 </ td > < td class ="markdownTableBodyRight "> </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 20/21 cores </ td > < td class ="markdownTableBodyRight "> 21 </ td > < td class ="markdownTableBodyLeft "> GNU 9.1.0 </ td > < td class ="markdownTableBodyLeft "> OLCF Summit </ td > </ tr >
211
+ < tr class ="markdownTableRowEven ">
210
212
< td class ="markdownTableBodyRight "> Intel Xeon E5-2650V4 </ td > < td class ="markdownTableBodyRight "> Broadwell </ td > < td class ="markdownTableBodyRight "> CPU </ td > < td class ="markdownTableBodyRight "> 12/12 cores </ td > < td class ="markdownTableBodyRight "> 27 </ td > < td class ="markdownTableBodyLeft "> NVHPC 23.5 </ td > < td class ="markdownTableBodyLeft "> GT CSE Internal </ td > </ tr >
211
213
</ table >
212
214
< p > < b > All grind times are in nanoseconds (ns) per grid point (gp) per equation (eq) per right-hand side (rhs) evaluation, so X ns/gp/eq/rhs. Lower is better.</ b > </ p >
0 commit comments