Skip to content

Commit 7cb2fee

Browse files
authored
Merge pull request #13095 from edgargabriel/topic/memkind-optimizations-1
first set of memkind optimizations
2 parents 9fb2d4e + ee8e53a commit 7cb2fee

File tree

12 files changed

+147
-31
lines changed

12 files changed

+147
-31
lines changed

ompi/communicator/comm.c

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,11 @@ int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opa
448448
if (info) {
449449
opal_info_dup(info, &(newcomp->super.s_info));
450450
}
451-
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
451+
ompi_info_memkind_assert_type type;
452+
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
453+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
454+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
455+
}
452456

453457
/* Set name for debugging purposes */
454458
snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %s CREATE FROM %s",
@@ -705,8 +709,11 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key,
705709
if (info) {
706710
opal_info_dup(info, &(newcomp->super.s_info));
707711
}
708-
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
709-
712+
ompi_info_memkind_assert_type type;
713+
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
714+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
715+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
716+
}
710717
/* Activate the communicator and init coll-component */
711718
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
712719

@@ -997,7 +1004,11 @@ static int ompi_comm_split_type_core(ompi_communicator_t *comm,
9971004
if (info) {
9981005
opal_infosubscribe_change_info(&newcomp->super, info);
9991006
}
1000-
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
1007+
ompi_info_memkind_assert_type type;
1008+
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
1009+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
1010+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
1011+
}
10011012

10021013
/* Activate the communicator and init coll-component */
10031014
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
@@ -1351,7 +1362,11 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp
13511362
if (info) {
13521363
opal_infosubscribe_change_info(&newcomp->super, info);
13531364
}
1354-
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info);
1365+
ompi_info_memkind_assert_type type;
1366+
ompi_info_memkind_copy_or_set (&comm->instance->super, &newcomp->super, info, &type);
1367+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
1368+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
1369+
}
13551370

13561371
/* activate communicator and init coll-module */
13571372
rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode);
@@ -1442,7 +1457,12 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
14421457
if (info) {
14431458
opal_info_dup(info, &(newcomp->super.s_info));
14441459
}
1445-
ompi_info_memkind_copy_or_set (&comm->super, &newcomp->super, info);
1460+
1461+
ompi_info_memkind_assert_type type;
1462+
ompi_info_memkind_copy_or_set (&comm->super, &newcomp->super, info, &type);
1463+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
1464+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
1465+
}
14461466
}
14471467

14481468
ompi_comm_request_schedule_append (request, ompi_comm_idup_getcid, subreq, subreq[0] ? 1 : 0);
@@ -1594,7 +1614,11 @@ int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info
15941614
if (NULL == newcomp->super.s_info) {
15951615
return OMPI_ERR_OUT_OF_RESOURCE;
15961616
}
1597-
ompi_info_memkind_copy_or_set (&group->grp_instance->super, &newcomp->super, info);
1617+
ompi_info_memkind_assert_type type;
1618+
ompi_info_memkind_copy_or_set (&group->grp_instance->super, &newcomp->super, info, &type);
1619+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
1620+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
1621+
}
15981622

15991623
/* activate communicator and init coll-module. use the group allreduce implementation as
16001624
* no collective module has yet been selected. the tag does not matter as any tag will
@@ -1736,7 +1760,12 @@ int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, om
17361760

17371761
// Copy info if there is one.
17381762
newcomp->super.s_info = OBJ_NEW(opal_info_t);
1739-
ompi_info_memkind_copy_or_set (&local_comm->instance->super, &newcomp->super, &ompi_mpi_info_null.info.super);
1763+
ompi_info_memkind_assert_type type;
1764+
ompi_info_memkind_copy_or_set (&local_comm->instance->super, &newcomp->super,
1765+
&ompi_mpi_info_null.info.super, &type);
1766+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
1767+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
1768+
}
17401769

17411770
*newintercomm = newcomp;
17421771

@@ -1900,7 +1929,11 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
19001929
if (info) {
19011930
opal_info_dup(info, &(newcomp->super.s_info));
19021931
}
1903-
ompi_info_memkind_copy_or_set (&local_group->grp_instance->super, &newcomp->super, info);
1932+
ompi_info_memkind_assert_type type;
1933+
ompi_info_memkind_copy_or_set (&local_group->grp_instance->super, &newcomp->super, info, &type);
1934+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
1935+
newcomp->c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
1936+
}
19041937

19051938
/* activate communicator and init coll-module */
19061939
rc = ompi_comm_activate (&newcomp, local_comm, leader_comm, &local_leader, &leader_comm_remote_leader,

ompi/communicator/comm_init.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,11 +305,16 @@ int ompi_comm_init_mpi3 (void)
305305
char *memkind_requested = getenv ("OMPI_MCA_mpi_memory_alloc_kinds");
306306
if (NULL != memkind_requested) {
307307
char *memkind_provided;
308+
ompi_info_memkind_assert_type type;
308309

309-
ompi_info_memkind_process (memkind_requested, &memkind_provided);
310+
ompi_info_memkind_process (memkind_requested, &memkind_provided, &type);
310311
opal_infosubscribe_subscribe (&ompi_mpi_comm_world.comm.super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb);
311312
opal_infosubscribe_subscribe (&ompi_mpi_comm_self.comm.super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb);
312313
opal_infosubscribe_subscribe (&ompi_mpi_comm_world.comm.instance->super, "mpi_memory_alloc_kinds", memkind_provided, ompi_info_memkind_cb);
314+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
315+
ompi_mpi_comm_world.comm.c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
316+
ompi_mpi_comm_self.comm.c_assertions |= OMPI_COMM_ASSERT_NO_ACCEL_BUF;
317+
}
313318
free (memkind_provided);
314319
}
315320

ompi/communicator/communicator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t);
106106
#define OMPI_COMM_ASSERT_ALLOW_OVERTAKE 0x00000008
107107
#define OMPI_COMM_ASSERT_LAZY_BARRIER 0x00000010
108108
#define OMPI_COMM_ASSERT_ACTIVE_POLL 0x00000020
109+
#define OMPI_COMM_ASSERT_NO_ACCEL_BUF 0x00000040
109110

110111
#define OMPI_COMM_CHECK_ASSERT(comm, flag) !!((comm)->c_assertions & flag)
111112
#define OMPI_COMM_CHECK_ASSERT_NO_ANY_TAG(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_TAG)
@@ -114,6 +115,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t);
114115
#define OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE)
115116
#define OMPI_COMM_CHECK_ASSERT_LAZY_BARRIER(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_LAZY_BARRIER)
116117
#define OMPI_COMM_CHECK_ASSERT_ACTIVE_POLL(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ACTIVE_POLL)
118+
#define OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ACCEL_BUF)
117119

118120
/**
119121
* Modes required for acquiring the new comm-id.

ompi/file/file.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
1919
* Copyright (c) 2024 Triad National Security, LLC. All rights
2020
* reserved.
21-
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
21+
* Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
2222
* $COPYRIGHT$
2323
*
2424
* Additional copyrights may follow
@@ -124,8 +124,11 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename,
124124
if (info) {
125125
opal_info_dup(info, &(file->super.s_info));
126126
}
127-
ompi_info_memkind_copy_or_set (&comm->instance->super, &file->super, info);
128-
127+
ompi_info_memkind_assert_type type;
128+
ompi_info_memkind_copy_or_set (&comm->instance->super, &file->super, info, &type);
129+
if (OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL == type) {
130+
file->f_flags |= OMPI_FILE_ASSERT_NO_ACCEL_BUF;
131+
}
129132
file->f_amode = amode;
130133
file->f_filename = strdup(filename);
131134
if (NULL == file->f_filename) {

ompi/file/file.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@
3939
/*
4040
* Flags
4141
*/
42-
#define OMPI_FILE_ISCLOSED 0x00000001
43-
#define OMPI_FILE_HIDDEN 0x00000002
44-
42+
#define OMPI_FILE_ISCLOSED 0x00000001
43+
#define OMPI_FILE_HIDDEN 0x00000002
44+
#define OMPI_FILE_ASSERT_NO_ACCEL_BUF 0x00000004
4545
BEGIN_C_DECLS
4646

4747
/**

ompi/info/info_memkind.c

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ static void ompi_info_memkind_extract (const char* memkind_str, int *num_memkind
5656
*/
5757

5858
/* Separate requested_str into an array of individual entries */
59+
int current_max = 0;
5960
char **memkind_combos = opal_argv_split(memkind_str, ',');
6061
int max_num_memkinds = opal_argv_count(memkind_combos);
6162

@@ -71,7 +72,6 @@ static void ompi_info_memkind_extract (const char* memkind_str, int *num_memkind
7172

7273
int iter = 0;
7374
char *m = memkind_combos[iter];
74-
int current_max = 0;
7575
while (m != NULL) {
7676
bool name_found = false;
7777
char **tmp_str = opal_argv_split (m, ':');
@@ -419,19 +419,52 @@ static bool ompi_info_memkind_validate (const char *assert_str, const char *pare
419419
return ret;
420420
}
421421

422+
/*
** Decide whether a list of memkinds contains no accelerator memory kind.
**
** An entry counts as non-accelerator when its name starts with "system"
** or with "mpi" (prefix comparison, not full-string equality). The first
** entry matching neither prefix ends the scan.
**
** @param[IN]: num_memkinds   number of entries in memkinds
** @param[IN]: memkinds       array of memkind descriptors to inspect
**
** @return: true if every entry starts with "system" or "mpi" (also true
**          when num_memkinds is not positive), false otherwise
*/
static bool ompi_info_memkind_check_no_accel (int num_memkinds, ompi_memkind_t *memkinds)
{
    for (int idx = 0; idx < num_memkinds; ++idx) {
        const char *kind_name = memkinds[idx].im_name;
        bool is_system = (0 == strncmp(kind_name, "system", strlen("system")));
        bool is_mpi = (0 == strncmp(kind_name, "mpi", strlen("mpi")));

        if (!is_system && !is_mpi) {
            /* any other memkind name rules out the no-accelerator guarantee */
            return false;
        }
    }

    return true;
}
422439

423-
int ompi_info_memkind_process (const char* requested_str, char **provided_str)
440+
static bool ompi_info_memkind_check_no_accel_from_string (char *mstring)
441+
{
442+
bool ret = false;
443+
int num_memkinds;
444+
ompi_memkind_t *memkinds = NULL;
445+
446+
ompi_info_memkind_extract (mstring, &num_memkinds, &memkinds);
447+
if (NULL != memkinds) {
448+
ret = ompi_info_memkind_check_no_accel (num_memkinds, memkinds);
449+
ompi_info_memkind_free(num_memkinds, memkinds);
450+
}
451+
452+
return ret;
453+
}
454+
int ompi_info_memkind_process (const char* requested_str, char **provided_str,
455+
ompi_info_memkind_assert_type *type)
424456
{
425457
int err;
426458
char *tmp_str = NULL;
427-
428459
int num_requested_memkinds, num_available_memkinds, num_provided_memkinds;
429460
ompi_memkind_t *requested_memkinds = NULL ;
430461
ompi_memkind_t *available_memkinds = NULL;
431462
ompi_memkind_t *provided_memkinds = NULL;
463+
ompi_info_memkind_assert_type assert_type = OMPI_INFO_MEMKIND_ASSERT_UNDEFINED;
432464

433465
if (NULL == requested_str) {
434466
*provided_str = NULL;
467+
*type = assert_type;
435468
return OMPI_SUCCESS;
436469
}
437470

@@ -448,6 +481,10 @@ int ompi_info_memkind_process (const char* requested_str, char **provided_str)
448481
goto exit;
449482
}
450483

484+
if (ompi_info_memkind_check_no_accel (num_provided_memkinds, provided_memkinds)) {
485+
assert_type = OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL;
486+
}
487+
451488
ompi_info_memkind_str_create(num_provided_memkinds, provided_memkinds, &tmp_str);
452489

453490
exit:
@@ -459,6 +496,7 @@ int ompi_info_memkind_process (const char* requested_str, char **provided_str)
459496
}
460497
// Don't free the available_memkinds, they will be released in info_finalize;
461498

499+
*type = assert_type;
462500
*provided_str = tmp_str;
463501
return err;
464502
}
@@ -504,15 +542,17 @@ const char *ompi_info_memkind_cb (opal_infosubscriber_t *obj, const char *key, c
504542
** value of another info key (mpi_memory_alloc_kinds).
505543
*/
506544
int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubscriber_t *child,
507-
opal_info_t *info)
545+
opal_info_t *info, ompi_info_memkind_assert_type *type)
508546
{
509547
opal_cstring_t *parent_val;
510548
opal_cstring_t *assert_val;
549+
ompi_info_memkind_assert_type assert_type = OMPI_INFO_MEMKIND_ASSERT_UNDEFINED;
511550
char *final_str = NULL;
512551
int flag;
513552

514553
opal_info_get(parent->s_info, "mpi_memory_alloc_kinds", &parent_val, &flag);
515554
if (0 == flag) {
555+
*type = assert_type;
516556
return OMPI_SUCCESS;
517557
}
518558
final_str = (char*) parent_val->string;
@@ -539,6 +579,12 @@ int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent, opal_infosubsc
539579
opal_infosubscribe_subscribe (child, "mpi_memory_alloc_kinds", final_str,
540580
ompi_info_memkind_cb);
541581
OBJ_RELEASE(parent_val);
582+
583+
if (ompi_info_memkind_check_no_accel_from_string(final_str)) {
584+
assert_type = OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL;
585+
}
586+
587+
*type = assert_type;
542588
return OMPI_SUCCESS;
543589
}
544590

ompi/info/info_memkind.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,26 @@ struct ompi_memkind_t {
2525
};
2626
typedef struct ompi_memkind_t ompi_memkind_t;
2727

28+
/*
** Kind of guarantee on memory kind usage that is reported through the
** 'type' output argument of ompi_info_memkind_process() and
** ompi_info_memkind_copy_or_set().
** NOTE(review): in the code visible here only UNDEFINED and NO_ACCEL are
** ever assigned or tested; confirm whether the ACCEL_* values are used
** elsewhere.
*/
typedef enum {
29+
OMPI_INFO_MEMKIND_ASSERT_UNDEFINED = 0, // no statement on memkind usage
30+
OMPI_INFO_MEMKIND_ASSERT_NO_ACCEL, // no accelerator memory is used
31+
OMPI_INFO_MEMKIND_ASSERT_ACCEL_DEVICE_ONLY, // only accelerator device memory used
32+
OMPI_INFO_MEMKIND_ASSERT_ACCEL_ALL // only accelerator memory (no restrictors) used
33+
} ompi_info_memkind_assert_type;
34+
2835
/*
2936
** Given a string of user requested memory alloc kinds, create
3037
** a string with the memory kinds actually supported by the library.
3138
**
3239
** @param[IN]: requested_str input string
3340
** @param[OUT]: provided_str result string
41+
** @param[OUT]: type guarantees given on memkind utilization
3442
**
3543
** @return: OMPI_SUCCESS or error on failure
3644
*/
3745
OMPI_DECLSPEC int ompi_info_memkind_process (const char* requested_str,
38-
char **provided_str);
46+
char **provided_str,
47+
ompi_info_memkind_assert_type *type);
3948
/*
4049
** Set the memory_alloc_kind info object on the child object, either
4150
** by copying it from the parent object, or adjusting it based
@@ -46,12 +55,14 @@ OMPI_DECLSPEC int ompi_info_memkind_process (const char* requested_str,
4655
** @param [INOUT]: child child object
4756
** @param[IN]: info info object provided by code during object creation
4857
** (e.g. MPI_Comm_dup_with_info, MPI_File_open, etc.)
58+
** @param[OUT]: type guarantees given on memkind utilization
4959
**
5060
** @return: OMPI_SUCCESS or error on failure
5161
*/
5262
OMPI_DECLSPEC int ompi_info_memkind_copy_or_set (opal_infosubscriber_t *parent,
5363
opal_infosubscriber_t *child,
54-
opal_info_t *info);
64+
opal_info_t *info,
65+
ompi_info_memkind_assert_type *type);
5566

5667
/*
5768
** free the array of available memkinds when shutting down the info

ompi/instance/instance.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,13 +859,14 @@ int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t
859859
/* Copy info if there is one. */
860860
if (OPAL_UNLIKELY(NULL != info)) {
861861
opal_cstring_t *memkind_requested;
862+
ompi_info_memkind_assert_type type;
862863
int flag;
863864

864865
new_instance->super.s_info = OBJ_NEW(opal_info_t);
865866
opal_info_get(info, "mpi_memory_alloc_kinds", &memkind_requested, &flag);
866867
if (1 == flag) {
867868
char *memkind_provided;
868-
ompi_info_memkind_process (memkind_requested->string, &memkind_provided);
869+
ompi_info_memkind_process (memkind_requested->string, &memkind_provided, &type);
869870
opal_infosubscribe_subscribe (&new_instance->super, "mpi_memory_alloc_kinds",
870871
memkind_provided, ompi_info_memkind_cb);
871872
free (memkind_provided);

ompi/mca/coll/accelerator/coll_accelerator_module.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ mca_coll_accelerator_comm_query(struct ompi_communicator_t *comm,
8282
return NULL;
8383
}
8484

85+
if (OMPI_COMM_CHECK_ASSERT_NO_ACCEL_BUF(comm)) {
86+
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
87+
"coll:accelerator:comm_query: NO_ACCEL_BUF assertion set: disqualifying myself");
88+
return NULL;
89+
}
90+
8591
accelerator_module = OBJ_NEW(mca_coll_accelerator_module_t);
8692
if (NULL == accelerator_module) {
8793
return NULL;

0 commit comments

Comments
 (0)