Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Enable Shared Cache Support #511

Draft
wants to merge 6 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/test_api.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif

#include <stdio.h>
#include <stdlib.h>
Expand Down
8 changes: 3 additions & 5 deletions scripts/SLURM/scr_scavenge.in
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,11 @@ my $cmd = undef;
`$bindir/scr_log_event -i $jobid -p $prefixdir -T 'SCAVENGE_START' -D $dset -S $start_time`;

# gather files via pdsh
#$cmd = "srun -n 1 -N 1 -w %h $bindir/scr_copy --cntldir $cntldir --id $dset --prefix $prefixdir --buf $buf_size $crc_flag $downnodes_spaced";
$cmd = "srun -n1 -N1 -w %h $bindir/scr_copy --cntldir $cntldir --id $dset --prefix $prefixdir --buf $buf_size $crc_flag $downnodes_spaced";
print "$prog: ", scalar(localtime), "\n";
# Does not work with "$cmd" for some reason using -Rexec
#print "$prog: $pdsh -Rexec -f 256 -S -w '$upnodes' \"$cmd\" >$output 2>$error\n";
# `$pdsh -Rexec-f 256 -S -w '$upnodes' "$cmd" >$output 2>$error`;
print "$prog: $pdsh -Rexec -f 256 -S -w '$upnodes' srun -n1 -N1 -w %h $bindir/scr_copy --cntldir $cntldir --id $dset --prefix $prefixdir --buf $buf_size $crc_flag $downnodes_spaced";
`$pdsh -Rexec -f 256 -S -w '$upnodes' srun -n1 -N1 -w %h $bindir/scr_copy --cntldir $cntldir --id $dset --prefix $prefixdir --buf $buf_size $crc_flag $downnodes_spaced`;
print "$prog: $pdsh -Rexec -f 256 -S -w '$upnodes' $cmd >$output 2>$error\n";
`$pdsh -Rexec -f 256 -S -w '$upnodes' $cmd >$output 2>$error`;

# print pdsh output to screen
if ($conf{verbose}) {
Expand Down
23 changes: 0 additions & 23 deletions src/scr.c
Original file line number Diff line number Diff line change
Expand Up @@ -2372,29 +2372,6 @@ int SCR_Init()
reddesc->directory, __FILE__, __LINE__
);
}

/* set up artificially node-local directories if the store view is global */
if (! strcmp(store->view, "GLOBAL")) {
/* make sure we can create directories */
if (! store->can_mkdir) {
scr_abort(-1, "Cannot use global view storage %s without mkdir enabled: @%s:%d",
store->name, __FILE__, __LINE__
);
}

/* create directory on rank 0 of each node */
int node_rank;
MPI_Comm_rank(scr_comm_node, &node_rank);
if(node_rank == 0) {
spath* path = spath_from_str(reddesc->directory);
spath_append_strf(path, "node.%d", scr_my_hostid);
spath_reduce(path);
char* path_str = spath_strdup(path);
spath_delete(&path);
scr_mkdir(path_str, S_IRWXU | S_IRWXG);
scr_free(&path_str);
}
}
} else {
scr_abort(-1, "Invalid store for redundancy descriptor @ %s:%d",
__FILE__, __LINE__
Expand Down
14 changes: 4 additions & 10 deletions src/scr_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,21 @@ Dataset cache functions
=========================================
*/

static char* scr_cache_dir_from_str(const char* dir, const char* storage_view, int id)
static char* scr_cache_dir_from_str(const char* dir, int id)
{
/* build the dataset directory name */
spath* path = spath_from_str(dir);
if(! strcmp(storage_view, "GLOBAL")) {
spath_append_strf(path, "node.%d", scr_my_hostid);
}
spath_append_strf(path, "scr.dataset.%d", id);
spath_reduce(path);
char* str = spath_strdup(path);
spath_delete(&path);
return str;
}

static char* scr_cache_dir_hidden_from_str(const char* dir, const char* storage_view, int id)
static char* scr_cache_dir_hidden_from_str(const char* dir, int id)
{
/* build the dataset directory name */
spath* path = spath_from_str(dir);
if(! strcmp(storage_view, "GLOBAL")) {
spath_append_strf(path, "node.%d", scr_my_hostid);
}
spath_append_strf(path, "scr.dataset.%d", id);
spath_append_str(path, ".scr");
spath_reduce(path);
Expand Down Expand Up @@ -71,7 +65,7 @@ char* scr_cache_dir_get(const scr_reddesc* red, int id)
}

/* build the dataset directory name */
char* str = scr_cache_dir_from_str(red->directory, store->view, id);
char* str = scr_cache_dir_from_str(red->directory, id);
return str;
}

Expand All @@ -96,7 +90,7 @@ char* scr_cache_dir_hidden_get(const scr_reddesc* red, int id)
}

/* build the hidden directory name */
char* str = scr_cache_dir_hidden_from_str(red->directory, store->view, id);
char* str = scr_cache_dir_hidden_from_str(red->directory, id);
return str;
}

Expand Down
15 changes: 7 additions & 8 deletions src/scr_copy.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,6 @@ int main (int argc, char *argv[])
/* get pointer to name of entry */
const char* entryname = de->d_name;

int rank;
char* value = NULL;
size_t nmatch = 5;
regmatch_t pmatch[5];
Expand All @@ -586,16 +585,16 @@ int main (int argc, char *argv[])
/* get the MPI rank of the file */
value = strndup(entryname + pmatch[1].rm_so, (size_t)(pmatch[1].rm_eo - pmatch[1].rm_so));
if (value != NULL) {
rank = atoi(value);
int rank = atoi(value);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@adammoody - clang14 produced some warnings that I fixed. Most were small (like security warnings), but others like this are indicative of a potential bug. I rearranged this slightly so that rank would only be used when it was set. I moved the if block around to what I believe should hopefully be equivalent to what the intention was previously.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, good catch!

scr_free(&value);
}

/* found a filemap, copy its files */
int tmp_rc = copy_files_for_filemap(path_prefix, path_scr, cache_path, entryname, rank, &args, hostname);
if (tmp_rc != 0) {
rc = tmp_rc;
/* found a filemap, copy its files */
int tmp_rc = copy_files_for_filemap(path_prefix, path_scr, cache_path, entryname, rank, &args, hostname);
if (tmp_rc != 0) {
rc = tmp_rc;
}
continue;
}
continue;
}

/* look for file names like: "reddescmap.er.0.redset" */
Expand Down
2 changes: 2 additions & 0 deletions src/scr_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,8 @@ int scr_file_copy(
unsigned long buf_size,
uLong* crc)
{
scr_dbg(1, "scr_file_copy(%s --> %s)", src_file, dst_file);

/* check that we got something for a source file */
if (src_file == NULL || strcmp(src_file, "") == 0) {
scr_err("Invalid source file @ %s:%d",
Expand Down
1 change: 0 additions & 1 deletion src/scr_keys.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ Define common hash key strings
#define SCR_CONFIG_KEY_GROUP_RANK ("GROUP_RANK")
#define SCR_CONFIG_KEY_MKDIR ("MKDIR")
#define SCR_CONFIG_KEY_FLUSH ("FLUSH")
#define SCR_CONFIG_KEY_VIEW ("VIEW")

#define SCR_META_KEY_CKPT ("CKPT")
#define SCR_META_KEY_RANKS ("RANKS")
Expand Down
8 changes: 4 additions & 4 deletions src/scr_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ int scr_log_run(time_t start, int procs, int nodes)
buf[sizeof(buf)-2] = '\n';
buf[sizeof(buf)-1] = '\0';
}
syslog(SCR_LOG_SYSLOG_LEVEL, buf);
syslog(SCR_LOG_SYSLOG_LEVEL, "%s", buf);
}

if (db_enable) {
Expand Down Expand Up @@ -1117,7 +1117,7 @@ int scr_log_halt(const char* reason)
buf[sizeof(buf)-2] = '\n';
buf[sizeof(buf)-1] = '\0';
}
syslog(LOG_INFO, buf);
syslog(LOG_INFO, "%s", buf);
}

if (db_enable) {
Expand Down Expand Up @@ -1212,7 +1212,7 @@ int scr_log_event(
buf[sizeof(buf)-2] = '\n';
buf[sizeof(buf)-1] = '\0';
}
syslog(LOG_INFO, buf);
syslog(LOG_INFO, "%s", buf);
}

if (db_enable) {
Expand Down Expand Up @@ -1335,7 +1335,7 @@ int scr_log_transfer(
buf[sizeof(buf)-2] = '\n';
buf[sizeof(buf)-1] = '\0';
}
syslog(LOG_INFO, buf);
syslog(LOG_INFO, "%s", buf);
}

if (db_enable) {
Expand Down
2 changes: 1 addition & 1 deletion src/scr_reddesc.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ static int scr_reddesc_type_int_from_str(const char* value, int* type)
{
int rc = SCR_SUCCESS;

int copy_type;
int copy_type = SCR_COPY_NULL;
if (strcasecmp(value, "SINGLE") == 0) {
copy_type = SCR_COPY_SINGLE;
} else if (strcasecmp(value, "PARTNER") == 0) {
Expand Down
22 changes: 2 additions & 20 deletions src/scr_storedesc.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ static int scr_storedesc_init(scr_storedesc* s)
s->max_count = 0;
s->can_mkdir = 0;
s->xfer = NULL;
s->view = NULL;
s->comm = MPI_COMM_NULL;
s->rank = MPI_PROC_NULL;
s->ranks = 0;
Expand All @@ -60,7 +59,6 @@ static int scr_storedesc_free(scr_storedesc* s)
/* free the strings we strdup'd */
scr_free(&s->name);
scr_free(&s->xfer);
scr_free(&s->view);

/* free the communicator we created */
if (s->comm != MPI_COMM_NULL) {
Expand Down Expand Up @@ -92,7 +90,6 @@ static int scr_storedesc_copy(scr_storedesc* out, const scr_storedesc* in)
out->max_count = in->max_count;
out->can_mkdir = in->can_mkdir;
out->xfer = strdup(in->xfer);
out->view = strdup(in->view);
MPI_Comm_dup(in->comm, &out->comm);
out->rank = in->rank;
out->ranks = in->ranks;
Expand Down Expand Up @@ -162,16 +159,6 @@ static int scr_storedesc_create_from_hash(
kvtree_util_get_str(hash, SCR_CONFIG_KEY_FLUSH, &flush_type);
s->xfer = strdup(flush_type);

/* set the view of the store. Default to PRIVATE */
/* strdup the view if one exists */
char* tmp_view = NULL;
kvtree_util_get_str(hash, SCR_CONFIG_KEY_VIEW, &tmp_view);
if (tmp_view != NULL) {
s->view = strdup(tmp_view);
} else {
s->view = strdup("PRIVATE");
}

/* get communicator of ranks that can access this storage device,
* assume node-local storage unless told otherwise */
char* group = SCR_GROUP_NODE;
Expand Down Expand Up @@ -212,10 +199,7 @@ int scr_storedesc_dir_create(const scr_storedesc* store, const char* dir)

/* rank 0 creates the directory */
int rc = SCR_SUCCESS;
if (!strcmp(store->view, "GLOBAL") && store->can_mkdir && scr_my_rank_host == 0) {
scr_dbg(2, "Creating directory: %s", dir);
rc = scr_mkdir(dir, S_IRWXU | S_IRWXG);
} else if (store->rank == 0 && store->can_mkdir) {
if (store->rank == 0 && store->can_mkdir) {
scr_dbg(2, "Creating directory: %s", dir);
rc = scr_mkdir(dir, S_IRWXU | S_IRWXG);
}
Expand Down Expand Up @@ -244,9 +228,7 @@ int scr_storedesc_dir_delete(const scr_storedesc* store, const char* dir)

/* rank 0 deletes the directory */
int rc = SCR_SUCCESS;
if ((store->rank == 0 || (scr_my_rank_host == 0 && !strcmp(store->view, "GLOBAL")))
&& store->can_mkdir)
{
if (store->rank == 0 && store->can_mkdir) {
/* delete directory */
if (scr_rmdir(dir) != SCR_SUCCESS) {
/* whoops, something failed when we tried to delete our directory */
Expand Down
1 change: 0 additions & 1 deletion src/scr_storedesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ typedef struct {
int max_count; /* maximum number of datasets to be stored in device */
int can_mkdir; /* flag indicating whether mkdir/rmdir work */
char* xfer; /* AXL xfer type string (bbapi, sync, pthread, etc..) */
char* view; /* indicates whether store is node-local or global */
MPI_Comm comm; /* communicator of processes that can access storage */
int rank; /* local rank of process in communicator */
int ranks; /* number of ranks in communicator */
Expand Down