19 #ifdef HAVE_GETCPU_SYSCALL 22 # include <sys/syscall.h> 37 #include <sys/types.h> 41 #include <cuda_runtime.h> 47 # include <sys/statvfs.h> 48 # elif (defined __APPLE__) 49 # include <sys/param.h> 50 # include <sys/mount.h> 52 # include <sys/statfs.h> 54 # include <sys/time.h> 62 #define RANDALGO_GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL 98 #ifdef HAVE_GPU_DIRECT 105 size_t size = bytes /
sizeof(uint64_t);
106 uint64_t * buffi = (uint64_t*) buf;
109 uint64_t rand_state_local;
110 unsigned seed = rand_seed + pretendRank + item;
111 rand_state_local = rand_r(&seed);
112 for (
size_t i = 0; i < size; i++) {
114 rand_state_local >>= 3;
115 buffi[i] = rand_state_local;
122 for(
size_t i=0; i < size; i+=512, k++){
123 buffi[i] = ((uint32_t) item * k) | ((uint64_t) pretendRank) << 32;
138 #ifdef HAVE_GPU_DIRECT 144 uint64_t * buffi = (uint64_t*) buf;
146 const size_t size = bytes / 8;
148 for(
size_t i=0; i < size; i++){
149 switch(dataPacketType){
154 unsigned seed = rand_seed + pretendRank;
155 uint64_t hi = ((uint64_t) rand_r(& seed) << 32);
156 uint64_t lo = (uint64_t) rand_r(& seed);
161 buffi[i] = ((uint64_t) pretendRank) << 32 | rand_seed + i;
167 for(
size_t i=size*8; i < bytes; i++){
174 #ifdef HAVE_GPU_DIRECT 175 cudaMemset(buffer, 0x42, bytes > 512 ? 512 : bytes);
178 buffer[0] = ~buffer[0];
184 #ifdef HAVE_GPU_DIRECT 191 uint64_t * buffi = (uint64_t*) buffer;
196 uint64_t rand_state_local;
197 unsigned seed = rand_seed + pretendRank + item;
198 rand_state_local = rand_r(&seed);
199 const size_t size = bytes / 8;
200 for(
size_t i=0; i < size; i++){
203 switch(dataPacketType){
206 rand_state_local >>= 3;
207 exp = rand_state_local;
210 unsigned seed = rand_seed + pretendRank;
211 uint64_t hi = ((uint64_t) rand_r(& seed) << 32);
212 uint64_t lo = (uint64_t) rand_r(& seed);
217 exp = ((uint64_t) pretendRank) << 32 | rand_seed + i;
222 exp = ((uint32_t) item * k) | ((uint64_t) pretendRank) << 32;
229 for(
size_t i=size*8; i < bytes; i++){
230 if(buffer[i] != (
char) i){
239 void * d = malloc(size);
241 ERR(
"Could not malloc an array");
250 va_start(args, format);
251 vsnprintf(msg, 4096, format, args);
253 fprintf(
out_logfile,
"%s: Process %d: FAILED in %s, %s\n",
267 rc = sscanf(size_str,
" %d %% ", &percent);
270 if (percent > 100 || percent < 0)
271 ERR(
"percentage must be between 0 and 100");
274 page_size = sysconf(_SC_PAGESIZE);
276 page_size = getpagesize();
279 #ifdef _SC_PHYS_PAGES 280 num_pages = sysconf(_SC_PHYS_PAGES);
282 ERR(
"sysconf(_SC_PHYS_PAGES) is not supported");
284 ERR(
"sysconf(_SC_PHYS_PAGES) is not supported");
286 mem = page_size * num_pages;
288 return mem / 100 * percent;
303 ERRF(
"Unknown packet type \"%c\"; generic assumed\n", t);
321 ERR(
"Unrecognized I/O API");
338 WARN(
"cannot use O_DIRECT");
339 # define O_DIRECT 000000 341 # define O_DIRECT O_DIRECTIO 358 static time_t currentTime;
359 char* currentTimePtr;
361 if ((currentTime = time(
NULL)) == -1)
362 ERR(
"cannot get current time");
364 #if (_POSIX_C_SOURCE >= 1 || _XOPEN_SOURCE || _BSD_SOURCE || _SVID_SOURCE || _POSIX_SOURCE) 365 static char threadSafeBuff[32];
366 if ((currentTimePtr = ctime_r(¤tTime, threadSafeBuff)) ==
NULL) {
367 ERR(
"cannot read current time");
370 if ((currentTimePtr = ctime(¤tTime)) ==
NULL) {
371 ERR(
"cannot read current time");
375 return (currentTimePtr);
389 for (i = 0; i < ((size /
sizeof(
IOR_size_t)) / 4); i++) {
390 for (j = 0; j < 4; j++) {
405 MPI_Comm_size(comm, &num_ranks);
406 int *node_map = (
int*)malloc(
sizeof(
int) * num_ranks);
411 FAIL(
"gethostname()");
418 MPI_Bcast(roothost,
MAX_PATHLEN, MPI_CHAR, 0, comm);
421 int same_as_root = strcmp(roothost,localhost) == 0;
422 MPI_Gather( &same_as_root, 1, MPI_INT, node_map, 1, MPI_INT, 0, comm);
423 if ( print_nodemap &&
rank==0) {
425 for (
int i = 0; i < num_ranks; i++ ) {
432 ret = node_map[1] == 1;
433 MPI_Bcast(&ret, 1, MPI_INT, 0, comm);
438 void initCUDA(
int blockMapping,
int rank,
int numNodes,
int tasksPerNode,
int useGPUID){
441 cudaError_t cret = cudaGetDeviceCount(& device_count);
442 if(cret != cudaSuccess){
443 ERRF(
"cudaGetDeviceCount() error: %d %s", (
int) cret, cudaGetErrorString(cret));
454 device = (rank % tasksPerNode) % device_count;
456 device = (rank / numNodes) % device_count;
458 cret = cudaSetDevice(device);
460 cret = cudaSetDevice(useGPUID);
462 if(cret != cudaSuccess){
463 WARNF(
"cudaSetDevice(%d) error: %s", useGPUID, cudaGetErrorString(cret));
477 if (getenv(
"IOR_FAKE_NODES")){
478 int numNodes = atoi(getenv(
"IOR_FAKE_NODES"));
480 MPI_Comm_rank(comm, & rank);
482 printf(
"Fake number of node: using %d\n", numNodes);
487 MPI_Comm shared_comm;
489 int local_result = 0;
492 MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
493 "MPI_Comm_split_type() error");
494 MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank),
"MPI_Comm_rank() error");
495 local_result = shared_rank == 0? 1 : 0;
496 MPI_CHECK(MPI_Allreduce(&local_result, &numNodes, 1, MPI_INT, MPI_SUM, comm),
497 "MPI_Allreduce() error");
498 MPI_CHECK(MPI_Comm_free(&shared_comm),
"MPI_Comm_free() error");
503 int numTasksOnNode0 = 0;
508 return ((numTasks - 1) / numTasksOnNode0) + 1;
516 MPI_CHECK(MPI_Comm_size(comm, &numTasks),
"cannot get number of tasks");
552 if (getenv(
"IOR_FAKE_TASK_PER_NODES")){
553 int tasksPerNode = atoi(getenv(
"IOR_FAKE_TASK_PER_NODES"));
555 MPI_Comm_rank(comm, & rank);
557 printf(
"Fake tasks per node: using %d\n", tasksPerNode);
562 MPI_Comm shared_comm;
564 int tasks_on_node_rank0 = 0;
565 int local_result = 0;
567 MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
568 "MPI_Comm_split_type() error");
569 MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank),
"MPI_Comm_rank() error");
570 if (
rank == 0 && shared_rank == 0) {
571 MPI_CHECK(MPI_Comm_size(shared_comm, &local_result),
"MPI_Comm_size() error");
573 MPI_CHECK(MPI_Allreduce(&local_result, &tasks_on_node_rank0, 1, MPI_INT, MPI_SUM, comm),
574 "MPI_Allreduce() error");
575 MPI_CHECK(MPI_Comm_free(&shared_comm),
"MPI_Comm_free() error");
577 return tasks_on_node_rank0;
589 MPI_Comm_size(comm, & size);
598 fprintf(
out_logfile,
"V-1: Entering count_tasks_per_node...\n" );
603 FAIL(
"gethostname()");
607 for (i = 0; i < size-1; i++) {
608 MPI_Recv(hostname,
MAX_PATHLEN, MPI_CHAR, MPI_ANY_SOURCE,
609 MPI_ANY_TAG, comm, &status);
610 if (strcmp(hostname, localhost) == 0) {
616 MPI_Send(localhost,
MAX_PATHLEN, MPI_CHAR, 0, 0, comm);
618 MPI_Bcast(&count, 1, MPI_INT, 0, comm);
628 void ExtractHint(
char *settingVal,
char *valueVal,
char *hintString)
630 char *settingPtr, *valuePtr, *tmpPtr2;
633 settingPtr = (
char *)strtok(hintString,
" =");
634 valuePtr = (
char *)strtok(
NULL,
" =\t\r\n");
636 tmpPtr2 = (
char *) strstr(settingPtr,
"IOR_HINT__MPI__");
637 if (settingPtr == tmpPtr2) {
638 settingPtr += strlen(
"IOR_HINT__MPI__");
640 tmpPtr2 = (
char *) strstr(hintString,
"IOR_HINT__GPFS__");
642 if (settingPtr == tmpPtr2) {
643 settingPtr += strlen(
"IOR_HINT__GPFS__");
645 fprintf(
out_logfile,
"WARNING: Unable to set unknown hint type (not implemented.)\n");
649 strcpy(settingVal, settingPtr);
650 strcpy(valueVal, valuePtr);
656 void SetHints(MPI_Info * mpiHints,
char *hintsFileName)
673 MPI_CHECK(MPI_Info_create(mpiHints),
"cannot create info object");
676 for (i = 0; environ[i] !=
NULL; i++) {
678 if (strncmp(environ[i],
"IOR_HINT", strlen(
"IOR_HINT")) == 0) {
679 strcpy(hintString, environ[i]);
681 MPI_CHECK(MPI_Info_set(*mpiHints, settingVal, valueVal),
682 "cannot set info object");
687 if (hintsFileName !=
NULL && strcmp(hintsFileName,
"") != 0) {
690 fd = fopen(hintsFileName,
"r");
692 WARN(
"cannot open hints file");
697 (hintString,
"IOR_HINT",
698 strlen(
"IOR_HINT")) == 0) {
702 (*mpiHints, settingVal,
704 "cannot set info object");
709 ERR(
"cannot close hints file");
719 char key[MPI_MAX_INFO_VAL];
720 char value[MPI_MAX_INFO_VAL];
723 MPI_CHECK(MPI_Info_get_nkeys(*mpiHints, &nkeys),
724 "cannot get info object keys");
726 for (i = 0; i < nkeys; i++) {
727 MPI_CHECK(MPI_Info_get_nthkey(*mpiHints, i, key),
728 "cannot get info object key");
729 MPI_CHECK(MPI_Info_get(*mpiHints, key, MPI_MAX_INFO_VAL - 1,
731 "cannot get info object value");
745 rc = sscanf(size_str,
"%lld%c", &size, &range);
747 switch ((
int)range) {
761 }
else if (rc == 0) {
774 WARN(
"Backend doesn't implement statfs");
777 int ret = backend->
statfs(filename, & stat, backend_options);
779 WARN(
"Backend returned error during statfs");
782 long long int totalFileSystemSize;
783 long long int freeFileSystemSize;
784 long long int totalInodes;
785 long long int freeInodes;
786 double totalFileSystemSizeHR;
787 double usedFileSystemPercentage;
788 double usedInodePercentage;
789 char *fileSystemUnitStr;
793 usedFileSystemPercentage = (1 - ((double)freeFileSystemSize / (
double)totalFileSystemSize)) * 100;
794 totalFileSystemSizeHR = (double)totalFileSystemSize / (
double)(1<<30);
799 usedInodePercentage = (1 - ((double)freeInodes / (
double)totalInodes)) * 100;
801 fileSystemUnitStr =
"GiB";
802 if (totalFileSystemSizeHR > 1024) {
803 totalFileSystemSizeHR = (double)totalFileSystemSize / (
double)((
long long)1<<40);
804 fileSystemUnitStr =
"TiB";
809 "FS", totalFileSystemSizeHR, fileSystemUnitStr,
810 usedFileSystemPercentage);
812 (
double)totalInodes / (
double)(1<<20),
813 usedInodePercentage);
817 fprintf(
out_resultfile,
"\"Capacity\": \"%.1f %s\", \"Used Capacity\": \"%2.1f%%\",",
818 totalFileSystemSizeHR, fileSystemUnitStr,
819 usedFileSystemPercentage);
820 fprintf(
out_resultfile,
"\"Inodes\": \"%.1f Mi\", \"Used Inodes\" : \"%2.1f%%\"\n",
821 (
double)totalInodes / (
double)(1<<20),
822 usedInodePercentage);
833 int Regex(
char *
string,
char *pattern)
840 regcomp(®Ex, pattern, REG_EXTENDED);
841 if (regexec(®Ex,
string, 1, ®Match, 0) == 0) {
854 int uname(
struct utsname *name)
856 DWORD nodeNameSize =
sizeof(name->nodename) - 1;
858 memset(name, 0,
sizeof(
struct utsname));
859 if (!GetComputerNameEx
860 (ComputerNameDnsFullyQualified, name->nodename, &nodeNameSize))
861 ERR(
"GetComputerNameEx failed");
863 strncpy(name->sysname,
"Windows",
sizeof(name->sysname) - 1);
865 strncpy(name->release,
"-",
sizeof(name->release) - 1);
866 strncpy(name->version,
"-",
sizeof(name->version) - 1);
867 strncpy(name->machine,
"-",
sizeof(name->machine) - 1);
879 struct timeval timer;
881 if (gettimeofday(&timer, (
struct timezone *)
NULL) != 0)
882 ERR(
"cannot use gettimeofday()");
883 timeVal = (double)timer.tv_sec + ((
double)timer.tv_usec / 1000000);
897 double roottimestamp;
899 MPI_CHECK(MPI_Barrier(com),
"barrier error");
901 MPI_CHECK(MPI_Reduce(×tamp, &min, 1, MPI_DOUBLE,
903 "cannot reduce tasks' times");
904 MPI_CHECK(MPI_Reduce(×tamp, &max, 1, MPI_DOUBLE,
906 "cannot reduce tasks' times");
909 roottimestamp = timestamp;
910 MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, com),
911 "cannot broadcast root's time");
922 static char datestring[80];
923 time_t cur_timestamp;
926 fprintf(
out_logfile,
"V-1: Entering PrintTimestamp...\n" );
930 cur_timestamp = time(
NULL);
931 strftime(datestring, 80,
"%m/%d/%Y %T", localtime(&cur_timestamp));
939 MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com);
942 FILE * out = fopen(filename,
"r");
945 MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com);
948 int ret = fscanf(out,
"%lld", & data);
954 MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com);
963 FILE * out = fopen(filename,
"w");
965 FAIL(
"Cannot write to the stonewalling file!");
967 fprintf(out,
"%lld", (
long long) count);
975 if (
rank == 0 && delay > 0) {
977 fprintf(
out_logfile,
"delaying %d seconds . . .\n", delay);
991 char m_str[8], g_str[8], t_str[8];
997 strcpy(m_str,
"MiB");
998 strcpy(g_str,
"GiB");
999 strcpy(t_str,
"TiB");
1004 strcpy(m_str,
"MB");
1005 strcpy(g_str,
"GB");
1006 strcpy(t_str,
"TB");
1011 snprintf(valueStr,
MAX_STR-1,
"%.2f %s",
1012 (
double)((
double)value / t), t_str);
1014 snprintf(valueStr,
MAX_STR-1,
"%d %s", (
int)(value / t), t_str);
1016 }
else if (value >= g) {
1018 snprintf(valueStr,
MAX_STR-1,
"%.2f %s",
1019 (
double)((
double)value / g), g_str);
1021 snprintf(valueStr,
MAX_STR-1,
"%d %s", (
int)(value / g), g_str);
1023 }
else if (value >= m) {
1025 snprintf(valueStr,
MAX_STR-1,
"%.2f %s",
1026 (
double)((
double)value / m), m_str);
1028 snprintf(valueStr,
MAX_STR-1,
"%d %s", (
int)(value / m), m_str);
1030 }
else if (value >= 0) {
1031 snprintf(valueStr,
MAX_STR-1,
"%d bytes", (
int)value);
1033 snprintf(valueStr,
MAX_STR-1,
"-");
1038 #if defined(HAVE_GETCPU_SYSCALL) 1043 return syscall(SYS_getcpu, core, chip,
NULL);
1045 #elif defined(HAVE_RDTSCP_ASM) 1049 unsigned long a,d,c;
1050 __asm__
volatile(
"rdtscp" :
"=a" (a),
"=d" (d),
"=c" (c));
1051 *chip = (c & 0xFFF000)>>12;
1053 return ((
unsigned long)a) | (((
unsigned long)d) << 32);;
1058 #warning GetProcessorAndCore is implemented as a dummy 1079 if (cudaMallocManaged((
void**) & buf, size, cudaMemAttachGlobal) != cudaSuccess){
1080 ERR(
"Cannot allocate buffer on GPU");
1084 ERR(
"No CUDA supported, cannot allocate on the GPU");
1087 #ifdef HAVE_GPU_DIRECT 1088 if (cudaMalloc((
void**) & buf, size) != cudaSuccess){
1089 ERR(
"Cannot allocate buffer on GPU");
1093 ERR(
"No GPUDirect supported, cannot allocate on the GPU");
1098 long pageSize = sysconf(_SC_PAGESIZE);
1100 size_t pageSize = getpagesize();
1103 pageMask = pageSize - 1;
1104 buf =
safeMalloc(size + pageSize +
sizeof(
void *));
1106 tmp = buf +
sizeof(
char *);
1107 aligned = tmp + pageSize - ((size_t) tmp & pageMask);
1110 tmp = aligned -
sizeof(
void *);
1111 *(
void **)tmp = buf;
1113 return (
void *)aligned;
1123 if (cudaFree(buf) != cudaSuccess){
1124 WARN(
"Cannot free buffer on GPU");
1128 ERR(
"No CUDA supported, cannot free on the GPU");
1131 free(*(
void **)((
char *)buf -
sizeof(
char *)));
int verify_memory_pattern_gpu(uint64_t item, char *buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType)
void invalidate_buffer_pattern(char *buffer, size_t bytes, ior_memory_flags type)
char * HumanReadable(IOR_offset_t value, int base)
int GetNumTasks(MPI_Comm comm)
unsigned long GetProcessorAndCore(int *chip, int *core)
void ShowHints(MPI_Info *mpiHints)
unsigned int incompressibleSeed
void * airoi_update_module_options(const ior_aiori_t *backend, options_all_t *opt)
char *(* get_version)(void)
#define RANDALGO_GOLDEN_RATIO_PRIME
int64_t ReadStoneWallingIterations(char *const filename, MPI_Comm com)
enum OutputFormat_t outputFormat
int(* statfs)(const char *, ior_aiori_statfs_t *, aiori_mod_opt_t *module_options)
int QueryNodeMapping(MPI_Comm comm, int print_nodemap)
int setTimeStampSignature
IOR_offset_t StringToBytes(char *size_str)
#define MPI_CHECK(MPI_STATUS, MSG)
const ior_aiori_t * aiori_select(const char *api)
ior_dataPacketType_e dataPacketType
ior_dataPacketType_e parsePacketType(char t)
static double TimeDeviation(MPI_Comm com)
static option_help options[]
void init_clock(MPI_Comm com)
char * CurrentTimeString(void)
void updateParsedOptions(IOR_param_t *options, options_all_t *global_options)
int GetNumNodes(MPI_Comm comm)
void initCUDA(int blockMapping, int rank, int numNodes, int tasksPerNode, int useGPUID)
double GetTimeStamp(void)
static const ior_aiori_t * backend
void StoreStoneWallingIterations(char *const filename, int64_t count)
static options_all_t * global_options
void update_write_memory_pattern(uint64_t item, char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
#define WARNF(FORMAT,...)
void generate_memory_pattern(char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
int64_t string_to_bytes(char *size_str)
void ExtractHint(char *settingVal, char *valueVal, char *hintString)
void generate_memory_pattern_gpu(char *buf, size_t bytes, int rand_seed, int rank, ior_dataPacketType_e dataPacketType)
void ShowFileSystemSize(char *filename, const struct ior_aiori *backend, void *backend_options)
const struct ior_aiori * backend
void SetHints(MPI_Info *mpiHints, char *hintsFileName)
void set_o_direct_flag(int *flag)
void FailMessage(int rank, const char *location, char *format,...)
int verify_memory_pattern(uint64_t item, char *buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
void DelaySecs(int delay)
int Regex(char *string, char *pattern)
size_t NodeMemoryStringToBytes(char *size_str)
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
void update_write_memory_pattern_gpu(uint64_t item, char *buf, size_t bytes, int rand_seed, int rank, ior_dataPacketType_e dataPacketType)
void DumpBuffer(void *buffer, size_t size)
long long int IOR_offset_t
int GetNumTasksOnNode0(MPI_Comm comm)
void * safeMalloc(uint64_t size)
void * aligned_buffer_alloc(size_t size, ior_memory_flags type)