IOR
ior.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  */
4 /******************************************************************************\
5 * *
6 * Copyright (c) 2003, The Regents of the University of California *
7 * See the file COPYRIGHT for a complete copyright notice and license. *
8 * *
9 \******************************************************************************/
10 
11 #ifdef HAVE_CONFIG_H
12 # include "config.h"
13 #endif
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 #include <ctype.h> /* tolower() */
19 #include <errno.h>
20 #include <math.h>
21 #include <mpi.h>
22 #include <string.h>
23 
24 #if defined(HAVE_STRINGS_H)
25 #include <strings.h>
26 #endif
27 
28 #include <sys/stat.h> /* struct stat */
29 #include <time.h>
30 
31 #ifndef _WIN32
32 # include <sys/time.h> /* gettimeofday() */
33 # include <sys/utsname.h> /* uname() */
34 #endif
35 
36 #ifdef HAVE_CUDA
37 #include <cuda_runtime.h>
38 #endif
39 
40 #include <assert.h>
41 
42 #include "ior.h"
43 #include "ior-internal.h"
44 #include "aiori.h"
45 #include "utilities.h"
46 #include "parse_options.h"
47 
48 enum {
56 };
57 
58 /* file scope globals */
59 extern char **environ;
60 static int totalErrorCount;
61 static const ior_aiori_t *backend;
62 
63 static void DestroyTests(IOR_test_t *tests_head);
64 static char *PrependDir(IOR_param_t *, char *);
65 static char **ParseFileName(char *, int *);
66 static void InitTests(IOR_test_t *);
67 static void TestIoSys(IOR_test_t *);
68 static void ValidateTests(IOR_param_t * params, MPI_Comm com);
70  aiori_fd_t *fd, const int access,
71  IOR_io_buffers *ioBuffers);
72 
74  aiori_xfer_hint_t * hints = & p->hints;
75  hints->dryRun = p->dryRun;
76  hints->filePerProc = p->filePerProc;
77  hints->collective = p->collective;
78  hints->numTasks = p->numTasks;
79  hints->numNodes = p->numNodes;
80  hints->randomOffset = p->randomOffset;
81  hints->fsyncPerWrite = p->fsyncPerWrite;
82  hints->segmentCount = p->segmentCount;
83  hints->blockSize = p->blockSize;
84  hints->transferSize = p->transferSize;
87 
88  if(backend->xfer_hints){
89  backend->xfer_hints(hints);
90  }
91 }
92 
94 
95 /*
96  Returns 1 if the process participates in the test
97  */
98 static int test_initialize(IOR_test_t * test){
99  int range[3];
100  IOR_param_t *params = &test->params;
101  MPI_Group orig_group, new_group;
102 
103  /* set up communicator for test */
104  MPI_CHECK(MPI_Comm_group(params->mpi_comm_world, &orig_group),
105  "MPI_Comm_group() error");
106  range[0] = 0; /* first rank */
107  range[1] = params->numTasks - 1; /* last rank */
108  range[2] = 1; /* stride */
109  MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group),
110  "MPI_Group_range_incl() error");
111  MPI_CHECK(MPI_Comm_create(params->mpi_comm_world, new_group, & params->testComm),
112  "MPI_Comm_create() error");
113  MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error");
114  MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error");
115 
116 
117  if (params->testComm == MPI_COMM_NULL) {
118  /* tasks not in the group do not participate in this test, this matches the proceses in test_finalize() that participate */
119  MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error");
120  return 0;
121  }
122 
123  /* Setup global variables */
124  testComm = params->testComm;
125  verbose = test->params.verbose;
126  backend = test->params.backend;
127 
130  }
131 
132 
133  if(backend->initialize){
134  backend->initialize(test->params.backend_options);
135  }
136  ior_set_xfer_hints(& test->params);
138 
139  if (rank == 0 && verbose >= VERBOSE_0) {
140  ShowTestStart(& test->params);
141  }
142  return 1;
143 }
144 
145 static void test_finalize(IOR_test_t * test){
146  backend = test->params.backend;
147  if(backend->finalize){
148  backend->finalize(test->params.backend_options);
149  }
150  MPI_CHECK(MPI_Barrier(test->params.mpi_comm_world), "barrier error");
151  MPI_CHECK(MPI_Comm_free(& testComm), "MPI_Comm_free() error");
152 }
153 
154 
155 IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out){
156  IOR_test_t *tests_head;
157  IOR_test_t *tptr;
158  out_logfile = world_out;
159  out_resultfile = world_out;
160 
161  MPI_CHECK(MPI_Comm_rank(world_com, &rank), "cannot get rank");
162 
163  /* setup tests, and validate parameters */
164  tests_head = ParseCommandLine(argc, argv, world_com);
165  InitTests(tests_head);
166 
167  PrintHeader(argc, argv);
168 
169  /* perform each test */
170  for (tptr = tests_head; tptr != NULL; tptr = tptr->next) {
171  int participate = test_initialize(tptr);
172  if( ! participate ) continue;
173  totalErrorCount = 0;
174  TestIoSys(tptr);
175  tptr->results->errors = totalErrorCount;
176  ShowTestEnd(tptr);
177  test_finalize(tptr);
178  }
179 
180  PrintLongSummaryAllTests(tests_head);
181 
182  /* display finish time */
183  PrintTestEnds();
184  return tests_head;
185 }
186 
187 
188 
189 int ior_main(int argc, char **argv)
190 {
191  IOR_test_t *tests_head;
192  IOR_test_t *tptr;
193 
194  out_logfile = stdout;
195  out_resultfile = stdout;
196 
197  /*
198  * check -h option from commandline without starting MPI;
199  */
200  tests_head = ParseCommandLine(argc, argv, MPI_COMM_WORLD);
201 
202  /* start the MPI code */
203  MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI");
204 
205  MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &rank), "cannot get rank");
206 
207  /* set error-handling */
208  /*MPI_CHECK(MPI_Errhandler_set(mpi_comm_world, MPI_ERRORS_RETURN),
209  "cannot set errhandler"); */
210 
211  /* setup tests, and validate parameters */
212  InitTests(tests_head);
213 
214  PrintHeader(argc, argv);
215 
216  /* perform each test */
217  for (tptr = tests_head; tptr != NULL; tptr = tptr->next) {
218  int participate = test_initialize(tptr);
219  if( ! participate ) continue;
220 
221  // This is useful for trapping a running MPI process. While
222  // this is sleeping, run the script 'testing/hdfs/gdb.attach'
223  if (verbose >= VERBOSE_4) {
224  fprintf(out_logfile, "\trank %d: sleeping\n", rank);
225  sleep(5);
226  fprintf(out_logfile, "\trank %d: awake.\n", rank);
227  }
228 
229  TestIoSys(tptr);
230  ShowTestEnd(tptr);
231  test_finalize(tptr);
232  }
233 
234  if (verbose <= VERBOSE_0)
235  /* always print final summary */
236  verbose = VERBOSE_1;
237  PrintLongSummaryAllTests(tests_head);
238 
239  /* display finish time */
240  PrintTestEnds();
241 
242  MPI_CHECK(MPI_Finalize(), "cannot finalize MPI");
243 
244  DestroyTests(tests_head);
245 
246  return totalErrorCount;
247 }
248 
249 /***************************** F U N C T I O N S ******************************/
250 
251 /*
252  * Initialize an IOR_param_t structure to the defaults
253  */
254 void init_IOR_Param_t(IOR_param_t * p, MPI_Comm com)
255 {
256  const char *default_aiori = aiori_default ();
257  assert (NULL != default_aiori);
258 
259  memset(p, 0, sizeof(IOR_param_t));
260  p->api = strdup(default_aiori);
261  p->platform = strdup("HOST(OSTYPE)");
262  p->testFileName = strdup("testFile");
263 
264  p->writeFile = p->readFile = FALSE;
265  p->checkWrite = p->checkRead = FALSE;
266 
267  p->minTimeDuration = 0;
268 
269  /*
270  * These can be overridden from the command-line but otherwise will be
271  * set from MPI.
272  */
273  p->numTasks = -1;
274  p->numNodes = -1;
275  p->numTasksOnNode0 = -1;
276  p->gpuID = -1;
277 
278  p->repetitions = 1;
279  p->repCounter = -1;
280  p->open = WRITE;
281  p->taskPerNodeOffset = 1;
282  p->segmentCount = 1;
283  p->blockSize = 1048576;
284  p->transferSize = 262144;
285  p->randomSeed = -1;
286  p->incompressibleSeed = 573;
287  p->testComm = com; // this com might change for smaller tests
288  p->mpi_comm_world = com;
289 
290  p->URI = NULL;
291 }
292 
293 static void
295  double timerVal,
296  char *timeString, int access, int outlierThreshold)
297 {
298  char accessString[MAX_STR];
299  double sum, mean, sqrDiff, var, sd;
300 
301  /* for local timerVal, don't compensate for wall clock delta */
302  //timerVal += wall_clock_delta;
303 
304  MPI_CHECK(MPI_Allreduce
305  (&timerVal, &sum, 1, MPI_DOUBLE, MPI_SUM, testComm),
306  "MPI_Allreduce()");
307  mean = sum / numTasks;
308  sqrDiff = pow((mean - timerVal), 2);
309  MPI_CHECK(MPI_Allreduce
310  (&sqrDiff, &var, 1, MPI_DOUBLE, MPI_SUM, testComm),
311  "MPI_Allreduce()");
312  var = var / numTasks;
313  sd = sqrt(var);
314 
315  if (access == WRITE) {
316  strcpy(accessString, "write");
317  } else { /* READ */
318  strcpy(accessString, "read");
319  }
320  if (fabs(timerVal - mean) > (double)outlierThreshold) {
321  char hostname[MAX_STR];
322  int ret = gethostname(hostname, MAX_STR);
323  if (ret != 0)
324  strcpy(hostname, "unknown");
325 
326  WARNF("for %s, task %d, %s %s is %f (mean=%f, stddev=%f)\n",
327  hostname, rank, accessString, timeString, timerVal, mean, sd);
328  }
329 }
330 
331 /*
332  * Check for outliers in start/end times and elapsed create/xfer/close times.
333  */
334 static void
335 CheckForOutliers(IOR_param_t *test, const double *timer, const int access)
336 {
338  "start time", access, test->outlierThreshold);
340  timer[IOR_TIMER_OPEN_STOP] - timer[IOR_TIMER_OPEN_START],
341  "elapsed create time", access, test->outlierThreshold);
344  "elapsed transfer time", access,
345  test->outlierThreshold);
348  "elapsed close time", access, test->outlierThreshold);
349  DisplayOutliers(test->numTasks, timer[IOR_TIMER_CLOSE_STOP], "end time",
350  access, test->outlierThreshold);
351 }
352 
353 /*
354  * Check if actual file size equals expected size; if not use actual for
355  * calculating performance rate.
356  */
357 static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t dataMoved, int rep, const int access)
358 {
359  IOR_param_t *params = &test->params;
360  IOR_results_t *results = test->results;
361  IOR_point_t *point = (access == WRITE) ? &results[rep].write :
362  &results[rep].read;
363 
364  /* get the size of the file */
365  IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
366  aggFileSizeFromStat = backend->get_file_size(params->backend_options, testFilename);
367 
368  if (params->hints.filePerProc == TRUE) {
369  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
370  MPI_LONG_LONG_INT, MPI_SUM, testComm),
371  "cannot reduce total data moved");
372  aggFileSizeFromStat = tmpSum;
373  } else {
374  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
375  MPI_LONG_LONG_INT, MPI_MIN, testComm),
376  "cannot reduce total data moved");
377  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
378  MPI_LONG_LONG_INT, MPI_MAX, testComm),
379  "cannot reduce total data moved");
380  if (tmpMin != tmpMax) {
381  if (rank == 0) {
382  WARN("inconsistent file size by different tasks");
383  }
384  /* incorrect, but now consistent across tasks */
385  aggFileSizeFromStat = tmpMin;
386  }
387  }
388  point->aggFileSizeFromStat = aggFileSizeFromStat;
389 
390  MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer,
391  1, MPI_LONG_LONG_INT, MPI_SUM, testComm),
392  "cannot total data moved");
393 
394  if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) {
395  if (verbose >= VERBOSE_0 && rank == 0) {
396  if ((params->expectedAggFileSize
397  != point->aggFileSizeFromXfer)
398  || (point->aggFileSizeFromStat
399  != point->aggFileSizeFromXfer)) {
400  WARNF("Expected aggregate file size = %lld", (long long) params->expectedAggFileSize);
401  WARNF("Stat() of aggregate file size = %lld", (long long) point->aggFileSizeFromStat);
402  WARNF("Using actual aggregate bytes moved = %lld", (long long) point->aggFileSizeFromXfer);
403  if(params->deadlineForStonewalling){
404  WARN("Maybe caused by deadlineForStonewalling");
405  }
406  }
407  }
408  }
409 
410  point->aggFileSizeForBW = point->aggFileSizeFromXfer;
411 }
412 
413 /*
414  * Compare buffers after reading/writing each transfer. Displays only first
415  * difference in buffers and returns total errors counted.
416  */
417 static size_t
418 CompareData(void *expectedBuffer, size_t size, IOR_param_t *test, IOR_offset_t offset, int fillrank, int access)
419 {
420  assert(access == WRITECHECK || access == READCHECK);
421  return verify_memory_pattern(offset, expectedBuffer, size, test->timeStampSignatureValue, fillrank, test->dataPacketType, test->gpuMemoryFlags);
422 }
423 
424 /*
425  * Count all errors across all tasks; report errors found.
426  */
427 static int CountErrors(IOR_param_t * test, int access, int errors)
428 {
429  int allErrors = 0;
430 
431  if (test->checkWrite || test->checkRead) {
432  MPI_CHECK(MPI_Reduce(&errors, &allErrors, 1, MPI_INT, MPI_SUM,
433  0, testComm), "cannot reduce errors");
434  MPI_CHECK(MPI_Bcast(&allErrors, 1, MPI_INT, 0, testComm),
435  "cannot broadcast allErrors value");
436  if (allErrors != 0) {
437  totalErrorCount += allErrors;
438  test->errorFound = TRUE;
439  }
440  if (rank == 0 && allErrors != 0) {
441  if (allErrors < 0) {
442  WARN("overflow in errors counted");
443  allErrors = -1;
444  }
445  WARNF("Incorrect data on %s (%d errors found).\n",
446  access == WRITECHECK ? "write" : "read", allErrors);
447  fprintf(out_logfile,
448  "Used Time Stamp %u (0x%x) for Data Signature\n",
451  }
452  }
453  return (allErrors);
454 }
455 
457 {
458  int reps;
459  if (test->results != NULL)
460  return;
461 
462  reps = test->params.repetitions;
463  test->results = (IOR_results_t *) safeMalloc(sizeof(IOR_results_t) * reps);
464 }
465 
467 {
468  if (test->results != NULL) {
469  free(test->results);
470  }
471 }
472 
473 
477 IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num)
478 {
479  IOR_test_t *newTest = NULL;
480 
481  newTest = (IOR_test_t *) malloc(sizeof(IOR_test_t));
482  if (newTest == NULL)
483  ERR("malloc() of IOR_test_t failed");
484  newTest->params = *init_params;
485  newTest->params.platform = GetPlatformName();
486  newTest->params.id = test_num;
487  newTest->next = NULL;
488  newTest->results = NULL;
489 
490  return newTest;
491 }
492 
493 static void DestroyTest(IOR_test_t *test)
494 {
495  FreeResults(test);
496  free(test);
497 }
498 
499 static void DestroyTests(IOR_test_t *tests_head)
500 {
501  IOR_test_t *tptr, *next;
502 
503  for (tptr = tests_head; tptr != NULL; tptr = next) {
504  next = tptr->next;
505  DestroyTest(tptr);
506  }
507 }
508 
509 /*
510  * Distribute IOR_HINTs to all tasks' environments.
511  */
512 static void DistributeHints(MPI_Comm com)
513 {
514  char hint[MAX_HINTS][MAX_STR], fullHint[MAX_STR], hintVariable[MAX_STR];
515  int hintCount = 0, i;
516 
517  if (rank == 0) {
518  for (i = 0; environ[i] != NULL; i++) {
519  if (strncmp(environ[i], "IOR_HINT", strlen("IOR_HINT"))
520  == 0) {
521  hintCount++;
522  if (hintCount == MAX_HINTS) {
523  WARN("exceeded max hints; reset MAX_HINTS and recompile");
524  hintCount = MAX_HINTS;
525  break;
526  }
527  /* assume no IOR_HINT is greater than MAX_STR in length */
528  strncpy(hint[hintCount - 1], environ[i],
529  MAX_STR - 1);
530  }
531  }
532  }
533 
534  MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, 0, com), "cannot broadcast hints");
535  for (i = 0; i < hintCount; i++) {
536  MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, 0, com),
537  "cannot broadcast hints");
538  strcpy(fullHint, hint[i]);
539  strcpy(hintVariable, strtok(fullHint, "="));
540  if (getenv(hintVariable) == NULL) {
541  /* doesn't exist in this task's environment; better set it */
542  if (putenv(hint[i]) != 0)
543  WARN("cannot set environment variable");
544  }
545  }
546 }
547 
548 /*
549  * Return string describing machine name and type.
550  */
552 {
553  char nodeName[MAX_STR], *p, *start, sysName[MAX_STR];
554  char platformName[MAX_STR];
555  struct utsname name;
556 
557  if (uname(&name) != 0) {
558  WARN("cannot get platform name");
559  sprintf(sysName, "%s", "Unknown");
560  sprintf(nodeName, "%s", "Unknown");
561  } else {
562  sprintf(sysName, "%s", name.sysname);
563  sprintf(nodeName, "%s", name.nodename);
564  }
565 
566  start = nodeName;
567  if (strlen(nodeName) == 0) {
568  p = start;
569  } else {
570  /* point to one character back from '\0' */
571  p = start + strlen(nodeName) - 1;
572  }
573  /*
574  * to cut off trailing node number, search backwards
575  * for the first non-numeric character
576  */
577  while (p != start) {
578  if (*p < '0' || *p > '9') {
579  *(p + 1) = '\0';
580  break;
581  } else {
582  p--;
583  }
584  }
585 
586  sprintf(platformName, "%s(%s)", nodeName, sysName);
587  return strdup(platformName);
588 }
589 
590 
591 
592 /*
593  * Parse file name.
594  */
595 static char **ParseFileName(char *name, int *count)
596 {
597  char **fileNames, *tmp, *token;
598  char delimiterString[3] = { FILENAME_DELIMITER, '\n', '\0' };
599  int i = 0;
600 
601  *count = 0;
602  tmp = name;
603 
604  /* pass one */
605  /* if something there, count the first item */
606  if (*tmp != '\0') {
607  (*count)++;
608  }
609  /* count the rest of the filenames */
610  while (*tmp != '\0') {
611  if (*tmp == FILENAME_DELIMITER) {
612  (*count)++;
613  }
614  tmp++;
615  }
616 
617  fileNames = (char **)malloc((*count) * sizeof(char **));
618  if (fileNames == NULL)
619  ERR("out of memory");
620 
621  /* pass two */
622  token = strtok(name, delimiterString);
623  while (token != NULL) {
624  fileNames[i] = token;
625  token = strtok(NULL, delimiterString);
626  i++;
627  }
628  return (fileNames);
629 }
630 
631 
632 /*
633  * Return test file name to access.
634  * for single shared file, fileNames[0] is returned in testFileName
635  */
636 void GetTestFileName(char *testFileName, IOR_param_t * test)
637 {
638  char **fileNames;
639  char initialTestFileName[MAX_PATHLEN];
640  char testFileNameRoot[MAX_STR];
641  char tmpString[MAX_STR];
642  int count;
643  int socket, core;
644 
645  /* parse filename for multiple file systems */
646  strcpy(initialTestFileName, test->testFileName);
647  if(test->dualMount){
648  GetProcessorAndCore(&socket, &core);
649  sprintf(tmpString, "%s%d/%s",initialTestFileName, socket, "data");
650  strcpy(initialTestFileName, tmpString);
651  }
652  fileNames = ParseFileName(initialTestFileName, &count);
653  if (count > 1 && test->uniqueDir == TRUE)
654  ERR("cannot use multiple file names with unique directories");
655  if (test->filePerProc) {
656  strcpy(testFileNameRoot,
657  fileNames[((rank +
658  rankOffset) % test->numTasks) % count]);
659  } else {
660  strcpy(testFileNameRoot, fileNames[0]);
661  }
662 
663  /* give unique name if using multiple files */
664  if (test->filePerProc) {
665  /*
666  * prepend rank subdirectory before filename
667  * e.g., /dir/file => /dir/<rank>/file
668  */
669  if (test->uniqueDir == TRUE) {
670  strcpy(testFileNameRoot,
671  PrependDir(test, testFileNameRoot));
672  }
673  sprintf(testFileName, "%s.%08d", testFileNameRoot,
674  (rank + rankOffset) % test->numTasks);
675  } else {
676  strcpy(testFileName, testFileNameRoot);
677  }
678 
679  /* add suffix for multiple files */
680  if (test->repCounter > -1) {
681  sprintf(tmpString, ".%d", test->repCounter);
682  strcat(testFileName, tmpString);
683  }
684  free (fileNames);
685 }
686 
687 /*
688  * From absolute directory, insert rank as subdirectory. Allows each task
689  * to write to its own directory. E.g., /dir/file => /dir/<rank>/file.
690  */
691 static char *PrependDir(IOR_param_t * test, char *rootDir)
692 {
693  char *dir;
694  char *fname;
695  int i;
696 
697  dir = (char *)malloc(MAX_STR + 1);
698  if (dir == NULL)
699  ERR("out of memory");
700 
701  /* get dir name */
702  strcpy(dir, rootDir);
703  i = strlen(dir) - 1;
704  while (i > 0) {
705  if (dir[i] == '\0' || dir[i] == '/') {
706  dir[i] = '/';
707  dir[i + 1] = '\0';
708  break;
709  }
710  i--;
711  }
712 
713  /* get file name */
714  fname = rootDir + i + 1;
715 
716  /* create directory with rank as subdirectory */
717  sprintf(dir + i + 1, "%d", (rank + rankOffset) % test->numTasks);
718 
719  /* dir doesn't exist, so create */
720  if (backend->access(dir, F_OK, test->backend_options) != 0) {
721  if (backend->mkdir(dir, S_IRWXU, test->backend_options) < 0) {
722  ERRF("cannot create directory: %s", dir);
723  }
724 
725  /* check if correct permissions */
726  } else if (backend->access(dir, R_OK, test->backend_options) != 0 ||
727  backend->access(dir, W_OK, test->backend_options) != 0 ||
728  backend->access(dir, X_OK, test->backend_options) != 0) {
729  ERRF("invalid directory permissions: %s", dir);
730  }
731 
732  /* concatenate dir and file names */
733  strcat(dir, "/");
734  strcat(dir, fname);
735 
736  return dir;
737 }
738 
739 /******************************************************************************/
740 /*
741  * Reduce test results, and show if verbose set.
742  */
743 static void
744 ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int access)
745 {
746  double reduced[IOR_NB_TIMERS] = { 0 };
747  double diff[IOR_NB_TIMERS / 2 + 1];
748  double totalTime, accessTime;
749  IOR_param_t *params = &test->params;
750  double bw, iops, latency, minlatency;
751  int i;
752  MPI_Op op;
753 
754  assert(access == WRITE || access == READ);
755 
756  /* Find the minimum start time of the even numbered timers, and the
757  maximum finish time for the odd numbered timers */
758  for (i = 0; i < IOR_NB_TIMERS; i++) {
759  op = i % 2 ? MPI_MAX : MPI_MIN;
760  MPI_CHECK(MPI_Reduce(&timer[i], &reduced[i], 1, MPI_DOUBLE,
761  op, 0, testComm), "MPI_Reduce()");
762  }
763 
764  /* Calculate elapsed times and throughput numbers */
765  for (i = 0; i < IOR_NB_TIMERS / 2; i++)
766  diff[i] = reduced[2 * i + 1] - reduced[2 * i];
767 
768  totalTime = reduced[IOR_TIMER_CLOSE_STOP] - reduced[IOR_TIMER_OPEN_START];
769  accessTime = reduced[IOR_TIMER_RDWR_STOP] - reduced[IOR_TIMER_RDWR_START];
770 
771  IOR_point_t *point = (access == WRITE) ? &test->results[rep].write :
772  &test->results[rep].read;
773 
774  point->time = totalTime;
775 
776  if (verbose < VERBOSE_0)
777  return;
778 
779  bw = (double)point->aggFileSizeForBW / totalTime;
780 
781  /* For IOPS in this iteration, we divide the total amount of IOs from
782  * all ranks over the entire access time (first start -> last end). */
783  iops = (point->aggFileSizeForBW / params->transferSize) / accessTime;
784 
785  /* For Latency, we divide the total access time for each task over the
786  * number of I/Os issued from that task; then reduce and display the
787  * minimum (best) latency achieved. So what is reported is the average
788  * latency of all ops from a single task, then taking the minimum of
789  * that between all tasks. */
790  latency = (timer[IOR_TIMER_RDWR_STOP] - timer[IOR_TIMER_RDWR_START]) / (params->blockSize / params->transferSize);
791  MPI_CHECK(MPI_Reduce(&latency, &minlatency, 1, MPI_DOUBLE,
792  MPI_MIN, 0, testComm), "MPI_Reduce()");
793 
794  /* Only rank 0 tallies and prints the results. */
795  if (rank != 0)
796  return;
797 
798  PrintReducedResult(test, access, bw, iops, latency, diff, totalTime, rep);
799 }
800 
801 /*
802  * Check for file(s), then remove all files if file-per-proc, else single file.
803  *
804  */
805 static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test)
806 {
807  int tmpRankOffset = 0;
808  if (filePerProc) {
809  /* in random tasks, delete own file */
810  if (test->reorderTasksRandom == TRUE) {
811  tmpRankOffset = rankOffset;
812  rankOffset = 0;
813  GetTestFileName(testFileName, test);
814  }
815  if (backend->access(testFileName, F_OK, test->backend_options) == 0) {
816  if (verbose >= VERBOSE_3) {
817  fprintf(out_logfile, "task %d removing %s\n", rank,
818  testFileName);
819  }
820  backend->delete(testFileName, test->backend_options);
821  }
822  if (test->reorderTasksRandom == TRUE) {
823  rankOffset = tmpRankOffset;
824  GetTestFileName(testFileName, test);
825  }
826  } else {
827  if ((rank == 0) && (backend->access(testFileName, F_OK, test->backend_options) == 0)) {
828  if (verbose >= VERBOSE_3) {
829  fprintf(out_logfile, "task %d removing %s\n", rank,
830  testFileName);
831  }
832  backend->delete(testFileName, test->backend_options);
833  }
834  }
835 }
836 
837 /*
838  * Setup tests by parsing commandline and creating test script.
839  * Perform a sanity-check on the configured parameters.
840  */
841 static void InitTests(IOR_test_t *tests)
842 {
843  if(tests == NULL){
844  return;
845  }
846  MPI_Comm com = tests->params.mpi_comm_world;
847  int mpiNumNodes = 0;
848  int mpiNumTasks = 0;
849  int mpiNumTasksOnNode0 = 0;
850 
851  verbose = tests->params.verbose;
853 
854  /*
855  * These default values are the same for every test and expensive to
856  * retrieve so just do it once.
857  */
858  mpiNumNodes = GetNumNodes(com);
859  mpiNumTasks = GetNumTasks(com);
860  mpiNumTasksOnNode0 = GetNumTasksOnNode0(com);
861 
862  /*
863  * Since there is no guarantee that anyone other than
864  * task 0 has the environment settings for the hints, pass
865  * the hint=value pair to everyone else in mpi_comm_world
866  */
867  DistributeHints(com);
868 
869  /* check validity of tests and create test queue */
870  while (tests != NULL) {
871  IOR_param_t *params = & tests->params;
872  params->testComm = com;
873 
874  /* use MPI values if not overridden on command-line */
875  if (params->numNodes == -1) {
876  params->numNodes = mpiNumNodes;
877  }
878  if (params->numTasks == -1) {
879  params->numTasks = mpiNumTasks;
880  } else if (params->numTasks > mpiNumTasks) {
881  if (rank == 0) {
882  WARNF("More tasks requested (%d) than available (%d),",
883  params->numTasks, mpiNumTasks);
884  WARNF(" running with %d tasks.\n", mpiNumTasks);
885  }
886  params->numTasks = mpiNumTasks;
887  }
888  if (params->numTasksOnNode0 == -1) {
889  params->numTasksOnNode0 = mpiNumTasksOnNode0;
890  }
891 
892  params->tasksBlockMapping = QueryNodeMapping(com,false);
893  params->expectedAggFileSize =
894  params->blockSize * params->segmentCount * params->numTasks;
895 
896  ValidateTests(&tests->params, com);
897  tests = tests->next;
898  }
899 
900  init_clock(com);
901 }
902 
903 /*
904  * Setup transfer buffers, creating and filling as needed.
905  */
906 static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test,
907  int pretendRank)
908 {
909  ioBuffers->buffer = aligned_buffer_alloc(test->transferSize, test->gpuMemoryFlags);
910 }
911 
912 /*
913  * Free transfer buffers.
914  */
915 static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test)
916 
917 {
918  aligned_buffer_free(ioBuffers->buffer, test->gpuMemoryFlags);
919 }
920 
921 
922 
/*
 * malloc a buffer, touching every page in an attempt to defeat lazy allocation.
 *
 * One byte is written at the start of the buffer and then at every
 * page-size step after it.  Returns NULL when size is 0 or when malloc()
 * fails; otherwise the caller owns the buffer and releases it with free().
 */
static void *malloc_and_touch(size_t size)
{
        long reported_page;
        size_t page_size;
        size_t offset;
        char *buf;

        if (size == 0)
                return NULL;

        /* sysconf() returns -1 on failure; never use that as a stride */
        reported_page = sysconf(_SC_PAGESIZE);
        page_size = (reported_page > 0) ? (size_t)reported_page : (size_t)4096;

        buf = (char *)malloc(size);
        if (buf == NULL)
                return NULL;

        /* index instead of stepping a pointer past the end of the buffer */
        for (offset = 0; offset < size; offset += page_size) {
                buf[offset] = (char)1;
                /* stop before offset + page_size could wrap around */
                if (size - offset <= page_size)
                        break;
        }

        return (void *)buf;
}
947 
948 static void file_hits_histogram(IOR_param_t *params)
949 {
950  int *rankoffs = NULL;
951  int *filecont = NULL;
952  int *filehits = NULL;
953  int ifile;
954  int jfile;
955 
956  if (rank == 0) {
957  rankoffs = (int *)malloc(params->numTasks * sizeof(int));
958  filecont = (int *)malloc(params->numTasks * sizeof(int));
959  filehits = (int *)malloc(params->numTasks * sizeof(int));
960  }
961 
962  MPI_CHECK(MPI_Gather(&rankOffset, 1, MPI_INT, rankoffs,
963  1, MPI_INT, 0, params->testComm),
964  "MPI_Gather error");
965 
966  if (rank != 0)
967  return;
968 
969  memset((void *)filecont, 0, params->numTasks * sizeof(int));
970  for (ifile = 0; ifile < params->numTasks; ifile++) {
971  filecont[(ifile + rankoffs[ifile]) % params->numTasks]++;
972  }
973  memset((void *)filehits, 0, params->numTasks * sizeof(int));
974  for (ifile = 0; ifile < params->numTasks; ifile++)
975  for (jfile = 0; jfile < params->numTasks; jfile++) {
976  if (ifile == filecont[jfile])
977  filehits[ifile]++;
978  }
979  fprintf(out_logfile, "#File Hits Dist:");
980  jfile = 0;
981  ifile = 0;
982  while (jfile < params->numTasks && ifile < params->numTasks) {
983  fprintf(out_logfile, " %d", filehits[ifile]);
984  jfile += filehits[ifile], ifile++;
985  }
986  fprintf(out_logfile, "\n");
987  free(rankoffs);
988  free(filecont);
989  free(filehits);
990 }
991 
992 
993 int test_time_elapsed(IOR_param_t *params, double startTime)
994 {
995  double endTime;
996 
997  if (params->maxTimeDuration == 0)
998  return 0;
999 
1000  endTime = startTime + (params->maxTimeDuration * 60);
1001 
1002  return GetTimeStamp() >= endTime;
1003 }
1004 
1005 /*
1006  * hog some memory as a rough simulation of a real application's memory use
1007  */
1008 static void *HogMemory(IOR_param_t *params)
1009 {
1010  size_t size;
1011  void *buf;
1012 
1013  if (params->memoryPerTask != 0) {
1014  size = params->memoryPerTask;
1015  } else if (params->memoryPerNode != 0) {
1016  if (verbose >= VERBOSE_3)
1017  fprintf(out_logfile, "This node hogging %ld bytes of memory\n",
1018  params->memoryPerNode);
1019  size = params->memoryPerNode / params->numTasksOnNode0;
1020  } else {
1021  return NULL;
1022  }
1023 
1024  if (verbose >= VERBOSE_3)
1025  fprintf(out_logfile, "This task hogging %ld bytes of memory\n", size);
1026 
1027  buf = malloc_and_touch(size);
1028  if (buf == NULL)
1029  ERR("malloc of simulated applciation buffer failed");
1030 
1031  return buf;
1032 }
1033 /*
1034  * Write times taken during each iteration of the test.
1035  */
1036 static void
1037 WriteTimes(IOR_param_t *test, const double *timer, const int iteration,
1038  const int access)
1039 {
1040  char timerName[MAX_STR];
1041 
1042  for (int i = 0; i < IOR_NB_TIMERS; i++) {
1043 
1044  if (access == WRITE) {
1045  switch (i) {
1046  case IOR_TIMER_OPEN_START:
1047  strcpy(timerName, "write open start");
1048  break;
1049  case IOR_TIMER_OPEN_STOP:
1050  strcpy(timerName, "write open stop");
1051  break;
1052  case IOR_TIMER_RDWR_START:
1053  strcpy(timerName, "write start");
1054  break;
1055  case IOR_TIMER_RDWR_STOP:
1056  strcpy(timerName, "write stop");
1057  break;
1058  case IOR_TIMER_CLOSE_START:
1059  strcpy(timerName, "write close start");
1060  break;
1061  case IOR_TIMER_CLOSE_STOP:
1062  strcpy(timerName, "write close stop");
1063  break;
1064  default:
1065  strcpy(timerName, "invalid timer");
1066  break;
1067  }
1068  }
1069  else {
1070  switch (i) {
1071  case IOR_TIMER_OPEN_START:
1072  strcpy(timerName, "read open start");
1073  break;
1074  case IOR_TIMER_OPEN_STOP:
1075  strcpy(timerName, "read open stop");
1076  break;
1077  case IOR_TIMER_RDWR_START:
1078  strcpy(timerName, "read start");
1079  break;
1080  case IOR_TIMER_RDWR_STOP:
1081  strcpy(timerName, "read stop");
1082  break;
1083  case IOR_TIMER_CLOSE_START:
1084  strcpy(timerName, "read close start");
1085  break;
1086  case IOR_TIMER_CLOSE_STOP:
1087  strcpy(timerName, "read close stop");
1088  break;
1089  default:
1090  strcpy(timerName, "invalid timer");
1091  break;
1092  }
1093  }
1094  fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n",
1095  test->id, iteration, (int)rank, timer[i],
1096  timerName);
1097  }
1098 }
1099 
1100 static void StoreRankInformation(IOR_test_t *test, double *timer, const int rep, const int access){
1101  IOR_param_t *params = &test->params;
1102  double totalTime = timer[IOR_TIMER_CLOSE_STOP] - timer[IOR_TIMER_OPEN_START];
1103  double accessTime = timer[IOR_TIMER_RDWR_STOP] - timer[IOR_TIMER_RDWR_START];
1104  double times[] = {totalTime, accessTime};
1105 
1106  if(rank == 0){
1107  FILE* fd = fopen(params->saveRankDetailsCSV, "a");
1108  if (fd == NULL){
1109  FAIL("Cannot open saveRankPerformanceDetailsCSV file for writes!");
1110  }
1111  int size;
1112  MPI_Comm_size(params->testComm, & size);
1113  double *all_times = malloc(2* size * sizeof(double));
1114  MPI_Gather(times, 2, MPI_DOUBLE, all_times, 2, MPI_DOUBLE, 0, params->testComm);
1115  IOR_point_t *point = (access == WRITE) ? &test->results[rep].write : &test->results[rep].read;
1116  double file_size = ((double) point->aggFileSizeForBW) / size;
1117 
1118  for(int i=0; i < size; i++){
1119  char buff[1024];
1120  sprintf(buff, "%s,%d,%.10e,%.10e,%.10e,%.10e\n", access==WRITE ? "write" : "read", i, all_times[i*2], all_times[i*2+1], file_size/all_times[i*2], file_size/all_times[i*2+1] );
1121  int ret = fwrite(buff, strlen(buff), 1, fd);
1122  if(ret != 1){
1123  WARN("Couln't append to saveRankPerformanceDetailsCSV file\n");
1124  break;
1125  }
1126  }
1127  fclose(fd);
1128  }else{
1129  MPI_Gather(& times, 2, MPI_DOUBLE, NULL, 2, MPI_DOUBLE, 0, testComm);
1130  }
1131 }
1132 
1133 static void ProcessIterResults(IOR_test_t *test, double *timer, const int rep, const int access){
1134  IOR_param_t *params = &test->params;
1135 
1136  if (verbose >= VERBOSE_3)
1137  WriteTimes(params, timer, rep, access);
1138  ReduceIterResults(test, timer, rep, access);
1139  if (params->outlierThreshold) {
1140  CheckForOutliers(params, timer, access);
1141  }
1142 
1143  if(params->saveRankDetailsCSV){
1144  StoreRankInformation(test, timer, rep, access);
1145  }
1146 }
1147 
1148 /*
1149  * Using the test parameters, run iteration(s) of single test.
 *
 * After printing the setup and preparing the transfer buffers, the function
 * loops over params->repetitions.  Each repetition broadcasts the time stamp
 * signature from rank 0, regenerates the memory pattern and then runs, as
 * selected by the parameters and as long as test_time_elapsed() is false:
 *   - a write phase            (params->writeFile),
 *   - a write-check phase      (params->checkWrite),
 *   - a read / read-check phase (params->readFile or params->checkRead).
 * At the end of a repetition the test file is removed unless it has to be
 * kept.  After the loop a summary is printed and the buffers are released.
 *
 * NOTE(review): several numbered lines are absent from this listing (1257,
 * 1269, 1279, 1282, 1331, 1387, 1399, 1405, 1407, 1435-1436).  Of the timer[]
 * entries only IOR_TIMER_OPEN_STOP and IOR_TIMER_RDWR_STOP are visibly
 * assigned here -- confirm the remaining assignments against the original
 * file.
1150  */
1151 static void TestIoSys(IOR_test_t *test)
1152 {
1153  IOR_param_t *params = &test->params;
1154  IOR_results_t *results = test->results;
1155  char testFileName[MAX_STR];
1156  double timer[IOR_NB_TIMERS];
1157  double startTime;
1158  int pretendRank;
1159  int rep;
1160  aiori_fd_t *fd;
1161  IOR_offset_t dataMoved; /* for data rate calculation */
1162  void *hog_buf;
1163  IOR_io_buffers ioBuffers;
1164 
1165  if (rank == 0 && verbose >= VERBOSE_1) {
1166  fprintf(out_logfile, "Participating tasks : %d\n", params->numTasks);
1167  fflush(out_logfile);
1168  }
1169  if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) {
1170  fprintf(out_logfile,
1171  "Using reorderTasks '-C' (useful to avoid read cache in client)\n");
1172  fflush(out_logfile);
1173  }
1174  /* show test setup */
1175  if (rank == 0 && verbose >= VERBOSE_0)
1176  ShowSetup(params);
1177 
1178  hog_buf = HogMemory(params);
1179 
/* shift this task's rank by the current rankOffset, wrapping at numTasks */
1180  pretendRank = (rank + rankOffset) % params->numTasks;
1181 
1182  /* IO Buffer Setup */
1183 
1184  if (params->setTimeStampSignature) { // initialize the buffer properly
1185  params->timeStampSignatureValue = (unsigned int) params->setTimeStampSignature;
1186  }
1187 
1188  XferBuffersSetup(&ioBuffers, params, pretendRank);
1189 
1190  /* Initial time stamp */
1191  startTime = GetTimeStamp();
1192 
1193  /* loop over test iterations */
/* remember the configured wear-out iteration count; it is restored before every write phase below */
1194  uint64_t params_saved_wearout = params->stoneWallingWearOutIterations;
1195 
1196  /* Check if the file exists and warn users */
1197  if((params->writeFile || params->checkWrite) && (params->hints.filePerProc || rank == 0)){
1198  struct stat sb;
1199  GetTestFileName(testFileName, params);
1200  int ret = backend->stat(testFileName, & sb, params->backend_options);
1201  if(ret == 0) {
1202  WARNF("The file \"%s\" exists already and will be %s", testFileName,
1203  params->useExistingTestFile ? "overwritten" : "deleted");
1204  }
1205  }
1206 
1207  for (rep = 0; rep < params->repetitions; rep++) {
1208  /* Get iteration start time in seconds in task 0 and broadcast to
1209  all tasks */
1210  if (rank == 0) {
1211  if (! params->setTimeStampSignature) {
1212  time_t currentTime;
1213  if ((currentTime = time(NULL)) == -1) {
1214  ERR("cannot get current time");
1215  }
1216  params->timeStampSignatureValue = (unsigned int)currentTime;
1217  }
1218  if (verbose >= VERBOSE_2) {
1219  fprintf(out_logfile,
1220  "Using Time Stamp %u (0x%x) for Data Signature\n",
1221  params->timeStampSignatureValue,
1222  params->timeStampSignatureValue);
1223  }
1224  if (rep == 0 && verbose >= VERBOSE_0) {
1225  PrintTableHeader();
1226  }
1227  }
1228  MPI_CHECK(MPI_Bcast
1229  (&params->timeStampSignatureValue, 1, MPI_UNSIGNED, 0,
1230  testComm), "cannot broadcast start time value");
1231 
1232  generate_memory_pattern((char*) ioBuffers.buffer, params->transferSize, params->timeStampSignatureValue, pretendRank, params->dataPacketType, params->gpuMemoryFlags);
1233 
1234  /* use repetition count for number of multiple files */
1235  if (params->multiFile)
1236  params->repCounter = rep;
1237 
1238  /*
1239  * write the file(s), getting timing between I/O calls
1240  */
1241 
1242  if (params->writeFile && !test_time_elapsed(params, startTime)) {
1243  GetTestFileName(testFileName, params);
1244  if (verbose >= VERBOSE_3) {
1245  fprintf(out_logfile, "task %d writing %s\n", rank,
1246  testFileName);
1247  }
1248  DelaySecs(params->interTestDelay);
1249  if (params->useExistingTestFile == FALSE) {
1250  RemoveFile(testFileName, params->filePerProc,
1251  params);
1252  }
1253 
1254  params->stoneWallingWearOutIterations = params_saved_wearout;
1255  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1256  params->open = WRITE;
1258  fd = backend->create(testFileName, IOR_WRONLY | IOR_CREAT | IOR_TRUNC, params->backend_options);
1259  if(fd == NULL) FAIL("Cannot create file");
1260  timer[IOR_TIMER_OPEN_STOP] = GetTimeStamp();
1261  if (params->intraTestBarriers)
1262  MPI_CHECK(MPI_Barrier(testComm),
1263  "barrier error");
1264  if (rank == 0 && verbose >= VERBOSE_1) {
1265  fprintf(out_logfile,
1266  "Commencing write performance test: %s",
1267  CurrentTimeString());
1268  }
1270  dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers);
/* NOTE(review): this check reads params->verbose while the other checks in this function use the file-level verbose;
   dataMoved is an IOR_offset_t printed with %llu -- confirm the type matches that conversion */
1271  if (params->verbose >= VERBOSE_4) {
1272  fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
1273  fflush(out_logfile);
1274  }
1275  timer[IOR_TIMER_RDWR_STOP] = GetTimeStamp();
1276  if (params->intraTestBarriers)
1277  MPI_CHECK(MPI_Barrier(testComm),
1278  "barrier error");
1280  backend->close(fd, params->backend_options);
1281 
1283  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1284 
1285  /* check if stat() of file doesn't equal expected file size,
1286  use actual amount of byte moved */
1287  CheckFileSize(test, testFileName, dataMoved, rep, WRITE);
1288 
1289  ProcessIterResults(test, timer, rep, WRITE);
1290 
1291  /* check if in this round we run write with stonewalling */
/* record how many pairs this write accessed as the wear-out iteration count */
1292  if(params->deadlineForStonewalling > 0){
1293  params->stoneWallingWearOutIterations = results[rep].write.pairs_accessed;
1294  }
1295  }
1296 
1297  /*
1298  * perform a check of data, reading back data and comparing
1299  * against what was expected to be written
1300  */
1301  if (params->checkWrite && !test_time_elapsed(params, startTime)) {
1302  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1303  if (rank == 0 && verbose >= VERBOSE_1) {
1304  fprintf(out_logfile,
1305  "Verifying contents of the file(s) just written.\n");
1306  fprintf(out_logfile, "%s\n", CurrentTimeString());
1307  }
1308  if (params->reorderTasks) {
1309  /* move two nodes away from writing node */
1310  int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
1311  if (params->tasksBlockMapping) {
1312  shift = params->numTasksOnNode0; /* switch to by-slot (contiguous block) mapping */
1313  }
1314  rankOffset = (2 * shift) % params->numTasks;
1315  }
1316 
1317  GetTestFileName(testFileName, params);
1318  params->open = WRITECHECK;
1319  fd = backend->open(testFileName, IOR_RDONLY, params->backend_options);
1320  if(fd == NULL) FAIL("Cannot open file");
1321  dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers);
1322  backend->close(fd, params->backend_options);
/* undo the reordering applied for the write check */
1323  rankOffset = 0;
1324  }
1325  /*
1326  * read the file(s), getting timing between I/O calls
1327  */
1328  if ((params->readFile || params->checkRead ) && !test_time_elapsed(params, startTime)) {
1329  /* check for stonewall */
1330  if(params->stoneWallingStatusFile){
/* NOTE(review): listing line 1331 is absent; the value compared against -1 below is presumably loaded from the status file there -- confirm */
1332  if(params->stoneWallingWearOutIterations == -1 && rank == 0){
1333  WARN("Could not read back the stonewalling status from the file!");
1334  params->stoneWallingWearOutIterations = 0;
1335  }
1336  }
1337  int operation_flag = READ;
1338  if ( params->checkRead ){
1339  // actually read and then compare the buffer
1340  operation_flag = READCHECK;
1341  }
1342  /* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */
1343  /* Constant process offset reading */
1344  if (params->reorderTasks) {
1345  /* move one node away from writing node */
1346  int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
1347  if (params->tasksBlockMapping) {
1348  shift=params->numTasksOnNode0; /* switch to a by-slot (contiguous block) mapping */
1349  }
1350  rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks;
1351  }
1352  /* random process offset reading */
1353  if (params->reorderTasksRandom) {
1354  /* this should not intefere with randomOffset within a file because GetOffsetArrayRandom */
1355  /* seeds every rand() call */
1356  int nodeoffset;
1357  unsigned int iseed0;
1358  nodeoffset = params->taskPerNodeOffset;
1359  nodeoffset = (nodeoffset < params->numNodes) ? nodeoffset : params->numNodes - 1;
1360  if (params->reorderTasksRandomSeed < 0)
1361  iseed0 = -1 * params->reorderTasksRandomSeed + rep;
1362  else
1363  iseed0 = params->reorderTasksRandomSeed;
1364  srand(rank + iseed0);
1365  {
1366  rankOffset = rand() % params->numTasks;
1367  }
/* redraw until the offset is at least nodeoffset * numTasksOnNode0 */
1368  while (rankOffset <
1369  (nodeoffset * params->numTasksOnNode0)) {
1370  rankOffset = rand() % params->numTasks;
1371  }
1372  /* Get more detailed stats if requested by verbose level */
1373  if (verbose >= VERBOSE_2) {
1374  file_hits_histogram(params);
1375  }
1376  }
1377  /* Using globally passed rankOffset, following function generates testFileName to read */
1378  GetTestFileName(testFileName, params);
1379 
1380  if (verbose >= VERBOSE_3) {
1381  fprintf(out_logfile, "task %d reading %s\n", rank,
1382  testFileName);
1383  }
1384  DelaySecs(params->interTestDelay);
1385  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1386  params->open = READ;
1388  fd = backend->open(testFileName, IOR_RDONLY, params->backend_options);
1389  if(fd == NULL) FAIL("Cannot open file");
1390  timer[IOR_TIMER_OPEN_STOP] = GetTimeStamp();
1391  if (params->intraTestBarriers)
1392  MPI_CHECK(MPI_Barrier(testComm),
1393  "barrier error");
1394  if (rank == 0 && verbose >= VERBOSE_1) {
1395  fprintf(out_logfile,
1396  "Commencing read performance test: %s\n",
1397  CurrentTimeString());
1398  }
1400  dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers);
1401  timer[IOR_TIMER_RDWR_STOP] = GetTimeStamp();
1402  if (params->intraTestBarriers)
1403  MPI_CHECK(MPI_Barrier(testComm),
1404  "barrier error");
1406  backend->close(fd, params->backend_options);
1408 
1409  /* check if stat() of file doesn't equal expected file size,
1410  use actual amount of byte moved */
1411  CheckFileSize(test, testFileName, dataMoved, rep, READ);
1412 
1413  ProcessIterResults(test, timer, rep, READ);
1414  }
1415 
/* remove the test file unless it is to be kept: either always (keepFile) or
   because an error was found and keepFileWithError is set; the removal is timed */
1416  if (!params->keepFile
1417  && !(params->errorFound && params->keepFileWithError)) {
1418  double start, finish;
1419  start = GetTimeStamp();
1420  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1421  RemoveFile(testFileName, params->filePerProc, params);
1422  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1423  finish = GetTimeStamp();
1424  PrintRemoveTiming(start, finish, rep);
1425  } else {
1426  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1427  }
/* reset the per-repetition state before the next iteration */
1428  params->errorFound = FALSE;
1429  rankOffset = 0;
1430 
1431  }
1432  PrintRepeatEnd();
1433 
/* NOTE(review): listing lines 1435-1436 are absent, so the first branch looks empty here -- confirm against the original file */
1434  if (params->summary_every_test) {
1437  } else {
1438  PrintShortSummary(test);
1439  }
1440 
1441  XferBuffersFree(&ioBuffers, params);
1442 
1443  if (hog_buf != NULL)
1444  free(hog_buf);
1445 }
1446 
1447 /*
1448  * Determine if valid tests from parameters.
1449  */
1450 static void ValidateTests(IOR_param_t * test, MPI_Comm com)
1451 {
1452  IOR_param_t defaults;
1453  init_IOR_Param_t(&defaults, com);
1454 
1455  if (test->gpuDirect && test->gpuMemoryFlags == IOR_MEMORY_TYPE_CPU )
1456  ERR("GPUDirect requires a non-CPU memory type");
1457  if (test->gpuMemoryFlags == IOR_MEMORY_TYPE_GPU_DEVICE_ONLY && ! test->gpuDirect )
1458  ERR("Using GPU Device memory only requires the usage of GPUDirect");
1459  if (test->stoneWallingStatusFile && test->keepFile == 0)
1460  ERR("a StoneWallingStatusFile is only sensible when splitting write/read into multiple executions of ior, please use -k");
1461  if (test->stoneWallingStatusFile && test->stoneWallingWearOut == 0 && test->writeFile)
1462  ERR("the StoneWallingStatusFile is only sensible for a write test when using stoneWallingWearOut");
1463  if (test->deadlineForStonewalling == 0 && test->stoneWallingWearOut > 0)
1464  ERR("the stoneWallingWearOut is only sensible when setting a stonewall deadline with -D");
1465  if (test->stoneWallingStatusFile && test->testscripts)
1466  WARN("the StoneWallingStatusFile only preserves the last experiment, make sure that each run uses a separate status file!");
1467  if (test->repetitions <= 0)
1468  WARN_RESET("too few test repetitions",
1469  test, &defaults, repetitions);
1470  if (test->numTasks <= 0)
1471  ERR("too few tasks for testing");
1472  if (test->interTestDelay < 0)
1473  WARN_RESET("inter-test delay must be nonnegative value",
1474  test, &defaults, interTestDelay);
1475  if (test->readFile != TRUE && test->writeFile != TRUE
1476  && test->checkRead != TRUE && test->checkWrite != TRUE)
1477  ERR("test must write, read, or check read/write file");
1478  if(! test->setTimeStampSignature && test->writeFile != TRUE && test->checkRead == TRUE)
1479  ERR("using readCheck only requires to write a timeStampSignature -- use -G");
1480  if (test->segmentCount < 0)
1481  ERR("segment count must be positive value");
1482  if ((test->blockSize % sizeof(IOR_size_t)) != 0)
1483  ERR("block size must be a multiple of access size");
1484  if (test->blockSize < 0)
1485  ERR("block size must be non-negative integer");
1486  if ((test->transferSize % sizeof(IOR_size_t)) != 0)
1487  ERR("transfer size must be a multiple of access size");
1488  if (test->transferSize < 0)
1489  ERR("transfer size must be non-negative integer");
1490  if (test->transferSize == 0) {
1491  ERR("test will not complete with zero transfer size");
1492  } else {
1493  if ((test->blockSize % test->transferSize) != 0)
1494  ERR("block size must be a multiple of transfer size");
1495  }
1496  if (test->blockSize < test->transferSize)
1497  ERR("block size must not be smaller than transfer size");
1498  if (test->randomOffset && test->blockSize == test->transferSize)
1499  ERR("IOR will randomize access within a block and repeats the same pattern for all segments, therefore choose blocksize > transferSize");
1500  if (! test->randomOffset && test->randomPrefillBlocksize)
1501  ERR("Setting the randomPrefill option without using random is not useful");
1502  if (test->randomPrefillBlocksize && (test->blockSize % test->randomPrefillBlocksize != 0))
1503  ERR("The randomPrefill option must divide the blockSize");
1504  /* specific APIs */
1505  if ((strcasecmp(test->api, "MPIIO") == 0)
1506  && (test->blockSize < sizeof(IOR_size_t)
1507  || test->transferSize < sizeof(IOR_size_t)))
1508  ERR("block/transfer size may not be smaller than IOR_size_t for MPIIO");
1509  if ((strcasecmp(test->api, "HDF5") == 0)
1510  && (test->blockSize < sizeof(IOR_size_t)
1511  || test->transferSize < sizeof(IOR_size_t)))
1512  ERR("block/transfer size may not be smaller than IOR_size_t for HDF5");
1513  if ((strcasecmp(test->api, "NCMPI") == 0)
1514  && (test->blockSize < sizeof(IOR_size_t)
1515  || test->transferSize < sizeof(IOR_size_t)))
1516  ERR("block/transfer size may not be smaller than IOR_size_t for NCMPI");
1517  if (((strcasecmp(test->api, "POSIX") != 0)
1518  && (strcasecmp(test->api, "MPIIO") != 0)
1519  && (strcasecmp(test->api, "HDF5") != 0)
1520  && (strcasecmp(test->api, "NCMPI") != 0)
1521  && (strcasecmp(test->api, "DUMMY") != 0)
1522  && (strcasecmp(test->api, "AIO") != 0)
1523  && (strcasecmp(test->api, "PMDK") != 0)
1524  && (strcasecmp(test->api, "MMAP") != 0)
1525  && (strcasecmp(test->api, "HDFS") != 0)
1526  && (strcasecmp(test->api, "DFS") != 0)
1527  && (strcasecmp(test->api, "Gfarm") != 0)
1528  && (strcasecmp(test->api, "RADOS") != 0)
1529  && (strcasecmp(test->api, "CEPHFS") != 0)) && test->fsync)
1530  WARN_RESET("fsync() not supported in selected backend",
1531  test, &defaults, fsync);
1532  /* parameter consistency */
1533  if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE)
1534  ERR("Both Constant and Random task re-ordering specified. Choose one and resubmit");
1535  if (test->randomOffset && test->reorderTasksRandom
1536  && test->filePerProc == FALSE)
1537  ERR("random offset and random reorder tasks specified with single-shared-file. Choose one and resubmit");
1538  if (test->randomOffset && test->reorderTasks
1539  && test->filePerProc == FALSE)
1540  ERR("random offset and constant reorder tasks specified with single-shared-file. Choose one and resubmit");
1541  if (test->randomOffset && test->checkRead && test->randomSeed == -1)
1542  ERR("random offset with read check option requires to set the random seed");
1543  if ((strcasecmp(test->api, "HDF5") == 0) && test->randomOffset)
1544  ERR("random offset not available with HDF5");
1545  if ((strcasecmp(test->api, "NCMPI") == 0) && test->randomOffset)
1546  ERR("random offset not available with NCMPI");
1547  if ((strcasecmp(test->api, "NCMPI") == 0) && test->filePerProc)
1548  ERR("file-per-proc not available in current NCMPI");
1549 
1550  backend = test->backend;
1551  ior_set_xfer_hints(test);
1552  /* allow the backend to validate the options */
1553  if(test->backend->check_params){
1554  int check = test->backend->check_params(test->backend_options);
1555  if (check){
1556  ERR("The backend returned that the test parameters are invalid.");
1557  }
1558  }
1559 }
1560 
1574 IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count)
1575 {
1576  int seed;
1577  IOR_offset_t i;
1578  IOR_offset_t offsets;
1579  IOR_offset_t offsetCnt = 0;
1580  IOR_offset_t *offsetArray;
1581 
1582  if (test->filePerProc) {
1583  /* set up seed, each process can determine which regions to access individually */
1584  if (test->randomSeed == -1) {
1585  seed = time(NULL);
1586  test->randomSeed = seed;
1587  } else {
1588  seed = test->randomSeed + pretendRank;
1589  }
1590  }else{
1591  /* Shared file requires that the seed is synchronized */
1592  if (test->randomSeed == -1) {
1593  // all processes need to have the same seed.
1594  if(rank == 0){
1595  seed = time(NULL);
1596  }
1597  MPI_CHECK(MPI_Bcast(& seed, 1, MPI_INT, 0, test->testComm), "cannot broadcast random seed value");
1598  test->randomSeed = seed;
1599  }else{
1600  seed = test->randomSeed;
1601  }
1602  }
1603  srandom(seed);
1604 
1605  /* count needed offsets (pass 1) */
1606  if (test->filePerProc) {
1607  offsets = test->blockSize / test->transferSize;
1608  }else{
1609  offsets = 0;
1610  for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) {
1611  // this counts which process get how many transferes in the shared file
1612  if ((rand() % test->numTasks) == pretendRank) {
1613  offsets++;
1614  }
1615  }
1616  }
1617 
1618  /* setup empty array */
1619  offsetArray = (IOR_offset_t *) safeMalloc(offsets * sizeof(IOR_offset_t));
1620 
1621  *out_count = offsets;
1622 
1623  if (test->filePerProc) {
1624  /* fill array */
1625  for (i = 0; i < offsets; i++) {
1626  offsetArray[i] = i * test->transferSize;
1627  }
1628  } else {
1629  /* fill with offsets (pass 2) */
1630  srandom(seed); /* need same seed to get same transfers as counted in the beginning*/
1631  for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) {
1632  if ((rand() % test->numTasks) == pretendRank) {
1633  offsetArray[offsetCnt] = i;
1634  offsetCnt++;
1635  }
1636  }
1637  }
1638  /* reorder array */
1639  for (i = 0; i < offsets; i++) {
1640  IOR_offset_t value, tmp;
1641  value = rand() % offsets;
1642  tmp = offsetArray[value];
1643  offsetArray[value] = offsetArray[i];
1644  offsetArray[i] = tmp;
1645  }
1646 
1647  return (offsetArray);
1648 }
1649 
1650 static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){
1651  IOR_offset_t amtXferred = 0;
1652 
1653  void *buffer = ioBuffers->buffer;
1654  if (access == WRITE) {
1655  /* fills each transfer with a unique pattern
1656  * containing the offset into the file */
1657  update_write_memory_pattern(offset, ioBuffers->buffer, transfer, test->setTimeStampSignature, pretendRank, test->dataPacketType, test->gpuMemoryFlags);
1658  amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1659  if (amtXferred != transfer)
1660  ERR("cannot write to file");
1661  if (test->fsyncPerWrite)
1662  backend->fsync(fd, test->backend_options);
1663  if (test->interIODelay > 0){
1664  struct timespec wait = {test->interIODelay / 1000 / 1000, 1000l * (test->interIODelay % 1000000)};
1665  nanosleep( & wait, NULL);
1666  }
1667  } else if (access == READ) {
1668  amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1669  if (amtXferred != transfer)
1670  ERR("cannot read from file");
1671  if (test->interIODelay > 0){
1672  struct timespec wait = {test->interIODelay / 1000 / 1000, 1000l * (test->interIODelay % 1000000)};
1673  nanosleep( & wait, NULL);
1674  }
1675  } else if (access == WRITECHECK) {
1676  invalidate_buffer_pattern(buffer, transfer, test->gpuMemoryFlags);
1677  amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1678  if (amtXferred != transfer)
1679  ERR("cannot read from file write check");
1680  *errors += CompareData(buffer, transfer, test, offset, pretendRank, WRITECHECK);
1681  } else if (access == READCHECK) {
1682  invalidate_buffer_pattern(buffer, transfer, test->gpuMemoryFlags);
1683  amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options);
1684  if (amtXferred != transfer){
1685  ERR("cannot read from file");
1686  }
1687  *errors += CompareData(buffer, transfer, test, offset, pretendRank, READCHECK);
1688  }
1689  return amtXferred;
1690 }
1691 
1692 static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pretendRank, aiori_fd_t *fd, IOR_io_buffers *ioBuffers, int startSegment, int endSegment){
1693  // prefill the whole file already with an invalid pattern
1694  int offsets = test->blockSize / test->randomPrefillBlocksize;
1695  void * oldBuffer = ioBuffers->buffer;
1696  int errors;
1697  ioBuffers->buffer = randomPrefillBuffer;
1698  for (IOR_offset_t i = startSegment; i < endSegment; i++){
1699  for (int j = 0; j < offsets; j++) {
1700  IOR_offset_t offset = j * test->randomPrefillBlocksize;
1701  if (test->filePerProc) {
1702  offset += i * test->blockSize;
1703  } else {
1704  offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
1705  }
1706  WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & errors, test, fd, ioBuffers, WRITE);
1707  }
1708  }
1709  ioBuffers->buffer = oldBuffer;
1710 }
1711 
1712 /*
1713  * Write or Read data to file(s). This loops through the strides, writing
1714  * out the data to each block in transfer sizes, until the remainder left is 0.
 *
 * The access loop visits every segment and, inside a segment, every offset of
 * this task, issuing one WriteOrReadSingle() call per transfer.  It stops early
 * once the stonewall deadline has passed or the wear-out pair count has been
 * reached, and repeats as a whole until test->minTimeDuration has elapsed.
 * With stoneWallingWearOut set, the tasks agree on the maximum number of pairs
 * any task accessed and the slower tasks catch up to it.
 *
 * Returns the amount of data moved by this task.
 *
 * NOTE(review): listing line 1716 (the first line of the signature) and line
 * 1823 are absent from this listing -- confirm against the original file.
1715  */
1717  aiori_fd_t *fd, const int access, IOR_io_buffers *ioBuffers)
1718 {
1719  int errors = 0;
1720  uint64_t pairCnt = 0;
1721  int pretendRank;
1722  IOR_offset_t dataMoved = 0; /* for data rate calculation */
1723  double startForStonewall;
1724  int hitStonewall;
1725  IOR_offset_t i, j;
/* write and write-check results are stored in results->write, all other accesses in results->read */
1726  IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ?
1727  &results->write : &results->read;
1728 
1729  /* initialize values */
1730  pretendRank = (rank + rankOffset) % test->numTasks;
1731 
1732  // offsetArray = GetOffsetArraySequential(test, pretendRank);
1733 
/* offsets is the number of transfers this task issues per segment.
   NOTE(review): offsets_rnd is only set when randomOffset is enabled and no release of the
   array returned by GetOffsetArrayRandom() is visible in this function -- confirm who frees it */
1734  IOR_offset_t offsets;
1735  IOR_offset_t * offsets_rnd;
1736  if (test->randomOffset) {
1737  offsets_rnd = GetOffsetArrayRandom(test, pretendRank, & offsets);
1738  }else{
1739  offsets = (test->blockSize / test->transferSize);
1740  }
1741 
1742  void * randomPrefillBuffer = NULL;
1743  if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){
1744  randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize, test->gpuMemoryFlags);
1745  // store invalid data into the buffer
1746  memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize);
1747  }
1748 
1749  // start timer after random offset was generated
1750  startForStonewall = GetTimeStamp();
1751  hitStonewall = 0;
1752 
/* without a stonewall deadline all segments are prefilled up front */
1753  if(randomPrefillBuffer && test->deadlineForStonewalling == 0){
1754  double t_start = GetTimeStamp();
1755  prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, 0, test->segmentCount);
1756  if(rank == 0 && verbose > VERBOSE_1){
1757  fprintf(out_logfile, "Random prefill took: %fs\n", GetTimeStamp() - t_start);
1758  }
1759  // must synchronize processes to ensure they are not running ahead
1760  MPI_Barrier(test->testComm);
1761  }
1762 
1763  do{ // to ensure the benchmark runs a certain time
1764  for (i = 0; i < test->segmentCount && !hitStonewall; i++) {
1765  if(randomPrefillBuffer && test->deadlineForStonewalling != 0){
1766  // prefill the whole segment with data, this needs to be done collectively
1767  double t_start = GetTimeStamp();
1768  prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, i, i+1);
1769  MPI_Barrier(test->testComm);
1770  if(rank == 0 && verbose > VERBOSE_1){
1771  fprintf(out_logfile, "Random: synchronizing segment count with barrier and prefill took: %fs\n", GetTimeStamp() - t_start);
1772  }
1773  }
1774  for (j = 0; j < offsets && !hitStonewall ; j++) {
/* compute the file offset of transfer j in segment i; a shared file interleaves the blocks of all tasks per segment */
1775  IOR_offset_t offset;
1776  if (test->randomOffset) {
1777  if(test->filePerProc){
1778  offset = offsets_rnd[j] + (i * test->blockSize);
1779  }else{
1780  offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize);
1781  }
1782  }else{
1783  offset = j * test->transferSize;
1784  if (test->filePerProc) {
1785  offset += i * test->blockSize;
1786  } else {
1787  offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
1788  }
1789  }
1790  dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access);
1791  pairCnt++;
1792 
/* stop when the stonewall deadline has passed or the wear-out pair count has been reached */
1793  hitStonewall = ((test->deadlineForStonewalling != 0
1794  && (GetTimeStamp() - startForStonewall) > test->deadlineForStonewalling))
1795  || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ;
1796 
1797  if ( test->collective && test->deadlineForStonewalling ) {
1798  // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop
1799  // it absolutely must be an 'all or none':
1800  MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, testComm), "hitStonewall broadcast failed");
1801  }
1802  }
1803  }
1804  } while((GetTimeStamp() - startForStonewall) < test->minTimeDuration);
1805  if (test->stoneWallingWearOut){
1806  if (verbose >= VERBOSE_1){
1807  fprintf(out_logfile, "%d: stonewalling pairs accessed: %lld\n", rank, (long long) pairCnt);
1808  }
1809  long long data_moved_ll = (long long) dataMoved;
1810  long long pairs_accessed_min = 0;
/* NOTE(review): pairCnt is a uint64_t and point->pairs_accessed is reduced through MPI_LONG_LONG_INT as well -- confirm both match that datatype */
1811  MPI_CHECK(MPI_Allreduce(& pairCnt, &point->pairs_accessed,
1812  1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved");
1813  double stonewall_runtime = GetTimeStamp() - startForStonewall;
1814  point->stonewall_time = stonewall_runtime;
1815  MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min,
1816  1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
1817  MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed,
1818  1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
1819  MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_total_data_accessed,
1820  1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved");
1821 
1822  if(rank == 0){
/* NOTE(review): stonewall_avg_data_accessed is printed below but no assignment to it is visible (listing line 1823 is absent) -- confirm */
1824  fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n",
1825  pairs_accessed_min, point->pairs_accessed,
1826  point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 , point->stonewall_time);
1827  }
/* this task accessed fewer pairs than the maximum over all tasks: continue from where the access loop stopped until it has caught up */
1828  if(pairCnt != point->pairs_accessed){
1829  // some work needs still to be done, complete the current block !
1830  i--;
1831  if(j == offsets){
1832  j = 0; // current block is completed
1833  i++;
1834  }
1835  for ( ; pairCnt < point->pairs_accessed; i++) {
1836  if(i == test->segmentCount) i = 0; // wrap over, necessary to deal with minTimeDuration
1837  for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) {
1838  IOR_offset_t offset;
1839  if (test->randomOffset) {
1840  if(test->filePerProc){
1841  offset = offsets_rnd[j] + (i * test->blockSize);
1842  }else{
1843  offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize);
1844  }
1845  }else{
1846  offset = j * test->transferSize;
1847  if (test->filePerProc) {
1848  offset += i * test->blockSize;
1849  } else {
1850  offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize);
1851  }
1852  }
1853  dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & errors, test, fd, ioBuffers, access);
1854  pairCnt++;
1855  }
1856  j = 0;
1857  }
1858  }
1859  }else{
1860  point->pairs_accessed = pairCnt;
1861  }
1862 
/* add the errors counted for this access to the file-scope total */
1863  totalErrorCount += CountErrors(test, access, errors);
1864 
1865  if (access == WRITE && test->fsync == TRUE) {
1866  backend->fsync(fd, test->backend_options); /*fsync after all accesses */
1867  }
1868  if(randomPrefillBuffer){
1869  aligned_buffer_free(randomPrefillBuffer, test->gpuMemoryFlags);
1870  }
1871 
1872  return (dataMoved);
1873 }
void invalidate_buffer_pattern(char *buffer, size_t bytes, ior_memory_flags type)
Definition: utilities.c:172
int reorderTasks
Definition: ior.h:103
int uniqueDir
Definition: ior.h:120
#define ERRF(FORMAT,...)
Definition: aiori-debug.h:67
void init_IOR_Param_t(IOR_param_t *p, MPI_Comm com)
Definition: ior.c:254
int GetNumTasks(MPI_Comm comm)
Definition: utilities.c:513
IOR_test_t * ParseCommandLine(int argc, char **argv, MPI_Comm com)
int reorderTasksRandomSeed
Definition: ior.h:106
int ior_main(int argc, char **argv)
Definition: ior.c:189
size_t pairs_accessed
Definition: ior.h:162
int warningAsErrors
Definition: ior.h:154
long long stonewall_avg_data_accessed
Definition: ior.h:166
unsigned long GetProcessorAndCore(int *chip, int *core)
Definition: utilities.c:1057
MPI_Comm mpi_comm_world
Definition: ior.h:84
int errors
Definition: ior.h:175
int multiFile
Definition: ior.h:96
static void file_hits_histogram(IOR_param_t *params)
Definition: ior.c:948
static void DisplayOutliers(int numTasks, double timerVal, char *timeString, int access, int outlierThreshold)
Definition: ior.c:294
void PrintTestEnds()
Definition: ior-output.c:216
IOR_offset_t segmentCount
Definition: aiori.h:71
unsigned int incompressibleSeed
Definition: ior.h:134
#define VERBOSE_0
Definition: iordef.h:106
static void ValidateTests(IOR_param_t *params, MPI_Comm com)
Definition: ior.c:1450
char * GetPlatformName()
Definition: ior.c:551
IOR_offset_t aggFileSizeFromStat
Definition: ior.h:169
unsigned int timeStampSignatureValue
Definition: ior.h:132
int filePerProc
Definition: ior.h:102
FILE * out_logfile
Definition: utilities.c:74
int gpuID
Definition: ior.h:89
#define VERBOSE_3
Definition: iordef.h:109
double stonewall_time
Definition: ior.h:164
int repetitions
Definition: ior.h:94
int64_t ReadStoneWallingIterations(char *const filename, MPI_Comm com)
Definition: utilities.c:936
IOR_offset_t segmentCount
Definition: ior.h:112
IOR_offset_t blockSize
Definition: aiori.h:72
int keepFile
Definition: ior.h:109
void PrintHeader(int argc, char **argv)
Definition: ior-output.c:274
char ** environ
static void XferBuffersFree(IOR_io_buffers *ioBuffers, IOR_param_t *test)
Definition: ior.c:915
int checkRead
Definition: ior.h:108
void PrintLongSummaryOneTest(IOR_test_t *test)
Definition: ior-output.c:642
int test_time_elapsed(IOR_param_t *params, double startTime)
Definition: ior.c:993
int numTasksOnNode0
Definition: ior.h:92
void(* delete)(char *, aiori_mod_opt_t *module_options)
Definition: aiori.h:100
void FreeResults(IOR_test_t *test)
Definition: ior.c:466
IOR_offset_t transferSize
Definition: ior.h:114
size_t memoryPerNode
Definition: ior.h:137
#define WRITECHECK
Definition: iordef.h:101
IOR_param_t params
Definition: ior.h:182
void PrintLongSummaryHeader()
Definition: ior-output.c:652
ior_memory_flags gpuMemoryFlags
Definition: ior.h:87
#define READCHECK
Definition: iordef.h:103
int(* mkdir)(const char *path, mode_t mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:105
int errorFound
Definition: ior.h:111
IOR_offset_t aggFileSizeFromXfer
Definition: ior.h:170
double sd
Definition: ior-internal.h:35
int QueryNodeMapping(MPI_Comm comm, int print_nodemap)
Definition: utilities.c:402
static int totalErrorCount
Definition: ior.c:60
#define FAIL(...)
Definition: aiori-debug.h:16
int numTasks
static void * HogMemory(IOR_param_t *params)
Definition: ior.c:1008
int summary_every_test
Definition: ior.h:119
IOR_offset_t * GetOffsetArrayRandom(IOR_param_t *test, int pretendRank, IOR_offset_t *out_count)
Definition: ior.c:1574
static void DestroyTest(IOR_test_t *test)
Definition: ior.c:493
static void ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int access)
Definition: ior.c:744
int numNodes
Definition: ior.h:91
int setTimeStampSignature
Definition: ior.h:131
IOR_offset_t expectedAggFileSize
Definition: aiori.h:74
#define IOR_RDONLY
Definition: aiori.h:28
int(* access)(const char *path, int mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:107
#define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER)
Definition: aiori-debug.h:20
int fsyncPerWrite
Definition: ior.h:147
int interTestDelay
Definition: ior.h:97
#define MPI_CHECK(MPI_STATUS, MSG)
Definition: aiori-debug.h:97
#define WRITE
Definition: iordef.h:100
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE *world_out)
Definition: ior.c:155
int maxTimeDuration
Definition: ior.h:128
char * testFileName
Definition: ior.h:79
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, aiori_fd_t *fd, const int access, IOR_io_buffers *ioBuffers)
Definition: ior.c:1716
char * stoneWallingStatusFile
Definition: ior.h:126
void ShowTestStart(IOR_param_t *params)
Definition: ior-output.c:328
#define READ
Definition: iordef.h:102
void * backend_options
Definition: ior.h:143
MPI_Comm testComm
Definition: ior.h:83
int taskPerNodeOffset
Definition: ior.h:104
#define IOR_CREAT
Definition: aiori.h:32
static char ** ParseFileName(char *, int *)
Definition: ior.c:595
double sum
Definition: ior-internal.h:36
int fsync
Definition: ior.h:148
double var
Definition: ior-internal.h:34
struct IOR_test_t * next
Definition: ior.h:184
ior_dataPacketType_e dataPacketType
Definition: ior.h:141
char * testscripts
Definition: ior.h:139
int outlierThreshold
Definition: ior.h:129
static void ProcessIterResults(IOR_test_t *test, double *timer, const int rep, const int access)
Definition: ior.c:1133
int intraTestBarriers
Definition: ior.h:153
void GetTestFileName(char *testFileName, IOR_param_t *test)
Definition: ior.c:636
MPI_Comm testComm
Definition: utilities.c:73
int reorderTasksRandom
Definition: ior.h:105
int(* check_params)(aiori_mod_opt_t *)
Definition: aiori.h:113
int checkWrite
Definition: ior.h:107
IOR_point_t write
Definition: ior.h:176
#define IOR_TRUNC
Definition: aiori.h:33
static void DistributeHints(MPI_Comm com)
Definition: ior.c:512
void ShowSetup(IOR_param_t *params)
Definition: ior-output.c:415
static aiori_xfer_hint_t * hints
Definition: aiori-aio.c:75
Definition: ior.h:49
void init_clock(MPI_Comm com)
Definition: utilities.c:917
IOR_offset_t aggFileSizeForBW
Definition: ior.h:171
void(* initialize)(aiori_mod_opt_t *options)
Definition: aiori.h:109
int verbose
Definition: ior.h:130
static void XferBuffersSetup(IOR_io_buffers *ioBuffers, IOR_param_t *test, int pretendRank)
Definition: ior.c:906
char * CurrentTimeString(void)
Definition: utilities.c:356
void PrintRemoveTiming(double start, double finish, int rep)
Definition: ior-output.c:742
#define WARN(MSG)
Definition: aiori-debug.h:45
void(* fsync)(aiori_fd_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:102
int collective
Definition: aiori.h:66
static int test_initialize(IOR_test_t *test)
Definition: ior.c:98
double time
Definition: ior.h:161
IOR_point_t read
Definition: ior.h:177
static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t *test)
Definition: ior.c:805
static void CheckForOutliers(IOR_param_t *test, const double *timer, const int access)
Definition: ior.c:335
IOR_offset_t(* get_file_size)(aiori_mod_opt_t *module_options, char *filename)
Definition: aiori.h:103
int dryRun
Definition: ior.h:85
IOR_offset_t expectedAggFileSize
Definition: ior.h:115
char * platform
Definition: ior.h:78
int singleXferAttempt
Definition: aiori.h:75
int GetNumNodes(MPI_Comm comm)
Definition: utilities.c:476
int gpuDirect
Definition: ior.h:88
void initCUDA(int blockMapping, int rank, int numNodes, int tasksPerNode, int useGPUID)
Definition: utilities.c:438
int singleXferAttempt
Definition: ior.h:146
static void DestroyTests(IOR_test_t *tests_head)
Definition: ior.c:499
static size_t CompareData(void *expectedBuffer, size_t size, IOR_param_t *test, IOR_offset_t offset, int fillrank, int access)
Definition: ior.c:418
void(* xfer_hints)(aiori_xfer_hint_t *params)
Definition: aiori.h:96
void(* close)(aiori_fd_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:99
int aiori_warning_as_errors
Definition: ior.c:93
int(* stat)(const char *path, struct stat *buf, aiori_mod_opt_t *module_options)
Definition: aiori.h:108
int interIODelay
Definition: ior.h:98
static void CheckFileSize(IOR_test_t *test, char *testFilename, IOR_offset_t dataMoved, int rep, const int access)
Definition: ior.c:357
char * saveRankDetailsCSV
Definition: ior.h:118
FILE * out_resultfile
Definition: utilities.c:75
double GetTimeStamp(void)
Definition: utilities.c:876
static void WriteTimes(IOR_param_t *test, const double *timer, const int iteration, const int access)
Definition: ior.c:1037
static void prefillSegment(IOR_param_t *test, void *randomPrefillBuffer, int pretendRank, aiori_fd_t *fd, IOR_io_buffers *ioBuffers, int startSegment, int endSegment)
Definition: ior.c:1692
IOR_offset_t transferSize
Definition: aiori.h:73
void PrintShortSummary(IOR_test_t *test)
Definition: ior-output.c:695
static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, int *errors, IOR_param_t *test, aiori_fd_t *fd, IOR_io_buffers *ioBuffers, int access)
Definition: ior.c:1650
int stoneWallingWearOut
Definition: ior.h:123
static const ior_aiori_t * backend
Definition: ior.c:61
void PrintRepeatEnd()
Definition: ior-output.c:201
aiori_fd_t *(* create)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:90
long long stonewall_min_data_accessed
Definition: ior.h:165
IOR_offset_t(* xfer)(int access, aiori_fd_t *, IOR_size_t *, IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t *module_options)
Definition: aiori.h:97
IOR_test_t * CreateTest(IOR_param_t *init_params, int test_num)
Definition: ior.c:477
aiori_xfer_hint_t hints
Definition: ior.h:156
char * URI
Definition: ior.h:150
static void TestIoSys(IOR_test_t *)
Definition: ior.c:1151
void * buffer
Definition: ior.h:51
void PrintTableHeader()
Definition: ior-output.c:18
void PrintLongSummaryAllTests(IOR_test_t *tests_head)
Definition: ior-output.c:671
#define IOR_WRONLY
Definition: aiori.h:29
void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency, double *diff_subset, double totalTime, int rep)
Definition: ior-output.c:229
int keepFileWithError
Definition: ior.h:110
int randomSeed
Definition: ior.h:133
#define FALSE
Definition: iordef.h:76
int rankOffset
Definition: utilities.c:71
int useExistingTestFile
Definition: ior.h:121
void update_write_memory_pattern(uint64_t item, char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:94
static void StoreRankInformation(IOR_test_t *test, double *timer, const int rep, const int access)
Definition: ior.c:1100
#define WARNF(FORMAT,...)
Definition: aiori-debug.h:30
int readFile
Definition: ior.h:100
long long int IOR_size_t
Definition: iordef.h:124
int tasksBlockMapping
Definition: ior.h:93
int randomOffset
Definition: ior.h:135
int numTasks
Definition: ior.h:90
size_t memoryPerTask
Definition: ior.h:136
void(* finalize)(aiori_mod_opt_t *options)
Definition: aiori.h:110
const char * aiori_default(void)
Definition: aiori.c:299
#define VERBOSE_2
Definition: iordef.h:108
void generate_memory_pattern(char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:137
int writeFile
Definition: ior.h:101
uint64_t stoneWallingWearOutIterations
Definition: ior.h:125
int verbose
Definition: utilities.c:72
#define MAX_STR
Definition: iordef.h:113
#define MAX_HINTS
Definition: iordef.h:114
int collective
Definition: ior.h:82
int randomOffset
Definition: aiori.h:69
static int CountErrors(IOR_param_t *test, int access, int errors)
Definition: ior.c:427
#define VERBOSE_4
Definition: iordef.h:110
IOR_offset_t randomPrefillBlocksize
Definition: ior.h:116
#define MAX_PATHLEN
Definition: utilities.h:31
double mean
Definition: ior-internal.h:33
static void * malloc_and_touch(size_t size)
Definition: ior.c:926
int open
Definition: ior.h:99
aiori_fd_t *(* open)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:92
const struct ior_aiori * backend
Definition: ior.h:73
int dualMount
Definition: ior.h:86
#define ERR(MSG)
Definition: aiori-debug.h:75
static char * PrependDir(IOR_param_t *, char *)
Definition: ior.c:691
int verify_memory_pattern(uint64_t item, char *buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:182
void DelaySecs(int delay)
Definition: utilities.c:974
#define VERBOSE_1
Definition: iordef.h:107
IOR_results_t * results
Definition: ior.h:183
int deadlineForStonewalling
Definition: ior.h:122
char * api
Definition: ior.h:76
#define FILENAME_DELIMITER
Definition: iordef.h:121
static void test_finalize(IOR_test_t *test)
Definition: ior.c:145
int repCounter
Definition: ior.h:95
long long stonewall_total_data_accessed
Definition: ior.h:167
int fsyncPerWrite
Definition: aiori.h:70
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
Definition: utilities.c:1119
int filePerProc
Definition: aiori.h:65
static void ior_set_xfer_hints(IOR_param_t *p)
Definition: ior.c:73
static void InitTests(IOR_test_t *)
Definition: ior.c:841
long long int IOR_offset_t
Definition: iordef.h:123
int rank
Definition: utilities.c:70
IOR_offset_t blockSize
Definition: ior.h:113
int GetNumTasksOnNode0(MPI_Comm comm)
Definition: utilities.c:551
#define TRUE
Definition: iordef.h:80
int minTimeDuration
Definition: ior.h:124
void ShowTestEnd(IOR_test_t *tptr)
Definition: ior-output.c:399
void * safeMalloc(uint64_t size)
Definition: utilities.c:238
#define NULL
Definition: iordef.h:84
int id
Definition: ior.h:152
void AllocResults(IOR_test_t *test)
Definition: ior.c:456
void * aligned_buffer_alloc(size_t size, ior_memory_flags type)
Definition: utilities.c:1070