IOR
ior.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  */
4 /******************************************************************************\
5 * *
6 * Copyright (c) 2003, The Regents of the University of California *
7 * See the file COPYRIGHT for a complete copyright notice and license. *
8 * *
9 \******************************************************************************/
10 
11 #ifdef HAVE_CONFIG_H
12 # include "config.h"
13 #endif
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 #include <ctype.h> /* tolower() */
19 #include <errno.h>
20 #include <math.h>
21 #include <mpi.h>
22 #include <string.h>
23 #include <sys/stat.h> /* struct stat */
24 #include <time.h>
25 
26 #ifndef _WIN32
27 # include <sys/time.h> /* gettimeofday() */
28 # include <sys/utsname.h> /* uname() */
29 #endif
30 
31 #include <assert.h>
32 
33 #include "ior.h"
34 #include "ior-internal.h"
35 #include "aiori.h"
36 #include "utilities.h"
37 #include "parse_options.h"
38 
39 
40 /* file scope globals */
41 extern char **environ;
42 static int totalErrorCount;
43 static const ior_aiori_t *backend;
44 
45 static void DestroyTests(IOR_test_t *tests_head);
46 static char *PrependDir(IOR_param_t *, char *);
47 static char **ParseFileName(char *, int *);
48 static void InitTests(IOR_test_t * , MPI_Comm);
49 static void TestIoSys(IOR_test_t *);
50 static void ValidateTests(IOR_param_t *);
51 static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers);
52 static void WriteTimes(IOR_param_t *, double **, int, int);
53 
54 IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out){
55  IOR_test_t *tests_head;
56  IOR_test_t *tptr;
57  out_logfile = world_out;
58  out_resultfile = world_out;
59  mpi_comm_world = world_com;
60 
61  MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), "cannot get number of tasks");
62  MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
64 
65  /* setup tests, and validate parameters */
66  tests_head = ParseCommandLine(argc, argv);
67  InitTests(tests_head, world_com);
68  verbose = tests_head->params.verbose;
69 
70  PrintHeader(argc, argv);
71 
72  /* perform each test */
73  for (tptr = tests_head; tptr != NULL; tptr = tptr->next) {
74  totalErrorCount = 0;
75  verbose = tptr->params.verbose;
76  if (rank == 0 && verbose >= VERBOSE_0) {
77  ShowTestStart(&tptr->params);
78  }
79  TestIoSys(tptr);
81  ShowTestEnd(tptr);
82  }
83 
84  PrintLongSummaryAllTests(tests_head);
85 
86  /* display finish time */
87  PrintTestEnds();
88  return tests_head;
89 }
90 
91 
92 
93 int ior_main(int argc, char **argv)
94 {
95  IOR_test_t *tests_head;
96  IOR_test_t *tptr;
97 
98  out_logfile = stdout;
99  out_resultfile = stdout;
100 
102 
103  /*
104  * check -h option from commandline without starting MPI;
105  */
106  tests_head = ParseCommandLine(argc, argv);
107 
108  /* start the MPI code */
109  MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI");
110 
111  mpi_comm_world = MPI_COMM_WORLD;
112  MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld),
113  "cannot get number of tasks");
114  MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
115 
117 
118  /* set error-handling */
119  /*MPI_CHECK(MPI_Errhandler_set(mpi_comm_world, MPI_ERRORS_RETURN),
120  "cannot set errhandler"); */
121 
122  /* setup tests, and validate parameters */
123  InitTests(tests_head, mpi_comm_world);
124  verbose = tests_head->params.verbose;
125 
126  PrintHeader(argc, argv);
127 
128  /* perform each test */
129  for (tptr = tests_head; tptr != NULL; tptr = tptr->next) {
130  verbose = tptr->params.verbose;
131  if (rank == 0 && verbose >= VERBOSE_0) {
132  ShowTestStart(&tptr->params);
133  }
134 
135  // This is useful for trapping a running MPI process. While
136  // this is sleeping, run the script 'testing/hdfs/gdb.attach'
137  if (verbose >= VERBOSE_4) {
138  fprintf(out_logfile, "\trank %d: sleeping\n", rank);
139  sleep(5);
140  fprintf(out_logfile, "\trank %d: awake.\n", rank);
141  }
142  TestIoSys(tptr);
143  ShowTestEnd(tptr);
144  }
145 
146  if (verbose < 0)
147  /* always print final summary */
148  verbose = 0;
149  PrintLongSummaryAllTests(tests_head);
150 
151  /* display finish time */
152  PrintTestEnds();
153 
154  DestroyTests(tests_head);
155 
156  MPI_CHECK(MPI_Finalize(), "cannot finalize MPI");
157 
158  aiori_finalize();
159 
160  return totalErrorCount;
161 }
162 
163 /***************************** F U N C T I O N S ******************************/
164 
165 /*
166  * Initialize an IOR_param_t structure to the defaults
167  */
169 {
170  const char *default_aiori = aiori_default ();
171  char *hdfs_user;
172 
173  assert (NULL != default_aiori);
174 
175  memset(p, 0, sizeof(IOR_param_t));
176 
179 
180  p->api = strdup(default_aiori);
181  p->platform = strdup("HOST(OSTYPE)");
182  p->testFileName = strdup("testFile");
183 
184  p->writeFile = p->readFile = FALSE;
185  p->checkWrite = p->checkRead = FALSE;
186 
187  p->nodes = 1;
188  p->tasksPerNode = 1;
189  p->repetitions = 1;
190  p->repCounter = -1;
191  p->open = WRITE;
192  p->taskPerNodeOffset = 1;
193  p->segmentCount = 1;
194  p->blockSize = 1048576;
195  p->transferSize = 262144;
196  p->randomSeed = -1;
197  p->incompressibleSeed = 573;
199  p->setAlignment = 1;
200  p->lustre_start_ost = -1;
201 
202  hdfs_user = getenv("USER");
203  if (!hdfs_user)
204  hdfs_user = "";
205  p->hdfs_user = strdup(hdfs_user);
206  p->hdfs_name_node = "default";
207  p->hdfs_name_node_port = 0; /* ??? */
208  p->hdfs_fs = NULL;
209  p->hdfs_replicas = 0; /* invokes the default */
210  p->hdfs_block_size = 0;
211 
212  p->URI = NULL;
213  p->part_number = 0;
214 
215  p->beegfs_numTargets = -1;
216  p->beegfs_chunkSize = -1;
217 
218  p->mmap_ptr = NULL;
219 }
220 
221 static void
223  double timerVal,
224  char *timeString, int access, int outlierThreshold)
225 {
226  char accessString[MAX_STR];
227  double sum, mean, sqrDiff, var, sd;
228 
229  /* for local timerVal, don't compensate for wall clock delta */
230  timerVal += wall_clock_delta;
231 
232  MPI_CHECK(MPI_Allreduce
233  (&timerVal, &sum, 1, MPI_DOUBLE, MPI_SUM, testComm),
234  "MPI_Allreduce()");
235  mean = sum / numTasks;
236  sqrDiff = pow((mean - timerVal), 2);
237  MPI_CHECK(MPI_Allreduce
238  (&sqrDiff, &var, 1, MPI_DOUBLE, MPI_SUM, testComm),
239  "MPI_Allreduce()");
240  var = var / numTasks;
241  sd = sqrt(var);
242 
243  if (access == WRITE) {
244  strcpy(accessString, "write");
245  } else { /* READ */
246  strcpy(accessString, "read");
247  }
248  if (fabs(timerVal - mean) > (double)outlierThreshold) {
249  fprintf(out_logfile, "WARNING: for task %d, %s %s is %f\n",
250  rank, accessString, timeString, timerVal);
251  fprintf(out_logfile, " (mean=%f, stddev=%f)\n", mean, sd);
252  fflush(out_logfile);
253  }
254 }
255 
256 /*
257  * Check for outliers in start/end times and elapsed create/xfer/close times.
258  */
259 static void CheckForOutliers(IOR_param_t * test, double **timer, int rep,
260  int access)
261 {
262  int shift;
263 
264  if (access == WRITE) {
265  shift = 0;
266  } else { /* READ */
267  shift = 6;
268  }
269 
270  DisplayOutliers(test->numTasks, timer[shift + 0][rep],
271  "start time", access, test->outlierThreshold);
273  timer[shift + 1][rep] - timer[shift + 0][rep],
274  "elapsed create time", access, test->outlierThreshold);
276  timer[shift + 3][rep] - timer[shift + 2][rep],
277  "elapsed transfer time", access,
278  test->outlierThreshold);
280  timer[shift + 5][rep] - timer[shift + 4][rep],
281  "elapsed close time", access, test->outlierThreshold);
282  DisplayOutliers(test->numTasks, timer[shift + 5][rep], "end time",
283  access, test->outlierThreshold);
284 
285 }
286 
287 /*
288  * Check if actual file size equals expected size; if not use actual for
289  * calculating performance rate.
290  */
291 static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
292 {
293  IOR_param_t *params = &test->params;
294  IOR_results_t *results = test->results;
295 
296  MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer,
297  1, MPI_LONG_LONG_INT, MPI_SUM, testComm),
298  "cannot total data moved");
299 
300  if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) {
301  if (verbose >= VERBOSE_0 && rank == 0) {
302  if ((params->expectedAggFileSize
303  != results[rep].aggFileSizeFromXfer)
304  || (results[rep].aggFileSizeFromStat
305  != results[rep].aggFileSizeFromXfer)) {
306  fprintf(out_logfile,
307  "WARNING: Expected aggregate file size = %lld.\n",
308  (long long) params->expectedAggFileSize);
309  fprintf(out_logfile,
310  "WARNING: Stat() of aggregate file size = %lld.\n",
311  (long long) results[rep].aggFileSizeFromStat);
312  fprintf(out_logfile,
313  "WARNING: Using actual aggregate bytes moved = %lld.\n",
314  (long long) results[rep].aggFileSizeFromXfer);
315  if(params->deadlineForStonewalling){
316  fprintf(out_logfile,
317  "WARNING: maybe caused by deadlineForStonewalling\n");
318  }
319  }
320  }
321  }
322  results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer;
323 }
324 
325 /*
326  * Compare buffers after reading/writing each transfer. Displays only first
327  * difference in buffers and returns total errors counted.
328  */
329 static size_t
330 CompareBuffers(void *expectedBuffer,
331  void *unknownBuffer,
332  size_t size,
333  IOR_offset_t transferCount, IOR_param_t *test, int access)
334 {
335  char testFileName[MAX_PATHLEN];
336  char bufferLabel1[MAX_STR];
337  char bufferLabel2[MAX_STR];
338  size_t i, j, length, first, last;
339  size_t errorCount = 0;
340  int inError = 0;
341  unsigned long long *goodbuf = (unsigned long long *)expectedBuffer;
342  unsigned long long *testbuf = (unsigned long long *)unknownBuffer;
343 
344  if (access == WRITECHECK || access == READCHECK) {
345  strcpy(bufferLabel1, "Expected: ");
346  strcpy(bufferLabel2, "Actual: ");
347  } else {
348  ERR("incorrect argument for CompareBuffers()");
349  }
350 
351  length = size / sizeof(IOR_size_t);
352  first = -1;
353  if (verbose >= VERBOSE_3) {
354  fprintf(out_logfile,
355  "[%d] At file byte offset %lld, comparing %llu-byte transfer\n",
356  rank, test->offset, (long long)size);
357  }
358  for (i = 0; i < length; i++) {
359  if (testbuf[i] != goodbuf[i]) {
360  errorCount++;
361  if (verbose >= VERBOSE_2) {
362  fprintf(out_logfile,
363  "[%d] At transfer buffer #%lld, index #%lld (file byte offset %lld):\n",
364  rank, transferCount - 1, (long long)i,
365  test->offset +
366  (IOR_size_t) (i * sizeof(IOR_size_t)));
367  fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1);
368  fprintf(out_logfile, "%016llx\n", goodbuf[i]);
369  fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel2);
370  fprintf(out_logfile, "%016llx\n", testbuf[i]);
371  }
372  if (!inError) {
373  inError = 1;
374  first = i;
375  last = i;
376  } else {
377  last = i;
378  }
379  } else if (verbose >= VERBOSE_5 && i % 4 == 0) {
380  fprintf(out_logfile,
381  "[%d] PASSED offset = %lld bytes, transfer %lld\n",
382  rank,
383  ((i * sizeof(unsigned long long)) +
384  test->offset), transferCount);
385  fprintf(out_logfile, "[%d] GOOD %s0x", rank, bufferLabel1);
386  for (j = 0; j < 4; j++)
387  fprintf(out_logfile, "%016llx ", goodbuf[i + j]);
388  fprintf(out_logfile, "\n[%d] GOOD %s0x", rank, bufferLabel2);
389  for (j = 0; j < 4; j++)
390  fprintf(out_logfile, "%016llx ", testbuf[i + j]);
391  fprintf(out_logfile, "\n");
392  }
393  }
394  if (inError) {
395  inError = 0;
396  GetTestFileName(testFileName, test);
397  fprintf(out_logfile,
398  "[%d] FAILED comparison of buffer containing %d-byte ints:\n",
399  rank, (int)sizeof(unsigned long long int));
400  fprintf(out_logfile, "[%d] File name = %s\n", rank, testFileName);
401  fprintf(out_logfile, "[%d] In transfer %lld, ", rank,
402  transferCount);
403  fprintf(out_logfile,
404  "%lld errors between buffer indices %lld and %lld.\n",
405  (long long)errorCount, (long long)first,
406  (long long)last);
407  fprintf(out_logfile, "[%d] File byte offset = %lld:\n", rank,
408  ((first * sizeof(unsigned long long)) + test->offset));
409 
410  fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1);
411  for (j = first; j < length && j < first + 4; j++)
412  fprintf(out_logfile, "%016llx ", goodbuf[j]);
413  if (j == length)
414  fprintf(out_logfile, "[end of buffer]");
415  fprintf(out_logfile, "\n[%d] %s0x", rank, bufferLabel2);
416  for (j = first; j < length && j < first + 4; j++)
417  fprintf(out_logfile, "%016llx ", testbuf[j]);
418  if (j == length)
419  fprintf(out_logfile, "[end of buffer]");
420  fprintf(out_logfile, "\n");
421  if (test->quitOnError == TRUE)
422  ERR("data check error, aborting execution");
423  }
424  return (errorCount);
425 }
426 
427 /*
428  * Count all errors across all tasks; report errors found.
429  */
430 static int CountErrors(IOR_param_t * test, int access, int errors)
431 {
432  int allErrors = 0;
433 
434  if (test->checkWrite || test->checkRead) {
435  MPI_CHECK(MPI_Reduce(&errors, &allErrors, 1, MPI_INT, MPI_SUM,
436  0, testComm), "cannot reduce errors");
437  MPI_CHECK(MPI_Bcast(&allErrors, 1, MPI_INT, 0, testComm),
438  "cannot broadcast allErrors value");
439  if (allErrors != 0) {
440  totalErrorCount += allErrors;
441  test->errorFound = TRUE;
442  }
443  if (rank == 0 && allErrors != 0) {
444  if (allErrors < 0) {
445  WARN("overflow in errors counted");
446  allErrors = -1;
447  }
448  fprintf(out_logfile, "WARNING: incorrect data on %s (%d errors found).\n",
449  access == WRITECHECK ? "write" : "read", allErrors);
450  fprintf(out_logfile,
451  "Used Time Stamp %u (0x%x) for Data Signature\n",
454  }
455  }
456  return (allErrors);
457 }
458 
/*
 * Allocate a page-aligned (required by O_DIRECT) buffer of at least `size`
 * bytes.  The pointer returned by malloc() is stored in the bytes directly
 * in front of the aligned address so that aligned_buffer_free() can
 * retrieve it.  ERR() is raised when malloc() fails.
 */
static void *aligned_buffer_alloc(size_t size)
{
        const size_t pageSize = getpagesize();
        const size_t pageMask = pageSize - 1;
        char *raw;
        char *firstUsable;
        char *aligned;

        /* room for the payload, the alignment slack and the back pointer */
        raw = malloc(size + pageSize + sizeof(void *));
        if (raw == NULL)
                ERR("out of memory");

        /* skip the back-pointer slot, then round up to the next page */
        firstUsable = raw + sizeof(char *);
        aligned = firstUsable + pageSize - ((size_t) firstUsable & pageMask);

        /* remember the malloc()ed address just below the aligned buffer */
        *(void **)(aligned - sizeof(void *)) = raw;

        return (void *)aligned;
}
484 
/*
 * Free a buffer allocated by aligned_buffer_alloc(): the address handed to
 * free() is the pointer stored in the bytes just before `buf`.
 */
static void aligned_buffer_free(void *buf)
{
        char *slot = (char *)buf - sizeof(char *);
        void *original = *(void **)slot;

        free(original);
}
492 
/*
 * Allocate `size` zero-filled bytes, raising ERR() on failure.
 * A request of 0 bytes is served with a 1-byte allocation so that a NULL
 * return always means a real allocation failure.
 */
static void* safeMalloc(uint64_t size){
        void *d;

        /* refuse sizes that do not fit in size_t instead of truncating */
        if (size > SIZE_MAX){
                ERR("Could not malloc an array");
        }
        d = calloc(1, size != 0 ? (size_t)size : 1);
        if (d == NULL){
                ERR("Could not malloc an array");
        }
        return d;
}
501 
503 {
504  int reps;
505  if (test->results != NULL)
506  return;
507 
508  reps = test->params.repetitions;
509  test->results = (IOR_results_t *) safeMalloc(sizeof(IOR_results_t) * reps);
510 }
511 
513 {
514  if (test->results != NULL) {
515  free(test->results);
516  }
517 }
518 
519 
523 IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num)
524 {
525  IOR_test_t *newTest = NULL;
526 
527  newTest = (IOR_test_t *) malloc(sizeof(IOR_test_t));
528  if (newTest == NULL)
529  ERR("malloc() of IOR_test_t failed");
530  newTest->params = *init_params;
531  newTest->params.platform = GetPlatformName();
532  newTest->params.id = test_num;
533  newTest->next = NULL;
534  newTest->results = NULL;
535 
536  return newTest;
537 }
538 
539 static void DestroyTest(IOR_test_t *test)
540 {
541  FreeResults(test);
542  free(test);
543 }
544 
545 static void DestroyTests(IOR_test_t *tests_head)
546 {
547  IOR_test_t *tptr, *next;
548 
549  for (tptr = tests_head; tptr != NULL; tptr = next) {
550  next = tptr->next;
551  DestroyTest(tptr);
552  }
553 }
554 
555 /*
556  * Distribute IOR_HINTs to all tasks' environments.
557  */
558 void DistributeHints(void)
559 {
560  char hint[MAX_HINTS][MAX_STR], fullHint[MAX_STR], hintVariable[MAX_STR];
561  int hintCount = 0, i;
562 
563  if (rank == 0) {
564  for (i = 0; environ[i] != NULL; i++) {
565  if (strncmp(environ[i], "IOR_HINT", strlen("IOR_HINT"))
566  == 0) {
567  hintCount++;
568  if (hintCount == MAX_HINTS) {
569  WARN("exceeded max hints; reset MAX_HINTS and recompile");
570  hintCount = MAX_HINTS;
571  break;
572  }
573  /* assume no IOR_HINT is greater than MAX_STR in length */
574  strncpy(hint[hintCount - 1], environ[i],
575  MAX_STR - 1);
576  }
577  }
578  }
579 
580  MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE,
581  0, MPI_COMM_WORLD), "cannot broadcast hints");
582  for (i = 0; i < hintCount; i++) {
583  MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE,
584  0, MPI_COMM_WORLD),
585  "cannot broadcast hints");
586  strcpy(fullHint, hint[i]);
587  strcpy(hintVariable, strtok(fullHint, "="));
588  if (getenv(hintVariable) == NULL) {
589  /* doesn't exist in this task's environment; better set it */
590  if (putenv(hint[i]) != 0)
591  WARN("cannot set environment variable");
592  }
593  }
594 }
595 
596 /*
597  * Fill buffer, which is transfer size bytes long, with known 8-byte long long
598  * int values. In even-numbered 8-byte long long ints, store MPI task in high
599  * bits and timestamp signature in low bits. In odd-numbered 8-byte long long
600  * ints, store transfer offset. If storeFileOffset option is used, the file
601  * (not transfer) offset is stored instead.
602  */
603 
604 static void
606 
607 {
608  size_t i;
609  unsigned long long hi, lo;
610  unsigned long long *buf = (unsigned long long *)buffer;
611 
612  for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) {
613  hi = ((unsigned long long) rand_r(&test->incompressibleSeed) << 32);
614  lo = (unsigned long long) rand_r(&test->incompressibleSeed);
615  buf[i] = hi | lo;
616  }
617 }
618 
620 
621 static void
622 FillBuffer(void *buffer,
623  IOR_param_t * test, unsigned long long offset, int fillrank)
624 {
625  size_t i;
626  unsigned long long hi, lo;
627  unsigned long long *buf = (unsigned long long *)buffer;
628 
629  if(test->dataPacketType == incompressible ) { /* Make for some non compressable buffers with randomish data */
630 
631  /* In order for write checks to work, we have to restart the pseudo-random sequence */
633  test->incompressibleSeed = test->setTimeStampSignature + rank; /* We copied seed into timestampSignature at initialization, also add the rank to add randomness between processes */
635  }
636  FillIncompressibleBuffer(buffer, test);
637  }
638 
639  else {
640  hi = ((unsigned long long)fillrank) << 32;
641  lo = (unsigned long long)test->timeStampSignatureValue;
642  for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) {
643  if ((i % 2) == 0) {
644  /* evens contain MPI rank and time in seconds */
645  buf[i] = hi | lo;
646  } else {
647  /* odds contain offset */
648  buf[i] = offset + (i * sizeof(unsigned long long));
649  }
650  }
651  }
652 }
653 
654 /*
655  * Return string describing machine name and type.
656  */
658 {
659  char nodeName[MAX_STR], *p, *start, sysName[MAX_STR];
660  char platformName[MAX_STR];
661  struct utsname name;
662 
663  if (uname(&name) != 0) {
664  EWARN("cannot get platform name");
665  sprintf(sysName, "%s", "Unknown");
666  sprintf(nodeName, "%s", "Unknown");
667  } else {
668  sprintf(sysName, "%s", name.sysname);
669  sprintf(nodeName, "%s", name.nodename);
670  }
671 
672  start = nodeName;
673  if (strlen(nodeName) == 0) {
674  p = start;
675  } else {
676  /* point to one character back from '\0' */
677  p = start + strlen(nodeName) - 1;
678  }
679  /*
680  * to cut off trailing node number, search backwards
681  * for the first non-numeric character
682  */
683  while (p != start) {
684  if (*p < '0' || *p > '9') {
685  *(p + 1) = '\0';
686  break;
687  } else {
688  p--;
689  }
690  }
691 
692  sprintf(platformName, "%s(%s)", nodeName, sysName);
693  return strdup(platformName);
694 }
695 
696 
697 
698 /*
699  * Parse file name.
700  */
701 static char **ParseFileName(char *name, int *count)
702 {
703  char **fileNames, *tmp, *token;
704  char delimiterString[3] = { FILENAME_DELIMITER, '\n', '\0' };
705  int i = 0;
706 
707  *count = 0;
708  tmp = name;
709 
710  /* pass one */
711  /* if something there, count the first item */
712  if (*tmp != '\0') {
713  (*count)++;
714  }
715  /* count the rest of the filenames */
716  while (*tmp != '\0') {
717  if (*tmp == FILENAME_DELIMITER) {
718  (*count)++;
719  }
720  tmp++;
721  }
722 
723  fileNames = (char **)malloc((*count) * sizeof(char **));
724  if (fileNames == NULL)
725  ERR("out of memory");
726 
727  /* pass two */
728  token = strtok(name, delimiterString);
729  while (token != NULL) {
730  fileNames[i] = token;
731  token = strtok(NULL, delimiterString);
732  i++;
733  }
734  return (fileNames);
735 }
736 
737 
738 /*
739  * Return test file name to access.
740  * for single shared file, fileNames[0] is returned in testFileName
741  */
742 void GetTestFileName(char *testFileName, IOR_param_t * test)
743 {
744  char **fileNames;
745  char initialTestFileName[MAX_PATHLEN];
746  char testFileNameRoot[MAX_STR];
747  char tmpString[MAX_STR];
748  int count;
749 
750  /* parse filename for multiple file systems */
751  strcpy(initialTestFileName, test->testFileName);
752  fileNames = ParseFileName(initialTestFileName, &count);
753  if (count > 1 && test->uniqueDir == TRUE)
754  ERR("cannot use multiple file names with unique directories");
755  if (test->filePerProc) {
756  strcpy(testFileNameRoot,
757  fileNames[((rank +
758  rankOffset) % test->numTasks) % count]);
759  } else {
760  strcpy(testFileNameRoot, fileNames[0]);
761  }
762 
763  /* give unique name if using multiple files */
764  if (test->filePerProc) {
765  /*
766  * prepend rank subdirectory before filename
767  * e.g., /dir/file => /dir/<rank>/file
768  */
769  if (test->uniqueDir == TRUE) {
770  strcpy(testFileNameRoot,
771  PrependDir(test, testFileNameRoot));
772  }
773  sprintf(testFileName, "%s.%08d", testFileNameRoot,
774  (rank + rankOffset) % test->numTasks);
775  } else {
776  strcpy(testFileName, testFileNameRoot);
777  }
778 
779  /* add suffix for multiple files */
780  if (test->repCounter > -1) {
781  sprintf(tmpString, ".%d", test->repCounter);
782  strcat(testFileName, tmpString);
783  }
784  free (fileNames);
785 }
786 
787 /*
788  * From absolute directory, insert rank as subdirectory. Allows each task
789  * to write to its own directory. E.g., /dir/file => /dir/<rank>/file.
790  */
791 static char *PrependDir(IOR_param_t * test, char *rootDir)
792 {
793  char *dir;
794  char fname[MAX_STR + 1];
795  char *p;
796  int i;
797 
798  dir = (char *)malloc(MAX_STR + 1);
799  if (dir == NULL)
800  ERR("out of memory");
801 
802  /* get dir name */
803  strcpy(dir, rootDir);
804  i = strlen(dir) - 1;
805  while (i > 0) {
806  if (dir[i] == '\0' || dir[i] == '/') {
807  dir[i] = '/';
808  dir[i + 1] = '\0';
809  break;
810  }
811  i--;
812  }
813 
814  /* get file name */
815  strcpy(fname, rootDir);
816  p = fname;
817  while (i > 0) {
818  if (fname[i] == '\0' || fname[i] == '/') {
819  p = fname + (i + 1);
820  break;
821  }
822  i--;
823  }
824 
825  /* create directory with rank as subdirectory */
826  sprintf(dir, "%s%d", dir, (rank + rankOffset) % test->numTasks);
827 
828  /* dir doesn't exist, so create */
829  if (access(dir, F_OK) != 0) {
830  if (mkdir(dir, S_IRWXU) < 0) {
831  ERR("cannot create directory");
832  }
833 
834  /* check if correct permissions */
835  } else if (access(dir, R_OK) != 0 || access(dir, W_OK) != 0 ||
836  access(dir, X_OK) != 0) {
837  ERR("invalid directory permissions");
838  }
839 
840  /* concatenate dir and file names */
841  strcat(dir, "/");
842  strcat(dir, p);
843 
844  return dir;
845 }
846 
847 /******************************************************************************/
848 /*
849  * Reduce test results, and show if verbose set.
850  */
851 
852 static void ReduceIterResults(IOR_test_t *test, double **timer, int rep,
853  int access)
854 {
855  double reduced[12] = { 0 };
856  double diff[6];
857  double *diff_subset;
858  double totalTime;
859  double bw;
860  int i;
861  MPI_Op op;
862 
863  assert(access == WRITE || access == READ);
864 
865  /* Find the minimum start time of the even numbered timers, and the
866  maximum finish time for the odd numbered timers */
867  for (i = 0; i < 12; i++) {
868  op = i % 2 ? MPI_MAX : MPI_MIN;
869  MPI_CHECK(MPI_Reduce(&timer[i][rep], &reduced[i], 1, MPI_DOUBLE,
870  op, 0, testComm), "MPI_Reduce()");
871  }
872 
873  if (rank != 0) {
874  /* Only rank 0 tallies and prints the results. */
875  return;
876  }
877 
878  /* Calculate elapsed times and throughput numbers */
879  for (i = 0; i < 6; i++) {
880  diff[i] = reduced[2 * i + 1] - reduced[2 * i];
881  }
882  if (access == WRITE) {
883  totalTime = reduced[5] - reduced[0];
884  test->results[rep].writeTime = totalTime;
885  diff_subset = &diff[0];
886  } else { /* READ */
887  totalTime = reduced[11] - reduced[6];
888  test->results[rep].readTime = totalTime;
889  diff_subset = &diff[3];
890  }
891 
892  if (verbose < VERBOSE_0) {
893  return;
894  }
895 
896  bw = (double)test->results[rep].aggFileSizeForBW / totalTime;
897 
898  PrintReducedResult(test, access, bw, diff_subset, totalTime, rep);
899 }
900 
901 /*
902  * Check for file(s), then remove all files if file-per-proc, else single file.
903  *
904  */
905 static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test)
906 {
907  int tmpRankOffset = 0;
908  if (filePerProc) {
909  /* in random tasks, delete own file */
910  if (test->reorderTasksRandom == TRUE) {
911  tmpRankOffset = rankOffset;
912  rankOffset = 0;
913  GetTestFileName(testFileName, test);
914  }
915  if (backend->access(testFileName, F_OK, test) == 0) {
916  backend->delete(testFileName, test);
917  }
918  if (test->reorderTasksRandom == TRUE) {
919  rankOffset = tmpRankOffset;
920  GetTestFileName(testFileName, test);
921  }
922  } else {
923  if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0)) {
924  backend->delete(testFileName, test);
925  }
926  }
927 }
928 
929 /*
930  * Setup tests by parsing commandline and creating test script.
931  * Perform a sanity-check on the configured parameters.
932  */
933 static void InitTests(IOR_test_t *tests, MPI_Comm com)
934 {
935  int size;
936 
937  MPI_CHECK(MPI_Comm_size(com, & size), "MPI_Comm_size() error");
938 
939  /* count the tasks per node */
941 
942  /*
943  * Since there is no guarantee that anyone other than
944  * task 0 has the environment settings for the hints, pass
945  * the hint=value pair to everyone else in mpi_comm_world
946  */
947  DistributeHints();
948 
949  /* check validity of tests and create test queue */
950  while (tests != NULL) {
951  IOR_param_t *params = & tests->params;
952  params->testComm = com;
953  params->nodes = params->numTasks / tasksPerNode;
954  params->tasksPerNode = tasksPerNode;
955  if (params->numTasks == 0) {
956  params->numTasks = size;
957  }
958  params->expectedAggFileSize =
959  params->blockSize * params->segmentCount * params->numTasks;
960 
961  ValidateTests(&tests->params);
962  tests = tests->next;
963  }
964 
965  init_clock();
966 
967  /* seed random number generator */
969 }
970 
971 /*
972  * Setup transfer buffers, creating and filling as needed.
973  */
974 static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test,
975  int pretendRank)
976 {
977  ioBuffers->buffer = aligned_buffer_alloc(test->transferSize);
978 
979  if (test->checkWrite || test->checkRead) {
980  ioBuffers->checkBuffer = aligned_buffer_alloc(test->transferSize);
981  }
982  if (test->checkRead || test->checkWrite) {
984  }
985 
986  return;
987 }
988 
989 /*
990  * Free transfer buffers.
991  */
992 static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test)
993 
994 {
995  aligned_buffer_free(ioBuffers->buffer);
996 
997  if (test->checkWrite || test->checkRead) {
998  aligned_buffer_free(ioBuffers->checkBuffer);
999  }
1000  if (test->checkRead) {
1002  }
1003 
1004  return;
1005 }
1006 
1007 
1008 
/*
 * malloc a buffer, touching every page in an attempt to defeat lazy allocation.
 *
 * Writes the value 1 to the first byte of the buffer and then to one byte
 * every page_size bytes.  Returns NULL when size is 0 or malloc() fails;
 * otherwise the caller owns the buffer and releases it with free().
 */
static void *malloc_and_touch(size_t size)
{
        long reported;
        size_t page_size;
        size_t offset;
        char *buf;

        if (size == 0)
                return NULL;

        /* sysconf() returns -1 on failure; fall back to a common page size
           rather than using the error value as a stride */
        reported = sysconf(_SC_PAGESIZE);
        page_size = (reported > 0) ? (size_t)reported : 4096;

        buf = (char *)malloc(size);
        if (buf == NULL)
                return NULL;

        /* index by offset so that no pointer beyond the buffer is formed */
        offset = 0;
        for (;;) {
                buf[offset] = (char)1;
                if (size - offset <= page_size)
                        break;
                offset += page_size;
        }

        return (void *)buf;
}
1033 
1034 static void file_hits_histogram(IOR_param_t *params)
1035 {
1036  int *rankoffs = NULL;
1037  int *filecont = NULL;
1038  int *filehits = NULL;
1039  int ifile;
1040  int jfile;
1041 
1042  if (rank == 0) {
1043  rankoffs = (int *)malloc(params->numTasks * sizeof(int));
1044  filecont = (int *)malloc(params->numTasks * sizeof(int));
1045  filehits = (int *)malloc(params->numTasks * sizeof(int));
1046  }
1047 
1048  MPI_CHECK(MPI_Gather(&rankOffset, 1, MPI_INT, rankoffs,
1049  1, MPI_INT, 0, mpi_comm_world),
1050  "MPI_Gather error");
1051 
1052  if (rank != 0)
1053  return;
1054 
1055  memset((void *)filecont, 0, params->numTasks * sizeof(int));
1056  for (ifile = 0; ifile < params->numTasks; ifile++) {
1057  filecont[(ifile + rankoffs[ifile]) % params->numTasks]++;
1058  }
1059  memset((void *)filehits, 0, params->numTasks * sizeof(int));
1060  for (ifile = 0; ifile < params->numTasks; ifile++)
1061  for (jfile = 0; jfile < params->numTasks; jfile++) {
1062  if (ifile == filecont[jfile])
1063  filehits[ifile]++;
1064  }
1065  fprintf(out_logfile, "#File Hits Dist:");
1066  jfile = 0;
1067  ifile = 0;
1068  while (jfile < params->numTasks && ifile < params->numTasks) {
1069  fprintf(out_logfile, " %d", filehits[ifile]);
1070  jfile += filehits[ifile], ifile++;
1071  }
1072  fprintf(out_logfile, "\n");
1073  free(rankoffs);
1074  free(filecont);
1075  free(filehits);
1076 }
1077 
1078 
1079 int test_time_elapsed(IOR_param_t *params, double startTime)
1080 {
1081  double endTime;
1082 
1083  if (params->maxTimeDuration == 0)
1084  return 0;
1085 
1086  endTime = startTime + (params->maxTimeDuration * 60);
1087 
1088  return GetTimeStamp() >= endTime;
1089 }
1090 
1091 /*
1092  * hog some memory as a rough simulation of a real application's memory use
1093  */
1094 static void *HogMemory(IOR_param_t *params)
1095 {
1096  size_t size;
1097  void *buf;
1098 
1099  if (params->memoryPerTask != 0) {
1100  size = params->memoryPerTask;
1101  } else if (params->memoryPerNode != 0) {
1102  if (verbose >= VERBOSE_3)
1103  fprintf(out_logfile, "This node hogging %ld bytes of memory\n",
1104  params->memoryPerNode);
1105  size = params->memoryPerNode / params->tasksPerNode;
1106  } else {
1107  return NULL;
1108  }
1109 
1110  if (verbose >= VERBOSE_3)
1111  fprintf(out_logfile, "This task hogging %ld bytes of memory\n", size);
1112 
1113  buf = malloc_and_touch(size);
1114  if (buf == NULL)
1115  ERR("malloc of simulated applciation buffer failed");
1116 
1117  return buf;
1118 }
1119 
1120 /*
1121  * Using the test parameters, run iteration(s) of single test.
1122  */
/*
 * Outline of what the visible code does:
 *   1. Caps params->numTasks at numTasksWorld and builds testComm from
 *      ranks 0 .. numTasks-1 of mpi_comm_world.  Ranks that receive
 *      MPI_COMM_NULL only wait at the final world barrier and return.
 *   2. Allocates 12 timer arrays (one slot per repetition), selects the
 *      I/O backend from params->api, optionally hogs memory and sets up
 *      the transfer buffers.
 *   3. For every repetition runs, each guarded by test_time_elapsed():
 *      the write phase (timer[0..5]), the write-check phase, and the
 *      read / read-check phase (timer[6..11]); then removes the test
 *      file unless it is to be kept.
 *   4. Frees the communicator, prints a summary, releases buffers and
 *      timers, and synchronises with the non-participating ranks.
 *
 * NOTE(review): this listing skips source lines 1176, 1345 and
 * 1461-1462, so statements may be missing at those points - confirm
 * against the real ior.c before relying on the code as shown.
 */
1123 static void TestIoSys(IOR_test_t *test)
1124 {
1125  IOR_param_t *params = &test->params;
1126  IOR_results_t *results = test->results;
1127  char testFileName[MAX_STR];
1128  double *timer[12];
1129  double startTime;
1130  int pretendRank;
1131  int i, rep;
1132  void *fd;
1133  MPI_Group orig_group, new_group;
1134  int range[3];
1135  IOR_offset_t dataMoved; /* for data rate calculation */
1136  void *hog_buf;
1137  IOR_io_buffers ioBuffers;
1138 
1139  /* set up communicator for test */
1140  if (params->numTasks > numTasksWorld) {
1141  if (rank == 0) {
1142  fprintf(out_logfile,
1143  "WARNING: More tasks requested (%d) than available (%d),",
1144  params->numTasks, numTasksWorld);
1145  fprintf(out_logfile, " running on %d tasks.\n",
1146  numTasksWorld);
1147  }
1148  params->numTasks = numTasksWorld;
1149  }
  /* build a group holding ranks 0 .. numTasks-1 and derive testComm from it */
1150  MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group),
1151  "MPI_Comm_group() error");
1152  range[0] = 0; /* first rank */
1153  range[1] = params->numTasks - 1; /* last rank */
1154  range[2] = 1; /* stride */
1155  MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group),
1156  "MPI_Group_range_incl() error");
1157  MPI_CHECK(MPI_Comm_create(mpi_comm_world, new_group, &testComm),
1158  "MPI_Comm_create() error");
1159  MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error");
1160  MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error");
1161  params->testComm = testComm;
1162  if (testComm == MPI_COMM_NULL) {
1163  /* tasks not in the group do not participate in this test */
  /* this barrier pairs with the world barrier at the end of the function */
1164  MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error");
1165  return;
1166  }
1167  if (rank == 0 && verbose >= VERBOSE_1) {
1168  fprintf(out_logfile, "Participating tasks: %d\n", params->numTasks);
1169  fflush(out_logfile);
1170  }
1171  if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) {
1172  fprintf(out_logfile,
1173  "Using reorderTasks '-C' (expecting block, not cyclic, task assignment)\n");
1174  fflush(out_logfile);
1175  }
1177 
1178  /* setup timers */
  /* 12 arrays, one value per repetition: [0..5] are written by the write
   * phase below, [6..11] by the read phase */
1179  for (i = 0; i < 12; i++) {
1180  timer[i] = (double *)malloc(params->repetitions * sizeof(double));
1181  if (timer[i] == NULL)
1182  ERR("malloc failed");
1183  }
1184 
1185  /* bind I/O calls to specific API */
1186  backend = aiori_select(params->api);
1187  if (backend == NULL)
1188  ERR_SIMPLE("unrecognized I/O API");
1189 
1190 
1191  /* show test setup */
1192  if (rank == 0 && verbose >= VERBOSE_0)
1193  ShowSetup(params);
1194 
1195  hog_buf = HogMemory(params);
1196 
  /* rank shifted by the global rankOffset, wrapped to the test's task count */
1197  pretendRank = (rank + rankOffset) % params->numTasks;
1198 
1199  /* IO Buffer Setup */
1200 
1201  if (params->setTimeStampSignature) { // initialize the buffer properly
1202  params->timeStampSignatureValue = (unsigned int)params->setTimeStampSignature;
1203  }
1204  XferBuffersSetup(&ioBuffers, params, pretendRank);
1205  reseed_incompressible_prng = TRUE; // reset pseudo random generator, necessary to guarantee the next call to FillBuffer produces the same value as it is right now
1206 
1207  /* Initial time stamp */
1208  startTime = GetTimeStamp();
1209 
1210  /* loop over test iterations */
  /* remember the configured wear-out count; the write phase restores it
   * at its start and may overwrite it after a stonewalled write */
1211  uint64_t params_saved_wearout = params->stoneWallingWearOutIterations;
1212  for (rep = 0; rep < params->repetitions; rep++) {
1213  PrintRepeatStart();
1214  /* Get iteration start time in seconds in task 0 and broadcast to
1215  all tasks */
1216  if (rank == 0) {
1217  if (! params->setTimeStampSignature) {
1218  time_t currentTime;
1219  if ((currentTime = time(NULL)) == -1) {
1220  ERR("cannot get current time");
1221  }
1222  params->timeStampSignatureValue =
1223  (unsigned int)currentTime;
1224  if (verbose >= VERBOSE_2) {
1225  fprintf(out_logfile,
1226  "Using Time Stamp %u (0x%x) for Data Signature\n",
1227  params->timeStampSignatureValue,
1228  params->timeStampSignatureValue);
1229  }
1230  }
1231  if (rep == 0 && verbose >= VERBOSE_0) {
1232  PrintTableHeader();
1233  }
1234  }
1235  MPI_CHECK(MPI_Bcast
1236  (&params->timeStampSignatureValue, 1, MPI_UNSIGNED, 0,
1237  testComm), "cannot broadcast start time value");
1238 
1239  FillBuffer(ioBuffers.buffer, params, 0, pretendRank);
1240  /* use repetition count for number of multiple files */
1241  if (params->multiFile)
1242  params->repCounter = rep;
1243 
1244  /*
1245  * write the file(s), getting timing between I/O calls
1246  */
1247 
1248  if (params->writeFile && !test_time_elapsed(params, startTime)) {
1249  GetTestFileName(testFileName, params);
1250  if (verbose >= VERBOSE_3) {
1251  fprintf(out_logfile, "task %d writing %s\n", rank,
1252  testFileName);
1253  }
1254  DelaySecs(params->interTestDelay);
1255  if (params->useExistingTestFile == FALSE) {
1256  RemoveFile(testFileName, params->filePerProc,
1257  params);
1258  }
1259 
1260  params->stoneWallingWearOutIterations = params_saved_wearout;
1261  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1262  params->open = WRITE;
  /* timer[0]/[1] bracket the create call */
1263  timer[0][rep] = GetTimeStamp();
1264  fd = backend->create(testFileName, params);
1265  timer[1][rep] = GetTimeStamp();
1266  if (params->intraTestBarriers)
1267  MPI_CHECK(MPI_Barrier(testComm),
1268  "barrier error");
1269  if (rank == 0 && verbose >= VERBOSE_1) {
1270  fprintf(out_logfile,
1271  "Commencing write performance test: %s",
1272  CurrentTimeString());
1273  }
  /* timer[2]/[3] bracket the data transfer (and the optional debug print) */
1274  timer[2][rep] = GetTimeStamp();
1275  dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers);
  /* NOTE(review): this test reads params->verbose while the rest of the
   * function uses the global 'verbose', and dataMoved (IOR_offset_t) is
   * printed with %llu - confirm both are intended */
1276  if (params->verbose >= VERBOSE_4) {
1277  fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
1278  fflush(out_logfile);
1279  }
1280  timer[3][rep] = GetTimeStamp();
1281  if (params->intraTestBarriers)
1282  MPI_CHECK(MPI_Barrier(testComm),
1283  "barrier error");
  /* timer[4]/[5] bracket the close call */
1284  timer[4][rep] = GetTimeStamp();
1285  backend->close(fd, params);
1286 
1287  timer[5][rep] = GetTimeStamp();
1288  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1289 
1290  /* get the size of the file just written */
1291  results[rep].aggFileSizeFromStat =
1292  backend->get_file_size(params, testComm, testFileName);
1293 
1294  /* check if stat() of file doesn't equal expected file size,
1295  use actual amount of byte moved */
1296  CheckFileSize(test, dataMoved, rep);
1297 
1298  if (verbose >= VERBOSE_3)
1299  WriteTimes(params, timer, rep, WRITE);
1300  ReduceIterResults(test, timer, rep, WRITE);
1301  if (params->outlierThreshold) {
1302  CheckForOutliers(params, timer, rep, WRITE);
1303  }
1304 
1305  /* check if in this round we run write with stonewalling */
  /* carry the number of pairs this write accessed over to the later
   * phases of this repetition */
1306  if(params->deadlineForStonewalling > 0){
1307  params->stoneWallingWearOutIterations = results[rep].pairs_accessed;
1308  }
1309  }
1310 
1311  /*
1312  * perform a check of data, reading back data and comparing
1313  * against what was expected to be written
1314  */
1315  if (params->checkWrite && !test_time_elapsed(params, startTime)) {
1316  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1317  if (rank == 0 && verbose >= VERBOSE_1) {
1318  fprintf(out_logfile,
1319  "Verifying contents of the file(s) just written.\n");
1320  fprintf(out_logfile, "%s\n", CurrentTimeString());
1321  }
1322  if (params->reorderTasks) {
1323  /* move two nodes away from writing node */
1324  rankOffset = (2 * params->tasksPerNode) % params->numTasks;
1325  }
1326 
1327  // update the check buffer
1328  FillBuffer(ioBuffers.readCheckBuffer, params, 0, (rank + rankOffset) % params->numTasks);
1329 
1330  reseed_incompressible_prng = TRUE; /* Re-Seed the PRNG to get same sequence back, if random */
1331 
1332  GetTestFileName(testFileName, params);
1333  params->open = WRITECHECK;
1334  fd = backend->open(testFileName, params);
1335  dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers);
1336  backend->close(fd, params);
  /* undo the temporary shift applied for the write check */
1337  rankOffset = 0;
1338  }
1339  /*
1340  * read the file(s), getting timing between I/O calls
1341  */
1342  if ((params->readFile || params->checkRead ) && !test_time_elapsed(params, startTime)) {
1343  /* check for stonewall */
1344  if(params->stoneWallingStatusFile){
  /* NOTE(review): source line 1345 is absent from this listing; as shown,
   * nothing assigns stoneWallingWearOutIterations before the test below -
   * presumably the status file is read here, confirm against the real file */
1346  if(params->stoneWallingWearOutIterations == -1 && rank == 0){
1347  fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!");
1348  params->stoneWallingWearOutIterations = 0;
1349  }
1350  }
1351  int operation_flag = READ;
1352  if ( params->checkRead ){
1353  // actually read and then compare the buffer
1354  operation_flag = READCHECK;
1355  }
1356  /* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */
1357  /* Constant process offset reading */
1358  if (params->reorderTasks) {
1359  /* move taskPerNodeOffset nodes[1==default] away from writing node */
1360  rankOffset = (params->taskPerNodeOffset *
1361  params->tasksPerNode) % params->numTasks;
1362  }
1363  /* random process offset reading */
1364  if (params->reorderTasksRandom) {
1365  /* this should not intefere with randomOffset within a file because GetOffsetArrayRandom */
1366  /* seeds every random() call */
1367  int nodeoffset;
1368  unsigned int iseed0;
1369  nodeoffset = params->taskPerNodeOffset;
  /* clamp the node offset to at most nodes - 1 */
1370  nodeoffset = (nodeoffset < params->nodes) ? nodeoffset : params->nodes - 1;
  /* a negative seed is negated and varied by the repetition index;
   * a non-negative seed is used unchanged for every repetition */
1371  if (params->reorderTasksRandomSeed < 0)
1372  iseed0 = -1 * params->reorderTasksRandomSeed + rep;
1373  else
1374  iseed0 = params->reorderTasksRandomSeed;
1375  srand(rank + iseed0);
1376  {
1377  rankOffset = rand() % params->numTasks;
1378  }
  /* redraw until the offset is at least nodeoffset * tasksPerNode */
1379  while (rankOffset <
1380  (nodeoffset * params->tasksPerNode)) {
1381  rankOffset = rand() % params->numTasks;
1382  }
1383  /* Get more detailed stats if requested by verbose level */
1384  if (verbose >= VERBOSE_2) {
1385  file_hits_histogram(params);
1386  }
1387  }
  /* for a read check, regenerate the expected data for the shifted rank */
1388  if(operation_flag == READCHECK){
1389  FillBuffer(ioBuffers.readCheckBuffer, params, 0, (rank + rankOffset) % params->numTasks);
1390  }
1391 
1392  /* Using globally passed rankOffset, following function generates testFileName to read */
1393  GetTestFileName(testFileName, params);
1394 
1395  if (verbose >= VERBOSE_3) {
1396  fprintf(out_logfile, "task %d reading %s\n", rank,
1397  testFileName);
1398  }
1399  DelaySecs(params->interTestDelay);
1400  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1401  params->open = READ;
  /* timer[6]/[7] bracket the open call */
1402  timer[6][rep] = GetTimeStamp();
1403  fd = backend->open(testFileName, params);
1404  timer[7][rep] = GetTimeStamp();
1405  if (params->intraTestBarriers)
1406  MPI_CHECK(MPI_Barrier(testComm),
1407  "barrier error");
1408  if (rank == 0 && verbose >= VERBOSE_1) {
1409  fprintf(out_logfile,
1410  "Commencing read performance test: %s",
1411  CurrentTimeString());
1412  }
  /* timer[8]/[9] bracket the data transfer */
1413  timer[8][rep] = GetTimeStamp();
1414  dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers);
1415  timer[9][rep] = GetTimeStamp();
1416  if (params->intraTestBarriers)
1417  MPI_CHECK(MPI_Barrier(testComm),
1418  "barrier error");
  /* timer[10]/[11] bracket the close call */
1419  timer[10][rep] = GetTimeStamp();
1420  backend->close(fd, params);
1421  timer[11][rep] = GetTimeStamp();
1422 
1423  /* get the size of the file just read */
1424  results[rep].aggFileSizeFromStat =
1425  backend->get_file_size(params, testComm,
1426  testFileName);
1427 
1428  /* check if stat() of file doesn't equal expected file size,
1429  use actual amount of byte moved */
1430  CheckFileSize(test, dataMoved, rep);
1431 
1432  if (verbose >= VERBOSE_3)
1433  WriteTimes(params, timer, rep, READ);
1434  ReduceIterResults(test, timer, rep, READ);
1435  if (params->outlierThreshold) {
1436  CheckForOutliers(params, timer, rep, READ);
1437  }
1438  }
1439 
  /* remove the test file unless it is to be kept (always, or because an
   * error was found and keepFileWithError is set); both paths end in a
   * barrier on testComm */
1440  if (!params->keepFile
1441  && !(params->errorFound && params->keepFileWithError)) {
1442  double start, finish;
1443  start = GetTimeStamp();
1444  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1445  RemoveFile(testFileName, params->filePerProc, params);
1446  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1447  finish = GetTimeStamp();
1448  PrintRemoveTiming(start, finish, rep);
1449  } else {
1450  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
1451  }
  /* reset per-repetition state before the next iteration */
1452  params->errorFound = FALSE;
1453  rankOffset = 0;
1454 
1455  PrintRepeatEnd();
1456  }
1457 
1458  MPI_CHECK(MPI_Comm_free(&testComm), "MPI_Comm_free() error");
1459 
  /* NOTE(review): source lines 1461-1462 are absent from this listing, so
   * the summary_every_test branch appears empty here - confirm what it
   * prints against the real file */
1460  if (params->summary_every_test) {
1463  } else {
1464  PrintShortSummary(test);
1465  }
1466 
1467  XferBuffersFree(&ioBuffers, params);
1468 
1469  if (hog_buf != NULL)
1470  free(hog_buf);
1471  for (i = 0; i < 12; i++) {
1472  free(timer[i]);
1473  }
1474 
1475  /* Sync with the tasks that did not participate in this test */
1476  MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error");
1477 
1478 }
1479 
1480 /*
1481  * Determine if valid tests from parameters.
1482  */
1483 static void ValidateTests(IOR_param_t * test)
1484 {
1485  IOR_param_t defaults;
1486  init_IOR_Param_t(&defaults);
1487 
1488  if (test->repetitions <= 0)
1489  WARN_RESET("too few test repetitions",
1490  test, &defaults, repetitions);
1491  if (test->numTasks <= 0)
1492  ERR("too few tasks for testing");
1493  if (test->interTestDelay < 0)
1494  WARN_RESET("inter-test delay must be nonnegative value",
1495  test, &defaults, interTestDelay);
1496  if (test->readFile != TRUE && test->writeFile != TRUE
1497  && test->checkRead != TRUE && test->checkWrite != TRUE)
1498  ERR("test must write, read, or check read/write file");
1499  if(! test->setTimeStampSignature && test->writeFile != TRUE && test->checkRead == TRUE)
1500  ERR("using readCheck only requires to write a timeStampSignature -- use -G");
1501  if (test->segmentCount < 0)
1502  ERR("segment count must be positive value");
1503  if ((test->blockSize % sizeof(IOR_size_t)) != 0)
1504  ERR("block size must be a multiple of access size");
1505  if (test->blockSize < 0)
1506  ERR("block size must be non-negative integer");
1507  if ((test->transferSize % sizeof(IOR_size_t)) != 0)
1508  ERR("transfer size must be a multiple of access size");
1509  if (test->setAlignment < 0)
1510  ERR("alignment must be non-negative integer");
1511  if (test->transferSize < 0)
1512  ERR("transfer size must be non-negative integer");
1513  if (test->transferSize == 0) {
1514  ERR("test will not complete with zero transfer size");
1515  } else {
1516  if ((test->blockSize % test->transferSize) != 0)
1517  ERR("block size must be a multiple of transfer size");
1518  }
1519  if (test->blockSize < test->transferSize)
1520  ERR("block size must not be smaller than transfer size");
1521 
1522  /* specific APIs */
1523  if ((strcasecmp(test->api, "MPIIO") == 0)
1524  && (test->blockSize < sizeof(IOR_size_t)
1525  || test->transferSize < sizeof(IOR_size_t)))
1526  ERR("block/transfer size may not be smaller than IOR_size_t for MPIIO");
1527  if ((strcasecmp(test->api, "HDF5") == 0)
1528  && (test->blockSize < sizeof(IOR_size_t)
1529  || test->transferSize < sizeof(IOR_size_t)))
1530  ERR("block/transfer size may not be smaller than IOR_size_t for HDF5");
1531  if ((strcasecmp(test->api, "NCMPI") == 0)
1532  && (test->blockSize < sizeof(IOR_size_t)
1533  || test->transferSize < sizeof(IOR_size_t)))
1534  ERR("block/transfer size may not be smaller than IOR_size_t for NCMPI");
1535  if ((test->useFileView == TRUE)
1536  && (sizeof(MPI_Aint) < 8) /* used for 64-bit datatypes */
1537  &&((test->numTasks * test->blockSize) >
1538  (2 * (IOR_offset_t) GIBIBYTE)))
1539  ERR("segment size must be < 2GiB");
1540  if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt)
1541  WARN_RESET("retry only available in POSIX",
1542  test, &defaults, singleXferAttempt);
1543  if (((strcasecmp(test->api, "POSIX") != 0)
1544  && (strcasecmp(test->api, "MPIIO") != 0)
1545  && (strcasecmp(test->api, "MMAP") != 0)
1546  && (strcasecmp(test->api, "HDFS") != 0)
1547  && (strcasecmp(test->api, "RADOS") != 0)) && test->fsync)
1548  WARN_RESET("fsync() not supported in selected backend",
1549  test, &defaults, fsync);
1550  if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate)
1551  WARN_RESET("preallocation only available in MPIIO",
1552  test, &defaults, preallocate);
1553  if ((strcasecmp(test->api, "MPIIO") != 0) && test->useFileView)
1554  WARN_RESET("file view only available in MPIIO",
1555  test, &defaults, useFileView);
1556  if ((strcasecmp(test->api, "MPIIO") != 0) && test->useSharedFilePointer)
1557  WARN_RESET("shared file pointer only available in MPIIO",
1558  test, &defaults, useSharedFilePointer);
1559  if ((strcasecmp(test->api, "MPIIO") == 0) && test->useSharedFilePointer)
1560  WARN_RESET("shared file pointer not implemented",
1561  test, &defaults, useSharedFilePointer);
1562  if ((strcasecmp(test->api, "MPIIO") != 0) && test->useStridedDatatype)
1563  WARN_RESET("strided datatype only available in MPIIO",
1564  test, &defaults, useStridedDatatype);
1565  if ((strcasecmp(test->api, "MPIIO") == 0) && test->useStridedDatatype)
1566  WARN_RESET("strided datatype not implemented",
1567  test, &defaults, useStridedDatatype);
1568  if ((strcasecmp(test->api, "MPIIO") == 0)
1569  && test->useStridedDatatype && (test->blockSize < sizeof(IOR_size_t)
1570  || test->transferSize <
1571  sizeof(IOR_size_t)))
1572  ERR("need larger file size for strided datatype in MPIIO");
1573  if ((strcasecmp(test->api, "POSIX") == 0) && test->showHints)
1574  WARN_RESET("hints not available in POSIX",
1575  test, &defaults, showHints);
1576  if ((strcasecmp(test->api, "POSIX") == 0) && test->collective)
1577  WARN_RESET("collective not available in POSIX",
1578  test, &defaults, collective);
1579  if ((strcasecmp(test->api, "MMAP") == 0) && test->fsyncPerWrite
1580  && (test->transferSize & (sysconf(_SC_PAGESIZE) - 1)))
1581  ERR("transfer size must be aligned with PAGESIZE for MMAP with fsyncPerWrite");
1582 
1583  /* parameter consitency */
1584  if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE)
1585  ERR("Both Constant and Random task re-ordering specified. Choose one and resubmit");
1586  if (test->randomOffset && test->reorderTasksRandom
1587  && test->filePerProc == FALSE)
1588  ERR("random offset and random reorder tasks specified with single-shared-file. Choose one and resubmit");
1589  if (test->randomOffset && test->reorderTasks
1590  && test->filePerProc == FALSE)
1591  ERR("random offset and constant reorder tasks specified with single-shared-file. Choose one and resubmit");
1592  if (test->randomOffset && test->checkRead)
1593  ERR("random offset not available with read check option (use write check)");
1594  if (test->randomOffset && test->storeFileOffset)
1595  ERR("random offset not available with store file offset option)");
1596 
1597 
1598  if ((strcasecmp(test->api, "MPIIO") == 0) && test->randomOffset
1599  && test->collective)
1600  ERR("random offset not available with collective MPIIO");
1601  if ((strcasecmp(test->api, "MPIIO") == 0) && test->randomOffset
1602  && test->useFileView)
1603  ERR("random offset not available with MPIIO fileviews");
1604  if ((strcasecmp(test->api, "HDF5") == 0) && test->randomOffset)
1605  ERR("random offset not available with HDF5");
1606  if ((strcasecmp(test->api, "NCMPI") == 0) && test->randomOffset)
1607  ERR("random offset not available with NCMPI");
1608  if ((strcasecmp(test->api, "HDF5") != 0) && test->individualDataSets)
1609  WARN_RESET("individual datasets only available in HDF5",
1610  test, &defaults, individualDataSets);
1611  if ((strcasecmp(test->api, "HDF5") == 0) && test->individualDataSets)
1612  WARN_RESET("individual data sets not implemented",
1613  test, &defaults, individualDataSets);
1614  if ((strcasecmp(test->api, "NCMPI") == 0) && test->filePerProc)
1615  ERR("file-per-proc not available in current NCMPI");
1616  if (test->noFill) {
1617  if (strcasecmp(test->api, "HDF5") != 0) {
1618  ERR("'no fill' option only available in HDF5");
1619  } else {
1620  /* check if hdf5 available */
1621 #if defined (H5_VERS_MAJOR) && defined (H5_VERS_MINOR)
1622  /* no-fill option not available until hdf5-1.6.x */
1623 #if (H5_VERS_MAJOR > 0 && H5_VERS_MINOR > 5)
1624  ;
1625 #else
1626  char errorString[MAX_STR];
1627  sprintf(errorString,
1628  "'no fill' option not available in %s",
1629  test->apiVersion);
1630  ERR(errorString);
1631 #endif
1632 #else
1633  WARN("unable to determine HDF5 version for 'no fill' usage");
1634 #endif
1635  }
1636  }
1637  if (test->useExistingTestFile && test->lustre_set_striping)
1638  ERR("Lustre stripe options are incompatible with useExistingTestFile");
1639 
1640  /* N:1 and N:N */
1641  IOR_offset_t NtoN = test->filePerProc;
1642  IOR_offset_t Nto1 = ! NtoN;
1643  IOR_offset_t s = test->segmentCount;
1644  IOR_offset_t t = test->transferSize;
1645  IOR_offset_t b = test->blockSize;
1646 
1647  if (Nto1 && (s != 1) && (b != t)) {
1648  ERR("N:1 (strided) requires xfer-size == block-size");
1649  }
1650 }
1651 
1660  int pretendRank)
1661 {
1662  IOR_offset_t i, j, k = 0;
1663  IOR_offset_t offsets;
1664  IOR_offset_t *offsetArray;
1665 
1666  /* count needed offsets */
1667  offsets = (test->blockSize / test->transferSize) * test->segmentCount;
1668 
1669  /* setup empty array */
1670  offsetArray =
1671  (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t));
1672  if (offsetArray == NULL)
1673  ERR("malloc() failed");
1674  offsetArray[offsets] = -1; /* set last offset with -1 */
1675 
1676  /* fill with offsets */
1677  for (i = 0; i < test->segmentCount; i++) {
1678  for (j = 0; j < (test->blockSize / test->transferSize); j++) {
1679  offsetArray[k] = j * test->transferSize;
1680  if (test->filePerProc) {
1681  offsetArray[k] += i * test->blockSize;
1682  } else {
1683  offsetArray[k] +=
1684  (i * test->numTasks * test->blockSize)
1685  + (pretendRank * test->blockSize);
1686  }
1687  k++;
1688  }
1689  }
1690 
1691  return (offsetArray);
1692 }
1693 
1709 static IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank,
1710  int access)
1711 {
1712  int seed;
1713  IOR_offset_t i, value, tmp;
1714  IOR_offset_t offsets = 0;
1715  IOR_offset_t offsetCnt = 0;
1716  IOR_offset_t fileSize;
1717  IOR_offset_t *offsetArray;
1718 
1719  /* set up seed for random() */
1720  if (access == WRITE || access == READ) {
1721  test->randomSeed = seed = random();
1722  } else {
1723  seed = test->randomSeed;
1724  }
1725  srandom(seed);
1726 
1727  fileSize = test->blockSize * test->segmentCount;
1728  if (test->filePerProc == FALSE) {
1729  fileSize *= test->numTasks;
1730  }
1731 
1732  /* count needed offsets (pass 1) */
1733  for (i = 0; i < fileSize; i += test->transferSize) {
1734  if (test->filePerProc == FALSE) {
1735  // this counts which process get how many transferes in
1736  // a shared file
1737  if ((random() % test->numTasks) == pretendRank) {
1738  offsets++;
1739  }
1740  } else {
1741  offsets++;
1742  }
1743  }
1744 
1745  /* setup empty array */
1746  offsetArray =
1747  (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t));
1748  if (offsetArray == NULL)
1749  ERR("malloc() failed");
1750  offsetArray[offsets] = -1; /* set last offset with -1 */
1751 
1752  if (test->filePerProc) {
1753  /* fill array */
1754  for (i = 0; i < offsets; i++) {
1755  offsetArray[i] = i * test->transferSize;
1756  }
1757  } else {
1758  /* fill with offsets (pass 2) */
1759  srandom(seed); /* need same seed to get same transfers as counted in the beginning*/
1760  for (i = 0; i < fileSize; i += test->transferSize) {
1761  if ((random() % test->numTasks) == pretendRank) {
1762  offsetArray[offsetCnt] = i;
1763  offsetCnt++;
1764  }
1765  }
1766  }
1767  /* reorder array */
1768  for (i = 0; i < offsets; i++) {
1769  value = random() % offsets;
1770  tmp = offsetArray[value];
1771  offsetArray[value] = offsetArray[i];
1772  offsetArray[i] = tmp;
1773  }
1774  SeedRandGen(test->testComm); /* synchronize seeds across tasks */
1775 
1776  return (offsetArray);
1777 }
1778 
1779 static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offsetArray, int pretendRank,
1780  IOR_offset_t * transferCount, int * errors, IOR_param_t * test, int * fd, IOR_io_buffers* ioBuffers, int access){
1781  IOR_offset_t amtXferred = 0;
1782  IOR_offset_t transfer;
1783 
1784  void *buffer = ioBuffers->buffer;
1785  void *checkBuffer = ioBuffers->checkBuffer;
1786  void *readCheckBuffer = ioBuffers->readCheckBuffer;
1787 
1788  test->offset = offsetArray[pairCnt];
1789 
1790  transfer = test->transferSize;
1791  if (access == WRITE) {
1792  /* fills each transfer with a unique pattern
1793  * containing the offset into the file */
1794  if (test->storeFileOffset == TRUE) {
1795  FillBuffer(buffer, test, test->offset, pretendRank);
1796  }
1797  amtXferred =
1798  backend->xfer(access, fd, buffer, transfer, test);
1799  if (amtXferred != transfer)
1800  ERR("cannot write to file");
1801  } else if (access == READ) {
1802  amtXferred =
1803  backend->xfer(access, fd, buffer, transfer, test);
1804  if (amtXferred != transfer)
1805  ERR("cannot read from file");
1806  } else if (access == WRITECHECK) {
1807  memset(checkBuffer, 'a', transfer);
1808 
1809  if (test->storeFileOffset == TRUE) {
1810  FillBuffer(readCheckBuffer, test, test->offset, pretendRank);
1811  }
1812 
1813  amtXferred = backend->xfer(access, fd, checkBuffer, transfer, test);
1814  if (amtXferred != transfer)
1815  ERR("cannot read from file write check");
1816  (*transferCount)++;
1817  *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer,
1818  *transferCount, test,
1819  WRITECHECK);
1820  } else if (access == READCHECK) {
1821  amtXferred = backend->xfer(access, fd, buffer, transfer, test);
1822  if (amtXferred != transfer){
1823  ERR("cannot read from file");
1824  }
1825  if (test->storeFileOffset == TRUE) {
1826  FillBuffer(readCheckBuffer, test, test->offset, pretendRank);
1827  }
1828  *errors += CompareBuffers(readCheckBuffer, buffer, transfer, *transferCount, test, READCHECK);
1829  }
1830  return amtXferred;
1831 }
1832 
1833 /*
1834  * Write or Read data to file(s). This loops through the strides, writing
1835  * out the data to each block in transfer sizes, until the remainder left is 0.
1836  */
1837 static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers)
1838 {
1839  int errors = 0;
1840  IOR_offset_t transferCount = 0;
1841  uint64_t pairCnt = 0;
1842  IOR_offset_t *offsetArray;
1843  int pretendRank;
1844  IOR_offset_t dataMoved = 0; /* for data rate calculation */
1845  double startForStonewall;
1846  int hitStonewall;
1847 
1848  /* initialize values */
1849  pretendRank = (rank + rankOffset) % test->numTasks;
1850 
1851  if (test->randomOffset) {
1852  offsetArray = GetOffsetArrayRandom(test, pretendRank, access);
1853  } else {
1854  offsetArray = GetOffsetArraySequential(test, pretendRank);
1855  }
1856 
1857  startForStonewall = GetTimeStamp();
1858  hitStonewall = 0;
1859 
1860  /* loop over offsets to access */
1861  while ((offsetArray[pairCnt] != -1) && !hitStonewall ) {
1862  dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
1863  pairCnt++;
1864 
1865  hitStonewall = ((test->deadlineForStonewalling != 0
1866  && (GetTimeStamp() - startForStonewall)
1867  > test->deadlineForStonewalling)) || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ;
1868  }
1869  if (test->stoneWallingWearOut){
1870  if (verbose >= VERBOSE_1){
1871  fprintf(out_logfile, "%d: stonewalling pairs accessed: %lld\n", rank, (long long) pairCnt);
1872  }
1873  long long data_moved_ll = (long long) dataMoved;
1874  long long pairs_accessed_min = 0;
1875  MPI_CHECK(MPI_Allreduce(& pairCnt, &results->pairs_accessed,
1876  1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved");
1877  double stonewall_runtime = GetTimeStamp() - startForStonewall;
1878  results->stonewall_time = stonewall_runtime;
1879  MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min,
1880  1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
1881  MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_min_data_accessed,
1882  1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
1883  MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_avg_data_accessed,
1884  1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved");
1885 
1886  if(rank == 0){
1887  fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n",
1888  pairs_accessed_min, results->pairs_accessed,
1889  results->stonewall_min_data_accessed /1024.0 / 1024 / 1024, results->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , results->stonewall_time);
1890  results->stonewall_min_data_accessed *= test->numTasks;
1891  }
1892  if(pairs_accessed_min == pairCnt){
1893  results->stonewall_min_data_accessed = 0;
1894  results->stonewall_avg_data_accessed = 0;
1895  }
1896  if(pairCnt != results->pairs_accessed){
1897  // some work needs still to be done !
1898  for(; pairCnt < results->pairs_accessed; pairCnt++ ) {
1899  dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
1900  }
1901  }
1902  }else{
1903  results->pairs_accessed = pairCnt;
1904  }
1905 
1906 
1907  totalErrorCount += CountErrors(test, access, errors);
1908 
1909  free(offsetArray);
1910 
1911  if (access == WRITE && test->fsync == TRUE) {
1912  backend->fsync(fd, test); /*fsync after all accesses */
1913  }
1914  return (dataMoved);
1915 }
1916 
1917 /*
1918  * Write times taken during each iteration of the test.
1919  */
1920 static void
1921 WriteTimes(IOR_param_t * test, double **timer, int iteration, int writeOrRead)
1922 {
1923  char accessType[MAX_STR];
1924  char timerName[MAX_STR];
1925  int i, start = 0, stop = 0;
1926 
1927  if (writeOrRead == WRITE) {
1928  start = 0;
1929  stop = 6;
1930  strcpy(accessType, "WRITE");
1931  } else if (writeOrRead == READ) {
1932  start = 6;
1933  stop = 12;
1934  strcpy(accessType, "READ");
1935  } else {
1936  ERR("incorrect WRITE/READ option");
1937  }
1938 
1939  for (i = start; i < stop; i++) {
1940  switch (i) {
1941  case 0:
1942  strcpy(timerName, "write open start");
1943  break;
1944  case 1:
1945  strcpy(timerName, "write open stop");
1946  break;
1947  case 2:
1948  strcpy(timerName, "write start");
1949  break;
1950  case 3:
1951  strcpy(timerName, "write stop");
1952  break;
1953  case 4:
1954  strcpy(timerName, "write close start");
1955  break;
1956  case 5:
1957  strcpy(timerName, "write close stop");
1958  break;
1959  case 6:
1960  strcpy(timerName, "read open start");
1961  break;
1962  case 7:
1963  strcpy(timerName, "read open stop");
1964  break;
1965  case 8:
1966  strcpy(timerName, "read start");
1967  break;
1968  case 9:
1969  strcpy(timerName, "read stop");
1970  break;
1971  case 10:
1972  strcpy(timerName, "read close start");
1973  break;
1974  case 11:
1975  strcpy(timerName, "read close stop");
1976  break;
1977  default:
1978  strcpy(timerName, "invalid timer");
1979  break;
1980  }
1981  fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n",
1982  test->id, iteration, (int)rank, timer[i][iteration],
1983  timerName);
1984  }
1985 }
int reorderTasks
Definition: ior.h:105
long long stonewall_min_data_accessed
Definition: ior.h:214
int uniqueDir
Definition: ior.h:128
void PrintEarlyHeader()
Definition: ior-output.c:241
IOR_offset_t setAlignment
Definition: ior.h:165
IOR_offset_t(* get_file_size)(IOR_param_t *, MPI_Comm, char *)
Definition: aiori.h:76
int quitOnError
Definition: ior.h:114
int reorderTasksRandomSeed
Definition: ior.h:108
int ior_main(int argc, char **argv)
Definition: ior.c:93
int showHints
Definition: ior.h:126
char * hdfs_user
Definition: ior.h:168
void(* delete)(char *, IOR_param_t *)
Definition: aiori.h:73
double stonewall_time
Definition: ior.h:213
int errors
Definition: ior.h:210
int multiFile
Definition: ior.h:99
#define ERR(MSG)
Definition: iordef.h:169
static void file_hits_histogram(IOR_param_t *params)
Definition: ior.c:1034
static void DisplayOutliers(int numTasks, double timerVal, char *timeString, int access, int outlierThreshold)
Definition: ior.c:222
void PrintTestEnds()
Definition: ior-output.c:200
unsigned int incompressibleSeed
Definition: ior.h:143
#define VERBOSE_0
Definition: iordef.h:102
char * GetPlatformName()
Definition: ior.c:657
unsigned int timeStampSignatureValue
Definition: ior.h:140
int filePerProc
Definition: ior.h:104
void PrintRepeatStart()
Definition: ior-output.c:193
static int size
Definition: mdtest.c:82
#define VERBOSE_3
Definition: iordef.h:105
int noFill
Definition: ior.h:164
static void InitTests(IOR_test_t *, MPI_Comm)
Definition: ior.c:933
long long stonewall_avg_data_accessed
Definition: ior.h:215
int repetitions
Definition: ior.h:97
int64_t ReadStoneWallingIterations(char *const filename)
Definition: utilities.c:580
static void WriteTimes(IOR_param_t *, double **, int, int)
Definition: ior.c:1921
IOR_offset_t segmentCount
Definition: ior.h:116
int useStridedDatatype
Definition: ior.h:124
static void aligned_buffer_free(void *buf)
Definition: ior.c:488
int CountTasksPerNode(MPI_Comm comm)
Definition: utilities.c:166
#define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER)
Definition: iordef.h:135
void * checkBuffer
Definition: ior.h:61
int keepFile
Definition: ior.h:111
void PrintHeader(int argc, char **argv)
Definition: ior-output.c:250
char ** environ
static void XferBuffersFree(IOR_io_buffers *ioBuffers, IOR_param_t *test)
Definition: ior.c:992
int checkRead
Definition: ior.h:110
void PrintLongSummaryOneTest(IOR_test_t *test)
Definition: ior-output.c:556
int useSharedFilePointer
Definition: ior.h:123
int test_time_elapsed(IOR_param_t *params, double startTime)
Definition: ior.c:1079
void FreeResults(IOR_test_t *test)
Definition: ior.c:512
static void CheckForOutliers(IOR_param_t *test, double **timer, int rep, int access)
Definition: ior.c:259
static void ValidateTests(IOR_param_t *)
Definition: ior.c:1483
IOR_offset_t transferSize
Definition: ior.h:118
size_t memoryPerNode
Definition: ior.h:146
#define WRITECHECK
Definition: iordef.h:96
IOR_param_t params
Definition: ior.h:224
void PrintLongSummaryHeader()
Definition: ior-output.c:566
#define READCHECK
Definition: iordef.h:98
int storeFileOffset
Definition: ior.h:130
int errorFound
Definition: ior.h:113
double sd
Definition: ior-internal.h:34
static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offsetArray, int pretendRank, IOR_offset_t *transferCount, int *errors, IOR_param_t *test, int *fd, IOR_io_buffers *ioBuffers, int access)
Definition: ior.c:1779
static int totalErrorCount
Definition: ior.c:42
size_t part_number
Definition: ior.h:176
char * apiVersion
Definition: ior.h:88
static void * HogMemory(IOR_param_t *params)
Definition: ior.c:1094
int summary_every_test
Definition: ior.h:127
static void DestroyTest(IOR_test_t *test)
Definition: ior.c:539
int setTimeStampSignature
Definition: ior.h:139
int hdfs_replicas
Definition: ior.h:172
unsigned int openFlags
Definition: ior.h:85
int fsyncPerWrite
Definition: ior.h:152
int interTestDelay
Definition: ior.h:100
#define GIBIBYTE
Definition: iordef.h:88
int(* access)(const char *path, int mode, IOR_param_t *param)
Definition: aiori.h:80
int lustre_start_ost
Definition: ior.h:189
#define ERR_SIMPLE(MSG)
Definition: iordef.h:178
#define WRITE
Definition: iordef.h:95
#define EWARN(MSG)
Definition: iordef.h:156
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE *world_out)
Definition: ior.c:54
int maxTimeDuration
Definition: ior.h:136
char * testFileName
Definition: ior.h:90
void(* close)(void *, IOR_param_t *)
Definition: aiori.h:72
#define VERBOSE_5
Definition: iordef.h:107
char * stoneWallingStatusFile
Definition: ior.h:134
unsigned int mode
Definition: ior.h:84
void ShowTestStart(IOR_param_t *params)
Definition: ior-output.c:312
#define READ
Definition: iordef.h:97
MPI_Comm testComm
Definition: ior.h:158
int taskPerNodeOffset
Definition: ior.h:106
void init_clock()
Definition: utilities.c:560
#define IOR_CREAT
Definition: aiori.h:37
static char ** ParseFileName(char *, int *)
Definition: ior.c:701
const ior_aiori_t * aiori_select(const char *api)
Definition: aiori.c:184
void *(* open)(char *, IOR_param_t *)
Definition: aiori.h:69
double sum
Definition: ior-internal.h:35
int fsync
Definition: ior.h:153
double var
Definition: ior-internal.h:33
struct IOR_test_t * next
Definition: ior.h:226
IOR_offset_t aggFileSizeFromXfer
Definition: ior.h:218
hdfsFS hdfs_fs
Definition: ior.h:171
double writeTime
Definition: ior.h:208
#define IOR_IRGRP
Definition: aiori.h:48
double wall_clock_delta
Definition: utilities.c:505
tPort hdfs_name_node_port
Definition: ior.h:170
int outlierThreshold
Definition: ior.h:137
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
Definition: ior.c:291
int intraTestBarriers
Definition: ior.h:202
void GetTestFileName(char *testFileName, IOR_param_t *test)
Definition: ior.c:742
MPI_Comm testComm
Definition: utilities.c:61
int reorderTasksRandom
Definition: ior.h:107
int checkWrite
Definition: ior.h:109
unsigned int reseed_incompressible_prng
Definition: ior.c:619
void(* fsync)(void *, IOR_param_t *)
Definition: aiori.h:75
void ShowSetup(IOR_param_t *params)
Definition: ior-output.c:402
void SeedRandGen(MPI_Comm testComm)
Definition: utilities.c:463
Definition: ior.h:58
int verbose
Definition: ior.h:138
static void ReduceIterResults(IOR_test_t *test, double **timer, int rep, int access)
Definition: ior.c:852
static void XferBuffersSetup(IOR_io_buffers *ioBuffers, IOR_param_t *test, int pretendRank)
Definition: ior.c:974
char * CurrentTimeString(void)
Definition: utilities.c:97
void PrintRemoveTiming(double start, double finish, int rep)
Definition: ior-output.c:690
#define MPI_CHECK(MPI_STATUS, MSG)
Definition: iordef.h:192
static void FillBuffer(void *buffer, IOR_param_t *test, unsigned long long offset, int fillrank)
Definition: ior.c:622
static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t *test)
Definition: ior.c:905
IOR_offset_t expectedAggFileSize
Definition: ior.h:120
char * platform
Definition: ior.h:89
IOR_offset_t aggFileSizeFromStat
Definition: ior.h:217
int singleXferAttempt
Definition: ior.h:151
static void DestroyTests(IOR_test_t *tests_head)
Definition: ior.c:545
static IOR_offset_t * GetOffsetArrayRandom(IOR_param_t *test, int pretendRank, int access)
Definition: ior.c:1709
Definition: ior.h:47
IOR_offset_t aggFileSizeForBW
Definition: ior.h:219
#define IOR_IRUSR
Definition: aiori.h:44
int numTasksWorld
Definition: utilities.c:56
FILE * out_resultfile
Definition: utilities.c:64
double GetTimeStamp(void)
Definition: utilities.c:511
void PrintShortSummary(IOR_test_t *test)
Definition: ior-output.c:611
int stoneWallingWearOut
Definition: ior.h:132
static const ior_aiori_t * backend
Definition: ior.c:43
void PrintRepeatEnd()
Definition: ior-output.c:189
IOR_test_t * CreateTest(IOR_param_t *init_params, int test_num)
Definition: ior.c:523
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, void *fd, int access, IOR_io_buffers *ioBuffers)
Definition: ior.c:1837
#define IOR_IWGRP
Definition: aiori.h:49
static void * safeMalloc(uint64_t size)
Definition: ior.c:493
char * URI
Definition: ior.h:175
static void TestIoSys(IOR_test_t *)
Definition: ior.c:1123
void * buffer
Definition: ior.h:60
void PrintTableHeader()
Definition: ior-output.c:20
void DistributeHints(void)
Definition: ior.c:558
void PrintLongSummaryAllTests(IOR_test_t *tests_head)
Definition: ior-output.c:585
static size_t CompareBuffers(void *expectedBuffer, void *unknownBuffer, size_t size, IOR_offset_t transferCount, IOR_param_t *test, int access)
Definition: ior.c:330
int keepFileWithError
Definition: ior.h:112
void * mmap_ptr
Definition: ior.h:155
int randomSeed
Definition: ior.h:142
#define FALSE
Definition: iordef.h:71
int tasksPerNode
Definition: utilities.c:59
int rankOffset
Definition: utilities.c:58
int useExistingTestFile
Definition: ior.h:129
enum PACKET_TYPE dataPacketType
Definition: ior.h:147
int beegfs_numTargets
Definition: ior.h:198
void init_IOR_Param_t(IOR_param_t *p)
Definition: ior.c:168
int useFileView
Definition: ior.h:122
int readFile
Definition: ior.h:102
void *(* create)(char *, IOR_param_t *)
Definition: aiori.h:68
long long int IOR_size_t
Definition: iordef.h:124
#define WARN(MSG)
Definition: iordef.h:145
int nodes
Definition: ior.h:95
void * readCheckBuffer
Definition: ior.h:62
int tasksPerNode
Definition: ior.h:96
void aiori_finalize()
Definition: aiori.c:173
int hdfs_block_size
Definition: ior.h:173
int randomOffset
Definition: ior.h:144
int numTasks
Definition: ior.h:94
size_t memoryPerTask
Definition: ior.h:145
void aiori_initialize()
Definition: aiori.c:156
const char * aiori_default(void)
Definition: aiori.c:231
#define VERBOSE_2
Definition: iordef.h:104
int individualDataSets
Definition: ior.h:163
int writeFile
Definition: ior.h:103
static void * aligned_buffer_alloc(size_t size)
Definition: ior.c:462
void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep)
Definition: ior-output.c:210
size_t pairs_accessed
Definition: ior.h:211
uint64_t stoneWallingWearOutIterations
Definition: ior.h:133
#define MAX_STR
Definition: iordef.h:109
#define MAX_HINTS
Definition: iordef.h:110
int collective
Definition: ior.h:115
IOR_offset_t offset
Definition: ior.h:119
static int CountErrors(IOR_param_t *test, int access, int errors)
Definition: ior.c:430
#define VERBOSE_4
Definition: iordef.h:106
#define MAX_PATHLEN
Definition: utilities.h:35
double mean
Definition: ior-internal.h:32
static void * malloc_and_touch(size_t size)
Definition: ior.c:1012
int open
Definition: ior.h:101
static void FillIncompressibleBuffer(void *buffer, IOR_param_t *test)
Definition: ior.c:605
static char * PrependDir(IOR_param_t *, char *)
Definition: ior.c:791
#define IOR_RDWR
Definition: aiori.h:35
void DelaySecs(int delay)
Definition: utilities.c:617
#define VERBOSE_1
Definition: iordef.h:103
IOR_results_t * results
Definition: ior.h:225
int verbose
Definition: utilities.c:60
IOR_test_t * ParseCommandLine(int argc, char **argv)
MPI_Comm mpi_comm_world
Definition: utilities.c:62
int preallocate
Definition: ior.h:121
int deadlineForStonewalling
Definition: ior.h:131
char * api
Definition: ior.h:87
#define FILENAME_DELIMITER
Definition: iordef.h:117
int repCounter
Definition: ior.h:98
FILE * out_logfile
Definition: utilities.c:63
long long int IOR_offset_t
Definition: iordef.h:123
#define IOR_IWUSR
Definition: aiori.h:45
double readTime
Definition: ior.h:209
static IOR_offset_t * GetOffsetArraySequential(IOR_param_t *test, int pretendRank)
Definition: ior.c:1659
int rank
Definition: utilities.c:57
int numTasks
IOR_offset_t blockSize
Definition: ior.h:117
#define TRUE
Definition: iordef.h:75
IOR_offset_t(* xfer)(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *)
Definition: aiori.h:70
int lustre_set_striping
Definition: ior.h:190
void ShowTestEnd(IOR_test_t *tptr)
Definition: ior-output.c:388
const char * hdfs_name_node
Definition: ior.h:169
int beegfs_chunkSize
Definition: ior.h:199
#define NULL
Definition: iordef.h:79
int id
Definition: ior.h:201
void AllocResults(IOR_test_t *test)
Definition: ior.c:502