IOR
md-workbench.c
Go to the documentation of this file.
1 #define _POSIX_C_SOURCE 199309L
2 #include <mpi.h>
3 
4 #include <time.h>
5 #include <stdio.h>
6 #include <errno.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <math.h>
10 #include <assert.h>
11 
12 #include "md-workbench.h"
13 #include "config.h"
14 #include "aiori.h"
15 #include "utilities.h"
16 #include "parse_options.h"
17 
18 /*
19 This is the modified version md-workbench-fs that can utilize AIORI.
20 It follows the hierarchical file system semantics in contrast to the md-workbench (without -fs) which has dataset and object semantics.
21  */
22 
23 #define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH
24 
/*
 * Print a diagnostic (without aborting) when an MPI call did not return MPI_SUCCESS.
 * NOTE: this expands to a bare `if` statement and is used without a trailing
 * semicolon throughout the file, so its shape is kept as is.
 */
#define CHECK_MPI_RET(ret) if ((ret) != MPI_SUCCESS){ printf("Unexpected error in MPI on Line %d\n", __LINE__);}
/* Cast prefix for values that are printed as long long unsigned. */
#define LLU (long long unsigned)
/*
 * Smaller of two values. Arguments and the whole expansion are parenthesized so
 * that operands such as `x & y` are compared as a unit.
 * Each argument may be evaluated twice: do not pass expressions with side effects.
 */
#define min(a,b) ((a) < (b) ? (a) : (b))

/*
 * printf-style output to o.logfile, flushed immediately.
 * The trailing semicolon after while(0) is kept so that existing call sites are unaffected.
 */
#define oprintf(...) do { fprintf(o.logfile, __VA_ARGS__); fflush(o.logfile); } while(0);
30 
// Outcome counters for one kind of operation: successful and failed attempts.
// end_phase() sums these across ranks as plain MPI_INT values, so both members must stay int.
typedef struct {
  int suc; // operations that succeeded
  int err; // operations that failed
} op_stat_t;
36 
37 // A runtime for an operation and when the operation was started
38 typedef struct{
40  float runtime;
42 
43 
44 // statistics for running a single phase
45 typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!!
46  double t; // maximum time
47  double * t_all;
48 
51 
56 
57  // time measurements of individual runs, these are not returned for now by the API!
58  uint64_t repeats;
63 
68 
69  // the maximum time for any single operation
70  double max_op_time;
73 } phase_stat_t;
74 
79  MPI_Comm com;
80  FILE * logfile;
81 
82  char * interface;
83  int num;
84  int precreate;
86 
87  mdworkbench_results_t * results; // the results
88 
90  char * packetTypeStr;
91  int offset;
94  int file_size;
95  int read_only;
98  ior_memory_flags gpuMemoryFlags; /* use the GPU to store the data */
99  int gpuDirect; /* use gpuDirect, this influences gpuMemoryFlags as well */
100  int gpuID; /* the GPU to use for gpuDirect or memory options */
101 
104 
108 
109  //int limit_memory;
110  //int limit_memory_between_phases;
111 
114 
117 
119  char * prefix; // directory to work on
120 
122  int rank;
123  int size;
126 
129 
131 };
132 
134 
135 static void def_dset_name(char * out_name, int n, int d){
136  sprintf(out_name, "%s/%d_%d", o.prefix, n, d);
137 }
138 
139 static void def_obj_name(char * out_name, int n, int d, int i){
140  sprintf(out_name, "%s/%d_%d/file-%d", o.prefix, n, d, i);
141 }
142 
144  o = (struct benchmark_options){
145  .interface = "POSIX",
146  .prefix = "./out",
147  .num = 1000,
148  .random_seed = -1,
149  .precreate = 3000,
150  .dset_count = 10,
151  .offset = 1,
152  .iterations = 3,
153  .file_size = 3901,
154  .packetTypeStr = "t",
155  .run_info_file = "md-workbench.status",
156  .gpuID = -1,
157  };
158 }
159 
160 static void mdw_wait(double runtime){
161  double waittime = runtime * o.relative_waiting_factor;
162  //printf("waittime: %e\n", waittime);
163  if(waittime < 0.01){
164  double start;
165  start = GetTimeStamp();
166  double cur = GetTimeStamp();
167  double end = cur + waittime;
168  while (cur < end){
169  cur = GetTimeStamp();
170  }
171  }else{
172  struct timespec w;
173  w.tv_sec = (time_t) (waittime);
174  w.tv_nsec = (long) ((waittime - w.tv_sec) * 1000 * 1000 * 1000);
175  nanosleep(& w, NULL);
176  }
177 }
178 
179 static void init_stats(phase_stat_t * p, size_t repeats){
180  memset(p, 0, sizeof(phase_stat_t));
181  p->repeats = repeats;
182  size_t timer_size = repeats * sizeof(time_result_t);
183  p->time_create = (time_result_t *) malloc(timer_size);
184  p->time_read = (time_result_t *) malloc(timer_size);
185  p->time_stat = (time_result_t *) malloc(timer_size);
186  p->time_delete = (time_result_t *) malloc(timer_size);
187 }
188 
189 static float add_timed_result(double start, double phase_start_timer, time_result_t * results, size_t pos, double * max_time, double * out_op_time){
190  float curtime = start - phase_start_timer;
191  double op_time = GetTimeStamp() - start;
192  results[pos].runtime = (float) op_time;
193  results[pos].time_since_app_start = curtime;
194  if (op_time > *max_time){
195  *max_time = op_time;
196  }
197  *out_op_time = op_time;
198  return curtime;
199 }
200 
202  printf("phase\t\td name\tcreate\tdelete\tob nam\tcreate\tread\tstat\tdelete\tt_inc_b\tt_no_bar\tthp\tmax_t\n");
203 }
204 
205 static int sum_err(phase_stat_t * p){
206  return p->dset_create.err + p->dset_delete.err + p->obj_create.err + p->obj_read.err + p->obj_stat.err + p->obj_delete.err;
207 }
208 
/*
 * Arithmetic mean of the first `count` values of arr.
 *
 * The element count is taken from the `count` argument (previously it was ignored
 * in favour of the global o.size; the callers in this file pass o.size, so their
 * results are unchanged). Returns 0.0 for count <= 0 instead of dividing by zero.
 */
static double statistics_mean(int count, double * arr){
  if(count <= 0){
    return 0.0;
  }
  double sum = 0;
  for(int i=0; i < count; i++){
    sum += arr[i];
  }
  return sum / count;
}
216 
/*
 * Sample standard deviation (divisor count-1) of the first `count` values of arr.
 *
 * The element count is taken from the `count` argument rather than the global
 * o.size (callers in this file pass o.size). With fewer than two values the
 * deviation is undefined; 0.0 is returned instead of the NaN that 0/0 would give,
 * e.g. when running with a single process.
 */
static double statistics_std_dev(int count, double * arr){
  if(count < 2){
    return 0.0;
  }
  const double mean = statistics_mean(count, arr);
  double sum = 0;
  for(int i=0; i < count; i++){
    const double diff = mean - arr[i];
    sum += diff * diff;
  }
  return sqrt(sum / (count - 1));
}
225 
/*
 * Find the smallest and largest of the first `count` values of arr.
 *
 * out_min / out_max receive the results. Both are seeded from arr[0], so negative
 * values are handled correctly (the maximum was previously seeded with 0).
 * The element count is taken from the `count` argument rather than the global
 * o.size (callers in this file pass o.size). For count <= 0 both outputs are set to 0.
 */
static void statistics_minmax(int count, double * arr, double * out_min, double * out_max){
  if(count <= 0){
    *out_min = 0;
    *out_max = 0;
    return;
  }
  double lowest = arr[0];
  double highest = arr[0];
  for(int i=1; i < count; i++){
    if(arr[i] < lowest){
      lowest = arr[i];
    }
    if(arr[i] > highest){
      highest = arr[i];
    }
  }
  *out_min = lowest;
  *out_max = highest;
}
236 
237 static void print_p_stat(char * buff, const char * name, phase_stat_t * p, double t, int print_global){
238  const double tp = (double)(p->obj_create.suc + p->obj_read.suc) * o.file_size / t / 1024 / 1024;
239 
240  const int errs = sum_err(p);
241  double r_min = 0;
242  double r_max = 0;
243  double r_mean = 0;
244  double r_std = 0;
245 
246  if(p->t_all){
247  // we can compute several derived values that provide insight about quality of service, latency distribution and load balancing
248  statistics_minmax(o.size, p->t_all, & r_min, & r_max);
249  r_mean = statistics_mean(o.size, p->t_all);
250  r_std = statistics_std_dev(o.size, p->t_all);
251  }
252 
253  if (o.print_detailed_stats){
254  sprintf(buff, "%s \t%d\t%d\t%d\t%d\t%d\t%d\t%.3fs\t%.3fs\t%.2f MiB/s %.4e", name, p->dset_create.suc, p->dset_delete.suc, p->obj_create.suc, p->obj_read.suc, p->obj_stat.suc, p->obj_delete.suc, p->t, t, tp, p->max_op_time);
255 
256  if (errs > 0){
257  sprintf(buff, "%s err\t%d\t%d\t%d\t%d\t%d\t%d", name, p->dset_create.err, p->dset_delete.err, p->obj_create.err, p->obj_read.err, p->obj_stat.err, p->obj_delete.err);
258  }
259  }else{
260  int pos = 0;
261  // single line
262  pos += sprintf(buff, "%s process max:%.2fs ", name, t);
263  if(print_global){
264  pos += sprintf(buff + pos, "min:%.2fs mean: %.2fs balance:%.1f stddev:%.1f ", r_min, r_mean, r_min/r_max * 100.0, r_std);
265  }
266  int ioops_per_iter = 4;
267  if(o.read_only){
268  ioops_per_iter = 2;
269  }
270 
271  double rate;
272 
273  switch(name[0]){
274  case('b'):
275  rate = p->obj_read.suc * ioops_per_iter / t;
276  pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es",
277  rate, // write, stat, read, delete
278  p->obj_read.suc,
279  p->obj_read.suc / t,
280  tp,
281  p->max_op_time);
282 
283  if(o.relative_waiting_factor > 1e-9){
284  pos += sprintf(buff + pos, " waiting_factor:%.2f", o.relative_waiting_factor);
285  }
286  break;
287  case('p'):
288  rate = (p->dset_create.suc + p->obj_create.suc) / t;
289  pos += sprintf(buff + pos, "rate:%.1f iops/s dsets: %d objects:%d rate:%.3f dset/s rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es",
290  rate,
291  p->dset_create.suc,
292  p->obj_create.suc,
293  p->dset_create.suc / t,
294  p->obj_create.suc / t,
295  tp,
296  p->max_op_time);
297  break;
298  case('c'):
299  rate = (p->obj_delete.suc + p->dset_delete.suc) / t;
300  pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d dsets: %d rate:%.1f obj/s rate:%.3f dset/s op-max:%.4es",
301  rate,
302  p->obj_delete.suc,
303  p->dset_delete.suc,
304  p->obj_delete.suc / t,
305  p->dset_delete.suc / t,
306  p->max_op_time);
307  break;
308  default:
309  pos = sprintf(buff, "%s: unknown phase", name);
310  break;
311  }
312 
313  if(print_global){
315  res->errors = errs;
316  o.results->errors += errs;
317  res->rate = rate;
318  res->max_op_time = p->max_op_time;
319  res->runtime = t;
320  res->iterations_done = p->repeats;
321  }
322 
323  if(! o.quiet_output || errs > 0){
324  pos += sprintf(buff + pos, " (%d errs", errs);
325  if(errs > 0){
326  pos += sprintf(buff + pos, "!!!)" );
327  }else{
328  pos += sprintf(buff + pos, ")" );
329  }
330  }
331  if(! o.quiet_output && p->stonewall_iterations){
332  pos += sprintf(buff + pos, " stonewall-iter:%d", p->stonewall_iterations);
333  }
334 
335  if(p->stats_read.max > 1e-9){
336  time_statistics_t stat = p->stats_read;
337  pos += sprintf(buff + pos, " read(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max);
338  }
339  if(p->stats_stat.max > 1e-9){
340  time_statistics_t stat = p->stats_stat;
341  pos += sprintf(buff + pos, " stat(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max);
342  }
343  if(p->stats_create.max > 1e-9){
345  pos += sprintf(buff + pos, " create(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max);
346  }
347  if(p->stats_delete.max > 1e-9){
349  pos += sprintf(buff + pos, " delete(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max);
350  }
351  }
352 }
353 
355  return x->runtime < y->runtime ? -1 : (x->runtime > y->runtime ? +1 : 0);
356 }
357 
358 static double runtime_quantile(int repeats, time_result_t * times, float quantile){
359  int pos = round(quantile * (repeats - 1) + 0.49);
360  assert(pos < repeats);
361  return times[pos].runtime;
362 }
363 
364 static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t * times, time_result_t * global_times){
365  uint64_t count = 0;
366  int ret;
367  // due to stonewall, the number of repeats may be different per process
368  if(o.rank == 0){
369  MPI_Status status;
370  memcpy(global_times, times, repeats * 2 * sizeof(float));
371  count += repeats;
372  for(int i=1; i < o.size; i++){
373  int cnt;
374  ret = MPI_Recv(& global_times[count], max_repeats*2, MPI_FLOAT, i, 888, o.com, & status);
375  CHECK_MPI_RET(ret)
376  MPI_Get_count(& status, MPI_FLOAT, & cnt);
377  count += cnt / 2;
378  }
379  }else{
380  ret = MPI_Send(times, repeats * 2, MPI_FLOAT, 0, 888, o.com);
381  CHECK_MPI_RET(ret)
382  }
383 
384  return count;
385 }
386 
387 static void compute_histogram(const char * name, time_result_t * times, time_statistics_t * stats, size_t repeats, int writeLatencyFile){
388  if(writeLatencyFile && o.latency_file_prefix ){
389  char file[MAX_PATHLEN];
390  sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name);
391  FILE * f = fopen(file, "w+");
392  if(f == NULL){
393  ERRF("%d: Error writing to latency file: %s", o.rank, file);
394  return;
395  }
396  fprintf(f, "time,runtime\n");
397  for(size_t i = 0; i < repeats; i++){
398  fprintf(f, "%.7f,%.4e\n", times[i].time_since_app_start, times[i].runtime);
399  }
400  fclose(f);
401  }
402  // now sort the times and pick the quantiles
403  qsort(times, repeats, sizeof(time_result_t), (int (*)(const void *, const void *)) compare_floats);
404  stats->min = times[0].runtime;
405  stats->q1 = runtime_quantile(repeats, times, 0.25);
406  if(repeats % 2 == 0){
407  stats->median = (times[repeats/2].runtime + times[repeats/2 - 1].runtime)/2.0;
408  }else{
409  stats->median = times[repeats/2].runtime;
410  }
411  stats->q3 = runtime_quantile(repeats, times, 0.75);
412  stats->q90 = runtime_quantile(repeats, times, 0.90);
413  stats->q99 = runtime_quantile(repeats, times, 0.99);
414  stats->max = times[repeats - 1].runtime;
415 }
416 
417 static void end_phase(const char * name, phase_stat_t * p){
418  int ret;
419  char buff[MAX_PATHLEN];
420 
421  //char * limit_memory_P = NULL;
422  MPI_Barrier(o.com);
423 
424  int max_repeats = o.precreate * o.dset_count;
425  if(strcmp(name,"benchmark") == 0){
426  max_repeats = o.num * o.dset_count;
427  }
428 
429  // prepare the summarized report
430  phase_stat_t g_stat;
431  init_stats(& g_stat, (o.rank == 0 ? 1 : 0) * ((size_t) max_repeats) * o.size);
432  // reduce timers
433  ret = MPI_Reduce(& p->t, & g_stat.t, 2, MPI_DOUBLE, MPI_MAX, 0, o.com);
434  CHECK_MPI_RET(ret)
435  if(o.rank == 0) {
436  g_stat.t_all = (double*) malloc(sizeof(double) * o.size);
437  }
438  ret = MPI_Gather(& p->t, 1, MPI_DOUBLE, g_stat.t_all, 1, MPI_DOUBLE, 0, o.com);
439  CHECK_MPI_RET(ret)
440  ret = MPI_Reduce(& p->dset_create, & g_stat.dset_create, 2*(2+4), MPI_INT, MPI_SUM, 0, o.com);
441  CHECK_MPI_RET(ret)
442  ret = MPI_Reduce(& p->max_op_time, & g_stat.max_op_time, 1, MPI_DOUBLE, MPI_MAX, 0, o.com);
443  CHECK_MPI_RET(ret)
444  if( p->stonewall_iterations ){
445  ret = MPI_Reduce(& p->repeats, & g_stat.repeats, 1, MPI_UINT64_T, MPI_MIN, 0, o.com);
446  CHECK_MPI_RET(ret)
448  }
449  int write_rank0_latency_file = (o.rank == 0) && ! o.latency_keep_all;
450 
451  if(strcmp(name,"precreate") == 0){
452  uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_create, g_stat.time_create);
453  if(o.rank == 0){
454  compute_histogram("precreate-all", g_stat.time_create, & g_stat.stats_create, repeats, o.latency_keep_all);
455  }
456  compute_histogram("precreate", p->time_create, & p->stats_create, p->repeats, write_rank0_latency_file);
457  }else if(strcmp(name,"cleanup") == 0){
458  uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_delete, g_stat.time_delete);
459  if(o.rank == 0) {
460  compute_histogram("cleanup-all", g_stat.time_delete, & g_stat.stats_delete, repeats, o.latency_keep_all);
461  }
462  compute_histogram("cleanup", p->time_delete, & p->stats_delete, p->repeats, write_rank0_latency_file);
463  }else if(strcmp(name,"benchmark") == 0){
464  uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_read, g_stat.time_read);
465  if(o.rank == 0) {
466  compute_histogram("read-all", g_stat.time_read, & g_stat.stats_read, repeats, o.latency_keep_all);
467  }
468  compute_histogram("read", p->time_read, & p->stats_read, p->repeats, write_rank0_latency_file);
469 
470  repeats = aggregate_timers(p->repeats, max_repeats, p->time_stat, g_stat.time_stat);
471  if(o.rank == 0) {
472  compute_histogram("stat-all", g_stat.time_stat, & g_stat.stats_stat, repeats, o.latency_keep_all);
473  }
474  compute_histogram("stat", p->time_stat, & p->stats_stat, p->repeats, write_rank0_latency_file);
475 
476  if(! o.read_only){
477  repeats = aggregate_timers(p->repeats, max_repeats, p->time_create, g_stat.time_create);
478  if(o.rank == 0) {
479  compute_histogram("create-all", g_stat.time_create, & g_stat.stats_create, repeats, o.latency_keep_all);
480  }
481  compute_histogram("create", p->time_create, & p->stats_create, p->repeats, write_rank0_latency_file);
482 
483  repeats = aggregate_timers(p->repeats, max_repeats, p->time_delete, g_stat.time_delete);
484  if(o.rank == 0) {
485  compute_histogram("delete-all", g_stat.time_delete, & g_stat.stats_delete, repeats, o.latency_keep_all);
486  }
487  compute_histogram("delete", p->time_delete, & p->stats_delete, p->repeats, write_rank0_latency_file);
488  }
489  }
490 
491  if (o.rank == 0){
492  //print the stats:
493  print_p_stat(buff, name, & g_stat, g_stat.t, 1);
494  oprintf("%s\n", buff);
495  }
496 
497  if(o.process_report){
498  if(o.rank == 0){
499  print_p_stat(buff, name, p, p->t, 0);
500  oprintf("0: %s\n", buff);
501  for(int i=1; i < o.size; i++){
502  MPI_Recv(buff, MAX_PATHLEN, MPI_CHAR, i, 4711, o.com, MPI_STATUS_IGNORE);
503  oprintf("%d: %s\n", i, buff);
504  }
505  }else{
506  print_p_stat(buff, name, p, p->t, 0);
507  MPI_Send(buff, MAX_PATHLEN, MPI_CHAR, 0, 4711, o.com);
508  }
509  }
510 
511  if(g_stat.t_all){
512  free(g_stat.t_all);
513  }
514  if(p->time_create){
515  free(p->time_create);
516  free(p->time_read);
517  free(p->time_stat);
518  free(p->time_delete);
519  }
520  if(g_stat.time_create){
521  free(g_stat.time_create);
522  free(g_stat.time_read);
523  free(g_stat.time_stat);
524  free(g_stat.time_delete);
525  }
526 
527  // copy the result back for the API
529  memcpy(& res->stats_create, & g_stat.stats_create, sizeof(time_statistics_t));
530  memcpy(& res->stats_read, & g_stat.stats_read, sizeof(time_statistics_t));
531  memcpy(& res->stats_stat, & g_stat.stats_stat, sizeof(time_statistics_t));
532  memcpy(& res->stats_delete, & g_stat.stats_delete, sizeof(time_statistics_t));
533 
534  o.results->count++;
535 
536  // allocate memory if necessary
537  // ret = mem_preallocate(& limit_memory_P, o.limit_memory_between_phases, o.verbosity >= 3);
538  // if( ret != 0){
539  // printf("%d: Error allocating memory!\n", o.rank);
540  // }
541  // mem_free_preallocated(& limit_memory_P);
542 }
543 
544 void run_precreate(phase_stat_t * s, int current_index){
545  char dset[MAX_PATHLEN];
546  char obj_name[MAX_PATHLEN];
547  int ret;
548 
549  for(int i=0; i < o.dset_count; i++){
550  def_dset_name(dset, o.rank, i);
551 
552  ret = o.backend->mkdir(dset, DIRMODE, o.backend_options);
553  if (ret == 0){
554  s->dset_create.suc++;
555  }else{
556  s->dset_create.err++;
557  if (! o.ignore_precreate_errors){
558  ERRF("%d: Error while creating the dset: %s", o.rank, dset);
559  }
560  }
561  }
562 
565  double op_timer; // timer for individual operations
566  size_t pos = -1; // position inside the individual measurement array
567  double op_time;
568 
569  // create the obj
570  for(int f=current_index; f < o.precreate; f++){
571  for(int d=0; d < o.dset_count; d++){
572  pos++;
573  def_obj_name(obj_name, o.rank, d, f);
574 
575  op_timer = GetTimeStamp();
576  aiori_fd_t * aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options);
577  if (NULL == aiori_fh){
578  FAIL("Unable to open file %s", obj_name);
579  }
581  if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
582  s->obj_create.suc++;
583  }else{
584  s->obj_create.err++;
585  if (! o.ignore_precreate_errors){
586  ERRF("%d: Error while creating the obj: %s", o.rank, obj_name);
587  }
588  }
589  o.backend->close(aiori_fh, o.backend_options);
590 
591  add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time);
592 
593  if (o.verbosity >= 2){
594  oprintf("%d: write %s:%s (%d) pretend: %d\n", o.rank, dset, obj_name, ret, o.rank);
595  }
596  }
597  }
599 }
600 
601 /* FIFO: create a new file, write to it. Then read from the first created file, delete it... */
602 void run_benchmark(phase_stat_t * s, int * current_index_p){
603  char obj_name[MAX_PATHLEN];
604  int ret;
607  double op_timer; // timer for individual operations
608  size_t pos = -1; // position inside the individual measurement array
609  int start_index = *current_index_p;
610  int total_num = o.num;
611  int armed_stone_wall = (o.stonewall_timer > 0);
612  int f;
613  double phase_allreduce_time = 0;
614  aiori_fd_t * aiori_fh;
615 
616  for(f=0; f < total_num; f++){
617  float bench_runtime = 0; // the time since start
618  for(int d=0; d < o.dset_count; d++){
619  double op_time;
620  struct stat stat_buf;
621  const int prevFile = f + start_index;
622  pos++;
623 
624  int readRank = (o.rank - o.offset * (d+1)) % o.size;
625  readRank = readRank < 0 ? readRank + o.size : readRank;
626  def_obj_name(obj_name, readRank, d, prevFile);
627 
628  op_timer = GetTimeStamp();
629 
630  ret = o.backend->stat(obj_name, & stat_buf, o.backend_options);
631  // TODO potentially check return value must be identical to o.file_size
632 
633  bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_stat, pos, & s->max_op_time, & op_time);
634  if(o.relative_waiting_factor > 1e-9) {
635  mdw_wait(op_time);
636  }
637 
638  if (o.verbosity >= 2){
639  oprintf("%d: stat %s (%d)\n", o.rank, obj_name, ret);
640  }
641 
642  if(ret != 0){
643  if (o.verbosity)
644  ERRF("%d: Error while stating the obj: %s", o.rank, obj_name);
645  s->obj_stat.err++;
646  continue;
647  }
648  s->obj_stat.suc++;
649 
650  if (o.verbosity >= 2){
651  oprintf("%d: read %s pretend: %d\n", o.rank, obj_name, readRank);
652  }
653 
654  op_timer = GetTimeStamp();
655  aiori_fh = o.backend->open(obj_name, IOR_RDONLY, o.backend_options);
656  if (NULL == aiori_fh){
657  FAIL("Unable to open file %s", obj_name);
658  }
659  if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) {
660  if(o.verify_read){
661  if(verify_memory_pattern(prevFile * o.dset_count + d, buf, o.file_size, o.random_seed, readRank, o.dataPacketType, o.gpuMemoryFlags) == 0){
662  s->obj_read.suc++;
663  }else{
664  s->obj_read.err++;
665  }
666  }else{
667  s->obj_read.suc++;
668  }
669  }else{
670  s->obj_read.err++;
671  WARNF("%d: Error while reading the obj: %s", o.rank, obj_name);
672  }
673  o.backend->close(aiori_fh, o.backend_options);
674 
675  bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_read, pos, & s->max_op_time, & op_time);
676  if(o.relative_waiting_factor > 1e-9) {
677  mdw_wait(op_time);
678  }
679  if(o.read_only){
680  continue;
681  }
682 
683  op_timer = GetTimeStamp();
684  o.backend->delete(obj_name, o.backend_options);
685  bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time);
686  if(o.relative_waiting_factor > 1e-9) {
687  mdw_wait(op_time);
688  }
689 
690  if (o.verbosity >= 2){
691  oprintf("%d: delete %s\n", o.rank, obj_name);
692  }
693  s->obj_delete.suc++;
694 
695  int writeRank = (o.rank + o.offset * (d+1)) % o.size;
696  const int newFileIndex = o.precreate + prevFile;
697  def_obj_name(obj_name, writeRank, d, newFileIndex);
698 
699  op_timer = GetTimeStamp();
700  aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options);
701  if (NULL != aiori_fh){
704 
705  if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
706  s->obj_create.suc++;
707  }else{
708  s->obj_create.err++;
709  if (! o.ignore_precreate_errors){
710  ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name);
711  }
712  }
713  o.backend->close(aiori_fh, o.backend_options);
714  }else{
715  if (! o.ignore_precreate_errors){
716  ERRF("%d: Error while creating the obj: %s", o.rank, obj_name);
717  }
718  WARNF("Unable to open file %s", obj_name);
719  s->obj_create.err++;
720  }
721  bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time);
722  if(o.relative_waiting_factor > 1e-9) {
723  mdw_wait(op_time);
724  }
725 
726  if (o.verbosity >= 2){
727  oprintf("%d: write %s (%d) pretend: %d\n", o.rank, obj_name, ret, writeRank);
728  }
729  } // end loop
730 
731  if(armed_stone_wall && bench_runtime >= o.stonewall_timer){
732  if(o.verbosity){
733  oprintf("%d: stonewall runtime %fs (%ds)\n", o.rank, bench_runtime, o.stonewall_timer);
734  }
736  s->stonewall_iterations = f;
737  break;
738  }
739  armed_stone_wall = 0;
740  // wear out mode, now reduce the maximum
741  int cur_pos = f + 1;
742  phase_allreduce_time = GetTimeStamp() - s->phase_start_timer;
743  int ret = MPI_Allreduce(& cur_pos, & total_num, 1, MPI_INT, MPI_MAX, o.com);
744  CHECK_MPI_RET(ret)
746  s->stonewall_iterations = total_num;
747  if(o.rank == 0){
748  oprintf("stonewall wear out %fs (%d iter)\n", bench_runtime, total_num);
749  }
750  if(f == total_num){
751  break;
752  }
753  }
754  }
755  s->t = GetTimeStamp() - s->phase_start_timer + phase_allreduce_time;
756  if(armed_stone_wall && o.stonewall_timer_wear_out){
757  int f = total_num;
758  int ret = MPI_Allreduce(& f, & total_num, 1, MPI_INT, MPI_MAX, o.com);
759  CHECK_MPI_RET(ret)
760  s->stonewall_iterations = total_num;
761  }
763  // TODO FIXME
764  int sh = s->stonewall_iterations;
765  int ret = MPI_Allreduce(& sh, & s->stonewall_iterations, 1, MPI_INT, MPI_MAX, o.com);
766  CHECK_MPI_RET(ret)
767  }
768 
769  if(! o.read_only) {
770  *current_index_p += f;
771  }
772  s->repeats = pos + 1;
774 }
775 
776 void run_cleanup(phase_stat_t * s, int start_index){
777  char dset[MAX_PATHLEN];
778  char obj_name[MAX_PATHLEN];
779  double op_timer; // timer for individual operations
780  size_t pos = -1; // position inside the individual measurement array
781 
782  for(int d=0; d < o.dset_count; d++){
783  for(int f=0; f < o.precreate; f++){
784  double op_time;
785  pos++;
786  def_obj_name(obj_name, o.rank, d, f + start_index);
787 
788  op_timer = GetTimeStamp();
789  o.backend->delete(obj_name, o.backend_options);
790  add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time);
791 
792  if (o.verbosity >= 2){
793  oprintf("%d: delete %s\n", o.rank, obj_name);
794  }
795  s->obj_delete.suc++;
796  }
797 
798  def_dset_name(dset, o.rank, d);
799  if (o.backend->rmdir(dset, o.backend_options) == 0) {
800  s->dset_delete.suc++;
801  }else{
802  oprintf("Unable to remove directory %s\n", dset);
803  }
804  if (o.verbosity >= 2){
805  oprintf("%d: delete dset %s\n", o.rank, dset);
806  }
807  }
808 }
809 
810 
811 static option_help options [] = {
812  {'O', "offset", "Offset in o.ranks between writers and readers. Writers and readers should be located on different nodes.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.offset},
813  {'a', "api", "The API (plugin) to use for the benchmark, use list to show all compiled plugins.", OPTION_OPTIONAL_ARGUMENT, 's', & o.interface},
814  {'I', "obj-per-proc", "Number of I/O operations per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.num},
815  {'L', "latency", "Measure the latency for individual operations, prefix the result files with the provided filename.", OPTION_OPTIONAL_ARGUMENT, 's', & o.latency_file_prefix},
816  {0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all},
817  {'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate},
818  {'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count},
819  {'G', NULL, "Timestamp/Random seed for access pattern, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed},
820  {'o', NULL, "Output directory", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix},
821  {'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output},
822  //{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory},
823  // {'M', "lim-free-mem-phase", "Allocate memory until this limit (in MiB) is reached between the phases, but free it before starting the next phase; the time is NOT included for the phase.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory_between_phases},
824  {'S', "object-size", "Size for the created objects.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.file_size},
825  {'R', "iterations", "Number of times to rerun the main phase", OPTION_OPTIONAL_ARGUMENT, 'd', & o.iterations},
826  {'t', "waiting-time", "Waiting time relative to runtime (1.0 is 100%%)", OPTION_OPTIONAL_ARGUMENT, 'f', & o.relative_waiting_factor},
827  {'T', "adaptive-waiting", "Compute an adaptive waiting time", OPTION_FLAG, 'd', & o.adaptive_waiting_mode},
828  {'1', "run-precreate", "Run precreate phase", OPTION_FLAG, 'd', & o.phase_precreate},
829  {'2', "run-benchmark", "Run benchmark phase", OPTION_FLAG, 'd', & o.phase_benchmark},
830  {'3', "run-cleanup", "Run cleanup phase (only run explicit phases)", OPTION_FLAG, 'd', & o.phase_cleanup},
831  {'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer},
832  {'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out},
833  {'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read},
834  {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|random|o|i|t|r]", OPTION_OPTIONAL_ARGUMENT, 's', & o.packetTypeStr},
835 #ifdef HAVE_CUDA
836  {0, "allocateBufferOnGPU", "Allocate I/O buffers on the GPU: X=1 uses managed memory - verifications are run on CPU; X=2 managed memory - verifications on GPU; X=3 device memory with verifications on GPU.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.gpuMemoryFlags},
837  {0, "GPUid", "Select the GPU to use, use -1 for round-robin among local procs.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.gpuID},
838 #ifdef HAVE_GPU_DIRECT
839  {0, "gpuDirect", "Allocate I/O buffers on the GPU and use gpuDirect to store data; this option is incompatible with any option requiring CPU access to data.", OPTION_FLAG, 'd', & o.gpuDirect},
840 #endif
841 #endif
842  {0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number},
843  {0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats},
844  {0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only},
845  {0, "ignore-precreate-errors", "Ignore errors occuring during the pre-creation phase", OPTION_FLAG, 'd', & o.ignore_precreate_errors},
846  {0, "process-reports", "Independent report per process/rank", OPTION_FLAG, 'd', & o.process_report},
847  {'v', "verbose", "Increase the verbosity level", OPTION_FLAG, 'd', & o.verbosity},
848  {0, "run-info-file", "The log file for resuming a previous run", OPTION_OPTIONAL_ARGUMENT, 's', & o.run_info_file},
850  };
851 
/*
 * Write the current local time, formatted as "YYYY-MM-DD HH:MM:SS", followed by
 * a newline to the log file.
 */
static void printTime(){
  char stamp[100];
  const time_t now = time(0);
  struct tm * local = localtime(& now);

  strftime(stamp, 100, "%Y-%m-%d %H:%M:%S", local);
  oprintf("%s\n", stamp);
}
858 
859 static int return_position(){
860  int position, ret;
861  if( o.rank == 0){
862  FILE * f = fopen(o.run_info_file, "r");
863  if(! f){
864  ERRF("[ERROR] Could not open %s for restart", o.run_info_file);
865  exit(EXIT_FAILURE);
866  }
867  ret = fscanf(f, "pos: %d", & position);
868  if (ret != 1){
869  ERRF("Could not read from %s for restart", o.run_info_file);
870  exit(EXIT_FAILURE);
871  }
872  fclose(f);
873  }
874  ret = MPI_Bcast( & position, 1, MPI_INT, 0, o.com );
875  return position;
876 }
877 
878 static void store_position(int position){
879  if (o.rank != 0){
880  return;
881  }
882  FILE * f = fopen(o.run_info_file, "w");
883  if(! f){
884  ERRF("[ERROR] Could not open %s for saving data", o.run_info_file);
885  exit(EXIT_FAILURE);
886  }
887  fprintf(f, "pos: %d\n", position);
888  fclose(f);
889 }
890 
/*
 * Entry point of the md-workbench benchmark: parses the command line, then runs
 * the enabled precreate, benchmark and cleanup phases and returns the results.
 *
 * NOTE(review): this listing omits several original source lines (the embedded
 * line numbers jump, e.g. 898 -> 900 and 914 -> 916), so some conditions and
 * calls are not visible. The comments below describe only the visible statements.
 *
 * argc, argv:  command line; echoed to the log by rank 0 and passed to option_parse()
 * world_com:   communicator stored in o.com and used for the MPI calls in this function
 * out_logfile: not referenced in the visible lines - presumably assigned to
 *              o.logfile in an omitted line; TODO confirm
 *
 * Returns o.results, which is allocated with malloc() below and not freed in
 * this function - presumably the caller owns it; confirm against the callers.
 */
891 mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){
892  int ret;
893  int printhelp = 0;
894  char * limit_memory_P = NULL;
895  init_options();
896  init_clock(world_com);
897 
898  o.com = world_com;
900 
     // determine this process' rank and the number of processes in the communicator
901  MPI_Comm_rank(o.com, & o.rank);
902  MPI_Comm_size(o.com, & o.size);
903 
     // rank 0 echoes the command line to the log unless quiet output is set
904  if (o.rank == 0 && ! o.quiet_output){
905  oprintf("Args: %s", argv[0]);
906  for(int i=1; i < argc; i++){
907  oprintf(" \"%s\"", argv[i]);
908  }
909  oprintf("\n");
910  }
911 
     // start from zeroed transfer hints and set filePerProc
912  memset(& o.hints, 0, sizeof(o.hints));
913  o.hints.filePerProc = 1;
914 
916  int parsed = option_parse(argc, argv, global_options);
     // a backend must have been selected and it must be flagged with enable_mdtest
918  if (o.backend == NULL){
919  ERR("Unrecognized I/O API");
920  }
921  if (! o.backend->enable_mdtest){
922  ERR("Backend doesn't support MDWorbench");
923  }
925 
927 
     // NOTE(review): the conditions opening the next two blocks are among the omitted lines
929  // enable all phases
931  }
933  if(o.rank == 0){
934  WARN("Dangerous option combination: and benchmark phase (-2) using with stonewall option (-w) without stonewall wear-out will lead to files that cannot be cleaned up using the cleanup phase(-3). Also multiple iterations are problematic.");
935  }
936  }
     // a seed of -1 is replaced by the current time; the value of rank 0 is
     // broadcast so that all ranks use the same seed
937  if( o.random_seed == -1 ){
938  o.random_seed = time(NULL);
939  MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, o.com);
940  }
941 
     // optional backend hooks are only used when the backend provides them
942  if(o.backend->xfer_hints){
943  o.backend->xfer_hints(& o.hints);
944  }
945  if(o.backend->check_params){
947  }
948  if (o.backend->initialize){
950  }
951 
     // gather the node layout that is passed to initCUDA()
952  int tasksBlockMapping = QueryNodeMapping(o.com, true);
953  int numNodes = GetNumNodes(o.com);
954  int numTasksOnNode0 = GetNumTasksOnNode0(o.com);
     // NOTE(review): the condition guarding initCUDA() is not visible here
956  initCUDA(tasksBlockMapping, o.rank, numNodes, numTasksOnNode0, o.gpuID);
957  }
958 
     // the start index defaults to 0; it can be read back via return_position()
     // (guard not visible) and is overridden by a non-zero o.start_item_number
959  int current_index = 0;
961  current_index = return_position();
962  }
963 
964  if(o.start_item_number){
965  oprintf("Using start position %lld\n", (long long) o.start_item_number);
966  current_index = o.start_item_number;
967  }
968 
     // number of objects over all processes, data sets, iterations and the precreated set
969  size_t total_obj_count = o.dset_count * (size_t) (o.num * o.iterations + o.precreate) * o.size;
970  if (o.rank == 0 && ! o.quiet_output){
971  oprintf("MD-Workbench total objects: %zu workingset size: %.3f MiB (version: %s) time: ", total_obj_count, ((double) o.size) * o.dset_count * o.precreate * o.file_size / 1024.0 / 1024.0, PACKAGE_VERSION);
972  printTime();
973  if(o.num > o.precreate){
974  oprintf("WARNING: num > precreate, this may cause the situation that no objects are available to read\n");
975  }
976  }
977 
978  if ( o.rank == 0 && ! o.quiet_output ){
979  // print the set output options
980  // option_print_current(options);
981  // oprintf("\n");
982  }
983 
984  // preallocate memory if necessary
985  //ret = mem_preallocate(& limit_memory_P, o.limit_memory, o.verbosity >= 3);
986  //if(ret != 0){
987  // printf("%d: Error allocating memory\n", o.rank);
988  // MPI_Abort(o.com, 1);
989  //}
990 
991  double t_bench_start;
992  t_bench_start = GetTimeStamp();
993  phase_stat_t phase_stats;
     // result storage: header plus result_count entries, zero-filled; the factor 7
     // appears to match the seven adaptive-waiting rounds (r = 0..6) run below
     // NOTE(review): the malloc() result is passed to memset() without a NULL check
994  size_t result_count = (2 + o.iterations) * (o.adaptive_waiting_mode ? 7 : 1);
995  o.results = malloc(sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count);
996  memset(o.results, 0, sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count);
997  o.results->count = 0;
998 
999  if(o.rank == 0 && o.print_detailed_stats && ! o.quiet_output){
1001  }
1002 
1003  if (o.phase_precreate){
      // rank 0 creates the test directory; a failure only produces a warning
1004  if (o.rank == 0){
1005  if (o.backend->mkdir(o.prefix, DIRMODE, o.backend_options) != 0) {
1006  WARNF("Unable to create test directory %s", o.prefix);
1007  }
1008  }
1009  init_stats(& phase_stats, o.precreate * o.dset_count);
      // all ranks wait here before the phase timer is started
1010  MPI_Barrier(o.com);
1011 
1012  // pre-creation phase
1013  phase_stats.phase_start_timer = GetTimeStamp();
1014  run_precreate(& phase_stats, current_index);
1015  phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer;
1016  end_phase("precreate", & phase_stats);
1017  }
1018 
1019  if (o.phase_benchmark){
1020  // benchmark phase
1024  }
1025  init_stats(& phase_stats, o.num * o.dset_count);
1026  MPI_Barrier(o.com);
1027  phase_stats.phase_start_timer = GetTimeStamp();
      // run_benchmark() receives current_index by pointer, so it can update it
1028  run_benchmark(& phase_stats, & current_index);
1029  end_phase("benchmark", & phase_stats);
1030 
      // seven further benchmark runs starting with a relative waiting factor of
      // 0.0625; NOTE(review): the guard of this section and the statement that
      // changes the factor per round are among the omitted lines
1032  o.relative_waiting_factor = 0.0625;
1033  for(int r=0; r <= 6; r++){
1034  init_stats(& phase_stats, o.num * o.dset_count);
1035  MPI_Barrier(o.com);
1036  phase_stats.phase_start_timer = GetTimeStamp();
1037  run_benchmark(& phase_stats, & current_index);
1038  end_phase("benchmark", & phase_stats);
1040  }
1041  }
1042  }
1043  }
1044 
1045  // cleanup phase
1046  if (o.phase_cleanup){
1047  init_stats(& phase_stats, o.precreate * o.dset_count);
1048  phase_stats.phase_start_timer = GetTimeStamp();
1049  run_cleanup(& phase_stats, current_index);
1050  phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer;
1051  end_phase("cleanup", & phase_stats);
1052 
      // rank 0 removes the test directory; a failure is only logged
1053  if (o.rank == 0){
1054  if (o.backend->rmdir(o.prefix, o.backend_options) != 0) {
1055  oprintf("Unable to remove directory %s\n", o.prefix);
1056  }
1057  }
1058  }else{
      // without a cleanup phase the current index is saved via store_position()
1059  store_position(current_index);
1060  }
1061 
      // runtime measured since t_bench_start was taken
1062  double t_all = GetTimeStamp() - t_bench_start;
1063  if(o.backend->finalize){
1065  }
1066  if (o.rank == 0 && ! o.quiet_output){
1067  oprintf("Total runtime: %.0fs time: ", t_all);
1068  printTime();
1069  }
1070  //mem_free_preallocated(& limit_memory_P);
1071  return o.results;
1072 }
void invalidate_buffer_pattern(char *buffer, size_t bytes, ior_memory_flags type)
Definition: utilities.c:172
#define ERRF(FORMAT,...)
Definition: aiori-debug.h:67
mdworkbench_results_t * md_workbench_run(int argc, char **argv, MPI_Comm world_com, FILE *out_logfile)
Definition: md-workbench.c:891
void run_benchmark(phase_stat_t *s, int *current_index_p)
Definition: md-workbench.c:602
static void def_dset_name(char *out_name, int n, int d)
Definition: md-workbench.c:135
time_statistics_t stats_read
Definition: md-workbench.c:65
time_statistics_t stats_create
Definition: md-workbench.c:64
static int sum_err(phase_stat_t *p)
Definition: md-workbench.c:205
static float add_timed_result(double start, double phase_start_timer, time_result_t *results, size_t pos, double *max_time, double *out_op_time)
Definition: md-workbench.c:189
#define LAST_OPTION
Definition: option.h:39
op_stat_t obj_read
Definition: md-workbench.c:53
void * airoi_update_module_options(const ior_aiori_t *backend, options_all_t *opt)
Definition: aiori.c:96
FILE * out_logfile
Definition: utilities.c:74
int option_parse(int argc, char **argv, options_all_t *opt_all)
Definition: option.c:414
float relative_waiting_factor
Definition: md-workbench.c:127
struct benchmark_options o
Definition: md-workbench.c:133
time_statistics_t stats_delete
Definition: md-workbench.c:67
static void statistics_minmax(int count, double *arr, double *out_min, double *out_max)
Definition: md-workbench.c:226
op_stat_t obj_stat
Definition: md-workbench.c:54
time_result_t * time_create
Definition: md-workbench.c:59
void run_precreate(phase_stat_t *s, int current_index)
Definition: md-workbench.c:544
void(* delete)(char *, aiori_mod_opt_t *module_options)
Definition: aiori.h:100
static void printTime()
Definition: md-workbench.c:852
op_stat_t obj_delete
Definition: md-workbench.c:55
int(* mkdir)(const char *path, mode_t mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:105
#define min(a, b)
Definition: md-workbench.c:27
double max_op_time
Definition: md-workbench.c:70
int QueryNodeMapping(MPI_Comm comm, int print_nodemap)
Definition: utilities.c:402
op_stat_t obj_create
Definition: md-workbench.c:52
#define FAIL(...)
Definition: aiori-debug.h:16
static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t *times, time_result_t *global_times)
Definition: md-workbench.c:364
time_statistics_t stats_stat
Definition: md-workbench.h:23
double * t_all
Definition: md-workbench.c:47
static void compute_histogram(const char *name, time_result_t *times, time_statistics_t *stats, size_t repeats, int writeLatencyFile)
Definition: md-workbench.c:387
#define IOR_RDONLY
Definition: aiori.h:28
int stonewall_timer_wear_out
Definition: md-workbench.c:97
#define WRITE
Definition: iordef.h:100
void * backend_options
Definition: md-workbench.c:77
ior_dataPacketType_e dataPacketType
Definition: md-workbench.c:89
static option_help options[]
Definition: md-workbench.c:811
static int compare_floats(time_result_t *x, time_result_t *y)
Definition: md-workbench.c:354
int(* rmdir)(const char *path, aiori_mod_opt_t *module_options)
Definition: aiori.h:106
#define READ
Definition: iordef.h:102
static double statistics_mean(int count, double *arr)
Definition: md-workbench.c:209
#define IOR_CREAT
Definition: aiori.h:32
const ior_aiori_t * aiori_select(const char *api)
Definition: aiori.c:240
time_result_t * time_stat
Definition: md-workbench.c:61
ior_dataPacketType_e parsePacketType(char t)
Definition: utilities.c:291
double phase_start_timer
Definition: md-workbench.c:71
int(* check_params)(aiori_mod_opt_t *)
Definition: aiori.h:113
void init_options()
Definition: md-workbench.c:143
static double statistics_std_dev(int count, double *arr)
Definition: md-workbench.c:217
void run_cleanup(phase_stat_t *s, int start_index)
Definition: md-workbench.c:776
uint64_t iterations_done
Definition: md-workbench.h:30
void init_clock(MPI_Comm com)
Definition: utilities.c:917
void(* initialize)(aiori_mod_opt_t *options)
Definition: aiori.h:109
ior_memory_flags gpuMemoryFlags
Definition: md-workbench.c:98
#define WARN(MSG)
Definition: aiori-debug.h:45
float time_since_app_start
Definition: md-workbench.c:39
#define DIRMODE
Definition: md-workbench.c:23
int GetNumNodes(MPI_Comm comm)
Definition: utilities.c:476
void initCUDA(int blockMapping, int rank, int numNodes, int tasksPerNode, int useGPUID)
Definition: utilities.c:438
time_statistics_t stats_create
Definition: md-workbench.h:21
void(* xfer_hints)(aiori_xfer_hint_t *params)
Definition: aiori.h:96
void(* close)(aiori_fd_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:99
time_result_t * time_read
Definition: md-workbench.c:60
op_stat_t dset_delete
Definition: md-workbench.c:50
int(* stat)(const char *path, struct stat *buf, aiori_mod_opt_t *module_options)
Definition: aiori.h:108
mdworkbench_results_t * results
Definition: md-workbench.c:87
options_all_t * airoi_create_all_module_options(option_help *global_options)
Definition: aiori.c:110
double GetTimeStamp(void)
Definition: utilities.c:876
static int return_position()
Definition: md-workbench.c:859
static void store_position(int position)
Definition: md-workbench.c:878
#define CHECK_MPI_RET(ret)
Definition: md-workbench.c:25
ior_aiori_t const * backend
Definition: md-workbench.c:76
aiori_fd_t *(* create)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:90
IOR_offset_t(* xfer)(int access, aiori_fd_t *, IOR_size_t *, IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t *module_options)
Definition: aiori.h:97
static void def_obj_name(char *out_name, int n, int d, int i)
Definition: md-workbench.c:139
#define IOR_WRONLY
Definition: aiori.h:29
static void end_phase(const char *name, phase_stat_t *p)
Definition: md-workbench.c:417
int stonewall_iterations
Definition: md-workbench.c:72
static options_all_t * global_options
Definition: parse_options.c:41
void update_write_memory_pattern(uint64_t item, char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:94
time_statistics_t stats_delete
Definition: md-workbench.h:24
time_result_t * time_delete
Definition: md-workbench.c:62
#define WARNF(FORMAT,...)
Definition: aiori-debug.h:30
long long int IOR_size_t
Definition: iordef.h:124
mdworkbench_result_t result[]
Definition: md-workbench.h:36
void(* finalize)(aiori_mod_opt_t *options)
Definition: aiori.h:110
static double runtime_quantile(int repeats, time_result_t *times, float quantile)
Definition: md-workbench.c:358
aiori_xfer_hint_t hints
Definition: md-workbench.c:78
void generate_memory_pattern(char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:137
#define oprintf(...)
Definition: md-workbench.c:29
bool enable_mdtest
Definition: aiori.h:115
static void print_detailed_stat_header()
Definition: md-workbench.c:201
static void mdw_wait(double runtime)
Definition: md-workbench.c:160
time_statistics_t stats_stat
Definition: md-workbench.c:66
#define MAX_PATHLEN
Definition: utilities.h:31
aiori_fd_t *(* open)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:92
#define ERR(MSG)
Definition: aiori-debug.h:75
int verify_memory_pattern(uint64_t item, char *buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:182
uint64_t repeats
Definition: md-workbench.c:58
static void print_p_stat(char *buff, const char *name, phase_stat_t *p, double t, int print_global)
Definition: md-workbench.c:237
ior_dataPacketType_e
Definition: iordef.h:22
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
Definition: utilities.c:1119
ior_memory_flags
Definition: iordef.h:29
uint64_t start_item_number
Definition: md-workbench.c:130
int filePerProc
Definition: aiori.h:65
static void init_stats(phase_stat_t *p, size_t repeats)
Definition: md-workbench.c:179
int GetNumTasksOnNode0(MPI_Comm comm)
Definition: utilities.c:551
time_statistics_t stats_read
Definition: md-workbench.h:22
op_stat_t dset_create
Definition: md-workbench.c:49
#define NULL
Definition: iordef.h:84
char * latency_file_prefix
Definition: md-workbench.c:102
void * aligned_buffer_alloc(size_t size, ior_memory_flags type)
Definition: utilities.c:1070