IOR
mdtest.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2003, The Regents of the University of California.
3  * Produced at the Lawrence Livermore National Laboratory.
4  * Written by Christopher J. Morrone <morrone@llnl.gov>,
5  * Bill Loewe <loewe@loewe.net>, Tyce McLarty <mclarty@llnl.gov>,
6  * and Ryan Kroiss <rrkroiss@lanl.gov>.
7  * All rights reserved.
8  * UCRL-CODE-155800
9  *
10  * Please read the COPYRIGHT file.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License (as published by
14  * the Free Software Foundation) version 2, dated June 1991.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * terms and conditions of the GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  *
25  * CVS info:
26  * $RCSfile: mdtest.c,v $
27  * $Revision: 1.4 $
28  * $Date: 2013/11/27 17:05:31 $
29  * $Author: brettkettering $
30  */
31 #include <limits.h>
32 #include <math.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdbool.h>
36 #include <inttypes.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <stdarg.h>
40 
41 #include "option.h"
42 #include "utilities.h"
43 
44 #if HAVE_SYS_PARAM_H
45 #include <sys/param.h>
46 #endif
47 
48 #if HAVE_SYS_MOUNT_H
49 #include <sys/mount.h>
50 #endif
51 
52 #if HAVE_SYS_STATFS_H
53 #include <sys/statfs.h>
54 #endif
55 
56 #if HAVE_SYS_STATVFS_H
57 #include <sys/statvfs.h>
58 #endif
59 
60 #include <fcntl.h>
61 #include <string.h>
62 
63 #if HAVE_STRINGS_H
64 #include <strings.h>
65 #endif
66 
67 #include <unistd.h>
68 #include <dirent.h>
69 #include <errno.h>
70 #include <time.h>
71 #include <sys/time.h>
72 
73 #include "aiori.h"
74 #include "ior.h"
75 #include "mdtest.h"
76 
77 #include <mpi.h>
78 
79 #ifdef HAVE_GPFSCREATESHARING_T
80 #include <gpfs_fcntl.h>
81 #include "aiori-POSIX.h"
82 #ifndef open64 /* necessary for TRU64 -- */
83 # define open64 open /* unlikely, but may pose */
84 #endif /* not open64 */ /* conflicting prototypes */
85 #endif /* HAVE_GPFSCREATESHARING_T */
86 
87 #pragma GCC diagnostic ignored "-Wformat-overflow"
88 
89 #ifdef HAVE_LUSTRE_LUSTREAPI
90 #include <lustre/lustreapi.h>
91 #endif /* HAVE_LUSTRE_LUSTREAPI */
92 
/*
 * Permission bits used when creating files and directories.
 * The OR-chains are parenthesized so the macros keep their meaning when
 * they are combined with other operators (e.g. "FILEMODE & mask").
 */
#define FILEMODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH)
#define DIRMODE (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH)
95 #define RELEASE_VERS META_VERSION
96 #define TEST_DIR "test-dir"
97 #define ITEM_COUNT 25000
98 
99 #define LLU "%lu"
100 
101 typedef struct {
102  int size;
103  uint64_t *rand_array;
104  char testdir[MAX_PATHLEN];
105  char testdirpath[MAX_PATHLEN];
106  char base_tree_name[MAX_PATHLEN];
107  char **filenames;
108  char hostname[MAX_PATHLEN];
109  char mk_name[MAX_PATHLEN];
110  char stat_name[MAX_PATHLEN];
111  char read_name[MAX_PATHLEN];
112  char rm_name[MAX_PATHLEN];
113  char unique_mk_dir[MAX_PATHLEN];
114  char unique_chdir_dir[MAX_PATHLEN];
115  char unique_stat_dir[MAX_PATHLEN];
116  char unique_read_dir[MAX_PATHLEN];
117  char unique_rm_dir[MAX_PATHLEN];
118  char unique_rm_uni_dir[MAX_PATHLEN];
121  ior_memory_flags gpuMemoryFlags; /* use the GPU to store the data */
122  int gpuDirect; /* use gpuDirect, this influences gpuMemoryFlags as well */
123  int gpuID; /* the GPU to use for gpuDirect or memory options */
124 
125 
126 
127  int barriers;
137  unsigned branch_factor;
138  int depth;
139  int random_buffer_offset; /* user settable value, otherwise random */
140 
141  /*
142  * This is likely a small value, but it's sometimes computed by
143  * branch_factor^(depth+1), so we'll make it a larger variable,
144  * just in case.
145  */
147  /*
148  * As we start moving towards Exascale, we could have billions
149  * of files in a directory. Make room for that possibility with
150  * a larger variable.
151  */
152  uint64_t items;
153  uint64_t items_per_dir;
154  uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */
169  size_t write_bytes;
171  size_t read_bytes;
175  int nstride; /* neighbor stride */
177  #ifdef HAVE_LUSTRE_LUSTREAPI
178  int global_dir_layout;
179  #endif /* HAVE_LUSTRE_LUSTREAPI */
180  char * saveRankDetailsCSV; /* save the details about the performance to a file */
181  const char *prologue;
182  const char *epilogue;
183 
185  pid_t pid;
186  uid_t uid;
187 
188  /* Use the POSIX backend by default */
192  char * api;
194 
196 
197 
198 /* This structure describes the processing status for stonewalling */
199 typedef struct{
200  double start_time;
201 
203 
204  uint64_t items_start;
205  uint64_t items_done;
206 
207  uint64_t items_per_dir;
209 
210 #define CHECK_STONE_WALL(p) (((p)->stone_wall_timer_seconds != 0) && ((GetTimeStamp() - (p)->start_time) > (p)->stone_wall_timer_seconds))
211 
212 /* for making/removing unique directory && stating/deleting subdirectory */
214 
215 #define PRINT(...) fprintf(out_logfile, __VA_ARGS__);
216 
217 /* a helper function for passing debug and verbose messages.
218  use the MACRO as it will insert __LINE__ for you.
219  Pass the verbose level for root to print, then the verbose level for anyone to print.
220  Pass -1 to suppress the print for anyone.
221  Then do the standard printf stuff. This function adds the newline for you.
222 */
223 #define VERBOSE(root,any,...) VerboseMessage(root,any,__LINE__,__VA_ARGS__)
224 void VerboseMessage (int root_level, int any_level, int line, char * format, ...) {
225  if ((rank==0 && verbose >= root_level) || (any_level > 0 && verbose >= any_level)) {
226  char buffer[1024];
227  va_list args;
228  va_start (args, format);
229  vsnprintf (buffer, 1024, format, args);
230  va_end (args);
231  if (root_level == 0 && any_level == -1) {
232  /* No header when it is just the standard output */
233  fprintf( out_logfile, "%s\n", buffer );
234  } else {
235  /* add a header when the verbose is greater than 0 */
236  fprintf( out_logfile, "V-%d: Rank %3d Line %5d %s\n", root_level, rank, line, buffer );
237  }
238  fflush(out_logfile);
239  }
240 }
241 
242 void parse_dirpath(char *dirpath_arg) {
243  char * tmp, * token;
244  char delimiter_string[3] = { '@', '\n', '\0' };
245  int i = 0;
246 
247 
248  VERBOSE(1,-1, "Entering parse_dirpath on %s...", dirpath_arg );
249 
250  tmp = dirpath_arg;
251 
252  if (* tmp != '\0') o.path_count++;
253  while (* tmp != '\0') {
254  if (* tmp == '@') {
255  o.path_count++;
256  }
257  tmp++;
258  }
259  // prevent changes to the original dirpath_arg
260  dirpath_arg = strdup(dirpath_arg);
261  o.filenames = (char **) safeMalloc(o.path_count * sizeof(char **));
262 
263  token = strtok(dirpath_arg, delimiter_string);
264  while (token != NULL) {
265  o.filenames[i] = token;
266  token = strtok(NULL, delimiter_string);
267  i++;
268  }
269 }
270 
271 static void prep_testdir(int j, int dir_iter){
272  int pos = sprintf(o.testdir, "%s", o.testdirpath);
273  if ( o.testdir[strlen( o.testdir ) - 1] != '/' ) {
274  pos += sprintf(& o.testdir[pos], "/");
275  }
276  pos += sprintf(& o.testdir[pos], "%s", TEST_DIR);
277  pos += sprintf(& o.testdir[pos], ".%d-%d", j, dir_iter);
278 }
279 
280 static void phase_prepare(){
281  if (*o.prologue){
282  VERBOSE(0,5,"calling prologue: \"%s\"", o.prologue);
283  system(o.prologue);
284  }
285  if (o.barriers) {
286  MPI_Barrier(testComm);
287  }
288 }
289 
/*
 * End-of-phase hook.
 *
 * When o.call_sync is set, the backend must provide a sync method,
 * otherwise FAIL() is raised.  Afterwards the epilogue command is run when
 * it is a non-empty string and, if barriers are enabled, the ranks wait on
 * testComm.
 *
 * NOTE(review): the embedded line numbering jumps from 294 to 296 inside
 * the call_sync branch, so one statement appears to be missing from this
 * listing -- confirm against the upstream file before editing the branch.
 */
290 static void phase_end(){
291  if (o.call_sync){
292  if(! o.backend->sync){
293  FAIL("Error, backend does not provide the sync method, but you requested to use sync.\n");
294  }
296  }
/* run the optional user supplied epilogue command */
297  if (*o.epilogue){
298  VERBOSE(0,5,"calling epilogue: \"%s\"", o.epilogue);
299  system(o.epilogue);
300  }
301 
302  if (o.barriers) {
303  MPI_Barrier(testComm);
304  }
305 }
306 
307 /*
308  * This function copies the unique directory name for a given option to
309  * the "to" parameter. Some memory must be allocated to the "to" parameter.
310  */
311 
312 void unique_dir_access(int opt, char *to) {
313  if (opt == MK_UNI_DIR) {
314  MPI_Barrier(testComm);
315  sprintf( to, "%s/%s", o.testdir, o.unique_chdir_dir );
316  } else if (opt == STAT_SUB_DIR) {
317  sprintf( to, "%s/%s", o.testdir, o.unique_stat_dir );
318  } else if (opt == READ_SUB_DIR) {
319  sprintf( to, "%s/%s", o.testdir, o.unique_read_dir );
320  } else if (opt == RM_SUB_DIR) {
321  sprintf( to, "%s/%s", o.testdir, o.unique_rm_dir );
322  } else if (opt == RM_UNI_DIR) {
323  sprintf( to, "%s/%s", o.testdir, o.unique_rm_uni_dir );
324  }
325  VERBOSE(1,-1,"Entering unique_dir_access, set it to %s", to );
326 }
327 
328 static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) {
329  char curr_item[MAX_PATHLEN];
330  const char *operation = create ? "create" : "remove";
331 
332  if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) {
333  VERBOSE(3,5,"dir: "LLU"", operation, itemNum);
334  }
335 
336  //create dirs
337  sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? o.mk_name : o.rm_name, itemNum);
338  VERBOSE(3,5,"create_remove_items_helper (dirs %s): curr_item is '%s'", operation, curr_item);
339 
340  if (create) {
341  if (o.backend->mkdir(curr_item, DIRMODE, o.backend_options) == -1) {
342  WARNF("unable to create directory %s", curr_item);
343  }
344  } else {
345  if (o.backend->rmdir(curr_item, o.backend_options) == -1) {
346  WARNF("unable to remove directory %s", curr_item);
347  }
348  }
349 }
350 
351 static void remove_file (const char *path, uint64_t itemNum) {
352  char curr_item[MAX_PATHLEN];
353 
354  if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) {
355  VERBOSE(3,5,"remove file: "LLU"\n", itemNum);
356  }
357 
358  //remove files
359  sprintf(curr_item, "%s/file.%s"LLU"", path, o.rm_name, itemNum);
360  VERBOSE(3,5,"create_remove_items_helper (non-dirs remove): curr_item is '%s'", curr_item);
361  if (!(o.shared_file && rank != 0)) {
362  o.backend->delete (curr_item, o.backend_options);
363  }
364 }
365 
366 
/*
 * Create the file "<path>/file.<o.mk_name><itemNum>" through the backend.
 *
 * Three creation modes are visible:
 *   - o.make_node:          backend mknod, then return (nothing is opened)
 *   - o.collective_creates: backend open with IOR_WRONLY | IOR_CREAT
 *   - otherwise:            backend create with IOR_WRONLY | IOR_CREAT
 * After open/create, o.write_bytes bytes of o.write_buffer are written when
 * o.write_bytes > 0, optionally read back when o.verify_write is set, and
 * the handle is closed.  Failures are reported with WARNF.
 *
 * NOTE(review): the embedded line numbering skips 414-415 and 426, and
 * "error" is used below without a visible declaration, so statements are
 * missing from this listing -- confirm against the upstream file.
 */
367 static void create_file (const char *path, uint64_t itemNum) {
368  char curr_item[MAX_PATHLEN];
369  aiori_fd_t *aiori_fh = NULL;
370 
/* progress message every ITEM_COUNT items */
371  if ( (itemNum % ITEM_COUNT==0 && (itemNum != 0))) {
372  VERBOSE(3,5,"create file: "LLU"", itemNum);
373  }
374 
375  //create files
376  sprintf(curr_item, "%s/file.%s"LLU"", path, o.mk_name, itemNum);
377  VERBOSE(3,5,"create_remove_items_helper (non-dirs create): curr_item is '%s'", curr_item);
378 
379  if (o.make_node) {
380  int ret;
381  VERBOSE(3,5,"create_remove_items_helper : mknod..." );
382 
383  ret = o.backend->mknod (curr_item);
384  if (ret != 0)
385  WARNF("unable to mknode file %s", curr_item);
386 
/* mknod path: no handle was opened, so there is nothing to write or close */
387  return;
388  } else if (o.collective_creates) {
389  VERBOSE(3,5,"create_remove_items_helper (collective): open..." );
390 
391  aiori_fh = o.backend->open (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options);
392  if (NULL == aiori_fh){
393  WARNF("unable to open file %s", curr_item);
394  return;
395  }
396 
397  /*
398  * !collective_creates
399  */
400  } else {
401  o.hints.filePerProc = ! o.shared_file;
402  VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." );
403 
404  aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options);
405  if (NULL == aiori_fh){
406  WARNF("unable to create file %s", curr_item);
407  return;
408  }
409  }
410 
411  if (o.write_bytes > 0) {
412  VERBOSE(3,5,"create_remove_items_helper: write..." );
413 
416 
/* a short transfer counts as a failed write */
417  if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
418  WARNF("unable to write file %s", curr_item);
419  }
420 
421  if (o.verify_write) {
/* the first byte is overwritten before the read-back; presumably so stale
   buffer contents cannot pass verification -- TODO confirm */
422  o.write_buffer[0] = 42;
423  if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
424  WARNF("unable to verify write (read/back) file %s", curr_item);
425  }
427  o.verification_error += error;
428  if(error){
429  VERBOSE(1,1,"verification error in file: %s", curr_item);
430  }
431  }
432  }
433 
434  VERBOSE(3,5,"create_remove_items_helper: close..." );
435  o.backend->close (aiori_fh, o.backend_options);
436 }
437 
438 /* helper for creating/removing items */
439 void create_remove_items_helper(const int dirs, const int create, const char *path,
440  uint64_t itemNum, rank_progress_t * progress) {
441 
442  VERBOSE(1,-1,"Entering create_remove_items_helper on %s", path );
443 
444  for (uint64_t i = progress->items_start; i < progress->items_per_dir ; ++i) {
445  if (!dirs) {
446  if (create) {
447  create_file (path, itemNum + i);
448  } else {
449  remove_file (path, itemNum + i);
450  }
451  } else {
452  create_remove_dirs (path, create, itemNum + i);
453  }
454  if(CHECK_STONE_WALL(progress)){
455  if(progress->items_done == 0){
456  progress->items_done = i + 1;
457  }
458  return;
459  }
460  }
461  progress->items_done = progress->items_per_dir;
462 }
463 
464 /* helper function to do collective operations */
465 void collective_helper(const int dirs, const int create, const char* path, uint64_t itemNum, rank_progress_t * progress) {
466  char curr_item[MAX_PATHLEN];
467 
468  VERBOSE(1,-1,"Entering collective_helper on %s", path );
469  for (uint64_t i = progress->items_start ; i < progress->items_per_dir ; ++i) {
470  if (dirs) {
471  create_remove_dirs (path, create, itemNum + i);
472  continue;
473  }
474 
475  sprintf(curr_item, "%s/file.%s"LLU"", path, create ? o.mk_name : o.rm_name, itemNum+i);
476  VERBOSE(3,5,"create file: %s", curr_item);
477 
478  if (create) {
479  aiori_fd_t *aiori_fh;
480 
481  //create files
482  aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options);
483  if (NULL == aiori_fh) {
484  WARNF("unable to create file %s", curr_item);
485  }else{
486  o.backend->close (aiori_fh, o.backend_options);
487  }
488  } else if (!(o.shared_file && rank != 0)) {
489  //remove files
490  o.backend->delete (curr_item, o.backend_options);
491  }
492  if(CHECK_STONE_WALL(progress)){
493  progress->items_done = i + 1;
494  return;
495  }
496  }
497  progress->items_done = progress->items_per_dir;
498 }
499 
500 /* recursive function to create and remove files/directories from the
501  directory tree */
/*
 * Walk the directory tree and create or remove the items in it.
 *
 * currDepth  - depth of this call; 0 handles the tree root
 * dirs       - non-zero to operate on directories, zero for files
 * create     - non-zero to create, zero to remove
 * collective - non-zero to use collective_helper(), otherwise
 *              create_remove_items_helper()
 * path       - directory this call works in
 * dirNum     - number of the first branch directory at this level
 * progress   - stonewall/progress state handed on to the helpers
 *
 * At depth 0 the items of the root are handled (unless o.leaf_only is set
 * and o.depth > 0) and, if o.depth > 0, the function recurses to depth 1.
 * At depths 1..o.depth it iterates over o.branch_factor branches named
 * "<o.base_tree_name>.<currDir>/", handles the items of each branch
 * (only at depth o.depth when o.leaf_only is set) and descends.
 *
 * NOTE(review): the embedded line numbering skips 553, the line on which
 * the callee of the argument list at 554-561 is expected -- presumably the
 * recursive create_remove_items( call; confirm against the upstream file.
 */
502 void create_remove_items(int currDepth, const int dirs, const int create, const int collective, const char *path, uint64_t dirNum, rank_progress_t * progress) {
503  unsigned i;
504  char dir[MAX_PATHLEN];
505  char temp_path[MAX_PATHLEN];
506  unsigned long long currDir = dirNum;
507 
508 
509  VERBOSE(1,-1,"Entering create_remove_items on %s, currDepth = %d...", path, currDepth );
510 
511 
512  memset(dir, 0, MAX_PATHLEN);
513  strcpy(temp_path, path);
514 
515  VERBOSE(3,5,"create_remove_items (start): temp_path is '%s'", temp_path );
516 
517  if (currDepth == 0) {
518  /* create items at this depth */
519  if (! o.leaf_only || (o.depth == 0 && o.leaf_only)) {
520  if (collective) {
521  collective_helper(dirs, create, temp_path, 0, progress);
522  } else {
523  create_remove_items_helper(dirs, create, temp_path, 0, progress);
524  }
525  }
526 
527  if (o.depth > 0) {
528  create_remove_items(++currDepth, dirs, create,
529  collective, temp_path, ++dirNum, progress);
530  }
531 
532  } else if (currDepth <= o.depth) {
533  /* iterate through the branches */
534  for (i=0; i< o.branch_factor; i++) {
535 
536  /* determine the current branch and append it to the path */
537  sprintf(dir, "%s.%llu/", o.base_tree_name, currDir);
538  strcat(temp_path, "/");
539  strcat(temp_path, dir);
540 
541  VERBOSE(3,5,"create_remove_items (for loop): temp_path is '%s'", temp_path );
542 
543  /* create the items in this branch */
544  if (! o.leaf_only || (o.leaf_only && currDepth == o.depth)) {
/* items of branch currDir are numbered from currDir * o.items_per_dir */
545  if (collective) {
546  collective_helper(dirs, create, temp_path, currDir* o.items_per_dir, progress);
547  } else {
548  create_remove_items_helper(dirs, create, temp_path, currDir*o.items_per_dir, progress);
549  }
550  }
551 
552  /* make the recursive call for the next level below this branch */
554  ++currDepth,
555  dirs,
556  create,
557  collective,
558  temp_path,
559  ( currDir * ( unsigned long long ) o.branch_factor ) + 1,
560  progress
561  );
/* undo the ++currDepth above so the next sibling branch is handled at this level */
562  currDepth--;
563 
564  /* reset the path */
565  strcpy(temp_path, path);
566  currDir++;
567  }
568  }
569 }
570 
571 /* stats all of the items created as specified by the input parameters */
572 void mdtest_stat(const int random, const int dirs, const long dir_iter, const char *path, rank_progress_t * progress) {
573  struct stat buf;
574  uint64_t parent_dir, item_num = 0;
575  char item[MAX_PATHLEN], temp[MAX_PATHLEN];
576 
577  VERBOSE(1,-1,"Entering mdtest_stat on %s", path );
578 
579  uint64_t stop_items = o.items;
580 
581  if( o.directory_loops != 1 ){
582  stop_items = o.items_per_dir;
583  }
584 
585  /* iterate over all of the item IDs */
586  for (uint64_t i = 0 ; i < stop_items ; ++i) {
587  /*
588  * It doesn't make sense to pass the address of the array because that would
589  * be like passing char **. Tested it on a Cray and it seems to work either
590  * way, but it seems that it is correct without the "&".
591  *
592  memset(&item, 0, MAX_PATHLEN);
593  */
594  memset(item, 0, MAX_PATHLEN);
595  memset(temp, 0, MAX_PATHLEN);
596 
597 
598  /* determine the item number to stat */
599  if (random) {
600  item_num = o.rand_array[i];
601  } else {
602  item_num = i;
603  }
604 
605  /* make adjustments if in leaf only mode*/
606  if (o.leaf_only) {
607  item_num += o.items_per_dir *
608  (o.num_dirs_in_tree - (uint64_t) pow( o.branch_factor, o.depth ));
609  }
610 
611  /* create name of file/dir to stat */
612  if (dirs) {
613  if ( (i % ITEM_COUNT == 0) && (i != 0)) {
614  VERBOSE(3,5,"stat dir: "LLU"", i);
615  }
616  sprintf(item, "dir.%s"LLU"", o.stat_name, item_num);
617  } else {
618  if ( (i % ITEM_COUNT == 0) && (i != 0)) {
619  VERBOSE(3,5,"stat file: "LLU"", i);
620  }
621  sprintf(item, "file.%s"LLU"", o.stat_name, item_num);
622  }
623 
624  /* determine the path to the file/dir to be stat'ed */
625  parent_dir = item_num / o.items_per_dir;
626 
627  if (parent_dir > 0) { //item is not in tree's root directory
628 
629  /* prepend parent directory to item's path */
630  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
631  strcpy(item, temp);
632 
633  //still not at the tree's root dir
634  while (parent_dir > o.branch_factor) {
635  parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor);
636  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
637  strcpy(item, temp);
638  }
639  }
640 
641  /* Now get item to have the full path */
642  sprintf( temp, "%s/%s", path, item );
643  strcpy( item, temp );
644 
645  /* below temp used to be hiername */
646  VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item);
647  if (-1 == o.backend->stat (item, &buf, o.backend_options)) {
648  WARNF("unable to stat %s %s", dirs ? "directory" : "file", item);
649  }
650  }
651 }
652 
653 /* reads all of the items created as specified by the input parameters */
/*
 * Open, optionally read and verify, and close every file created below
 * "path".
 *
 * random   - non-zero to take item numbers from o.rand_array
 * dirs     - when non-zero no file name is built (item keeps only the
 *            parent directory components)
 * dir_iter - not used by this function
 * path     - root directory of the items
 *
 * o.items items are visited, or o.items_per_dir when o.directory_loops is
 * not 1.  A buffer of o.read_bytes bytes is allocated up front and freed
 * at the end when o.read_bytes is non-zero.
 *
 * NOTE(review): the embedded line numbering skips 665 and 744, so
 * statements are missing from this listing -- confirm against the
 * upstream file.
 * NOTE(review): in the visible code the "continue" after a failed xfer
 * skips the close at the bottom of the loop, leaving the handle opened
 * for that item unclosed.
 */
654 void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
655  uint64_t parent_dir, item_num = 0;
656  char item[MAX_PATHLEN], temp[MAX_PATHLEN];
657  aiori_fd_t *aiori_fh;
658 
659  VERBOSE(1,-1,"Entering mdtest_read on %s", path );
660  char *read_buffer;
661 
662  /* allocate read buffer */
663  if (o.read_bytes > 0) {
664  read_buffer = aligned_buffer_alloc(o.read_bytes, o.gpuMemoryFlags);
666  }
667 
668  uint64_t stop_items = o.items;
669 
670  if( o.directory_loops != 1 ){
671  stop_items = o.items_per_dir;
672  }
673 
674  /* iterate over all of the item IDs */
675  for (uint64_t i = 0 ; i < stop_items ; ++i) {
676  /*
677  * It doesn't make sense to pass the address of the array because that would
678  * be like passing char **. Tested it on a Cray and it seems to work either
679  * way, but it seems that it is correct without the "&".
680  *
681  * NTH: Both are technically correct in C.
682  *
683  * memset(&item, 0, MAX_PATHLEN);
684  */
685  memset(item, 0, MAX_PATHLEN);
686  memset(temp, 0, MAX_PATHLEN);
687 
688  /* determine the item number to read */
689  if (random) {
690  item_num = o.rand_array[i];
691  } else {
692  item_num = i;
693  }
694 
695  /* make adjustments if in leaf only mode*/
696  if (o.leaf_only) {
697  item_num += o.items_per_dir *
698  (o.num_dirs_in_tree - (uint64_t) pow (o.branch_factor, o.depth));
699  }
700 
701  /* create name of file to read */
702  if (!dirs) {
703  if ((i%ITEM_COUNT == 0) && (i != 0)) {
704  VERBOSE(3,5,"read file: "LLU"", i);
705  }
706  sprintf(item, "file.%s"LLU"", o.read_name, item_num);
707  }
708 
709  /* determine the path to the file/dir to be read'ed */
710  parent_dir = item_num / o.items_per_dir;
711 
712  if (parent_dir > 0) { //item is not in tree's root directory
713 
714  /* prepend parent directory to item's path */
715  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
716  strcpy(item, temp);
717 
718  /* still not at the tree's root dir */
719  while (parent_dir > o.branch_factor) {
720  parent_dir = (unsigned long long) ((parent_dir-1) / o.branch_factor);
721  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
722  strcpy(item, temp);
723  }
724  }
725 
726  /* Now get item to have the full path */
727  sprintf( temp, "%s/%s", path, item );
728  strcpy( item, temp );
729 
730  /* below temp used to be hiername */
731  VERBOSE(3,5,"mdtest_read file: %s", item);
732 
733  o.hints.filePerProc = ! o.shared_file;
734 
735  /* open file for reading */
736  aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options);
737  if (NULL == aiori_fh) {
738  WARNF("unable to open file %s", item);
739  continue;
740  }
741 
742  /* read file */
743  if (o.read_bytes > 0) {
/* a short transfer counts as a failed read and as a verification error */
745  if (o.read_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, o.read_bytes, 0, o.backend_options)) {
746  WARNF("unable to read file %s", item);
747  o.verification_error += 1;
748  continue;
749  }
/* rank whose data pattern is expected: shifted by 2 * o.nstride, or this
   rank itself when a shared file is used */
750  int pretend_rank = (2 * o.nstride + rank) % o.size;
751  if(o.verify_read){
752  if (o.shared_file) {
753  pretend_rank = rank;
754  }
755  int error = verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank, o.dataPacketType, o.gpuMemoryFlags);
756  o.verification_error += error;
757  if(error){
758  VERBOSE(1,1,"verification error in file: %s", item);
759  }
760  }
761  }
762 
763  /* close file */
764  o.backend->close (aiori_fh, o.backend_options);
765  }
766  if(o.read_bytes){
767  aligned_buffer_free(read_buffer, o.gpuMemoryFlags);
768  }
769 }
770 
771 /* This method should be called by rank 0. It subsequently does all of
772  the creates and removes for the other ranks */
773 void collective_create_remove(const int create, const int dirs, const int ntasks, const char *path, rank_progress_t * progress) {
774  char temp[MAX_PATHLEN];
775 
776  VERBOSE(1,-1,"Entering collective_create_remove on %s", path );
777 
778  /* rank 0 does all of the creates and removes for all of the ranks */
779  for (int i = 0 ; i < ntasks ; ++i) {
780  memset(temp, 0, MAX_PATHLEN);
781 
782  strcpy(temp, o.testdir);
783  strcat(temp, "/");
784 
785  /* set the base tree name appropriately */
786  if (o.unique_dir_per_task) {
787  sprintf(o.base_tree_name, "mdtest_tree.%d", i);
788  } else {
789  sprintf(o.base_tree_name, "mdtest_tree");
790  }
791 
792  /* Setup to do I/O to the appropriate test dir */
793  strcat(temp, o.base_tree_name);
794  strcat(temp, ".0");
795 
796  /* set all item names appropriately */
797  if (! o.shared_file) {
798  sprintf(o.mk_name, "mdtest.%d.", (i+(0*o.nstride))%ntasks);
799  sprintf(o.stat_name, "mdtest.%d.", (i+(1*o.nstride))%ntasks);
800  sprintf(o.read_name, "mdtest.%d.", (i+(2*o.nstride))%ntasks);
801  sprintf(o.rm_name, "mdtest.%d.", (i+(3*o.nstride))%ntasks);
802  }
803  if (o.unique_dir_per_task) {
804  VERBOSE(3,5,"i %d nstride %d ntasks %d", i, o.nstride, ntasks);
805  sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir,
806  (i+(0*o.nstride))%ntasks);
807  sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir,
808  (i+(1*o.nstride))%ntasks);
809  sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir,
810  (i+(2*o.nstride))%ntasks);
811  sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir,
812  (i+(3*o.nstride))%ntasks);
813  sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir,
814  (i+(4*o.nstride))%ntasks);
815  sprintf(o.unique_rm_uni_dir, "%s", o.testdir);
816  }
817 
818  /* Now that everything is set up as it should be, do the create or remove */
819  VERBOSE(3,5,"collective_create_remove (create_remove_items): temp is '%s'", temp);
820 
821  create_remove_items(0, dirs, create, 1, temp, 0, progress);
822  }
823 
824  /* reset all of the item names */
825  if (o.unique_dir_per_task) {
826  sprintf(o.base_tree_name, "mdtest_tree.0");
827  } else {
828  sprintf(o.base_tree_name, "mdtest_tree");
829  }
830  if (! o.shared_file) {
831  sprintf(o.mk_name, "mdtest.%d.", (0+(0*o.nstride))%ntasks);
832  sprintf(o.stat_name, "mdtest.%d.", (0+(1*o.nstride))%ntasks);
833  sprintf(o.read_name, "mdtest.%d.", (0+(2*o.nstride))%ntasks);
834  sprintf(o.rm_name, "mdtest.%d.", (0+(3*o.nstride))%ntasks);
835  }
836  if (o.unique_dir_per_task) {
837  sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir,
838  (0+(0*o.nstride))%ntasks);
839  sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir,
840  (0+(1*o.nstride))%ntasks);
841  sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir,
842  (0+(2*o.nstride))%ntasks);
843  sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir,
844  (0+(3*o.nstride))%ntasks);
845  sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir,
846  (0+(4*o.nstride))%ntasks);
847  sprintf(o.unique_rm_uni_dir, "%s", o.testdir);
848  }
849 }
850 
/*
 * Rename every item below "path" once, rotating the names.
 *
 * dirs     - non-zero for "dir.*" entries, zero for "file.*"
 * dir_iter - not used by this function
 * path     - root directory of the items
 * progress - not used by this function
 *
 * Returns immediately (after a warning) when the backend has no rename
 * method, and silently when only one item would be visited.  Otherwise
 * item 0 is first moved to "<item 0>-XX", every following item is renamed
 * to the name of its predecessor, and in the last iteration the "-XX"
 * entry is renamed to the name of the second-to-last item.  item_last
 * carries the rename target from one iteration to the next, so the order
 * of the statements in the loop matters.
 */
851 void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank_progress_t * progress) {
852  uint64_t parent_dir, item_num = 0;
853  char item[MAX_PATHLEN], temp[MAX_PATHLEN];
854  char item_last[MAX_PATHLEN];
855 
856  if(o.backend->rename == NULL){
857  WARN("Backend doesn't support rename\n");
858  return;
859  }
860 
861  VERBOSE(1,-1,"Entering mdtest_rename on %s", path );
862 
863  uint64_t stop_items = o.items;
864 
865  if( o.directory_loops != 1 ){
866  stop_items = o.items_per_dir;
867  }
868 
/* a single item cannot be rotated */
869  if(stop_items == 1) return;
870 
871  /* iterate over all of the item IDs */
872  char first_item_name[MAX_PATHLEN];
873  for (uint64_t i = 0 ; i < stop_items; ++i) {
874  item_num = i;
875  /* make adjustments if in leaf only mode*/
876  if (o.leaf_only) {
877  item_num += o.items_per_dir * (o.num_dirs_in_tree - (uint64_t) pow( o.branch_factor, o.depth ));
878  }
879 
880  /* create name of file/dir to stat */
/* note: the names are built with o.stat_name */
881  if (dirs) {
882  sprintf(item, "dir.%s"LLU"", o.stat_name, item_num);
883  } else {
884  sprintf(item, "file.%s"LLU"", o.stat_name, item_num);
885  }
886 
887  /* determine the path to the file/dir to be stat'ed */
888  parent_dir = item_num / o.items_per_dir;
889 
890  if (parent_dir > 0) { //item is not in tree's root directory
891  /* prepend parent directory to item's path */
892  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
893  strcpy(item, temp);
894 
895  //still not at the tree's root dir
896  while (parent_dir > o.branch_factor) {
897  parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor);
898  sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item);
899  strcpy(item, temp);
900  }
901  }
902 
903  /* Now get item to have the full path */
904  sprintf( temp, "%s/%s", path, item );
905  strcpy( item, temp );
906 
907  VERBOSE(3,5,"mdtest_rename %4s: %s", (dirs ? "dir" : "file"), item);
/* first iteration: the target is the temporary "-XX" name; last iteration:
   the source is that temporary entry instead of the item's own name */
908  if(i == 0){
909  sprintf(first_item_name, "%s-XX", item);
910  strcpy(item_last, first_item_name);
911  }else if(i == stop_items - 1){
912  strcpy(item, first_item_name);
913  }
914  if (-1 == o.backend->rename(item, item_last, o.backend_options)) {
915  WARNF("unable to rename %s %s", dirs ? "directory" : "file", item);
916  }
917 
/* the name just vacated becomes the target of the next rename */
918  strcpy(item_last, item);
919  }
920 }
921 
922 static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, double t_start, double t_end, double t_end_before_barrier){
923  res->time[test] = t_end - t_start;
924  if(isfinite(t_end_before_barrier)){
925  res->time_before_barrier[test] = t_end_before_barrier - t_start;
926  }else{
927  res->time_before_barrier[test] = res->time[test];
928  }
929  if(item_count == 0){
930  res->rate[test] = 0.0;
931  res->rate_before_barrier[test] = 0.0;
932  }else{
933  res->rate[test] = item_count/res->time[test];
934  res->rate_before_barrier[test] = item_count/res->time_before_barrier[test];
935  }
936  res->items[test] = item_count;
937  res->stonewall_last_item[test] = o.items;
938 }
939 
940 void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) {
941  int size;
942  double t_start, t_end, t_end_before_barrier;
943  char temp_path[MAX_PATHLEN];
944  mdtest_results_t * res = & o.summary_table[iteration];
945 
946  MPI_Comm_size(testComm, &size);
947 
948  VERBOSE(1,-1,"Entering directory_test on %s", path );
949 
950  MPI_Barrier(testComm);
951 
952  /* create phase */
953  if(o.create_only) {
954  phase_prepare();
955  t_start = GetTimeStamp();
957  progress->items_done = 0;
958  progress->start_time = GetTimeStamp();
959  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
960  prep_testdir(iteration, dir_iter);
961  if (o.unique_dir_per_task) {
962  unique_dir_access(MK_UNI_DIR, temp_path);
963  if (! o.time_unique_dir_overhead) {
964  t_start = GetTimeStamp();
965  }
966  } else {
967  sprintf( temp_path, "%s/%s", o.testdir, path );
968  }
969 
970  VERBOSE(3,-1,"directory_test: create path is '%s'", temp_path );
971 
972  /* "touch" the files */
973  if (o.collective_creates) {
974  if (rank == 0) {
975  collective_create_remove(1, 1, ntasks, temp_path, progress);
976  }
977  } else {
978  /* create directories */
979  create_remove_items(0, 1, 1, 0, temp_path, 0, progress);
980  }
981  }
982  progress->stone_wall_timer_seconds = 0;
983  t_end_before_barrier = GetTimeStamp();
984  phase_end();
985  t_end = GetTimeStamp();
986  updateResult(res, MDTEST_DIR_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier);
987  }
988 
989  /* stat phase */
990  if (o.stat_only) {
991  phase_prepare();
992  t_start = GetTimeStamp();
993  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
994  prep_testdir(iteration, dir_iter);
995  if (o.unique_dir_per_task) {
996  unique_dir_access(STAT_SUB_DIR, temp_path);
997  if (! o.time_unique_dir_overhead) {
998  t_start = GetTimeStamp();
999  }
1000  } else {
1001  sprintf( temp_path, "%s/%s", o.testdir, path );
1002  }
1003 
1004  VERBOSE(3,5,"stat path is '%s'", temp_path );
1005 
1006  /* stat directories */
1007  if (o.random_seed > 0) {
1008  mdtest_stat(1, 1, dir_iter, temp_path, progress);
1009  } else {
1010  mdtest_stat(0, 1, dir_iter, temp_path, progress);
1011  }
1012  }
1013  t_end_before_barrier = GetTimeStamp();
1014  phase_end();
1015  t_end = GetTimeStamp();
1016  updateResult(res, MDTEST_DIR_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier);
1017  }
1018 
1019  /* read phase */
1020  if (o.read_only) {
1021  phase_prepare();
1022  t_start = GetTimeStamp();
1023  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1024  prep_testdir(iteration, dir_iter);
1025  if (o.unique_dir_per_task) {
1026  unique_dir_access(READ_SUB_DIR, temp_path);
1027  if (! o.time_unique_dir_overhead) {
1028  t_start = GetTimeStamp();
1029  }
1030  } else {
1031  sprintf( temp_path, "%s/%s", o.testdir, path );
1032  }
1033 
1034  VERBOSE(3,5,"directory_test: read path is '%s'", temp_path );
1035 
1036  /* read directories */
1037  if (o.random_seed > 0) {
1038  ; /* N/A */
1039  } else {
1040  ; /* N/A */
1041  }
1042  }
1043  t_end_before_barrier = GetTimeStamp();
1044  phase_end();
1045  t_end = GetTimeStamp();
1046  updateResult(res, MDTEST_DIR_READ_NUM, o.items, t_start, t_end, t_end_before_barrier);
1047  }
1048 
1049  /* rename phase */
1050  if(o.rename_dirs && o.items > 1){
1051  phase_prepare();
1052  t_start = GetTimeStamp();
1053  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1054  prep_testdir(iteration, dir_iter);
1055  if (o.unique_dir_per_task) {
1056  unique_dir_access(STAT_SUB_DIR, temp_path);
1057  if (! o.time_unique_dir_overhead) {
1058  t_start = GetTimeStamp();
1059  }
1060  } else {
1061  sprintf( temp_path, "%s/%s", o.testdir, path );
1062  }
1063 
1064  VERBOSE(3,5,"rename path is '%s'", temp_path );
1065 
1066  rename_dir_test(1, dir_iter, temp_path, progress);
1067  }
1068  t_end_before_barrier = GetTimeStamp();
1069  phase_end();
1070  t_end = GetTimeStamp();
1071  updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, t_start, t_end, t_end_before_barrier);
1072  }
1073 
1074  /* remove phase */
1075  if (o.remove_only) {
1076  phase_prepare();
1077  t_start = GetTimeStamp();
1078  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1079  prep_testdir(iteration, dir_iter);
1080  if (o.unique_dir_per_task) {
1081  unique_dir_access(RM_SUB_DIR, temp_path);
1082  if (!o.time_unique_dir_overhead) {
1083  t_start = GetTimeStamp();
1084  }
1085  } else {
1086  sprintf( temp_path, "%s/%s", o.testdir, path );
1087  }
1088 
1089  VERBOSE(3,5,"directory_test: remove directories path is '%s'", temp_path );
1090 
1091  /* remove directories */
1092  if (o.collective_creates) {
1093  if (rank == 0) {
1094  collective_create_remove(0, 1, ntasks, temp_path, progress);
1095  }
1096  } else {
1097  create_remove_items(0, 1, 0, 0, temp_path, 0, progress);
1098  }
1099  }
1100  t_end_before_barrier = GetTimeStamp();
1101  phase_end();
1102  t_end = GetTimeStamp();
1103  updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier);
1104  }
1105 
1106  if (o.remove_only) {
1107  if (o.unique_dir_per_task) {
1108  unique_dir_access(RM_UNI_DIR, temp_path);
1109  } else {
1110  sprintf( temp_path, "%s/%s", o.testdir, path );
1111  }
1112 
1113  VERBOSE(3,5,"directory_test: remove unique directories path is '%s'\n", temp_path );
1114  }
1115 
1116  VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_CREATE_NUM], o.summary_table[iteration].rate[MDTEST_DIR_CREATE_NUM]);
1117  VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_STAT_NUM], o.summary_table[iteration].rate[MDTEST_DIR_STAT_NUM]);
1118  VERBOSE(1,-1," Directory rename : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_RENAME_NUM], o.summary_table[iteration].rate[MDTEST_DIR_RENAME_NUM]);
1119  VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_REMOVE_NUM], o.summary_table[iteration].rate[MDTEST_DIR_REMOVE_NUM]);
1120 }
1121 
1122 /* Returns if the stonewall was hit
 *
 * Combines the per-rank item counts over testComm and stores the outcome in
 * o.summary_table[iteration] under MDTEST_FILE_CREATE_NUM.
 *
 * iteration    - index into o.summary_table
 * items_done   - number of items this rank has completed
 * tstart       - not referenced in the lines visible here
 * out_max_iter - receives the MPI_MAX of items_done over all ranks
 *
 * Returns 1 when o.items differs from sum_accessed / o.size, otherwise 0.
 *
 * NOTE(review): the embedded listing numbers jump from 1128 to 1130, so one
 * source line appears to be missing from this extract; it may be where tstart
 * is consumed - confirm against the real file.
 * NOTE(review): items_done is uint64_t and max_iter is unsigned long long, yet
 * all three reductions use the signed MPI_LONG_LONG_INT datatype - confirm
 * this is intended.
 */
1123 int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, uint64_t * out_max_iter){
1124  int hit = 0;
1125  long long unsigned max_iter = 0;
1126
1127  VERBOSE(1,1,"stonewall hit with %lld items", (long long) items_done );
 /* every rank learns the largest item count reached by any rank */
1128  MPI_Allreduce(& items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm);
1130  o.summary_table[iteration].stonewall_last_item[MDTEST_FILE_CREATE_NUM] = items_done;
1131  *out_max_iter = max_iter;
1132
1133  // continue to the maximum...
 /* MPI_Reduce delivers its result to root 0 only; on the other ranks
  * min_accessed and sum_accessed keep their initial value of 0 */
1134  long long min_accessed = 0;
1135  MPI_Reduce(& items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm);
1136  long long sum_accessed = 0;
1137  MPI_Reduce(& items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm);
1138  o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed;
1139  o.summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * o.size;
1140
 /* integer division: compares o.items with the truncated per-rank average */
1141  if(o.items != (sum_accessed / o.size)){
1142  VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / o.size);
1143  hit = 1;
1144  }
1145
1146  return hit;
1147 }
1148 
#ifdef HAVE_GPFSCREATESHARING_T
/*
 * Pass a GPFS_CREATE_SHARING hint for a directory to gpfs_fcntl().
 *
 * testDirName - path opened read-only to obtain the descriptor for gpfs_fcntl()
 * enable      - value stored in the hint's `enable` field
 *
 * An open64() failure is reported through ERRF. A non-zero gpfs_fcntl()
 * result is only reported (WARNF) when verbose >= VERBOSE_2.
 *
 * NOTE(review): the descriptor is never closed here. Confirm whether the hint
 * stays in effect after close() before adding one.
 */
void gpfs_createSharing(char *testDirName, int enable)
{
  const int open_flags = O_RDONLY;
  int dir_fd;
  int status;

  struct
  {
    gpfsFcntlHeader_t header;
    gpfsCreateSharing_t fcreate;
  } createSharingHint;

  /* payload: the create-sharing request itself */
  createSharingHint.fcreate.structLen = sizeof(createSharingHint.fcreate);
  createSharingHint.fcreate.structType = GPFS_CREATE_SHARING;
  createSharingHint.fcreate.enable = enable;

  /* envelope: describes the complete request handed to gpfs_fcntl() */
  createSharingHint.header.totalLength = sizeof(createSharingHint);
  createSharingHint.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
  createSharingHint.header.fcntlReserved = 0;

  dir_fd = open64(testDirName, open_flags);
  if (dir_fd < 0) {
    ERRF("open64(\"%s\", %d) failed: %s", testDirName, open_flags, strerror(errno));
  }

  status = gpfs_fcntl(dir_fd, &createSharingHint);
  if (status != 0 && verbose >= VERBOSE_2) {
    WARNF("gpfs_fcntl(%d, ...) create sharing hint failed. rc %d", dir_fd, status);
  }
}
#endif /* HAVE_GPFSCREATESHARING_T */
1179 
/*
 * Create phase of the file test: for each directory loop, build the working
 * path and create the files through create_remove_items().
 *
 * iteration - passed to prep_testdir() and updateStoneWallIterations()
 * ntasks    - forwarded to collective_create_remove()
 * path      - appended to o.testdir when o.unique_dir_per_task is not set
 * progress  - per-rank progress state, updated by the stonewall handling
 * t_start   - start timestamp owned by the caller; overwritten here when
 *             unique directories are used and their overhead is not timed
 *
 * NOTE(review): the embedded listing numbers skip 1206, 1218 and 1222, so
 * source lines are missing from this extract. As shown, the function has one
 * more closing brace than opening braces; the opening of the stonewall
 * section presumably sat on listing line 1206 - confirm against the real file.
 */
1180 void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t_start){
1181  char temp_path[MAX_PATHLEN];
1182  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1183  prep_testdir(iteration, dir_iter);
1184
1185  if (o.unique_dir_per_task) {
1186  unique_dir_access(MK_UNI_DIR, temp_path);
1187  VERBOSE(5,5,"operating on %s", temp_path);
 /* restart the caller's clock so the unique-directory work is not timed */
1188  if (! o.time_unique_dir_overhead) {
1189  *t_start = GetTimeStamp();
1190  }
1191  } else {
1192  sprintf( temp_path, "%s/%s", o.testdir, path );
1193  }
1194
1195  VERBOSE(3,-1,"file_test: create path is '%s'", temp_path );
1196  /* "touch" the files */
1197  if (o.collective_creates) {
1198  if (rank == 0) {
1199  collective_create_remove(1, 0, ntasks, temp_path, progress);
1200  }
 /* all ranks wait until rank 0 has finished the collective step */
1201  MPI_Barrier(testComm);
1202  }
1203
1204  /* create files */
1205  create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
 /* NOTE(review): listing line 1206 is missing here */
1207  // hit the stonewall
1208  uint64_t max_iter = 0;
1209  uint64_t items_done = progress->items_done + dir_iter * o.items_per_dir;
1210  int hit = updateStoneWallIterations(iteration, items_done, *t_start, & max_iter);
1211  progress->items_start = items_done;
1212  progress->items_per_dir = max_iter;
1213  if (hit){
 /* timer set to 0 before the second create pass, which starts at items_start */
1214  progress->stone_wall_timer_seconds = 0;
1215  VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir);
1216  create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
1217  // now reset the values
 /* NOTE(review): listing line 1218 is missing here */
1219  o.items = progress->items_done;
1220  }
1221  if (o.stoneWallingStatusFile){
 /* NOTE(review): listing line 1222 (the body of this branch) is missing here */
1223  }
1224  // reset stone wall timer to allow proper cleanup
1225  progress->stone_wall_timer_seconds = 0;
1226  // at the moment, stonewall can be done only with one directory_loop, so we can return here safely
1227  break;
1228  }
1229  }
1230 }
1231 
/*
 * Run the file phases (create, stat, read, remove) that are enabled in `o`
 * for one iteration and record each phase through updateResult() in
 * o.summary_table[iteration].
 *
 * iteration - index into o.summary_table; also passed to prep_testdir()
 * ntasks    - forwarded to file_test_create() and collective_create_remove()
 * path      - appended to o.testdir to form the working path when
 *             o.unique_dir_per_task is not set
 * progress  - per-rank progress state handed to the worker routines
 *
 * Each phase is bracketed by phase_prepare()/phase_end(); a timestamp is taken
 * before phase_end() (t_end_before_barrier) and another after it (t_end).
 *
 * NOTE(review): the embedded listing numbers skip 1258, 1278 and 1399, so
 * source lines are missing from this extract.
 */
1232 void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) {
1233  int size;
1234  double t_start, t_end, t_end_before_barrier;
1235  char temp_path[MAX_PATHLEN];
1236  mdtest_results_t * res = & o.summary_table[iteration];
1237
 /* NOTE(review): `size` is not read again in the lines visible here */
1238  MPI_Comm_size(testComm, &size);
1239
1240  VERBOSE(3,5,"Entering file_test on %s", path);
1241
1242  MPI_Barrier(testComm);
1243
1244  /* create phase */
1245  if (o.create_only ) {
1246  phase_prepare();
1247  t_start = GetTimeStamp();
1248 #ifdef HAVE_GPFSCREATESHARING_T
1249  /* Enable createSharingHint */
1250  posix_options_t * hint_backend_option = (posix_options_t*) o.backend_options;
1251  if (hint_backend_option->gpfs_createsharing)
1252  {
1253  sprintf(temp_path, "%s/%s", o.testdir, path);
1254  VERBOSE(3,5,"file_test: GPFS Hint enable directory path is '%s'", temp_path);
1255  gpfs_createSharing(temp_path, 1);
1256  }
1257 #endif /* HAVE_GPFSCREATESHARING_T */
 /* NOTE(review): listing line 1258 is missing here */
1259  progress->items_done = 0;
1260  progress->start_time = GetTimeStamp();
1261  file_test_create(iteration, ntasks, path, progress, &t_start);
1262  t_end_before_barrier = GetTimeStamp();
1263  phase_end();
1264 #ifdef HAVE_GPFSCREATESHARING_T
1265  /* Disable createSharingHint */
1266  if (hint_backend_option->gpfs_createsharing)
1267  {
1268  VERBOSE(3,5,"file_test: GPFS Hint disable directory path is '%s'", temp_path);
1269  gpfs_createSharing(temp_path, 0);
1270  }
1271 #endif /* HAVE_GPFSCREATESHARING_T */
1272  t_end = GetTimeStamp();
1273  updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier);
1274  }else{
 /* no create phase in this run: take the item count from the stonewall
  * status file when one is configured */
1275  if (o.stoneWallingStatusFile){
1276  int64_t expected_items;
1277  /* The number of items depends on the stonewalling file */
 /* NOTE(review): listing line 1278 is missing here; no assignment to
  * expected_items is visible before it is read below */
1279  if(expected_items >= 0){
1280  if(o.directory_loops > 1){
1281  o.directory_loops = expected_items / o.items_per_dir;
1282  o.items = o.items_per_dir;
1283  }else{
1284  o.items = expected_items;
1285  progress->items_per_dir = o.items;
1286  }
1287  }
1288  if (rank == 0) {
1289  if(expected_items == -1){
1290  WARN("Could not read stonewall status file");
1291  }else {
1292  VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", o.items);
1293  }
1294  }
1295  }
1296  }
1297
1298  /* stat phase */
1299  if (o.stat_only ) {
1300  phase_prepare();
1301  t_start = GetTimeStamp();
1302  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1303  prep_testdir(iteration, dir_iter);
1304  if (o.unique_dir_per_task) {
1305  unique_dir_access(STAT_SUB_DIR, temp_path);
 /* restart the clock so the unique-directory work is not timed */
1306  if (!o.time_unique_dir_overhead) {
1307  t_start = GetTimeStamp();
1308  }
1309  } else {
1310  sprintf( temp_path, "%s/%s", o.testdir, path );
1311  }
1312
1313  VERBOSE(3,5,"file_test: stat path is '%s'", temp_path );
1314
1315  /* stat files */
 /* first argument is 1 when o.random_seed > 0, otherwise 0 */
1316  mdtest_stat((o.random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress);
1317  }
1318  t_end_before_barrier = GetTimeStamp();
1319  phase_end();
1320  t_end = GetTimeStamp();
1321  updateResult(res, MDTEST_FILE_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier);
1322  }
1323
1324  /* read phase */
1325  if (o.read_only ) {
1326  phase_prepare();
1327  t_start = GetTimeStamp();
1328  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1329  prep_testdir(iteration, dir_iter);
1330  if (o.unique_dir_per_task) {
1331  unique_dir_access(READ_SUB_DIR, temp_path);
1332  if (! o.time_unique_dir_overhead) {
1333  t_start = GetTimeStamp();
1334  }
1335  } else {
1336  sprintf( temp_path, "%s/%s", o.testdir, path );
1337  }
1338
1339  VERBOSE(3,5,"file_test: read path is '%s'", temp_path );
1340
1341  /* read files */
 /* the two calls differ only in the first argument (1 when a random seed is set) */
1342  if (o.random_seed > 0) {
1343  mdtest_read(1,0, dir_iter, temp_path);
1344  } else {
1345  mdtest_read(0,0, dir_iter, temp_path);
1346  }
1347  }
1348  t_end_before_barrier = GetTimeStamp();
1349  phase_end();
1350  t_end = GetTimeStamp();
1351  updateResult(res, MDTEST_FILE_READ_NUM, o.items, t_start, t_end, t_end_before_barrier);
1352  }
1353
1354  /* remove phase */
1355  if (o.remove_only) {
1356  phase_prepare();
1357  t_start = GetTimeStamp();
1358  progress->items_start = 0;
1359
1360  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
1361  prep_testdir(iteration, dir_iter);
1362  if (o.unique_dir_per_task) {
1363  unique_dir_access(RM_SUB_DIR, temp_path);
1364  if (! o.time_unique_dir_overhead) {
1365  t_start = GetTimeStamp();
1366  }
1367  } else {
1368  sprintf( temp_path, "%s/%s", o.testdir, path );
1369  }
1370
1371  VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path );
1372
1373  if (o.collective_creates) {
1374  if (rank == 0) {
1375  collective_create_remove(0, 0, ntasks, temp_path, progress);
1376  }
1377  } else {
 /* NOTE(review): the message says "create" although this is the remove phase */
1378  VERBOSE(3,5,"gonna create %s", temp_path);
1379  create_remove_items(0, 0, 0, 0, temp_path, 0, progress);
1380  }
1381  }
1382  t_end_before_barrier = GetTimeStamp();
1383  phase_end();
1384  t_end = GetTimeStamp();
1385  updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier);
1386  }
1387
1388  if (o.remove_only) {
1389  if (o.unique_dir_per_task) {
1390  unique_dir_access(RM_UNI_DIR, temp_path);
1391  } else {
 /* NOTE(review): copies `path` alone, unlike the "%s/%s" form used above */
1392  strcpy( temp_path, path );
1393  }
1394
1395  VERBOSE(3,5,"file_test: rm unique directories path is '%s'", temp_path );
1396  }
1397
1398  if(o.num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */
 /* NOTE(review): listing line 1399 (the body of this branch) is missing here */
1400  }
1401
 /* per-phase summary for this iteration */
1402  VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].rate[MDTEST_FILE_CREATE_NUM]);
1403  if(o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){
 /* NOTE(review): the second value printed is stonewall_item_sum, labelled ops/sec */
1404  VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]);
1405  }
1406  VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_STAT_NUM], o.summary_table[iteration].rate[MDTEST_FILE_STAT_NUM]);
1407  VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_READ_NUM], o.summary_table[iteration].rate[MDTEST_FILE_READ_NUM]);
1408  VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_REMOVE_NUM], o.summary_table[iteration].rate[MDTEST_FILE_REMOVE_NUM]);
1409 }
1410 
1411 char const * mdtest_test_name(int i){
1412  switch (i) {
1413  case MDTEST_DIR_CREATE_NUM: return "Directory creation";
1414  case MDTEST_DIR_STAT_NUM: return "Directory stat";
1415  case MDTEST_DIR_READ_NUM: return "Directory read";
1416  case MDTEST_DIR_REMOVE_NUM: return "Directory removal";
1417  case MDTEST_DIR_RENAME_NUM: return "Directory rename";
1418  case MDTEST_FILE_CREATE_NUM: return "File creation";
1419  case MDTEST_FILE_STAT_NUM: return "File stat";
1420  case MDTEST_FILE_READ_NUM: return "File read";
1421  case MDTEST_FILE_REMOVE_NUM: return "File removal";
1422  case MDTEST_TREE_CREATE_NUM: return "Tree creation";
1423  case MDTEST_TREE_REMOVE_NUM: return "Tree removal";
1424  default: return "ERR INVALID TESTNAME :";
1425  }
1426  return NULL;
1427 }
1428 
1429 /*
1430  * Store the results of each process in a file
1431  */
1432 static void StoreRankInformation(int iterations, mdtest_results_t * agg){
1433  const size_t size = sizeof(mdtest_results_t) * iterations;
1434  if(rank == 0){
1435  FILE* fd = fopen(o.saveRankDetailsCSV, "a");
1436  if (fd == NULL){
1437  FAIL("Cannot open saveRankPerformanceDetails file for writes!");
1438  }
1439 
1440  mdtest_results_t * results = safeMalloc(size * o.size);
1441  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, results, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1442 
1443  char buff[4096];
1444  char * cpos = buff;
1445  cpos += sprintf(cpos, "all,%llu", (long long unsigned) o.items);
1446  for(int e = 0; e < MDTEST_LAST_NUM; e++){
1447  if(agg->items[e] == 0){
1448  cpos += sprintf(cpos, ",,");
1449  }else{
1450  cpos += sprintf(cpos, ",%.10e,%.10e", agg->items[e] / agg->time[e], agg->time[e]);
1451  }
1452  }
1453  cpos += sprintf(cpos, "\n");
1454  int ret = fwrite(buff, cpos - buff, 1, fd);
1455 
1456  for(int iter = 0; iter < iterations; iter++){
1457  for(int i=0; i < o.size; i++){
1458  mdtest_results_t * cur = & results[i * iterations + iter];
1459  cpos = buff;
1460  cpos += sprintf(cpos, "%d,", i);
1461  for(int e = 0; e < MDTEST_TREE_CREATE_NUM; e++){
1462  if(cur->items[e] == 0){
1463  cpos += sprintf(cpos, ",,");
1464  }else{
1465  cpos += sprintf(cpos, ",%.10e,%.10e", cur->items[e] / cur->time_before_barrier[e], cur->time_before_barrier[e]);
1466  }
1467  }
1468  cpos += sprintf(cpos, "\n");
1469  ret = fwrite(buff, cpos - buff, 1, fd);
1470  if(ret != 1){
1471  WARN("Couln't append to saveRankPerformanceDetailsCSV file\n");
1472  break;
1473  }
1474  }
1475  }
1476  fclose(fd);
1477  free(results);
1478  }else{
1479  /* this is a hack for now assuming all datatypes in the structure are double */
1480  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1481  }
1482 }
1483 
1484 static mdtest_results_t* get_result_index(mdtest_results_t* all_results, int proc, int iter, int interation_count){
1485  return & all_results[proc * interation_count + iter];
1486 }
1487 
/*
 * Print the summary table to out_logfile from the gathered per-rank results.
 *
 * iterations  - number of iterations stored per rank in all_results
 * all_results - gathered tables, addressed through get_result_index()
 * print_time  - non-zero: report times; zero: report rates
 *
 * For each reported test the per-rank columns use the *_before_barrier values,
 * while the per-iteration columns use time[]/rate[]: per iteration the largest
 * time (or smallest rate) over all ranks is taken, and rates are multiplied by
 * o.size. Tree create/remove rows are computed from o.summary_table only.
 *
 * NOTE(review): the embedded listing numbers skip 1538, 1546, 1617 and 1672,
 * so source lines are missing from this extract; as shown, several `}` and
 * `}else{` lines have no visible opening `if`.
 */
1488 static void summarize_results_rank0(int iterations, mdtest_results_t * all_results, int print_time) {
1489  int start, stop;
1490  double min, max, mean, sd, sum, var, curr = 0;
1491  double imin, imax, imean, isum, icur; // calculation per iteration
1492  char const * access;
1493  /* if files only access, skip entries 0-3 (the dir tests) */
1494  if (o.files_only && ! o.dirs_only) {
1495  start = MDTEST_FILE_CREATE_NUM;
1496  } else {
1497  start = 0;
1498  }
1499
1500  /* if directories only access, skip entries 4-7 (the file tests) */
1501  if (o.dirs_only && !o.files_only) {
1502  stop = MDTEST_FILE_CREATE_NUM;
1503  } else {
1504  stop = MDTEST_TREE_CREATE_NUM;
1505  }
1506
1507  /* special case: if no directory or file tests, skip all */
1508  if (!o.dirs_only && !o.files_only) {
1509  start = stop = 0;
1510  }
1511
 /* optional dump: one line per test with the value of every rank */
1512  if(o.print_all_proc){
1513  fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? "time" : "rate");
1514  for (int j = 0; j < iterations; j++) {
1515  fprintf(out_logfile, "iteration: %d\n", j);
 /* this loop runs to MDTEST_LAST_NUM and does not use `stop` */
1516  for (int i = start; i < MDTEST_LAST_NUM; i++) {
1517  access = mdtest_test_name(i);
1518  if(access == NULL){
1519  continue;
1520  }
1521  fprintf(out_logfile, "Test %s", access);
1522  for (int k=0; k < o.size; k++) {
1523  mdtest_results_t * cur = get_result_index(all_results, k, j, iterations);
1524  if(print_time){
1525  curr = cur->time_before_barrier[i];
1526  }else{
1527  curr = cur->rate_before_barrier[i];
1528  }
1529  fprintf(out_logfile, "%c%e", (k==0 ? ' ': ','), curr);
1530  }
1531  fprintf(out_logfile, "\n");
1532  }
1533  }
1534  }
1535
 /* NOTE(review): the heading says "(in ops/sec)" for both the time and the rate table */
1536  VERBOSE(0, -1, "\nSUMMARY %s (in ops/sec): (of %d iterations)", print_time ? "time" : "rate", iterations);
1537  PRINT(" Operation ");
 /* NOTE(review): listing line 1538 is missing here (the `if` paired with the `}else{` below) */
1539  PRINT("per Rank: Max Min Mean per Iteration:");
1540  }else{
1541  PRINT(" ");
1542  }
1543  PRINT(" Max Min Mean Std Dev\n");
1544  PRINT(" --------- ");
1545
 /* NOTE(review): listing line 1546 is missing here */
1547  PRINT(" --- --- ---- ");
1548  }
1549  PRINT(" --- --- ---- -------\n");
 /* one output row per test in [start, stop) */
1550  for (int i = start; i < stop; i++) {
1551  min = 1e308;
1552  max = 0;
1553  sum = var = 0;
1554  imin = 1e308;
1555  isum = imax = 0;
 /* variable-length array holding one value per iteration */
1556  double iter_result[iterations];
1557  for (int j = 0; j < iterations; j++) {
 /* start value for the per-iteration max time / min rate over all ranks */
1558  icur = print_time ? 0 : 1e308;
1559  for (int k = 0; k < o.size; k++) {
1560  mdtest_results_t * cur = get_result_index(all_results, k, j, iterations);
1561  if(print_time){
1562  curr = cur->time_before_barrier[i];
1563  }else{
1564  curr = cur->rate_before_barrier[i];
1565  }
1566  if (min > curr) {
1567  min = curr;
1568  }
1569  if (max < curr) {
1570  max = curr;
1571  }
1572  sum += curr;
1573
1574  if (print_time) {
1575  curr = cur->time[i];
1576  if (icur < curr) {
1577  icur = curr;
1578  }
1579  } else {
1580  curr = cur->rate[i];
1581  if (icur > curr) {
1582  icur = curr;
1583  }
1584  }
1585  }
1586
1587  if (icur > imax) {
1588  imax = icur;
1589  }
1590  if (icur < imin) {
1591  imin = icur;
1592  }
1593  isum += icur;
1594  if(print_time){
1595  iter_result[j] = icur;
1596  }else{
1597  iter_result[j] = icur * o.size;
1598  }
1599  }
1600  mean = sum / iterations / o.size;
1601  imean = isum / iterations;
 /* rates are scaled by the number of ranks, matching iter_result above */
1602  if(! print_time){
1603  imax *= o.size;
1604  imin *= o.size;
1605  isum *= o.size;
1606  imean *= o.size;
1607  }
1608  for (int j = 0; j < iterations; j++) {
1609  var += (imean - iter_result[j]) * (imean - iter_result[j]);
1610  }
 /* divides by zero when iterations == 1; the print below substitutes 0 for sd in that case */
1611  var = var / (iterations - 1);
1612  sd = sqrt(var);
1613  access = mdtest_test_name(i);
 /* test index 2 is not printed - presumably MDTEST_DIR_READ_NUM; TODO confirm */
1614  if (i != 2) {
1615  fprintf(out_logfile, " %-18s ", access);
1616
 /* NOTE(review): listing line 1617 is missing here */
1618  fprintf(out_logfile, "%14.3f ", max);
1619  fprintf(out_logfile, "%14.3f ", min);
1620  fprintf(out_logfile, "%14.3f ", mean);
1621  fprintf(out_logfile, " ");
1622  }
1623  fprintf(out_logfile, " ");
1624  fprintf(out_logfile, "%14.3f ", imax);
1625  fprintf(out_logfile, "%14.3f ", imin);
1626  fprintf(out_logfile, "%14.3f ", imean);
1627  fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 0 : sd);
1628  fflush(out_logfile);
1629  }
1630  }
1631
1632  /* calculate tree create/remove rates, applies only to Rank 0 */
1633  for (int i = MDTEST_TREE_CREATE_NUM; i < MDTEST_LAST_NUM; i++) {
1634  min = imin = 1e308;
1635  max = imax = 0;
1636  sum = var = 0;
1637  for (int j = 0; j < iterations; j++) {
1638  if(print_time){
1639  curr = o.summary_table[j].time[i];
1640  }else{
1641  curr = o.summary_table[j].rate[i];
1642  }
1643  if (min > curr) {
1644  min = curr;
1645  }
1646  if (max < curr) {
1647  max = curr;
1648  }
1649  sum += curr;
1650  if(curr > imax){
1651  imax = curr;
1652  }
1653  if(curr < imin){
1654  imin = curr;
1655  }
1656  }
1657
1658  mean = sum / (iterations);
1659
 /* second pass: squared deviations from the mean */
1660  for (int j = 0; j < iterations; j++) {
1661  if(print_time){
1662  curr = o.summary_table[j].time[i];
1663  }else{
1664  curr = o.summary_table[j].rate[i];
1665  }
1666  var += (mean - curr)*(mean - curr);
1667  }
 /* same iterations == 1 caveat as in the loop above */
1668  var = var / (iterations - 1);
1669  sd = sqrt(var);
1670  access = mdtest_test_name(i);
1671  fprintf(out_logfile, " %-22s ", access);
 /* NOTE(review): listing line 1672 is missing here */
1673  fprintf(out_logfile, "%14.3f ", max);
1674  fprintf(out_logfile, "%14.3f ", min);
1675  fprintf(out_logfile, "%14.3f ", mean);
1676  fprintf(out_logfile, " ");
1677  }
1678  fprintf(out_logfile, "%14.3f ", imax);
1679  fprintf(out_logfile, "%14.3f ", imin);
1680  fprintf(out_logfile, "%14.3f ", sum / iterations);
1681  fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 0 : sd);
1682  fflush(out_logfile);
1683  }
1684 }
1685 
1686 /*
1687  Output the results and summarize them into rank 0's o.summary_table
1688  */
1689 void summarize_results(int iterations, mdtest_results_t * results) {
1690  const size_t size = sizeof(mdtest_results_t) * iterations;
1691  mdtest_results_t * all_results = NULL;
1692  if(rank == 0){
1693  all_results = safeMalloc(size * o.size);
1694  memset(all_results, 0, size * o.size);
1695  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, all_results, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1696  // calculate the aggregated values for all processes
1697  for(int j=0; j < iterations; j++){
1698  for(int i=0; i < MDTEST_LAST_NUM; i++){
1699  //double sum_rate = 0;
1700  double max_time = 0;
1701  double max_stonewall_time = 0;
1702  uint64_t sum_items = 0;
1703 
1704  // reduce over the processes
1705  for(int p=0; p < o.size; p++){
1706  mdtest_results_t * cur = get_result_index(all_results, p, j, iterations);
1707  //sum_rate += all_results[p + j*p]->rate[i];
1708  double t = cur->time[i];
1709  max_time = max_time < t ? t : max_time;
1710 
1711  sum_items += cur->items[i];
1712 
1713  t = cur->stonewall_time[i];
1714  max_stonewall_time = max_stonewall_time < t ? t : max_stonewall_time;
1715  }
1716 
1717  results[j].items[i] = sum_items;
1718  results[j].time[i] = max_time;
1719  results[j].stonewall_time[i] = max_stonewall_time;
1720  if(sum_items == 0){
1721  results[j].rate[i] = 0.0;
1722  }else{
1723  results[j].rate[i] = sum_items / max_time;
1724  }
1725 
1726  /* These results have already been reduced to Rank 0 */
1727  results[j].stonewall_item_sum[i] = o.summary_table[j].stonewall_item_sum[i];
1728  results[j].stonewall_item_min[i] = o.summary_table[j].stonewall_item_min[i];
1729  results[j].stonewall_time[i] = o.summary_table[j].stonewall_time[i];
1730  }
1731  }
1732  }else{
1733  MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1734  }
1735 
1736  /* share global results across processes as these are returned by the API */
1737  MPI_Bcast(results, size / sizeof(double), MPI_DOUBLE, 0, testComm);
1738 
1739  /* update relevant result values with local values as these are returned by the API */
1740  for(int j=0; j < iterations; j++){
1741  for(int i=0; i < MDTEST_LAST_NUM; i++){
1742  results[j].time_before_barrier[i] = o.summary_table[j].time_before_barrier[i];
1743  results[j].stonewall_last_item[i] = o.summary_table[j].stonewall_last_item[i];
1744  }
1745  }
1746 
1747  if(rank != 0){
1748  return;
1749  }
1750 
1751  if (o.print_rate_and_time){
1752  summarize_results_rank0(iterations, all_results, 0);
1753  summarize_results_rank0(iterations, all_results, 1);
1754  }else{
1755  summarize_results_rank0(iterations, all_results, o.print_time);
1756  }
1757 
1758  free(all_results);
1759 }
1760 
1761 /* Checks to see if the test setup is valid. If it isn't, fail.
 *
 * Also fills in defaults: when no phase flag is set all phases are enabled,
 * and when neither dirs_only nor files_only is set both are enabled. On rank 0
 * with o.saveRankDetailsCSV set, the CSV file is truncated and its header row
 * is written.
 *
 * NOTE(review): listing number 1762 is absent from this extract, so the
 * function header line is not visible here; the name md_validate_tests is
 * taken from the VERBOSE message below.
 */
1763
 /* NOTE(review): `|| ! o.barriers` sits outside the stone wall test, so as
  * written this FAIL is reached whenever barriers are disabled, with or
  * without a stone wall timer - confirm this is intended */
1764  if (((o.stone_wall_timer_seconds > 0) && (o.branch_factor > 1)) || ! o.barriers) {
1765  FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", o.branch_factor);
1766  }
1767
 /* no phase selected: run them all */
1768  if (!o.create_only && ! o.stat_only && ! o.read_only && !o.remove_only && !o.rename_dirs) {
1769  o.create_only = o.stat_only = o.read_only = o.remove_only = o.rename_dirs = 1;
1770  VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" );
1771  }
1772
1773  VERBOSE(1,-1,"Entering md_validate_tests..." );
1774
1775  /* if dirs_only and files_only were both left unset, set both now */
1776  if (!o.dirs_only && !o.files_only) {
1777  o.dirs_only = o.files_only = 1;
1778  }
1779
1780  /* if shared file 'S' access, no directory tests */
1781  if (o.shared_file) {
1782  o.dirs_only = 0;
1783  }
1784
1785  /* check for no barriers with shifting processes for different phases.
1786  that is, one may not specify both -B and -N as it will introduce
1787  race conditions that may cause errors stat'ing or deleting after
1788  creates.
1789  */
1790  if (( o.barriers == 0 ) && ( o.nstride != 0 ) && ( rank == 0 )) {
1791  FAIL( "Possible race conditions will occur: -B not compatible with -N");
1792  }
1793
1794  /* check for collective_creates incompatibilities */
1795  if (o.shared_file && o.collective_creates && rank == 0) {
1796  FAIL("-c not compatible with -S");
1797  }
1798  if (o.path_count > 1 && o.collective_creates && rank == 0) {
1799  FAIL("-c not compatible with multiple test directories");
1800  }
1801  if (o.collective_creates && !o.barriers) {
1802  FAIL("-c not compatible with -B");
1803  }
1804
1805  /* check for shared file incompatibilities */
1806  if (o.unique_dir_per_task && o.shared_file && rank == 0) {
1807  FAIL("-u not compatible with -S");
1808  }
1809
1810  /* check multiple directory paths and strided option */
1811  if (o.path_count > 1 && o.nstride > 0) {
1812  FAIL("cannot have multiple directory paths with -N strides between neighbor tasks");
1813  }
1814
1815  /* check for shared directory and multiple directories incompatibility */
1816  if (o.path_count > 1 && o.unique_dir_per_task != 1) {
1817  FAIL("shared directory mode is not compatible with multiple directory paths");
1818  }
1819
1820  /* check if more directory paths than ranks */
1821  if (o.path_count > o.size) {
1822  FAIL("cannot have more directory paths than MPI tasks");
1823  }
1824
1825  /* check depth */
1826  if (o.depth < 0) {
1827  FAIL("depth must be greater than or equal to zero");
1828  }
1829  /* check branch_factor */
 /* NOTE(review): the condition rejects branch_factor < 1 while the message
  * says "greater than or equal to zero" */
1830  if (o.branch_factor < 1 && o.depth > 0) {
1831  FAIL("branch factor must be greater than or equal to zero");
1832  }
1833  /* check for valid number of items */
1834  if ((o.items > 0) && (o.items_per_dir > 0)) {
1835  if(o.unique_dir_per_task){
1836  FAIL("only specify the number of items or the number of items per directory");
1837  }else if( o.items % o.items_per_dir != 0){
1838  FAIL("items must be a multiple of items per directory");
1839  }
1840  }
1841  /* check for using mknod */
1842  if (o.write_bytes > 0 && o.make_node) {
1843  FAIL("-k not compatible with -w");
1844  }
1845
1846  if(o.verify_read && ! o.read_only)
1847  FAIL("Verify read requires that the read test is used");
1848
1849  if(o.verify_read && o.read_bytes <= 0)
1850  FAIL("Verify read requires that read bytes is > 0");
1851
1852  if(o.read_only && o.read_bytes <= 0)
1853  WARN("Read bytes is 0, thus, a read test will actually just open/close");
1854
1855  if(o.create_only && o.read_only && o.read_bytes > o.write_bytes)
1856  FAIL("When writing and reading files, read bytes must be smaller than write bytes");
1857
1858  if (rank == 0 && o.saveRankDetailsCSV){
1859  // check that the file is writeable, truncate it and add header
1860  FILE* fd = fopen(o.saveRankDetailsCSV, "w");
1861  if (fd == NULL){
1862  FAIL("Cannot open saveRankPerformanceDetails file for write!");
1863  }
1864  char * head = "rank,items";
 /* the result of this first write is overwritten in the loop before being checked */
1865  int ret = fwrite(head, strlen(head), 1, fd);
 /* one "rate-<name>,time-<name>" column pair per test index */
1866  for(int e = 0; e < MDTEST_LAST_NUM; e++){
1867  char buf[1024];
1868  const char * str = mdtest_test_name(e);
1869
1870  sprintf(buf, ",rate-%s,time-%s", str, str);
1871  ret = fwrite(buf, strlen(buf), 1, fd);
1872  if(ret != 1){
1873  FAIL("Cannot write header to saveRankPerformanceDetails file");
1874  }
1875  }
1876  fwrite("\n", 1, 1, fd);
1877  fclose(fd);
1878  }
1879 }
1880 
1881 void show_file_system_size(char *file_system) {
1882  char real_path[MAX_PATHLEN];
1883  char file_system_unit_str[MAX_PATHLEN] = "GiB";
1884  char inode_unit_str[MAX_PATHLEN] = "Mi";
1885  int64_t file_system_unit_val = 1024 * 1024 * 1024;
1886  int64_t inode_unit_val = 1024 * 1024;
1887  int64_t total_file_system_size,
1888  free_file_system_size,
1889  total_inodes,
1890  free_inodes;
1891  double total_file_system_size_hr,
1892  used_file_system_percentage,
1893  used_inode_percentage;
1894  ior_aiori_statfs_t stat_buf;
1895  int ret;
1896 
1897  VERBOSE(1,-1,"Entering show_file_system_size on %s", file_system );
1898 
1899  ret = o.backend->statfs (file_system, &stat_buf, o.backend_options);
1900  if (0 != ret) {
1901  FAIL("unable to stat file system %s", file_system);
1902  }
1903 
1904  total_file_system_size = stat_buf.f_blocks * stat_buf.f_bsize;
1905  free_file_system_size = stat_buf.f_bfree * stat_buf.f_bsize;
1906 
1907  used_file_system_percentage = (1 - ((double)free_file_system_size
1908  / (double)total_file_system_size)) * 100;
1909  total_file_system_size_hr = (double)total_file_system_size
1910  / (double)file_system_unit_val;
1911  if (total_file_system_size_hr > 1024) {
1912  total_file_system_size_hr = total_file_system_size_hr / 1024;
1913  strcpy(file_system_unit_str, "TiB");
1914  }
1915 
1916  /* inodes */
1917  total_inodes = stat_buf.f_files;
1918  free_inodes = stat_buf.f_ffree;
1919 
1920  used_inode_percentage = (1 - ((double)free_inodes/(double)total_inodes))
1921  * 100;
1922 
1923  if (realpath(file_system, real_path) == NULL) {
1924  WARN("unable to use realpath() on file system");
1925  }
1926 
1927 
1928  /* show results */
1929  VERBOSE(0,-1,"Path: %s", real_path);
1930  VERBOSE(0,-1,"FS: %.1f %s Used FS: %2.1f%% Inodes: %.1f %s Used Inodes: %2.1f%%\n",
1931  total_file_system_size_hr, file_system_unit_str, used_file_system_percentage,
1932  (double)total_inodes / (double)inode_unit_val, inode_unit_str, used_inode_percentage);
1933 
1934  return;
1935 }
1936 
/*
 * Recursively creates (create != 0) or removes (create == 0) the directory
 * tree whose root is "<path>/<o.base_tree_name>.<dirNum>/".
 *
 * NOTE(review): this listing jumps from line 1936 to 1938, so the line holding
 * the return type, the function name and the leading parameter is not visible.
 * The body uses `create` and calls itself as
 * create_remove_directory_tree(create, depth, path, dirNum, progress).
 *
 * currDepth  0 for the tree root; levels 1..o.depth hold the branches.
 * path       directory under which this level's entries are made/removed.
 * dirNum     number used in the name of the first entry of this level.
 * progress   only forwarded to the recursive calls in this function.
 *
 * Backend mkdir/rmdir failures are reported with WARNF and the walk goes on.
 * A directory is removed only after the recursive call for it has returned,
 * i.e. children are removed before their parent.
 *
 * NOTE(review): sprintf/strcpy/strcat below write into MAX_PATHLEN buffers
 * without a length check - confirm callers keep paths short enough.
 */
1938  int currDepth, char* path, int dirNum, rank_progress_t * progress) {
1939 
1940  unsigned i;
1941  char dir[MAX_PATHLEN];
1942 
1943 
1944  VERBOSE(1,5,"Entering create_remove_directory_tree on %s, currDepth = %d...", path, currDepth );
1945 
      /* Level 0: the single root directory of the tree. */
1946  if (currDepth == 0) {
1947  sprintf(dir, "%s/%s.%d/", path, o.base_tree_name, dirNum);
1948 
1949  if (create) {
1950  VERBOSE(2,5,"Making directory '%s'", dir);
1951  if (-1 == o.backend->mkdir (dir, DIRMODE, o.backend_options)) {
1952  WARNF("unable to create tree directory '%s'", dir);
1953  }
1954 #ifdef HAVE_LUSTRE_LUSTREAPI
1955  /* internal node for branching, can be non-striped for children */
1956  if (o.global_dir_layout && \
1957  llapi_dir_set_default_lmv_stripe(dir, -1, 0,
1958  LMV_HASH_TYPE_FNV_1A_64,
1959  NULL) == -1) {
1960  FAIL("Unable to reset to global default directory layout");
1961  }
1962 #endif /* HAVE_LUSTRE_LUSTREAPI */
1963  }
1964 
      /* Descend into level 1; the ++ only changes this call's local copies. */
1965  create_remove_directory_tree(create, ++currDepth, dir, ++dirNum, progress);
1966 
1967  if (!create) {
1968  VERBOSE(2,5,"Remove directory '%s'", dir);
1969  if (-1 == o.backend->rmdir(dir, o.backend_options)) {
1970  WARNF("Unable to remove directory %s", dir);
1971  }
1972  }
      /* Levels 1..o.depth: o.branch_factor sibling directories under path. */
1973  } else if (currDepth <= o.depth) {
1974 
1975  char temp_path[MAX_PATHLEN];
1976  strcpy(temp_path, path);
1977  int currDir = dirNum;
1978 
1979  for (i=0; i < o.branch_factor; i++) {
      /* Child name "<base_tree_name>.<currDir>/" appended to the parent path. */
1980  sprintf(dir, "%s.%d/", o.base_tree_name, currDir);
1981  strcat(temp_path, dir);
1982 
1983  if (create) {
1984  VERBOSE(2,5,"Making directory '%s'", temp_path);
1985  if (-1 == o.backend->mkdir(temp_path, DIRMODE, o.backend_options)) {
1986  WARNF("Unable to create directory %s", temp_path);
1987  }
1988  }
1989 
      /* The first entry below directory currDir is numbered branch_factor*currDir + 1;
         currDepth is bumped for the call and restored right after it. */
1990  create_remove_directory_tree(create, ++currDepth,
1991  temp_path, (o.branch_factor*currDir)+1, progress);
1992  currDepth--;
1993 
1994  if (!create) {
1995  VERBOSE(2,5,"Remove directory '%s'", temp_path);
1996  if (-1 == o.backend->rmdir(temp_path, o.backend_options)) {
1997  WARNF("Unable to remove directory %s", temp_path);
1998  }
1999  }
2000 
      /* Reset to the parent path so the next sibling is appended to it. */
2001  strcpy(temp_path, path);
2002  currDir++;
2003  }
2004  }
      /* currDepth > o.depth: nothing to do, the recursion ends here. */
2005 }
2006 
/*
 * Runs iteration j of the benchmark for a test group of i tasks and stores the
 * tree create/remove results in *summary_table.
 *
 * Phases, in order:
 *   1. if o.create_only: make the per-loop test directories, then time the
 *      creation of the directory tree(s);
 *   2. derive the unique directory names and item-name prefixes of each phase;
 *   3. on ranks below i: run directory_test() and/or file_test();
 *   4. if o.remove_only: time the removal of the tree(s), then remove the
 *      test directories.
 *
 * i              number of tasks taking part; also the modulus of the rank shifts.
 * j              zero-based iteration index (logged as j+1, passed to prep_testdir()).
 * summary_table  result slot of this iteration; this function itself writes only
 *                the MDTEST_TREE_CREATE_NUM and MDTEST_TREE_REMOVE_NUM entries.
 *
 * NOTE(review): the listing skips lines 2093 and 2203 (right after the
 * summary_table->items[...] assignments); whatever those lines contain is not
 * visible here.
 */
2007 static void mdtest_iteration(int i, int j, mdtest_results_t * summary_table){
      /* Per-iteration progress state, zeroed, handed to the tree and test routines. */
2008  rank_progress_t progress_o;
2009  memset(& progress_o, 0 , sizeof(progress_o));
2010  progress_o.stone_wall_timer_seconds = 0;
2011  progress_o.items_per_dir = o.items_per_dir;
2012  rank_progress_t * progress = & progress_o;
2013 
2014  /* start and end times of directory tree create/remove */
2015  double startCreate, endCreate;
2016  int k;
2017 
2018  VERBOSE(1,-1,"main: * iteration %d *", j+1);
2019 
      /* Phase 1: test directories and directory tree creation. */
2020  if(o.create_only){
2021  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
      /* Only the first o.path_count ranks make the test directories. */
2022  if (rank >= o.path_count) {
2023  continue;
2024  }
2025  prep_testdir(j, dir_iter);
2026 
2027  VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir );
2028  if (o.backend->access(o.testdir, F_OK, o.backend_options) != 0) {
2029  if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) {
2030  WARNF("Unable to create test directory %s", o.testdir);
2031  }
2032 #ifdef HAVE_LUSTRE_LUSTREAPI
2033  /* internal node for branching, can be non-striped for children */
2034  if (o.global_dir_layout && o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) {
2035  WARN("Unable to reset to global default directory layout");
2036  }
2037 #endif /* HAVE_LUSTRE_LUSTREAPI */
2038  }
2039  }
2040 
2041  /* create hierarchical directory structure */
2042  MPI_Barrier(testComm);
2043 
2044  startCreate = GetTimeStamp();
2045  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
2046  prep_testdir(j, dir_iter);
2047 
2048  if (o.unique_dir_per_task) {
      /* Collective mode: rank 0 builds one tree per task, renaming
         o.base_tree_name to "mdtest_tree.<k>" for each of them. */
2049  if (o.collective_creates && (rank == 0)) {
2050  /*
2051  * This is inside two loops, one of which already uses "i" and the other uses "j".
2052  * I don't know how this ever worked. I'm changing this loop to use "k".
2053  */
2054  for (k=0; k < o.size; k++) {
2055  sprintf(o.base_tree_name, "mdtest_tree.%d", k);
2056 
2057  VERBOSE(3,5,"main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", o.testdir );
2058  /*
2059  * Let's pass in the path to the directory we most recently made so that we can use
2060  * full paths in the other calls.
2061  */
2062  create_remove_directory_tree(1, 0, o.testdir, 0, progress);
      /* When the stonewall check fires, o.size is overwritten with k and the loop stops. */
2063  if(CHECK_STONE_WALL(progress)){
2064  o.size = k;
2065  break;
2066  }
2067  }
      /* Non-collective mode: every rank builds its own tree. */
2068  } else if (! o.collective_creates) {
2069  VERBOSE(3,5,"main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '%s'", o.testdir );
2070  /*
2071  * Let's pass in the path to the directory we most recently made so that we can use
2072  * full paths in the other calls.
2073  */
2074  create_remove_directory_tree(1, 0, o.testdir, 0, progress);
2075  }
2076  } else {
      /* Shared tree: only rank 0 creates it. */
2077  if (rank == 0) {
2078  VERBOSE(3,5,"main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '%s'", o.testdir );
2079 
2080  /*
2081  * Let's pass in the path to the directory we most recently made so that we can use
2082  * full paths in the other calls.
2083  */
2084  create_remove_directory_tree(1, 0 , o.testdir, 0, progress);
2085  }
2086  }
2087  }
      /* The end time is taken after the barrier, so it includes waiting for all ranks. */
2088  MPI_Barrier(testComm);
2089  endCreate = GetTimeStamp();
2090  summary_table->rate[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate);
2091  summary_table->time[MDTEST_TREE_CREATE_NUM] = (endCreate - startCreate);
2092  summary_table->items[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree;
      /* NOTE(review): listing line 2093 is missing here. */
2094  VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_CREATE_NUM]);
2095  }
2096 
      /* Phase 2: default unique directory names, "<base_tree_name>.0" for every phase. */
2097  sprintf(o.unique_mk_dir, "%s.0", o.base_tree_name);
2098  sprintf(o.unique_chdir_dir, "%s.0", o.base_tree_name);
2099  sprintf(o.unique_stat_dir, "%s.0", o.base_tree_name);
2100  sprintf(o.unique_read_dir, "%s.0", o.base_tree_name);
2101  sprintf(o.unique_rm_dir, "%s.0", o.base_tree_name);
2102  o.unique_rm_uni_dir[0] = 0;
2103 
2104  if (! o.unique_dir_per_task) {
2105  VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", o.unique_mk_dir );
2106  }
2107 
      /* Phase 3: only ranks below i take part in the item tests. */
2108  if (rank < i) {
      /* Each phase uses the name of rank + n*o.nstride (mod i), so with a
         non-zero stride a phase works on items named after another rank. */
2109  if (! o.shared_file) {
2110  sprintf(o.mk_name, "mdtest.%d.", (rank+(0*o.nstride))%i);
2111  sprintf(o.stat_name, "mdtest.%d.", (rank+(1*o.nstride))%i);
2112  sprintf(o.read_name, "mdtest.%d.", (rank+(2*o.nstride))%i);
2113  sprintf(o.rm_name, "mdtest.%d.", (rank+(3*o.nstride))%i);
2114  }
2115  if (o.unique_dir_per_task) {
2116  VERBOSE(3,5,"i %d nstride %d", i, o.nstride);
2117  sprintf(o.unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*o.nstride))%i);
2118  sprintf(o.unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*o.nstride))%i);
2119  sprintf(o.unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*o.nstride))%i);
2120  sprintf(o.unique_read_dir, "mdtest_tree.%d.0", (rank+(3*o.nstride))%i);
2121  sprintf(o.unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*o.nstride))%i);
2122  o.unique_rm_uni_dir[0] = 0;
2123  VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", o.unique_mk_dir, o.unique_chdir_dir, o.unique_stat_dir, o.unique_read_dir, o.unique_rm_dir);
2124  }
2125 
2126  VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", o.unique_mk_dir );
2127 
      /* The directory test is skipped when a shared file is used. */
2128  if (o.dirs_only && ! o.shared_file) {
2129  if (o.pre_delay) {
2130  DelaySecs(o.pre_delay);
2131  }
2132  directory_test(j, i, o.unique_mk_dir, progress);
2133  }
2134  if (o.files_only) {
2135  if (o.pre_delay) {
2136  DelaySecs(o.pre_delay);
2137  }
2138  VERBOSE(3,5,"will file_test on %s", o.unique_mk_dir);
2139 
2140  file_test(j, i, o.unique_mk_dir, progress);
2141  }
2142  }
2143 
2144  /* remove directory structure */
2145  if (! o.unique_dir_per_task) {
2146  VERBOSE(3,-1,"main: Using o.testdir, '%s'", o.testdir );
2147  }
2148 
      /* Phase 4: tree removal; mirrors the creation phase with create == 0. */
2149  MPI_Barrier(testComm);
2150  if (o.remove_only) {
2151  progress->items_start = 0;
2152  startCreate = GetTimeStamp();
2153  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
2154  prep_testdir(j, dir_iter);
2155  if (o.unique_dir_per_task) {
2156  if (o.collective_creates && (rank == 0)) {
2157  /*
2158  * This is inside two loops, one of which already uses "i" and the other uses "j".
2159  * I don't know how this ever worked. I'm changing this loop to use "k".
2160  */
2161  for (k=0; k < o.size; k++) {
2162  sprintf(o.base_tree_name, "mdtest_tree.%d", k);
2163 
2164  VERBOSE(3,-1,"main (remove hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", o.testdir );
2165 
2166  /*
2167  * Let's pass in the path to the directory we most recently made so that we can use
2168  * full paths in the other calls.
2169  */
2170  create_remove_directory_tree(0, 0, o.testdir, 0, progress);
2171  if(CHECK_STONE_WALL(progress)){
2172  o.size = k;
2173  break;
2174  }
2175  }
2176  } else if (! o.collective_creates) {
2177  VERBOSE(3,-1,"main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '%s'", o.testdir );
2178 
2179  /*
2180  * Let's pass in the path to the directory we most recently made so that we can use
2181  * full paths in the other calls.
2182  */
2183  create_remove_directory_tree(0, 0, o.testdir, 0, progress);
2184  }
2185  } else {
2186  if (rank == 0) {
2187  VERBOSE(3,-1,"V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '%s'", o.testdir );
2188 
2189  /*
2190  * Let's pass in the path to the directory we most recently made so that we can use
2191  * full paths in the other calls.
2192  */
2193  create_remove_directory_tree(0, 0 , o.testdir, 0, progress);
2194  }
2195  }
2196  }
2197 
      /* startCreate/endCreate are reused here to time the removal. */
2198  MPI_Barrier(testComm);
2199  endCreate = GetTimeStamp();
2200  summary_table->rate[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate);
2201  summary_table->time[MDTEST_TREE_REMOVE_NUM] = endCreate - startCreate;
2202  summary_table->items[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree;
      /* NOTE(review): listing line 2203 is missing here. */
2204  VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_REMOVE_NUM]);
2205  VERBOSE(2,-1,"main (at end of for j loop): Removing o.testdir of '%s'\n", o.testdir );
2206 
      /* Finally remove the test directories; as on creation, only the first
         o.path_count ranks do this, and only if the directory still exists. */
2207  for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){
2208  prep_testdir(j, dir_iter);
2209  if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) == 0) {
2210  //if (( rank == 0 ) && access(o.testdir, F_OK) == 0) {
2211  if (o.backend->rmdir(o.testdir, o.backend_options) == -1) {
2212  WARNF("unable to remove directory %s", o.testdir);
2213  }
2214  }
2215  }
2216  } else {
      /* No removal requested: only the removal rate is reset to 0. */
2217  summary_table->rate[MDTEST_TREE_REMOVE_NUM] = 0;
2218  }
2219 }
2220 
      /*
       * Body of mdtest_init_args(): resets the global option set `o` to its
       * defaults. The listing omits line 2221, which holds the function header;
       * the cross-reference index at the end of the listing gives it as
       * `void mdtest_init_args()`.
       *
       * The whole struct is assigned from a compound literal, so every member
       * not named below is zero-initialized.
       */
2222  o = (mdtest_options_t) {
2223  .barriers = 1,
2224  .branch_factor = 1,
      /* -1 is the "not set" marker: mdtest_run() replaces it with a time-based value. */
2225  .random_buffer_offset = -1,
2226  .prologue = "",
2227  .epilogue = "",
      /* -1 selects round-robin among local processes (see the GPUid option help). */
2228  .gpuID = -1,
2229  };
2230 }
2231 
2232 mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) {
2233  testComm = world_com;
2234  out_logfile = world_out;
2235  out_resultfile = world_out;
2236 
2237  init_clock(world_com);
2238 
2239  mdtest_init_args();
2240  int i, j;
2241  int numNodes;
2242  int numTasksOnNode0 = 0;
2243  MPI_Group worldgroup;
2244  struct {
2245  int first;
2246  int last;
2247  int stride;
2248  } range = {0, 0, 1};
2249  int first = 1;
2250  int last = 0;
2251  int stride = 1;
2252  int iterations = 1;
2253  int created_root_dir = 0; // was the root directory existing or newly created
2254 
2255  verbose = 0;
2256  int no_barriers = 0;
2257  char * path = "./out";
2258  int randomize = 0;
2259  char APIs[1024];
2260  char APIs_legacy[1024];
2261  aiori_supported_apis(APIs, APIs_legacy, MDTEST);
2262  char apiStr[1024];
2263  sprintf(apiStr, "API for I/O [%s]", APIs);
2264  memset(& o.hints, 0, sizeof(o.hints));
2265 
2266  char * packetType = "t";
2267 
2268  option_help options [] = {
2269  {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & o.api},
2270  {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.branch_factor},
2271  {'d', NULL, "directory or multiple directories where the test will run [dir|dir1@dir2@dir3...]", OPTION_OPTIONAL_ARGUMENT, 's', & path},
2272  {'B', NULL, "no barriers between phases", OPTION_OPTIONAL_ARGUMENT, 'd', & no_barriers},
2273  {'C', NULL, "only create files/dirs", OPTION_FLAG, 'd', & o.create_only},
2274  {'T', NULL, "only stat files/dirs", OPTION_FLAG, 'd', & o.stat_only},
2275  {'E', NULL, "only read files/dir", OPTION_FLAG, 'd', & o.read_only},
2276  {'r', NULL, "only remove files or directories left behind by previous runs", OPTION_FLAG, 'd', & o.remove_only},
2277  {'D', NULL, "perform test on directories only (no files)", OPTION_FLAG, 'd', & o.dirs_only},
2278  {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & o.read_bytes},
2279  {'f', NULL, "first number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & first},
2280  {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & o.files_only},
2281 #ifdef HAVE_LUSTRE_LUSTREAPI
2282  {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & o.global_dir_layout},
2283 #endif /* HAVE_LUSTRE_LUSTREAPI */
2284  {'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset},
2285  {'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations},
2286  {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items_per_dir},
2287  {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & o.make_node},
2288  {'l', NULL, "last number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & last},
2289  {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & o.leaf_only},
2290  {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items},
2291  {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.nstride},
2292  {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.pre_delay},
2293  {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & o.print_rate_and_time},
2294  {0, "print-all-procs", "all processes print an excerpt of their results", OPTION_FLAG, 'd', & o.print_all_proc},
2295  {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize},
2296  {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed},
2297  {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride},
2298  {'S', NULL, "shared file access (file only, no directories)", OPTION_FLAG, 'd', & o.shared_file},
2299  {'c', NULL, "collective creates: task 0 does all creates", OPTION_FLAG, 'd', & o.collective_creates},
2300  {'t', NULL, "time unique working directory overhead", OPTION_FLAG, 'd', & o.time_unique_dir_overhead},
2301  {'u', NULL, "unique working directory for each task", OPTION_FLAG, 'd', & o.unique_dir_per_task},
2302  {'v', NULL, "verbosity (each instance of option increments by one)", OPTION_FLAG, 'd', & verbose},
2303  {'V', NULL, "verbosity value", OPTION_OPTIONAL_ARGUMENT, 'd', & verbose},
2304  {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & o.write_bytes},
2305  {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase and files)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stone_wall_timer_seconds},
2306  {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & o.stoneWallingStatusFile},
2307  {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & o.verify_read},
2308  {0, "verify-write", "Verify the data after a write by reading it back immediately", OPTION_FLAG, 'd', & o.verify_write},
2309  {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & o.sync_file},
2310  {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync},
2311  {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth},
2312  {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time},
2313  {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|random|o|i|t|r]", OPTION_OPTIONAL_ARGUMENT, 's', & packetType},
2314  {0, "run-cmd-before-phase", "call this external command before each phase (excluded from the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.prologue},
2315  {0, "run-cmd-after-phase", "call this external command after each phase (included in the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.epilogue},
2316 #ifdef HAVE_CUDA
2317  {0, "allocateBufferOnGPU", "Allocate I/O buffers on the GPU: X=1 uses managed memory - verifications are run on CPU; X=2 managed memory - verifications on GPU; X=3 device memory with verifications on GPU.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.gpuMemoryFlags},
2318  {0, "GPUid", "Select the GPU to use, use -1 for round-robin among local procs.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.gpuID},
2319 #ifdef HAVE_GPU_DIRECT
2320  {0, "gpuDirect", "Allocate I/O buffers on the GPU and use gpuDirect to store data; this option is incompatible with any option requiring CPU access to data.", OPTION_FLAG, 'd', & o.gpuDirect},
2321 #endif
2322 #endif
2323  {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors},
2324  {0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV},
2325  {0, "showRankStatistics", "Include statistics per rank", OPTION_FLAG, 'd', & o.show_perrank_statistics},
2326 
2327  LAST_OPTION
2328  };
2330  option_parse(argc, argv, global_options);
2331  o.backend = aiori_select(o.api);
2332  if (o.backend == NULL)
2333  ERR("Unrecognized I/O API");
2334  if (! o.backend->enable_mdtest)
2335  ERR("Backend doesn't support MDTest");
2336  o.backend_options = airoi_update_module_options(o.backend, global_options);
2337 
2338  free(global_options->modules);
2339  free(global_options);
2340 
2341  o.dataPacketType = parsePacketType(packetType[0]);
2342 
2343  MPI_Comm_rank(testComm, &rank);
2344  MPI_Comm_size(testComm, &o.size);
2345 
2346  if(o.backend->xfer_hints){
2347  o.backend->xfer_hints(& o.hints);
2348  }
2349  if(o.backend->check_params){
2351  }
2352  if (o.backend->initialize){
2354  }
2355 
2356  o.pid = getpid();
2357  o.uid = getuid();
2358 
2359  numNodes = GetNumNodes(testComm);
2360  numTasksOnNode0 = GetNumTasksOnNode0(testComm);
2361 
2362  char cmd_buffer[4096];
2363  strncpy(cmd_buffer, argv[0], 4096);
2364  for (i = 1; i < argc; i++) {
2365  snprintf(&cmd_buffer[strlen(cmd_buffer)], 4096-strlen(cmd_buffer), " '%s'", argv[i]);
2366  }
2367 
2368  VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
2369  VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, o.size, numNodes);
2370  VERBOSE(0,-1,"Command line used: %s", cmd_buffer);
2371 
2372  /* adjust special variables */
2373  o.barriers = ! no_barriers;
2374  if (path != NULL){
2375  parse_dirpath(path);
2376  }
2377  if( randomize > 0 ){
2378  if (o.random_seed == 0) {
2379  /* Ensure all procs have the same random number */
2380  o.random_seed = time(NULL);
2381  MPI_Barrier(testComm);
2382  MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, testComm);
2383  }
2384  o.random_seed += rank;
2385  }
2386  if( o.random_buffer_offset == -1 ){
2387  o.random_buffer_offset = time(NULL);
2388  MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, testComm);
2389  }
2390  if ((o.items > 0) && (o.items_per_dir > 0) && (! o.unique_dir_per_task)) {
2392  }else{
2393  o.directory_loops = 1;
2394  }
2396  // option_print_current(options);
2397  VERBOSE(1,-1, "api : %s", o.api);
2398  VERBOSE(1,-1, "barriers : %s", ( o.barriers ? "True" : "False" ));
2399  VERBOSE(1,-1, "collective_creates : %s", ( o.collective_creates ? "True" : "False" ));
2400  VERBOSE(1,-1, "create_only : %s", ( o.create_only ? "True" : "False" ));
2401  VERBOSE(1,-1, "dirpath(s):" );
2402  for ( i = 0; i < o.path_count; i++ ) {
2403  VERBOSE(1,-1, "\t%s", o.filenames[i] );
2404  }
2405  VERBOSE(1,-1, "dirs_only : %s", ( o.dirs_only ? "True" : "False" ));
2406  VERBOSE(1,-1, "read_bytes : "LLU"", o.read_bytes );
2407  VERBOSE(1,-1, "read_only : %s", ( o.read_only ? "True" : "False" ));
2408  VERBOSE(1,-1, "first : %d", first );
2409  VERBOSE(1,-1, "files_only : %s", ( o.files_only ? "True" : "False" ));
2410 #ifdef HAVE_LUSTRE_LUSTREAPI
2411  VERBOSE(1,-1, "global_dir_layout : %s", ( o.global_dir_layout ? "True" : "False" ));
2412 #endif /* HAVE_LUSTRE_LUSTREAPI */
2413  VERBOSE(1,-1, "iterations : %d", iterations );
2414  VERBOSE(1,-1, "items_per_dir : "LLU"", o.items_per_dir );
2415  VERBOSE(1,-1, "last : %d", last );
2416  VERBOSE(1,-1, "leaf_only : %s", ( o.leaf_only ? "True" : "False" ));
2417  VERBOSE(1,-1, "items : "LLU"", o.items );
2418  VERBOSE(1,-1, "nstride : %d", o.nstride );
2419  VERBOSE(1,-1, "pre_delay : %d", o.pre_delay );
2420  VERBOSE(1,-1, "remove_only : %s", ( o.leaf_only ? "True" : "False" ));
2421  VERBOSE(1,-1, "random_seed : %d", o.random_seed );
2422  VERBOSE(1,-1, "stride : %d", stride );
2423  VERBOSE(1,-1, "shared_file : %s", ( o.shared_file ? "True" : "False" ));
2424  VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( o.time_unique_dir_overhead ? "True" : "False" ));
2425  VERBOSE(1,-1, "stone_wall_timer_seconds: %d", o.stone_wall_timer_seconds);
2426  VERBOSE(1,-1, "stat_only : %s", ( o.stat_only ? "True" : "False" ));
2427  VERBOSE(1,-1, "unique_dir_per_task : %s", ( o.unique_dir_per_task ? "True" : "False" ));
2428  VERBOSE(1,-1, "write_bytes : "LLU"", o.write_bytes );
2429  VERBOSE(1,-1, "sync_file : %s", ( o.sync_file ? "True" : "False" ));
2430  VERBOSE(1,-1, "call_sync : %s", ( o.call_sync ? "True" : "False" ));
2431  VERBOSE(1,-1, "depth : %d", o.depth );
2432  VERBOSE(1,-1, "make_node : %d", o.make_node );
2433  int tasksBlockMapping = QueryNodeMapping(testComm, true);
2434 
2436  initCUDA(tasksBlockMapping, rank, numNodes, numTasksOnNode0, o.gpuID);
2437  }
2438 
2439  /* setup total number of items and number of items per dir */
2440  if (o.depth <= 0) {
2441  o.num_dirs_in_tree = 1;
2442  } else {
2443  if (o.branch_factor < 1) {
2444  o.num_dirs_in_tree = 1;
2445  } else if (o.branch_factor == 1) {
2446  o.num_dirs_in_tree = o.depth + 1;
2447  } else {
2448  o.num_dirs_in_tree = (pow(o.branch_factor, o.depth+1) - 1) / (o.branch_factor - 1);
2449  }
2450  }
2451  if (o.items_per_dir > 0) {
2452  if(o.items == 0){
2453  if (o.leaf_only) {
2454  o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth);
2455  } else {
2457  }
2458  }else{
2460  }
2461  } else {
2462  if (o.leaf_only) {
2463  if (o.branch_factor <= 1) {
2464  o.items_per_dir = o.items;
2465  } else {
2466  o.items_per_dir = (uint64_t) (o.items / pow(o.branch_factor, o.depth));
2467  o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth);
2468  }
2469  } else {
2472  }
2473  }
2474 
2475  /* initialize rand_array */
2476  if (o.random_seed > 0) {
2477  srand(o.random_seed);
2478 
2479  uint64_t s;
2480 
2481  o.rand_array = (uint64_t *) safeMalloc( o.items * sizeof(*o.rand_array));
2482 
2483  for (s=0; s < o.items; s++) {
2484  o.rand_array[s] = s;
2485  }
2486 
2487  /* shuffle list randomly */
2488  uint64_t n = o.items;
2489  while (n>1) {
2490  n--;
2491 
2492  /*
2493  * Generate a random number in the range 0 .. n
2494  *
2495  * rand() returns a number from 0 .. RAND_MAX. Divide that
2496  * by RAND_MAX and you get a floating point number in the
2497  * range 0 .. 1. Multiply that by n and you get a number in
2498  * the range 0 .. n.
2499  */
2500  uint64_t k = ( uint64_t ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
2501 
2502  /*
2503  * Now move the nth element to the kth (randomly chosen)
2504  * element, and the kth element to the nth element.
2505  */
2506 
2507  uint64_t tmp = o.rand_array[k];
2508  o.rand_array[k] = o.rand_array[n];
2509  o.rand_array[n] = tmp;
2510  }
2511  }
2512 
2513  /* allocate and initialize write buffer with # */
2514  if (o.write_bytes > 0) {
2517  }
2518 
2519  /* setup directory path to work in */
2520  if (o.path_count == 0) { /* special case where no directory path provided with '-d' option */
2521  char *ret = getcwd(o.testdirpath, MAX_PATHLEN);
2522  if (ret == NULL) {
2523  FAIL("Unable to get current working directory on %s", o.testdirpath);
2524  }
2525  o.path_count = 1;
2526  } else {
2527  strcpy(o.testdirpath, o.filenames[rank % o.path_count]);
2528  }
2529 
2530  /* if directory does not exist, create it */
2531  if ((rank < o.path_count) && o.backend->access(o.testdirpath, F_OK, o.backend_options) != 0) {
2532  if (o.backend->mkdir(o.testdirpath, DIRMODE, o.backend_options) != 0) {
2533  WARNF("Unable to create test directory path %s", o.testdirpath);
2534  }
2535  created_root_dir = 1;
2536  }
2537 
2538  /* display disk usage */
2539  VERBOSE(3,-1,"main (before display_freespace): o.testdirpath is '%s'", o.testdirpath );
2540 
2542  /* set the shift to mimic IOR and shift by procs per node */
2543  if (o.nstride > 0) {
2544  if ( numNodes > 1 && tasksBlockMapping ) {
2545  /* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks
2546  however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */
2547  o.nstride *= numTasksOnNode0;
2548  }
2549  VERBOSE(0,5,"Shifting ranks by %d for each phase.", o.nstride);
2550  }
2551 
2552  VERBOSE(3,-1,"main (after display_freespace): o.testdirpath is '%s'", o.testdirpath );
2553 
2554  if (rank == 0) {
2555  if (o.random_seed > 0) {
2556  VERBOSE(0,-1,"random seed: %d", o.random_seed);
2557  }
2558  }
2559 
2560  if (gethostname(o.hostname, MAX_PATHLEN) == -1) {
2561  perror("gethostname");
2562  MPI_Abort(testComm, 2);
2563  }
2564 
2565  if (last == 0) {
2566  first = o.size;
2567  last = o.size;
2568  }
2569  if(first > last){
2570  FAIL("process number: first > last doesn't make sense");
2571  }
2572  if(last > o.size){
2573  FAIL("process number: last > number of processes doesn't make sense");
2574  }
2575 
2576  /* setup summary table for recording results */
2577  o.summary_table = (mdtest_results_t *) safeMalloc(iterations * sizeof(mdtest_results_t));
2578  memset(o.summary_table, 0, iterations * sizeof(mdtest_results_t));
2579 
2580  if (o.unique_dir_per_task) {
2581  sprintf(o.base_tree_name, "mdtest_tree.%d", rank);
2582  } else {
2583  sprintf(o.base_tree_name, "mdtest_tree");
2584  }
2585 
2586  mdtest_results_t * aggregated_results = safeMalloc(iterations * sizeof(mdtest_results_t));
2587 
2588  /* default use shared directory */
2589  strcpy(o.mk_name, "mdtest.shared.");
2590  strcpy(o.stat_name, "mdtest.shared.");
2591  strcpy(o.read_name, "mdtest.shared.");
2592  strcpy(o.rm_name, "mdtest.shared.");
2593 
2594  MPI_Comm_group(testComm, &worldgroup);
2595 
2596  last = o.size < last ? o.size : last;
2597 
2598  /* Run the tests */
2599  for (i = first; i <= last; i += stride) {
2600  sleep(1);
2601 
2602  if(i < last){
2603  MPI_Group testgroup;
2604  range.last = i - 1;
2605  MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup);
2606  MPI_Comm_create(world_com, testgroup, &testComm);
2607  MPI_Group_free(&testgroup);
2608  if(testComm == MPI_COMM_NULL){
2609  continue;
2610  }
2611  }else{
2612  MPI_Comm_dup(world_com, & testComm);
2613  }
2614  MPI_Comm_size(testComm, &o.size);
2615 
2616  if (rank == 0) {
2617  uint64_t items_all = i * o.items;
2618  if(o.num_dirs_in_tree_calc){
2619  items_all *= o.num_dirs_in_tree_calc;
2620  }
2621  if (o.files_only && o.dirs_only) {
2622  VERBOSE(0,-1,"%d tasks, "LLU" files/directories", i, items_all);
2623  } else if (o.files_only) {
2624  if (! o.shared_file) {
2625  VERBOSE(0,-1,"%d tasks, "LLU" files", i, items_all);
2626  }
2627  else {
2628  VERBOSE(0,-1,"%d tasks, 1 file", i);
2629  }
2630  } else if (o.dirs_only) {
2631  VERBOSE(0,-1,"%d tasks, "LLU" directories", i, items_all);
2632  }
2633  }
2634  VERBOSE(1,-1,"");
2635  VERBOSE(1,-1," Operation Duration Rate");
2636  VERBOSE(1,-1," --------- -------- ----");
2637 
2638  for (j = 0; j < iterations; j++) {
2639  // keep track of the current status for stonewalling
2640  mdtest_iteration(i, j, & o.summary_table[j]);
2641  }
2642  summarize_results(iterations, aggregated_results);
2643  if(o.saveRankDetailsCSV){
2644  StoreRankInformation(iterations, aggregated_results);
2645  }
2646  int total_errors = 0;
2647  MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm);
2648  if(rank == 0 && total_errors){
2649  VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors);
2650  }
2651  aggregated_results->total_errors += total_errors;
2652  MPI_Comm_free(&testComm);
2653  }
2654 
2655  MPI_Group_free(&worldgroup);
2656  testComm = world_com;
2657 
2658  if (created_root_dir && o.remove_only && o.backend->rmdir(o.testdirpath, o.backend_options) != 0) {
2659  FAIL("Unable to remove test directory path %s", o.testdirpath);
2660  }
2661 
2662  VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp());
2663 
2664  if (o.random_seed > 0) {
2665  free(o.rand_array);
2666  }
2667 
2668  if (o.backend->finalize){
2670  }
2671 
2672  if (o.write_bytes > 0) {
2674  }
2675  free(o.summary_table);
2676 
2677  return aggregated_results;
2678 }
char * stoneWallingStatusFile
Definition: mdtest.c:120
void invalidate_buffer_pattern(char *buffer, size_t bytes, ior_memory_flags type)
Definition: utilities.c:172
option_module * modules
Definition: option.h:36
Definition: aiori.h:120
double time_before_barrier[MDTEST_TREE_CREATE_NUM]
Definition: mdtest.h:28
ior_dataPacketType_e dataPacketType
Definition: mdtest.c:160
#define ERRF(FORMAT,...)
Definition: aiori-debug.h:67
uint64_t stonewall_item_sum[MDTEST_LAST_NUM]
Definition: mdtest.h:36
double rate[MDTEST_LAST_NUM]
Definition: mdtest.h:25
char read_name[MAX_PATHLEN]
Definition: mdtest.c:111
#define VERBOSE(root, any,...)
Definition: mdtest.c:223
uint64_t f_blocks
Definition: aiori.h:53
char stat_name[MAX_PATHLEN]
Definition: mdtest.c:110
int directory_loops
Definition: mdtest.c:155
mdtest_test_num_t
Definition: mdtest.h:8
uint64_t items
Definition: mdtest.c:152
void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t *progress, double *t_start)
Definition: mdtest.c:1180
uint64_t f_bfree
Definition: aiori.h:54
int print_all_proc
Definition: mdtest.c:158
char unique_rm_dir[MAX_PATHLEN]
Definition: mdtest.c:117
#define LAST_OPTION
Definition: option.h:39
uint64_t num_dirs_in_tree_calc
Definition: mdtest.c:154
char rm_name[MAX_PATHLEN]
Definition: mdtest.c:112
char unique_rm_uni_dir[MAX_PATHLEN]
Definition: mdtest.c:118
int(* mknod)(char *)
Definition: aiori.h:91
#define open64
Definition: aiori-HDFS.c:93
void * backend_options
Definition: mdtest.c:190
void * airoi_update_module_options(const ior_aiori_t *backend, options_all_t *opt)
Definition: aiori.c:96
CURLcode rc
Definition: aiori-S3-4c.c:111
void VerboseMessage(int root_level, int any_level, int line, char *format,...)
Definition: mdtest.c:224
FILE * out_logfile
Definition: utilities.c:74
int option_parse(int argc, char **argv, options_all_t *opt_all)
Definition: option.c:414
static void create_file(const char *path, uint64_t itemNum)
Definition: mdtest.c:367
void md_validate_tests()
Definition: mdtest.c:1762
int errno
void mdtest_init_args()
Definition: mdtest.c:2221
int time_unique_dir_overhead
Definition: mdtest.c:167
int stone_wall_timer_seconds
Definition: mdtest.c:170
aiori_xfer_hint_t hints
Definition: mdtest.c:191
char mk_name[MAX_PATHLEN]
Definition: mdtest.c:109
int64_t ReadStoneWallingIterations(char *const filename, MPI_Comm com)
Definition: utilities.c:936
mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *world_out)
Definition: mdtest.c:2232
uint64_t stonewall_item_min[MDTEST_LAST_NUM]
Definition: mdtest.h:35
void parse_dirpath(char *dirpath_arg)
Definition: mdtest.c:242
#define DIRMODE
Definition: mdtest.c:94
void(* delete)(char *, aiori_mod_opt_t *module_options)
Definition: aiori.h:100
int(* statfs)(const char *, ior_aiori_statfs_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:104
int unique_dir_per_task
Definition: mdtest.c:166
uint64_t num_dirs_in_tree
Definition: mdtest.c:146
int(* mkdir)(const char *path, mode_t mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:105
uint64_t items_start
Definition: mdtest.c:204
#define min(a, b)
Definition: md-workbench.c:27
int QueryNodeMapping(MPI_Comm comm, int print_nodemap)
Definition: utilities.c:402
uint64_t items_per_dir
Definition: mdtest.c:153
const char * epilogue
Definition: mdtest.c:182
char testdirpath[MAX_PATHLEN]
Definition: mdtest.c:105
uint64_t f_ffree
Definition: aiori.h:57
#define FAIL(...)
Definition: aiori-debug.h:16
uint64_t * rand_array
Definition: mdtest.c:103
unsigned branch_factor
Definition: mdtest.c:137
static mdtest_options_t o
Definition: mdtest.c:195
char * api
Definition: mdtest.c:192
int(* access)(const char *path, int mode, aiori_mod_opt_t *module_options)
Definition: aiori.h:107
static void prep_testdir(int j, int dir_iter)
Definition: mdtest.c:271
void create_remove_items(int currDepth, const int dirs, const int create, const int collective, const char *path, uint64_t dirNum, rank_progress_t *progress)
Definition: mdtest.c:502
char const * mdtest_test_name(int i)
Definition: mdtest.c:1411
void mdtest_read(int random, int dirs, const long dir_iter, char *path)
Definition: mdtest.c:654
int stone_wall_timer_seconds
Definition: mdtest.c:202
void show_file_system_size(char *file_system)
Definition: mdtest.c:1881
#define WRITE
Definition: iordef.h:100
char unique_stat_dir[MAX_PATHLEN]
Definition: mdtest.c:115
char hostname[MAX_PATHLEN]
Definition: mdtest.c:108
int verify_write
Definition: mdtest.c:132
double start_time
Definition: mdtest.c:200
void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank_progress_t *progress)
Definition: mdtest.c:851
int(* rmdir)(const char *path, aiori_mod_opt_t *module_options)
Definition: aiori.h:106
int gpfs_createsharing
Definition: aiori-POSIX.h:24
#define READ
Definition: iordef.h:102
void mdtest_stat(const int random, const int dirs, const long dir_iter, const char *path, rank_progress_t *progress)
Definition: mdtest.c:572
char * PrintTimestamp()
Definition: utilities.c:921
#define IOR_CREAT
Definition: aiori.h:32
const ior_aiori_t * aiori_select(const char *api)
Definition: aiori.c:240
char base_tree_name[MAX_PATHLEN]
Definition: mdtest.c:106
void collective_helper(const int dirs, const int create, const char *path, uint64_t itemNum, rank_progress_t *progress)
Definition: mdtest.c:465
void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t *progress)
Definition: mdtest.c:1232
const char * prologue
Definition: mdtest.c:181
double rate_before_barrier[MDTEST_LAST_NUM]
Definition: mdtest.h:26
double time[MDTEST_LAST_NUM]
Definition: mdtest.h:27
size_t read_bytes
Definition: mdtest.c:171
ior_dataPacketType_e parsePacketType(char t)
Definition: utilities.c:291
uint64_t f_files
Definition: aiori.h:56
MPI_Comm testComm
Definition: utilities.c:73
static option_help options[]
Definition: aiori-CEPHFS.c:59
int(* check_params)(aiori_mod_opt_t *)
Definition: aiori.h:113
uint64_t f_bsize
Definition: aiori.h:52
char * write_buffer
Definition: mdtest.c:119
uint64_t items[MDTEST_LAST_NUM]
Definition: mdtest.h:29
void init_clock(MPI_Comm com)
Definition: utilities.c:917
void(* initialize)(aiori_mod_opt_t *options)
Definition: aiori.h:109
void collective_create_remove(const int create, const int dirs, const int ntasks, const char *path, rank_progress_t *progress)
Definition: mdtest.c:773
#define WARN(MSG)
Definition: aiori-debug.h:45
static void summarize_results_rank0(int iterations, mdtest_results_t *all_results, int print_time)
Definition: mdtest.c:1488
char unique_mk_dir[MAX_PATHLEN]
Definition: mdtest.c:113
mdtest_results_t * summary_table
Definition: mdtest.c:184
int GetNumNodes(MPI_Comm comm)
Definition: utilities.c:476
void initCUDA(int blockMapping, int rank, int numNodes, int tasksPerNode, int useGPUID)
Definition: utilities.c:438
static mdtest_results_t * get_result_index(mdtest_results_t *all_results, int proc, int iter, int interation_count)
Definition: mdtest.c:1484
int(* rename)(const char *oldpath, const char *newpath, aiori_mod_opt_t *module_options)
Definition: aiori.h:111
void(* xfer_hints)(aiori_xfer_hint_t *params)
Definition: aiori.h:96
void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t *progress)
Definition: mdtest.c:940
#define PRINT(...)
Definition: mdtest.c:215
void create_remove_items_helper(const int dirs, const int create, const char *path, uint64_t itemNum, rank_progress_t *progress)
Definition: mdtest.c:439
void(* close)(aiori_fd_t *, aiori_mod_opt_t *module_options)
Definition: aiori.h:99
int(* stat)(const char *path, struct stat *buf, aiori_mod_opt_t *module_options)
Definition: aiori.h:108
FILE * out_resultfile
Definition: utilities.c:75
options_all_t * airoi_create_all_module_options(option_help *global_options)
Definition: aiori.c:110
int random_buffer_offset
Definition: mdtest.c:139
ior_memory_flags gpuMemoryFlags
Definition: mdtest.c:121
double GetTimeStamp(void)
Definition: utilities.c:876
void create_remove_directory_tree(int create, int currDepth, char *path, int dirNum, rank_progress_t *progress)
Definition: mdtest.c:1937
void aiori_supported_apis(char *APIs, char *APIs_legacy, enum bench_type type)
Definition: aiori.c:130
uint64_t items_done
Definition: mdtest.c:205
aiori_fd_t *(* create)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:90
IOR_offset_t(* xfer)(int access, aiori_fd_t *, IOR_size_t *, IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t *module_options)
Definition: aiori.h:97
static void phase_prepare()
Definition: mdtest.c:280
static void remove_file(const char *path, uint64_t itemNum)
Definition: mdtest.c:351
int collective_creates
Definition: mdtest.c:168
void StoreStoneWallingIterations(char *const filename, int64_t count)
Definition: utilities.c:959
char testdir[MAX_PATHLEN]
Definition: mdtest.c:104
#define IOR_WRONLY
Definition: aiori.h:29
static void StoreRankInformation(int iterations, mdtest_results_t *agg)
Definition: mdtest.c:1432
#define RELEASE_VERS
Definition: mdtest.c:95
static options_all_t * global_options
Definition: parse_options.c:41
void update_write_memory_pattern(uint64_t item, char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:94
#define WARNF(FORMAT,...)
Definition: aiori-debug.h:30
char unique_chdir_dir[MAX_PATHLEN]
Definition: mdtest.c:114
int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, uint64_t *out_max_iter)
Definition: mdtest.c:1123
static void create_remove_dirs(const char *path, bool create, uint64_t itemNum)
Definition: mdtest.c:328
int show_perrank_statistics
Definition: mdtest.c:159
long long int IOR_size_t
Definition: iordef.h:124
size_t write_bytes
Definition: mdtest.c:169
uint64_t stonewall_last_item[MDTEST_LAST_NUM]
Definition: mdtest.h:34
void(* finalize)(aiori_mod_opt_t *options)
Definition: aiori.h:110
#define VERBOSE_2
Definition: iordef.h:108
void generate_memory_pattern(char *buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:137
char * saveRankDetailsCSV
Definition: mdtest.c:180
bool enable_mdtest
Definition: aiori.h:115
uint64_t items_per_dir
Definition: mdtest.c:207
char ** filenames
Definition: mdtest.c:107
int verbose
Definition: utilities.c:72
char unique_read_dir[MAX_PATHLEN]
Definition: mdtest.c:116
static void phase_end()
Definition: mdtest.c:290
#define MAX_PATHLEN
Definition: utilities.h:31
int print_rate_and_time
Definition: mdtest.c:157
void ShowFileSystemSize(char *filename, const struct ior_aiori *backend, void *backend_options)
Definition: utilities.c:770
aiori_fd_t *(* open)(char *, int iorflags, aiori_mod_opt_t *)
Definition: aiori.h:92
double stonewall_time[MDTEST_LAST_NUM]
Definition: mdtest.h:33
int verification_error
Definition: mdtest.c:133
#define ERR(MSG)
Definition: aiori-debug.h:75
int verify_memory_pattern(uint64_t item, char *buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType, ior_memory_flags type)
Definition: utilities.c:182
void DelaySecs(int delay)
Definition: utilities.c:974
#define TEST_DIR
Definition: mdtest.c:96
void(* sync)(aiori_mod_opt_t *)
Definition: aiori.h:114
int fsyncPerWrite
Definition: aiori.h:70
ior_dataPacketType_e
Definition: iordef.h:22
int aiori_warning_as_errors
Definition: ior.c:93
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
Definition: utilities.c:1119
ior_memory_flags
Definition: iordef.h:29
int filePerProc
Definition: aiori.h:65
static void mdtest_iteration(int i, int j, mdtest_results_t *summary_table)
Definition: mdtest.c:2007
void summarize_results(int iterations, mdtest_results_t *results)
Definition: mdtest.c:1689
void unique_dir_access(int opt, char *to)
Definition: mdtest.c:312
int rank
Definition: utilities.c:70
#define ITEM_COUNT
Definition: mdtest.c:97
int GetNumTasksOnNode0(MPI_Comm comm)
Definition: utilities.c:551
const ior_aiori_t * backend
Definition: mdtest.c:189
#define CHECK_STONE_WALL(p)
Definition: mdtest.c:210
static void updateResult(mdtest_results_t *res, mdtest_test_num_t test, uint64_t item_count, double t_start, double t_end, double t_end_before_barrier)
Definition: mdtest.c:922
void * safeMalloc(uint64_t size)
Definition: utilities.c:238
#define LLU
Definition: mdtest.c:99
#define NULL
Definition: iordef.h:84
uint64_t total_errors
Definition: mdtest.h:30
void * aligned_buffer_alloc(size_t size, ior_memory_flags type)
Definition: utilities.c:1070