IOR
aiori-MPIIO.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  */
4 /******************************************************************************\
5 * *
6 * Copyright (c) 2003, The Regents of the University of California *
7 * See the file COPYRIGHT for a complete copyright notice and license. *
8 * *
9 ********************************************************************************
10 *
11 * Implement abstract I/O interface for MPIIO.
12 *
13 \******************************************************************************/
14 
15 #ifdef HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <sys/stat.h>
22 
23 #include "ior.h"
24 #include "iordef.h"
25 #include "aiori.h"
26 #include "utilities.h"
27 
28 #ifndef MPIAPI
29 #define MPIAPI /* defined as __stdcall on Windows */
30 #endif
31 
32 /**************************** P R O T O T Y P E S *****************************/
33 
34 static IOR_offset_t SeekOffset(MPI_File, IOR_offset_t, IOR_param_t *);
35 
36 static void *MPIIO_Create(char *, IOR_param_t *);
37 static void *MPIIO_Open(char *, IOR_param_t *);
38 static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *,
40 static void MPIIO_Close(void *, IOR_param_t *);
41 static char* MPIIO_GetVersion();
42 static void MPIIO_Fsync(void *, IOR_param_t *);
43 
44 
45 /************************** D E C L A R A T I O N S ***************************/
46 
48  .name = "MPIIO",
49  .create = MPIIO_Create,
50  .open = MPIIO_Open,
51  .xfer = MPIIO_Xfer,
52  .close = MPIIO_Close,
53  .delete = MPIIO_Delete,
54  .get_version = MPIIO_GetVersion,
55  .fsync = MPIIO_Fsync,
56  .get_file_size = MPIIO_GetFileSize,
57  .statfs = aiori_posix_statfs,
58  .mkdir = aiori_posix_mkdir,
59  .rmdir = aiori_posix_rmdir,
60  .access = MPIIO_Access,
61  .stat = aiori_posix_stat,
62 };
63 
64 /***************************** F U N C T I O N S ******************************/
65 
66 /*
67  * Try to access a file through the MPIIO interface.
68  */
69 int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
70 {
71  MPI_File fd;
72  int mpi_mode = MPI_MODE_UNIQUE_OPEN;
73 
74  if ((mode & W_OK) && (mode & R_OK))
75  mpi_mode |= MPI_MODE_RDWR;
76  else if (mode & W_OK)
77  mpi_mode |= MPI_MODE_WRONLY;
78  else
79  mpi_mode |= MPI_MODE_RDONLY;
80 
81  int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode,
82  MPI_INFO_NULL, &fd);
83 
84  if (!ret)
85  MPI_File_close(&fd);
86 
87  return ret;
88 }
89 
90 /*
91  * Create and open a file through the MPIIO interface.
92  */
93 static void *MPIIO_Create(char *testFileName, IOR_param_t * param)
94 {
95  return MPIIO_Open(testFileName, param);
96 }
97 
98 /*
99  * Open a file through the MPIIO interface. Setup file view.
100  */
101 static void *MPIIO_Open(char *testFileName, IOR_param_t * param)
102 {
103  int fd_mode = (int)0,
104  offsetFactor,
105  tasksPerFile,
106  transfersPerBlock = param->blockSize / param->transferSize;
107  struct fileTypeStruct {
108  int globalSizes[2], localSizes[2], startIndices[2];
109  } fileTypeStruct;
110  MPI_File *fd;
111  MPI_Comm comm;
112  MPI_Info mpiHints = MPI_INFO_NULL;
113 
114  fd = (MPI_File *) malloc(sizeof(MPI_File));
115  if (fd == NULL)
116  ERR("malloc failed()");
117 
118  *fd = 0;
119 
120  /* set IOR file flags to MPIIO flags */
121  /* -- file open flags -- */
122  if (param->openFlags & IOR_RDONLY) {
123  fd_mode |= MPI_MODE_RDONLY;
124  }
125  if (param->openFlags & IOR_WRONLY) {
126  fd_mode |= MPI_MODE_WRONLY;
127  }
128  if (param->openFlags & IOR_RDWR) {
129  fd_mode |= MPI_MODE_RDWR;
130  }
131  if (param->openFlags & IOR_APPEND) {
132  fd_mode |= MPI_MODE_APPEND;
133  }
134  if (param->openFlags & IOR_CREAT) {
135  fd_mode |= MPI_MODE_CREATE;
136  }
137  if (param->openFlags & IOR_EXCL) {
138  fd_mode |= MPI_MODE_EXCL;
139  }
140  if (param->openFlags & IOR_TRUNC) {
141  fprintf(stdout, "File truncation not implemented in MPIIO\n");
142  }
143  if (param->openFlags & IOR_DIRECT) {
144  fprintf(stdout, "O_DIRECT not implemented in MPIIO\n");
145  }
146 
147  /*
148  * MPI_MODE_UNIQUE_OPEN mode optimization eliminates the overhead of file
149  * locking. Only open a file in this mode when the file will not be con-
150  * currently opened elsewhere, either inside or outside the MPI environment.
151  */
152  fd_mode |= MPI_MODE_UNIQUE_OPEN;
153 
154  if (param->filePerProc) {
155  comm = MPI_COMM_SELF;
156  } else {
157  comm = testComm;
158  }
159 
160  SetHints(&mpiHints, param->hintsFileName);
161  /*
162  * note that with MP_HINTS_FILTERED=no, all key/value pairs will
163  * be in the info object. The info object that is attached to
164  * the file during MPI_File_open() will only contain those pairs
165  * deemed valid by the implementation.
166  */
167  /* show hints passed to file */
168  if (rank == 0 && param->showHints) {
169  fprintf(stdout, "\nhints passed to MPI_File_open() {\n");
170  ShowHints(&mpiHints);
171  fprintf(stdout, "}\n");
172  }
173  MPI_CHECK(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd),
174  "cannot open file");
175 
176  /* show hints actually attached to file handle */
177  if (rank == 0 && param->showHints) {
178  if (mpiHints != MPI_INFO_NULL)
179  MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed");
180  MPI_CHECK(MPI_File_get_info(*fd, &mpiHints),
181  "cannot get file info");
182  fprintf(stdout, "\nhints returned from opened file {\n");
183  ShowHints(&mpiHints);
184  fprintf(stdout, "}\n");
185  }
186 
187  /* preallocate space for file */
188  if (param->preallocate && param->open == WRITE) {
189  MPI_CHECK(MPI_File_preallocate(*fd,
190  (MPI_Offset) (param->segmentCount
191  *
192  param->blockSize *
193  param->numTasks)),
194  "cannot preallocate file");
195  }
196  /* create file view */
197  if (param->useFileView) {
198  /* create contiguous transfer datatype */
199  MPI_CHECK(MPI_Type_contiguous
200  (param->transferSize / sizeof(IOR_size_t),
201  MPI_LONG_LONG_INT, &param->transferType),
202  "cannot create contiguous datatype");
203  MPI_CHECK(MPI_Type_commit(&param->transferType),
204  "cannot commit datatype");
205  if (param->filePerProc) {
206  offsetFactor = 0;
207  tasksPerFile = 1;
208  } else {
209  offsetFactor = (rank + rankOffset) % param->numTasks;
210  tasksPerFile = param->numTasks;
211  }
212 
213  /*
214  * create file type using subarray
215  */
216  fileTypeStruct.globalSizes[0] = 1;
217  fileTypeStruct.globalSizes[1] =
218  transfersPerBlock * tasksPerFile;
219  fileTypeStruct.localSizes[0] = 1;
220  fileTypeStruct.localSizes[1] = transfersPerBlock;
221  fileTypeStruct.startIndices[0] = 0;
222  fileTypeStruct.startIndices[1] =
223  transfersPerBlock * offsetFactor;
224 
225  MPI_CHECK(MPI_Type_create_subarray
226  (2, fileTypeStruct.globalSizes,
227  fileTypeStruct.localSizes,
228  fileTypeStruct.startIndices, MPI_ORDER_C,
230  "cannot create subarray");
231  MPI_CHECK(MPI_Type_commit(&param->fileType),
232  "cannot commit datatype");
233 
234  MPI_CHECK(MPI_File_set_view(*fd, (MPI_Offset) 0,
236  param->fileType, "native",
237  (MPI_Info) MPI_INFO_NULL),
238  "cannot set file view");
239  }
240  if (mpiHints != MPI_INFO_NULL)
241  MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed");
242  return ((void *)fd);
243 }
244 
245 /*
246  * Write or read access to file using the MPIIO interface.
247  */
248 static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer,
249  IOR_offset_t length, IOR_param_t * param)
250 {
251  /* NOTE: The second arg is (void *) for reads, and (const void *)
252  for writes. Therefore, one of the two sets of assignments below
253  will get "assignment from incompatible pointer-type" warnings,
254  if we only use this one set of signatures. */
255 
256  int (MPIAPI * Access) (MPI_File, void *, int,
257  MPI_Datatype, MPI_Status *);
258  int (MPIAPI * Access_at) (MPI_File, MPI_Offset, void *, int,
259  MPI_Datatype, MPI_Status *);
260  int (MPIAPI * Access_all) (MPI_File, void *, int,
261  MPI_Datatype, MPI_Status *);
262  int (MPIAPI * Access_at_all) (MPI_File, MPI_Offset, void *, int,
263  MPI_Datatype, MPI_Status *);
264  /*
265  * this needs to be properly implemented:
266  *
267  * int (*Access_ordered)(MPI_File, void *, int,
268  * MPI_Datatype, MPI_Status *);
269  */
270  MPI_Status status;
271 
272  /* point functions to appropriate MPIIO calls */
273  if (access == WRITE) { /* WRITE */
274  Access = (int (MPIAPI *)(MPI_File, void *, int,
275  MPI_Datatype, MPI_Status *)) MPI_File_write;
276  Access_at = (int (MPIAPI *)(MPI_File, MPI_Offset, void *, int,
277  MPI_Datatype, MPI_Status *)) MPI_File_write_at;
278  Access_all = (int (MPIAPI *) (MPI_File, void *, int,
279  MPI_Datatype, MPI_Status *)) MPI_File_write_all;
280  Access_at_all = (int (MPIAPI *) (MPI_File, MPI_Offset, void *, int,
281  MPI_Datatype, MPI_Status *)) MPI_File_write_at_all;
282  /*
283  * this needs to be properly implemented:
284  *
285  * Access_ordered = MPI_File_write_ordered;
286  */
287  } else { /* READ or CHECK */
288  Access = MPI_File_read;
289  Access_at = MPI_File_read_at;
290  Access_all = MPI_File_read_all;
291  Access_at_all = MPI_File_read_at_all;
292  /*
293  * this needs to be properly implemented:
294  *
295  * Access_ordered = MPI_File_read_ordered;
296  */
297  }
298 
299  /*
300  * 'useFileView' uses derived datatypes and individual file pointers
301  */
302  if (param->useFileView) {
303  /* find offset in file */
304  if (SeekOffset(*(MPI_File *) fd, param->offset, param) <
305  0) {
306  /* if unsuccessful */
307  length = -1;
308  } else {
309  /*
310  * 'useStridedDatatype' fits multi-strided pattern into a datatype;
311  * must use 'length' to determine repetitions (fix this for
312  * multi-segments someday, WEL):
313  * e.g., 'IOR -s 2 -b 32K -t 32K -a MPIIO -S'
314  */
315  if (param->useStridedDatatype) {
316  length = param->segmentCount;
317  } else {
318  length = 1;
319  }
320  if (param->collective) {
321  /* individual, collective call */
322  MPI_CHECK(Access_all
323  (*(MPI_File *) fd, buffer, length,
324  param->transferType, &status),
325  "cannot access collective");
326  } else {
327  /* individual, noncollective call */
328  MPI_CHECK(Access
329  (*(MPI_File *) fd, buffer, length,
330  param->transferType, &status),
331  "cannot access noncollective");
332  }
333  length *= param->transferSize; /* for return value in bytes */
334  }
335  } else {
336  /*
337  * !useFileView does not use derived datatypes, but it uses either
338  * shared or explicit file pointers
339  */
340  if (param->useSharedFilePointer) {
341  /* find offset in file */
342  if (SeekOffset
343  (*(MPI_File *) fd, param->offset, param) < 0) {
344  /* if unsuccessful */
345  length = -1;
346  } else {
347  /* shared, collective call */
348  /*
349  * this needs to be properly implemented:
350  *
351  * MPI_CHECK(Access_ordered(fd.MPIIO, buffer, length,
352  * MPI_BYTE, &status),
353  * "cannot access shared, collective");
354  */
355  fprintf(stdout,
356  "useSharedFilePointer not implemented\n");
357  }
358  } else {
359  if (param->collective) {
360  /* explicit, collective call */
361  MPI_CHECK(Access_at_all
362  (*(MPI_File *) fd, param->offset,
363  buffer, length, MPI_BYTE, &status),
364  "cannot access explicit, collective");
365  } else {
366  /* explicit, noncollective call */
367  MPI_CHECK(Access_at
368  (*(MPI_File *) fd, param->offset,
369  buffer, length, MPI_BYTE, &status),
370  "cannot access explicit, noncollective");
371  }
372  }
373  }
374  if((access == WRITE) && (param->fsyncPerWrite == TRUE))
375  MPIIO_Fsync(fd, param);
376  return (length);
377 }
378 
379 /*
380  * Perform fsync().
381  */
382 static void MPIIO_Fsync(void *fdp, IOR_param_t * param)
383 {
384  if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS)
385  EWARN("fsync() failed");
386 }
387 
388 /*
389  * Close a file through the MPIIO interface.
390  */
391 static void MPIIO_Close(void *fd, IOR_param_t * param)
392 {
393  MPI_CHECK(MPI_File_close((MPI_File *) fd), "cannot close file");
394  if ((param->useFileView == TRUE) && (param->fd_fppReadCheck == NULL)) {
395  /*
396  * need to free the datatype, so done in the close process
397  */
398  MPI_CHECK(MPI_Type_free(&param->fileType),
399  "cannot free MPI file datatype");
400  MPI_CHECK(MPI_Type_free(&param->transferType),
401  "cannot free MPI transfer datatype");
402  }
403  free(fd);
404 }
405 
406 /*
407  * Delete a file through the MPIIO interface.
408  */
409 void MPIIO_Delete(char *testFileName, IOR_param_t * param)
410 {
411  MPI_CHECK(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL),
412  "cannot delete file");
413 }
414 
/*
 * Determine api version.
 *
 * Returns a pointer to a static buffer holding "(<version>.<subversion>)" as
 * reported by MPI_Get_version().  The buffer is overwritten on every call
 * and must not be freed by the caller.
 */
static char* MPIIO_GetVersion(void)
{
        /* static storage is zero-initialized; no initializer needed */
        static char ver[1024];
        int version = 0;
        int subversion = 0;

        MPI_CHECK(MPI_Get_version(&version, &subversion),
                  "cannot get MPI version");
        /* bounded formatting so the buffer can never be overrun */
        snprintf(ver, sizeof ver, "(%d.%d)", version, subversion);
        return ver;
}
426 
427 /*
428  * Seek to offset in file using the MPIIO interface.
429  */
431  IOR_param_t * param)
432 {
433  int offsetFactor, tasksPerFile;
434  IOR_offset_t tempOffset;
435 
436  tempOffset = offset;
437 
438  if (param->filePerProc) {
439  offsetFactor = 0;
440  tasksPerFile = 1;
441  } else {
442  offsetFactor = (rank + rankOffset) % param->numTasks;
443  tasksPerFile = param->numTasks;
444  }
445  if (param->useFileView) {
446  /* recall that offsets in a file view are
447  counted in units of transfer size */
448  if (param->filePerProc) {
449  tempOffset = tempOffset / param->transferSize;
450  } else {
451  /*
452  * this formula finds a file view offset for a task
453  * from an absolute offset
454  */
455  tempOffset = ((param->blockSize / param->transferSize)
456  * (tempOffset /
457  (param->blockSize * tasksPerFile)))
458  + (((tempOffset % (param->blockSize * tasksPerFile))
459  - (offsetFactor * param->blockSize))
460  / param->transferSize);
461  }
462  }
463  MPI_CHECK(MPI_File_seek(fd, tempOffset, MPI_SEEK_SET),
464  "cannot seek offset");
465  return (offset);
466 }
467 
468 /*
469  * Use MPI_File_get_size() to return aggregate file size.
470  * NOTE: This function is used by the HDF5 and NCMPI backends.
471  */
473  char *testFileName)
474 {
475  IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
476  MPI_File fd;
477  MPI_Comm comm;
478 
479  if (test->filePerProc == TRUE) {
480  comm = MPI_COMM_SELF;
481  } else {
482  comm = testComm;
483  }
484 
485  MPI_CHECK(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY,
486  MPI_INFO_NULL, &fd),
487  "cannot open file to get file size");
488  MPI_CHECK(MPI_File_get_size(fd, (MPI_Offset *) & aggFileSizeFromStat),
489  "cannot get file size");
490  MPI_CHECK(MPI_File_close(&fd), "cannot close file");
491 
492  if (test->filePerProc == TRUE) {
493  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
494  MPI_LONG_LONG_INT, MPI_SUM, testComm),
495  "cannot total data moved");
496  aggFileSizeFromStat = tmpSum;
497  } else {
498  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
499  MPI_LONG_LONG_INT, MPI_MIN, testComm),
500  "cannot total data moved");
501  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
502  MPI_LONG_LONG_INT, MPI_MAX, testComm),
503  "cannot total data moved");
504  if (tmpMin != tmpMax) {
505  if (rank == 0) {
506  WARN("inconsistent file size by different tasks");
507  }
508  /* incorrect, but now consistent across tasks */
509  aggFileSizeFromStat = tmpMin;
510  }
511  }
512 
513  return (aggFileSizeFromStat);
514 }
int showHints
Definition: ior.h:126
MPI_Datatype fileType
Definition: ior.h:160
#define ERR(MSG)
Definition: iordef.h:169
void ShowHints(MPI_Info *mpiHints)
Definition: utilities.c:303
int filePerProc
Definition: ior.h:104
IOR_offset_t segmentCount
Definition: ior.h:116
int useStridedDatatype
Definition: ior.h:124
int useSharedFilePointer
Definition: ior.h:123
IOR_offset_t transferSize
Definition: ior.h:118
int aiori_posix_rmdir(const char *path, IOR_param_t *param)
Definition: aiori.c:134
MPI_Datatype transferType
Definition: ior.h:159
#define IOR_APPEND
Definition: aiori.h:36
int aiori_posix_mkdir(const char *path, mode_t mode, IOR_param_t *param)
Definition: aiori.c:129
#define IOR_RDONLY
Definition: aiori.h:33
unsigned int openFlags
Definition: ior.h:85
int fsyncPerWrite
Definition: ior.h:152
#define WRITE
Definition: iordef.h:95
#define EWARN(MSG)
Definition: iordef.h:156
int aiori_posix_statfs(const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t *param)
Definition: aiori.c:104
#define IOR_CREAT
Definition: aiori.h:37
static void * MPIIO_Create(char *, IOR_param_t *)
Definition: aiori-MPIIO.c:93
#define IOR_EXCL
Definition: aiori.h:39
char * hintsFileName
Definition: ior.h:92
MPI_Comm testComm
Definition: utilities.c:61
#define IOR_TRUNC
Definition: aiori.h:38
#define MPI_CHECK(MPI_STATUS, MSG)
Definition: iordef.h:192
static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *)
Definition: aiori-MPIIO.c:248
Definition: ior.h:47
void * fd_fppReadCheck
Definition: ior.h:141
void MPIIO_Delete(char *testFileName, IOR_param_t *param)
Definition: aiori-MPIIO.c:409
static IOR_param_t param
Definition: mdtest.c:153
static IOR_offset_t SeekOffset(MPI_File, IOR_offset_t, IOR_param_t *)
Definition: aiori-MPIIO.c:430
int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
Definition: aiori-MPIIO.c:69
#define IOR_WRONLY
Definition: aiori.h:34
static char * MPIIO_GetVersion()
Definition: aiori-MPIIO.c:418
int rankOffset
Definition: utilities.c:58
int useFileView
Definition: ior.h:122
static void MPIIO_Fsync(void *, IOR_param_t *)
Definition: aiori-MPIIO.c:382
long long int IOR_size_t
Definition: iordef.h:124
#define WARN(MSG)
Definition: iordef.h:145
static void MPIIO_Close(void *, IOR_param_t *)
Definition: aiori-MPIIO.c:391
int numTasks
Definition: ior.h:94
static void * MPIIO_Open(char *, IOR_param_t *)
Definition: aiori-MPIIO.c:101
ior_aiori_t mpiio_aiori
Definition: aiori-MPIIO.c:47
int collective
Definition: ior.h:115
IOR_offset_t offset
Definition: ior.h:119
int open
Definition: ior.h:101
int aiori_posix_stat(const char *path, struct stat *buf, IOR_param_t *param)
Definition: aiori.c:144
void SetHints(MPI_Info *mpiHints, char *hintsFileName)
Definition: utilities.c:242
#define IOR_RDWR
Definition: aiori.h:35
int preallocate
Definition: ior.h:121
#define MPIAPI
Definition: aiori-MPIIO.c:29
char * name
Definition: aiori.h:67
long long int IOR_offset_t
Definition: iordef.h:123
int rank
Definition: utilities.c:57
IOR_offset_t blockSize
Definition: ior.h:117
#define TRUE
Definition: iordef.h:75
#define IOR_DIRECT
Definition: aiori.h:40
IOR_offset_t MPIIO_GetFileSize(IOR_param_t *test, MPI_Comm testComm, char *testFileName)
Definition: aiori-MPIIO.c:472
#define NULL
Definition: iordef.h:79