IOR
aiori-POSIX.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  */
4 /******************************************************************************\
5 * *
6 * Copyright (c) 2003, The Regents of the University of California *
7 * See the file COPYRIGHT for a complete copyright notice and license. *
8 * *
9 ********************************************************************************
10 *
11 * Implementation of the abstract I/O interface for POSIX.
12 *
13 \******************************************************************************/
14 
15 #ifdef HAVE_CONFIG_H
16 # include "config.h"
17 #endif
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 
22 #ifdef __linux__
23 # include <sys/ioctl.h> /* necessary for: */
24 # define __USE_GNU /* O_DIRECT and */
25 # include <fcntl.h> /* IO operations */
26 # undef __USE_GNU
27 #endif /* __linux__ */
28 
29 #include <errno.h>
30 #include <fcntl.h> /* IO operations */
31 #include <sys/stat.h>
32 #include <assert.h>
33 
34 
35 #ifdef HAVE_LINUX_LUSTRE_LUSTRE_USER_H
36 # include <linux/lustre/lustre_user.h>
37 #elif defined(HAVE_LUSTRE_LUSTRE_USER_H)
38 # include <lustre/lustre_user.h>
39 #endif
40 #ifdef HAVE_GPFS_H
41 # include <gpfs.h>
42 #endif
43 #ifdef HAVE_GPFS_FCNTL_H
44 # include <gpfs_fcntl.h>
45 #endif
46 
47 #ifdef HAVE_BEEGFS_BEEGFS_H
48 #include <beegfs/beegfs.h>
49 #include <dirent.h>
50 #include <libgen.h>
51 #endif
52 
53 #include "ior.h"
54 #include "aiori.h"
55 #include "iordef.h"
56 #include "utilities.h"
57 
58 #ifndef open64 /* necessary for TRU64 -- */
59 # define open64 open /* unlikely, but may pose */
60 #endif /* not open64 */ /* conflicting prototypes */
61 
62 #ifndef lseek64 /* necessary for TRU64 -- */
63 # define lseek64 lseek /* unlikely, but may pose */
64 #endif /* not lseek64 */ /* conflicting prototypes */
65 
66 #ifndef O_BINARY /* Required on Windows */
67 # define O_BINARY 0
68 #endif
69 
70 /**************************** P R O T O T Y P E S *****************************/
71 static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *,
73 static void POSIX_Fsync(void *, IOR_param_t *);
74 
75 /************************** D E C L A R A T I O N S ***************************/
76 
78  .name = "POSIX",
79  .create = POSIX_Create,
80  .open = POSIX_Open,
81  .xfer = POSIX_Xfer,
82  .close = POSIX_Close,
83  .delete = POSIX_Delete,
84  .get_version = aiori_get_version,
85  .fsync = POSIX_Fsync,
86  .get_file_size = POSIX_GetFileSize,
87  .statfs = aiori_posix_statfs,
88  .mkdir = aiori_posix_mkdir,
89  .rmdir = aiori_posix_rmdir,
90  .access = aiori_posix_access,
91  .stat = aiori_posix_stat,
92 };
93 
94 /***************************** F U N C T I O N S ******************************/
95 
96 
97 #ifdef HAVE_GPFS_FCNTL_H
98 void gpfs_free_all_locks(int fd)
99 {
100  int rc;
101  struct {
102  gpfsFcntlHeader_t header;
103  gpfsFreeRange_t release;
104  } release_all;
105  release_all.header.totalLength = sizeof(release_all);
106  release_all.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
107  release_all.header.fcntlReserved = 0;
108 
109  release_all.release.structLen = sizeof(release_all.release);
110  release_all.release.structType = GPFS_FREE_RANGE;
111  release_all.release.start = 0;
112  release_all.release.length = 0;
113 
114  rc = gpfs_fcntl(fd, &release_all);
115  if (verbose >= VERBOSE_0 && rc != 0) {
116  EWARN("gpfs_fcntl release all locks hint failed.");
117  }
118 }
119 void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int access)
120 {
121  int rc;
122  struct {
123  gpfsFcntlHeader_t header;
124  gpfsAccessRange_t access;
125  } take_locks;
126 
127  take_locks.header.totalLength = sizeof(take_locks);
128  take_locks.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
129  take_locks.header.fcntlReserved = 0;
130 
131  take_locks.access.structLen = sizeof(take_locks.access);
132  take_locks.access.structType = GPFS_ACCESS_RANGE;
133  take_locks.access.start = param->offset;
134  take_locks.access.length = length;
135  take_locks.access.isWrite = (access == WRITE);
136 
137  rc = gpfs_fcntl(fd, &take_locks);
138  if (verbose >= VERBOSE_2 && rc != 0) {
139  EWARN("gpfs_fcntl access range hint failed.");
140  }
141 }
142 
143 void gpfs_access_end(int fd, IOR_offset_t length, IOR_param_t *param, int access)
144 {
145  int rc;
146  struct {
147  gpfsFcntlHeader_t header;
148  gpfsFreeRange_t free;
149  } free_locks;
150 
151 
152  free_locks.header.totalLength = sizeof(free_locks);
153  free_locks.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
154  free_locks.header.fcntlReserved = 0;
155 
156  free_locks.free.structLen = sizeof(free_locks.free);
157  free_locks.free.structType = GPFS_FREE_RANGE;
158  free_locks.free.start = param->offset;
159  free_locks.free.length = length;
160 
161  rc = gpfs_fcntl(fd, &free_locks);
162  if (verbose >= VERBOSE_2 && rc != 0) {
163  EWARN("gpfs_fcntl free range hint failed.");
164  }
165 }
166 
167 #endif
168 
169 #ifdef HAVE_BEEGFS_BEEGFS_H
170 
/*
 * Create an anonymous temporary file inside `dirPath`.
 *
 * The file is created with mkstemp() and unlinked immediately, so only
 * the returned descriptor keeps it alive.
 *
 * Returns the open file descriptor, or -1 if the path buffer could not
 * be allocated or mkstemp() failed.  The caller owns the descriptor and
 * must close() it.
 */
int mkTempInDir(char* dirPath)
{
        static const char suffix[] = "/XXXXXX";
        /* sizeof(suffix) already accounts for the terminating NUL */
        size_t len = strlen(dirPath) + sizeof(suffix);
        char *tmpfilename = malloc(len);
        int fd;

        if (tmpfilename == NULL)
                return -1;

        snprintf(tmpfilename, len, "%s%s", dirPath, suffix);

        fd = mkstemp(tmpfilename);
        /* only remove the name if a file was actually created */
        if (fd >= 0)
                unlink(tmpfilename);
        free(tmpfilename);

        return fd;
}
183 
/*
 * Query the striping settings that apply to new files in `dirPath`.
 *
 * A temporary file is created in the directory via mkTempInDir() and
 * beegfs_getStripeInfo() is called on it.
 *
 * dirPath        - directory to probe
 * numTargetsOut  - receives the number of targets
 * chunkSizeOut   - receives the chunk size
 *
 * Returns the result of beegfs_getStripeInfo(), or false when the
 * temporary file could not be created.
 */
bool beegfs_getStriping(char* dirPath, u_int16_t* numTargetsOut, unsigned* chunkSizeOut)
{
        bool retVal = false;

        int fd = mkTempInDir(dirPath);
        /* mkTempInDir() signals failure with -1; descriptor 0 is valid */
        if (fd >= 0) {
                unsigned stripePattern = 0;
                retVal = beegfs_getStripeInfo(fd, &stripePattern, chunkSizeOut, numTargetsOut);
                close(fd);
        }

        return retVal;
}
197 
/*
 * Tell whether a BeeGFS tuning option carries a value.
 * The value -1 is the "not set" marker; every other value counts as set.
 */
bool beegfs_isOptionSet(int opt)
{
        const int unset_marker = -1;

        if (opt == unset_marker)
                return false;
        return true;
}
201 
/*
 * Create a file on a BeeGFS file system with striping parameters.
 *
 * filepath   - path of the file to create; it is not modified (dirname()
 *              and basename() operate on private copies)
 * mode       - permission bits passed to beegfs_createFile()
 * numTargets - number of targets, or -1 to use the directory default
 * chunkSize  - chunk size, or -1 to use the directory default
 *
 * Returns true only when beegfs_createFile() reported success.  Returns
 * false when the parent directory is not on BeeGFS (a warning is issued)
 * or when any step failed.  Hard failures are reported through ERR()
 * (defined in iordef.h; presumably fatal -- the cleanup path below is
 * still valid if it returns).
 */
bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chunkSize)
{
        bool retVal = false;
        DIR *parentDirS = NULL;
        char *dir;
        char *filename;
        int parentDirFd;
        /* dirname()/basename() may modify their argument: give each a copy */
        char *dirTmp = strdup(filepath);
        char *filenameTmp = strdup(filepath);

        if (dirTmp == NULL || filenameTmp == NULL) {
                ERR("Unable to duplicate file path");
                goto out;
        }

        dir = dirname(dirTmp);
        filename = basename(filenameTmp);

        parentDirS = opendir(dir);
        if (!parentDirS) {
                ERR("Failed to get directory");
                goto out;
        }

        parentDirFd = dirfd(parentDirS);
        if (parentDirFd < 0) {
                ERR("Failed to get directory descriptor");
                goto out;
        }

        if (!beegfs_testIsBeeGFS(parentDirFd)) {
                WARN("Not a BeeGFS file system");
                goto out;
        }

        /* fill in whichever option the caller left unset (-1) */
        if (!beegfs_isOptionSet(numTargets)
            || !beegfs_isOptionSet(chunkSize)) {
                u_int16_t defaultNumTargets = 0;
                unsigned defaultChunkSize = 0;
                bool haveDefaults = beegfs_getStriping(dir,
                                                       &defaultNumTargets,
                                                       &defaultChunkSize);
                if (!haveDefaults) {
                        ERR("Failed to get default BeeGFS striping values");
                }

                if (!beegfs_isOptionSet(numTargets))
                        numTargets = defaultNumTargets;
                if (!beegfs_isOptionSet(chunkSize))
                        chunkSize = defaultChunkSize;
        }

        if (!beegfs_createFile(parentDirFd, filename,
                               mode, numTargets, chunkSize)) {
                ERR("Could not create file");
        } else {
                retVal = true;
        }

out:
        if (parentDirS != NULL)
                closedir(parentDirS);
        free(filenameTmp);
        free(dirTmp);
        return retVal;
}
261 #endif /* HAVE_BEEGFS_BEEGFS_H */
262 
263 
264 /*
265  * Creat and open a file through the POSIX interface.
266  */
267 void *POSIX_Create(char *testFileName, IOR_param_t * param)
268 {
269  int fd_oflag = O_BINARY;
270  int *fd;
271 
272  fd = (int *)malloc(sizeof(int));
273  if (fd == NULL)
274  ERR("Unable to malloc file descriptor");
275 
276  if (param->useO_DIRECT == TRUE)
277  set_o_direct_flag(&fd_oflag);
278 
279 #ifdef HAVE_LUSTRE_LUSTRE_USER_H
280 /* Add a #define for FASYNC if not available, as it forms part of
281  * the Lustre O_LOV_DELAY_CREATE definition. */
282 #ifndef FASYNC
283 #define FASYNC 00020000 /* fcntl, for BSD compatibility */
284 #endif
285 
286  if (param->lustre_set_striping) {
287  /* In the single-shared-file case, task 0 has to creat the
288  file with the Lustre striping options before any other processes
289  open the file */
290  if (!param->filePerProc && rank != 0) {
291  MPI_CHECK(MPI_Barrier(testComm), "barrier error");
292  fd_oflag |= O_RDWR;
293  *fd = open64(testFileName, fd_oflag, 0664);
294  if (*fd < 0)
295  ERR("open64() failed");
296  } else {
297  struct lov_user_md opts = { 0 };
298 
299  /* Setup Lustre IOCTL striping pattern structure */
300  opts.lmm_magic = LOV_USER_MAGIC;
301  opts.lmm_stripe_size = param->lustre_stripe_size;
302  opts.lmm_stripe_offset = param->lustre_start_ost;
303  opts.lmm_stripe_count = param->lustre_stripe_count;
304 
305  /* File needs to be opened O_EXCL because we cannot set
306  * Lustre striping information on a pre-existing file.*/
307 
308  fd_oflag |=
309  O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE;
310  *fd = open64(testFileName, fd_oflag, 0664);
311  if (*fd < 0) {
312  fprintf(stdout, "\nUnable to open '%s': %s\n",
313  testFileName, strerror(errno));
314  MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
315  "MPI_Abort() error");
316  } else if (ioctl(*fd, LL_IOC_LOV_SETSTRIPE, &opts)) {
317  char *errmsg = "stripe already set";
318  if (errno != EEXIST && errno != EALREADY)
319  errmsg = strerror(errno);
320  fprintf(stdout,
321  "\nError on ioctl for '%s' (%d): %s\n",
322  testFileName, *fd, errmsg);
323  MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
324  "MPI_Abort() error");
325  }
326  if (!param->filePerProc)
327  MPI_CHECK(MPI_Barrier(testComm),
328  "barrier error");
329  }
330  } else {
331 #endif /* HAVE_LUSTRE_LUSTRE_USER_H */
332 
333  fd_oflag |= O_CREAT | O_RDWR;
334 
335 #ifdef HAVE_BEEGFS_BEEGFS_H
336  if (beegfs_isOptionSet(param->beegfs_chunkSize)
337  || beegfs_isOptionSet(param->beegfs_numTargets)) {
338  bool result = beegfs_createFilePath(testFileName,
339  0664,
340  param->beegfs_numTargets,
341  param->beegfs_chunkSize);
342  if (result) {
343  fd_oflag &= ~O_CREAT;
344  } else {
345  EWARN("BeeGFS tuning failed");
346  }
347  }
348 #endif /* HAVE_BEEGFS_BEEGFS_H */
349 
350  *fd = open64(testFileName, fd_oflag, 0664);
351  if (*fd < 0)
352  ERR("open64() failed");
353 
354 #ifdef HAVE_LUSTRE_LUSTRE_USER_H
355  }
356 
357  if (param->lustre_ignore_locks) {
358  int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK;
359  if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1)
360  ERR("ioctl(LL_IOC_SETFLAGS) failed");
361  }
362 #endif /* HAVE_LUSTRE_LUSTRE_USER_H */
363 
364 #ifdef HAVE_GPFS_FCNTL_H
365  /* in the single shared file case, immediately release all locks, with
366  * the intent that we can avoid some byte range lock revocation:
367  * everyone will be writing/reading from individual regions */
368  if (param->gpfs_release_token ) {
369  gpfs_free_all_locks(*fd);
370  }
371 #endif
372  return ((void *)fd);
373 }
374 
375 /*
376  * Open a file through the POSIX interface.
377  */
378 void *POSIX_Open(char *testFileName, IOR_param_t * param)
379 {
380  int fd_oflag = O_BINARY;
381  int *fd;
382 
383  fd = (int *)malloc(sizeof(int));
384  if (fd == NULL)
385  ERR("Unable to malloc file descriptor");
386 
387  if (param->useO_DIRECT == TRUE)
388  set_o_direct_flag(&fd_oflag);
389 
390  fd_oflag |= O_RDWR;
391  *fd = open64(testFileName, fd_oflag);
392  if (*fd < 0)
393  ERR("open64 failed");
394 
395 #ifdef HAVE_LUSTRE_LUSTRE_USER_H
396  if (param->lustre_ignore_locks) {
397  int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK;
398  if (verbose >= VERBOSE_1) {
399  fprintf(stdout,
400  "** Disabling lustre range locking **\n");
401  }
402  if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1)
403  ERR("ioctl(LL_IOC_SETFLAGS) failed");
404  }
405 #endif /* HAVE_LUSTRE_LUSTRE_USER_H */
406 
407 #ifdef HAVE_GPFS_FCNTL_H
408  if(param->gpfs_release_token) {
409  gpfs_free_all_locks(*fd);
410  }
411 #endif
412  return ((void *)fd);
413 }
414 
415 /*
416  * Write or read access to file using the POSIX interface.
417  */
418 static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer,
419  IOR_offset_t length, IOR_param_t * param)
420 {
421  int xferRetries = 0;
422  long long remaining = (long long)length;
423  char *ptr = (char *)buffer;
424  long long rc;
425  int fd;
426 
427  fd = *(int *)file;
428 
429 #ifdef HAVE_GPFS_FCNTL_H
430  if (param->gpfs_hint_access) {
431  gpfs_access_start(fd, length, param, access);
432  }
433 #endif
434 
435 
436  /* seek to offset */
437  if (lseek64(fd, param->offset, SEEK_SET) == -1)
438  ERR("lseek64() failed");
439 
440  while (remaining > 0) {
441  /* write/read file */
442  if (access == WRITE) { /* WRITE */
443  if (verbose >= VERBOSE_4) {
444  fprintf(stdout,
445  "task %d writing to offset %lld\n",
446  rank,
447  param->offset + length - remaining);
448  }
449  rc = write(fd, ptr, remaining);
450  if (rc == -1)
451  ERR("write() failed");
452  if (param->fsyncPerWrite == TRUE)
453  POSIX_Fsync(&fd, param);
454  } else { /* READ or CHECK */
455  if (verbose >= VERBOSE_4) {
456  fprintf(stdout,
457  "task %d reading from offset %lld\n",
458  rank,
459  param->offset + length - remaining);
460  }
461  rc = read(fd, ptr, remaining);
462  if (rc == 0)
463  ERR("read() returned EOF prematurely");
464  if (rc == -1)
465  ERR("read() failed");
466  }
467  if (rc < remaining) {
468  fprintf(stdout,
469  "WARNING: Task %d, partial %s, %lld of %lld bytes at offset %lld\n",
470  rank,
471  access == WRITE ? "write()" : "read()",
472  rc, remaining,
473  param->offset + length - remaining);
474  if (param->singleXferAttempt == TRUE)
475  MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
476  "barrier error");
477  if (xferRetries > MAX_RETRY)
478  ERR("too many retries -- aborting");
479  }
480  assert(rc >= 0);
481  assert(rc <= remaining);
482  remaining -= rc;
483  ptr += rc;
484  xferRetries++;
485  }
486 #ifdef HAVE_GPFS_FCNTL_H
487  if (param->gpfs_hint_access) {
488  gpfs_access_end(fd, length, param, access);
489  }
490 #endif
491  return (length);
492 }
493 
494 /*
495  * Perform fsync().
496  */
497 static void POSIX_Fsync(void *fd, IOR_param_t * param)
498 {
499  if (fsync(*(int *)fd) != 0)
500  EWARN("fsync() failed");
501 }
502 
503 /*
504  * Close a file through the POSIX interface.
505  */
506 void POSIX_Close(void *fd, IOR_param_t * param)
507 {
508  if (close(*(int *)fd) != 0)
509  ERR("close() failed");
510  free(fd);
511 }
512 
513 /*
514  * Delete a file through the POSIX interface.
515  */
516 void POSIX_Delete(char *testFileName, IOR_param_t * param)
517 {
518  char errmsg[256];
519  sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n",
520  rank, testFileName);
521  if (unlink(testFileName) != 0)
522  EWARN(errmsg);
523 }
524 
525 /*
526  * Use POSIX stat() to return aggregate file size.
527  */
529  char *testFileName)
530 {
531  struct stat stat_buf;
532  IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
533 
534  if (stat(testFileName, &stat_buf) != 0) {
535  ERR("stat() failed");
536  }
537  aggFileSizeFromStat = stat_buf.st_size;
538 
539  if (test->filePerProc == TRUE) {
540  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
541  MPI_LONG_LONG_INT, MPI_SUM, testComm),
542  "cannot total data moved");
543  aggFileSizeFromStat = tmpSum;
544  } else {
545  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
546  MPI_LONG_LONG_INT, MPI_MIN, testComm),
547  "cannot total data moved");
548  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
549  MPI_LONG_LONG_INT, MPI_MAX, testComm),
550  "cannot total data moved");
551  if (tmpMin != tmpMax) {
552  if (rank == 0) {
553  WARN("inconsistent file size by different tasks");
554  }
555  /* incorrect, but now consistent across tasks */
556  aggFileSizeFromStat = tmpMin;
557  }
558  }
559 
560  return (aggFileSizeFromStat);
561 }
static void POSIX_Fsync(void *, IOR_param_t *)
Definition: aiori-POSIX.c:497
int lustre_stripe_count
Definition: ior.h:187
int lustre_stripe_size
Definition: ior.h:188
void set_o_direct_flag(int *fd)
Definition: utilities.c:73
#define ERR(MSG)
Definition: iordef.h:169
#define VERBOSE_0
Definition: iordef.h:102
int filePerProc
Definition: ior.h:104
void * POSIX_Open(char *testFileName, IOR_param_t *param)
Definition: aiori-POSIX.c:378
CURLcode rc
Definition: aiori-S3.c:121
int aiori_posix_rmdir(const char *path, IOR_param_t *param)
Definition: aiori.c:134
int gpfs_release_token
Definition: ior.h:195
ior_aiori_t posix_aiori
Definition: aiori-POSIX.c:77
int aiori_posix_mkdir(const char *path, mode_t mode, IOR_param_t *param)
Definition: aiori.c:129
int fsyncPerWrite
Definition: ior.h:152
int lustre_start_ost
Definition: ior.h:189
#define WRITE
Definition: iordef.h:95
#define EWARN(MSG)
Definition: iordef.h:156
int aiori_posix_statfs(const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t *param)
Definition: aiori.c:104
void * POSIX_Create(char *testFileName, IOR_param_t *param)
Definition: aiori-POSIX.c:267
char * aiori_get_version()
Definition: aiori.c:149
#define O_BINARY
Definition: aiori-POSIX.c:67
MPI_Comm testComm
Definition: utilities.c:61
void POSIX_Delete(char *testFileName, IOR_param_t *param)
Definition: aiori-POSIX.c:516
#define MPI_CHECK(MPI_STATUS, MSG)
Definition: iordef.h:192
IOR_offset_t POSIX_GetFileSize(IOR_param_t *test, MPI_Comm testComm, char *testFileName)
Definition: aiori-POSIX.c:528
int singleXferAttempt
Definition: ior.h:151
#define open64
Definition: aiori-POSIX.c:59
#define MAX_RETRY
Definition: iordef.h:111
static IOR_param_t param
Definition: mdtest.c:153
int beegfs_numTargets
Definition: ior.h:198
long long int IOR_size_t
Definition: iordef.h:124
#define WARN(MSG)
Definition: iordef.h:145
#define VERBOSE_2
Definition: iordef.h:104
int lustre_ignore_locks
Definition: ior.h:191
void POSIX_Close(void *fd, IOR_param_t *param)
Definition: aiori-POSIX.c:506
#define lseek64
Definition: aiori-POSIX.c:63
IOR_offset_t offset
Definition: ior.h:119
#define VERBOSE_4
Definition: iordef.h:106
int errno
int aiori_posix_stat(const char *path, struct stat *buf, IOR_param_t *param)
Definition: aiori.c:144
int aiori_posix_access(const char *path, int mode, IOR_param_t *param)
Definition: aiori.c:139
#define VERBOSE_1
Definition: iordef.h:103
int verbose
Definition: utilities.c:60
char * name
Definition: aiori.h:67
long long int IOR_offset_t
Definition: iordef.h:123
int rank
Definition: utilities.c:57
int useO_DIRECT
Definition: ior.h:125
int gpfs_hint_access
Definition: ior.h:194
#define TRUE
Definition: iordef.h:75
int lustre_set_striping
Definition: ior.h:190
int beegfs_chunkSize
Definition: ior.h:199
static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *)
Definition: aiori-POSIX.c:418
#define NULL
Definition: iordef.h:79