23 # include <sys/ioctl.h> 36 #ifdef HAVE_LUSTRE_USER 37 # ifdef HAVE_LINUX_LUSTRE_LUSTRE_USER_H 38 # include <linux/lustre/lustre_user.h> 39 # elif defined(HAVE_LUSTRE_LUSTRE_USER_H) 40 # include <lustre/lustre_user.h> 47 #ifdef HAVE_GPFS_FCNTL_H 48 # include <gpfs_fcntl.h> 51 #ifdef HAVE_BEEGFS_BEEGFS_H 52 # include <beegfs/beegfs.h> 64 #ifdef HAVE_GPU_DIRECT 65 typedef long long loff_t;
66 # include <cuda_runtime.h> 72 #ifdef HAVE_GPU_DIRECT 73 CUfileHandle_t cf_handle;
83 # define lseek64 lseek 90 #ifdef HAVE_GPU_DIRECT 91 static const char* cuFileGetErrorString(CUfileError_t status){
92 if(IS_CUDA_ERR(status)){
93 return cudaGetErrorString(status.err);
95 return strerror(status.err);
98 static void init_cufile(
posix_fd * pfd){
99 CUfileDescr_t cf_descr = (CUfileDescr_t){
100 .handle.fd = pfd->
fd,
101 .type = CU_FILE_HANDLE_TYPE_OPAQUE_FD
103 CUfileError_t status = cuFileHandleRegister(& pfd->cf_handle, & cf_descr);
104 if(status.err != CU_FILE_SUCCESS){
105 WARNF(
"Could not register handle %s", cuFileGetErrorString(status));
120 if (init_values !=
NULL){
136 #ifdef HAVE_BEEGFS_BEEGFS_H 140 #ifdef HAVE_GPFS_FCNTL_H 143 #ifdef HAVE_GPFSFINEGRAINWRITESHARING_T 147 #ifdef HAVE_GPFSCREATESHARING_T 150 #endif // HAVE_GPFS_FCNTL_H 151 #ifdef HAVE_LUSTRE_USER 157 #ifdef HAVE_GPU_DIRECT 163 memcpy(help, h,
sizeof(h));
193 .enable_mdtest =
true,
209 ERR(
"beegfsChunkSize must be a power of two and >64k");
213 ERR(
"GPUDirect required direct I/O to be used!");
215 #ifndef HAVE_GPU_DIRECT 217 ERR(
"GPUDirect support is not compiled");
223 #ifdef HAVE_GPFS_FCNTL_H 224 void gpfs_free_all_locks(
int fd)
228 gpfsFcntlHeader_t header;
229 gpfsFreeRange_t release;
231 release_all.header.totalLength =
sizeof(release_all);
232 release_all.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
233 release_all.header.fcntlReserved = 0;
235 release_all.release.structLen =
sizeof(release_all.release);
236 release_all.release.structType = GPFS_FREE_RANGE;
237 release_all.release.start = 0;
238 release_all.release.length = 0;
240 rc = gpfs_fcntl(fd, &release_all);
242 WARNF(
"gpfs_fcntl(%d, ...) release all locks hint failed.", fd);
249 gpfsFcntlHeader_t header;
250 gpfsAccessRange_t access;
253 take_locks.header.totalLength =
sizeof(take_locks);
254 take_locks.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
255 take_locks.header.fcntlReserved = 0;
257 take_locks.access.structLen =
sizeof(take_locks.access);
258 take_locks.access.structType = GPFS_ACCESS_RANGE;
259 take_locks.access.start = offset;
260 take_locks.access.length = length;
261 take_locks.access.isWrite = (access ==
WRITE);
263 rc = gpfs_fcntl(fd, &take_locks);
265 WARNF(
"gpfs_fcntl(%d, ...) access range hint failed.", fd);
273 gpfsFcntlHeader_t header;
274 gpfsFreeRange_t free;
278 free_locks.header.totalLength =
sizeof(free_locks);
279 free_locks.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
280 free_locks.header.fcntlReserved = 0;
282 free_locks.free.structLen =
sizeof(free_locks.free);
283 free_locks.free.structType = GPFS_FREE_RANGE;
284 free_locks.free.start = offset;
285 free_locks.free.length = length;
287 rc = gpfs_fcntl(fd, &free_locks);
289 WARNF(
"gpfs_fcntl(%d, ...) free range hint failed.", fd);
293 #ifdef HAVE_GPFSFINEGRAINWRITESHARING_T 296 void gpfs_fineGrainWriteSharing(
int fd)
300 gpfsFcntlHeader_t header;
301 gpfsFineGrainWriteSharing_t write;
305 sharingHint.header.totalLength =
sizeof(sharingHint);
306 sharingHint.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
307 sharingHint.header.fcntlReserved = 0;
309 sharingHint.write.structLen =
sizeof(sharingHint.write);
310 sharingHint.write.structType = GPFS_FINE_GRAIN_WRITE_SHARING;
311 sharingHint.write.fineGrainWriteSharing = 1;
312 sharingHint.write.taskId = -1;
313 sharingHint.write.totalTasks = -1;
314 sharingHint.write.recordSize = -1;
316 rc = gpfs_fcntl(fd, &sharingHint);
318 WARNF(
"gpfs_fcntl(%d, ...) fine grain write sharing hint failed.", fd);
324 void gpfs_fineGrainReadSharing(
int fd)
328 gpfsFcntlHeader_t header;
329 #ifdef HAVE_GPFSFINEGRAINREADSHARING_T 330 gpfsFineGrainReadSharing_t read;
337 sharingHint.header.totalLength =
sizeof(sharingHint);
338 sharingHint.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
339 sharingHint.header.fcntlReserved = 0;
341 sharingHint.read.structLen =
sizeof(sharingHint.read);
342 #ifdef HAVE_GPFSFINEGRAINREADSHARING_T 343 sharingHint.read.structType = GPFS_FINE_GRAIN_READ_SHARING;
344 sharingHint.read.fineGrainReadSharing = 1;
346 sharingHint.read.structType = GPFS_PREFETCH;
347 sharingHint.read.prefetchEnableRead = 0;
348 sharingHint.read.prefetchEnableWrite = 1;
351 rc = gpfs_fcntl(fd, &sharingHint);
353 WARNF(
"gpfs_fcntl(%d, ...) fine grain read sharing hint failed.", fd);
359 #ifdef HAVE_BEEGFS_BEEGFS_H 361 int mkTempInDir(
char* dirPath)
363 unsigned long len = strlen(dirPath) + 8;
364 char* tmpfilename = (
char*)malloc(
sizeof (
char)*len+1);
365 snprintf(tmpfilename, len,
"%s/XXXXXX", dirPath);
367 int fd = mkstemp(tmpfilename);
374 bool beegfs_getStriping(
char* dirPath, u_int16_t* numTargetsOut,
unsigned* chunkSizeOut)
378 int fd = mkTempInDir(dirPath);
380 unsigned stripePattern = 0;
381 retVal = beegfs_getStripeInfo(fd, &stripePattern, chunkSizeOut, numTargetsOut);
388 bool beegfs_isOptionSet(
int opt) {
392 bool beegfs_compatibleFileExists(
char* filepath,
int numTargets,
int chunkSize)
394 int fd = open(filepath, O_RDWR);
399 unsigned read_stripePattern = 0;
400 u_int16_t read_numTargets = 0;
401 int read_chunkSize = 0;
403 bool retVal = beegfs_getStripeInfo(fd, &read_stripePattern, &read_chunkSize, &read_numTargets);
407 return retVal && read_numTargets == numTargets && read_chunkSize == chunkSize;
413 bool beegfs_createFilePath(
char* filepath, mode_t mode,
int numTargets,
int chunkSize)
416 char* dirTmp = strdup(filepath);
417 char* dir = dirname(dirTmp);
418 DIR* parentDirS = opendir(dir);
420 ERRF(
"Failed to get directory: %s", dir);
424 int parentDirFd = dirfd(parentDirS);
427 ERRF(
"Failed to get directory descriptor: %s", dir);
431 bool isBeegfs = beegfs_testIsBeeGFS(parentDirFd);
434 WARN(
"Not a BeeGFS file system");
438 if ( !beegfs_isOptionSet(numTargets)
439 || !beegfs_isOptionSet(chunkSize)) {
440 u_int16_t defaultNumTargets = 0;
441 unsigned defaultChunkSize = 0;
442 bool haveDefaults = beegfs_getStriping(dir,
446 ERR(
"Failed to get default BeeGFS striping values");
448 numTargets = beegfs_isOptionSet(numTargets) ?
449 numTargets : defaultNumTargets;
450 chunkSize = beegfs_isOptionSet(chunkSize) ?
451 chunkSize : defaultChunkSize;
454 char* filenameTmp = strdup(filepath);
455 char* filename = basename(filepath);
456 bool isFileCreated = beegfs_compatibleFileExists(filepath, numTargets, chunkSize)
457 || beegfs_createFile(parentDirFd, filename,
458 mode, numTargets, chunkSize);
460 ERR(
"Could not create file");
465 closedir(parentDirS);
473 #ifdef HAVE_LUSTRE_USER 474 void lustre_disable_file_locks(
const int fd) {
475 int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK;
477 INFO(
"** Disabling lustre range locking **\n");
479 if (ioctl(fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) {
480 ERRF(
"ioctl(%d, LL_IOC_SETFLAGS, ...) failed", fd);
501 #ifdef HAVE_LUSTRE_USER 505 #define FASYNC 00020000 514 pfd->
fd =
open64(testFileName, fd_oflag, mode);
516 ERRF(
"open64(\"%s\", %d, %#o) failed. Error: %s",
517 testFileName, fd_oflag, mode, strerror(
errno));
520 struct lov_user_md opts = { 0 };
523 opts.lmm_magic = LOV_USER_MAGIC;
531 fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE;
532 pfd->
fd =
open64(testFileName, fd_oflag, mode);
534 ERRF(
"Unable to open '%s': %s\n",
535 testFileName, strerror(
errno));
536 }
else if (ioctl(pfd->
fd, LL_IOC_LOV_SETSTRIPE, &opts)) {
537 char *errmsg =
"stripe already set";
539 errmsg = strerror(
errno);
540 ERRF(
"Error on ioctl for '%s' (%d): %s\n",
541 testFileName, pfd->
fd, errmsg);
550 fd_oflag |= O_CREAT | O_RDWR;
552 #ifdef HAVE_BEEGFS_BEEGFS_H 555 bool result = beegfs_createFilePath(testFileName,
560 fd_oflag &= ~O_CREAT;
562 WARN(
"BeeGFS tuning failed");
567 pfd->
fd =
open64(testFileName, fd_oflag, mode);
569 ERRF(
"open64(\"%s\", %d, %#o) failed. Error: %s",
570 testFileName, fd_oflag, mode, strerror(
errno));
573 #ifdef HAVE_LUSTRE_USER 577 lustre_disable_file_locks(pfd->
fd);
581 #ifdef HAVE_GPFS_FCNTL_H 586 gpfs_free_all_locks(pfd->
fd);
588 #ifdef HAVE_GPFSFINEGRAINWRITESHARING_T 591 gpfs_fineGrainWriteSharing(pfd->
fd);
595 #ifdef HAVE_GPU_DIRECT 610 ret = mknod(testFileName, S_IFREG | S_IRUSR, 0);
624 fd_oflag |= O_RDONLY;
626 fd_oflag |= O_WRONLY;
639 pfd->
fd =
open64(testFileName, fd_oflag);
641 ERRF(
"open64(\"%s\", %d) failed: %s", testFileName, fd_oflag, strerror(
errno));
643 #ifdef HAVE_LUSTRE_USER 645 lustre_disable_file_locks(pfd->
fd);
649 #ifdef HAVE_GPFS_FCNTL_H 651 gpfs_free_all_locks(pfd->
fd);
653 #ifdef HAVE_GPFSFINEGRAINWRITESHARING_T 656 gpfs_fineGrainReadSharing(pfd->
fd);
660 #ifdef HAVE_GPU_DIRECT 675 long long remaining = (
long long)length;
676 char *ptr = (
char *)buffer;
687 #ifdef HAVE_GPFS_FCNTL_H 689 gpfs_access_start(fd, length, offset, access);
695 if (
lseek64(fd, offset, SEEK_SET) == -1)
696 ERRF(
"lseek64(%d, %lld, SEEK_SET) failed", fd, offset);
697 off_t mem_offset = 0;
701 .l_whence = SEEK_SET,
704 .l_type = access ==
WRITE ? F_WRLCK : F_RDLCK,
706 if(fcntl(fd, F_SETLKW, &lck) != 0){
707 WARN(
"Error with F_SETLKW");
710 while (remaining > 0) {
712 if (access ==
WRITE) {
714 INFOF(
"task %d writing to offset %lld\n",
716 offset + length - remaining);
718 #ifdef HAVE_GPU_DIRECT 720 rc = cuFileWrite(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset);
723 rc = write(fd, ptr, remaining);
724 #ifdef HAVE_GPU_DIRECT 728 WARNF(
"write(%d, %p, %lld) failed %s", fd, (
void*)ptr, remaining, strerror(
errno));
735 INFOF(
"task %d reading from offset %lld\n",
737 offset + length - remaining);
739 #ifdef HAVE_GPU_DIRECT 741 rc = cuFileRead(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset);
744 rc = read(fd, ptr, remaining);
745 #ifdef HAVE_GPU_DIRECT 749 WARNF(
"read(%d, %p, %lld) returned EOF prematurely", fd, (
void*)ptr, remaining);
750 return length - remaining;
754 WARNF(
"read(%d, %p, %lld) failed %s", fd, (
void*)ptr, remaining, strerror(
errno));
755 return length - remaining;
758 if (rc < remaining) {
759 WARNF(
"task %d, partial %s, %lld of %lld bytes at offset %lld\n",
761 access ==
WRITE ?
"write()" :
"read()",
763 offset + length - remaining);
765 WARN(
"too many retries -- aborting");
766 return length - remaining;
770 assert(rc <= remaining);
778 .l_whence = SEEK_SET,
783 if(fcntl(fd, F_SETLK, &lck) != 0){
784 WARN(
"Error with F_UNLCK");
787 #ifdef HAVE_GPFS_FCNTL_H 789 gpfs_access_end(fd, length, offset, access);
799 WARNF(
"fsync(%d) failed", fd);
805 int ret = system(
"sync");
807 FAIL(
"Error executing the sync command, ensure it exists.");
821 #ifdef HAVE_GPU_DIRECT 823 cuFileHandleDeregister(((
posix_fd*) afd)->cf_handle);
827 ERRF(
"close(%d) failed", fd);
839 if (unlink(testFileName) != 0){
840 WARNF(
"[RANK %03d]: unlink() of file \"%s\" failed",
rank, testFileName);
848 if(rename(oldfile, newfile) != 0){
849 WARNF(
"[RANK %03d]: rename() of file \"%s\" to \"%s\" failed",
rank, oldfile, newfile);
862 struct stat stat_buf;
863 IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
865 if (stat(testFileName, &stat_buf) != 0) {
866 ERRF(
"stat(\"%s\", ...) failed", testFileName);
868 aggFileSizeFromStat = stat_buf.st_size;
870 return (aggFileSizeFromStat);
874 #ifdef HAVE_GPU_DIRECT 875 CUfileError_t err = cuFileDriverOpen();
880 #ifdef HAVE_GPU_DIRECT 881 CUfileError_t err = cuFileDriverClose();
static void POSIX_Finalize(aiori_mod_opt_t *options)
struct benchmark_options o
int POSIX_Mknod(char *testFileName)
static IOR_offset_t POSIX_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *)
static void POSIX_Initialize(aiori_mod_opt_t *options)
void POSIX_Close(aiori_fd_t *afd, aiori_mod_opt_t *param)
int POSIX_Rename(const char *oldfile, const char *newfile, aiori_mod_opt_t *module_options)
#define MPI_CHECK(MPI_STATUS, MSG)
int aiori_posix_stat(const char *path, struct stat *buf, aiori_mod_opt_t *module_options)
aiori_fd_t * POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t *param)
char * aiori_get_version()
aiori_fd_t * POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t *param)
static option_help options[]
void POSIX_Delete(char *testFileName, aiori_mod_opt_t *param)
static aiori_xfer_hint_t * hints
void POSIX_Sync(aiori_mod_opt_t *param)
int POSIX_check_params(aiori_mod_opt_t *param)
int aiori_posix_access(const char *path, int mode, aiori_mod_opt_t *module_options)
#define WARNF(FORMAT,...)
int aiori_posix_rmdir(const char *path, aiori_mod_opt_t *module_options)
void POSIX_Fsync(aiori_fd_t *afd, aiori_mod_opt_t *param)
void POSIX_xfer_hints(aiori_xfer_hint_t *params)
int aiori_posix_mkdir(const char *path, mode_t mode, aiori_mod_opt_t *module_options)
#define INFOF(FORMAT,...)
int aiori_posix_statfs(const char *path, ior_aiori_statfs_t *stat_buf, aiori_mod_opt_t *module_options)
void set_o_direct_flag(int *flag)
option_help * POSIX_options(aiori_mod_opt_t **init_backend_options, aiori_mod_opt_t *init_values)
long long int IOR_offset_t
int gpfs_finegrain_readsharing
IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t *test, char *testFileName)
int gpfs_finegrain_writesharing
void * safeMalloc(uint64_t size)