IOR
aiori-CEPHFS.c
Go to the documentation of this file.
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  */
4 /******************************************************************************\
5 * *
6 * (C) 2015 The University of Chicago *
7 * (C) 2020 Red Hat, Inc. *
8 * *
9 * See COPYRIGHT in top-level directory. *
10 * *
11 ********************************************************************************
12 *
13 * Implement abstract I/O interface for CEPHFS.
14 *
15 \******************************************************************************/
16 
17 #ifdef HAVE_CONFIG_H
18 # include "config.h"
19 #endif
20 
21 #include <errno.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/stat.h>
25 #include <cephfs/libcephfs.h>
26 
27 #include "ior.h"
28 #include "iordef.h"
29 #include "aiori.h"
30 #include "utilities.h"
31 
/*
 * Open-flag bits handed to ceph_open(), written in octal.
 * They are defined locally in this file; CEPHFS_Open() translates the
 * IOR_* open flags into these values.
 */
#define CEPH_O_RDONLY     0
#define CEPH_O_WRONLY     01
#define CEPH_O_RDWR       02
#define CEPH_O_CREAT      0100
#define CEPH_O_EXCL       0200
#define CEPH_O_TRUNC      01000
#define CEPH_O_LAZY       020000
#define CEPH_O_DIRECTORY  0200000
#define CEPH_O_NOFOLLOW   0400000
41 
42 /************************** O P T I O N S *****************************/
/*
 * Backend settings filled in from the "cephfs.*" command-line options.
 */
struct cephfs_options {
        char * user;           /* passed to ceph_create() */
        char * conf;           /* passed to ceph_conf_read_file() */
        char * prefix;         /* leading path component stripped by pfix() */
        char * remote_prefix;  /* passed to ceph_mount(); "/" is used when NULL */
        int olazy;             /* when TRUE, ceph_lazyio() is enabled on open */
};

/* Single module-wide option instance; every field starts out unset. */
static struct cephfs_options o = {
        .user = NULL,
        .conf = NULL,
        .prefix = NULL,
        .remote_prefix = NULL,
        .olazy = 0,
};
58 
59 static option_help options [] = {
60  {0, "cephfs.user", "Username for the ceph cluster", OPTION_OPTIONAL_ARGUMENT, 's', & o.user},
61  {0, "cephfs.conf", "Config file for the ceph cluster", OPTION_OPTIONAL_ARGUMENT, 's', & o.conf},
62  {0, "cephfs.prefix", "Mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix},
63  {0, "cephfs.remote_prefix", "Remote mount prefix", OPTION_OPTIONAL_ARGUMENT, 's', & o.remote_prefix},
64  {0, "cephfs.olazy", "Enable Lazy I/O", OPTION_FLAG, 'd', & o.olazy},
66 };
67 
68 static struct ceph_mount_info *cmount;
69 
70 /**************************** P R O T O T Y P E S *****************************/
71 static void CEPHFS_Init();
72 static void CEPHFS_Final();
74 static aiori_fd_t *CEPHFS_Create(char *path, int flags, aiori_mod_opt_t *options);
75 static aiori_fd_t *CEPHFS_Open(char *path, int flags, aiori_mod_opt_t *options);
76 static IOR_offset_t CEPHFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer,
77  IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *options);
78 static void CEPHFS_Close(aiori_fd_t *, aiori_mod_opt_t *);
79 static void CEPHFS_Delete(char *path, aiori_mod_opt_t *);
80 static void CEPHFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *);
82 static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat, aiori_mod_opt_t *options);
83 static int CEPHFS_MkDir(const char *path, mode_t mode, aiori_mod_opt_t *options);
84 static int CEPHFS_RmDir(const char *path, aiori_mod_opt_t *options);
85 static int CEPHFS_Access(const char *path, int mode, aiori_mod_opt_t *options);
86 static int CEPHFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t *options);
87 static void CEPHFS_Sync(aiori_mod_opt_t *);
88 static option_help * CEPHFS_options();
89 
91 
92 /************************** D E C L A R A T I O N S ***************************/
94  .name = "CEPHFS",
95  .name_legacy = NULL,
96  .initialize = CEPHFS_Init,
97  .finalize = CEPHFS_Final,
98  .create = CEPHFS_Create,
99  .open = CEPHFS_Open,
100  .xfer = CEPHFS_Xfer,
101  .close = CEPHFS_Close,
102  .delete = CEPHFS_Delete,
103  .get_options = CEPHFS_options,
104  .get_version = aiori_get_version,
105  .xfer_hints = CEPHFS_xfer_hints,
106  .fsync = CEPHFS_Fsync,
107  .get_file_size = CEPHFS_GetFileSize,
108  .statfs = CEPHFS_StatFS,
109  .mkdir = CEPHFS_MkDir,
110  .rmdir = CEPHFS_RmDir,
111  .access = CEPHFS_Access,
112  .stat = CEPHFS_Stat,
113  .sync = CEPHFS_Sync,
114 };
115 
/*
 * Report a libcephfs failure through ERR().
 *
 * ret_ is a negative errno value (the convention the ceph_* calls in this
 * file are checked against); it is negated into errno before ERR() runs.
 * The argument is parenthesized so that an expression such as `a - b` is
 * negated as a whole.
 */
#define CEPHFS_ERR(err_str_, ret_) do { \
        errno = -(ret_); \
        ERR(err_str_); \
} while(0)
120 
121 /***************************** F U N C T I O N S ******************************/
122 
124 {
125  hints = params;
126 }
127 
128 static const char* pfix(const char* path) {
129  const char* npath = path;
130  const char* prefix = o.prefix;
131  while (*prefix) {
132  if(*prefix++ != *npath++) {
133  return path;
134  }
135  }
136  return npath;
137 }
138 
140  return options;
141 }
142 
143 static void CEPHFS_Init()
144 {
145  char *remote_prefix = "/";
146 
147  /* Short circuit if the options haven't been filled yet. */
148  if (!o.user || !o.conf || !o.prefix) {
149  WARN("CEPHFS_Init() called before options have been populated!");
150  return;
151  }
152  if (o.remote_prefix != NULL) {
153  remote_prefix = o.remote_prefix;
154  }
155 
156  /* Short circuit if the mount handle already exists */
157  if (cmount) {
158  return;
159  }
160 
161  int ret;
162  /* create CEPHFS mount handle */
163  ret = ceph_create(&cmount, o.user);
164  if (ret) {
165  CEPHFS_ERR("unable to create CEPHFS mount handle", ret);
166  }
167 
168  /* set the handle using the Ceph config */
169  ret = ceph_conf_read_file(cmount, o.conf);
170  if (ret) {
171  CEPHFS_ERR("unable to read ceph config file", ret);
172  }
173 
174  /* mount the handle */
175  ret = ceph_mount(cmount, remote_prefix);
176  if (ret) {
177  CEPHFS_ERR("unable to mount cephfs", ret);
178  ceph_shutdown(cmount);
179 
180  }
181 
182  Inode *root;
183 
184  /* try retrieving the root cephfs inode */
185  ret = ceph_ll_lookup_root(cmount, &root);
186  if (ret) {
187  CEPHFS_ERR("uanble to retrieve root cephfs inode", ret);
188  ceph_shutdown(cmount);
189 
190  }
191 
192  return;
193 }
194 
195 static void CEPHFS_Final()
196 {
197  /* shutdown */
198  int ret = ceph_unmount(cmount);
199  if (ret < 0) {
200  CEPHFS_ERR("ceph_umount failed", ret);
201  }
202  ret = ceph_release(cmount);
203  if (ret < 0) {
204  CEPHFS_ERR("ceph_release failed", ret);
205  }
206  cmount = NULL;
207 }
208 
209 static aiori_fd_t *CEPHFS_Create(char *path, int flags, aiori_mod_opt_t *options)
210 {
211  return CEPHFS_Open(path, flags | IOR_CREAT, options);
212 }
213 
214 static aiori_fd_t *CEPHFS_Open(char *path, int flags, aiori_mod_opt_t *options)
215 {
216  const char *file = pfix(path);
217  int* fd;
218  fd = (int *)malloc(sizeof(int));
219 
220  mode_t mode = 0664;
221  int ceph_flags = (int) 0;
222 
223  /* set IOR file flags to CephFS flags */
224  /* -- file open flags -- */
225  if (flags & IOR_RDONLY) {
226  ceph_flags |= CEPH_O_RDONLY;
227  }
228  if (flags & IOR_WRONLY) {
229  ceph_flags |= CEPH_O_WRONLY;
230  }
231  if (flags & IOR_RDWR) {
232  ceph_flags |= CEPH_O_RDWR;
233  }
234  if (flags & IOR_APPEND) {
235  CEPHFS_ERR("File append not implemented in CephFS", EINVAL);
236  }
237  if (flags & IOR_CREAT) {
238  ceph_flags |= CEPH_O_CREAT;
239  }
240  if (flags & IOR_EXCL) {
241  ceph_flags |= CEPH_O_EXCL;
242  }
243  if (flags & IOR_TRUNC) {
244  ceph_flags |= CEPH_O_TRUNC;
245  }
246  if (flags & IOR_DIRECT) {
247  CEPHFS_ERR("O_DIRECT not implemented in CephFS", EINVAL);
248  }
249  *fd = ceph_open(cmount, file, ceph_flags, mode);
250  if (*fd < 0) {
251  CEPHFS_ERR("ceph_open failed", *fd);
252  }
253  if (o.olazy == TRUE) {
254  int ret = ceph_lazyio(cmount, *fd, 1);
255  if (ret != 0) {
256  WARN("Error enabling lazy mode");
257  }
258  }
259  return (void *) fd;
260 }
261 
262 static IOR_offset_t CEPHFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer,
263  IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *options)
264 {
265  uint64_t size = (uint64_t) length;
266  char *buf = (char *) buffer;
267  int fd = *(int *) file;
268  int ret;
269 
270  if (access == WRITE)
271  {
272  ret = ceph_write(cmount, fd, buf, size, offset);
273  if (ret < 0) {
274  CEPHFS_ERR("unable to write file to CephFS", ret);
275  } else if (ret < size) {
276  CEPHFS_ERR("short write to CephFS", ret);
277  }
278  if (hints->fsyncPerWrite == TRUE) {
279  CEPHFS_Fsync(file, options);
280  }
281  }
282  else /* READ */
283  {
284  ret = ceph_read(cmount, fd, buf, size, offset);
285  if (ret < 0) {
286  CEPHFS_ERR("unable to read file from CephFS", ret);
287  } else if (ret < size) {
288  CEPHFS_ERR("short read from CephFS", ret);
289  }
290 
291  }
292  return length;
293 }
294 
295 static void CEPHFS_Fsync(aiori_fd_t *file, aiori_mod_opt_t *options)
296 {
297  int fd = *(int *) file;
298  int ret = ceph_fsync(cmount, fd, 0);
299  if (ret < 0) {
300  CEPHFS_ERR("ceph_fsync failed", ret);
301  }
302 }
303 
304 static void CEPHFS_Close(aiori_fd_t *file, aiori_mod_opt_t *options)
305 {
306  int fd = *(int *) file;
307  int ret = ceph_close(cmount, fd);
308  if (ret < 0) {
309  CEPHFS_ERR("ceph_close failed", ret);
310  }
311  free(file);
312  return;
313 }
314 
315 static void CEPHFS_Delete(char *path, aiori_mod_opt_t *options)
316 {
317  int ret = ceph_unlink(cmount, pfix(path));
318  if (ret < 0) {
319  CEPHFS_ERR("ceph_unlink failed", ret);
320  }
321  return;
322 }
323 
324 static IOR_offset_t CEPHFS_GetFileSize(aiori_mod_opt_t *options, char *path)
325 {
326  struct stat stat_buf;
327  IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
328 
329  int ret = ceph_stat(cmount, pfix(path), &stat_buf);
330  if (ret < 0) {
331  CEPHFS_ERR("ceph_stat failed", ret);
332  }
333  aggFileSizeFromStat = stat_buf.st_size;
334 
335  if (hints->filePerProc == TRUE) {
336  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
337  MPI_LONG_LONG_INT, MPI_SUM, testComm),
338  "cannot total data moved");
339  aggFileSizeFromStat = tmpSum;
340  } else {
341  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
342  MPI_LONG_LONG_INT, MPI_MIN, testComm),
343  "cannot total data moved");
344  MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
345  MPI_LONG_LONG_INT, MPI_MAX, testComm),
346  "cannot total data moved");
347  if (tmpMin != tmpMax) {
348  if (rank == 0) {
349  WARN("inconsistent file size by different tasks");
350  }
351  /* incorrect, but now consistent across tasks */
352  aggFileSizeFromStat = tmpMin;
353  }
354  }
355 
356  return (aggFileSizeFromStat);
357 }
358 
359 static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, aiori_mod_opt_t *options)
360 {
361 #if defined(HAVE_STATVFS)
362  struct statvfs statfs_buf;
363  int ret = ceph_statfs(cmount, pfix(path), &statfs_buf);
364  if (ret < 0) {
365  CEPHFS_ERR("ceph_statfs failed", ret);
366  return -1;
367  }
368 
369  stat_buf->f_bsize = statfs_buf.f_bsize;
370  stat_buf->f_blocks = statfs_buf.f_blocks;
371  stat_buf->f_bfree = statfs_buf.f_bfree;
372  stat_buf->f_files = statfs_buf.f_files;
373  stat_buf->f_ffree = statfs_buf.f_ffree;
374 
375  return 0;
376 #else
377  WARN("ceph_statfs requires statvfs!");
378  return -1;
379 #endif
380 }
381 
382 static int CEPHFS_MkDir(const char *path, mode_t mode, aiori_mod_opt_t *options)
383 {
384  return ceph_mkdir(cmount, pfix(path), mode);
385 }
386 
387 static int CEPHFS_RmDir(const char *path, aiori_mod_opt_t *options)
388 {
389  return ceph_rmdir(cmount, pfix(path));
390 }
391 
392 static int CEPHFS_Access(const char *path, int mode, aiori_mod_opt_t *options)
393 {
394  struct stat buf;
395  return ceph_stat(cmount, pfix(path), &buf);
396 }
397 
398 static int CEPHFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t *options)
399 {
400  return ceph_stat(cmount, pfix(path), buf);
401 }
402 
403 static void CEPHFS_Sync(aiori_mod_opt_t *options)
404 {
405  int ret = ceph_sync_fs(cmount);
406  if (ret < 0) {
407  CEPHFS_ERR("ceph_sync_fs failed", ret);
408  }
409 
410 }
#define CEPH_O_TRUNC
Definition: aiori-CEPHFS.c:37
uint64_t f_blocks
Definition: aiori.h:53
static int CEPHFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:398
uint64_t f_bfree
Definition: aiori.h:54
#define LAST_OPTION
Definition: option.h:39
static struct ceph_mount_info * cmount
Definition: aiori-CEPHFS.c:68
static void CEPHFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *)
Definition: aiori-CEPHFS.c:295
static aiori_fd_t * CEPHFS_Open(char *path, int flags, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:214
static IOR_offset_t CEPHFS_GetFileSize(aiori_mod_opt_t *, char *)
Definition: aiori-CEPHFS.c:324
#define CEPH_O_WRONLY
Definition: aiori-CEPHFS.c:33
static int CEPHFS_Access(const char *path, int mode, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:392
ior_aiori_t cephfs_aiori
Definition: aiori-CEPHFS.c:93
static int CEPHFS_StatFS(const char *path, ior_aiori_statfs_t *stat, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:359
uint64_t f_ffree
Definition: aiori.h:57
#define IOR_APPEND
Definition: aiori.h:31
#define CEPHFS_ERR(__err_str, __ret)
Definition: aiori-CEPHFS.c:116
static int CEPHFS_MkDir(const char *path, mode_t mode, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:382
#define IOR_RDONLY
Definition: aiori.h:28
#define MPI_CHECK(MPI_STATUS, MSG)
Definition: aiori-debug.h:97
#define WRITE
Definition: iordef.h:100
static aiori_xfer_hint_t * hints
Definition: aiori-CEPHFS.c:90
static void CEPHFS_Init()
Definition: aiori-CEPHFS.c:143
static void CEPHFS_Close(aiori_fd_t *, aiori_mod_opt_t *)
Definition: aiori-CEPHFS.c:304
static aiori_fd_t * CEPHFS_Create(char *path, int flags, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:209
#define IOR_CREAT
Definition: aiori.h:32
char * remote_prefix
Definition: aiori-CEPHFS.c:47
#define IOR_EXCL
Definition: aiori.h:34
char * aiori_get_version()
Definition: aiori.c:235
static void CEPHFS_Final()
Definition: aiori-CEPHFS.c:195
static IOR_offset_t CEPHFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:262
uint64_t f_files
Definition: aiori.h:56
MPI_Comm testComm
Definition: utilities.c:73
static option_help options[]
Definition: aiori-CEPHFS.c:59
#define IOR_TRUNC
Definition: aiori.h:33
void CEPHFS_xfer_hints(aiori_xfer_hint_t *params)
Definition: aiori-CEPHFS.c:123
uint64_t f_bsize
Definition: aiori.h:52
#define WARN(MSG)
Definition: aiori-debug.h:45
static void CEPHFS_Delete(char *path, aiori_mod_opt_t *)
Definition: aiori-CEPHFS.c:315
#define CEPH_O_RDWR
Definition: aiori-CEPHFS.c:34
#define IOR_WRONLY
Definition: aiori.h:29
static int CEPHFS_RmDir(const char *path, aiori_mod_opt_t *options)
Definition: aiori-CEPHFS.c:387
#define CEPH_O_RDONLY
Definition: aiori-CEPHFS.c:32
long long int IOR_size_t
Definition: iordef.h:124
#define CEPH_O_CREAT
Definition: aiori-CEPHFS.c:35
static void CEPHFS_Sync(aiori_mod_opt_t *)
Definition: aiori-CEPHFS.c:403
static option_help * CEPHFS_options()
Definition: aiori-CEPHFS.c:139
static const char * pfix(const char *path)
Definition: aiori-CEPHFS.c:128
#define IOR_RDWR
Definition: aiori.h:30
int fsyncPerWrite
Definition: aiori.h:70
char * name
Definition: aiori.h:88
int filePerProc
Definition: aiori.h:65
#define CEPH_O_EXCL
Definition: aiori-CEPHFS.c:36
long long int IOR_offset_t
Definition: iordef.h:123
int rank
Definition: utilities.c:70
#define TRUE
Definition: iordef.h:80
#define IOR_DIRECT
Definition: aiori.h:35
static struct cephfs_options o
Definition: aiori-CEPHFS.c:51
#define NULL
Definition: iordef.h:84