Whamcloud - gitweb
LU-8585 llapi: use open_by_handle_at in llapi_open_by_fid 03/36603/21
author: Quentin Bouget <quentin.bouget@cea.fr>
Sun, 2 Jan 2022 16:12:42 +0000 (11:12 -0500)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 29 Nov 2022 07:06:38 +0000 (07:06 +0000)
Reimplement llapi_open_by_fid() to use llapi_fid_to_handle() and
open_by_handle_at(2) rather than using ioctl().  This works for
opens on subdirectory mountpoints, unlike ".lustre/fid/<fid>".

This patch also adds llapi_open_by_fid_at() which is similar to
llapi_open_by_fid() except that it takes an open directory file
descriptor or AT_FDCWD rather than a path as its first argument.

[AD:
- Move get_root_*() functions over to a new liblustreapi_root.c
  file in expectation of further enhancements to that code.
- Cache an open file handle on the root directory so repeated
  calls to llapi_open_by_fid() and llapi_fid2path() do not need
  to search for and open the same root directory path many times.
- Add man pages for newly-added functions.

  This reduces the system calls for llapi_fid_test significantly:

      original     patched
         14511        4315   total opens
         64807       34067   total syscalls
]

There may still be a need to have a fallback from open_by_handle_at()
to using ".lustre/fid/<FID>" to open the fid (if available), but
that can be added if this initial patch does not test well.  The
open_by_handle_at() method avoids reopening the "fid/" directory
each time (though this fd could also be cached), but it has the
drawback that it reconnects dentries to the root directory each time.

Signed-off-by: Quentin Bouget <quentin.bouget@cea.fr>
Change-Id: I8a4904c996389da2b0894cd9fac639a398607535
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/36603
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Emoly Liu <emoly@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
13 files changed:
lustre/doc/Makefile.am
lustre/doc/llapi_fid_to_handle.3 [new file with mode: 0644]
lustre/doc/llapi_open_by_fid.3 [new file with mode: 0644]
lustre/doc/llapi_open_by_fid_at.3 [new file with mode: 0644]
lustre/include/lustre/lustreapi.h
lustre/tests/sanity.sh
lustre/utils/Makefile.am
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_fid.c
lustre/utils/liblustreapi_hsm.c
lustre/utils/liblustreapi_pcc.c
lustre/utils/liblustreapi_root.c [new file with mode: 0644]
lustre/utils/lustreapi_internal.h

index 7546f96..ba2a623 100644 (file)
@@ -101,6 +101,7 @@ LIBMAN =                                    \
        llapi_create_volatile_param.3           \
        llapi_fd2parent.3                       \
        llapi_fid_parse.3                       \
+       llapi_fid_to_handle.3                   \
        llapi_file_create.3                     \
        llapi_file_create_foreign.3             \
        llapi_file_get_stripe.3                 \
@@ -162,6 +163,8 @@ LIBMAN =                                    \
        llapi_layout_stripe_count_set.3         \
        llapi_layout_stripe_size_get.3          \
        llapi_layout_stripe_size_set.3          \
+       llapi_open_by_fid.3                     \
+       llapi_open_by_fid_at.3                  \
        llapi_param_get_paths.3                 \
        llapi_param_get_value.3                 \
        llapi_path2fid.3                        \
diff --git a/lustre/doc/llapi_fid_to_handle.3 b/lustre/doc/llapi_fid_to_handle.3
new file mode 100644 (file)
index 0000000..97b4372
--- /dev/null
@@ -0,0 +1,48 @@
+.TH llapi_fid_to_handle 3 "2022-11-08" "Lustre User API"
+.SH NAME
+llapi_fid_to_handle \- allocate an NFS handle for File Identifier
+.SH SYNOPSIS
+.nf
+.B #include <lustre/lustreapi.h>
+.PP
+.BI "int llapi_fid_to_handle(struct file_handle **" handle ", const struct lu_fid *" fid ");"
+.fi
+.SH DESCRIPTION
+.PP
+.BR llapi_fid_to_handle()
+allocates an NFS file
+.I handle
+for the File Identifier given by
+.I fid
+for later use by
+.BR open_by_handle_at (2).
+This is similar to the
+.BR name_to_handle_at (2)
+function, and is useful for Lustre-specific tools and interfaces.  While the
+.BR llapi_fid_to_handle (3)
+function does not require root access itself, the
+.BR open_by_handle_at (2)
+function
+.B does
+require root access since this bypasses all pathname-derived file access
+permissions.
+The memory allocated for
+.I handle
+and returned to the caller must be freed with
+.BR free (3).
+.SH RETURN VALUES
+.LP
+returns 0 on success or a negative errno on failure.
+.SH ERRORS
+.TP 15
+.SM -ENOMEM
+No memory was available for allocating
+.IR handle .
+.SH SEE ALSO
+.BR free (3),
+.BR llapi_fid_parse (3),
+.BR llapi_fid2path (3),
+.BR llapi_path2fid (3),
+.BR lustreapi (7),
+.BR name_to_handle_at (2),
+.BR open_by_handle_at (2)
diff --git a/lustre/doc/llapi_open_by_fid.3 b/lustre/doc/llapi_open_by_fid.3
new file mode 100644 (file)
index 0000000..a55dae1
--- /dev/null
@@ -0,0 +1,51 @@
+.TH llapi_open_by_fid 3 "2022-11-08" "Lustre User API"
+.SH NAME
+llapi_open_by_fid \- open a file descriptor from a File Identifier
+.SH SYNOPSIS
+.nf
+.B #include <lustre/lustreapi.h>
+.PP
+.BI "int llapi_open_by_fid(const char *" mntpath ", const struct lu_fid *" fid ", int " oflag ");"
+.BI "int llapi_open_by_fid_at(int " dir_fd ", const struct lu_fid *" fid ", int " oflag ");"
+.fi
+.SH DESCRIPTION
+.PP
+.BR llapi_open_by_fid()
+opens a file descriptor in the filesystem mountpoint referenced by
+.I mntpath
+for the file identified by
+.IR fid  ,
+using the open file flags in
+.IR oflag
+(as with the normal
+.BR open (2)
+call).
+.PP
+.BR llapi_open_by_fid_at()
+opens a file descriptor on
+.I fid
+in the filesystem referenced by the open file descriptor
+.IR dir_fd .
+This is similar to the
+.BR open_by_handle_at (2)
+system call that can open files by their NFS file handle.  If
+.I dir_fd
+is
+.B AT_FDCWD
+then the open is performed against the current working directory.
+.SH RETURN VALUES
+.LP
+returns a non-negative file descriptor on success, or a negative errno value on failure.
+.SH ERRORS
+.TP 15
+.SM -EINVAL
+An invalid argument was specified.
+.TP 15
+.SM -ENOMEM
+No memory was available during operation.
+.SH SEE ALSO
+.BR llapi_fid_parse (3),
+.BR llapi_fid2path (3),
+.BR llapi_path2fid (3),
+.BR lustreapi (7),
+.BR open_by_handle_at (2)
diff --git a/lustre/doc/llapi_open_by_fid_at.3 b/lustre/doc/llapi_open_by_fid_at.3
new file mode 100644 (file)
index 0000000..2bb77d3
--- /dev/null
@@ -0,0 +1 @@
+.so man3/llapi_open_by_fid.3
index 23f4f8c..b5020d0 100644 (file)
@@ -513,7 +513,13 @@ int llapi_fd2parent(int fd, unsigned int linkno, struct lu_fid *parent_fid,
                    char *name, size_t name_size);
 int llapi_rmfid(const char *path, struct fid_array *fa);
 int llapi_chomp_string(char *buf);
-int llapi_open_by_fid(const char *dir, const struct lu_fid *fid,
+
+struct file_handle;
+
+int llapi_fid_to_handle(struct file_handle **handle, const struct lu_fid *fid);
+int llapi_open_by_fid_at(int lustre_fd, const struct lu_fid *fid,
+                        int open_flags);
+int llapi_open_by_fid(const char *lustre_dir, const struct lu_fid *fid,
                      int open_flags);
 int llapi_get_version_string(char *version, unsigned int version_size);
 /* llapi_get_version() is deprecated, use llapi_get_version_string() instead */
index c51d9dc..8d3b00e 100755 (executable)
@@ -15886,7 +15886,6 @@ run_test 154f "get parent fids by reading link ea"
 
 test_154g()
 {
-       [ -n "$FILESET" ] && skip "SKIP due to FILESET set"
        [[ $MDS1_VERSION -ge $(version_code 2.6.92) &&
           $CLIENT_VERSION -gt $(version_code 2.6.99) ]] ||
                skip "Need MDS version at least 2.6.92"
index 55863ef..6a201fe 100644 (file)
@@ -109,6 +109,7 @@ liblustreapi_la_SOURCES = liblustreapi.c liblustreapi_hsm.c \
                          liblustreapi_mirror.c liblustreapi_fid.c \
                          liblustreapi_ladvise.c liblustreapi_chlg.c \
                          liblustreapi_heat.c liblustreapi_pcc.c \
+                         liblustreapi_root.c \
                          liblustreapi_lseek.c liblustreapi_swap.c
 liblustreapi_la_LDFLAGS = $(LIBREADLINE) -version-info 1:0:0 \
                          -Wl,--version-script=liblustreapi.map
index 5614fbb..e73a9cd 100644 (file)
@@ -84,8 +84,6 @@
 #include "lstddef.h"
 
 #define FORMATTED_BUF_LEN      1024
-#define MAX_LINE_LEN           256
-#define MAX_INSTANCE_LEN       32
 
 static int llapi_msg_level = LLAPI_MSG_MAX;
 const char *liblustreapi_cmd;
@@ -1212,341 +1210,6 @@ int llapi_dir_create_pool(const char *name, int mode, int stripe_offset,
        return llapi_dir_create(name, mode, &param);
 }
 
-static int get_file_dev(const char *path, dev_t *dev)
-{
-#ifdef HAVE_STATX
-       struct statx stx;
-
-       if (!dev)
-               return -EINVAL;
-       if (statx(AT_FDCWD, path, 0, 0, &stx))
-               return -errno;
-       *dev = makedev(stx.stx_dev_major, stx.stx_dev_minor);
-#else
-       struct stat st;
-
-       if (!dev)
-               return -EINVAL;
-       if (stat(path, &st) != 0)
-               return -errno;
-
-       *dev = st.st_dev;
-#endif
-       return 0;
-}
-
-static int get_root_fd(const char *rootpath, int *outfd)
-{
-       int rc = 0;
-       int fd;
-
-       fd = open(rootpath, O_RDONLY | O_DIRECTORY | O_NONBLOCK);
-       if (fd < 0) {
-               rc = -errno;
-               llapi_error(LLAPI_MSG_ERROR, rc,
-                           "cannot open '%s'", rootpath);
-       } else {
-               *outfd = fd;
-       }
-
-       return rc;
-}
-
-static struct {
-       dev_t dev;
-       char fsname[PATH_MAX];
-       char mnt_dir[PATH_MAX];
-       char nid[MAX_LINE_LEN];
-} root_cached = { 0 };
-
-static pthread_rwlock_t root_cached_lock = PTHREAD_RWLOCK_INITIALIZER;
-
-static int get_root_path_fast(int want, char *fsname, int *outfd, char *path,
-                             dev_t *dev, char *nid)
-{
-       int rc = -ENODEV;
-       int fsnamelen;
-       int mntlen;
-
-       pthread_rwlock_rdlock(&root_cached_lock);
-       if (root_cached.dev == 0)
-               goto out_unlock;
-
-       fsnamelen = strlen(root_cached.fsname);
-       mntlen = strlen(root_cached.mnt_dir);
-
-       /* Check the fsname for a match, if given */
-       if (!(want & WANT_FSNAME) && fsname &&
-           strlen(fsname) == fsnamelen &&
-           (strncmp(root_cached.fsname, fsname, fsnamelen) == 0)) {
-               rc = 0;
-               /* Check the dev for a match, if given */
-       } else if (!(want & WANT_DEV) && dev && *dev == root_cached.dev) {
-               rc = 0;
-               /* Otherwise find the longest matching path */
-       } else if (path && strlen(path) >= mntlen &&
-                  (strncmp(root_cached.mnt_dir, path, mntlen) == 0) &&
-                  (strlen(path) == mntlen || path[mntlen] == '/')) {
-               rc = 0;
-       }
-
-       if (rc)
-               goto out_unlock;
-
-       if ((want & WANT_FSNAME) && fsname)
-               strcpy(fsname, root_cached.fsname);
-       if ((want & WANT_PATH) && path)
-               strcpy(path, root_cached.mnt_dir);
-       if ((want & WANT_DEV) && dev)
-               *dev = root_cached.dev;
-       if ((want & WANT_FD) && outfd)
-               rc = get_root_fd(root_cached.mnt_dir, outfd);
-       if ((want & WANT_NID) && nid)
-               strcpy(nid, root_cached.nid);
-out_unlock:
-       pthread_rwlock_unlock(&root_cached_lock);
-       return rc;
-}
-
-static int get_root_path_slow(int want, char *fsname, int *outfd, char *path,
-                             int index, dev_t *dev, char *nid)
-{
-       struct mntent mnt;
-       char buf[PATH_MAX];
-       char *ptr, *ptr_end;
-       FILE *fp;
-       int idx = -1, mntlen = 0;
-       int rc = -ENODEV;
-       int fsnamelen = 0;
-       dev_t devmnt = 0;
-
-       /* get the mount point */
-       fp = setmntent(PROC_MOUNTS, "r");
-       if (fp == NULL) {
-               rc = -EIO;
-               llapi_error(LLAPI_MSG_ERROR, rc,
-                           "cannot retrieve filesystem mount point");
-               return rc;
-       }
-       while (getmntent_r(fp, &mnt, buf, sizeof(buf))) {
-
-               if (!llapi_is_lustre_mnt(&mnt))
-                       continue;
-
-               idx++;
-               mntlen = strlen(mnt.mnt_dir);
-               ptr = strchr(mnt.mnt_fsname, '/');
-               while (ptr && *ptr == '/')
-                       ptr++;
-               /*
-                * thanks to the call to llapi_is_lustre_mnt() above,
-                * we are sure that mnt.mnt_fsname contains ":/",
-                * so ptr should never be NULL
-                */
-               if (ptr == NULL)
-                       continue;
-               ptr_end = ptr;
-               while (*ptr_end != '/' && *ptr_end != '\0')
-                       ptr_end++;
-
-               fsnamelen = ptr_end - ptr;
-
-               /* ignore unaccessible filesystem */
-               if (get_file_dev(mnt.mnt_dir, &devmnt))
-                       continue;
-
-               if ((want & WANT_INDEX) && idx == index) {
-                       rc = 0;
-                       break;
-               }
-
-               /* Check the fsname for a match, if given */
-               if (!(want & WANT_FSNAME) && fsname &&
-                   strlen(fsname) == fsnamelen &&
-                   (strncmp(ptr, fsname, fsnamelen) == 0)) {
-                       rc = 0;
-                       break;
-               }
-
-               /* Check the dev for a match, if given */
-               if (!(want & WANT_DEV) && dev && *dev == devmnt) {
-                       rc = 0;
-                       break;
-               }
-
-               /* Otherwise find the longest matching path */
-               if (path && strlen(path) >= mntlen &&
-                   (strncmp(mnt.mnt_dir, path, mntlen) == 0) &&
-                   (strlen(path) == mntlen || path[mntlen] == '/')) {
-                       rc = 0;
-                       break;
-               }
-       }
-
-       if (rc)
-               goto out;
-
-       /* Found it */
-       if (!(want & WANT_INDEX)) {
-               /* Cache the mount point information */
-               pthread_rwlock_wrlock(&root_cached_lock);
-
-               strncpy(root_cached.fsname, ptr, fsnamelen);
-               root_cached.fsname[fsnamelen] = '\0';
-               strncpy(root_cached.mnt_dir, mnt.mnt_dir, mntlen);
-               root_cached.mnt_dir[mntlen] = '\0';
-               root_cached.dev = devmnt;
-               ptr_end = strchr(mnt.mnt_fsname, ':');
-               strncpy(root_cached.nid, mnt.mnt_fsname,
-                       ptr_end - mnt.mnt_fsname);
-               root_cached.nid[ptr_end - mnt.mnt_fsname] = '\0';
-
-               pthread_rwlock_unlock(&root_cached_lock);
-       }
-
-       if ((want & WANT_FSNAME) && fsname) {
-               strncpy(fsname, ptr, fsnamelen);
-               fsname[fsnamelen] = '\0';
-       }
-       if ((want & WANT_PATH) && path) {
-               strncpy(path, mnt.mnt_dir, mntlen);
-               path[mntlen] = '\0';
-       }
-       if ((want & WANT_DEV) && dev)
-               *dev = devmnt;
-       if ((want & WANT_FD) && outfd)
-               rc = get_root_fd(mnt.mnt_dir, outfd);
-       if ((want & WANT_NID) && nid) {
-               ptr_end = strchr(mnt.mnt_fsname, ':');
-               strncpy(nid, mnt.mnt_fsname, ptr_end - mnt.mnt_fsname);
-               nid[ptr_end - mnt.mnt_fsname] = '\0';
-       }
-
-out:
-       endmntent(fp);
-       return rc;
-}
-
-/*
- * Find the fsname, the full path, and/or an open fd.
- * Either the fsname or path must not be NULL
- */
-int get_root_path(int want, char *fsname, int *outfd, char *path, int index,
-                 dev_t *dev, char *nid)
-{
-       int rc = -ENODEV;
-
-       if (!(want & WANT_INDEX))
-               rc = get_root_path_fast(want, fsname, outfd, path, dev, nid);
-       if (rc)
-               rc = get_root_path_slow(want, fsname, outfd, path, index, dev,
-                                       nid);
-
-       if (!rc || !(want & WANT_ERROR))
-               return rc;
-
-       if (dev || !(want & WANT_DEV))
-               llapi_err_noerrno(LLAPI_MSG_ERROR,
-                                 "'%u/%u' dev not on a mounted Lustre filesystem",
-                                 major(*dev), minor(*dev));
-       else
-               llapi_err_noerrno(LLAPI_MSG_ERROR,
-                                 "'%s' not on a mounted Lustre filesystem",
-                                 (want & WANT_PATH) ? fsname : path);
-       return rc;
-}
-
-/*
- * search lustre mounts
- *
- * Calling this function will return to the user the mount point, mntdir, and
- * the file system name, fsname, if the user passed a buffer to this routine.
- *
- * The user inputs are pathname and index. If the pathname is supplied then
- * the value of the index will be ignored. The pathname will return data if
- * the pathname is located on a lustre mount. Index is used to pick which
- * mount point you want in the case of multiple mounted lustre file systems.
- * See function lfs_osts in lfs.c for an example of the index use.
- */
-int llapi_search_mounts(const char *pathname, int index, char *mntdir,
-                       char *fsname)
-{
-       int want = WANT_PATH, idx = -1;
-
-       if (!pathname || pathname[0] == '\0') {
-               want |= WANT_INDEX;
-               idx = index;
-       } else {
-               strcpy(mntdir, pathname);
-       }
-
-       if (fsname)
-               want |= WANT_FSNAME;
-       return get_root_path(want, fsname, NULL, mntdir, idx, NULL, NULL);
-}
-
-/* Given a path, find the corresponding Lustre fsname */
-int llapi_search_fsname(const char *pathname, char *fsname)
-{
-       dev_t dev;
-       int rc;
-
-       rc = get_file_dev(pathname, &dev);
-       if (rc) {
-               char tmp[PATH_MAX];
-               char *parent;
-               int len;
-
-               /* file does not exist try the parent */
-               len = readlink(pathname, tmp, PATH_MAX);
-               if (len != -1)
-                       tmp[len] = '\0';
-               else
-                       strncpy(tmp, pathname, PATH_MAX - 1);
-
-               parent = dirname(tmp);
-               rc = get_file_dev(parent, &dev);
-       }
-
-       if (rc) {
-               llapi_error(LLAPI_MSG_ERROR, rc,
-                           "cannot resolve path '%s'", pathname);
-               return rc;
-       }
-
-       rc = get_root_path(WANT_FSNAME | WANT_ERROR, fsname, NULL, NULL, -1,
-                          &dev, NULL);
-
-       return rc;
-}
-
-int llapi_search_rootpath(char *pathname, const char *fsname)
-{
-       if (!pathname)
-               return -EINVAL;
-
-       /*
-        * pathname can be used as an argument by get_root_path(),
-        * clear it for safety
-        */
-       pathname[0] = 0;
-       return get_root_path(WANT_PATH, (char *)fsname, NULL, pathname, -1,
-                            NULL, NULL);
-}
-
-int llapi_search_rootpath_by_dev(char *pathname, dev_t dev)
-{
-       if (!pathname)
-               return -EINVAL;
-
-       /*
-        * pathname can be used as an argument by get_root_path(),
-        * clear it for safety
-        */
-       pathname[0] = 0;
-       return get_root_path(WANT_PATH, NULL, NULL, pathname, -1, &dev, NULL);
-}
-
 /**
  * Get the list of pool members.
  * \param poolname    string of format \<fsname\>.\<poolname\>
index cc76ae6..18b2729 100644 (file)
@@ -30,7 +30,7 @@
  * Copyright (c) 2018, 2019, Data Direct Networks
  */
 
-/* for O_DIRECTORY */
+/* for O_DIRECTORY and struct file_handle */
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE
 #endif
@@ -43,6 +43,7 @@
 #include <sys/ioctl.h>
 #include <sys/xattr.h>
 #include <unistd.h>
+#include <sched.h>
 
 #include <libcfs/util/ioctl.h>
 #include <lustre/lustreapi.h>
@@ -222,16 +223,6 @@ int llapi_fid2path(const char *path_or_device, const char *fidstr, char *path,
                goto out;
        }
 
-       if (*path_or_device == '/')
-               rc = get_root_path(WANT_FD, NULL, &mnt_fd,
-                                  (char *)path_or_device, -1, NULL, NULL);
-       else
-               rc = get_root_path(WANT_FD, (char *)path_or_device,
-                                  &mnt_fd, NULL, -1, NULL, NULL);
-
-       if (rc < 0)
-               goto out;
-
        rc = llapi_fid_parse(fidstr, &fid, NULL);
        if (rc < 0) {
                llapi_err_noerrno(LLAPI_MSG_ERROR,
@@ -241,11 +232,20 @@ int llapi_fid2path(const char *path_or_device, const char *fidstr, char *path,
                goto out;
        }
 
+       if (path_or_device[0] == '/')
+               rc = get_root_path(WANT_FD, NULL, &mnt_fd,
+                                  (char *)path_or_device, -1, NULL, NULL);
+       else
+               rc = get_root_path(WANT_FD, (char *)path_or_device,
+                                  &mnt_fd, NULL, -1, NULL, NULL);
+
+       if (rc < 0)
+               goto out;
+
+       /* mnt_fd is cached internally, no need to close it */
        rc = llapi_fid2path_at(mnt_fd, &fid, path, pathlen, recno, linkno);
-out:
-       if (!(mnt_fd < 0))
-               close(mnt_fd);
 
+out:
        return rc;
 }
 
@@ -398,6 +398,69 @@ int llapi_path2parent(const char *path, unsigned int linkno,
 }
 
 /**
+ * Convert a struct lu_fid into a struct file_handle
+ *
+ * \param[out] _handle a newly allocated struct file_handle on success
+ * \param[in]  fid     a Lustre File IDentifier
+ *
+ * \retval             0 on success
+ * \retval             negative errno if an error occurred
+ *
+ * On success, the caller is responsible for freeing \p _handle.
+ */
+int llapi_fid_to_handle(struct file_handle **_handle, const struct lu_fid *fid)
+{
+       struct lustre_file_handle *lfh;
+       struct file_handle *handle;
+
+       if (!_handle || !fid)
+               return -EINVAL;
+
+       handle = calloc(1, sizeof(*handle) + sizeof(*lfh));
+       if (handle == NULL)
+               return -errno;
+
+       handle->handle_bytes = sizeof(*lfh);
+       handle->handle_type = FILEID_LUSTRE;
+       lfh = (struct lustre_file_handle *)handle->f_handle;
+       /* Only lfh->lfh_child needs to be set */
+       lfh->lfh_child = *fid;
+
+       *_handle = handle;
+       return 0;
+}
+
+/**
+ * Attempt to open a file with a Lustre File IDentifier
+ *
+ * \param[in] lustre_fd                an open file descriptor for an object in lustre
+ * \param[in] fid              a Lustre File IDentifier of the file to open
+ * \param[in] flags            open(2) flags
+ *
+ * \retval                     non-negative file descriptor on success
+ * \retval                     negative errno if an error occurred
+ */
+int llapi_open_by_fid_at(int lustre_fd, const struct lu_fid *fid, int flags)
+{
+       struct file_handle *handle;
+       int fd;
+       int rc;
+
+       rc = llapi_fid_to_handle(&handle, fid);
+       if (rc < 0)
+               return rc;
+
+       /* Sadly open_by_handle_at() only works for root, but this is also the
+        * case for the original approach of opening $MOUNT/.lustre/fid/<FID>.
+        */
+       fd = open_by_handle_at(lustre_fd, handle, flags);
+       rc = -errno;
+       free(handle);
+
+       return fd < 0 ? rc : fd;
+}
+
+/**
  * Attempt to open a file with Lustre file identifier \a fid
  * and return an open file descriptor.
  *
@@ -411,18 +474,17 @@ int llapi_path2parent(const char *path, unsigned int linkno,
 int llapi_open_by_fid(const char *lustre_dir, const struct lu_fid *fid,
                      int flags)
 {
-       char mntdir[PATH_MAX];
-       char path[PATH_MAX + 64];
-       int rc;
+       int mnt_fd, rc;
 
-       rc = llapi_search_mounts(lustre_dir, 0, mntdir, NULL);
+       /* this will return a cached FD if available, so only one open needed.
+        * WANT_FD doesn't modify lustre_dir so casting away "const" is OK */
+       rc = get_root_path(WANT_FD, NULL, &mnt_fd, (char *)lustre_dir, 0, NULL,
+                          NULL);
        if (rc)
-               return rc;
-
-       snprintf(path, sizeof(path), "%s/.lustre/fid/"DFID, mntdir, PFID(fid));
-       rc = open(path, flags);
-       if (rc < 0)
-               rc = -errno;
+               goto out;
 
+       /* "mnt_fd" is cached internally for reuse, no need to close it */
+       rc = llapi_open_by_fid_at(mnt_fd, fid, flags);
+out:
        return rc;
 }
index e634293..374d03f 100644 (file)
@@ -1600,7 +1600,7 @@ int llapi_hsm_request(const char *path, const struct hsm_user_request *request)
        /* If error, save errno value */
        rc = rc ? -errno : 0;
 
-       close(fd);
+       /* fd is cached internally, no need to close it */
        return rc;
 }
 
index a2ed766..5ca405c 100644 (file)
@@ -193,6 +193,9 @@ int llapi_pcc_detach_fd(int fd, __u32 option)
 
        detach.pccd_opt = option;
        rc = ioctl(fd, LL_IOC_PCC_DETACH, &detach);
+       /* If error, save errno value */
+       rc = rc ? -errno : 0;
+
        return rc;
 }
 
@@ -228,8 +231,11 @@ int llapi_pcc_detach_fid(const char *mntpath, const struct lu_fid *fid,
         */
        detach.pccd_fid = *fid;
        detach.pccd_opt = option;
+
+       /* fd is cached internally, no need to close */
        rc = ioctl(fd, LL_IOC_PCC_DETACH_BY_FID, &detach);
-       close(fd);
+       rc = rc ? -errno : 0;
+
        return rc;
 }
 
diff --git a/lustre/utils/liblustreapi_root.c b/lustre/utils/liblustreapi_root.c
new file mode 100644 (file)
index 0000000..7db8b33
--- /dev/null
@@ -0,0 +1,422 @@
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the GNU Lesser General Public License
+ * (LGPL) version 2.1 or (at your discretion) any later version.
+ * (LGPL) version 2.1 accompanies this distribution, and is available at
+ * http://www.gnu.org/licenses/lgpl-2.1.html
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * LGPL HEADER END
+ */
+/*
+ * lustre/utils/liblustreapi_root.c
+ *
+ * lustreapi library for managing the root fd cache for llapi internal use.
+ *
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2017, Intel Corporation.
+ *
+ * Copyright (c) 2018, 2022, Data Direct Networks
+ */
+
+/* for O_DIRECTORY and struct file_handle */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h> /* for dirname() */
+#include <mntent.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h> /* for makedev() */
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <libcfs/util/ioctl.h>
+#include <lustre/lustreapi.h>
+#include <linux/lustre/lustre_fid.h>
+#include "lustreapi_internal.h"
+
+/* could have an array of these for a handful of different paths */
+static struct root_cache {
+       dev_t   dev;
+       char    fsname[PATH_MAX];
+       char    mnt_dir[PATH_MAX];
+       char    nid[MAX_LINE_LEN];
+       int     fd; /* cached fd on filesystem root for internal use only */
+} root_cached = { 0 };
+
+static pthread_rwlock_t root_cached_lock = PTHREAD_RWLOCK_INITIALIZER;
+
+static int get_root_fd(const char *rootpath, int *outfd)
+{
+       int rc = 0;
+       int fd;
+
+       fd = open(rootpath, O_RDONLY | O_DIRECTORY | O_NONBLOCK);
+       if (fd < 0) {
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "cannot open '%s'", rootpath);
+       } else {
+               *outfd = fd;
+       }
+
+       return rc;
+}
+
+static int get_file_dev(const char *path, dev_t *dev)
+{
+#ifdef HAVE_STATX
+       struct statx stx;
+
+       if (!dev)
+               return -EINVAL;
+       if (statx(AT_FDCWD, path, 0, 0, &stx))
+               return -errno;
+       *dev = makedev(stx.stx_dev_major, stx.stx_dev_minor);
+#else
+       struct stat st;
+
+       if (!dev)
+               return -EINVAL;
+       if (stat(path, &st) != 0)
+               return -errno;
+
+       *dev = st.st_dev;
+#endif
+       return 0;
+}
+
+static int get_root_path_fast(int want, char *fsname, int *outfd, char *path,
+                             dev_t *dev, char *nid)
+{
+       int rc = -ENODEV;
+       int fsnamelen;
+       int mntlen;
+
+       if (root_cached.dev == 0)
+               return rc;
+
+       /* hold a write lock on the cache if fd is going to be updated */
+       if ((want & WANT_FD) && outfd && root_cached.fd <= 0)
+               pthread_rwlock_wrlock(&root_cached_lock);
+       else
+               pthread_rwlock_rdlock(&root_cached_lock);
+
+       if (root_cached.dev == 0)
+               goto out_unlock;
+
+       fsnamelen = strlen(root_cached.fsname);
+       mntlen = strlen(root_cached.mnt_dir);
+
+       /* Check the dev for a match, if given */
+       if (!(want & WANT_DEV) && dev && *dev == root_cached.dev) {
+               rc = 0;
+       /* Check the fsname for a match, if given */
+       } else if (!(want & WANT_FSNAME) && fsname &&
+                  strlen(fsname) == fsnamelen &&
+                  (strncmp(root_cached.fsname, fsname, fsnamelen) == 0)) {
+               rc = 0;
+       /* Otherwise find the longest matching path */
+       } else if (path && strlen(path) >= mntlen &&
+                  (strncmp(root_cached.mnt_dir, path, mntlen) == 0) &&
+                  (strlen(path) == mntlen || path[mntlen] == '/')) {
+               rc = 0;
+       }
+
+       if (rc)
+               goto out_unlock;
+
+       if ((want & WANT_FSNAME) && fsname)
+               strcpy(fsname, root_cached.fsname);
+       if ((want & WANT_PATH) && path)
+               strcpy(path, root_cached.mnt_dir);
+       if ((want & WANT_DEV) && dev)
+               *dev = root_cached.dev;
+       if ((want & WANT_FD) && outfd) {
+               if (root_cached.fd > 0) {
+                       *outfd = root_cached.fd;
+               } else {
+                       rc = get_root_fd(root_cached.mnt_dir, outfd);
+                       if (!rc)
+                               root_cached.fd = *outfd;
+               }
+       }
+       if ((want & WANT_NID) && nid)
+               strcpy(nid, root_cached.nid);
+out_unlock:
+       pthread_rwlock_unlock(&root_cached_lock);
+
+       return rc;
+}
+
+/*
+ * Look up a Lustre mount by walking PROC_MOUNTS entry by entry.
+ *
+ * Entries rejected by llapi_is_lustre_mnt() and entries whose mount
+ * directory fails get_file_dev() are skipped.  The first remaining entry
+ * that satisfies one of the tests below, tried in this order, is selected:
+ *  - WANT_INDEX is set and the entry is the @index-th Lustre entry
+ *  - @fsname is given, WANT_FSNAME is not set, and the fsname parsed from
+ *    mnt_fsname is equal to @fsname
+ *  - @dev is given, WANT_DEV is not set, and *@dev equals the entry's device
+ *  - @path is given and the mount directory is a prefix of @path that ends
+ *    on a path component boundary
+ *
+ * Unless WANT_INDEX is set, the selected entry is stored in root_cached
+ * under the write lock.  The WANT_* bits then select which of @fsname,
+ * @path, @dev, @outfd and @nid are filled in from the selected entry.
+ *
+ * Returns 0 on success, -EIO if PROC_MOUNTS cannot be opened, -ENODEV if no
+ * entry was selected, or whatever get_root_fd() returned when an fd was
+ * requested.
+ */
+static int get_root_path_slow(int want, char *fsname, int *outfd, char *path,
+                             int index, dev_t *dev, char *nid)
+{
+       struct mntent mnt;
+       char buf[PATH_MAX];
+       char *ptr, *ptr_end;
+       FILE *fp;
+       int idx = -1, mntlen = 0;
+       int rc = -ENODEV;
+       int fsnamelen = 0;
+       dev_t devmnt = 0;
+
+       /* get the mount point */
+       fp = setmntent(PROC_MOUNTS, "r");
+       if (fp == NULL) {
+               rc = -EIO;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "cannot retrieve filesystem mount point");
+               return rc;
+       }
+       while (getmntent_r(fp, &mnt, buf, sizeof(buf))) {
+
+               if (!llapi_is_lustre_mnt(&mnt))
+                       continue;
+
+               /*
+                * idx counts every Lustre entry, including the ones that
+                * are skipped below because they are not accessible
+                */
+               idx++;
+               mntlen = strlen(mnt.mnt_dir);
+               /* the fsname starts after the first run of '/' characters */
+               ptr = strchr(mnt.mnt_fsname, '/');
+               while (ptr && *ptr == '/')
+                       ptr++;
+               /*
+                * thanks to the call to llapi_is_lustre_mnt() above,
+                * we are sure that mnt.mnt_fsname contains ":/",
+                * so ptr should never be NULL
+                */
+               if (ptr == NULL)
+                       continue;
+               /* ... and ends at the next '/' or at the end of the string */
+               ptr_end = ptr;
+               while (*ptr_end != '/' && *ptr_end != '\0')
+                       ptr_end++;
+
+               fsnamelen = ptr_end - ptr;
+
+               /* ignore unaccessible filesystem */
+               if (get_file_dev(mnt.mnt_dir, &devmnt))
+                       continue;
+
+               /*
+                * With WANT_INDEX set, an entry whose position differs from
+                * @index still goes through the fsname/dev/path tests below
+                */
+               if ((want & WANT_INDEX) && idx == index) {
+                       rc = 0;
+                       break;
+               }
+
+               /* Check the fsname for a match, if given */
+               if (!(want & WANT_FSNAME) && fsname &&
+                   strlen(fsname) == fsnamelen &&
+                   (strncmp(ptr, fsname, fsnamelen) == 0)) {
+                       rc = 0;
+                       break;
+               }
+
+               /* Check the dev for a match, if given */
+               if (!(want & WANT_DEV) && dev && *dev == devmnt) {
+                       rc = 0;
+                       break;
+               }
+
+               /*
+                * Otherwise find the longest matching path
+                *
+                * NOTE(review): the loop stops at the first entry whose
+                * mount directory prefixes @path; no length comparison
+                * against other entries is done here.
+                */
+               if (path && strlen(path) >= mntlen &&
+                   (strncmp(mnt.mnt_dir, path, mntlen) == 0) &&
+                   (strlen(path) == mntlen || path[mntlen] == '/')) {
+                       rc = 0;
+                       break;
+               }
+       }
+
+       if (rc)
+               goto out;
+
+       /* Found it */
+       if (!(want & WANT_INDEX)) {
+               /* Cache the mount point information */
+               pthread_rwlock_wrlock(&root_cached_lock);
+
+               /* drop the fd cached for the previously selected mount */
+               if (root_cached.fd > 0) {
+                       close(root_cached.fd);
+                       root_cached.fd = 0;
+               }
+               if ((want & WANT_FD) && outfd)
+                       rc = get_root_fd(mnt.mnt_dir, &root_cached.fd);
+               strncpy(root_cached.fsname, ptr, fsnamelen);
+               root_cached.fsname[fsnamelen] = '\0';
+               strncpy(root_cached.mnt_dir, mnt.mnt_dir, mntlen);
+               root_cached.mnt_dir[mntlen] = '\0';
+               root_cached.dev = devmnt;
+               /*
+                * the NID part is everything before the first ':'
+                *
+                * NOTE(review): the strchr() result is used unchecked;
+                * presumably llapi_is_lustre_mnt() guarantees a ':' is
+                * present - confirm
+                */
+               ptr_end = strchr(mnt.mnt_fsname, ':');
+               strncpy(root_cached.nid, mnt.mnt_fsname,
+                       ptr_end - mnt.mnt_fsname);
+               root_cached.nid[ptr_end - mnt.mnt_fsname] = '\0';
+
+               pthread_rwlock_unlock(&root_cached_lock);
+       }
+
+       /* copy out the fields requested through the WANT_* bits */
+       if ((want & WANT_FSNAME) && fsname) {
+               strncpy(fsname, ptr, fsnamelen);
+               fsname[fsnamelen] = '\0';
+       }
+       if ((want & WANT_PATH) && path) {
+               strncpy(path, mnt.mnt_dir, mntlen);
+               path[mntlen] = '\0';
+       }
+       if ((want & WANT_DEV) && dev)
+               *dev = devmnt;
+       if ((want & WANT_FD) && outfd) {
+               /*
+                * NOTE(review): root_cached.fd is read here after
+                * root_cached_lock has been released
+                */
+               if (root_cached.fd > 0)
+                       *outfd = root_cached.fd;
+               else
+                       rc = get_root_fd(mnt.mnt_dir, outfd);
+       }
+       if ((want & WANT_NID) && nid) {
+               ptr_end = strchr(mnt.mnt_fsname, ':');
+               strncpy(nid, mnt.mnt_fsname, ptr_end - mnt.mnt_fsname);
+               nid[ptr_end - mnt.mnt_fsname] = '\0';
+       }
+
+out:
+       endmntent(fp);
+       return rc;
+}
+
+/*
+ * Find the fsname, the full path, and/or an open fd.
+ * Either the fsname or path must not be NULL.
+ *
+ * The cached lookup (get_root_path_fast()) is tried first unless WANT_INDEX
+ * is set; if it fails, the mount table is scanned by get_root_path_slow().
+ *
+ * If the lookup fails and WANT_ERROR is set, an error message naming the
+ * search key is logged: the device when @dev was passed as an input
+ * (WANT_DEV not set), otherwise @fsname or @path.
+ *
+ * @outfd is for llapi internal use only, do not return it to the application.
+ *
+ * Returns 0 on success or a negative errno; errno is set to -rc in all cases
+ * (so it is 0 on success).
+ */
+int get_root_path(int want, char *fsname, int *outfd, char *path, int index,
+                 dev_t *dev, char *nid)
+{
+       int rc = -ENODEV;
+
+       if (!(want & WANT_INDEX))
+               rc = get_root_path_fast(want, fsname, outfd, path, dev, nid);
+       if (rc)
+               rc = get_root_path_slow(want, fsname, outfd, path, index, dev,
+                                       nid);
+
+       if (!rc || !(want & WANT_ERROR))
+               goto out_errno;
+
+       /*
+        * @dev is the search key only when it is non-NULL and was not
+        * requested as an output.  It must not be dereferenced otherwise.
+        */
+       if (dev && !(want & WANT_DEV)) {
+               llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                 "'%u/%u' dev not on a mounted Lustre filesystem",
+                                 major(*dev), minor(*dev));
+       } else {
+               const char *key = (want & WANT_PATH) ? fsname : path;
+
+               /* passing NULL for %s is undefined behaviour */
+               llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                 "'%s' not on a mounted Lustre filesystem",
+                                 key ? key : "(null)");
+       }
+out_errno:
+       errno = -rc;
+       return rc;
+}
+/*
+ * search lustre mounts
+ *
+ * Calling this function will return to the user the mount point, mntdir, and
+ * the file system name, fsname, if the user passed a buffer to this routine.
+ *
+ * The user inputs are pathname and index. If the pathname is supplied then
+ * the value of the index will be ignored. The pathname will return data if
+ * the pathname is located on a lustre mount. Index is used to pick which
+ * mount point you want in the case of multiple mounted lustre file systems.
+ * See function lfs_osts in lfs.c for an example of the index use.
+ *
+ * Both mntdir and fsname may be NULL. When pathname is supplied and mntdir
+ * is NULL, the lookup is done on a private copy of pathname and
+ * -ENAMETOOLONG is returned if pathname does not fit in PATH_MAX bytes.
+ * mntdir may be the same buffer as pathname.
+ *
+ * Returns the result of get_root_path(): 0 on success, negative errno on
+ * failure.
+ */
+int llapi_search_mounts(const char *pathname, int index, char *mntdir,
+                       char *fsname)
+{
+       char pathbuf[PATH_MAX];
+       int want = WANT_PATH, idx = -1;
+
+       if (!pathname || pathname[0] == '\0') {
+               want |= WANT_INDEX;
+               idx = index;
+       } else if (!mntdir) {
+               /*
+                * get_root_path() takes the search path from its in/out
+                * path buffer, so supply one when the caller did not
+                */
+               if (strlen(pathname) >= sizeof(pathbuf)) {
+                       errno = ENAMETOOLONG;
+                       return -ENAMETOOLONG;
+               }
+               strcpy(pathbuf, pathname);
+               mntdir = pathbuf;
+       } else if (mntdir != pathname) {
+               /* memmove() because the two buffers may overlap */
+               memmove(mntdir, pathname, strlen(pathname) + 1);
+       }
+
+       if (fsname)
+               want |= WANT_FSNAME;
+       return get_root_path(want, fsname, NULL, mntdir, idx, NULL, NULL);
+}
+
+/*
+ * Given a path, find the corresponding Lustre fsname
+ *
+ * The device of @pathname is looked up with get_file_dev(); if that fails,
+ * the device of the parent directory is used instead (of the symlink target
+ * when @pathname is a symlink, of @pathname itself otherwise).  The device
+ * is then resolved to a filesystem name, which is copied into @fsname.
+ *
+ * Returns 0 on success or a negative errno; failures are logged.
+ */
+int llapi_search_fsname(const char *pathname, char *fsname)
+{
+       dev_t dev;
+       int rc;
+
+       rc = get_file_dev(pathname, &dev);
+       if (rc) {
+               char tmp[PATH_MAX];
+               char *parent;
+               int len;
+
+               /*
+                * file does not exist try the parent
+                *
+                * readlink() does not NUL-terminate and may fill the whole
+                * buffer, so leave one byte for the terminator
+                */
+               len = readlink(pathname, tmp, sizeof(tmp) - 1);
+               if (len >= 0) {
+                       tmp[len] = '\0';
+               } else {
+                       /* strncpy() does not terminate a truncated copy */
+                       strncpy(tmp, pathname, sizeof(tmp) - 1);
+                       tmp[sizeof(tmp) - 1] = '\0';
+               }
+
+               parent = dirname(tmp);
+               rc = get_file_dev(parent, &dev);
+       }
+
+       if (rc) {
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "cannot resolve path '%s'", pathname);
+               return rc;
+       }
+
+       rc = get_root_path(WANT_FSNAME | WANT_ERROR, fsname, NULL, NULL, -1,
+                          &dev, NULL);
+
+       return rc;
+}
+
+
+/*
+ * Find the mount point of the Lustre filesystem named @fsname and store it
+ * in @pathname.
+ *
+ * Returns -EINVAL if @pathname is NULL, otherwise the result of
+ * get_root_path().
+ */
+int llapi_search_rootpath(char *pathname, const char *fsname)
+{
+       int rc = -EINVAL;
+
+       if (pathname != NULL) {
+               /*
+                * get_root_path() may read its path argument as a search
+                * key, so hand it an empty string
+                */
+               *pathname = '\0';
+               rc = get_root_path(WANT_PATH, (char *)fsname, NULL, pathname,
+                                  -1, NULL, NULL);
+       }
+
+       return rc;
+}
+
+/*
+ * Find the mount point of the Lustre filesystem whose device is @dev and
+ * store it in @pathname.
+ *
+ * Returns -EINVAL if @pathname is NULL, otherwise the result of
+ * get_root_path().
+ */
+int llapi_search_rootpath_by_dev(char *pathname, dev_t dev)
+{
+       int rc = -EINVAL;
+
+       if (pathname != NULL) {
+               /*
+                * get_root_path() may read its path argument as a search
+                * key, so hand it an empty string
+                */
+               *pathname = '\0';
+               rc = get_root_path(WANT_PATH, NULL, NULL, pathname, -1, &dev,
+                                  NULL);
+       }
+
+       return rc;
+}
+
index 502fb8b..72e2ecc 100644 (file)
@@ -44,6 +44,8 @@
 #include <linux/lustre/lustre_kernelcomm.h>
 
 #define MAX_IOC_BUFLEN 8192
+#define MAX_LINE_LEN    256
+#define MAX_INSTANCE_LEN  32
 
 #define WANT_PATH   0x1
 #define WANT_FSNAME 0x2