From 4fd7d5585d33240a658f57bf7399da4415a7eb6c Mon Sep 17 00:00:00 2001 From: Etienne AUJAMES Date: Mon, 9 May 2022 15:44:29 +0200 Subject: [PATCH] LU-15833 llapi: don't use realpath in llapi_search_fsname() This patch uses the st_dev value to determine the fsname in llapi_search_fsname(). The main purpose of this is to limit the number of lstat() calls (made by realpath()) in this function. get_root_path() is modified to search a mountpoint by dev. The last result of get_root_path() is cached to avoid reading /proc/mounts on each call. A new API function llapi_search_rootpath_by_dev() is added to get the path of the Lustre mountpoint using the specified device value. **Testing:** *Environment:* VMs: 1 client, 1 MDS (2MDT), 1 OSS (2 OST) Lustre tree: test{001..100}/test{001..100}/test{01..10}/file{01..05} (500000 files + 110100 folders) OS: CentOS 7 (no statx) Lustre: 2.15.50_15_g1116739 *Tests* cd strace lfs getstripe -r . echo 3 > /proc/sys/vm/drop_caches /usr/bin/time lfs getstripe -r . (2 iterations) *Results* times (s): ______________________________ | user | system | real | real% | _______________|______|________|______|_______| |without patch: | 6.18 | 57.3 | 427 | 0% | |_______________|______|________|______|_______| |with patch: | 2.88 | 47.3 | 404 |-5.45% | |_______________|______|________|______|_______| strace (only significant changes are displayed): (*stat = lstat + stat + fstat) _____________________________________________ | *stat | mmap | open | read | all | _______________|________|________|________|________|_________| |without patch: | 760545 | 110142 | 330379 | 330325 | 4742658 | |_______________|________|________|________|________|_________| |with patch: | 440484 | 0 | 220277 | 19 | 3541739 | |_______________|________|________|________|________|_________| -25.32% syscalls after patching. 
Signed-off-by: Etienne AUJAMES Change-Id: I3812d922d5b1d194d52132cba95d11820424c5d7 Reviewed-on: https://review.whamcloud.com/47258 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/doc/Makefile.am | 2 + lustre/doc/llapi_search_rootpath.3 | 49 +++++ lustre/doc/llapi_search_rootpath_by_dev.3 | 2 + lustre/doc/lustreapi.7 | 2 + lustre/include/lustre/lustreapi.h | 1 + lustre/utils/Makefile.am | 3 +- lustre/utils/liblustreapi.c | 343 +++++++++++++++++++----------- lustre/utils/liblustreapi_fid.c | 4 +- lustre/utils/liblustreapi_hsm.c | 2 +- lustre/utils/liblustreapi_pcc.c | 2 +- lustre/utils/lustreapi_internal.h | 4 +- 11 files changed, 289 insertions(+), 125 deletions(-) create mode 100644 lustre/doc/llapi_search_rootpath.3 create mode 100644 lustre/doc/llapi_search_rootpath_by_dev.3 diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 0da96cd..7546f96 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -182,6 +182,8 @@ LIBMAN = \ llapi_search_mdt.3 \ llapi_search_ost.3 \ llapi_search_tgt.3 \ + llapi_search_rootpath.3 \ + llapi_search_rootpath_by_dev.3 \ llapi_unlink_foreign.3 SERVER_MANFILES = \ diff --git a/lustre/doc/llapi_search_rootpath.3 b/lustre/doc/llapi_search_rootpath.3 new file mode 100644 index 0000000..e672826 --- /dev/null +++ b/lustre/doc/llapi_search_rootpath.3 @@ -0,0 +1,49 @@ +.TH llapi_search_rootpath 3 "2022 May 23" "Lustre User API" +.SH NAME +llapi_search_rootpath, llapi_search_rootpath_by_dev \- +retrieve the path of a lustre mount point +.SH SYNOPSIS +.nf +.B #include +.PP +.BI "int llapi_search_rootpath( char *" pathname " ,const char *" fsname ") +.PP +.BI "int llapi_search_rootpath_by_dev( char *" pathname ", dev_t " dev ") +.fi +.SH DESCRIPTION +.PP +.BR llapi_search_rootpath() +and +.BR llapi_search_rootpath_by_dev() +find the first Lustre mount point by matching the filesystem name \fIfsname\fP or the filesystem device \fIdev\fP. 
+.PP +The path of the mount point found is stored into \fIpathname\fP. This buffer should be sufficient to store the path. +.PP +.BR e.g: +char pathname[PATH_MAX]; +.PP +The filesystem device can be retrieved from a file with +.BR stat() +(field \fIst_dev\fP) or +.BR statx() +(fields \fIstx_dev_major\fP and \fIstx_dev_minor\fP). +.SH RETURN VALUES +.TP +.B 0 +on success, or +.TP +-ve +error code on failure and sets errno appropriately. +.SH ERRORS +.TP 15 +.SM EINVAL +\fIpathname\fP pointer is NULL. +.TP +.SM ENODEV +The Lustre mount point was not found. +.SH SEE ALSO +.ad l +.nh +.BR lustreapi (7), +.BR stat (2), +.BR statx (2) diff --git a/lustre/doc/llapi_search_rootpath_by_dev.3 b/lustre/doc/llapi_search_rootpath_by_dev.3 new file mode 100644 index 0000000..15c6b9c --- /dev/null +++ b/lustre/doc/llapi_search_rootpath_by_dev.3 @@ -0,0 +1,2 @@ +.so man3/llapi_search_rootpath.3 + diff --git a/lustre/doc/lustreapi.7 b/lustre/doc/lustreapi.7 index 47f06ee..0e776a8 100644 --- a/lustre/doc/lustreapi.7 +++ b/lustre/doc/lustreapi.7 @@ -68,6 +68,8 @@ quotas, file layouts, etc). See the referenced man pages for details. 
.BR llapi_path2parent (3), .BR llapi_quotactl (3), .BR llapi_search_tgt (3), +.BR llapi_search_rootpath (3), +.BR llapi_search_rootpath_by_dev (3), .BR lustre (7) .SH AUTHOR Lustre contributors diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index 84b01e3..23f4f8c 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -472,6 +472,7 @@ int llapi_getname(const char *path, char *name, size_t namelen); int llapi_search_fileset(const char *pathname, char *fileset); int llapi_search_rootpath(char *pathname, const char *fsname); +int llapi_search_rootpath_by_dev(char *pathname, dev_t dev); int llapi_nodemap_exists(const char *name); int llapi_migrate_mdt(char *path, struct find_param *param); int llapi_mv(char *path, struct find_param *param); diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 4fe86ef..aa517b9 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -112,7 +112,8 @@ liblustreapi_la_SOURCES = liblustreapi.c liblustreapi_hsm.c \ liblustreapi_la_LDFLAGS = $(LIBREADLINE) -version-info 1:0:0 \ -Wl,--version-script=liblustreapi.map liblustreapi_la_LIBADD = $(top_builddir)/libcfs/libcfs/libcfs.la \ - $(top_builddir)/lnet/utils/lnetconfig/liblnetconfig.la + $(top_builddir)/lnet/utils/lnetconfig/liblnetconfig.la \ + -lpthread pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = lustre.pc diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 1cd3b5b..c4dc701 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -1209,19 +1210,111 @@ int llapi_dir_create_pool(const char *name, int mode, int stripe_offset, return llapi_dir_create(name, mode, ¶m); } -/* - * Find the fsname, the full path, and/or an open fd. 
- * Either the fsname or path must not be NULL - */ -int get_root_path(int want, char *fsname, int *outfd, char *path, int index) +static int get_file_dev(const char *path, dev_t *dev) +{ +#ifdef HAVE_STATX + struct statx stx; + + if (!dev) + return -EINVAL; + if (statx(AT_FDCWD, path, 0, 0, &stx)) + return -errno; + *dev = makedev(stx.stx_dev_major, stx.stx_dev_minor); +#else + struct stat st; + + if (!dev) + return -EINVAL; + if (stat(path, &st) != 0) + return -errno; + + *dev = st.st_dev; +#endif + return 0; +} + +static int get_root_fd(const char *rootpath, int *outfd) +{ + int rc = 0; + int fd; + + fd = open(rootpath, O_RDONLY | O_DIRECTORY | O_NONBLOCK); + if (fd < 0) { + rc = -errno; + llapi_error(LLAPI_MSG_ERROR, rc, + "cannot open '%s'", rootpath); + } else { + *outfd = fd; + } + + return rc; +} + +static struct { + dev_t dev; + char fsname[PATH_MAX]; + char mnt_dir[PATH_MAX]; +} root_cached = { 0 }; + +static pthread_rwlock_t root_cached_lock = PTHREAD_RWLOCK_INITIALIZER; + +static int get_root_path_fast(int want, char *fsname, int *outfd, char *path, + dev_t *dev) +{ + int rc = -ENODEV; + int fsnamelen; + int mntlen; + + pthread_rwlock_rdlock(&root_cached_lock); + if (root_cached.dev == 0) + goto out_unlock; + + fsnamelen = strlen(root_cached.fsname); + mntlen = strlen(root_cached.mnt_dir); + + /* Check the fsname for a match, if given */ + if (!(want & WANT_FSNAME) && fsname && + strlen(fsname) == fsnamelen && + (strncmp(root_cached.fsname, fsname, fsnamelen) == 0)) { + rc = 0; + /* Check the dev for a match, if given */ + } else if (!(want & WANT_DEV) && dev && *dev == root_cached.dev) { + rc = 0; + /* Otherwise find the longest matching path */ + } else if (path && strlen(path) >= mntlen && + (strncmp(root_cached.mnt_dir, path, mntlen) == 0) && + (strlen(path) == mntlen || path[mntlen] == '/')) { + rc = 0; + } + + if (rc) + goto out_unlock; + + if ((want & WANT_FSNAME) && fsname) + strcpy(fsname, root_cached.fsname); + if ((want & WANT_PATH) && path) 
+ strcpy(path, root_cached.mnt_dir); + if ((want & WANT_DEV) && dev) + *dev = root_cached.dev; + if ((want & WANT_FD) && outfd) + rc = get_root_fd(root_cached.mnt_dir, outfd); + +out_unlock: + pthread_rwlock_unlock(&root_cached_lock); + return rc; +} + +static int get_root_path_slow(int want, char *fsname, int *outfd, char *path, + int index, dev_t *dev) { struct mntent mnt; - char buf[PATH_MAX], mntdir[PATH_MAX]; + char buf[PATH_MAX]; char *ptr, *ptr_end; FILE *fp; - int idx = 0, mntlen = 0, fd; + int idx = -1, mntlen = 0; int rc = -ENODEV; - int fsnamelen, mountlen; + int fsnamelen = 0; + dev_t devmnt = 0; /* get the mount point */ fp = setmntent(PROC_MOUNTS, "r"); @@ -1231,16 +1324,12 @@ int get_root_path(int want, char *fsname, int *outfd, char *path, int index) "cannot retrieve filesystem mount point"); return rc; } - while (1) { - if (getmntent_r(fp, &mnt, buf, sizeof(buf)) == NULL) - break; + while (getmntent_r(fp, &mnt, buf, sizeof(buf))) { if (!llapi_is_lustre_mnt(&mnt)) continue; - if ((want & WANT_INDEX) && (idx++ != index)) - continue; - + idx++; mntlen = strlen(mnt.mnt_dir); ptr = strchr(mnt.mnt_fsname, '/'); while (ptr && *ptr == '/') @@ -1256,60 +1345,97 @@ int get_root_path(int want, char *fsname, int *outfd, char *path, int index) while (*ptr_end != '/' && *ptr_end != '\0') ptr_end++; - /* Check the fsname for a match, if given */ - mountlen = ptr_end - ptr; - if (!(want & WANT_FSNAME) && fsname != NULL && - (fsnamelen = strlen(fsname)) > 0 && - (fsnamelen != mountlen || - (strncmp(ptr, fsname, mountlen) != 0))) + fsnamelen = ptr_end - ptr; + + /* ignore unaccessible filesystem */ + if (get_file_dev(mnt.mnt_dir, &devmnt)) continue; - /* If the path isn't set return the first one we find */ - if (path == NULL || strlen(path) == 0) { - strncpy(mntdir, mnt.mnt_dir, sizeof(mntdir) - 1); - mntdir[sizeof(mntdir) - 1] = '\0'; - if ((want & WANT_FSNAME) && fsname != NULL) { - strncpy(fsname, ptr, mountlen); - fsname[mountlen] = '\0'; - } + if ((want & 
WANT_INDEX) && idx == index) { + rc = 0; + break; + } + + /* Check the fsname for a match, if given */ + if (!(want & WANT_FSNAME) && fsname && + strlen(fsname) == fsnamelen && + (strncmp(ptr, fsname, fsnamelen) == 0)) { + rc = 0; + break; + } + + /* Check the dev for a match, if given */ + if (!(want & WANT_DEV) && dev && *dev == devmnt) { rc = 0; break; + } + /* Otherwise find the longest matching path */ - } else if ((strlen(path) >= mntlen) && - (strncmp(mnt.mnt_dir, path, mntlen) == 0)) { - /* check the path format */ - if (strlen(path) > mntlen && path[mntlen] != '/') - continue; - strncpy(mntdir, mnt.mnt_dir, sizeof(mntdir) - 1); - mntdir[sizeof(mntdir) - 1] = '\0'; - if ((want & WANT_FSNAME) && fsname != NULL) { - strncpy(fsname, ptr, mountlen); - fsname[mountlen] = '\0'; - } + if (path && strlen(path) >= mntlen && + (strncmp(mnt.mnt_dir, path, mntlen) == 0) && + (strlen(path) == mntlen || path[mntlen] == '/')) { rc = 0; break; } } - endmntent(fp); + + if (rc) + goto out; /* Found it */ - if (rc == 0) { - if ((want & WANT_PATH) && path != NULL) { - strncpy(path, mntdir, mntlen); - path[mntlen] = '\0'; - } - if (want & WANT_FD) { - fd = open(mntdir, O_RDONLY | O_DIRECTORY | O_NONBLOCK); - if (fd < 0) { - rc = -errno; - llapi_error(LLAPI_MSG_ERROR, rc, - "cannot open '%s'", mntdir); + if (!(want & WANT_INDEX)) { + /* Cache the mount point information */ + pthread_rwlock_wrlock(&root_cached_lock); - } else { - *outfd = fd; - } - } - } else if (want & WANT_ERROR) + strncpy(root_cached.fsname, ptr, fsnamelen); + root_cached.fsname[fsnamelen] = '\0'; + strncpy(root_cached.mnt_dir, mnt.mnt_dir, mntlen); + root_cached.mnt_dir[mntlen] = '\0'; + root_cached.dev = devmnt; + + pthread_rwlock_unlock(&root_cached_lock); + } + + if ((want & WANT_FSNAME) && fsname) { + strncpy(fsname, ptr, fsnamelen); + fsname[fsnamelen] = '\0'; + } + if ((want & WANT_PATH) && path) { + strncpy(path, mnt.mnt_dir, mntlen); + path[mntlen] = '\0'; + } + if ((want & WANT_DEV) && dev) + *dev = 
devmnt; + if ((want & WANT_FD) && outfd) + rc = get_root_fd(mnt.mnt_dir, outfd); + +out: + endmntent(fp); + return rc; +} + +/* + * Find the fsname, the full path, and/or an open fd. + * Either the fsname or path must not be NULL + */ +int get_root_path(int want, char *fsname, int *outfd, char *path, int index, + dev_t *dev) +{ + int rc = -ENODEV; + + if (!(want & WANT_INDEX)) + rc = get_root_path_fast(want, fsname, outfd, path, dev); + if (rc) + rc = get_root_path_slow(want, fsname, outfd, path, index, dev); + + if (!rc || !(want & WANT_ERROR)) + return rc; + + if (dev || !(want & WANT_DEV)) + llapi_err_noerrno(LLAPI_MSG_ERROR, + "'%u/%u' dev not on a mounted Lustre filesystem", + major(*dev), minor(*dev)); + else llapi_err_noerrno(LLAPI_MSG_ERROR, "'%s' not on a mounted Lustre filesystem", (want & WANT_PATH) ? fsname : path); @@ -1342,90 +1468,69 @@ int llapi_search_mounts(const char *pathname, int index, char *mntdir, if (fsname) want |= WANT_FSNAME; - return get_root_path(want, fsname, NULL, mntdir, idx); + return get_root_path(want, fsname, NULL, mntdir, idx, NULL); } /* Given a path, find the corresponding Lustre fsname */ int llapi_search_fsname(const char *pathname, char *fsname) { - char *path; + dev_t dev; int rc; - path = realpath(pathname, NULL); - if (path == NULL) { - char tmp[PATH_MAX - 1]; - char buf[PATH_MAX]; - char *ptr; + rc = get_file_dev(pathname, &dev); + if (rc) { + char tmp[PATH_MAX]; + char *parent; + int len; + + /* file does not exist try the parent */ + len = readlink(pathname, tmp, PATH_MAX); + if (len != -1) + tmp[len] = '\0'; + else + strncpy(tmp, pathname, PATH_MAX - 1); - tmp[0] = '\0'; - buf[0] = '\0'; - if (pathname[0] != '/') { - /* - * Need an absolute path, but realpath() only works for - * pathnames that actually exist. We go through the - * extra hurdle of dirname(getcwd() + pathname) in - * case the relative pathname contains ".." in it. 
- */ - char realpath[PATH_MAX - 1]; + parent = dirname(tmp); + rc = get_file_dev(parent, &dev); + } - if (getcwd(realpath, sizeof(realpath) - 2) == NULL) { - rc = -errno; - llapi_error(LLAPI_MSG_ERROR, rc, - "cannot get current working directory"); - return rc; - } + if (rc) { + llapi_error(LLAPI_MSG_ERROR, rc, + "cannot resolve path '%s'", pathname); + return rc; + } - rc = snprintf(tmp, sizeof(tmp), "%s/", realpath); - if (rc >= sizeof(tmp)) { - rc = -E2BIG; - llapi_error(LLAPI_MSG_ERROR, rc, - "invalid parent path '%s'", - tmp); - return rc; - } - } + rc = get_root_path(WANT_FSNAME | WANT_ERROR, fsname, NULL, NULL, -1, + &dev); - rc = snprintf(buf, sizeof(buf), "%s%s", tmp, pathname); - if (rc >= sizeof(buf)) { - rc = -E2BIG; - llapi_error(LLAPI_MSG_ERROR, rc, - "invalid path '%s'", pathname); - return rc; - } - path = realpath(buf, NULL); - if (path == NULL) { - ptr = strrchr(buf, '/'); - if (ptr == NULL) { - llapi_error(LLAPI_MSG_ERROR | - LLAPI_MSG_NO_ERRNO, 0, - "cannot resolve path '%s'", - buf); - return -ENOENT; - } - *ptr = '\0'; - path = realpath(buf, NULL); - if (path == NULL) { - rc = -errno; - llapi_error(LLAPI_MSG_ERROR, rc, - "cannot resolve path '%s'", - pathname); - return rc; - } - } - } - rc = get_root_path(WANT_FSNAME | WANT_ERROR, fsname, NULL, path, -1); - free(path); return rc; } int llapi_search_rootpath(char *pathname, const char *fsname) { + if (!pathname) + return -EINVAL; + + /* + * pathname can be used as an argument by get_root_path(), + * clear it for safety + */ + pathname[0] = 0; + return get_root_path(WANT_PATH, (char *)fsname, NULL, pathname, -1, + NULL); +} + +int llapi_search_rootpath_by_dev(char *pathname, dev_t dev) +{ + if (!pathname) + return -EINVAL; + /* * pathname can be used as an argument by get_root_path(), * clear it for safety */ pathname[0] = 0; - return get_root_path(WANT_PATH, (char *)fsname, NULL, pathname, -1); + return get_root_path(WANT_PATH, NULL, NULL, pathname, -1, &dev); } /** diff --git 
a/lustre/utils/liblustreapi_fid.c b/lustre/utils/liblustreapi_fid.c index ac7911c..8d3f268 100644 --- a/lustre/utils/liblustreapi_fid.c +++ b/lustre/utils/liblustreapi_fid.c @@ -224,10 +224,10 @@ int llapi_fid2path(const char *path_or_device, const char *fidstr, char *path, if (*path_or_device == '/') rc = get_root_path(WANT_FD, NULL, &mnt_fd, - (char *)path_or_device, -1); + (char *)path_or_device, -1, NULL); else rc = get_root_path(WANT_FD, (char *)path_or_device, - &mnt_fd, NULL, -1); + &mnt_fd, NULL, -1, NULL); if (rc < 0) goto out; diff --git a/lustre/utils/liblustreapi_hsm.c b/lustre/utils/liblustreapi_hsm.c index a053046..052e6cd 100644 --- a/lustre/utils/liblustreapi_hsm.c +++ b/lustre/utils/liblustreapi_hsm.c @@ -1592,7 +1592,7 @@ int llapi_hsm_request(const char *path, const struct hsm_user_request *request) int rc; int fd; - rc = get_root_path(WANT_FD, NULL, &fd, (char *)path, -1); + rc = get_root_path(WANT_FD, NULL, &fd, (char *)path, -1, NULL); if (rc) return rc; diff --git a/lustre/utils/liblustreapi_pcc.c b/lustre/utils/liblustreapi_pcc.c index f538e02..b1652c2 100644 --- a/lustre/utils/liblustreapi_pcc.c +++ b/lustre/utils/liblustreapi_pcc.c @@ -212,7 +212,7 @@ int llapi_pcc_detach_fid(const char *mntpath, const struct lu_fid *fid, int fd; struct lu_pcc_detach_fid detach; - rc = get_root_path(WANT_FD, NULL, &fd, (char *)mntpath, -1); + rc = get_root_path(WANT_FD, NULL, &fd, (char *)mntpath, -1, NULL); if (rc) { llapi_error(LLAPI_MSG_ERROR, rc, "cannot get root path: %s", mntpath); diff --git a/lustre/utils/lustreapi_internal.h b/lustre/utils/lustreapi_internal.h index 7284ea9..41b7087 100644 --- a/lustre/utils/lustreapi_internal.h +++ b/lustre/utils/lustreapi_internal.h @@ -50,6 +50,7 @@ #define WANT_FD 0x4 #define WANT_INDEX 0x8 #define WANT_ERROR 0x10 +#define WANT_DEV 0x20 /* Define a fixed 4096-byte encryption unit size */ #define LUSTRE_ENCRYPTION_BLOCKBITS 12 @@ -61,7 +62,8 @@ #define PROC_MOUNTS "/proc/mounts" #endif -int get_root_path(int 
want, char *fsname, int *outfd, char *path, int index); +int get_root_path(int want, char *fsname, int *outfd, char *path, int index, + dev_t *dev); int llapi_ioctl_pack(struct obd_ioctl_data *data, char **pbuf, int max_len); int llapi_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len); int sattr_cache_get_defaults(const char *const fsname, -- 1.8.3.1