Whamcloud - gitweb
LU-16747 llapi: fix race in get_root_path_slow() 82/50682/6
authorEtienne AUJAMES <etienne.aujames@cea.fr>
Tue, 18 Apr 2023 13:34:23 +0000 (15:34 +0200)
committerOleg Drokin <green@whamcloud.com>
Tue, 20 Jun 2023 03:40:39 +0000 (03:40 +0000)
The patch bdf7788d ("LU-8585 llapi: use open_by_handle_at in
llapi_open_by_fid") caches the Lustre root fd to avoid re-openning
it each time an ioctl() is needed on the fs.

For now, only 1 entry is stored. If a llapi call is performed on
another mountpoint, llapi needs to close the old root fd and open a
new one.

A race condition exists at startup, when root_cached.fd is not
initialized yet. Several threads try to determine root information at
the same time (in get_root_path_slow()). Those threads will close(),
open() and update different "root_cached.fd".
The usage of a closed root fd will return EBADFD (e.g: in
llapi_open_by_fid(), llapi_hsm_request() or llapi_fid2path()).

This patch checks if the fs is the same before updating the root
entry. If so, the root entry (and cached root fd) will not be changed.

Add the regresion test sanityn 85 (llapi_root_test).

Test-Parameters: trivial testlist=sanityn env=ONLY=85,ONLY_REPEAT=20
Test-Parameters: testlist=sanityn
Test-Parameters: testlist=sanity
Fixes: bdf7788d ("LU-8585 llapi: use open_by_handle_at in llapi_open_by_fid")
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I681aac7d5715022e700cdb092db94deaa6bf6a8f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50682
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Guillaume Courrier <guillaume.courrier@cea.fr>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/.gitignore
lustre/tests/Makefile.am
lustre/tests/llapi_root_test.c [new file with mode: 0644]
lustre/tests/sanityn.sh
lustre/utils/liblustreapi_root.c

index ac73ecd..014fefb 100644 (file)
@@ -17,6 +17,7 @@
 /llapi_fid_test
 /llapi_hsm_test
 /llapi_layout_test
+/llapi_root_test
 /createdestroy
 /createmany
 /createtest
index 25a5c29..bec15eb 100644 (file)
@@ -84,7 +84,7 @@ THETESTS += create_foreign_file parse_foreign_file
 THETESTS += create_foreign_dir parse_foreign_dir
 THETESTS += check_fallocate splice-test lseek_test expand_truncate_test
 THETESTS += foreign_symlink_striping lov_getstripe_old io_uring_probe
-THETESTS += fadvise_dontneed_helper
+THETESTS += fadvise_dontneed_helper llapi_root_test
 
 if LIBAIO
 THETESTS += aiocp
@@ -121,6 +121,7 @@ llapi_layout_test_LDADD = $(LIBLUSTREAPI)
 llapi_hsm_test_LDADD = $(LIBLUSTREAPI)
 group_lock_test_LDADD = $(LIBLUSTREAPI)
 llapi_fid_test_LDADD = $(LIBLUSTREAPI)
+llapi_root_test_LDADD = $(LIBLUSTREAPI) -lpthread
 rw_seq_cst_vs_drop_caches_LDADD = $(PTHREAD_LIBS)
 sendfile_grouplock_LDADD = $(LIBLUSTREAPI)
 swap_lock_test_LDADD = $(LIBLUSTREAPI)
diff --git a/lustre/tests/llapi_root_test.c b/lustre/tests/llapi_root_test.c
new file mode 100644 (file)
index 0000000..0867e82
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * The purpose of this test is to check Lustre API root fd cache.
+ *
+ * The program will exit as soon as a non zero error code is returned.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <pthread.h>
+
+
+#include <lustre/lustreapi.h>
+#include <linux/lustre/lustre_idl.h>
+
+#define ERROR(fmt, ...)                                                        \
+       fprintf(stderr, "%s: %s:%d: %s: " fmt "\n",                     \
+               program_invocation_short_name, __FILE__, __LINE__,      \
+               __func__, ## __VA_ARGS__)
+
+#define DIE(fmt, ...)                  \
+       do {                           \
+               ERROR(fmt, ## __VA_ARGS__);     \
+               exit(EXIT_FAILURE);             \
+       } while (0)
+
+#define ASSERTF(cond, fmt, ...)                                                \
+       do {                                                            \
+               if (!(cond))                                            \
+                       DIE("assertion '%s' failed: "fmt,               \
+                           #cond, ## __VA_ARGS__);                     \
+       } while (0)
+
+#define PERFORM(testfn) \
+       do {                                                            \
+               cleanup();                                              \
+               fprintf(stderr, "Starting test " #testfn " at %lld\n",  \
+                       (unsigned long long)time(NULL));                \
+               testfn();                                               \
+               fprintf(stderr, "Finishing test " #testfn " at %lld\n", \
+                      (unsigned long long)time(NULL));                 \
+               cleanup();                                              \
+       } while (0)
+
+/* Name of file/directory. Will be set once and will not change. */
+static char *mainpath;  /* path to file on mountpoint 1 */
+static char *mainpath2; /* path to file on mountpoint 2 */
+
+static char mnt_dir[PATH_MAX]; /* Lustre mountpoint 1 */
+static char mnt_dir2[PATH_MAX];        /* Lustre mountpoint 2 */
+static int mnt_fd = -1;
+static int mnt_fd2 = -1;
+
+/* Cleanup our test directory. */
+static void cleanup(void)
+{
+       int rc;
+
+       rc = remove(mainpath);
+       ASSERTF(!rc || errno == ENOENT,
+               "Failed to unlink %s: %s", mainpath, strerror(errno));
+}
+
+#define TEST1_THR_NBR 20
+void *test1_thr(void *arg)
+{
+       char *fidstr = arg;
+       char path[PATH_MAX];
+       long long recno = -1;
+       int linkno = 0;
+       long long rc;
+
+       rc = llapi_fid2path(mnt_dir2, fidstr, path,
+                           sizeof(path), &recno, &linkno);
+
+       return (void *) rc;
+}
+
+/* Race on root cache at startup */
+static void test1(void)
+{
+       static pthread_t thread[TEST1_THR_NBR];
+       int fd, i, iter;
+       long long rc;
+       struct lu_fid fid;
+       char fidstr[FID_LEN + 1];
+
+       fd = creat(mainpath, 00660);
+       ASSERTF(fd >= 0, "creat failed for '%s': %s",
+               mainpath, strerror(errno));
+
+       rc = llapi_fd2fid(fd, &fid);
+       ASSERTF(rc == 0, "llapi_fd2fid failed for '%s': %s",
+               mainpath, strerror(-rc));
+       close(fd);
+
+       snprintf(fidstr, sizeof(fidstr), DFID_NOBRACE, PFID(&fid));
+       for (iter = 0; iter < 100; iter++) {
+               /* reset cache on first mountpoint */
+               fd = llapi_open_by_fid(mnt_dir, &fid, O_RDONLY);
+               ASSERTF(fd >= 0, "llapi_open_by_fid for " DFID_NOBRACE ": %d",
+                       PFID(&fid), fd);
+               close(fd);
+
+               /* start threads with llapi_open_by_fid() */
+               for (i = 0; i < TEST1_THR_NBR; i++)
+                       pthread_create(&thread[i], NULL, &test1_thr, fidstr);
+
+               for (i = 0; i < TEST1_THR_NBR; i++) {
+                       pthread_join(thread[i], (void **) &rc);
+                       ASSERTF(rc == 0,
+                               "llapi_fid2path for " DFID_NOBRACE " (iter: %d, thr:%d): %s",
+                               PFID(&fid), iter, i, strerror(-rc));
+               }
+       }
+}
+
+static void usage(char *prog)
+{
+       fprintf(stderr, "Usage: %s [-h]\n", basename(prog));
+       fprintf(stderr, "or:    %s FILEPATH1 FILEPATH2\n", basename(prog));
+       exit(EXIT_FAILURE);
+}
+
+static void process_args(int argc, char *argv[])
+{
+       /* default mountpoints used */
+       if (argc == 1)
+               return;
+
+       if (argc > 1 && argv[1][0] == '-')
+               usage(argv[0]);
+
+       if (argc <= 2 || argv[1][0] == '\0' || argv[2][0] == '\0')
+               usage(argv[0]);
+
+       mainpath = argv[1];
+       mainpath2 = argv[2];
+}
+
+
+static int fill_default_paths(void)
+{
+       static char tmp1[PATH_MAX] = "/mnt/lustre/llapi_root_test.XXXXXX";
+       static char tmp2[PATH_MAX] = "/mnt/lustre2/";
+       int fd;
+
+       /* default paths needed?*/
+       if (mainpath || mainpath2)
+               return 0;
+
+       fd = mkstemp(tmp1);
+       if (fd < 0) {
+               fprintf(stderr, "Failed to creat %s: %s\n",
+                       tmp1, strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       close(fd);
+       strcat(tmp2, basename(tmp1));
+
+       mainpath = tmp1;
+       mainpath2 = tmp2;
+
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       char fsname[8 + 1];
+       char fsname2[8 + 1];
+       int rc;
+
+       process_args(argc, argv);
+       fill_default_paths();
+       atexit(cleanup);
+
+       if (strcmp(basename(mainpath), basename(mainpath2)) != 0 ||
+                  strcmp(mainpath, mainpath2) == 0) {
+               fprintf(stderr, "%s and %s should be the same file on 2 distinct mountpoints\n",
+                       mainpath, mainpath2);
+               return EXIT_FAILURE;
+       }
+
+       rc = llapi_search_mounts(mainpath, 0, mnt_dir, fsname);
+       if (rc != 0) {
+               fprintf(stderr, "Error: %s: not a Lustre filesystem\n",
+                       mainpath);
+               return EXIT_FAILURE;
+       }
+
+       rc = llapi_search_mounts(mainpath2, 0, mnt_dir2, fsname2);
+       if (rc != 0) {
+               fprintf(stderr, "Error: %s: not a Lustre filesystem\n",
+                       mainpath2);
+               return EXIT_FAILURE;
+       }
+
+       if (strcmp(fsname, fsname2) != 0) {
+               fprintf(stderr, "%s and %s are not on the same filesystem (%s, %s)\n",
+                       mnt_dir, mnt_dir2, fsname, fsname2);
+               return EXIT_FAILURE;
+       }
+
+       mnt_fd = open(mnt_dir, O_RDONLY|O_DIRECTORY);
+       ASSERTF(mnt_fd >= 0, "cannot open '%s': %s\n", mnt_dir, strerror(errno));
+
+       mnt_fd2 = open(mnt_dir2, O_RDONLY|O_DIRECTORY);
+       ASSERTF(mnt_fd2 >= 0, "cannot open '%s': %s\n", mnt_dir2, strerror(errno));
+
+       fprintf(stderr, "Starting: %s %s %s\n\n",
+               basename(argv[0]), mainpath, mainpath2);
+
+       /* Play nice with Lustre test scripts. Non-line buffered output
+        * stream under I/O redirection may appear incorrectly.
+        */
+       setvbuf(stdout, NULL, _IOLBF, 0);
+
+       PERFORM(test1);
+
+       return EXIT_SUCCESS;
+}
index eda0b4b..e8f5841 100755 (executable)
@@ -5148,6 +5148,11 @@ test_84() {
 }
 run_test 84 "0-nlink race in lu_object_find()"
 
+test_85() {
+       llapi_root_test $DIR/$tfile $DIR2/$tfile
+}
+run_test 85 "Lustre API root cache race"
+
 test_90() {
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
        local pid1
index 7db8b33..23a2b4b 100644 (file)
@@ -250,6 +250,10 @@ static int get_root_path_slow(int want, char *fsname, int *outfd, char *path,
                /* Cache the mount point information */
                pthread_rwlock_wrlock(&root_cached_lock);
 
+               /* If the entry matches the saved one -> no update needed */
+               if (strcmp(root_cached.mnt_dir, mnt.mnt_dir) == 0)
+                       goto unlock_root_cached;
+
                if (root_cached.fd > 0) {
                        close(root_cached.fd);
                        root_cached.fd = 0;
@@ -266,6 +270,7 @@ static int get_root_path_slow(int want, char *fsname, int *outfd, char *path,
                        ptr_end - mnt.mnt_fsname);
                root_cached.nid[ptr_end - mnt.mnt_fsname] = '\0';
 
+unlock_root_cached:
                pthread_rwlock_unlock(&root_cached_lock);
        }