Whamcloud - gitweb
LU-14541 llite: add rw_seq_cst_vs_drop_caches 43/47243/6
authorJohn L. Hammond <jhammond@whamcloud.com>
Fri, 6 May 2022 18:54:13 +0000 (13:54 -0500)
committerOleg Drokin <green@whamcloud.com>
Thu, 19 May 2022 21:45:51 +0000 (21:45 +0000)
Add a reproducer (rw_seq_cst_vs_drop_caches) for the read/write vs
drop_caches sequential consistency violation described in
LU-14541. Add an always excepted test (sanityn test_16f) to run
rw_seq_cst_vs_drop_caches.

Signed-off-by: John L. Hammond <jhammond@whamcloud.com>
Change-Id: I557ae7386b38214110a4d85ba0515e95fed7a11e
Reviewed-on: https://review.whamcloud.com/47243
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/.gitignore
lustre/tests/Makefile.am
lustre/tests/rw_seq_cst_vs_drop_caches.c [new file with mode: 0644]
lustre/tests/sanityn.sh

index ed75b36..ff01194 100644 (file)
@@ -76,6 +76,7 @@
 /rename_many
 /rmdirmany
 /runas
+/rw_seq_cst_vs_drop_caches
 /rwv
 /sendfile
 /sendfile_grouplock
index 29c2108..0e6f42e 100644 (file)
@@ -73,6 +73,7 @@ THETESTS += statone runas openfile smalliomany
 THETESTS += small_write multiop ll_sparseness_verify
 THETESTS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany
 THETESTS += openfilleddirunlink rename_many memhog euid_access
+THETESTS += rw_seq_cst_vs_drop_caches
 THETESTS += mmap_sanity writemany reads flocks_test flock_deadlock
 THETESTS += write_time_limit rwv lgetxattr_size_check checkfiemap
 THETESTS += listxattr_size_check check_fhandle_syscalls badarea_io
@@ -119,6 +120,7 @@ llapi_layout_test_LDADD = $(LIBLUSTREAPI)
 llapi_hsm_test_LDADD = $(LIBLUSTREAPI)
 group_lock_test_LDADD = $(LIBLUSTREAPI)
 llapi_fid_test_LDADD = $(LIBLUSTREAPI)
+rw_seq_cst_vs_drop_caches_LDADD = $(PTHREAD_LIBS)
 sendfile_grouplock_LDADD = $(LIBLUSTREAPI)
 swap_lock_test_LDADD = $(LIBLUSTREAPI)
 statmany_LDADD = $(LIBLUSTREAPI)
diff --git a/lustre/tests/rw_seq_cst_vs_drop_caches.c b/lustre/tests/rw_seq_cst_vs_drop_caches.c
new file mode 100644 (file)
index 0000000..5f988bb
--- /dev/null
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <pthread.h>
+
+/*
+ * Usage: rw_seq_cst_vs_drop_caches /mnt/lustre/file0 /mnt/lustre2/file0
+ *
+ * Race reads of the same file on two client mounts vs writes and drop
+ * caches to detect sequential consistency violations. Run
+ * indefinitely.  Will abort() if a consistency violation is found in
+ * which case the wait status ($?) will be 134.
+ */
+
+/* Print msg followed by the current errno description (via perror()) and
+ * exit with EXIT_FAILURE. Only meaningful right after a call that reports
+ * its failure through errno.
+ */
+#define handle_error(msg)      \
+       do { perror(msg); exit(EXIT_FAILURE); } while (0)
+
+/* File descriptors for the two paths given on the command line; opened by
+ * main() and read by the access thread.
+ */
+static int fd[2] = { -1, -1 };
+/* u is the value most recently written by main(); u_max is the total number
+ * of writes, which are time consuming because they are contending with
+ * constant reads
+ */
+static uint64_t u, u_max = UINT64_MAX / 2;
+/* v[i] holds the last value the access thread read through fd[i]. */
+static uint64_t v[2];
+
+/*
+ * Reader thread: repeatedly read the 64-bit counter at offset 0 through
+ * fd[0] and then through fd[1], storing the results in v[0] and v[1].
+ *
+ * main() only ever writes increasing values of u, and fd[1] is read after
+ * fd[0], so the second read is expected to return a value at least as
+ * large as the first. Observing v[0] > v[1] is treated as a consistency
+ * violation: the values are printed and the process is aborted.
+ *
+ * The argument is ignored. The function never returns normally; it ends
+ * through handle_error() on a failed or short pread(), through abort(),
+ * or by being cancelled from main().
+ */
+static void *access_thread_start(void *unused)
+{
+       ssize_t rc;
+       int i;
+
+       do {
+               /* Order matters: fd[0] must be read before fd[1]. */
+               for (i = 0; i < 2; i++) {
+                       rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
+                       if (rc < 0 || rc != sizeof(v[i]))
+                               handle_error("pread");
+               }
+       } while (v[0] <= v[1]); /* keep going while the reads are consistent */
+
+       /* u is read here without synchronization against main(), so the
+        * printed value of u is only indicative. */
+       fprintf(stderr, "error: u = %"PRIu64", v = %"PRIu64", %"PRIu64"\n",
+               u, v[0], v[1]);
+
+       /* SIGABRT is the signal the caller uses to detect a violation. */
+       abort();
+}
+
+/* Backing store for line-buffered stderr (installed by main()). */
+static char stderr_buf[4096];
+
+/*
+ * Open the two paths given on the command line, check that they refer to
+ * the same file, start the reader thread and then loop writing an
+ * increasing 64-bit counter at offset 0 through fd[0], writing "3" to
+ * /proc/sys/vm/drop_caches after every write.
+ *
+ * Returns 0 once u_max writes have completed. Any setup or I/O failure
+ * prints a message and exits with EXIT_FAILURE. abort() is deliberately
+ * reserved for the reader thread's consistency-violation report, so that
+ * a SIGABRT wait status always means a violation was observed.
+ */
+int main(int argc, char *argv[])
+{
+       int drop_caches_fd = -1;
+       pthread_t access_thread;
+       struct stat st[2];
+       ssize_t rc;
+       int err;
+       int i;
+
+       setvbuf(stderr, stderr_buf, _IOLBF, sizeof(stderr_buf));
+
+       if (argc != 3) {
+               fprintf(stderr,
+                       "Usage: %s /mnt/lustre/file0 /mnt/lustre2/file0\n",
+                       argv[0]);
+               exit(EXIT_FAILURE);
+       }
+
+       /* Do not use assert() here: it would abort() and be mistaken for
+        * a consistency violation, and it is compiled out under NDEBUG. */
+       drop_caches_fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+       if (drop_caches_fd < 0)
+               handle_error("open /proc/sys/vm/drop_caches");
+
+       for (i = 0; i < 2; i++) {
+               fd[i] = open(argv[i + 1], O_RDWR|O_CREAT|O_TRUNC, 0666);
+               if (fd[i] < 0)
+                       handle_error("open");
+
+               rc = fstat(fd[i], &st[i]);
+               if (rc < 0)
+                       handle_error("fstat");
+       }
+
+       /* file0 and file1 should be the same file on two different
+        * client mount points.
+        * NOTE(review): this also requires both paths to report the same
+        * st_dev; confirm that holds for two separate client mounts. */
+       if (st[0].st_dev != st[1].st_dev ||
+           st[0].st_ino != st[1].st_ino) {
+               fprintf(stderr, "file mismatch\n");
+               exit(EXIT_FAILURE);
+       }
+
+       /* Seed the file with u == 0 before the reader thread starts, so
+        * its first pread() does not hit an empty file. */
+       rc = pwrite(fd[0], &u, sizeof(u), 0);
+       if (rc < 0 || rc != sizeof(u))
+               handle_error("pwrite");
+
+       /* pthread_*() return an error number and do not set errno, so
+        * report failures with strerror() rather than perror(). */
+       err = pthread_create(&access_thread, NULL, &access_thread_start, NULL);
+       if (err != 0) {
+               fprintf(stderr, "pthread_create: %s\n", strerror(err));
+               exit(EXIT_FAILURE);
+       }
+
+       for (u = 1; u <= u_max; u++) {
+               rc = pwrite(fd[0], &u, sizeof(u), 0);
+               if (rc < 0 || rc != sizeof(u))
+                       handle_error("pwrite");
+
+               rc = write(drop_caches_fd, "3\n", 2);
+               if (rc < 0 || rc != 2)
+                       handle_error("drop caches");
+       }
+
+       err = pthread_cancel(access_thread);
+       if (err != 0) {
+               fprintf(stderr, "pthread_cancel: %s\n", strerror(err));
+               exit(EXIT_FAILURE);
+       }
+
+       err = pthread_join(access_thread, NULL);
+       if (err != 0) {
+               fprintf(stderr, "pthread_join: %s\n", strerror(err));
+               exit(EXIT_FAILURE);
+       }
+
+       return 0;
+}
index 005db53..a520b27 100755 (executable)
@@ -20,6 +20,8 @@ init_logging
 ALWAYS_EXCEPT="$SANITYN_EXCEPT "
 # bug number for skipped test:  LU-7105
 ALWAYS_EXCEPT+="                28 "
+# bug number for skipped test:  LU-14541
+ALWAYS_EXCEPT+="                16f"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 # skip tests for PPC until they are fixed
@@ -535,6 +537,35 @@ test_16e() { # LU-13227
 }
 run_test 16e "Verify size consistency for O_DIRECT write"
 
+# Run rw_seq_cst_vs_drop_caches on the same file name under $DIR1 and $DIR2
+# for up to $duration seconds, and fail if it is killed by SIGABRT.
+test_16f() { # LU-14541
+       local file1=$DIR1/$tfile
+       local file2=$DIR2/$tfile
+       local duration=20
+       local status
+
+       # timeout delivers SIGUSR1 after $duration seconds; --preserve-status
+       # makes it report the command's own exit status.
+       timeout --preserve-status --signal=USR1 $duration \
+               rw_seq_cst_vs_drop_caches $file1 $file2
+       status=$?
+
+       # Keep only the low 7 bits, so a 128+N status (e.g. 134 for SIGABRT)
+       # selects signal number N below.
+       case $((status & 0x7f)) in
+               0)
+                       echo OK # Computers must be fast now.
+                       ;;
+               6) # SIGABRT
+                       error "sequential consistency violation detected"
+                       ;;
+               10) # SIGUSR1
+                       echo TIMEOUT # This is fine.
+                       ;;
+               *)
+                       error "strange status '$status'"
+                       ;;
+       esac
+
+       rm -f $file1
+}
+run_test 16f "rw sequential consistency vs drop_caches"
+
 test_17() { # bug 3513, 3667
        remote_ost_nodsh && skip "remote OST with nodsh" && return