Whamcloud - gitweb
LU-6368 ldlm: Do not use cbpending for group locks 93/14093/8
authorPatrick Farrell <paf@cray.com>
Mon, 13 Apr 2015 15:36:52 +0000 (10:36 -0500)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 28 Apr 2015 05:01:37 +0000 (05:01 +0000)
Currently, the CBPENDING flag is set on group locks when
the osc lock above them is released (osc_cancel_base).

This results in a situation where a new group lock request
on a resource does not match an existing group lock because
LDLM_FL_CBPENDING is set on the existing lock.

So two group locks are granted on the same resource, which
is not valid, since a given client can only have one group
lock on a particular resource.

Since group locks are manually released and not called back
like other LDLM locks, the CBPENDING flag doesn't make
sense.  Since they must be manually released, they also
cannot go in the LDLM LRU cache and must be fully released
immediately once they are no longer in use.

This was previously accomplished by setting CBPENDING when
the corresponding osc lock is released, but as noted above,
this prevents the group lock matching some future lock
requests.

This patch uses the fact that group locks have an l_writers
reference which they keep until they are manually released,
so we remove them when they have no more reader or writer
references, without checking cbpending.

Additionally, this patch adds several sendfile tests,
courtesy of Frank Zago <fzago@cray.com>.

Signed-off-by: Patrick Farrell <paf@cray.com>
Change-Id: I2845750777cbc9849b18999f1b77f791034c50b0
Reviewed-on: http://review.whamcloud.com/14093
Reviewed-by: frank zago <fzago@cray.com>
Tested-by: Jenkins
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ldlm/ldlm_lock.c
lustre/osc/osc_internal.h
lustre/osc/osc_lock.c
lustre/osc/osc_request.c
lustre/tests/Makefile.am
lustre/tests/sanity.sh
lustre/tests/sendfile_grouplock.c [new file with mode: 0644]

index 5f57dea..44049c1 100644 (file)
@@ -841,10 +841,16 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
                ldlm_set_cbpending(lock);
         }
 
-        if (!lock->l_readers && !lock->l_writers &&
-           ldlm_is_cbpending(lock)) {
-                /* If we received a blocked AST and this was the last reference,
-                 * run the callback. */
+       if (!lock->l_readers && !lock->l_writers &&
+           (ldlm_is_cbpending(lock) || lock->l_req_mode == LCK_GROUP)) {
+               /* If we received a blocked AST and this was the last reference,
+                * run the callback.
+                * Group locks are special:
+                * They must not go in LRU, but they are not called back
+                * like non-group locks, instead they are manually released.
+                * They have an l_writers reference which they keep until
+                * they are manually released, so we remove them when they have
+                * no more reader or writer references. - LU-6368 */
                if (ldlm_is_ns_srv(lock) && lock->l_export)
                         CERROR("FL_CBPENDING set on non-local lock--just a "
                                "warning\n");
@@ -907,7 +913,6 @@ EXPORT_SYMBOL(ldlm_lock_decref);
  * \a lockh and mark it for subsequent cancellation once r/w refcount
  * drops to zero instead of putting into LRU.
  *
- * Typical usage is for GROUP locks which we cannot allow to be cached.
  */
 void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode)
 {
index b226b00..9985e72 100644 (file)
@@ -110,7 +110,6 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                     osc_enqueue_upcall_f upcall,
                     void *cookie, struct ldlm_enqueue_info *einfo,
                     struct ptlrpc_request_set *rqset, int async, int agl);
-int osc_cancel_base(struct lustre_handle *lockh, __u32 mode);
 
 int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                   __u32 type, ldlm_policy_data_t *policy, __u32 mode,
index 459944a..7419ef2 100644 (file)
@@ -1017,13 +1017,15 @@ static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
 {
        struct ldlm_lock *dlmlock;
 
+       ENTRY;
+
        dlmlock = olck->ols_dlmlock;
        if (dlmlock == NULL)
-               return;
+               RETURN_EXIT;
 
        if (olck->ols_hold) {
                olck->ols_hold = 0;
-               osc_cancel_base(&olck->ols_handle, olck->ols_einfo.ei_mode);
+               ldlm_lock_decref(&olck->ols_handle, olck->ols_einfo.ei_mode);
                olck->ols_handle.cookie = 0ULL;
        }
 
@@ -1034,6 +1036,8 @@ static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
        lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
        LDLM_LOCK_RELEASE(dlmlock);
        olck->ols_has_ref = 0;
+
+       EXIT;
 }
 
 /**
index 84d4120..d0fa025 100644 (file)
@@ -2266,18 +2266,6 @@ int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
         RETURN(rc);
 }
 
-int osc_cancel_base(struct lustre_handle *lockh, __u32 mode)
-{
-        ENTRY;
-
-        if (unlikely(mode == LCK_GROUP))
-                ldlm_lock_decref_and_cancel(lockh, mode);
-        else
-                ldlm_lock_decref(lockh, mode);
-
-        RETURN(0);
-}
-
 static int osc_statfs_interpret(const struct lu_env *env,
                                 struct ptlrpc_request *req,
                                 struct osc_async_args *aa, int rc)
index 6afc47a..33f323c 100644 (file)
@@ -77,7 +77,7 @@ noinst_PROGRAMS += mmap_sanity writemany reads flocks_test flock_deadlock
 noinst_PROGRAMS += write_time_limit rwv lgetxattr_size_check checkfiemap
 noinst_PROGRAMS += listxattr_size_check check_fhandle_syscalls badarea_io
 noinst_PROGRAMS += llapi_layout_test orphan_linkea_check llapi_hsm_test
-noinst_PROGRAMS += group_lock_test llapi_fid_test
+noinst_PROGRAMS += group_lock_test llapi_fid_test sendfile_grouplock
 
 bin_PROGRAMS = mcreate munlink
 testdir = $(libdir)/lustre/tests
@@ -95,6 +95,7 @@ llapi_layout_test_LDADD=$(LIBLUSTREAPI)
 llapi_hsm_test_LDADD=$(LIBLUSTREAPI)
 group_lock_test_LDADD=$(LIBLUSTREAPI)
 llapi_fid_test_LDADD=$(LIBLUSTREAPI)
+sendfile_grouplock_LDADD=$(LIBLUSTREAPI)
 it_test_LDADD=$(LIBCFS)
 rwv_LDADD=$(LIBCFS)
 
index 2ec1cee..a685e63 100644 (file)
@@ -12896,6 +12896,16 @@ test_243()
 }
 run_test 243 "various group lock tests"
 
+test_244()
+{
+       test_mkdir -p $DIR/$tdir
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=35
+       sendfile_grouplock $DIR/$tdir/$tfile || \
+               error "sendfile+grouplock failed"
+       rm -rf $DIR/$tdir
+}
+run_test 244 "sendfile with group lock tests"
+
 test_250() {
        [ "$(facet_fstype ost$(($($GETSTRIPE -i $DIR/$tfile) + 1)))" = "zfs" ] \
         && skip "no 16TB file size limit on ZFS" && return
diff --git a/lustre/tests/sendfile_grouplock.c b/lustre/tests/sendfile_grouplock.c
new file mode 100644 (file)
index 0000000..42d66a3
--- /dev/null
@@ -0,0 +1,391 @@
+/*
+ * GPL HEADDER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2015 Cray Inc, all rights reserved.
+ * Author: Frank Zago.
+ *
+ * A few portions are extracted from llapi_layout_test.c
+ */
+
+/*
+ * The CRC32 implementation is from RFC 1952, which bears the
+ * following notice:
+ *
+ * Copyright (c) 1996 L. Peter Deutsch
+ *
+ * Permission is granted to copy and distribute this document for any
+ * purpose and without charge, including translations into other
+ * languages and incorporation into compilations, provided that the
+ * copyright notice and this notice are preserved, and that any
+ * substantive changes or deletions from the original are clearly
+ * marked.
+ */
+
+/*
+ * The purpose of this test is to exert the group lock ioctls in
+ * conjunction with sendfile. Some bugs were found when both were used
+ * at the same time. See LU-6368 and LU-6371.
+ *
+ * The program will exit as soon as a non zero error code is returned.
+ *
+ * It can be called like this:
+ *
+ * dd if=/dev/zero of=/mnt/lustre/foo1 bs=1M count=40
+ * ./sendfile_grouplock /mnt/lustre/foo1
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/sendfile.h>
+
+#include <lustre/lustreapi.h>
+#include <lustre/lustre_idl.h>
+
+#define ERROR(fmt, ...)                                                        \
+       fprintf(stderr, "%s: %s:%d: %s: " fmt "\n",                     \
+               program_invocation_short_name, __FILE__, __LINE__,      \
+               __func__, ## __VA_ARGS__);
+
+#define DIE(fmt, ...)                          \
+       do {                                    \
+               ERROR(fmt, ## __VA_ARGS__);     \
+               exit(EXIT_FAILURE);             \
+       } while (0)
+
+#define ASSERTF(cond, fmt, ...)                                                \
+       do {                                                            \
+               if (!(cond))                                            \
+                       DIE("assertion '%s' failed: "fmt,               \
+                           #cond, ## __VA_ARGS__);                     \
+       } while (0)
+
+#define PERFORM(testfn) \
+       do {                                                            \
+               fprintf(stderr, "Starting test " #testfn " at %lld\n",  \
+                       (unsigned long long)time(NULL));                \
+               testfn();                                               \
+               fprintf(stderr, "Finishing test " #testfn " at %lld\n", \
+                       (unsigned long long)time(NULL));                \
+       } while (0)
+
+/* This test will copy from source_file to dest_file */
+static const char *source_file;
+static char *dest_file;
+static unsigned long source_crc; /* CRC32 of original file */
+
+/*
+ * A small CRC32 implementation, from RFC 1952
+ */
+
+/* Table of CRCs of all 8-bit messages. */
+static unsigned long crc_table[256];
+
+/* Flag: has the table been computed? Initially false. */
+static int crc_table_computed;
+
+/* Make the table for a fast CRC. */
+static void make_crc_table(void)
+{
+       unsigned long c;
+
+       int n, k;
+       for (n = 0; n < 256; n++) {
+               c = (unsigned long) n;
+               for (k = 0; k < 8; k++) {
+                       if (c & 1)
+                               c = 0xedb88320L ^ (c >> 1);
+                       else
+                               c = c >> 1;
+               }
+               crc_table[n] = c;
+       }
+       crc_table_computed = 1;
+}
+
+/*
+ * Update a running crc with the bytes buf[0..len-1] and return the
+ * updated crc. The crc should be initialized to zero. Pre- and
+ * post-conditioning (one's complement) is performed within this
+ * function so it shouldn't be done by the caller. Usage example:
+ *
+ *     unsigned long crc = 0L;
+ *
+ *     while (read_buffer(buffer, length) != EOF) {
+ *             crc = update_crc(crc, buffer, length);
+ *     }
+ *     if (crc != original_crc) error();
+ */
+static unsigned long update_crc(unsigned long crc,
+                               unsigned char *buf, int len)
+{
+       unsigned long c = crc ^ 0xffffffffL;
+       int n;
+
+       if (!crc_table_computed)
+               make_crc_table();
+       for (n = 0; n < len; n++)
+               c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
+
+       return c ^ 0xffffffffL;
+}
+
+/* Cleanup our test file. */
+static void cleanup(void)
+{
+       unlink(dest_file);
+}
+
+/* Compute the CRC32 of a file */
+static unsigned long compute_crc(const char *fname)
+{
+       unsigned char buf[1024*1024];
+       unsigned long crc = 0L;
+       struct stat stbuf;
+       int fd;
+       int rc;
+       size_t filesize;
+
+       fd = open(fname, O_RDONLY);
+       ASSERTF(fd >= 0, "open failed for '%s': %s",
+               fname, strerror(errno));
+
+       rc = fstat(fd, &stbuf);
+       ASSERTF(rc == 0, "fstat of '%s' failed: %s", fname, strerror(errno));
+       filesize = stbuf.st_size;
+
+       while (filesize != 0) {
+               size_t to_read = sizeof(buf);
+               ssize_t sret;
+
+               if (to_read > filesize)
+                       to_read = filesize;
+
+               sret = read(fd, buf, to_read);
+               ASSERTF(sret >= 0, "read of %zu bytes from '%s' failed: %s",
+                       to_read, fname, strerror(errno));
+               ASSERTF(sret > 0, "unexpected EOF for '%s'",
+                       fname);
+
+               filesize -= sret;
+               crc = update_crc(crc, buf, sret);
+       }
+
+       close(fd);
+
+       return crc;
+}
+
+/* Helper. Copy a file with sendfile. The destination will be
+ * created. If a group lock is 0, it means do not take one. */
+static int sendfile_copy(const char *source, int source_gid,
+                        const char *dest, int dest_gid)
+{
+       int rc;
+       struct stat stbuf;
+       size_t filesize;
+       int fd_in;
+       int fd_out;
+
+       fd_in = open(source, O_RDONLY);
+       ASSERTF(fd_in >= 0, "open failed for '%s': %s",
+               source, strerror(errno));
+
+       rc = fstat(fd_in, &stbuf);
+       ASSERTF(rc == 0, "fstat of '%s' failed: %s", source, strerror(errno));
+       filesize = stbuf.st_size;
+
+       if (source_gid != 0) {
+               rc = llapi_group_lock(fd_in, source_gid);
+               ASSERTF(rc == 0, "cannot set group lock %d for '%s': %s",
+                       source_gid, source, strerror(-rc));
+       }
+
+       fd_out = open(dest, O_WRONLY | O_TRUNC | O_CREAT);
+       ASSERTF(fd_out >= 0, "creation failed for '%s': %s",
+               dest, strerror(errno));
+
+       if (dest_gid != 0) {
+               rc = llapi_group_lock(fd_out, dest_gid);
+               ASSERTF(rc == 0, "cannot set group lock %d for '%s': %s",
+                       dest_gid, dest, strerror(-rc));
+       }
+
+       /* Transfer by 10M blocks */
+       while (filesize != 0) {
+               size_t to_copy = 10*1024*1024;
+               ssize_t sret;
+
+               if (to_copy > filesize)
+                       to_copy = filesize;
+
+               sret = sendfile(fd_out, fd_in, NULL, to_copy);
+               rc = errno;
+
+               /* Although senfile can return less than requested,
+                * that should not happen under present conditions. At
+                * the very least, make sure that a decent size was
+                * copied. See LU-6371. */
+
+               ASSERTF(sret != 0, "sendfile read 0 bytes");
+               ASSERTF(sret > 0, "sendfile failed: %s", strerror(rc));
+               ASSERTF(sret > 100*1024,
+                       "sendfile read too little data: %zd bytes", sret);
+
+               if (sret != to_copy)
+                       fprintf(stderr,
+                              "Warning: sendfile returned %zd bytes instead of %zu requested\n",
+                              sret, to_copy);
+
+               filesize -= sret;
+
+       }
+
+       close(fd_out);
+       close(fd_in);
+
+       return 0;
+}
+
+/* Basic sendfile, without lock taken */
+static void test10(void)
+{
+       unsigned long crc;
+
+       cleanup();
+       sendfile_copy(source_file, 0, dest_file, 0);
+       sync();
+
+       crc = compute_crc(dest_file);
+       ASSERTF(source_crc == crc, "CRC differs: %lu and %lu", source_crc, crc);
+}
+
+/* sendfile, source locked */
+static void test11(void)
+{
+       unsigned long crc;
+
+       cleanup();
+       sendfile_copy(source_file, 85543, dest_file, 0);
+       sync();
+
+       crc = compute_crc(dest_file);
+       ASSERTF(source_crc == crc, "CRC differs: %lu and %lu", source_crc, crc);
+}
+
+/* sendfile, destination locked */
+static void test12(void)
+{
+       unsigned long crc;
+
+       cleanup();
+       sendfile_copy(source_file, 0, dest_file, 98765);
+       sync();
+
+       crc = compute_crc(dest_file);
+       ASSERTF(source_crc == crc, "CRC differs: %lu and %lu", source_crc, crc);
+}
+
+/* sendfile, source and destination locked, with same lock number */
+static void test13(void)
+{
+       const int gid = 8765;
+       unsigned long crc;
+
+       cleanup();
+       sendfile_copy(source_file, gid, dest_file, gid);
+       sync();
+
+       crc = compute_crc(dest_file);
+       ASSERTF(source_crc == crc, "CRC differs: %lu and %lu", source_crc, crc);
+}
+
+/* sendfile, source and destination locked, with different lock number */
+static void test14(void)
+{
+       unsigned long crc;
+
+       cleanup();
+       sendfile_copy(source_file, 98765, dest_file, 34543);
+       sync();
+
+       crc = compute_crc(dest_file);
+       ASSERTF(source_crc == crc, "CRC differs: %lu and %lu", source_crc, crc);
+}
+
+/* Basic sendfile, without lock taken, to /dev/null */
+static void test15(void)
+{
+       sendfile_copy(source_file, 0, "/dev/null", 0);
+       sync();
+}
+
+/* sendfile, source locked, to /dev/null */
+static void test16(void)
+{
+       sendfile_copy(source_file, 85543, "/dev/null", 0);
+       sync();
+}
+
+int main(int argc, char *argv[])
+{
+       int rc;
+
+       if (argc != 2 || argv[1][0] != '/') {
+               fprintf(stderr,
+                       "Argument must be an absolute path to a Lustre file\n");
+               return EXIT_FAILURE;
+       }
+
+       source_file = argv[1];
+       rc = asprintf(&dest_file, "%s-dest", source_file);
+       if (rc == -1) {
+               fprintf(stderr, "Allocation failure\n");
+               return EXIT_FAILURE;
+       }
+
+       /* Play nice with Lustre test scripts. Non-line buffered output
+        * stream under I/O redirection may appear incorrectly. */
+       setvbuf(stdout, NULL, _IOLBF, 0);
+
+       cleanup();
+       atexit(cleanup);
+
+       /* Compute crc of original file */
+       source_crc = compute_crc(source_file);
+
+       PERFORM(test10);
+       PERFORM(test11);
+       PERFORM(test12);
+       PERFORM(test13);
+       PERFORM(test14);
+       PERFORM(test15);
+       PERFORM(test16);
+
+       return EXIT_SUCCESS;
+}