f-desc = 'lock request has intent';
};
-// Skipped bits 13, 14 and 15
+// Skipped bits 13, 14
+
+flag[15] = {
+ f-name = flock_deadlock;
+ f-mask = on_wire, ast;
+ f-desc = 'flock deadlock detected';
+};
flag[16] = {
f-name = discard_data;
#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */
#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */
#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */
+#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* improved flock deadlock detection */
+
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
* flag value is not in use on some other branch. Please clear any such
OBD_CONNECT_EINPROGRESS | \
OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \
OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LAYOUTLOCK |\
- OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE)
+ OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE |\
+ OBD_CONNECT_FLOCK_DEAD)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
#ifndef LDLM_ALL_FLAGS_MASK
/** l_flags bits marked as "all_flags" bits */
-#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F132FULL
+#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL
/** l_flags bits marked as "ast" bits */
-#define LDLM_FL_AST_MASK 0x0000000080000000ULL
+#define LDLM_FL_AST_MASK 0x0000000080008000ULL
/** l_flags bits marked as "blocked" bits */
#define LDLM_FL_BLOCKED_MASK 0x000000000000000EULL
#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL
/** l_flags bits marked as "on_wire" bits */
-#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F132FULL
+#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL
/** extent, mode, or resource changed */
#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL // bit 0
#define ldlm_set_has_intent(_l) LDLM_SET_FLAG(( _l), 1ULL << 12)
#define ldlm_clear_has_intent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 12)
+/** flock deadlock detected */
+#define LDLM_FL_FLOCK_DEADLOCK 0x0000000000008000ULL // bit 15
+#define ldlm_is_flock_deadlock(_l) LDLM_TEST_FLAG(( _l), 1ULL << 15)
+#define ldlm_set_flock_deadlock(_l) LDLM_SET_FLAG(( _l), 1ULL << 15)
+#define ldlm_clear_flock_deadlock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 15)
+
/** discard (no writeback) on cancel */
#define LDLM_FL_DISCARD_DATA 0x0000000000010000ULL // bit 16
#define ldlm_is_discard_data(_l) LDLM_TEST_FLAG(( _l), 1ULL << 16)
static int hf_lustre_ldlm_fl_replay = -1;
static int hf_lustre_ldlm_fl_intent_only = -1;
static int hf_lustre_ldlm_fl_has_intent = -1;
+static int hf_lustre_ldlm_fl_flock_deadlock = -1;
static int hf_lustre_ldlm_fl_discard_data = -1;
static int hf_lustre_ldlm_fl_no_timeout = -1;
static int hf_lustre_ldlm_fl_block_nowait = -1;
{LDLM_FL_REPLAY, "LDLM_FL_REPLAY"},
{LDLM_FL_INTENT_ONLY, "LDLM_FL_INTENT_ONLY"},
{LDLM_FL_HAS_INTENT, "LDLM_FL_HAS_INTENT"},
+ {LDLM_FL_FLOCK_DEADLOCK, "LDLM_FL_FLOCK_DEADLOCK"},
{LDLM_FL_DISCARD_DATA, "LDLM_FL_DISCARD_DATA"},
{LDLM_FL_NO_TIMEOUT, "LDLM_FL_NO_TIMEOUT"},
{LDLM_FL_BLOCK_NOWAIT, "LDLM_FL_BLOCK_NOWAIT"},
return 0;
}
+static void ldlm_flock_cancel_on_deadlock(struct ldlm_lock *lock,
+ cfs_list_t *work_list)
+{
+ CDEBUG(D_INFO, "reprocess deadlock req=%p\n", lock);
+
+ if ((exp_connect_flags(lock->l_export) &
+ OBD_CONNECT_FLOCK_DEAD) == 0) {
+ CERROR("deadlock found, but client doesn't "
+ "support flock canceliation\n");
+ } else {
+ LASSERT(lock->l_completion_ast);
+ LASSERT((lock->l_flags & LDLM_FL_AST_SENT) == 0);
+ lock->l_flags |= LDLM_FL_AST_SENT | LDLM_FL_CANCEL_ON_BLOCK |
+ LDLM_FL_FLOCK_DEADLOCK;
+ ldlm_flock_blocking_unlink(lock);
+ ldlm_resource_unlink_lock(lock);
+ ldlm_add_ast_work_item(lock, NULL, work_list);
+ }
+}
+
/**
* Process a granting attempt for flock lock.
* Must be called under ns lock held.
}
}
} else {
+ int reprocess_failed = 0;
lockmode_verify(mode);
/* This loop determines if there are existing locks
if (!ldlm_flocks_overlap(lock, req))
continue;
- if (!first_enq)
- RETURN(LDLM_ITER_CONTINUE);
+ if (!first_enq) {
+ reprocess_failed = 1;
+ if (ldlm_flock_deadlock(req, lock)) {
+ ldlm_flock_cancel_on_deadlock(req,
+ work_list);
+ RETURN(LDLM_ITER_CONTINUE);
+ }
+ continue;
+ }
if (*flags & LDLM_FL_BLOCK_NOWAIT) {
ldlm_flock_destroy(req, mode, *flags);
*flags |= LDLM_FL_BLOCK_GRANTED;
RETURN(LDLM_ITER_STOP);
}
+ if (reprocess_failed)
+ RETURN(LDLM_ITER_CONTINUE);
}
if (*flags & LDLM_FL_TEST_LOCK) {
/* ldlm_lock_enqueue() has already placed lock on the granted list. */
cfs_list_del_init(&lock->l_res_link);
- if (flags & LDLM_FL_TEST_LOCK) {
+ if (lock->l_flags & LDLM_FL_FLOCK_DEADLOCK) {
+ LDLM_DEBUG(lock, "client-side enqueue deadlock received");
+ rc = -EDEADLK;
+ } else if (flags & LDLM_FL_TEST_LOCK) {
/* fcntl(F_GETLK) request */
/* The old mode was saved in getlk->fl_type so that if the mode
* in the lock changes we can decref the appropriate refcount.*/
ldlm_process_flock_lock(lock, &noreproc, 1, &err, NULL);
}
unlock_res_and_lock(lock);
- RETURN(0);
+ RETURN(rc);
}
EXPORT_SYMBOL(ldlm_flock_completion_ast);
OBD_CONNECT_EINPROGRESS |
OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS |
- OBD_CONNECT_MAX_EASIZE;
+ OBD_CONNECT_MAX_EASIZE |
+ OBD_CONNECT_FLOCK_DEAD;
if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
data->ocd_connect_flags |= OBD_CONNECT_SOM;
"lightweight_conn",
"short_io",
"pingless",
+ "flock_deadlock",
"unknown",
NULL
};
OBD_CONNECT_SHORTIO);
LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
OBD_CONNECT_PINGLESS);
+ LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FLOCK_DEAD);
LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
(unsigned)OBD_CKSUM_CRC32);
LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
noinst_PROGRAMS += small_write multiop ll_sparseness_verify
noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany
noinst_PROGRAMS += openfilleddirunlink rename_many memhog
-noinst_PROGRAMS += mmap_sanity writemany reads flocks_test
+noinst_PROGRAMS += mmap_sanity writemany reads flocks_test flock_deadlock
noinst_PROGRAMS += write_time_limit rwv lgetxattr_size_check checkfiemap
bin_PROGRAMS = mcreate munlink
--- /dev/null
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+*/
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+*/
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/sem.h>
+#include <semaphore.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+#define LOCK_LEN 100
+
+struct flock flocks[4] = {
+ /* 1st region */
+ {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = 0,
+ .l_len = LOCK_LEN,
+ .l_pid = 0,
+ },
+ /* 2nd region */
+ {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = LOCK_LEN,
+ .l_len = LOCK_LEN,
+ .l_pid = 0,
+ },
+ /* 3rd region */
+ {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = 2 * LOCK_LEN,
+ .l_len = LOCK_LEN,
+ .l_pid = 0,
+ },
+ /* 2nd & 3rd regions */
+ {
+ .l_type = F_WRLCK,
+ .l_whence = SEEK_SET,
+ .l_start = LOCK_LEN,
+ .l_len = 2 * LOCK_LEN,
+ .l_pid = 0,
+ },
+};
+
+enum {
+ FLOCK_GET = 0,
+ FLOCK_PUT = 1,
+};
+
+#define flock_call(fd, num, get, label) \
+ flocks[num].l_type = get == FLOCK_GET ? F_WRLCK : F_UNLCK; \
+ printf("%d: %s lock%d [%llu, %llu]\n", pid, \
+ get == FLOCK_GET ? "taking" : "putting", \
+ num, (unsigned long long)flocks[num].l_start, \
+ (unsigned long long)flocks[num].l_start + flocks[num].l_len); \
+ rc = fcntl(fd, F_SETLKW, &flocks[num]); \
+ if (rc < 0) { \
+ rc = errno; \
+ fprintf(stderr, "%d: failed to %s lock%d, %s\n", \
+ pid, get == FLOCK_GET ? "take" : "put", \
+ num, strerror(errno)); \
+ goto label; \
+ } else { \
+ printf("%d: done\n", pid); \
+ }
+
+void catch_alarm()
+{
+ fprintf(stderr, "lock timeout\n");
+ exit(124);
+}
+
+int main(int argc, char* argv[])
+{
+ struct sigaction act;
+ int status;
+ pid_t wpid = 0;
+ int fd, i, pid, num = 0, rc = 0;
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <file>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+ fd = open(argv[1], O_RDWR|O_CREAT, (mode_t)0666);
+ if (fd < 0) {
+ fprintf(stderr, "error open file %s\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0; i < 2; i++) {
+ fflush(stdout);
+ pid = fork();
+ if (pid && i == 0)
+ wpid = pid;
+ if (pid == 0)
+ wpid = 0;
+ if (pid == 0 && i == 0) {
+ pid = getpid();
+
+ flock_call(fd, num, FLOCK_GET, err_lock0);
+
+ printf("%d sleeping 1\n", pid);
+ sleep(1);
+
+ /* First of all, it should get blocked on flocks[1]
+ * 2nd child. Later, should deadlock with flocks[2]
+ * parent, after cancelling flocks[1] 2nd child. */
+ printf("%d: taking lock3 [%llu, %llu]\n", pid,
+ (unsigned long long)flocks[3].l_start,
+ (unsigned long long)flocks[3].l_start +
+ flocks[3].l_len);
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = catch_alarm;
+ sigemptyset(&act.sa_mask);
+ sigaddset(&act.sa_mask, SIGALRM);
+ if (sigaction(SIGALRM, &act, NULL) < 0) {
+ fprintf(stderr, "SIGALRM signal setup failed"
+ ", errno: %d", errno);
+ rc = 3;
+ goto err_lock1;
+ }
+ alarm(5);
+ rc = fcntl(fd, F_SETLKW, &flocks[3]);
+ if (rc >= 0) {
+ fprintf(stderr, "%d: should not succeed to "
+ "take lock3\n", pid);
+
+ flock_call(fd, 3, FLOCK_PUT, err_lock1);
+ rc = EINVAL;
+ goto err_lock1;
+ }
+ if (errno != EDEADLK) {
+ rc = errno;
+ fprintf(stderr, "%d: failed to take lock3: "
+ "%s\n", pid, strerror(errno));
+ goto err_lock1;
+ }
+
+ printf("%d: expected deadlock\n", pid);
+
+ flock_call(fd, num, FLOCK_PUT, err_lock0);
+ break;
+ } else if (pid == 0 && i == 1) {
+ pid = getpid();
+
+ flock_call(fd, 1, FLOCK_GET, err_lock0);
+
+ /* Let flocks[2] 2nd child get granted and
+ * flocks[3] 1st child, flocks[0] parent get blocked.*/
+ printf("%d sleeping 2\n", pid);
+ sleep(2);
+
+ flock_call(fd, 1, FLOCK_PUT, err_lock0);
+ break;
+ } else if (pid && i == 1) {
+ pid = getpid();
+ num = 2;
+
+ /* Let flocks[1] 2nd child get granted first */
+ printf("%d: sleeping 1\n", pid);
+ sleep(1);
+
+ flock_call(fd, num, FLOCK_GET, err_lock0);
+
+ /* Should get blocked on flocks[0], 1st shild
+ * and succeed later. */
+ flock_call(fd, 0, FLOCK_GET, err_lock1);
+
+ flock_call(fd, 0, FLOCK_PUT, err_lock1);
+ flock_call(fd, num, FLOCK_PUT, err_lock0);
+ break;
+ }
+ }
+
+ if (pid == 0)
+ sleep(2);
+ if (wpid) {
+ waitpid(wpid, &status, 0);
+ rc = WEXITSTATUS(status);
+ }
+ printf("%d Exit\n", pid);
+ close(fd);
+ return rc;
+
+err_lock1:
+ flocks[num].l_type = F_UNLCK;
+ fcntl(fd, F_SETLKW, &flocks[num]);
+err_lock0:
+ close(fd);
+ return rc;
+}
}
run_test 234 "xattr cache should not crash on ENOMEM"
+test_235() {
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.52) ] &&
+ skip "Need MDS version at least 2.4.52" && return
+ flock_deadlock $DIR/$tfile
+ local RC=$?
+ case $RC in
+ 0)
+ ;;
+ 124) error "process hangs on a deadlock"
+ ;;
+ *) error "error executing flock_deadlock $DIR/$tfile"
+ ;;
+ esac
+}
+run_test 235 "LU-1715: flock deadlock detection does not work properly"
+
#
# tests that do cleanup/setup should be run at the end
#
CHECK_DEFINE_64X(OBD_CONNECT_LIGHTWEIGHT);
CHECK_DEFINE_64X(OBD_CONNECT_SHORTIO);
CHECK_DEFINE_64X(OBD_CONNECT_PINGLESS);
+ CHECK_DEFINE_64X(OBD_CONNECT_FLOCK_DEAD);
CHECK_VALUE_X(OBD_CKSUM_CRC32);
CHECK_VALUE_X(OBD_CKSUM_ADLER);
OBD_CONNECT_SHORTIO);
LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n",
OBD_CONNECT_PINGLESS);
+ LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, "found 0x%.16llxULL\n",
+ OBD_CONNECT_FLOCK_DEAD);
LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
(unsigned)OBD_CKSUM_CRC32);
LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",