From: Vitaly Fertman Date: Fri, 25 Apr 2014 22:58:25 +0000 (+0400) Subject: LU-2177 ldlm: flock completion fixes. X-Git-Tag: 2.5.59~69 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=381604c5c45c7f7394185f434bb8ad1dd4ed88af LU-2177 ldlm: flock completion fixes. move checks for FAILED, DESTROYED flags under ldlm spinlock, destroy flock atomically with the check it is not destroyed yet. do not put the granted flock into the resource if this is UNLOCK, TEST, or DEADLOCK'ed flock. a set of flock tests which races reply completion for new flock request/test flock/unlock with namespace cleanup on eviction. Signed-off-by: Vitaly Fertman Change-Id: I97fa040a26eda6c6728929d10b29c2316db663e3 Reviewed-by: Alexey Lyashkov Reviewed-by: Andriy Skulysh Tested-by: Alexander Lezhoev Reviewed-by: Vitaly Fertman Xyratex-bug-id: MRP-1588 Reviewed-on: http://review.whamcloud.com/10005 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- diff --git a/libcfs/include/libcfs/libcfs_fail.h b/libcfs/include/libcfs/libcfs_fail.h index f8490cc..51f07bc 100644 --- a/libcfs/include/libcfs/libcfs_fail.h +++ b/libcfs/include/libcfs/libcfs_fail.h @@ -140,6 +140,9 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set) #define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \ cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_ORSET) +#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \ + cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_RESET) + #define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \ cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET) diff --git a/libcfs/libcfs/fail.c b/libcfs/libcfs/fail.c index 509c3c2..3d1d3a0 100644 --- a/libcfs/libcfs/fail.c +++ b/libcfs/libcfs/fail.c @@ -93,8 +93,9 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set) } } - if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) && - (value & CFS_FAIL_ONCE)) + /* Take into account the current call
for FAIL_ONCE for ORSET only, + * as RESET is a new fail_loc, it does not change the current call */ + if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE)) set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); /* Lost race to set CFS_FAILED_BIT. */ if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { diff --git a/lustre/include/lustre_dlm_flags.h b/lustre/include/lustre_dlm_flags.h index 283546d..27ff3f7 100644 --- a/lustre/include/lustre_dlm_flags.h +++ b/lustre/include/lustre_dlm_flags.h @@ -37,24 +37,6 @@ /** l_flags bits marked as "all_flags" bits */ #define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL -/** l_flags bits marked as "ast" bits */ -#define LDLM_FL_AST_MASK 0x0000000080008000ULL - -/** l_flags bits marked as "blocked" bits */ -#define LDLM_FL_BLOCKED_MASK 0x000000000000000EULL - -/** l_flags bits marked as "gone" bits */ -#define LDLM_FL_GONE_MASK 0x0006004000000000ULL - -/** l_flags bits marked as "inherit" bits */ -#define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL - -/** l_flags bits marked as "off_wire" bits */ -#define LDLM_FL_OFF_WIRE_MASK 0x00FFFFFF00000000ULL - -/** l_flags bits marked as "on_wire" bits */ -#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F932FULL - /** extent, mode, or resource changed */ #define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL // bit 0 #define ldlm_is_lock_changed(_l) LDLM_TEST_FLAG(( _l), 1ULL << 0) @@ -366,6 +348,27 @@ #define ldlm_set_excl(_l) LDLM_SET_FLAG(( _l), 1ULL << 55) #define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55) +/** l_flags bits marked as "ast" bits */ +#define LDLM_FL_AST_MASK (LDLM_FL_FLOCK_DEADLOCK |\ + LDLM_FL_AST_DISCARD_DATA) + +/** l_flags bits marked as "blocked" bits */ +#define LDLM_FL_BLOCKED_MASK (LDLM_FL_BLOCK_GRANTED |\ + LDLM_FL_BLOCK_CONV |\ + LDLM_FL_BLOCK_WAIT) + +/** l_flags bits marked as "gone" bits */ +#define LDLM_FL_GONE_MASK (LDLM_FL_DESTROYED |\ + LDLM_FL_FAILED) + +/** l_flags bits marked as "inherit" bits */ +/* Flags inherited from wire on enqueue/reply between 
client/server. */ +/* NO_TIMEOUT flag to force ldlm_lock_match() to wait with no timeout. */ +/* TEST_LOCK flag to not let TEST lock to be granted. */ +#define LDLM_FL_INHERIT_MASK (LDLM_FL_CANCEL_ON_BLOCK |\ + LDLM_FL_NO_TIMEOUT |\ + LDLM_FL_TEST_LOCK) + /** test for ldlm_lock flag bit set */ #define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 49efb01..7365f60 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -345,6 +345,10 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b #define OBD_FAIL_LDLM_OST_LVB 0x31c #define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d +#define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320 +#define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321 +#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 +#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 /* LOCKLESS IO */ #define OBD_FAIL_LDLM_SET_CONTENTION 0x385 diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 09655be..930db54 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -146,7 +146,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, __u64 flags) LASSERT(cfs_hlist_unhashed(&lock->l_exp_flock_hash)); cfs_list_del_init(&lock->l_res_link); - if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) { + if (flags == LDLM_FL_WAIT_NOREPROC) { /* client side - set a flag to prevent sending a CANCEL */ lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING; @@ -673,27 +673,21 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) int rc = 0; ENTRY; + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4); + if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT3)) { + lock_res_and_lock(lock); + lock->l_flags |= LDLM_FL_FAIL_LOC; + unlock_res_and_lock(lock); + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT3, 4); + } CDEBUG(D_DLMTRACE, "flags: "LPX64" data: %p getlk: %p\n", flags, data, getlk); - /* Import invalidation. 
We need to actually release the lock - * references being held, so that it can go away. No point in - * holding the lock even if app still believes it has it, since - * server already dropped it anyway. Only for granted locks too. */ - if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) == - (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) { - if (lock->l_req_mode == lock->l_granted_mode && - lock->l_granted_mode != LCK_NL && - NULL == data) - ldlm_lock_decref_internal(lock, lock->l_req_mode); - - /* Need to wake up the waiter if we were evicted */ - wake_up(&lock->l_waitq); - RETURN(0); - } - LASSERT(flags != LDLM_FL_WAIT_NOREPROC); + if (flags & LDLM_FL_FAILED) + goto granted; + if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV))) { if (NULL == data) @@ -733,12 +727,21 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) granted: OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10); - if (ldlm_is_failed(lock)) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed"); - RETURN(-EIO); - } - - LDLM_DEBUG(lock, "client-side enqueue granted"); + if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT4)) { + lock_res_and_lock(lock); + /* DEADLOCK is always set with CBPENDING */ + lock->l_flags |= LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING; + unlock_res_and_lock(lock); + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT4, 4); + } + if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT5)) { + lock_res_and_lock(lock); + /* DEADLOCK is always set with CBPENDING */ + lock->l_flags |= LDLM_FL_FAIL_LOC | + LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING; + unlock_res_and_lock(lock); + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT5, 4); + } lock_res_and_lock(lock); @@ -749,22 +752,54 @@ granted: if (ldlm_is_destroyed(lock)) { unlock_res_and_lock(lock); LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - RETURN(0); - } - /* take lock off the deadlock detection hash list. 
*/ - ldlm_flock_blocking_unlink(lock); + /* An error is still to be returned, to propagate it up to + * ldlm_cli_enqueue_fini() caller. */ + RETURN(-EIO); + } /* ldlm_lock_enqueue() has already placed lock on the granted list. */ - cfs_list_del_init(&lock->l_res_link); + ldlm_resource_unlink_lock(lock); + + /* Import invalidation. We need to actually release the lock + * references being held, so that it can go away. No point in + * holding the lock even if app still believes it has it, since + * server already dropped it anyway. Only for granted locks too. */ + /* Do the same for DEADLOCK'ed locks. */ + if (ldlm_is_failed(lock) || ldlm_is_flock_deadlock(lock)) { + int mode; + + if (flags & LDLM_FL_TEST_LOCK) + LASSERT(ldlm_is_test_lock(lock)); + + if (ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock)) + mode = flock_type(getlk); + else + mode = lock->l_granted_mode; + + if (ldlm_is_flock_deadlock(lock)) { + LDLM_DEBUG(lock, "client-side enqueue deadlock " + "received"); + rc = -EDEADLK; + } + ldlm_flock_destroy(lock, mode, LDLM_FL_WAIT_NOREPROC); + unlock_res_and_lock(lock); + + /* Need to wake up the waiter if we were evicted */ + wake_up(&lock->l_waitq); + + /* An error is still to be returned, to propagate it up to + * ldlm_cli_enqueue_fini() caller. */ + RETURN(rc ? 
: -EIO); + } + + LDLM_DEBUG(lock, "client-side enqueue granted"); - if (ldlm_is_flock_deadlock(lock)) { - LDLM_DEBUG(lock, "client-side enqueue deadlock received"); - rc = -EDEADLK; - } else if (flags & LDLM_FL_TEST_LOCK) { + if (flags & LDLM_FL_TEST_LOCK) { /* fcntl(F_GETLK) request */ /* The old mode was saved in getlk->fl_type so that if the mode * in the lock changes we can decref the appropriate refcount.*/ + LASSERT(ldlm_is_test_lock(lock)); ldlm_flock_destroy(lock, flock_type(getlk), LDLM_FL_WAIT_NOREPROC); switch (lock->l_granted_mode) { diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 33ada0a..e43f5c1 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1148,6 +1148,21 @@ void ldlm_grant_lock(struct ldlm_lock *lock, cfs_list_t *work_list) check_res_locked(res); lock->l_granted_mode = lock->l_req_mode; + + if (work_list && lock->l_completion_ast != NULL) + ldlm_add_ast_work_item(lock, NULL, work_list); + + /* We should not add locks to granted list in the following cases: + * - this is an UNLOCK but not a real lock; + * - this is a TEST lock; + * - this is a F_CANCELLK lock (async flock has req_mode == 0) + * - this is a deadlock (flock cannot be granted) */ + if (lock->l_req_mode == 0 || + lock->l_req_mode == LCK_NL || + ldlm_is_test_lock(lock) || + ldlm_is_flock_deadlock(lock)) + RETURN_EXIT; + if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) ldlm_grant_lock_with_skiplist(lock); else if (res->lr_type == LDLM_EXTENT) @@ -1158,9 +1173,6 @@ void ldlm_grant_lock(struct ldlm_lock *lock, cfs_list_t *work_list) if (lock->l_granted_mode < res->lr_most_restr) res->lr_most_restr = lock->l_granted_mode; - if (work_list && lock->l_completion_ast != NULL) - ldlm_add_ast_work_item(lock, NULL, work_list); - ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock); EXIT; } @@ -1722,6 +1734,8 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns, * lock's l_flags. 
*/ if (*flags & LDLM_FL_AST_DISCARD_DATA) ldlm_set_ast_discard_data(lock); + if (*flags & LDLM_FL_TEST_LOCK) + ldlm_set_test_lock(lock); /* This distinction between local lock trees is very important; a client * namespace only has information about locks taken by that client, and diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index d01c9d6..44b9676 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -504,20 +504,23 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns, else LDLM_DEBUG(lock, "lock was granted or failed in race"); - ldlm_lock_decref_internal(lock, mode); - - /* XXX - HACK because we shouldn't call ldlm_lock_destroy() - * from llite/file.c/ll_file_flock(). */ - /* This code makes for the fact that we do not have blocking handler on - * a client for flock locks. As such this is the place where we must - * completely kill failed locks. (interrupted and those that - * were waiting to be granted when server evicted us. */ - if (lock->l_resource->lr_type == LDLM_FLOCK) { - lock_res_and_lock(lock); - ldlm_resource_unlink_lock(lock); - ldlm_lock_destroy_nolock(lock); - unlock_res_and_lock(lock); - } + /* XXX - HACK because we shouldn't call ldlm_lock_destroy() + * from llite/file.c/ll_file_flock(). */ + /* This code makes for the fact that we do not have blocking handler on + * a client for flock locks. As such this is the place where we must + * completely kill failed locks. (interrupted and those that + * were waiting to be granted when server evicted us. 
*/ + if (lock->l_resource->lr_type == LDLM_FLOCK) { + lock_res_and_lock(lock); + if (!ldlm_is_destroyed(lock)) { + ldlm_resource_unlink_lock(lock); + ldlm_lock_decref_internal_nolock(lock, mode); + ldlm_lock_destroy_nolock(lock); + } + unlock_res_and_lock(lock); + } else { + ldlm_lock_decref_internal(lock, mode); + } } /** @@ -605,10 +608,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, *flags = ldlm_flags_from_wire(reply->lock_flags); lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags & LDLM_FL_INHERIT_MASK); - /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match() - * to wait with no timeout as well */ - lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags & - LDLM_FL_NO_TIMEOUT); unlock_res_and_lock(lock); CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: "LPX64"\n", diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 00f083a..11937d2 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -725,8 +725,15 @@ static void cleanup_resource(struct ldlm_resource *res, cfs_list_t *q, * will go away ... 
*/ unlock_res(res); LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); + if (lock->l_flags & LDLM_FL_FAIL_LOC) { + schedule_timeout_and_set_state( + TASK_UNINTERRUPTIBLE, + cfs_time_seconds(4)); + set_current_state(TASK_RUNNING); + } if (lock->l_completion_ast) - lock->l_completion_ast(lock, 0, NULL); + lock->l_completion_ast(lock, + LDLM_FL_FAILED, NULL); LDLM_LOCK_RELEASE(lock); continue; } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 4fa91dc..0b39e4d 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2892,6 +2892,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) struct md_op_data *op_data; struct lustre_handle lockh = {0}; ldlm_policy_data_t flock = {{0}}; + int fl_type = file_lock->fl_type; __u64 flags = 0; int rc; int rc2 = 0; @@ -2927,7 +2928,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner) flock.l_flock.owner = (unsigned long)file_lock->fl_pid; - switch (file_lock->fl_type) { + switch (fl_type) { case F_RDLCK: einfo.ei_mode = LCK_PR; break; @@ -2946,8 +2947,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) einfo.ei_mode = LCK_PW; break; default: - CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", - file_lock->fl_type); + CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type); RETURN (-ENOTSUPP); } @@ -2969,15 +2969,16 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) case F_GETLK64: #endif flags = LDLM_FL_TEST_LOCK; - /* Save the old mode so that if the mode in the lock changes we - * can decrement the appropriate reader or writer refcount. */ - file_lock->fl_type = einfo.ei_mode; break; default: CERROR("unknown fcntl lock command: %d\n", cmd); RETURN (-EINVAL); } + /* Save the old mode so that if the mode in the lock changes we + * can decrement the appropriate reader or writer refcount. 
*/ + file_lock->fl_type = einfo.ei_mode; + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) @@ -2991,6 +2992,10 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, op_data, &lockh, &flock, 0, NULL /* req */, flags); + /* Restore the file lock type if not TEST lock. */ + if (!(flags & LDLM_FL_TEST_LOCK)) + file_lock->fl_type = fl_type; + if ((file_lock->fl_flags & FL_FLOCK) && (rc == 0 || file_lock->fl_type == F_UNLCK)) rc2 = flock_lock_file_wait(file, file_lock); diff --git a/lustre/tests/flocks_test.c b/lustre/tests/flocks_test.c index ab38fa0..a4a00a8 100644 --- a/lustre/tests/flocks_test.c +++ b/lustre/tests/flocks_test.c @@ -62,9 +62,10 @@ int t_fcntl(int fd, int cmd, ...) va_end(ap); rc = fcntl(fd, cmd); if (rc == -1) { + rc = -errno; fprintf(stderr, "fcntl GETFL failed: %s\n", strerror(errno)); - return(1); + return rc; } break; case F_SETFL: @@ -72,9 +73,10 @@ int t_fcntl(int fd, int cmd, ...) va_end(ap); rc = fcntl(fd, cmd, arg); if (rc == -1) { + rc = -errno; fprintf(stderr, "fcntl SETFL %ld failed: %s\n", arg, strerror(errno)); - return(1); + return rc ; } break; case F_GETLK: @@ -84,9 +86,10 @@ int t_fcntl(int fd, int cmd, ...) va_end(ap); rc = fcntl(fd, cmd, lock); if (rc == -1) { + rc = -errno; fprintf(stderr, "fcntl cmd %d failed: %s\n", cmd, strerror(errno)); - return(1); + return rc ; } break; case F_DUPFD: @@ -94,15 +97,16 @@ int t_fcntl(int fd, int cmd, ...) 
va_end(ap); rc = fcntl(fd, cmd, arg); if (rc == -1) { + rc = -errno; fprintf(stderr, "fcntl F_DUPFD %d failed: %s\n", (int)arg, strerror(errno)); - return(1); + return rc; } break; default: va_end(ap); fprintf(stderr, "fcntl cmd %d not supported\n", cmd); - return(1); + return rc; } return rc; } @@ -149,7 +153,7 @@ int t1(int argc, char *argv[]) } if ((fd = open(argv[4], O_RDWR)) < 0) { - fprintf(stderr, "Couldn't open file: %s\n", argv[3]); + fprintf(stderr, "Couldn't open file: %s\n", argv[4]); return EXIT_FAILURE; } @@ -249,7 +253,7 @@ int t2(int argc, char* argv[]) t_fcntl(fd, F_SETFL, O_APPEND); rc = t_fcntl(fd, F_GETFL); - if ((rc & O_APPEND) == 0) { + if ((rc < 0) || (rc & O_APPEND) == 0) { fprintf(stderr, "error get flag: ret %x\n", rc); rc = EXIT_FAILURE; goto out; @@ -296,7 +300,7 @@ int t3(int argc, char *argv[]) } if ((fd = open(argv[2], O_RDWR)) < 0) { - fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + fprintf(stderr, "Couldn't open file: %s\n", argv[2]); return EXIT_FAILURE; } if (flock(fd, LOCK_EX | LOCK_NB) < 0) { @@ -305,7 +309,7 @@ int t3(int argc, char *argv[]) goto out; } if ((fd2 = open(argv[2], O_RDWR)) < 0) { - fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + fprintf(stderr, "Couldn't open file: %s\n", argv[2]); rc = EXIT_FAILURE; goto out; } @@ -438,6 +442,90 @@ out: return rc; } +#define T5_USAGE \ + "Usage: ./flocks_test 5 set|get|unlock [read|write] [sleep N] file1\n"\ +" set: F_SETLKW F_WRLCK\n" \ +" get: F_GETLK F_WRLCK (conflict)\n" \ +" unlock: F_SETLKW F_UNLCK\n" \ +" read|write: lock mode, write by default\n" \ +" sleep N: sleep for N secs after fcntl\n" \ +" file1: fcntl is called for this file\n" + +int t5(int argc, char *argv[]) +{ + struct flock lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + }; + + int setlk = 0, getlk = 0, unlk = 0, secs = 0; + int pos; + int fd; + int rc = 0; + + if (argc < 4 || argc > 7) { + fprintf(stderr, T5_USAGE); + return EXIT_FAILURE; + } + + if (!strncmp(argv[2], "set", 4)) + 
setlk = 1; + else if (!strncmp(argv[2], "get", 4)) + getlk = 1; + else if (!strncmp(argv[2], "unlock", 7)) + unlk = 1; + else { + fprintf(stderr, "Wrong 2nd argument: %s\n", argv[2]); + return EXIT_FAILURE; + } + + pos = 3; + + if (!strncmp(argv[pos], "read", 5)) { + lock.l_type = F_RDLCK; + pos++; + } else if (!strncmp(argv[pos], "write", 6)) { + lock.l_type = F_WRLCK; + pos++; + } + + if (!strncmp(argv[pos], "sleep", 6)) { + secs = atoi(argv[pos + 1]); + if (secs < 0 || secs > 10) { + fprintf(stderr, "Sleep argument is wrong: %s\n", + argv[pos + 1]); + return EXIT_FAILURE; + } + pos += 2; + } + + fd = open(argv[pos], O_RDWR); + if (fd < 0) { + fprintf(stderr, "Couldn't open file: %s\n", argv[pos]); + return EXIT_FAILURE; + } + + fprintf(stderr, "\nFLOCKS_TEST 5: %s %s flock\n", + setlk ? "SET" : getlk ? "GET" : "UNLOCK", + lock.l_type == F_WRLCK ? "write" : "read"); + + if (setlk) { + rc = t_fcntl(fd, F_SETLKW, &lock); + } else if (getlk) { + rc = t_fcntl(fd, F_GETLK, &lock); + } else if (unlk) { + lock.l_type = F_UNLCK; + rc = t_fcntl(fd, F_SETLKW, &lock); + } + + if (secs) + sleep(secs); + + close(fd); + return rc < 0 ? 
-rc : 0; + +} + /** ============================================================== * program entry */ @@ -470,7 +558,10 @@ int main(int argc, char* argv[]) case 4: rc = t4(argc, argv); break; - default: + case 5: + rc = t5(argc, argv); + break; + default: fprintf(stderr, "unknow test number %s\n", argv[1]); break; } diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 1d420f9..e679906 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1833,6 +1833,110 @@ test_112a() { } run_test 112a "bulk resend while orignal request is in progress" +# parameters: fail_loc CMD RC +test_120_reply() { + local PID + local PID2 + local rc=5 + local fail + + #define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320 + #define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321 + #define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 + #define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 + + echo + echo -n "** FLOCK REPLY vs. EVICTION race, lock $2" + [ "$1" = "CLEANUP" ] && + fail=0x80000320 && echo ", $1 cp first" + [ "$1" = "REPLY" ] && + fail=0x80000321 && echo ", $1 cp first" + [ "$1" = "DEADLOCK CLEANUP" ] && + fail=0x80000322 && echo " DEADLOCK, CLEANUP cp first" + [ "$1" = "DEADLOCK REPLY" ] && + fail=0x80000323 && echo " DEADLOCK, REPLY cp first" + + if [ x"$2" = x"get" ]; then + #for TEST lock, take a conflict in advance + # sleep longer than evictor to not confuse fail_loc: 2+2+4 + echo "** Taking conflict **" + flocks_test 5 set read sleep 10 $DIR/$tfile & + PID2=$! + + sleep 2 + fi + + $LCTL set_param fail_loc=$fail + + flocks_test 5 $2 write $DIR/$tfile & + PID=$! + + sleep 2 + echo "** Evicting and re-connecting client **" + mds_evict_client + + client_reconnect + + if [ x"$2" = x"get" ]; then + wait $PID2 + fi + + wait $PID + rc=$? + + # check if the return value is allowed + [ $rc -eq $3 ] && rc=0 + + $LCTL set_param fail_loc=0 + return $rc +} + +# a lock is taken, unlock vs. cleanup_resource() race for destroying +# the ORIGINAL lock. 
+test_120_destroy() +{ + local PID + + flocks_test 5 set write sleep 4 $DIR/$tfile & + PID=$! + sleep 2 + + # let unlock sleep in CP CB + $LCTL set_param fail_loc=$1 + sleep 4 + + # let cleanup sleep in CP CB + mds_evict_client + + client_reconnect + + wait $PID + rc=$? + + $LCTL set_param fail_loc=0 + return $rc +} + +test_120() { + flock_is_enabled || { skip "mount w/o flock enabled" && return; } + touch $DIR/$tfile + + test_120_reply "CLEANUP" set 5 || error "SET race failed" + test_120_reply "CLEANUP" get 5 || error "GET race failed" + test_120_reply "CLEANUP" unlock 5 || error "UNLOCK race failed" + + test_120_reply "REPLY" set 5 || error "SET race failed" + test_120_reply "REPLY" get 5 || error "GET race failed" + test_120_reply "REPLY" unlock 5 || error "UNLOCK race failed" + + # DEADLOCK tests + test_120_reply "DEADLOCK CLEANUP" set 5 || error "DEADLOCK race failed" + test_120_reply "DEADLOCK REPLY" set 35 || error "DEADLOCK race failed" + + test_120_destroy 0x320 || error "unlock-cleanup race failed" +} +run_test 120 "flock race: completion vs.
evict" + complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 57fbe1a..e2034b8 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -6748,7 +6748,7 @@ run_test 104b "$RUNAS lfs check servers test ====================" test_105a() { # doesn't work on 2.4 kernels touch $DIR/$tfile - if [ -n "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ]; then + if $(flock_is_enabled); then flocks_test 1 on -f $DIR/$tfile || error "fail flock on" else flocks_test 1 off -f $DIR/$tfile || error "fail flock off" @@ -6759,7 +6759,7 @@ run_test 105a "flock when mounted without -o flock test ========" test_105b() { touch $DIR/$tfile - if [ -n "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ]; then + if $(flock_is_enabled); then flocks_test 1 on -c $DIR/$tfile || error "fail flock on" else flocks_test 1 off -c $DIR/$tfile || error "fail flock off" @@ -6770,7 +6770,7 @@ run_test 105b "fcntl when mounted without -o flock test ========" test_105c() { touch $DIR/$tfile - if [ -n "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ]; then + if $(flock_is_enabled); then flocks_test 1 on -l $DIR/$tfile || error "fail flock on" else flocks_test 1 off -l $DIR/$tfile || error "fail flock off" @@ -6782,8 +6782,7 @@ run_test 105c "lockf when mounted without -o flock test ========" test_105d() { # bug 15924 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -p $DIR/$tdir - [ -z "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ] && - skip "mount w/o flock enabled" && return + flock_is_enabled || { skip "mount w/o flock enabled" && return; } #define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 $LCTL set_param fail_loc=0x80000315 flocks_test 2 $DIR/$tdir @@ -6791,8 +6790,7 @@ test_105d() { # bug 15924 run_test 105d "flock race (should not freeze) ========" test_105e() { # bug 22660 && 22040 - [ -z "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ] && - skip "mount w/o flock enabled" && return + 
flock_is_enabled || { skip "mount w/o flock enabled" && return; } touch $DIR/$tfile flocks_test 3 $DIR/$tfile } diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index d6d1ee1..816078a 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2281,7 +2281,7 @@ client_evicted() { ! client_up $1 } -client_reconnect() { +client_reconnect_try() { uname -n >> $MOUNT/recon if [ -z "$CLIENTS" ]; then df $MOUNT; uname -n >> $MOUNT/recon @@ -2294,6 +2294,14 @@ client_reconnect() { rm $MOUNT/recon } +client_reconnect() { + # one client_reconnect_try call does not always do the job... + while true ; do + client_reconnect_try && break + sleep 1 + done +} + affected_facets () { local facet=$1 @@ -4397,6 +4405,13 @@ lru_resize_disable() lctl set_param ldlm.namespaces.*$1*.lru_size $(default_lru_size) } +flock_is_enabled() +{ + local RC=0 + [ -z "$(mount | grep "$MOUNT.*flock" | grep -v noflock)" ] && RC=1 + return $RC +} + pgcache_empty() { local FILE for FILE in `lctl get_param -N "llite.*.dump_page_cache"`; do