#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311
#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
#define OBD_FAIL_LDLM_CLOSE_THREAD 0x313
+#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314
#define OBD_FAIL_OSC 0x400
#define OBD_FAIL_OSC_BRW_READ_BULK 0x401
unlock_res(res);
rc = ldlm_run_bl_ast_work(&rpc_list);
lock_res(res);
- if (rc == -ERESTART)
+
+ if (rc == -ERESTART) {
+ /* lock was granted while resource was unlocked. */
+ if (lock->l_granted_mode == lock->l_req_mode) {
+ /* bug 11300: if the lock has already been
+ * granted, bail out early. Otherwise we would go
+ * to restart, where ldlm_resource_unlink will be
+ * called and cause the interval node to be
+ * freed, and we would then fail at
+ * ldlm_extent_add_lock() */
+ *flags &= ~(LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV |
+ LDLM_FL_BLOCK_WAIT);
+ GOTO(out, rc = 0);
+ }
+
GOTO(restart, -ERESTART);
+ }
+
*flags |= LDLM_FL_BLOCK_GRANTED;
/* this way we force client to wait for the lock
* endlessly once the lock is enqueued -bzzz */
int local = ns_is_client(res->lr_namespace);
ldlm_processing_policy policy;
ldlm_error_t rc = ELDLM_OK;
+ struct ldlm_interval *node = NULL;
ENTRY;
do_gettimeofday(&lock->l_enqueued_time);
}
}
+ /* A replaying lock might already be in the granted list, so
+ * unlinking the lock will cause the interval node to be freed. We
+ * have to allocate the interval node early; otherwise we can't
+ * regrant this lock in the future. - jay */
+ if (!local && (*flags & LDLM_FL_REPLAY) && res->lr_type == LDLM_EXTENT)
+ OBD_SLAB_ALLOC(node, ldlm_interval_slab, CFS_ALLOC_IO,
+ sizeof(*node));
+
lock_res_and_lock(lock);
if (local && lock->l_req_mode == lock->l_granted_mode) {
- /* The server returned a blocked lock, but it was granted before
- * we got a chance to actually enqueue it. We don't need to do
- * anything else. */
+ /* The server returned a blocked lock, but it was granted
+ * before we got a chance to actually enqueue it. We don't
+ * need to do anything else. */
*flags &= ~(LDLM_FL_BLOCK_GRANTED |
LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
GOTO(out, ELDLM_OK);
}
+ ldlm_resource_unlink_lock(lock);
+ if (res->lr_type == LDLM_EXTENT && lock->l_tree_node == NULL) {
+ if (node == NULL) {
+ ldlm_lock_destroy_nolock(lock);
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ ldlm_interval_attach(node, lock);
+ node = NULL;
+ }
+
/* Some flags from the enqueue want to make it into the AST, via the
* lock's l_flags. */
lock->l_flags |= *flags & LDLM_AST_DISCARD_DATA;
*
* FIXME (bug 268): Detect obvious lies by checking compatibility in
* granted/converting queues. */
- ldlm_resource_unlink_lock(lock);
if (local) {
if (*flags & LDLM_FL_BLOCK_CONV)
ldlm_resource_add_lock(res, &res->lr_converting, lock);
GOTO(out, rc);
out:
unlock_res_and_lock(lock);
+ if (node)
+ OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
return rc;
}
LDLM_DEBUG(lock, "client completion callback handler START");
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
+ int to = cfs_time_seconds(1);
+ while (to > 0) {
+ to = schedule_timeout(to);
+ if (lock->l_granted_mode == lock->l_req_mode ||
+ lock->l_destroyed)
+ break;
+ }
+ }
+
lock_res_and_lock(lock);
+ if (lock->l_destroyed ||
+ lock->l_granted_mode == lock->l_req_mode) {
+ /* bug 11300: the lock has already been granted */
+ unlock_res_and_lock(lock);
+ LDLM_DEBUG(lock, "Double grant race happened");
+ LDLM_LOCK_PUT(lock);
+ EXIT;
+ return;
+ }
/* If we receive the completion AST before the actual enqueue returned,
* then we might need to switch lock modes, resources, or extents. */
RETURN (0);
}
+ /* Force a known safe race, send a cancel to the server for a lock
+ * which the server has already started a blocking callback on. */
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
+ lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
+ rc = ldlm_cli_cancel(&dlm_req->lock_handle[0]);
+ if (rc < 0)
+ CERROR("ldlm_cli_cancel: %d\n", rc);
+ }
+
lock = ldlm_handle2lock_ns(ns, &dlm_req->lock_handle[0]);
if (!lock) {
CDEBUG(D_DLMTRACE, "callback on lock "LPX64" - lock "
run_test 30 "recreate file race ========="
+test_31() { # write one block via $DIR, read it back via $DIR2 with fail_loc 0x314 set; the dd block counts must match
+ mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" # NOTE(review): dir is created under $DIR1 but written via $DIR below - presumably the same mount; confirm
+ writes=`LANG=C dd if=/dev/zero of=$DIR/$tdir/$tfile count=1 2>&1 |
+ awk 'BEGIN { FS="+" } /out/ {print $1}'`
+ #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314
+ sysctl -w lustre.fail_loc=0x314 # set the fail_loc value named in the #define note above before reading
+ reads=`LANG=C dd if=$DIR2/$tdir/$tfile of=/dev/null 2>&1 |
+ awk 'BEGIN { FS="+" } /in/ {print $1}'`
+ [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes # awk kept the field before "+" on dd's in/out status lines; fail unless the two counts are equal
+}
+run_test 31 "voluntary cancel / blocking ast race=============="
+
log "cleanup: ======================================================"
check_and_cleanup_lustre