* is: res lock -> exp_bl_list_lock -> wanting_lists_spinlock.
*/
struct list_head l_exp_list;
+
+ /**
+ * Used internally to track the in-flight blocking AST of this lock.
+ * There is no locking, so items must be claimed with an atomic
+ * exchange primitive (e.g. xchg_acquire() against NULL) before use.
+ */
+ struct ptlrpc_request *l_sent_bl_ast;
};
enum ldlm_match_flags {
struct ptlrpc_request *req,
void *arg, int rc);
/** Type of request resend call-back */
-typedef void (*ptlrpc_resend_cb_t)(struct ptlrpc_request *req,
- void *arg);
+typedef int (*ptlrpc_resend_cb_t)(struct ptlrpc_request *req, void *arg);
/**
* Definition of request pool structure.
void ptlrpc_abort_inflight(struct obd_import *imp);
void ptlrpc_cleanup_imp(struct obd_import *imp);
void ptlrpc_abort_set(struct ptlrpc_request_set *set);
+void ptlrpc_req_abort(struct ptlrpc_request *req);
struct ptlrpc_request_set *ptlrpc_prep_set(void);
struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
if (resource == NULL) {
libcfs_debug_msg(msgdata,
- "%pV ns: \?\? lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: \?\? rrc=\?\? type: \?\?\? flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d\n",
+ "%pV ns: \?\? lock: %pK/%#llx lrc: %d/%d,%d mode: %s/%s res: \?\? rrc=\?\? type: \?\?\? flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d/%pK\n",
&vaf,
lock,
lock->l_handle.h_cookie,
lock->l_remote_handle.cookie,
exp ? refcount_read(&exp->exp_handle.h_ref) : -99,
lock->l_pid, lock->l_callback_timestamp,
- lock->l_lvb_type);
+ lock->l_lvb_type, lock->l_sent_bl_ast);
va_end(args);
return;
}
switch (resource->lr_type) {
case LDLM_EXTENT:
libcfs_debug_msg(msgdata,
- "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s [%llu->%llu] (req %llu->%llu) gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d\n",
+ "%pV ns: %s lock: %pK/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s [%llu->%llu] (req %llu->%llu) gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d/%pK\n",
&vaf,
ldlm_lock_to_ns_name(lock), lock,
lock->l_handle.h_cookie,
lock->l_remote_handle.cookie,
exp ? refcount_read(&exp->exp_handle.h_ref) : -99,
lock->l_pid, lock->l_callback_timestamp,
- lock->l_lvb_type);
+ lock->l_lvb_type, lock->l_sent_bl_ast);
break;
case LDLM_FLOCK:
libcfs_debug_msg(msgdata,
- "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s pid: %d [%llu->%llu] flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld\n",
+ "%pV ns: %s lock: %pK/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s pid: %d [%llu->%llu] flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld\n",
&vaf,
ldlm_lock_to_ns_name(lock), lock,
lock->l_handle.h_cookie,
case LDLM_IBITS:
if (!lock->l_remote_handle.cookie)
libcfs_debug_msg(msgdata,
- "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s flags: %#llx pid: %u initiator: MDT%d\n",
+ "%pV ns: %s lock: %pK/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s flags: %#llx pid: %u initiator: MDT%04x, lvb_type: %d/%pK\n",
&vaf,
ldlm_lock_to_ns_name(lock),
lock, lock->l_handle.h_cookie,
atomic_read(&resource->lr_refcount),
ldlm_typename[resource->lr_type],
lock->l_flags, lock->l_pid,
- lock->l_policy_data.l_inodebits.li_initiator_id);
+ lock->l_policy_data.l_inodebits.li_initiator_id,
+ lock->l_lvb_type, lock->l_sent_bl_ast);
else
libcfs_debug_msg(msgdata,
- "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d\n",
+ "%pV ns: %s lock: %pK/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d/%pK\n",
&vaf,
ldlm_lock_to_ns_name(lock),
lock, lock->l_handle.h_cookie,
lock->l_remote_handle.cookie,
exp ? refcount_read(&exp->exp_handle.h_ref) : -99,
lock->l_pid, lock->l_callback_timestamp,
- lock->l_lvb_type);
+ lock->l_lvb_type, lock->l_sent_bl_ast);
break;
default:
libcfs_debug_msg(msgdata,
- "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d\n",
+ "%pV ns: %s lock: %pK/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d/%pK\n",
&vaf,
ldlm_lock_to_ns_name(lock),
lock, lock->l_handle.h_cookie,
lock->l_remote_handle.cookie,
exp ? refcount_read(&exp->exp_handle.h_ref) : -99,
lock->l_pid, lock->l_callback_timestamp,
- lock->l_lvb_type);
+ lock->l_lvb_type, lock->l_sent_bl_ast);
break;
}
va_end(args);
{
struct lnet_processid *peer = &req->rq_import->imp_connection->c_peer;
+ /* This AST was cancelled by lock cancel/convert */
+ if (rc == -ECANCELED)
+ return rc;
+
if (!req->rq_replied || (rc && rc != -EINVAL)) {
if (ldlm_is_cancel(lock)) {
LDLM_DEBUG(lock,
}
break;
case LDLM_BL_CALLBACK:
+ /* If nobody grabbed our linked request, drop our reference to
+ * match the earlier ptlrpc_request_addref().
+ * Three cases are possible here:
+ * - l_sent_bl_ast equals our request - we will free it
+ * - it is NULL - a cancel/convert thread already grabbed it
+ * - it is neither NULL nor req - a convert race: a lock convert
+ * came in, and by the time we got here the lock received
+ * another BL AST that was recorded here.
+ *
+ * The third case is the most annoying to handle. Once we have
+ * claimed the request with an atomic exchange, there is no safe
+ * way to put it back without risking that the other thread
+ * misses it, so we must release it here anyway, which brings
+ * the original problem back: if the other AST is not replied
+ * to in time, the other blocked thread will wait for a long
+ * time.
+ *
+ * To minimize this window, do an unlocked check first; this is
+ * safe because once l_sent_bl_ast has been observed as NULL or
+ * as some other request, it can never return to our req value.
+ * Either way, whatever we end up holding must be freed if not
+ * NULL.
+ */
+ if (lock->l_sent_bl_ast == req) {
+ struct ptlrpc_request *saved_req;
+
+ saved_req = xchg_acquire(&lock->l_sent_bl_ast, NULL);
+ if (saved_req)
+ ptlrpc_req_finished(saved_req);
+ }
+
if (rc != 0)
rc = ldlm_handle_ast_error(lock, req, rc, "blocking");
break;
RETURN(0);
}
-static void ldlm_update_resend(struct ptlrpc_request *req, void *data)
+static int ldlm_update_resend(struct ptlrpc_request *req, void *data)
{
struct ldlm_cb_async_args *ca = data;
struct ldlm_lock *lock = ca->ca_lock;
+ /* See if the lock was already canceled and, if so, do not
+ * resend. We check l_granted_mode directly rather than
+ * lock_is_granted(), since it is unclear whether the latter
+ * is susceptible to the same problem during a lock convert.
+ */
+ if (lock->l_granted_mode == LCK_NL) {
+ req->rq_delay_limit = 1;
+ return -ESTALE;
+ }
+
ldlm_refresh_waiting_lock(lock, ldlm_bl_timeout(lock));
+ return 0;
}
static inline int ldlm_ast_fini(struct ptlrpc_request *req,
/* Do not resend after lock callback timeout */
req->rq_delay_limit = ldlm_bl_timeout(lock);
req->rq_resend_cb = ldlm_update_resend;
+
+ /* Remember the request here so we can kill it if a lock
+ * cancel/convert arrives before the AST reply.
+ * For simplicity we only do this for the blocking AST:
+ * there can be only one, so we avoid the whole
+ * list-plus-locking mess. It does not help that there are
+ * no lock-specific spinlocks in struct ldlm_lock.
+ * To safely use the stored RPC, a caller must claim it with
+ * an atomic exchange against NULL so that two threads never
+ * work on the same RPC.
+ */
+ LASSERT(!lock->l_sent_bl_ast);
+ ptlrpc_request_addref(req);
+ lock->l_sent_bl_ast = req;
}
req->rq_send_state = LUSTRE_IMP_FULL;
struct obd_export *exp = req->rq_export;
struct ldlm_reply *dlm_rep;
struct ldlm_lock *lock;
+ struct ptlrpc_request *ast_req;
__u64 bits;
__u64 new_bits;
int rc;
GOTO(out_put, rc = -EPROTO);
}
+ /* Grab AST RPC for later abort */
+ ast_req = xchg_acquire(&lock->l_sent_bl_ast, NULL);
if (bits == new_bits) {
/*
* This can be valid situation if CONVERT RPCs are
ldlm_reprocess_all(lock->l_resource, bits);
}
+ /* See if there is an outstanding AST RPC for this lock
+ * and, if so, kill it.
+ */
+ if (ast_req) {
+ ptlrpc_req_abort(ast_req);
+ ptlrpc_req_finished(ast_req);
+ }
+
dlm_rep->lock_handle = lock->l_remote_handle;
ldlm_ibits_policy_local_to_wire(&lock->l_policy_data,
&dlm_rep->lock_desc.l_policy_data);
struct ldlm_lock *lock;
int i, count, done = 0;
unsigned int size;
+ struct ptlrpc_request *ast_req;
ENTRY;
&lock->l_export->exp_bl_lock_at,
delay);
}
+
+ /* Claim any in-flight blocking AST RPC early, before the
+ * cancel, to combat races with the AST reply handler.
+ */
+ ast_req = xchg_acquire(&lock->l_sent_bl_ast, NULL);
+
ldlm_lock_cancel(lock);
+ /* See if there is an outstanding AST RPC for this lock
+ * and, if so, kill it.
+ */
+ if (ast_req) {
+ ptlrpc_req_abort(ast_req);
+ ptlrpc_req_finished(ast_req);
+ }
+
LDLM_LOCK_PUT(lock);
}
if (pres != NULL) {
}
}
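+/**
+ * ptlrpc_req_abort() - abort an in-flight request
+ * @req: the request to abort
+ *
+ * Marks @req with -ECANCELED and, if it is currently waiting in the
+ * RPC phase, wakes it so the error is acted upon immediately.
+ */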
+void ptlrpc_req_abort(struct ptlrpc_request *req)
+{
+ spin_lock(&req->rq_lock);
+ req->rq_err = 1;
+ req->rq_status = -ECANCELED;
+
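+ /* Not waiting in the RPC phase: no wakeup needed, the error
+ * will be noticed the next time the request is processed.
+ */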
+ if (req->rq_phase != RQ_PHASE_RPC) {
+ spin_unlock(&req->rq_lock);
+ return;
+ }
+ ptlrpc_client_wake_req(req);
+ spin_unlock(&req->rq_lock);
+}
+
/**
* Initialize the XID for the node. This is common among all requests on
* this node, and only requires the property that it is monotonically
}
if (request->rq_resend) {
lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
- if (request->rq_resend_cb != NULL)
- request->rq_resend_cb(request, &request->rq_async_args);
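+ /* The resend callback may veto the resend (e.g. if the
+ * lock was already canceled); propagate its error to
+ * abort the send.
+ */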
+ if (request->rq_resend_cb != NULL) {
+ rc = request->rq_resend_cb(request,
+ &request->rq_async_args);
+ if (rc)
+ return rc;
+ }
}
if (request->rq_memalloc)
mpflag = memalloc_noreclaim_save();