From 6b566e308fb75a1c0704a7f462e8b3e62aec5151 Mon Sep 17 00:00:00 2001 From: green Date: Thu, 5 Jan 2006 21:08:55 +0000 Subject: [PATCH] b=7313 r=adilger Latest version of reply-less lock callbacks patch with proper replay support - for testing by Cray. --- lustre/include/linux/lustre_net.h | 3 +- lustre/ldlm/ldlm_lockd.c | 4 +- lustre/ldlm/ldlm_request.c | 32 +++++++-- lustre/ptlrpc/client.c | 6 +- lustre/ptlrpc/niobuf.c | 138 +++++++++++++------------------------- lustre/ptlrpc/pinger.c | 2 +- lustre/ptlrpc/ptlrpc_module.c | 1 - 7 files changed, 80 insertions(+), 106 deletions(-) diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 783d2ba..b284713 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -583,8 +583,7 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int); int ptlrpc_reply(struct ptlrpc_request *req); int ptlrpc_error(struct ptlrpc_request *req); void ptlrpc_resend_req(struct ptlrpc_request *request); -int ptl_send_rpc(struct ptlrpc_request *request); -int ptl_send_rpc_nowait(struct ptlrpc_request *request); +int ptl_send_rpc(struct ptlrpc_request *request, int noreply); int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd); /* ptlrpc/client.c */ diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index a9def4b..70ca64d 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -489,7 +489,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, req->rq_send_state = LUSTRE_IMP_FULL; req->rq_timeout = ldlm_timeout; /* timeout for initial AST reply */ if (unlikely(instant_cancel)) { - rc = ptl_send_rpc_nowait(req); + rc = ptl_send_rpc(req, 1); } else { rc = ptlrpc_queue_wait(req); } @@ -821,6 +821,8 @@ existing_lock: else if (lock->l_granted_mode == lock->l_req_mode) ldlm_add_waiting_lock(lock); } + /* Make sure we never ever grant usual metadata locks to liblustre + clients */ if ((dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN || 
dlm_req->lock_desc.l_resource.lr_type == LDLM_IBITS) && req->rq_export->exp_libclient) { diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 0aab832..3a527fd 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -705,11 +705,18 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) rc = ptlrpc_queue_wait(req); if (rc == ESTALE) { - CERROR("client/server (nid %s) out of sync" - " -- not fatal, flags %d\n", - libcfs_nid2str(req->rq_import-> - imp_connection->c_peer.nid), - lock->l_flags); + /* For PLAIN (inodebits) locks on liblustre clients + this is a valid race between us cancelling a lock + from lru and sending notification and server + cancelling our lock at the same time */ +#ifndef __KERNEL__ + if (lock->l_resource->lr_type != LDLM_PLAIN /* IBITS */) +#endif + CERROR("client/server (nid %s) out of sync" + " -- not fatal, flags %d\n", + libcfs_nid2str(req->rq_import-> + imp_connection->c_peer.nid), + lock->l_flags); } else if (rc == -ETIMEDOUT) { ptlrpc_req_finished(req); GOTO(restart, rc); @@ -764,6 +771,13 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync) list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) { LASSERT(!lock->l_readers && !lock->l_writers); + /* If we have chosen to cancel this lock voluntarily, we better + send cancel notification to server, so that it frees + appropriate state. This might lead to a race where while + we are doing cancel here, server is also silently + cancelling this lock. */ + lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK; + /* Setting the CBPENDING flag is a little misleading, but * prevents an important race; namely, once CBPENDING is set, * the lock can accumulate no more readers/writers. 
Since @@ -1128,6 +1142,14 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) int size[2]; int flags; + /* If this is reply-less callback lock, we cannot replay it, since + * server might have long dropped it, but notification of that event was + * lost by network. (and server granted conflicting lock already) */ + if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) { + LDLM_DEBUG(lock, "Not replaying reply-less lock:"); + ldlm_lock_cancel(lock); + RETURN(0); + } /* * If granted mode matches the requested mode, this lock is granted. * diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index e8a669e..488ad13 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -713,7 +713,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) libcfs_nid2str(imp->imp_connection->c_peer.nid), req->rq_reqmsg->opc); - rc = ptl_send_rpc(req); + rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc); req->rq_net_err = 1; @@ -844,7 +844,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) } } - rc = ptl_send_rpc(req); + rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d)", rc); @@ -1561,7 +1561,7 @@ restart: list_add_tail(&req->rq_list, &imp->imp_sending_list); spin_unlock_irqrestore(&imp->imp_lock, flags); - rc = ptl_send_rpc(req); + rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc); timeout = 1; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index c63c971..cfac594 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -381,63 +381,7 @@ int ptlrpc_error(struct ptlrpc_request *req) RETURN(rc); } -int ptl_send_rpc_nowait(struct ptlrpc_request *request) -{ - int rc; - struct ptlrpc_connection *connection; - unsigned long flags; - ENTRY; - - LASSERT (request->rq_type == PTL_RPC_MSG_REQUEST); - - if (request->rq_import->imp_obd && - request->rq_import->imp_obd->obd_fail) { - CDEBUG(D_HA, "muting rpc 
for failed imp obd %s\n", - request->rq_import->imp_obd->obd_name); - /* this prevents us from waiting in ptlrpc_queue_wait */ - request->rq_err = 1; - RETURN(-ENODEV); - } - - connection = request->rq_import->imp_connection; - - request->rq_reqmsg->handle = request->rq_import->imp_remote_handle; - request->rq_reqmsg->type = PTL_RPC_MSG_REQUEST; - request->rq_reqmsg->conn_cnt = request->rq_import->imp_conn_cnt; - - spin_lock_irqsave (&request->rq_lock, flags); - /* If the MD attach succeeds, there _will_ be a reply_in callback */ - request->rq_receiving_reply = 0; - /* Clear any flags that may be present from previous sends. */ - request->rq_replied = 0; - request->rq_err = 0; - request->rq_timedout = 0; - request->rq_net_err = 0; - request->rq_resend = 0; - request->rq_restart = 0; - spin_unlock_irqrestore (&request->rq_lock, flags); - - ptlrpc_request_addref(request); /* +1 ref for the SENT callback */ - - request->rq_sent = CURRENT_SECONDS; - ptlrpc_pinger_sending_on_import(request->rq_import); - rc = ptl_send_buf(&request->rq_req_md_h, - request->rq_reqmsg, request->rq_reqlen, - LNET_NOACK_REQ, &request->rq_req_cbid, - connection, - request->rq_request_portal, - request->rq_xid); - if (rc == 0) { - ptlrpc_lprocfs_rpc_sent(request); - } else { - ptlrpc_req_finished (request); /* drop callback ref */ - } - - return rc; -} - - -int ptl_send_rpc(struct ptlrpc_request *request) +int ptl_send_rpc(struct ptlrpc_request *request, int noreply) { int rc; int rc2; @@ -476,24 +420,26 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_reqmsg->type = PTL_RPC_MSG_REQUEST; request->rq_reqmsg->conn_cnt = request->rq_import->imp_conn_cnt; - LASSERT (request->rq_replen != 0); - if (request->rq_repmsg == NULL) - OBD_ALLOC(request->rq_repmsg, request->rq_replen); - if (request->rq_repmsg == NULL) - GOTO(cleanup_bulk, rc = -ENOMEM); - - rc = LNetMEAttach(request->rq_reply_portal, /* XXX FIXME bug 249 */ - connection->c_peer, request->rq_xid, 0, - LNET_UNLINK, 
LNET_INS_AFTER, &reply_me_h); - if (rc != 0) { - CERROR("LNetMEAttach failed: %d\n", rc); - LASSERT (rc == -ENOMEM); - GOTO(cleanup_repmsg, rc = -ENOMEM); + if (!noreply) { + LASSERT (request->rq_replen != 0); + if (request->rq_repmsg == NULL) + OBD_ALLOC(request->rq_repmsg, request->rq_replen); + if (request->rq_repmsg == NULL) + GOTO(cleanup_bulk, rc = -ENOMEM); + + rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/ + connection->c_peer, request->rq_xid, 0, + LNET_UNLINK, LNET_INS_AFTER, &reply_me_h); + if (rc != 0) { + CERROR("LNetMEAttach failed: %d\n", rc); + LASSERT (rc == -ENOMEM); + GOTO(cleanup_repmsg, rc = -ENOMEM); + } } spin_lock_irqsave (&request->rq_lock, flags); /* If the MD attach succeeds, there _will_ be a reply_in callback */ - request->rq_receiving_reply = 1; + request->rq_receiving_reply = !noreply; /* Clear any flags that may be present from previous sends. */ request->rq_replied = 0; request->rq_err = 0; @@ -503,30 +449,32 @@ int ptl_send_rpc(struct ptlrpc_request *request) request->rq_restart = 0; spin_unlock_irqrestore (&request->rq_lock, flags); - reply_md.start = request->rq_repmsg; - reply_md.length = request->rq_replen; - reply_md.threshold = 1; - reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT; - reply_md.user_ptr = &request->rq_reply_cbid; - reply_md.eq_handle = ptlrpc_eq_h; - - rc = LNetMDAttach(reply_me_h, reply_md, LNET_UNLINK, - &request->rq_reply_md_h); - if (rc != 0) { - CERROR("LNetMDAttach failed: %d\n", rc); - LASSERT (rc == -ENOMEM); - spin_lock_irqsave (&request->rq_lock, flags); - /* ...but the MD attach didn't succeed... 
*/ - request->rq_receiving_reply = 0; - spin_unlock_irqrestore (&request->rq_lock, flags); - GOTO(cleanup_me, rc -ENOMEM); + if (!noreply) { + reply_md.start = request->rq_repmsg; + reply_md.length = request->rq_replen; + reply_md.threshold = 1; + reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT; + reply_md.user_ptr = &request->rq_reply_cbid; + reply_md.eq_handle = ptlrpc_eq_h; + + rc = LNetMDAttach(reply_me_h, reply_md, LNET_UNLINK, + &request->rq_reply_md_h); + if (rc != 0) { + CERROR("LNetMDAttach failed: %d\n", rc); + LASSERT (rc == -ENOMEM); + spin_lock_irqsave (&request->rq_lock, flags); + /* ...but the MD attach didn't succeed... */ + request->rq_receiving_reply = 0; + spin_unlock_irqrestore (&request->rq_lock, flags); + GOTO(cleanup_me, rc = -ENOMEM); + } + + CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 + ", portal %u\n", + request->rq_replen, request->rq_xid, + request->rq_reply_portal); } - CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 - ", portal %u\n", - request->rq_replen, request->rq_xid, - request->rq_reply_portal); - ptlrpc_request_addref(request); /* +1 ref for the SENT callback */ request->rq_sent = CURRENT_SECONDS; @@ -544,6 +492,10 @@ int ptl_send_rpc(struct ptlrpc_request *request) ptlrpc_req_finished (request); /* drop callback ref */ + if (noreply) + RETURN(rc); + else + GOTO(cleanup_me, rc); cleanup_me: /* MEUnlink is safe; the PUT didn't even get off the ground, and * nobody apart from the PUT's target has the right nid+XID to diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 3844c6d..2aaef4b 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -416,7 +416,7 @@ static int pinger_check_rpcs(void *arg) DEBUG_REQ(D_HA, req, "pinging %s->%s", req->rq_import->imp_obd->obd_uuid.uuid, req->rq_import->imp_target_uuid.uuid); - (void)ptl_send_rpc(req); + (void)ptl_send_rpc(req, 0); } do_check_set: diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index de70140..54c4cef 
100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -119,7 +119,6 @@ EXPORT_SYMBOL(ptlrpc_reply); EXPORT_SYMBOL(ptlrpc_error); EXPORT_SYMBOL(ptlrpc_resend_req); EXPORT_SYMBOL(ptl_send_rpc); -EXPORT_SYMBOL(ptl_send_rpc_nowait); /* client.c */ EXPORT_SYMBOL(ptlrpc_init_client); -- 1.8.3.1