Whamcloud - gitweb
LU-10391 lnet: switch to large lnet_processid for matching
[fs/lustre-release.git] / lustre / ptlrpc / niobuf.c
index 9fde3ac..bd8f0d4 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
 #define DEBUG_SUBSYSTEM S_RPC
@@ -104,12 +103,15 @@ static int ptl_send_buf(struct lnet_handle_md *mdh, void *base, int len,
        RETURN (0);
 }
 
-static void mdunlink_iterate_helper(struct lnet_handle_md *bd_mds, int count)
+#define mdunlink_iterate_helper(mds, count) \
+               __mdunlink_iterate_helper(mds, count, false) 
+static void __mdunlink_iterate_helper(struct lnet_handle_md *bd_mds,
+                                     int count, bool discard)
 {
        int i;
 
        for (i = 0; i < count; i++)
-               LNetMDUnlink(bd_mds[i]);
+               __LNetMDUnlink(bd_mds[i], discard);
 }
 
 #ifdef HAVE_SERVER_SUPPORT
@@ -285,7 +287,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
         * but we must still wait_event_idle_timeout() in this case, to give
         * us a chance to run server_bulk_callback()
         */
-       mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+       __mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw, true);
 
        for (;;) {
                /* Network access will complete in finite time but the HUGE
@@ -312,7 +314,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
 int ptlrpc_register_bulk(struct ptlrpc_request *req)
 {
        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-       struct lnet_process_id peer;
+       struct lnet_processid peer;
        int rc = 0;
        int posted_md;
        int total_md;
@@ -342,7 +344,9 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
 
        desc->bd_failure = 0;
 
-       peer = desc->bd_import->imp_connection->c_peer;
+       peer.pid = desc->bd_import->imp_connection->c_peer.pid;
+       lnet_nid4_to_nid(desc->bd_import->imp_connection->c_peer.nid,
+                     &peer.nid);
 
        LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
        LASSERT(desc->bd_cbid.cbid_arg == desc);
@@ -378,7 +382,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
                    OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_ATTACH)) {
                        rc = -ENOMEM;
                } else {
-                       me = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
+                       me = LNetMEAttach(desc->bd_portal, &peer, mbits, 0,
                                  LNET_UNLINK, LNET_INS_AFTER);
                        rc = PTR_ERR_OR_ZERO(me);
                }
@@ -397,7 +401,6 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
                        CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, mbits,
                               posted_md, rc);
-                       LNetMEUnlink(me);
                        break;
                }
        }
@@ -418,7 +421,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
        /* Holler if peer manages to touch buffers before he knows the mbits */
        if (desc->bd_refs != total_md)
                CWARN("%s: Peer %s touched %d buffers while I registered\n",
-                     desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
+                     desc->bd_import->imp_obd->obd_name, libcfs_idstr(&peer),
                      total_md - desc->bd_refs);
        spin_unlock(&desc->bd_lock);
 
@@ -643,7 +646,8 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
                          LNET_ACK_REQ : LNET_NOACK_REQ,
                          &rs->rs_cb_id, req->rq_self, req->rq_source,
                          ptlrpc_req2svc(req)->srv_rep_portal,
-                         req->rq_xid, req->rq_reply_off, NULL);
+                         req->rq_rep_mbits ? req->rq_rep_mbits : req->rq_xid,
+                         req->rq_reply_off, NULL);
 out:
         if (unlikely(rc != 0))
                 ptlrpc_req_drop_rs(req);
@@ -700,8 +704,11 @@ int ptlrpc_error(struct ptlrpc_request *req)
 int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 {
        int rc;
+       __u32 opc;
        int mpflag = 0;
+       bool rep_mbits = false;
        struct lnet_handle_md bulk_cookie;
+       struct lnet_processid peer;
        struct ptlrpc_connection *connection;
        struct lnet_me *reply_me = NULL;
        struct lnet_md reply_md;
@@ -711,28 +718,34 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 
        LNetInvalidateMDHandle(&bulk_cookie);
 
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
-                RETURN(0);
+       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
+               RETURN(0);
 
-        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
-        LASSERT(request->rq_wait_ctx == 0);
+       if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DELAY_RECOV) &&
+                    lustre_msg_get_opc(request->rq_reqmsg) == MDS_CONNECT &&
+                    strcmp(obd->obd_type->typ_name, LUSTRE_OSP_NAME) == 0)) {
+               RETURN(0);
+       }
 
-        /* If this is a re-transmit, we're required to have disengaged
-         * cleanly from the previous attempt */
-        LASSERT(!request->rq_receiving_reply);
+       LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
+       LASSERT(request->rq_wait_ctx == 0);
+
+       /* If this is a re-transmit, we're required to have disengaged
+        * cleanly from the previous attempt */
+       LASSERT(!request->rq_receiving_reply);
        LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
-               (imp->imp_state == LUSTRE_IMP_FULL)));
+                 (imp->imp_state == LUSTRE_IMP_FULL)));
 
        if (unlikely(obd != NULL && obd->obd_fail)) {
                CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
-                       obd->obd_name);
+                      obd->obd_name);
                /* this prevents us from waiting in ptlrpc_queue_wait */
                spin_lock(&request->rq_lock);
                request->rq_err = 1;
                spin_unlock(&request->rq_lock);
-                request->rq_status = -ENODEV;
-                RETURN(-ENODEV);
-        }
+               request->rq_status = -ENODEV;
+               RETURN(-ENODEV);
+       }
 
        connection = imp->imp_connection;
 
@@ -762,8 +775,14 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                          "Allocating new XID for resend on EINPROGRESS");
        }
 
-       if (request->rq_bulk != NULL) {
-               ptlrpc_set_bulk_mbits(request);
+       opc = lustre_msg_get_opc(request->rq_reqmsg);
+       if (opc != OST_CONNECT && opc != MDS_CONNECT &&
+           opc != MGS_CONNECT && OCD_HAS_FLAG(&imp->imp_connect_data, FLAGS2))
+               rep_mbits = imp->imp_connect_data.ocd_connect_flags2 &
+                       OBD_CONNECT2_REP_MBITS;
+
+       if ((request->rq_bulk != NULL) || rep_mbits) {
+               ptlrpc_set_mbits(request);
                lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
        }
 
@@ -781,7 +800,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
        LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
                (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
                !(imp->imp_connect_data.ocd_connect_flags &
-               OBD_CONNECT_AT));
+                 OBD_CONNECT_AT));
 
        if (request->rq_resend) {
                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
@@ -808,16 +827,16 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                bulk_cookie = request->rq_bulk->bd_mds[0];
        }
 
-        if (!noreply) {
-                LASSERT (request->rq_replen != 0);
-                if (request->rq_repbuf == NULL) {
-                        LASSERT(request->rq_repdata == NULL);
-                        LASSERT(request->rq_repmsg == NULL);
-                        rc = sptlrpc_cli_alloc_repbuf(request,
-                                                      request->rq_replen);
-                        if (rc) {
-                                /* this prevents us from looping in
-                                 * ptlrpc_queue_wait */
+       if (!noreply) {
+               LASSERT (request->rq_replen != 0);
+               if (request->rq_repbuf == NULL) {
+                       LASSERT(request->rq_repdata == NULL);
+                       LASSERT(request->rq_repmsg == NULL);
+                       rc = sptlrpc_cli_alloc_repbuf(request,
+                                                     request->rq_replen);
+                       if (rc) {
+                               /* this prevents us from looping in
+                                * ptlrpc_queue_wait */
                                spin_lock(&request->rq_lock);
                                request->rq_err = 1;
                                spin_unlock(&request->rq_lock);
@@ -829,14 +848,17 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                        request->rq_repmsg = NULL;
                }
 
+               peer.pid = connection->c_peer.pid;
+               lnet_nid4_to_nid(connection->c_peer.nid, &peer.nid);
                if (request->rq_bulk &&
                    OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH)) {
                        reply_me = ERR_PTR(-ENOMEM);
                } else {
                        reply_me = LNetMEAttach(request->rq_reply_portal,
-                                               connection->c_peer,
-                                               request->rq_xid, 0,
-                                               LNET_UNLINK, LNET_INS_AFTER);
+                                               &peer,
+                                               rep_mbits ? request->rq_mbits :
+                                               request->rq_xid,
+                                               0, LNET_UNLINK, LNET_INS_AFTER);
                }
 
                if (IS_ERR(reply_me)) {
@@ -853,24 +875,24 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
        request->rq_receiving_reply = !noreply;
        /* Clear any flags that may be present from previous sends. */
        request->rq_req_unlinked = 0;
-        request->rq_replied = 0;
-        request->rq_err = 0;
-        request->rq_timedout = 0;
-        request->rq_net_err = 0;
-        request->rq_resend = 0;
-        request->rq_restart = 0;
+       request->rq_replied = 0;
+       request->rq_err = 0;
+       request->rq_timedout = 0;
+       request->rq_net_err = 0;
+       request->rq_resend = 0;
+       request->rq_restart = 0;
        request->rq_reply_truncated = 0;
        spin_unlock(&request->rq_lock);
 
-        if (!noreply) {
-                reply_md.start     = request->rq_repbuf;
-                reply_md.length    = request->rq_repbuf_len;
-                /* Allow multiple early replies */
-                reply_md.threshold = LNET_MD_THRESH_INF;
-                /* Manage remote for early replies */
-                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
-                        LNET_MD_MANAGE_REMOTE |
-                        LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
+       if (!noreply) {
+               reply_md.start     = request->rq_repbuf;
+               reply_md.length    = request->rq_repbuf_len;
+               /* Allow multiple early replies */
+               reply_md.threshold = LNET_MD_THRESH_INF;
+               /* Manage remote for early replies */
+               reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
+                       LNET_MD_MANAGE_REMOTE |
+                       LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
                reply_md.user_ptr  = &request->rq_reply_cbid;
                reply_md.handler = ptlrpc_handler;
 
@@ -885,7 +907,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                        /* ...but the MD attach didn't succeed... */
                        request->rq_receiving_reply = 0;
                        spin_unlock(&request->rq_lock);
-                       GOTO(cleanup_me, rc = -ENOMEM);
+                       GOTO(cleanup_bulk, rc = -ENOMEM);
                }
                percpu_ref_get(&ptlrpc_pending);
 
@@ -895,22 +917,21 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                       request->rq_reply_portal);
        }
 
-        /* add references on request for request_out_callback */
-        ptlrpc_request_addref(request);
+       /* add references on request for request_out_callback */
+       ptlrpc_request_addref(request);
        if (obd != NULL && obd->obd_svc_stats != NULL)
                lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
-                       atomic_read(&imp->imp_inflight));
+                                   atomic_read(&imp->imp_inflight));
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
 
        request->rq_sent_ns = ktime_get_real();
        request->rq_sent = ktime_get_real_seconds();
        /* We give the server rq_timeout secs to process the req, and
-          add the network latency for our local timeout. */
-        request->rq_deadline = request->rq_sent + request->rq_timeout +
-                ptlrpc_at_get_net_latency(request);
-
-       ptlrpc_pinger_sending_on_import(imp);
+        * add the network latency for our local timeout.
+        */
+       request->rq_deadline = request->rq_sent + request->rq_timeout +
+               ptlrpc_at_get_net_latency(request);
 
        DEBUG_REQ(D_INFO, request, "send flags=%x",
                  lustre_msg_get_flags(request->rq_reqmsg));
@@ -924,16 +945,12 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                GOTO(out, rc);
 
        request->rq_req_unlinked = 1;
-        ptlrpc_req_finished(request);
-        if (noreply)
-                GOTO(out, rc);
-
- cleanup_me:
-       /* MEUnlink is safe; the PUT didn't even get off the ground, and
-        * nobody apart from the PUT's target has the right nid+XID to
-        * access the reply buffer.
-        */
-       LNetMEUnlink(reply_me);
+       ptlrpc_req_finished(request);
+       if (noreply)
+               GOTO(out, rc);
+
+       LNetMDUnlink(request->rq_reply_md_h);
+
        /* UNLINKED callback called synchronously */
        LASSERT(!request->rq_receiving_reply);
 
@@ -961,8 +978,8 @@ EXPORT_SYMBOL(ptl_send_rpc);
 int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
 {
        struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service;
-       static struct lnet_process_id match_id = {
-               .nid = LNET_NID_ANY,
+       static struct lnet_processid match_id = {
+               .nid = LNET_ANY_NID,
                .pid = LNET_PID_ANY
        };
        int rc;
@@ -979,7 +996,7 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
         * which means buffer can only be attached on local CPT, and LND
         * threads can find it by grabbing a local lock */
        me = LNetMEAttach(service->srv_req_portal,
-                         match_id, 0, ~0, LNET_UNLINK,
+                         &match_id, 0, ~0, LNET_UNLINK,
                          rqbd->rqbd_svcpt->scp_cpt >= 0 ?
                          LNET_INS_LOCAL : LNET_INS_AFTER);
        if (IS_ERR(me)) {
@@ -1006,7 +1023,6 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
 
        CERROR("ptlrpc: LNetMDAttach failed: rc = %d\n", rc);
        LASSERT(rc == -ENOMEM);
-       LNetMEUnlink(me);
        LASSERT(rc == 0);
        rqbd->rqbd_refcount = 0;