LU-10157 ptlrpc: separate number of MDs and references for bulk 75/41275/3
author: Alexey Lyashkov <c17817@cray.com>
        Wed, 20 Jan 2021 01:39:27 +0000 (20:39 -0500)
committer: Oleg Drokin <green@whamcloud.com>
        Thu, 4 Mar 2021 08:36:13 +0000 (08:36 +0000)
Introduce a bulk descriptor reference count (bd_refs) that is separate
from the MD count. ptlrpc expects an event from every MD, whether it is
filled with data or not, so the number of MDs to post is determined by
the requested transfer size, not by the number of MDs that carry data.
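
For illustration only (not part of this change): a minimal sketch of the
reference accounting described above, using simplified, hypothetical names;
the real paths are in the events.c and niobuf.c hunks below.

#include <linux/spinlock.h>
#include <linux/wait.h>

/* Sketch only: the descriptor takes one reference per posted MD,
 * including MDs that carry no data ("zero-sends"). */
struct bulk_refs_sketch {
        spinlock_t              lock;   /* like bd_lock */
        unsigned int            refs;   /* like bd_refs */
        wait_queue_head_t       waitq;  /* like bd_waitq */
};

/* Posting side: one reference for every MD that will be posted,
 * regardless of whether it is filled with data. */
static void sketch_post(struct bulk_refs_sketch *b, unsigned int total_md)
{
        spin_lock(&b->lock);
        b->refs = total_md;
        spin_unlock(&b->lock);
}

/* Event side: each LNet event on any of the MDs drops one reference;
 * only the last event wakes the waiter, so completion is not reported
 * before every posted MD has generated its event. */
static void sketch_event(struct bulk_refs_sketch *b)
{
        spin_lock(&b->lock);
        if (--b->refs == 0)
                wake_up(&b->waitq);
        spin_unlock(&b->lock);
}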

Lustre-change: https://review.whamcloud.com/37386/
Lustre-commit: 8a7f2d4b11801eae4c91904da9f9750a012a6b11

Cray-bug-id: LUS-8139
Signed-off-by: Alexey Lyashkov <c17817@cray.com>
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: Ic7d62c5c8d30fd6b681853a65429394ed2f122f2
Reviewed-on: https://review.whamcloud.com/41275
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 47b229b..b47cc9d 100644
@@ -1436,6 +1436,8 @@ extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kvec_ops;
  *  Another user is readpage for MDT.
  */
 struct ptlrpc_bulk_desc {
+       /** number of MDs assigned, including zero-sends */
+       unsigned int bd_refs;
        /** completed with failure */
        unsigned long bd_failure:1;
        /** client side */
@@ -2069,7 +2071,7 @@ static inline int ptlrpc_server_bulk_active(struct ptlrpc_bulk_desc *desc)
        LASSERT(desc != NULL);
 
        spin_lock(&desc->bd_lock);
-       rc = desc->bd_md_count;
+       rc = desc->bd_refs;
        spin_unlock(&desc->bd_lock);
        return rc;
 }
@@ -2094,7 +2096,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
 
 
        spin_lock(&desc->bd_lock);
-       rc = desc->bd_md_count;
+       rc = desc->bd_refs;
        spin_unlock(&desc->bd_lock);
        return rc;
 }
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 03e016e..622e501 100644
@@ -259,7 +259,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
 
        LASSERT(desc != NULL);
        LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */
-       LASSERT(desc->bd_md_count == 0);         /* network hands off */
+       LASSERT(desc->bd_refs == 0);         /* network hands off */
        LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
        LASSERT(desc->bd_frag_ops != NULL);
 
diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c
index 3335a1f..bf44ad0 100644
@@ -201,8 +201,8 @@ void client_bulk_callback(struct lnet_event *ev)
 
        spin_lock(&desc->bd_lock);
        req = desc->bd_req;
-       LASSERT(desc->bd_md_count > 0);
-       desc->bd_md_count--;
+       LASSERT(desc->bd_refs > 0);
+       desc->bd_refs--;
 
        if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) {
                desc->bd_nob_transferred += ev->mlength;
@@ -219,7 +219,7 @@ void client_bulk_callback(struct lnet_event *ev)
 
        /* NB don't unlock till after wakeup; desc can disappear under us
         * otherwise */
-       if (desc->bd_md_count == 0)
+       if (desc->bd_refs == 0)
                ptlrpc_client_wake_req(desc->bd_req);
 
        spin_unlock(&desc->bd_lock);
@@ -451,7 +451,7 @@ void server_bulk_callback(struct lnet_event *ev)
 
        spin_lock(&desc->bd_lock);
 
-       LASSERT(desc->bd_md_count > 0);
+       LASSERT(desc->bd_refs > 0);
 
        if ((ev->type == LNET_EVENT_ACK ||
             ev->type == LNET_EVENT_REPLY) &&
@@ -467,9 +467,9 @@ void server_bulk_callback(struct lnet_event *ev)
                desc->bd_failure = 1;
 
        if (ev->unlinked) {
-               desc->bd_md_count--;
+               desc->bd_refs--;
                /* This is the last callback no matter what... */
-               if (desc->bd_md_count == 0)
+               if (desc->bd_refs == 0)
                        wake_up(&desc->bd_waitq);
        }
 
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index c78d528..4d99341 100644
@@ -190,7 +190,7 @@ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
        mbits = desc->bd_req->rq_mbits & ~((__u64)desc->bd_md_max_brw - 1);
        total_md = desc->bd_req->rq_mbits - mbits + 1;
 
-       desc->bd_md_count = total_md;
+       desc->bd_refs = total_md;
        desc->bd_failure = 0;
 
        md.user_ptr = &desc->bd_cbid;
@@ -248,9 +248,9 @@ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
                 * event this creates will signal completion with failure,
                 * so we return SUCCESS here! */
                spin_lock(&desc->bd_lock);
-               desc->bd_md_count -= total_md - posted_md;
+               desc->bd_refs -= total_md - posted_md;
                spin_unlock(&desc->bd_lock);
-               LASSERT(desc->bd_md_count >= 0);
+               LASSERT(desc->bd_refs >= 0);
 
                mdunlink_iterate_helper(desc->bd_mds, posted_md);
                RETURN(0);
@@ -364,7 +364,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
 
        desc->bd_registered = 1;
        desc->bd_last_mbits = mbits;
-       desc->bd_md_count = total_md;
+       desc->bd_refs = total_md;
        md.user_ptr = &desc->bd_cbid;
        md.eq_handle = ptlrpc_eq_h;
        md.threshold = 1;                       /* PUT or GET */
@@ -405,9 +405,9 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
        if (rc != 0) {
                LASSERT(rc == -ENOMEM);
                spin_lock(&desc->bd_lock);
-               desc->bd_md_count -= total_md - posted_md;
+               desc->bd_refs -= total_md - posted_md;
                spin_unlock(&desc->bd_lock);
-               LASSERT(desc->bd_md_count >= 0);
+               LASSERT(desc->bd_refs >= 0);
                mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
                req->rq_status = -ENOMEM;
                desc->bd_registered = 0;
@@ -416,14 +416,15 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
 
        spin_lock(&desc->bd_lock);
        /* Holler if peer manages to touch buffers before he knows the mbits */
-       if (desc->bd_md_count != total_md)
+       if (desc->bd_refs != total_md)
                CWARN("%s: Peer %s touched %d buffers while I registered\n",
                      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
-                     total_md - desc->bd_md_count);
+                     total_md - desc->bd_refs);
        spin_unlock(&desc->bd_lock);
 
-       CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, "
-              "mbits x%#llx-%#llx, portal %u\n", desc->bd_md_count,
+       CDEBUG(D_NET,
+              "Setup %u bulk %s buffers: %u pages %u bytes, mbits x%#llx-%#llx, portal %u\n",
+              desc->bd_refs,
               ptlrpc_is_bulk_op_get(desc->bd_type) ? "get-source" : "put-sink",
               desc->bd_iov_count, desc->bd_nob,
               desc->bd_last_mbits, req->rq_mbits, desc->bd_portal);
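
For illustration only: a standalone worked example of the total_md
arithmetic used in the niobuf.c hunks above, with made-up values for
rq_mbits and bd_md_max_brw; bd_refs is initialised to this total_md, one
reference per MD, whether or not the MD ends up carrying data.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* made-up example values */
        uint64_t rq_mbits = 0x1007;      /* request match bits */
        uint64_t bd_md_max_brw = 4;      /* max MDs per bulk, a power of two */

        /* round the match bits down to the start of the MD block ... */
        uint64_t mbits = rq_mbits & ~(bd_md_max_brw - 1);             /* 0x1004 */
        /* ... so total_md is the number of MDs to post for this transfer */
        unsigned int total_md = (unsigned int)(rq_mbits - mbits + 1); /* 4 */

        /* prints: mbits=0x1004 total_md=4 */
        printf("mbits=%#llx total_md=%u\n",
               (unsigned long long)mbits, total_md);
        return 0;
}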