Whamcloud - gitweb
LU-5620 ptlrpc: Add QoS for opcode in NRS-TBF
[fs/lustre-release.git] / lustre / include / lustre_net.h
index 1a66ff1..3c1f595 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2010, 2015, Intel Corporation.
+ * Copyright (c) 2010, 2016, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #define PTLRPC_MD_OPTIONS  0
 
 /**
- * Max # of bulk operations in one request.
+ * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ...
  * In order for the client and server to properly negotiate the maximum
  * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two
  * value.  The client is free to limit the actual RPC size for any bulk
- * transfer via cl_max_pages_per_rpc to some non-power-of-two value. */
-#define PTLRPC_BULK_OPS_BITS   2
+ * transfer via cl_max_pages_per_rpc to some non-power-of-two value.
+ * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS. */
+#define PTLRPC_BULK_OPS_BITS   4
+#if PTLRPC_BULK_OPS_BITS > 16
+#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS."
+#endif
 #define PTLRPC_BULK_OPS_COUNT  (1U << PTLRPC_BULK_OPS_BITS)
 /**
  * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and
  */
 #define PTLRPC_MAX_BRW_BITS    (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS)
 #define PTLRPC_MAX_BRW_SIZE    (1 << PTLRPC_MAX_BRW_BITS)
-#define PTLRPC_MAX_BRW_PAGES   (PTLRPC_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define PTLRPC_MAX_BRW_PAGES   (PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT)
 
 #define ONE_MB_BRW_SIZE                (1 << LNET_MTU_BITS)
 #define MD_MAX_BRW_SIZE                (1 << LNET_MTU_BITS)
-#define MD_MAX_BRW_PAGES       (MD_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define MD_MAX_BRW_PAGES       (MD_MAX_BRW_SIZE >> PAGE_SHIFT)
 #define DT_MAX_BRW_SIZE                PTLRPC_MAX_BRW_SIZE
-#define DT_MAX_BRW_PAGES       (DT_MAX_BRW_SIZE >> PAGE_CACHE_SHIFT)
+#define DT_MAX_BRW_PAGES       (DT_MAX_BRW_SIZE >> PAGE_SHIFT)
 #define OFD_MAX_BRW_SIZE       (1 << LNET_MTU_BITS)
 
 /* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
 #if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
 # error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
 #endif
-#if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE))
-# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_CACHE_SIZE"
+#if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE))
+# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE"
 #endif
 #if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT)
 # error "PTLRPC_MAX_BRW_SIZE too big"
   */
  /* depress threads factor for VM with small memory size */
 #define OSS_THR_FACTOR         min_t(int, 8, \
-                               NUM_CACHEPAGES >> (28 - PAGE_CACHE_SHIFT))
+                               NUM_CACHEPAGES >> (28 - PAGE_SHIFT))
 #define OSS_NTHRS_INIT         (PTLRPC_NTHRS_INIT + 1)
 #define OSS_NTHRS_BASE         64
-#define OSS_NTHRS_MAX          512
 
 /* threads for handling "create" request */
 #define OSS_CR_THR_FACTOR      1
@@ -711,13 +710,14 @@ struct ptlrpc_thread;
 
 /** RPC stages */
 enum rq_phase {
-        RQ_PHASE_NEW            = 0xebc0de00,
-        RQ_PHASE_RPC            = 0xebc0de01,
-        RQ_PHASE_BULK           = 0xebc0de02,
-        RQ_PHASE_INTERPRET      = 0xebc0de03,
-        RQ_PHASE_COMPLETE       = 0xebc0de04,
-        RQ_PHASE_UNREGISTERING  = 0xebc0de05,
-        RQ_PHASE_UNDEFINED      = 0xebc0de06
+       RQ_PHASE_NEW            = 0xebc0de00,
+       RQ_PHASE_RPC            = 0xebc0de01,
+       RQ_PHASE_BULK           = 0xebc0de02,
+       RQ_PHASE_INTERPRET      = 0xebc0de03,
+       RQ_PHASE_COMPLETE       = 0xebc0de04,
+       RQ_PHASE_UNREG_RPC      = 0xebc0de05,
+       RQ_PHASE_UNREG_BULK     = 0xebc0de06,
+       RQ_PHASE_UNDEFINED      = 0xebc0de07
 };
 
 /** Type of request interpreter call-back */
@@ -792,6 +792,8 @@ struct ptlrpc_cli_req {
        time_t                           cr_reply_deadline;
        /** when req bulk unlink must finish. */
        time_t                           cr_bulk_deadline;
+       /** when req unlink must finish. */
+       time_t                           cr_req_deadline;
        /** Portal to which this request would be sent */
        short                            cr_req_ptl;
        /** Portal where to wait for reply and where reply would be sent */
@@ -850,6 +852,7 @@ struct ptlrpc_cli_req {
 #define rq_real_sent           rq_cli.cr_sent_out
 #define rq_reply_deadline      rq_cli.cr_reply_deadline
 #define rq_bulk_deadline       rq_cli.cr_bulk_deadline
+#define rq_req_deadline                rq_cli.cr_req_deadline
 #define rq_nr_resend           rq_cli.cr_resend_nr
 #define rq_request_portal      rq_cli.cr_req_ptl
 #define rq_reply_portal                rq_cli.cr_rep_ptl
@@ -1059,7 +1062,6 @@ struct ptlrpc_request {
                                  rq_bulk_write:1,    /* request bulk write */
                                  /* server authentication flags */
                                  rq_auth_gss:1,      /* authenticated by gss */
-                                 rq_auth_remote:1,   /* authed as remote user */
                                  rq_auth_usr_root:1, /* authed as root */
                                  rq_auth_usr_mdt:1,  /* authed as mdt */
                                  rq_auth_usr_ost:1,  /* authed as ost */
@@ -1103,6 +1105,8 @@ struct ptlrpc_request {
        lnet_nid_t                       rq_self;
        /** Peer description (the other side) */
        lnet_process_id_t                rq_peer;
+       /** Descriptor for the NID from which the peer sent the request. */
+       lnet_process_id_t                rq_source;
        /**
         * service time estimate (secs)
         * If the request is not served by this time, it is marked as timed out.
@@ -1225,22 +1229,24 @@ static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req,
 static inline const char *
 ptlrpc_phase2str(enum rq_phase phase)
 {
-        switch (phase) {
-        case RQ_PHASE_NEW:
-                return "New";
-        case RQ_PHASE_RPC:
-                return "Rpc";
-        case RQ_PHASE_BULK:
-                return "Bulk";
-        case RQ_PHASE_INTERPRET:
-                return "Interpret";
-        case RQ_PHASE_COMPLETE:
-                return "Complete";
-        case RQ_PHASE_UNREGISTERING:
-                return "Unregistering";
-        default:
-                return "?Phase?";
-        }
+       switch (phase) {
+       case RQ_PHASE_NEW:
+               return "New";
+       case RQ_PHASE_RPC:
+               return "Rpc";
+       case RQ_PHASE_BULK:
+               return "Bulk";
+       case RQ_PHASE_INTERPRET:
+               return "Interpret";
+       case RQ_PHASE_COMPLETE:
+               return "Complete";
+       case RQ_PHASE_UNREG_RPC:
+               return "UnregRPC";
+       case RQ_PHASE_UNREG_BULK:
+               return "UnregBULK";
+       default:
+               return "?Phase?";
+       }
 }
 
 /**
@@ -1261,18 +1267,18 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
 #define FLAG(field, str) (field ? str : "")
 
 /** Convert bit flags into a string */
-#define DEBUG_REQ_FLAGS(req)                                                    \
-        ptlrpc_rqphase2str(req),                                                \
-        FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"),                    \
-        FLAG(req->rq_err, "E"),                                                 \
-        FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),   \
-        FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),                  \
-        FLAG(req->rq_no_resend, "N"),                                           \
-        FLAG(req->rq_waiting, "W"),                                             \
-        FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"),                     \
-        FLAG(req->rq_committed, "M")
-
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+#define DEBUG_REQ_FLAGS(req)                                                   \
+       ptlrpc_rqphase2str(req),                                               \
+       FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"),                   \
+       FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"),                    \
+       FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),  \
+       FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),                 \
+       FLAG(req->rq_no_resend, "N"),                                          \
+       FLAG(req->rq_waiting, "W"),                                            \
+       FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"),                    \
+       FLAG(req->rq_committed, "M")
+
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
 
 void _debug_req(struct ptlrpc_request *req,
                 struct libcfs_debug_msg_data *data, const char *fmt, ...)
@@ -1467,12 +1473,12 @@ struct ptlrpc_bulk_desc {
                         * encrypt iov, size is either 0 or bd_iov_count.
                         */
                        lnet_kiov_t *bd_enc_vec;
-                       lnet_kiov_t bd_vec[0];
+                       lnet_kiov_t *bd_vec;
                } bd_kiov;
 
                struct {
                        struct kvec *bd_enc_kvec;
-                       struct kvec bd_kvec[0];
+                       struct kvec *bd_kvec;
                } bd_kvec;
        } bd_u;
 
@@ -1886,10 +1892,6 @@ struct ptlrpcd_ctl {
         */
        char                            pc_name[16];
        /**
-        * Environment for request interpreters to run in.
-        */
-       struct lu_env                   pc_env;
-       /**
         * CPT the thread is bound on.
         */
        int                             pc_cpt;
@@ -2041,17 +2043,16 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async);
 static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
 {
        struct ptlrpc_bulk_desc *desc;
-        int                      rc;
+       int rc;
 
-        LASSERT(req != NULL);
+       LASSERT(req != NULL);
        desc = req->rq_bulk;
 
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-            req->rq_bulk_deadline > cfs_time_current_sec())
-                return 1;
+       if (req->rq_bulk_deadline > cfs_time_current_sec())
+               return 1;
 
-        if (!desc)
-                return 0;
+       if (!desc)
+               return 0;
 
        spin_lock(&desc->bd_lock);
        rc = desc->bd_md_count;
@@ -2081,7 +2082,8 @@ void ptlrpc_request_committed(struct ptlrpc_request *req, int force);
 void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
                         struct ptlrpc_client *);
 void ptlrpc_cleanup_client(struct obd_import *imp);
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid,
+                                                   lnet_nid_t nid4refnet);
 
 int ptlrpc_queue_wait(struct ptlrpc_request *req);
 int ptlrpc_replay_req(struct ptlrpc_request *req);
@@ -2159,7 +2161,7 @@ static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc)
        int i;
 
        for (i = 0; i < desc->bd_iov_count ; i++)
-               page_cache_release(BD_GET_KIOV(desc, i).kiov_page);
+               put_page(BD_GET_KIOV(desc, i).kiov_page);
 }
 
 static inline void ptlrpc_release_bulk_noop(struct ptlrpc_bulk_desc *desc)
@@ -2427,13 +2429,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
        if (req->rq_phase == new_phase)
                return;
 
-       if (new_phase == RQ_PHASE_UNREGISTERING) {
+       if (new_phase == RQ_PHASE_UNREG_RPC ||
+           new_phase == RQ_PHASE_UNREG_BULK) {
+               /* No embedded unregistering phases */
+               if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+                   req->rq_phase == RQ_PHASE_UNREG_BULK)
+                       return;
+
                req->rq_next_phase = req->rq_phase;
                if (req->rq_import)
                        atomic_inc(&req->rq_import->imp_unregistering);
        }
 
-       if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+       if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+           req->rq_phase == RQ_PHASE_UNREG_BULK) {
                if (req->rq_import)
                        atomic_dec(&req->rq_import->imp_unregistering);
        }
@@ -2450,9 +2459,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 static inline int
 ptlrpc_client_early(struct ptlrpc_request *req)
 {
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-            req->rq_reply_deadline > cfs_time_current_sec())
-                return 0;
         return req->rq_early;
 }
 
@@ -2462,20 +2468,18 @@ ptlrpc_client_early(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_replied(struct ptlrpc_request *req)
 {
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-            req->rq_reply_deadline > cfs_time_current_sec())
-                return 0;
-        return req->rq_replied;
+       if (req->rq_reply_deadline > cfs_time_current_sec())
+               return 0;
+       return req->rq_replied;
 }
 
 /** Returns true if request \a req is in process of receiving server reply */
 static inline int
 ptlrpc_client_recv(struct ptlrpc_request *req)
 {
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-            req->rq_reply_deadline > cfs_time_current_sec())
-                return 1;
-        return req->rq_receiving_reply;
+       if (req->rq_reply_deadline > cfs_time_current_sec())
+               return 1;
+       return req->rq_receiving_reply;
 }
 
 static inline int
@@ -2484,11 +2488,15 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
        int rc;
 
        spin_lock(&req->rq_lock);
-       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-           req->rq_reply_deadline > cfs_time_current_sec()) {
+       if (req->rq_reply_deadline > cfs_time_current_sec()) {
                spin_unlock(&req->rq_lock);
                return 1;
        }
+       if (req->rq_req_deadline > cfs_time_current_sec()) {
+               spin_unlock(&req->rq_lock);
+               return 1;
+       }
+
        rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
             req->rq_receiving_reply;
        spin_unlock(&req->rq_lock);
@@ -2649,6 +2657,7 @@ void ptlrpcd_decref(void);
  * @{
  */
 const char* ll_opcode2str(__u32 opcode);
+const int ll_str2opcode(const char *ops);
 #ifdef CONFIG_PROC_FS
 void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
 void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd);