Whamcloud - gitweb
LU-11128 ptlrpc: add debugging for idle connections 68/33168/3
authorAndreas Dilger <adilger@whamcloud.com>
Fri, 14 Sep 2018 03:44:23 +0000 (21:44 -0600)
committerOleg Drokin <green@whamcloud.com>
Fri, 21 Sep 2018 03:31:08 +0000 (03:31 +0000)
Add a "debug" parameter for the idle client disconnection so that
it can log disconnect/reconnect events to the console.

Print the idle time in the "import" file.

Enable the connection debugging for all test runs.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I47a4f73542fb4864e4f35275ecfe8907493ebbe5
Reviewed-on: https://review.whamcloud.com/33168
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Nathaniel Clark <nclark@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_import.h
lustre/obdclass/lprocfs_status.c
lustre/osc/lproc_osc.c
lustre/osc/osc_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/tests/test-framework.sh

index dfa22bb..9ba6a87 100644 (file)
@@ -306,12 +306,13 @@ struct obd_import {
                                  imp_connect_tried:1,
                                  /* connected but not FULL yet */
                                  imp_connected:1;
-       __u32                     imp_connect_op;
-       __u32                     imp_idle_timeout;
-       struct obd_connect_data   imp_connect_data;
-       __u64                     imp_connect_flags_orig;
-       __u64                     imp_connect_flags2_orig;
-       int                       imp_connect_error;
+       u32                       imp_connect_op;
+       u32                       imp_idle_timeout;
+       u32                       imp_idle_debug;
+       struct obd_connect_data   imp_connect_data;
+       __u64                     imp_connect_flags_orig;
+       __u64                     imp_connect_flags2_orig;
+       int                       imp_connect_error;
 
        enum lustre_msg_magic   imp_msg_magic;
                                /* adjusted based on server capability */
index 663fdf1..e98ca6b 100644 (file)
@@ -996,11 +996,13 @@ int lprocfs_import_seq_show(struct seq_file *m, void *data)
                   "       current_connection: %s\n"
                   "       connection_attempts: %u\n"
                   "       generation: %u\n"
-                  "       in-progress_invalidations: %u\n",
+                  "       in-progress_invalidations: %u\n"
+                  "       idle: %lld sec\n",
                   nidstr,
                   imp->imp_conn_cnt,
                   imp->imp_generation,
-                  atomic_read(&imp->imp_inval_count));
+                  atomic_read(&imp->imp_inval_count),
+                  ktime_get_real_seconds() - imp->imp_last_reply_time);
        spin_unlock(&imp->imp_lock);
 
        if (obd->obd_svc_stats == NULL)
index 463c179..7ff4d11 100644 (file)
@@ -630,24 +630,35 @@ static ssize_t idle_timeout_store(struct kobject *kobj, struct attribute *attr,
                                              obd_kset.kobj);
        struct client_obd *cli = &dev->u.cli;
        struct ptlrpc_request *req;
+       unsigned int idle_debug = 0;
        unsigned int val;
        int rc;
 
-       rc = kstrtouint(buffer, 10, &val);
-       if (rc)
-               return rc;
+       if (strncmp(buffer, "debug", 5) == 0) {
+               idle_debug = D_CONSOLE;
+       } else if (strncmp(buffer, "nodebug", 6) == 0) {
+               idle_debug = D_HA;
+       } else {
+               rc = kstrtouint(buffer, 10, &val);
+               if (rc)
+                       return rc;
 
-       if (val > CONNECTION_SWITCH_MAX)
-               return -ERANGE;
+               if (val > CONNECTION_SWITCH_MAX)
+                       return -ERANGE;
+       }
 
        LPROCFS_CLIMP_CHECK(dev);
-       cli->cl_import->imp_idle_timeout = val;
-
-       /* to initiate the connection if it's in IDLE state */
-       if (!val) {
-               req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_STATFS);
-               if (req != NULL)
-                       ptlrpc_req_finished(req);
+       if (idle_debug) {
+               cli->cl_import->imp_idle_debug = idle_debug;
+       } else {
+               if (!val) {
+                       /* initiate the connection if it's in IDLE state */
+                       req = ptlrpc_request_alloc(cli->cl_import,
+                                                  &RQF_OST_STATFS);
+                       if (req != NULL)
+                               ptlrpc_req_finished(req);
+               }
+               cli->cl_import->imp_idle_timeout = val;
        }
        LPROCFS_CLIMP_EXIT(dev);
 
index 5c707de..1ac295b 100644 (file)
@@ -3252,6 +3252,7 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
        list_add_tail(&cli->cl_shrink_list, &osc_shrink_list);
        spin_unlock(&osc_shrink_lock);
        cli->cl_import->imp_idle_timeout = osc_idle_timeout;
+       cli->cl_import->imp_idle_debug = D_HA;
 
        RETURN(0);
 }
index 22c0d6f..a3b8504 100644 (file)
@@ -869,8 +869,10 @@ ptlrpc_request_alloc_internal(struct obd_import *imp,
 
        if (unlikely(imp->imp_state == LUSTRE_IMP_IDLE)) {
                int rc;
-               CDEBUG(D_INFO, "%s: connect at new req\n",
-                      imp->imp_obd->obd_name);
+               CDEBUG_LIMIT(imp->imp_idle_debug,
+                            "%s: reconnect after %llds idle\n",
+                            imp->imp_obd->obd_name, ktime_get_real_seconds() -
+                                                    imp->imp_last_reply_time);
                spin_lock(&imp->imp_lock);
                if (imp->imp_state == LUSTRE_IMP_IDLE) {
                        imp->imp_generation++;
index 82fe902..dd9ba73 100644 (file)
@@ -1696,7 +1696,9 @@ int ptlrpc_disconnect_and_idle_import(struct obd_import *imp)
        if (IS_ERR(req))
                RETURN(PTR_ERR(req));
 
-       CDEBUG(D_INFO, "%s: disconnect\n", imp->imp_obd->obd_name);
+       CDEBUG_LIMIT(imp->imp_idle_debug, "%s: disconnect after %llus idle\n",
+                    imp->imp_obd->obd_name,
+                    ktime_get_real_seconds() - imp->imp_last_reply_time);
        req->rq_interpret_reply = ptlrpc_disconnect_idle_interpret;
        ptlrpcd_add_req(req);
 
index b704b9e..ef808d8 100644 (file)
@@ -99,6 +99,7 @@ static bool ptlrpc_check_import_is_idle(struct obd_import *imp)
 
        if (!imp->imp_idle_timeout)
                return false;
+
        /* 4 comes from:
         *  - client_obd_setup() - hashed import
         *  - ptlrpcd_alloc_work()
@@ -107,6 +108,7 @@ static bool ptlrpc_check_import_is_idle(struct obd_import *imp)
         */
        if (atomic_read(&imp->imp_refcount) > 4)
                return false;
+
        /* any lock increases ns_bref being a resource holder */
        if (ns && atomic_read(&ns->ns_bref) > 0)
                return false;
index afd64d2..1604790 100755 (executable)
@@ -4804,6 +4804,7 @@ init_param_vars () {
 
        osc_ensure_active $SINGLEMDS $TIMEOUT
        osc_ensure_active client $TIMEOUT
+       $LCTL set_param osc.*.idle_timeout=debug
 
        if [ -n "$(lctl get_param -n mdc.*.connect_flags|grep jobstats)" ]; then
                local current_jobid_var=$($LCTL get_param -n jobid_var)
@@ -4811,7 +4812,7 @@ init_param_vars () {
                if [ $JOBID_VAR = "existing" ]; then
                        echo "keeping jobstats as $current_jobid_var"
                elif [ $current_jobid_var != $JOBID_VAR ]; then
-                       echo "seting jobstats to $JOBID_VAR"
+                       echo "setting jobstats to $JOBID_VAR"
 
                        set_conf_param_and_check client                 \
                                "$LCTL get_param -n jobid_var"          \