Whamcloud - gitweb
LU-7236 ptlrpc: idle connections can disconnect 82/16682/123
author Alex Zhuravlev <alexey.zhuravlev@intel.com>
Mon, 28 Sep 2015 13:50:15 +0000 (16:50 +0300)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 14 Jun 2018 03:54:09 +0000 (03:54 +0000)
 - when a new request is being allocated, ptlrpc initiates the
   connection if the import is not connected yet
 - if the import is idle (no locks, no active RPCs, no
   non-PING reply for last osc_idle_timeout seconds),
   then pinger tries to disconnect asynchronously
 - currently only client-to-OST connections can be idle
 - lctl set_param osc.*.idle_timeout=N controls new feature:
   N=0 - disable
   N>0 - seconds to idle before disconnect
 - lctl set_param osc.*.idle_connect=N forces a reconnect if the
   import is idle (N is a positive number)
 - the OSC module parameter osc_idle_timeout sets the default
   idle timeout; it is 20 seconds unless changed

Change-Id: I4b90eb5209a0b0e62d85fd55ad6e9cab8c03fd14
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-on: https://review.whamcloud.com/16682
Tested-by: Jenkins
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
17 files changed:
lustre/include/lustre_import.h
lustre/include/lustre_net.h
lustre/lov/lov_ea.c
lustre/lov/lov_obd.c
lustre/lov/lov_request.c
lustre/osc/lproc_osc.c
lustre/osc/osc_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/tests/conf-sanity.sh
lustre/tests/runtests
lustre/tests/sanity-flr.sh
lustre/tests/sanity.sh
lustre/tests/sanityn.sh
lustre/tests/test-framework.sh

index 9191fa1..dfa22bb 100644 (file)
@@ -101,19 +101,21 @@ enum lustre_imp_state {
         LUSTRE_IMP_RECOVER    = 8,
         LUSTRE_IMP_FULL       = 9,
         LUSTRE_IMP_EVICTED    = 10,
+       LUSTRE_IMP_IDLE       = 11,
+       LUSTRE_IMP_LAST
 };
 
 /** Returns test string representation of numeric import state \a state */
 static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
 {
-        static char* import_state_names[] = {
-                "<UNKNOWN>", "CLOSED",  "NEW", "DISCONN",
-                "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
-                "RECOVER", "FULL", "EVICTED",
-        };
-
-        LASSERT (state <= LUSTRE_IMP_EVICTED);
-        return import_state_names[state];
+       /* indexed by the numeric state; holds one name for every value
+        * below LUSTRE_IMP_LAST, which the LASSERT below relies on */
+       static char *import_state_names[] = {
+               "<UNKNOWN>", "CLOSED",  "NEW", "DISCONN",
+               "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
+               "RECOVER", "FULL", "EVICTED", "IDLE",
+       };
+
+       LASSERT(state < LUSTRE_IMP_LAST);
+       return import_state_names[state];
 }
 
 /**
@@ -232,6 +234,8 @@ struct obd_import {
         int                       imp_state_hist_idx;
         /** Current import generation. Incremented on every reconnect */
         int                       imp_generation;
+       /** Idle connection initiated at this generation */
+       int                       imp_initiated_at;
         /** Incremented every time we send reconnection request */
         __u32                     imp_conn_cnt;
        /** 
@@ -303,6 +307,7 @@ struct obd_import {
                                  /* connected but not FULL yet */
                                  imp_connected:1;
        __u32                     imp_connect_op;
+       __u32                     imp_idle_timeout;
        struct obd_connect_data   imp_connect_data;
        __u64                     imp_connect_flags_orig;
        __u64                     imp_connect_flags2_orig;
index 2d36764..3afe692 100644 (file)
@@ -2308,6 +2308,7 @@ void ptlrpc_hr_fini(void);
 int ptlrpc_connect_import(struct obd_import *imp);
 int ptlrpc_init_import(struct obd_import *imp);
 int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
+int ptlrpc_disconnect_and_idle_import(struct obd_import *imp);
 int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
 void deuuidify(char *uuid, const char *prefix, char **uuid_start,
               int *uuid_len);
index d7a988f..2ca7b99 100644 (file)
@@ -65,7 +65,8 @@ static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
                return maxbytes;
 
        spin_lock(&imp->imp_lock);
-       if (imp->imp_state == LUSTRE_IMP_FULL &&
+       if ((imp->imp_state == LUSTRE_IMP_FULL ||
+           imp->imp_state == LUSTRE_IMP_IDLE) &&
            (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) &&
            imp->imp_connect_data.ocd_maxbytes > 0)
                maxbytes = imp->imp_connect_data.ocd_maxbytes;
index e906fc8..45c57fc 100644 (file)
@@ -971,27 +971,30 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 
         switch (cmd) {
         case IOC_OBD_STATFS: {
-                struct obd_ioctl_data *data = karg;
-                struct obd_device *osc_obd;
-                struct obd_statfs stat_buf = {0};
-                __u32 index;
+               struct obd_ioctl_data *data = karg;
+               struct obd_device *osc_obd;
+               struct obd_statfs stat_buf = {0};
+               struct obd_import *imp;
+               __u32 index;
                __u32 flags;
 
-                memcpy(&index, data->ioc_inlbuf2, sizeof(index));
-                if ((index >= count))
-                        RETURN(-ENODEV);
+               memcpy(&index, data->ioc_inlbuf2, sizeof(index));
+               if ((index >= count))
+                       RETURN(-ENODEV);
 
-                if (!lov->lov_tgts[index])
-                        /* Try again with the next index */
-                        RETURN(-EAGAIN);
-                if (!lov->lov_tgts[index]->ltd_active)
-                        RETURN(-ENODATA);
+               if (!lov->lov_tgts[index])
+                       /* Try again with the next index */
+                       RETURN(-EAGAIN);
+               imp = lov->lov_tgts[index]->ltd_exp->exp_obd->u.cli.cl_import;
+               if (!lov->lov_tgts[index]->ltd_active &&
+                   imp->imp_state != LUSTRE_IMP_IDLE)
+                       RETURN(-ENODATA);
 
-                osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
-                if (!osc_obd)
-                        RETURN(-EINVAL);
+               osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
+               if (!osc_obd)
+                       RETURN(-EINVAL);
 
-                /* copy UUID */
+               /* copy UUID */
                if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd),
                                 min_t(unsigned long, data->ioc_plen2,
                                       sizeof(struct obd_uuid))))
index e7dda50..a7baa12 100644 (file)
@@ -106,6 +106,7 @@ static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
        wait_queue_head_t waitq;
        struct l_wait_info lwi;
        struct lov_tgt_desc *tgt;
+       struct obd_import *imp = NULL;
        int rc = 0;
 
        mutex_lock(&lov->lov_lock);
@@ -118,7 +119,11 @@ static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx)
        if (likely(tgt->ltd_active))
                GOTO(out, rc = 1);
 
-       if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried)
+       if (tgt->ltd_exp)
+               imp = class_exp2cliimp(tgt->ltd_exp);
+       if (imp && imp->imp_connect_tried)
+               GOTO(out, rc = 0);
+       if (imp && imp->imp_state == LUSTRE_IMP_IDLE)
                GOTO(out, rc = 0);
 
        mutex_unlock(&lov->lov_lock);
@@ -322,47 +327,53 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
 
         /* We only get block data from the OBD */
         for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+               struct lov_tgt_desc *ltd = lov->lov_tgts[i];
                struct lov_request *req;
 
-               if (lov->lov_tgts[i] == NULL ||
-                   (oinfo->oi_flags & OBD_STATFS_NODELAY &&
-                    !lov->lov_tgts[i]->ltd_active)) {
+               if (ltd == NULL) {
                        CDEBUG(D_HA, "lov idx %d inactive\n", i);
                        continue;
                }
 
                /* skip targets that have been explicitely disabled by the
                 * administrator */
-               if (!lov->lov_tgts[i]->ltd_exp) {
+               if (!ltd->ltd_exp) {
                        CDEBUG(D_HA, "lov idx %d administratively disabled\n",
                               i);
                        continue;
                }
 
-               if (!lov->lov_tgts[i]->ltd_active)
+               if (oinfo->oi_flags & OBD_STATFS_NODELAY &&
+                   class_exp2cliimp(ltd->ltd_exp)->imp_state !=
+                   LUSTRE_IMP_IDLE && !ltd->ltd_active) {
+                       CDEBUG(D_HA, "lov idx %d inactive\n", i);
+                       continue;
+               }
+
+               if (!ltd->ltd_active)
                        lov_check_and_wait_active(lov, i);
 
                OBD_ALLOC(req, sizeof(*req));
                if (req == NULL)
                        GOTO(out_set, rc = -ENOMEM);
 
-                OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
-                if (req->rq_oi.oi_osfs == NULL) {
-                        OBD_FREE(req, sizeof(*req));
-                        GOTO(out_set, rc = -ENOMEM);
-                }
+               OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
+               if (req->rq_oi.oi_osfs == NULL) {
+                       OBD_FREE(req, sizeof(*req));
+                       GOTO(out_set, rc = -ENOMEM);
+               }
 
-                req->rq_idx = i;
-                req->rq_oi.oi_cb_up = cb_statfs_update;
-                req->rq_oi.oi_flags = oinfo->oi_flags;
+               req->rq_idx = i;
+               req->rq_oi.oi_cb_up = cb_statfs_update;
+               req->rq_oi.oi_flags = oinfo->oi_flags;
 
-                lov_set_add_req(req, set);
-        }
-        if (!set->set_count)
-                GOTO(out_set, rc = -EIO);
-        *reqset = set;
-        RETURN(rc);
+               lov_set_add_req(req, set);
+       }
+       if (!set->set_count)
+               GOTO(out_set, rc = -EIO);
+       *reqset = set;
+       RETURN(rc);
 out_set:
-        lov_fini_statfs_set(set);
-        RETURN(rc);
+       lov_fini_statfs_set(set);
+       RETURN(rc);
 }
index 9337450..3c0e138 100644 (file)
@@ -608,6 +608,67 @@ static int osc_unstable_stats_seq_show(struct seq_file *m, void *v)
 }
 LPROC_SEQ_FOPS_RO(osc_unstable_stats);
 
+/* Print the import's idle timeout (imp_idle_timeout) as "%u\n" */
+static int osc_idle_timeout_seq_show(struct seq_file *m, void *v)
+{
+       struct obd_device *dev = m->private;
+
+       seq_printf(m, "%u\n", dev->u.cli.cl_import->imp_idle_timeout);
+       return 0;
+}
+
+/*
+ * Write handler for the "idle_timeout" parameter.
+ *
+ * Parses \a buffer as an integer number of seconds and stores it in the
+ * import's imp_idle_timeout.  Writing 0 disables idle disconnect and, by
+ * allocating (and immediately dropping) a request, initiates the
+ * connection if the import is currently in IDLE state.
+ *
+ * \retval count       on success
+ * \retval -ERANGE     the value is negative or does not fit in the
+ *                     __u32 imp_idle_timeout field
+ * \retval other       nonzero value returned by
+ *                     lprocfs_str_with_units_to_s64()
+ */
+static ssize_t osc_idle_timeout_seq_write(struct file *f,
+                                         const char __user *buffer,
+                                         size_t count, loff_t *off)
+{
+       struct obd_device *dev = ((struct seq_file *)f->private_data)->private;
+       struct client_obd *cli = &dev->u.cli;
+       struct ptlrpc_request *req;
+       __s64 val;
+       int rc;
+
+       rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1');
+       if (rc)
+               return rc;
+       /* the timeout is a number of seconds, not a boolean: accept any
+        * non-negative value that imp_idle_timeout (__u32) can hold */
+       if (val < 0 || val != (__u32)val)
+               return -ERANGE;
+
+       cli->cl_import->imp_idle_timeout = val;
+
+       /* to initiate the connection if it's in IDLE state */
+       if (!val) {
+               req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_STATFS);
+               if (req != NULL)
+                       ptlrpc_req_finished(req);
+       }
+
+       return count;
+}
+LPROC_SEQ_FOPS(osc_idle_timeout);
+
+/*
+ * Read handler for "idle_connect": there is nothing to report, so it
+ * emits no output and returns 0.
+ * NOTE(review): presumably present only because
+ * LPROC_SEQ_FOPS(osc_idle_connect) needs a show handler - confirm.
+ */
+static int osc_idle_connect_seq_show(struct seq_file *m, void *v)
+{
+       return 0;
+}
+
+/*
+ * Write handler for "idle_connect".  The written text is not parsed:
+ * any write allocates and immediately drops a request, then forces the
+ * pinger for the import.
+ *
+ * \retval count       always
+ */
+static ssize_t osc_idle_connect_seq_write(struct file *f,
+                                         const char __user *buffer,
+                                         size_t count, loff_t *off)
+{
+       struct seq_file *seq = f->private_data;
+       struct obd_device *obd = seq->private;
+       struct client_obd *client = &obd->u.cli;
+       struct ptlrpc_request *probe;
+
+       /* to initiate the connection if it's in IDLE state */
+       probe = ptlrpc_request_alloc(client->cl_import, &RQF_OST_STATFS);
+       if (probe)
+               ptlrpc_req_finished(probe);
+       ptlrpc_pinger_force(client->cl_import);
+
+       return count;
+}
+LPROC_SEQ_FOPS(osc_idle_connect);
+
 LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags);
 LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid);
 LPROC_SEQ_FOPS_RO_TYPE(osc, timeouts);
@@ -639,6 +700,10 @@ struct lprocfs_vars lprocfs_osc_obd_vars[] = {
          .fops =       &osc_pinger_recov_fops          },
        { .name =       "unstable_stats",
          .fops =       &osc_unstable_stats_fops        },
+       { .name =       "idle_timeout",
+         .fops =       &osc_idle_timeout_fops          },
+       { .name =       "idle_connect",
+         .fops =       &osc_idle_connect_fops          },
        { NULL }
 };
 
index 62dd791..183e8f4 100644 (file)
@@ -56,6 +56,9 @@ struct ptlrpc_request_pool *osc_rq_pool;
 static unsigned int osc_reqpool_mem_max = 5;
 module_param(osc_reqpool_mem_max, uint, 0444);
 
+static unsigned int osc_idle_timeout = 20; /* default idle timeout, sec */
+module_param(osc_idle_timeout, uint, 0644);
+
 #define osc_grant_args osc_brw_async_args
 
 struct osc_setattr_args {
@@ -2648,7 +2651,7 @@ static int osc_statfs_async(struct obd_export *exp,
         struct obd_device     *obd = class_exp2obd(exp);
         struct ptlrpc_request *req;
         struct osc_async_args *aa;
-        int                    rc;
+       int rc;
         ENTRY;
 
         /* We could possibly pass max_age in the request (as an absolute
@@ -2666,15 +2669,15 @@ static int osc_statfs_async(struct obd_export *exp,
                 ptlrpc_request_free(req);
                 RETURN(rc);
         }
-        ptlrpc_request_set_replen(req);
-        req->rq_request_portal = OST_CREATE_PORTAL;
-        ptlrpc_at_set_req_timeout(req);
+       ptlrpc_request_set_replen(req);
+       req->rq_request_portal = OST_CREATE_PORTAL;
+       ptlrpc_at_set_req_timeout(req);
 
-        if (oinfo->oi_flags & OBD_STATFS_NODELAY) {
-                /* procfs requests not want stat in wait for avoid deadlock */
-                req->rq_no_resend = 1;
-                req->rq_no_delay = 1;
-        }
+       if (oinfo->oi_flags & OBD_STATFS_NODELAY) {
+               /* procfs requests not want stat in wait for avoid deadlock */
+               req->rq_no_resend = 1;
+               req->rq_no_delay = 1;
+       }
 
        req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret;
        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
@@ -2688,12 +2691,13 @@ static int osc_statfs_async(struct obd_export *exp,
 static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
                      struct obd_statfs *osfs, time64_t max_age, __u32 flags)
 {
-        struct obd_device     *obd = class_exp2obd(exp);
-        struct obd_statfs     *msfs;
-        struct ptlrpc_request *req;
-        struct obd_import     *imp = NULL;
-        int rc;
-        ENTRY;
+       struct obd_device     *obd = class_exp2obd(exp);
+       struct obd_statfs     *msfs;
+       struct ptlrpc_request *req;
+       struct obd_import     *imp = NULL;
+       int rc;
+       ENTRY;
+
 
         /*Since the request might also come from lprocfs, so we need
          *sync this with client_disconnect_export Bug15684*/
@@ -2704,49 +2708,48 @@ static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
         if (!imp)
                 RETURN(-ENODEV);
 
-        /* We could possibly pass max_age in the request (as an absolute
-         * timestamp or a "seconds.usec ago") so the target can avoid doing
-         * extra calls into the filesystem if that isn't necessary (e.g.
-         * during mount that would help a bit).  Having relative timestamps
-         * is not so great if request processing is slow, while absolute
-         * timestamps are not ideal because they need time synchronization. */
-        req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
+       /* We could possibly pass max_age in the request (as an absolute
+        * timestamp or a "seconds.usec ago") so the target can avoid doing
+        * extra calls into the filesystem if that isn't necessary (e.g.
+        * during mount that would help a bit).  Having relative timestamps
+        * is not so great if request processing is slow, while absolute
+        * timestamps are not ideal because they need time synchronization. */
+       req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
 
-        class_import_put(imp);
+       class_import_put(imp);
 
-        if (req == NULL)
-                RETURN(-ENOMEM);
+       if (req == NULL)
+               RETURN(-ENOMEM);
 
-        rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
-        if (rc) {
-                ptlrpc_request_free(req);
-                RETURN(rc);
-        }
-        ptlrpc_request_set_replen(req);
-        req->rq_request_portal = OST_CREATE_PORTAL;
-        ptlrpc_at_set_req_timeout(req);
+       rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
+       if (rc) {
+               ptlrpc_request_free(req);
+               RETURN(rc);
+       }
+       ptlrpc_request_set_replen(req);
+       req->rq_request_portal = OST_CREATE_PORTAL;
+       ptlrpc_at_set_req_timeout(req);
 
-        if (flags & OBD_STATFS_NODELAY) {
-                /* procfs requests not want stat in wait for avoid deadlock */
-                req->rq_no_resend = 1;
-                req->rq_no_delay = 1;
-        }
+       if (flags & OBD_STATFS_NODELAY) {
+               /* procfs requests not want stat in wait for avoid deadlock */
+               req->rq_no_resend = 1;
+               req->rq_no_delay = 1;
+       }
 
-        rc = ptlrpc_queue_wait(req);
-        if (rc)
-                GOTO(out, rc);
+       rc = ptlrpc_queue_wait(req);
+       if (rc)
+               GOTO(out, rc);
 
-        msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
-        if (msfs == NULL) {
-                GOTO(out, rc = -EPROTO);
-        }
+       msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
+       if (msfs == NULL)
+               GOTO(out, rc = -EPROTO);
 
-        *osfs = *msfs;
+       *osfs = *msfs;
 
-        EXIT;
- out:
-        ptlrpc_req_finished(req);
-        return rc;
+       EXIT;
+out:
+       ptlrpc_req_finished(req);
+       return rc;
 }
 
 static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
@@ -3194,6 +3197,7 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
        spin_lock(&osc_shrink_lock);
        list_add_tail(&cli->cl_shrink_list, &osc_shrink_list);
        spin_unlock(&osc_shrink_lock);
+       cli->cl_import->imp_idle_timeout = osc_idle_timeout;
 
        RETURN(0);
 }
index 038ff31..d0e8d78 100644 (file)
@@ -865,10 +865,31 @@ ptlrpc_request_alloc_internal(struct obd_import *imp,
                               const struct req_format *format)
 {
         struct ptlrpc_request *request;
+       int connect = 0;
 
-        request = __ptlrpc_request_alloc(imp, pool);
-        if (request == NULL)
-                return NULL;
+       if (unlikely(imp->imp_state == LUSTRE_IMP_IDLE)) {
+               int rc;
+               CDEBUG(D_INFO, "%s: connect at new req\n",
+                      imp->imp_obd->obd_name);
+               spin_lock(&imp->imp_lock);
+               if (imp->imp_state == LUSTRE_IMP_IDLE) {
+                       imp->imp_generation++;
+                       imp->imp_initiated_at = imp->imp_generation;
+                       imp->imp_state =  LUSTRE_IMP_NEW;
+                       connect = 1;
+               }
+               spin_unlock(&imp->imp_lock);
+               if (connect) {
+                       rc = ptlrpc_connect_import(imp);
+                       if (rc < 0)
+                               return NULL;
+                       ptlrpc_pinger_add_import(imp);
+               }
+       }
+
+       request = __ptlrpc_request_alloc(imp, pool);
+       if (request == NULL)
+               return NULL;
 
         req_capsule_init(&request->rq_pill, request, RCL_CLIENT);
         req_capsule_set(&request->rq_pill, format);
@@ -1058,6 +1079,7 @@ EXPORT_SYMBOL(ptlrpc_set_destroy);
 void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
                         struct ptlrpc_request *req)
 {
+       LASSERT(req->rq_import->imp_state != LUSTRE_IMP_IDLE);
        LASSERT(list_empty(&req->rq_set_chain));
 
        if (req->rq_allow_intr)
@@ -1169,7 +1191,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
                if (atomic_read(&imp->imp_inval_count) != 0) {
                         DEBUG_REQ(D_ERROR, req, "invalidate in flight");
                         *status = -EIO;
-               } else if (req->rq_no_delay) {
+               } else if (req->rq_no_delay &&
+                          imp->imp_generation != imp->imp_initiated_at) {
+                       /* ignore nodelay for requests initiating connections */
                         *status = -EWOULDBLOCK;
                } else if (req->rq_allow_replay &&
                          (imp->imp_state == LUSTRE_IMP_REPLAY ||
@@ -1852,8 +1876,11 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                                        spin_unlock(&imp->imp_lock);
                                        GOTO(interpret, req->rq_status);
                                }
+                               /* ignore on just initiated connections */
                                if (ptlrpc_no_resend(req) &&
-                                   !req->rq_wait_ctx) {
+                                   !req->rq_wait_ctx &&
+                                   imp->imp_generation !=
+                                   imp->imp_initiated_at) {
                                        req->rq_status = -ENOTCONN;
                                        ptlrpc_rqphase_move(req,
                                                            RQ_PHASE_INTERPRET);
index 7027114..443bc32 100644 (file)
@@ -161,12 +161,13 @@ void reply_in_callback(struct lnet_event *ev)
                           ev->mlength, ev->offset, req->rq_replen);
         }
 
-       req->rq_import->imp_last_reply_time = ktime_get_real_seconds();
+       if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING)
+               req->rq_import->imp_last_reply_time = ktime_get_real_seconds();
 
 out_wake:
-        /* NB don't unlock till after wakeup; req can disappear under us
-         * since we don't have our own ref */
-        ptlrpc_client_wake_req(req);
+       /* NB don't unlock till after wakeup; req can disappear under us
+        * since we don't have our own ref */
+       ptlrpc_client_wake_req(req);
        spin_unlock(&req->rq_lock);
        EXIT;
 }
index 658f339..82fe902 100644 (file)
@@ -968,6 +968,21 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        }
 
        if (rc) {
+               struct ptlrpc_request *free_req;
+               struct ptlrpc_request *tmp;
+
+               /* abort all delayed requests initiated connection */
+               list_for_each_entry_safe(free_req, tmp, &imp->imp_delayed_list,
+                                        rq_list) {
+                       spin_lock(&free_req->rq_lock);
+                       if (free_req->rq_no_resend) {
+                               free_req->rq_err = 1;
+                               free_req->rq_status = -EIO;
+                               ptlrpc_client_wake_req(free_req);
+                       }
+                       spin_unlock(&free_req->rq_lock);
+               }
+
                /* if this reconnect to busy export - not need select new target
                 * for connecting*/
                imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc);
@@ -1528,15 +1543,12 @@ out:
        RETURN(rc);
 }
 
-int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+static struct ptlrpc_request *ptlrpc_disconnect_prep_req(struct obd_import *imp)
 {
        struct ptlrpc_request *req;
        int rq_opc, rc = 0;
        ENTRY;
 
-       if (imp->imp_obd->obd_force)
-               GOTO(set_state, rc);
-
        switch (imp->imp_connect_op) {
        case OST_CONNECT:
                rq_opc = OST_DISCONNECT;
@@ -1553,9 +1565,46 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                       "(connect_op %d): rc = %d\n",
                       imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
                       imp->imp_connect_op, rc);
-               RETURN(rc);
+               RETURN(ERR_PTR(rc));
        }
 
+       req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT,
+                                       LUSTRE_OBD_VERSION, rq_opc);
+       if (req == NULL)
+               RETURN(NULL);
+
+       /* We are disconnecting, do not retry a failed DISCONNECT rpc if
+        * it fails.  We can get through the above with a down server
+        * if the client doesn't know the server is gone yet. */
+       req->rq_no_resend = 1;
+
+       /* We want client umounts to happen quickly, no matter the
+          server state... */
+       req->rq_timeout = min_t(int, req->rq_timeout,
+                               INITIAL_CONNECT_TIMEOUT);
+
+       IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
+       req->rq_send_state =  LUSTRE_IMP_CONNECTING;
+       ptlrpc_request_set_replen(req);
+
+       RETURN(req);
+}
+
+int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
+{
+       struct ptlrpc_request *req;
+       int rc = 0;
+       ENTRY;
+
+       if (imp->imp_obd->obd_force)
+               GOTO(set_state, rc);
+
+       /* probably the import has been disconnected already being idle */
+       spin_lock(&imp->imp_lock);
+       if (imp->imp_state == LUSTRE_IMP_IDLE)
+               GOTO(out, rc);
+       spin_unlock(&imp->imp_lock);
+
        if (ptlrpc_import_in_recovery(imp)) {
                struct l_wait_info lwi;
                long timeout_jiffies;
@@ -1588,25 +1637,11 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                GOTO(out, rc);
        spin_unlock(&imp->imp_lock);
 
-        req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT,
-                                        LUSTRE_OBD_VERSION, rq_opc);
-        if (req) {
-                /* We are disconnecting, do not retry a failed DISCONNECT rpc if
-                 * it fails.  We can get through the above with a down server
-                 * if the client doesn't know the server is gone yet. */
-                req->rq_no_resend = 1;
-
-                /* We want client umounts to happen quickly, no matter the
-                   server state... */
-                req->rq_timeout = min_t(int, req->rq_timeout,
-                                        INITIAL_CONNECT_TIMEOUT);
-
-                IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING);
-                req->rq_send_state =  LUSTRE_IMP_CONNECTING;
-                ptlrpc_request_set_replen(req);
-                rc = ptlrpc_queue_wait(req);
-                ptlrpc_req_finished(req);
-        }
+       req = ptlrpc_disconnect_prep_req(imp);
+       if (IS_ERR(req))
+               GOTO(set_state, rc = PTR_ERR(req));
+       rc = ptlrpc_queue_wait(req);
+       ptlrpc_req_finished(req);
 
 set_state:
        spin_lock(&imp->imp_lock);
@@ -1624,6 +1659,51 @@ out:
 }
 EXPORT_SYMBOL(ptlrpc_disconnect_import);
 
+/*
+ * Interpret callback for the asynchronous DISCONNECT request queued by
+ * ptlrpc_disconnect_and_idle_import().
+ *
+ * Under imp_lock, moves the import to LUSTRE_IMP_IDLE and zeroes its
+ * remote handle.  \a env, \a data and \a rc are not looked at, so the
+ * import goes IDLE whatever the outcome of the request was.
+ *
+ * \retval 0           always
+ */
+static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env,
+                                           struct ptlrpc_request *req,
+                                           void *data, int rc)
+{
+       struct obd_import *imp = req->rq_import;
+
+       /* NOTE(review): imp_state is read here before imp_lock is taken */
+       LASSERT(imp->imp_state == LUSTRE_IMP_CONNECTING);
+       spin_lock(&imp->imp_lock);
+       IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_IDLE);
+       memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+       spin_unlock(&imp->imp_lock);
+
+       return 0;
+}
+
+/**
+ * Asynchronously disconnect a FULL import so that it ends up IDLE.
+ *
+ * Does nothing when imp_obd->obd_force is set, when the import is in
+ * recovery, or when it is not in LUSTRE_IMP_FULL state.  Otherwise a
+ * DISCONNECT request is prepared and handed to ptlrpcd; its interpret
+ * callback, ptlrpc_disconnect_idle_interpret(), moves the import to
+ * LUSTRE_IMP_IDLE.
+ *
+ * \retval 0           nothing to do, or the request was queued
+ * \retval -ENOMEM     the DISCONNECT request could not be allocated
+ * \retval negative    error encoded by ptlrpc_disconnect_prep_req()
+ */
+int ptlrpc_disconnect_and_idle_import(struct obd_import *imp)
+{
+       struct ptlrpc_request *req;
+       ENTRY;
+
+       if (imp->imp_obd->obd_force)
+               RETURN(0);
+
+       if (ptlrpc_import_in_recovery(imp))
+               RETURN(0);
+
+       spin_lock(&imp->imp_lock);
+       if (imp->imp_state != LUSTRE_IMP_FULL) {
+               spin_unlock(&imp->imp_lock);
+               RETURN(0);
+       }
+       spin_unlock(&imp->imp_lock);
+
+       req = ptlrpc_disconnect_prep_req(imp);
+       /* prep_req returns NULL (not an ERR_PTR) when the allocation
+        * fails and IS_ERR(NULL) is false, so catch that case first */
+       if (req == NULL)
+               RETURN(-ENOMEM);
+       if (IS_ERR(req))
+               RETURN(PTR_ERR(req));
+
+       CDEBUG(D_INFO, "%s: disconnect\n", imp->imp_obd->obd_name);
+       req->rq_interpret_reply = ptlrpc_disconnect_idle_interpret;
+       ptlrpcd_add_req(req);
+
+       RETURN(0);
+}
+EXPORT_SYMBOL(ptlrpc_disconnect_and_idle_import);
+
 void ptlrpc_cleanup_imp(struct obd_import *imp)
 {
        ENTRY;
index 19b7d01..b704b9e 100644 (file)
@@ -92,11 +92,40 @@ int ptlrpc_obd_ping(struct obd_device *obd)
 }
 EXPORT_SYMBOL(ptlrpc_obd_ping);
 
+/*
+ * Tell whether \a imp currently counts as idle.
+ *
+ * The import is idle only if idle disconnect is enabled (non-zero
+ * imp_idle_timeout), its refcount does not exceed the baseline of 4,
+ * its namespace (if any) has a zero ns_bref, and at least
+ * imp_idle_timeout seconds have passed since imp_last_reply_time.
+ */
+static bool ptlrpc_check_import_is_idle(struct obd_import *imp)
+{
+       struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
+       time64_t now;
+
+       /* a zero timeout means the import is never reported idle */
+       if (imp->imp_idle_timeout == 0)
+               return false;
+
+       /* 4 comes from:
+        *  - client_obd_setup() - hashed import
+        *  - ptlrpcd_alloc_work()
+        *  - ptlrpcd_alloc_work()
+        *  - ptlrpc_pinger_add_import
+        * more references than that are treated as the import being in use
+        */
+       if (atomic_read(&imp->imp_refcount) > 4)
+               return false;
+
+       /* any lock increases ns_bref being a resource holder */
+       if (ns != NULL && atomic_read(&ns->ns_bref) > 0)
+               return false;
+
+       now = ktime_get_real_seconds();
+       return now - imp->imp_last_reply_time >= imp->imp_idle_timeout;
+}
+
 static int ptlrpc_ping(struct obd_import *imp)
 {
        struct ptlrpc_request   *req;
        ENTRY;
 
+       if (ptlrpc_check_import_is_idle(imp))
+               RETURN(ptlrpc_disconnect_and_idle_import(imp));
+
        req = ptlrpc_prep_ping(imp);
        if (req == NULL) {
                CERROR("OOM trying to ping %s->%s\n",
index 5453813..935c46c 100644 (file)
@@ -837,7 +837,7 @@ test_22() {
        fi
        mount_client $MOUNT || error "mount_client $MOUNT failed"
        wait_osc_import_state mds ost FULL
-       wait_osc_import_state client ost FULL
+       wait_osc_import_ready client ost
        check_mount || error "check_mount failed"
        pass
 
@@ -3354,7 +3354,7 @@ test_46a() {
        # wait until osts in sync
        for (( i=2; i<=$OSTCOUNT; i++ )); do
            wait_osc_import_state mds ost$i FULL
-           wait_osc_import_state client ost$i FULL
+           wait_osc_import_ready client ost$i
        done
 
        #second client see all ost's
@@ -3548,7 +3548,7 @@ lazystatfs() {
        [ $RC1 -ne 0 ] && log "lazystatfs multiop failed"
        wait $PID || { RC1=$?; log "multiop return error "; }
 
-       $LFS df &
+       $LFS df -l &
        PID=$!
        sleep 5
        kill -s 0 $PID
@@ -3720,7 +3720,7 @@ test_50g() {
        setup
        start_ost2 || error "Unable to start OST2"
         wait_osc_import_state mds ost2 FULL
-        wait_osc_import_state client ost2 FULL
+       wait_osc_import_ready client ost2
 
        local PARAM="${FSNAME}-OST0001.osc.active"
 
index 758d7de..51f0353 100755 (executable)
@@ -24,6 +24,7 @@ RUNTESTS_SRC=${RUNTESTS_SRC:-"/etc /bin"}
 
 check_and_setup_lustre
 test_1() {
+sleep 5 # let MDS refresh aggregated statfs
 # Include some extra space for the status file
 USED=$(df -P $DIR | awk '{ print $3 }' | tail -n 1)
 
@@ -125,6 +126,7 @@ $RMDIRMANY $DST/d 100 || error "$RMDIRMANY cleanup failed"
 log "done"
 
 wait_delete_completed
+sleep 5 # let MDS refresh aggregated statfs
 NOWUSED=$(($(df -P $DIR | awk '{ print $3 }' | tail -n 1)))
 if [ $(expr $NOWUSED - $USED) -gt 1024 ]; then
        error "Space not all freed: now ${NOWUSED}kB, was ${USED}kB."
index da9b067..1ac707d 100644 (file)
@@ -64,7 +64,7 @@ stop_osts() {
        done
 
        for idx in "$@"; do
-               wait_osc_import_state client ost$idx DISCONN
+               wait_osc_import_state client ost$idx "\(DISCONN\|IDLE\)"
        done
 }
 
index 515225e..bf4956f 100755 (executable)
@@ -7180,29 +7180,6 @@ test_77g() { # bug 10889
 }
 run_test 77g "checksum error on OST write, read"
 
-test_77j() { # bug 13805
-       [ $PARALLEL == "yes" ] && skip "skip parallel run"
-       $GSS && skip_env "could not run with gss"
-
-       #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
-       lctl set_param fail_loc=0x40c
-       remount_client $MOUNT
-       lctl set_param fail_loc=0
-       # wait async osc connect to finish and reflect updated state value
-       local i
-       for (( i=0; i < OSTCOUNT; i++ )) ; do
-               wait_osc_import_state client ost$((i+1)) FULL
-       done
-
-       for VALUE in $(lctl get_param osc.*osc-[^mM]*.checksum_type); do
-               PARAM=$(echo ${VALUE[0]} | cut -d "=" -f1)
-               algo=$(lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g')
-               [ "$algo" = "adler" ] || error "algo set to $algo instead of adler"
-       done
-       remount_client $MOUNT
-}
-run_test 77j "client only supporting ADLER32"
-
 test_77k() { # LU-10906
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
        $GSS && skip_env "could not run with gss"
index 29129ea..53499c2 100755 (executable)
@@ -1144,7 +1144,7 @@ cleanup_34() {
        do_nodes $(comma_list $(osts_nodes)) \
                "lctl set_param -n fail_loc=0 2>/dev/null || true"
        for i in $(seq $OSTCOUNT); do
-               wait_osc_import_state client ost$i FULL
+               wait_osc_import_ready client ost$i
        done
 }
 
index e5ffa49..834d243 100755 (executable)
@@ -3379,7 +3379,9 @@ fail() {
        local clients=${CLIENTS:-$HOSTNAME}
 
        facet_failover $* || error "failover: $?"
-       wait_clients_import_state "$clients" "$facets" FULL
+       # to initiate all OSC idling connections
+       clients_up
+       wait_clients_import_state "$clients" "$facets" "\(FULL\|IDLE\)"
        clients_up || error "post-failover stat: $?"
 }
 
@@ -6116,6 +6118,7 @@ check_grant() {
 
        # sync all the data and make sure no pending data on server
        do_nodes $clients sync
+       clients_up # initiate all idling connections
 
        # get client grant
        client_grant=$(do_nodes $clients \
@@ -6690,7 +6693,7 @@ calc_sum () {
 }
 
 calc_osc_kbytes () {
-       df $MOUNT > /dev/null
+       $LFS df $MOUNT > /dev/null
        $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum
 }
 
@@ -6828,7 +6831,7 @@ _wait_import_state () {
     local i=0
 
        CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
-    while [ "${CONN_STATE}" != "${expected}" ]; do
+    while ! echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do
         if [ "${expected}" == "DISCONN" ]; then
             # for disconn we can check after proc entry is removed
             [ "x${CONN_STATE}" == "x" ] && return 0
@@ -6973,6 +6976,10 @@ wait_osc_import_state() {
        fi
 }
 
+wait_osc_import_ready() { # $1 and $2 are forwarded unchanged to wait_osc_import_state (callers in this patch pass e.g. "client ost2")
+       wait_osc_import_state $1 $2 "\(FULL\|IDLE\)" # "ready" means the import state matches either FULL or IDLE
+}
+
 _wait_mgc_import_state() {
        local facet=$1
        local expected=$2
@@ -7035,7 +7042,7 @@ wait_dne_interconnect() {
 
        if [ $MDSCOUNT -gt 1 ]; then
                for num in $(seq $MDSCOUNT); do
-                       wait_osc_import_state mds mds$num FULL
+                       wait_osc_import_ready mds mds$num
                done
        fi
 }
@@ -7088,7 +7095,7 @@ wait_clients_import_state () {
                local params=$(expand_list $params $proc_path)
        done
 
-       if ! do_rpc_nodes "$list" wait_import_state_mount $expected $params;
+       if ! do_rpc_nodes "$list" wait_import_state_mount "$expected" $params;
        then
                error "import is not in ${expected} state"
                return 1