From: Alex Zhuravlev Date: Mon, 28 Sep 2015 13:50:15 +0000 (+0300) Subject: LU-7236 ptlrpc: idle connections can disconnect X-Git-Tag: 2.11.53~48 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=5a6ceb664f07812c351786c1043da71ff5027f8c LU-7236 ptlrpc: idle connections can disconnect - when new request is being allocated ptlrpc initiates connection if it's not connected yet - if the import is idle (no locks, no active RPCs, no non-PING reply for last osc_idle_timeout seconds), then pinger tries to disconnect asynchronously - currently only client-to-OST connections can be idle - lctl set_param osc.*.idle_timeout=N controls new feature: N=0 - disable N>0 - seconds to idle before disconnect - lctl set_param osc.*.idle_connect=N to reconnect if idle (N is positive number) - OSC module parameter osc_idle_timeout controls default idle timeout and set to 20 seconds by default Change-Id: I4b90eb5209a0b0e62d85fd55ad6e9cab8c03fd14 Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/16682 Tested-by: Jenkins Reviewed-by: Dmitry Eremin Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: James Simmons --- diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 9191fa1..dfa22bb 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -101,19 +101,21 @@ enum lustre_imp_state { LUSTRE_IMP_RECOVER = 8, LUSTRE_IMP_FULL = 9, LUSTRE_IMP_EVICTED = 10, + LUSTRE_IMP_IDLE = 11, + LUSTRE_IMP_LAST }; /** Returns test string representation of numeric import state \a state */ static inline char * ptlrpc_import_state_name(enum lustre_imp_state state) { - static char* import_state_names[] = { - "", "CLOSED", "NEW", "DISCONN", - "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", - "RECOVER", "FULL", "EVICTED", - }; - - LASSERT (state <= LUSTRE_IMP_EVICTED); - return import_state_names[state]; + static char *import_state_names[] = { + "", "CLOSED", "NEW", "DISCONN", + 
"CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", + "RECOVER", "FULL", "EVICTED", "IDLE", + }; + + LASSERT(state < LUSTRE_IMP_LAST); + return import_state_names[state]; } /** @@ -232,6 +234,8 @@ struct obd_import { int imp_state_hist_idx; /** Current import generation. Incremented on every reconnect */ int imp_generation; + /** Idle connection initiated at this generation */ + int imp_initiated_at; /** Incremented every time we send reconnection request */ __u32 imp_conn_cnt; /** @@ -303,6 +307,7 @@ struct obd_import { /* connected but not FULL yet */ imp_connected:1; __u32 imp_connect_op; + __u32 imp_idle_timeout; struct obd_connect_data imp_connect_data; __u64 imp_connect_flags_orig; __u64 imp_connect_flags2_orig; diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 2d36764..3afe692 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -2308,6 +2308,7 @@ void ptlrpc_hr_fini(void); int ptlrpc_connect_import(struct obd_import *imp); int ptlrpc_init_import(struct obd_import *imp); int ptlrpc_disconnect_import(struct obd_import *imp, int noclose); +int ptlrpc_disconnect_and_idle_import(struct obd_import *imp); int ptlrpc_import_recovery_state_machine(struct obd_import *imp); void deuuidify(char *uuid, const char *prefix, char **uuid_start, int *uuid_len); diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index d7a988f..2ca7b99 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -65,7 +65,8 @@ static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt) return maxbytes; spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_FULL && + if ((imp->imp_state == LUSTRE_IMP_FULL || + imp->imp_state == LUSTRE_IMP_IDLE) && (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) && imp->imp_connect_data.ocd_maxbytes > 0) maxbytes = imp->imp_connect_data.ocd_maxbytes; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index e906fc82..45c57fc 100644 --- a/lustre/lov/lov_obd.c +++ 
b/lustre/lov/lov_obd.c @@ -971,27 +971,30 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { case IOC_OBD_STATFS: { - struct obd_ioctl_data *data = karg; - struct obd_device *osc_obd; - struct obd_statfs stat_buf = {0}; - __u32 index; + struct obd_ioctl_data *data = karg; + struct obd_device *osc_obd; + struct obd_statfs stat_buf = {0}; + struct obd_import *imp; + __u32 index; __u32 flags; - memcpy(&index, data->ioc_inlbuf2, sizeof(index)); - if ((index >= count)) - RETURN(-ENODEV); + memcpy(&index, data->ioc_inlbuf2, sizeof(index)); + if ((index >= count)) + RETURN(-ENODEV); - if (!lov->lov_tgts[index]) - /* Try again with the next index */ - RETURN(-EAGAIN); - if (!lov->lov_tgts[index]->ltd_active) - RETURN(-ENODATA); + if (!lov->lov_tgts[index]) + /* Try again with the next index */ + RETURN(-EAGAIN); + imp = lov->lov_tgts[index]->ltd_exp->exp_obd->u.cli.cl_import; + if (!lov->lov_tgts[index]->ltd_active && + imp->imp_state != LUSTRE_IMP_IDLE) + RETURN(-ENODATA); - osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); - if (!osc_obd) - RETURN(-EINVAL); + osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); + if (!osc_obd) + RETURN(-EINVAL); - /* copy UUID */ + /* copy UUID */ if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), min_t(unsigned long, data->ioc_plen2, sizeof(struct obd_uuid)))) diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index e7dda50..a7baa12 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -106,6 +106,7 @@ static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx) wait_queue_head_t waitq; struct l_wait_info lwi; struct lov_tgt_desc *tgt; + struct obd_import *imp = NULL; int rc = 0; mutex_lock(&lov->lov_lock); @@ -118,7 +119,11 @@ static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx) if (likely(tgt->ltd_active)) GOTO(out, rc = 1); - if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried) + if (tgt->ltd_exp) + 
imp = class_exp2cliimp(tgt->ltd_exp); + if (imp && imp->imp_connect_tried) + GOTO(out, rc = 0); + if (imp && imp->imp_state == LUSTRE_IMP_IDLE) GOTO(out, rc = 0); mutex_unlock(&lov->lov_lock); @@ -322,47 +327,53 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo, /* We only get block data from the OBD */ for (i = 0; i < lov->desc.ld_tgt_count; i++) { + struct lov_tgt_desc *ltd = lov->lov_tgts[i]; struct lov_request *req; - if (lov->lov_tgts[i] == NULL || - (oinfo->oi_flags & OBD_STATFS_NODELAY && - !lov->lov_tgts[i]->ltd_active)) { + if (ltd == NULL) { CDEBUG(D_HA, "lov idx %d inactive\n", i); continue; } /* skip targets that have been explicitely disabled by the * administrator */ - if (!lov->lov_tgts[i]->ltd_exp) { + if (!ltd->ltd_exp) { CDEBUG(D_HA, "lov idx %d administratively disabled\n", i); continue; } - if (!lov->lov_tgts[i]->ltd_active) + if (oinfo->oi_flags & OBD_STATFS_NODELAY && + class_exp2cliimp(ltd->ltd_exp)->imp_state != + LUSTRE_IMP_IDLE && !ltd->ltd_active) { + CDEBUG(D_HA, "lov idx %d inactive\n", i); + continue; + } + + if (!ltd->ltd_active) lov_check_and_wait_active(lov, i); OBD_ALLOC(req, sizeof(*req)); if (req == NULL) GOTO(out_set, rc = -ENOMEM); - OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs)); - if (req->rq_oi.oi_osfs == NULL) { - OBD_FREE(req, sizeof(*req)); - GOTO(out_set, rc = -ENOMEM); - } + OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs)); + if (req->rq_oi.oi_osfs == NULL) { + OBD_FREE(req, sizeof(*req)); + GOTO(out_set, rc = -ENOMEM); + } - req->rq_idx = i; - req->rq_oi.oi_cb_up = cb_statfs_update; - req->rq_oi.oi_flags = oinfo->oi_flags; + req->rq_idx = i; + req->rq_oi.oi_cb_up = cb_statfs_update; + req->rq_oi.oi_flags = oinfo->oi_flags; - lov_set_add_req(req, set); - } - if (!set->set_count) - GOTO(out_set, rc = -EIO); - *reqset = set; - RETURN(rc); + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); out_set: - 
lov_fini_statfs_set(set); - RETURN(rc); + lov_fini_statfs_set(set); + RETURN(rc); } diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 9337450..3c0e138 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -608,6 +608,67 @@ static int osc_unstable_stats_seq_show(struct seq_file *m, void *v) } LPROC_SEQ_FOPS_RO(osc_unstable_stats); +static int osc_idle_timeout_seq_show(struct seq_file *m, void *v) +{ + struct obd_device *obd = m->private; + struct client_obd *cli = &obd->u.cli; + + seq_printf(m, "%u\n", cli->cl_import->imp_idle_timeout); + return 0; +} + +static ssize_t osc_idle_timeout_seq_write(struct file *f, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct obd_device *dev = ((struct seq_file *)f->private_data)->private; + struct client_obd *cli = &dev->u.cli; + struct ptlrpc_request *req; + __s64 val; + int rc; + + rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1'); + if (rc) + return rc; + if (val < 0 || val > UINT_MAX) /* seconds; must fit in __u32 */ + return -ERANGE; + + cli->cl_import->imp_idle_timeout = val; + + /* to initiate the connection if it's in IDLE state */ + if (!val) { + req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_STATFS); + if (req != NULL) + ptlrpc_req_finished(req); + } + + return count; +} +LPROC_SEQ_FOPS(osc_idle_timeout); + +static int osc_idle_connect_seq_show(struct seq_file *m, void *v) +{ + return 0; +} + +static ssize_t osc_idle_connect_seq_write(struct file *f, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct obd_device *dev = ((struct seq_file *)f->private_data)->private; + struct client_obd *cli = &dev->u.cli; + struct ptlrpc_request *req; + + /* to initiate the connection if it's in IDLE state */ + req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_STATFS); + if (req != NULL) + ptlrpc_req_finished(req); + ptlrpc_pinger_force(cli->cl_import); + + return count; +} +LPROC_SEQ_FOPS(osc_idle_connect); + LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags); LPROC_SEQ_FOPS_RO_TYPE(osc, 
server_uuid); LPROC_SEQ_FOPS_RO_TYPE(osc, timeouts); @@ -639,6 +700,10 @@ struct lprocfs_vars lprocfs_osc_obd_vars[] = { .fops = &osc_pinger_recov_fops }, { .name = "unstable_stats", .fops = &osc_unstable_stats_fops }, + { .name = "idle_timeout", + .fops = &osc_idle_timeout_fops }, + { .name = "idle_connect", + .fops = &osc_idle_connect_fops }, { NULL } }; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 62dd791..183e8f4 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -56,6 +56,9 @@ struct ptlrpc_request_pool *osc_rq_pool; static unsigned int osc_reqpool_mem_max = 5; module_param(osc_reqpool_mem_max, uint, 0444); +static unsigned int osc_idle_timeout = 20; /* default idle timeout, seconds */ +module_param(osc_idle_timeout, uint, 0644); + #define osc_grant_args osc_brw_async_args struct osc_setattr_args { @@ -2648,7 +2651,7 @@ static int osc_statfs_async(struct obd_export *exp, struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req; struct osc_async_args *aa; - int rc; + int rc; ENTRY; /* We could possibly pass max_age in the request (as an absolute @@ -2666,15 +2669,15 @@ static int osc_statfs_async(struct obd_export *exp, ptlrpc_request_free(req); RETURN(rc); } - ptlrpc_request_set_replen(req); - req->rq_request_portal = OST_CREATE_PORTAL; - ptlrpc_at_set_req_timeout(req); + ptlrpc_request_set_replen(req); + req->rq_request_portal = OST_CREATE_PORTAL; + ptlrpc_at_set_req_timeout(req); - if (oinfo->oi_flags & OBD_STATFS_NODELAY) { - /* procfs requests not want stat in wait for avoid deadlock */ - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } + if (oinfo->oi_flags & OBD_STATFS_NODELAY) { + /* procfs requests not want stat in wait for avoid deadlock */ + req->rq_no_resend = 1; + req->rq_no_delay = 1; + } req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret; CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); @@ -2688,12 +2691,13 @@ static int osc_statfs_async(struct obd_export *exp, static int osc_statfs(const struct lu_env 
*env, struct obd_export *exp, struct obd_statfs *osfs, time64_t max_age, __u32 flags) { - struct obd_device *obd = class_exp2obd(exp); - struct obd_statfs *msfs; - struct ptlrpc_request *req; - struct obd_import *imp = NULL; - int rc; - ENTRY; + struct obd_device *obd = class_exp2obd(exp); + struct obd_statfs *msfs; + struct ptlrpc_request *req; + struct obd_import *imp = NULL; + int rc; + ENTRY; + /*Since the request might also come from lprocfs, so we need *sync this with client_disconnect_export Bug15684*/ @@ -2704,49 +2708,48 @@ static int osc_statfs(const struct lu_env *env, struct obd_export *exp, if (!imp) RETURN(-ENODEV); - /* We could possibly pass max_age in the request (as an absolute - * timestamp or a "seconds.usec ago") so the target can avoid doing - * extra calls into the filesystem if that isn't necessary (e.g. - * during mount that would help a bit). Having relative timestamps - * is not so great if request processing is slow, while absolute - * timestamps are not ideal because they need time synchronization. */ - req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS); + /* We could possibly pass max_age in the request (as an absolute + * timestamp or a "seconds.usec ago") so the target can avoid doing + * extra calls into the filesystem if that isn't necessary (e.g. + * during mount that would help a bit). Having relative timestamps + * is not so great if request processing is slow, while absolute + * timestamps are not ideal because they need time synchronization. 
*/ + req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS); - class_import_put(imp); + class_import_put(imp); - if (req == NULL) - RETURN(-ENOMEM); + if (req == NULL) + RETURN(-ENOMEM); - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS); - if (rc) { - ptlrpc_request_free(req); - RETURN(rc); - } - ptlrpc_request_set_replen(req); - req->rq_request_portal = OST_CREATE_PORTAL; - ptlrpc_at_set_req_timeout(req); + rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } + ptlrpc_request_set_replen(req); + req->rq_request_portal = OST_CREATE_PORTAL; + ptlrpc_at_set_req_timeout(req); - if (flags & OBD_STATFS_NODELAY) { - /* procfs requests not want stat in wait for avoid deadlock */ - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } + if (flags & OBD_STATFS_NODELAY) { + /* procfs requests not want stat in wait for avoid deadlock */ + req->rq_no_resend = 1; + req->rq_no_delay = 1; + } - rc = ptlrpc_queue_wait(req); - if (rc) - GOTO(out, rc); + rc = ptlrpc_queue_wait(req); + if (rc) + GOTO(out, rc); - msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS); - if (msfs == NULL) { - GOTO(out, rc = -EPROTO); - } + msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS); + if (msfs == NULL) + GOTO(out, rc = -EPROTO); - *osfs = *msfs; + *osfs = *msfs; - EXIT; - out: - ptlrpc_req_finished(req); - return rc; + EXIT; +out: + ptlrpc_req_finished(req); + return rc; } static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, @@ -3194,6 +3197,7 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) spin_lock(&osc_shrink_lock); list_add_tail(&cli->cl_shrink_list, &osc_shrink_list); spin_unlock(&osc_shrink_lock); + cli->cl_import->imp_idle_timeout = osc_idle_timeout; RETURN(0); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 038ff31..d0e8d78 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -865,10 +865,31 @@ 
ptlrpc_request_alloc_internal(struct obd_import *imp, const struct req_format *format) { struct ptlrpc_request *request; + int connect = 0; - request = __ptlrpc_request_alloc(imp, pool); - if (request == NULL) - return NULL; + if (unlikely(imp->imp_state == LUSTRE_IMP_IDLE)) { + int rc; + CDEBUG(D_INFO, "%s: connect at new req\n", + imp->imp_obd->obd_name); + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_IDLE) { + imp->imp_generation++; + imp->imp_initiated_at = imp->imp_generation; + imp->imp_state = LUSTRE_IMP_NEW; + connect = 1; + } + spin_unlock(&imp->imp_lock); + if (connect) { + rc = ptlrpc_connect_import(imp); + if (rc < 0) + return NULL; + ptlrpc_pinger_add_import(imp); + } + } + + request = __ptlrpc_request_alloc(imp, pool); + if (request == NULL) + return NULL; req_capsule_init(&request->rq_pill, request, RCL_CLIENT); req_capsule_set(&request->rq_pill, format); @@ -1058,6 +1079,7 @@ EXPORT_SYMBOL(ptlrpc_set_destroy); void ptlrpc_set_add_req(struct ptlrpc_request_set *set, struct ptlrpc_request *req) { + LASSERT(req->rq_import->imp_state != LUSTRE_IMP_IDLE); LASSERT(list_empty(&req->rq_set_chain)); if (req->rq_allow_intr) @@ -1169,7 +1191,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, if (atomic_read(&imp->imp_inval_count) != 0) { DEBUG_REQ(D_ERROR, req, "invalidate in flight"); *status = -EIO; - } else if (req->rq_no_delay) { + } else if (req->rq_no_delay && + imp->imp_generation != imp->imp_initiated_at) { + /* ignore nodelay for requests initiating connections */ *status = -EWOULDBLOCK; } else if (req->rq_allow_replay && (imp->imp_state == LUSTRE_IMP_REPLAY || @@ -1852,8 +1876,11 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) spin_unlock(&imp->imp_lock); GOTO(interpret, req->rq_status); } + /* ignore on just initiated connections */ if (ptlrpc_no_resend(req) && - !req->rq_wait_ctx) { + !req->rq_wait_ctx && + imp->imp_generation != + imp->imp_initiated_at) { req->rq_status = -ENOTCONN; 
ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 7027114..443bc32 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -161,12 +161,13 @@ void reply_in_callback(struct lnet_event *ev) ev->mlength, ev->offset, req->rq_replen); } - req->rq_import->imp_last_reply_time = ktime_get_real_seconds(); + if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING) + req->rq_import->imp_last_reply_time = ktime_get_real_seconds(); out_wake: - /* NB don't unlock till after wakeup; req can disappear under us - * since we don't have our own ref */ - ptlrpc_client_wake_req(req); + /* NB don't unlock till after wakeup; req can disappear under us + * since we don't have our own ref */ + ptlrpc_client_wake_req(req); spin_unlock(&req->rq_lock); EXIT; } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 658f339..82fe902 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -968,6 +968,21 @@ static int ptlrpc_connect_interpret(const struct lu_env *env, } if (rc) { + struct ptlrpc_request *free_req; + struct ptlrpc_request *tmp; + + /* abort all delayed requests initiated connection */ + list_for_each_entry_safe(free_req, tmp, &imp->imp_delayed_list, + rq_list) { + spin_lock(&free_req->rq_lock); + if (free_req->rq_no_resend) { + free_req->rq_err = 1; + free_req->rq_status = -EIO; + ptlrpc_client_wake_req(free_req); + } + spin_unlock(&free_req->rq_lock); + } + /* if this reconnect to busy export - not need select new target * for connecting*/ imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc); @@ -1528,15 +1543,12 @@ out: RETURN(rc); } -int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) +static struct ptlrpc_request *ptlrpc_disconnect_prep_req(struct obd_import *imp) { struct ptlrpc_request *req; int rq_opc, rc = 0; ENTRY; - if (imp->imp_obd->obd_force) - GOTO(set_state, rc); - switch (imp->imp_connect_op) { case OST_CONNECT: rq_opc = OST_DISCONNECT; @@ -1553,9 +1565,46 @@ 
int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) "(connect_op %d): rc = %d\n", imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), imp->imp_connect_op, rc); - RETURN(rc); + RETURN(ERR_PTR(rc)); } + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT, + LUSTRE_OBD_VERSION, rq_opc); + if (req == NULL) /* callers only test IS_ERR(), never NULL */ + RETURN(ERR_PTR(-ENOMEM)); + + /* We are disconnecting, do not retry a failed DISCONNECT rpc if + * it fails. We can get through the above with a down server + * if the client doesn't know the server is gone yet. */ + req->rq_no_resend = 1; + + /* We want client umounts to happen quickly, no matter the + server state... */ + req->rq_timeout = min_t(int, req->rq_timeout, + INITIAL_CONNECT_TIMEOUT); + + IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING); + req->rq_send_state = LUSTRE_IMP_CONNECTING; + ptlrpc_request_set_replen(req); + + RETURN(req); +} + +int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) +{ + struct ptlrpc_request *req; + int rc = 0; + ENTRY; + + if (imp->imp_obd->obd_force) + GOTO(set_state, rc); + + /* probably the import has been disconnected already being idle */ + spin_lock(&imp->imp_lock); + if (imp->imp_state == LUSTRE_IMP_IDLE) + GOTO(out, rc); + spin_unlock(&imp->imp_lock); + if (ptlrpc_import_in_recovery(imp)) { struct l_wait_info lwi; long timeout_jiffies; @@ -1588,25 +1637,11 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) GOTO(out, rc); spin_unlock(&imp->imp_lock); - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT, - LUSTRE_OBD_VERSION, rq_opc); - if (req) { - /* We are disconnecting, do not retry a failed DISCONNECT rpc if - * it fails. We can get through the above with a down server - * if the client doesn't know the server is gone yet. */ - req->rq_no_resend = 1; - - /* We want client umounts to happen quickly, no matter the - server state... 
*/ - req->rq_timeout = min_t(int, req->rq_timeout, - INITIAL_CONNECT_TIMEOUT); - - IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING); - req->rq_send_state = LUSTRE_IMP_CONNECTING; - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - ptlrpc_req_finished(req); - } + req = ptlrpc_disconnect_prep_req(imp); + if (IS_ERR(req)) + GOTO(set_state, rc = PTR_ERR(req)); + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); set_state: spin_lock(&imp->imp_lock); @@ -1624,6 +1659,51 @@ out: } EXPORT_SYMBOL(ptlrpc_disconnect_import); +static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env, + struct ptlrpc_request *req, + void *data, int rc) +{ + struct obd_import *imp = req->rq_import; + + LASSERT(imp->imp_state == LUSTRE_IMP_CONNECTING); + spin_lock(&imp->imp_lock); + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_IDLE); + memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); + spin_unlock(&imp->imp_lock); + + return 0; +} + +int ptlrpc_disconnect_and_idle_import(struct obd_import *imp) +{ + struct ptlrpc_request *req; + ENTRY; + + if (imp->imp_obd->obd_force) + RETURN(0); + + if (ptlrpc_import_in_recovery(imp)) + RETURN(0); + + spin_lock(&imp->imp_lock); + if (imp->imp_state != LUSTRE_IMP_FULL) { + spin_unlock(&imp->imp_lock); + RETURN(0); + } + spin_unlock(&imp->imp_lock); + + req = ptlrpc_disconnect_prep_req(imp); + if (IS_ERR(req)) + RETURN(PTR_ERR(req)); + + CDEBUG(D_INFO, "%s: disconnect\n", imp->imp_obd->obd_name); + req->rq_interpret_reply = ptlrpc_disconnect_idle_interpret; + ptlrpcd_add_req(req); + + RETURN(0); +} +EXPORT_SYMBOL(ptlrpc_disconnect_and_idle_import); + void ptlrpc_cleanup_imp(struct obd_import *imp) { ENTRY; diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 19b7d01..b704b9e 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -92,11 +92,40 @@ int ptlrpc_obd_ping(struct obd_device *obd) } EXPORT_SYMBOL(ptlrpc_obd_ping); +static bool ptlrpc_check_import_is_idle(struct obd_import *imp) +{ + 
struct ldlm_namespace *ns = imp->imp_obd->obd_namespace; + time64_t now; + + if (!imp->imp_idle_timeout) + return false; + /* 4 comes from: + * - client_obd_setup() - hashed import + * - ptlrpcd_alloc_work() + * - ptlrpcd_alloc_work() + * - ptlrpc_pinger_add_import + */ + if (atomic_read(&imp->imp_refcount) > 4) + return false; + /* any lock increases ns_bref being a resource holder */ + if (ns && atomic_read(&ns->ns_bref) > 0) + return false; + + now = ktime_get_real_seconds(); + if (now - imp->imp_last_reply_time < imp->imp_idle_timeout) + return false; + + return true; +} + static int ptlrpc_ping(struct obd_import *imp) { struct ptlrpc_request *req; ENTRY; + if (ptlrpc_check_import_is_idle(imp)) + RETURN(ptlrpc_disconnect_and_idle_import(imp)); + req = ptlrpc_prep_ping(imp); if (req == NULL) { CERROR("OOM trying to ping %s->%s\n", diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 5453813..935c46c 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -837,7 +837,7 @@ test_22() { fi mount_client $MOUNT || error "mount_client $MOUNT failed" wait_osc_import_state mds ost FULL - wait_osc_import_state client ost FULL + wait_osc_import_ready client ost check_mount || error "check_mount failed" pass @@ -3354,7 +3354,7 @@ test_46a() { # wait until osts in sync for (( i=2; i<=$OSTCOUNT; i++ )); do wait_osc_import_state mds ost$i FULL - wait_osc_import_state client ost$i FULL + wait_osc_import_ready client ost$i done #second client see all ost's @@ -3548,7 +3548,7 @@ lazystatfs() { [ $RC1 -ne 0 ] && log "lazystatfs multiop failed" wait $PID || { RC1=$?; log "multiop return error "; } - $LFS df & + $LFS df -l & PID=$! 
sleep 5 kill -s 0 $PID @@ -3720,7 +3720,7 @@ test_50g() { setup start_ost2 || error "Unable to start OST2" wait_osc_import_state mds ost2 FULL - wait_osc_import_state client ost2 FULL + wait_osc_import_ready client ost2 local PARAM="${FSNAME}-OST0001.osc.active" diff --git a/lustre/tests/runtests b/lustre/tests/runtests index 758d7de..51f0353 100755 --- a/lustre/tests/runtests +++ b/lustre/tests/runtests @@ -24,6 +24,7 @@ RUNTESTS_SRC=${RUNTESTS_SRC:-"/etc /bin"} check_and_setup_lustre test_1() { +sleep 5 # let MDS refresh aggregated statfs # Include some extra space for the status file USED=$(df -P $DIR | awk '{ print $3 }' | tail -n 1) @@ -125,6 +126,7 @@ $RMDIRMANY $DST/d 100 || error "$RMDIRMANY cleanup failed" log "done" wait_delete_completed +sleep 5 # let MDS refresh aggregated statfs NOWUSED=$(($(df -P $DIR | awk '{ print $3 }' | tail -n 1))) if [ $(expr $NOWUSED - $USED) -gt 1024 ]; then error "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index da9b067..1ac707d 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -64,7 +64,7 @@ stop_osts() { done for idx in "$@"; do - wait_osc_import_state client ost$idx DISCONN + wait_osc_import_state client ost$idx "\(DISCONN\|IDLE\)" done } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 515225e..bf4956f 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7180,29 +7180,6 @@ test_77g() { # bug 10889 } run_test 77g "checksum error on OST write, read" -test_77j() { # bug 13805 - [ $PARALLEL == "yes" ] && skip "skip parallel run" - $GSS && skip_env "could not run with gss" - - #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c - lctl set_param fail_loc=0x40c - remount_client $MOUNT - lctl set_param fail_loc=0 - # wait async osc connect to finish and reflect updated state value - local i - for (( i=0; i < OSTCOUNT; i++ )) ; do - wait_osc_import_state client ost$((i+1)) FULL - done - - for VALUE 
in $(lctl get_param osc.*osc-[^mM]*.checksum_type); do - PARAM=$(echo ${VALUE[0]} | cut -d "=" -f1) - algo=$(lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g') - [ "$algo" = "adler" ] || error "algo set to $algo instead of adler" - done - remount_client $MOUNT -} -run_test 77j "client only supporting ADLER32" - test_77k() { # LU-10906 [ $PARALLEL == "yes" ] && skip "skip parallel run" $GSS && skip_env "could not run with gss" diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 29129ea..53499c2 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -1144,7 +1144,7 @@ cleanup_34() { do_nodes $(comma_list $(osts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" for i in $(seq $OSTCOUNT); do - wait_osc_import_state client ost$i FULL + wait_osc_import_ready client ost$i done } diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index e5ffa49..834d243 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3379,7 +3379,9 @@ fail() { local clients=${CLIENTS:-$HOSTNAME} facet_failover $* || error "failover: $?" - wait_clients_import_state "$clients" "$facets" FULL + # to initiate all OSC idling connections + clients_up + wait_clients_import_state "$clients" "$facets" "\(FULL\|IDLE\)" clients_up || error "post-failover stat: $?" } @@ -6116,6 +6118,7 @@ check_grant() { # sync all the data and make sure no pending data on server do_nodes $clients sync + clients_up # initiate all idling connections # get client grant client_grant=$(do_nodes $clients \ @@ -6690,7 +6693,7 @@ calc_sum () { } calc_osc_kbytes () { - df $MOUNT > /dev/null + $LFS df $MOUNT > /dev/null $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum } @@ -6828,7 +6831,7 @@ _wait_import_state () { local i=0 CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq) - while [ "${CONN_STATE}" != "${expected}" ]; do + while ! 
echo "${CONN_STATE}" | egrep -q "^${expected}\$" ; do if [ "${expected}" == "DISCONN" ]; then # for disconn we can check after proc entry is removed [ "x${CONN_STATE}" == "x" ] && return 0 @@ -6973,6 +6976,10 @@ wait_osc_import_state() { fi } +wait_osc_import_ready() { + wait_osc_import_state $1 $2 "\(FULL\|IDLE\)" +} + _wait_mgc_import_state() { local facet=$1 local expected=$2 @@ -7035,7 +7042,7 @@ wait_dne_interconnect() { if [ $MDSCOUNT -gt 1 ]; then for num in $(seq $MDSCOUNT); do - wait_osc_import_state mds mds$num FULL + wait_osc_import_ready mds mds$num done fi } @@ -7088,7 +7095,7 @@ wait_clients_import_state () { local params=$(expand_list $params $proc_path) done - if ! do_rpc_nodes "$list" wait_import_state_mount $expected $params; + if ! do_rpc_nodes "$list" wait_import_state_mount "$expected" $params; then error "import is not in ${expected} state" return 1