After the OST or MDT is restarted, the lwp reconnection can
fail with -EALREADY because the connect count in the connection
request is less than the value saved in the corresponding export
at MDT0000, which could cause the system to hang.
The patch also changes lustre_lwp_connect() to set the
OBD_CONNECT_MDS_MDS flag only when the connection is between MDTs.
Lustre-change: https://review.whamcloud.com/32536
Lustre-commit: 0814d5077343953115f50982a2e93cebb29bda68
Change-Id: I9ae7b4faadc65fdaa78458a06315b1739d144feb
Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Minh Diep <mdiep@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/33977
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
* cause namespace inconsistency */
spin_lock(&export->exp_lock);
export->exp_connecting = 1;
* cause namespace inconsistency */
spin_lock(&export->exp_lock);
export->exp_connecting = 1;
+ export->exp_conn_cnt = 0;
spin_unlock(&export->exp_lock);
conn.cookie = export->exp_handle.h_cookie;
rc = EALREADY;
spin_unlock(&export->exp_lock);
conn.cookie = export->exp_handle.h_cookie;
rc = EALREADY;
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid),
atomic_read(&export->exp_refcount));
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid),
atomic_read(&export->exp_refcount));
- GOTO(out, rc = -EBUSY);
- } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) {
- if (!strstr(cluuid.uuid, "mdt"))
- LCONSOLE_WARN("%s: Rejecting reconnect from the "
- "known client %s (at %s) because it "
- "is indicating it is a new client",
- target->obd_name, cluuid.uuid,
- libcfs_nid2str(req->rq_peer.nid));
- GOTO(out, rc = -EALREADY);
- } else {
- OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout);
- }
+ GOTO(out, rc = -EBUSY);
+ } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 &&
+ rc != EALREADY) {
+ if (!strstr(cluuid.uuid, "mdt"))
+ LCONSOLE_WARN("%s: Rejecting reconnect from the "
+ "known client %s (at %s) because it "
+ "is indicating it is a new client",
+ target->obd_name, cluuid.uuid,
+ libcfs_nid2str(req->rq_peer.nid));
+ GOTO(out, rc = -EALREADY);
+ } else {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout);
+ }
if (rc < 0) {
GOTO(out, rc);
if (rc < 0) {
GOTO(out, rc);
}
EXPORT_SYMBOL(lustre_notify_lwp_list);
}
EXPORT_SYMBOL(lustre_notify_lwp_list);
-static int lustre_lwp_connect(struct obd_device *lwp)
+static int lustre_lwp_connect(struct obd_device *lwp, bool is_mdt)
{
struct lu_env env;
struct lu_context session_ctx;
{
struct lu_env env;
struct lu_context session_ctx;
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX;
data->ocd_version = LUSTRE_VERSION_CODE;
data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX;
data->ocd_version = LUSTRE_VERSION_CODE;
- data->ocd_connect_flags |= OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID |
- OBD_CONNECT_AT | OBD_CONNECT_LRU_RESIZE |
- OBD_CONNECT_FULL20 | OBD_CONNECT_LVB_TYPE |
- OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_LFSCK |
- OBD_CONNECT_BULK_MBITS;
+ data->ocd_connect_flags |= OBD_CONNECT_FID | OBD_CONNECT_AT |
+ OBD_CONNECT_LRU_RESIZE | OBD_CONNECT_FULL20 |
+ OBD_CONNECT_LVB_TYPE | OBD_CONNECT_LIGHTWEIGHT |
+ OBD_CONNECT_LFSCK | OBD_CONNECT_BULK_MBITS;
+
+ if (is_mdt)
+ data->ocd_connect_flags |= OBD_CONNECT_MDS_MDS;
+
OBD_ALLOC_PTR(uuid);
if (uuid == NULL)
GOTO(out, rc = -ENOMEM);
OBD_ALLOC_PTR(uuid);
if (uuid == NULL)
GOTO(out, rc = -ENOMEM);
obd = class_name2obd(lwpname);
LASSERT(obd != NULL);
obd = class_name2obd(lwpname);
LASSERT(obd != NULL);
- rc = lustre_lwp_connect(obd);
+ rc = lustre_lwp_connect(obd, strstr(lsi->lsi_svname, "-MDT") != NULL);
if (rc == 0) {
obd->u.cli.cl_max_mds_easize = MAX_MD_SIZE;
spin_lock(&lsi->lsi_lwp_lock);
if (rc == 0) {
obd->u.cli.cl_max_mds_easize = MAX_MD_SIZE;
spin_lock(&lsi->lsi_lwp_lock);