There are two problems.
The first is the following assertion failure:
lod_add_device() lustre-OSTe42a-osc-MDT0000:
can't set up pool, failed with -12
osp_disconnect() ASSERTION( imp != ((void *)0) ) failed:
osp_disconnect() LBUG
CPU: 1 PID: 10059 Comm: llog_process_th
The problem is that obd_disconnect() cleans up @imp and sets it to NULL:
->osp_obd_disconnect
->class_manual_cleanup
->class_process_config
->class_cleanup
->obd_precleanup
->osp_device_fini
->client_obd_cleanup
Afterwards, ldo_process_config() tries to access @imp again:
->ldo_process_config
->osp_shutdown
->osp_disconnect
->LASSERT(imp != NULL)
The other problem is that if we fail before obd_connect(),
the mount will hang:
->ldo_process_config
->osp_shutdown
->osp_disconnect
->ptlrpc_disconnect_import
->rc = l_wait_event(imp->imp_recovery_waitq,
!ptlrpc_import_in_recovery(imp), &lwi);
Since connect was never called, the import state stays at LUSTRE_IMP_NEW.
Fix this by checking properly whether we are in recovery: only consider
the import to be in recovery when it is in one of the following states:
LUSTRE_IMP_CONNECTING = 4,
LUSTRE_IMP_REPLAY = 5,
LUSTRE_IMP_REPLAY_LOCKS = 6,
LUSTRE_IMP_REPLAY_WAIT = 7,
LUSTRE_IMP_RECOVER = 8,
Change-Id: I2113b95a421bae7117f3057d5f0fdf78db95caa3
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-on: https://review.whamcloud.com/32994
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Gu Zheng <gzheng@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
struct obd_uuid obd_uuid;
bool for_ost;
bool lock = false;
+ bool connected = false;
ENTRY;
CDEBUG(D_CONFIG, "osp:%s idx:%d gen:%d\n", osp, index, gen);
obd->obd_name, osp, rc);
GOTO(out_cleanup, rc);
}
+ connected = true;
/* Allocate ost descriptor and fill it */
OBD_ALLOC_PTR(tgt_desc);
if (!tgt_desc)
- GOTO(out_conn, rc = -ENOMEM);
+ GOTO(out_cleanup, rc = -ENOMEM);
tgt_desc->ltd_tgt = dt_dev;
tgt_desc->ltd_exp = exp;
}
out_desc:
OBD_FREE_PTR(tgt_desc);
-out_conn:
- obd_disconnect(exp);
out_cleanup:
/* XXX OSP needs us to send down LCFG_CLEANUP because it uses
* objects from the MDT stack. See LU-7184. */
lcfg->lcfg_command = LCFG_CLEANUP;
lu_dev->ld_ops->ldo_process_config(env, lu_dev, lcfg);
+ if (connected)
+ obd_disconnect(exp);
+
return rc;
}
int in_recovery = 1;
spin_lock(&imp->imp_lock);
- if (imp->imp_state == LUSTRE_IMP_FULL ||
- imp->imp_state == LUSTRE_IMP_CLOSED ||
- imp->imp_state == LUSTRE_IMP_DISCON ||
+ if (imp->imp_state <= LUSTRE_IMP_DISCON ||
+ imp->imp_state >= LUSTRE_IMP_FULL ||
imp->imp_obd->obd_no_recov)
in_recovery = 0;
spin_unlock(&imp->imp_lock);