When mdt_init0 fails, it has to wait until the zombie workqueue has
destroyed all disconnected exports before mdt_device_alloc frees the
mdt_device. Otherwise, the zombie workqueue refers to the freed
mdt_device, which results in:
general protection fault: 0000 [#1] SMP
..
Workqueue: obd_zombid obd_zombie_exp_cull [obdclass]
..
[<ffffffffc08829c5>] tgt_client_free+0x1e5/0x3c0 [ptlrpc]
[<ffffffffc0ec2327>] mdt_destroy_export+0x57/0x200 [mdt]
[<ffffffffc05bf20e>] class_export_destroy+0xee/0x490 [obdclass]
[<ffffffffc05bf5c5>] obd_zombie_exp_cull+0x15/0x20 [obdclass]
[<ffffffff93ab1d2f>] process_one_work+0x17f/0x440
- mdt_init0
the call to target_recovery_fini is moved so that it is called on
every failure after a successful tgt_init.
obd_zombie_barrier has to be called after
target_recovery_fini->class_disconnect_exports.
obd->obd_fail is set so that mdt_export_cleanup->tgt_client_del does
not clear the client's slot in last_rcvd when server start fails.
- mdt_quota_init
class_manual_cleanup does class_detach; a goto is added to avoid a
repeated call to class_detach.
- qmt_device_init0
start the qmt rebalance thread with the SVC_STARTING flag so that
qmt_start_reba_thread waits until the thread has started.
Otherwise, the qmt_device may get freed before the qmt rebalance
thread is stopped.
Tests for failures during mdt_init0 are added. Without this fix:
- conf-sanity.sh:test_5i leads to a general protection fault
- conf-sanity.sh:test_5h causes
rmmod: ERROR: Module mdt is in use
Cray-bug-id: LUS-2403
Signed-off-by: Vladimir Saveliev <c17830@cray.com>
Test-Parameters: trivial testlist=conf-sanity envdefinitions=ONLY=5
Change-Id: Ic9dc9e167f6c2e47a5f97e59b5bd26c5231c23ce
Reviewed-on: https://review.whamcloud.com/34724
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Sergey Cheremencev <c17829@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
#define OBD_FAIL_QUOTA_EDQUOT 0xA02
#define OBD_FAIL_QUOTA_DELAY_REINT 0xA03
#define OBD_FAIL_QUOTA_RECOVERABLE_ERR 0xA04
+#define OBD_FAIL_QUOTA_INIT 0xA05
#define OBD_FAIL_LPROC_REMOVE 0xB00
mdt->mdt_qmt_dev = obd->obd_lu_dev;
/* configure local quota objects */
- rc = mdt->mdt_qmt_dev->ld_ops->ldo_prepare(env,
- &mdt->mdt_lu_dev,
- mdt->mdt_qmt_dev);
+ if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_INIT))
+ rc = -EBADF;
+ else
+ rc = mdt->mdt_qmt_dev->ld_ops->ldo_prepare(env,
+ &mdt->mdt_lu_dev,
+ mdt->mdt_qmt_dev);
if (rc)
GOTO(class_cleanup, rc);
if (rc) {
class_manual_cleanup(obd);
mdt->mdt_qmt_dev = NULL;
+ GOTO(lcfg_cleanup, rc);
}
class_detach:
if (rc)
err_procfs:
mdt_procfs_fini(m);
err_recovery:
- target_recovery_fini(obd);
upcall_cache_cleanup(m->mdt_identity_cache);
m->mdt_identity_cache = NULL;
err_free_hsm:
err_fs_cleanup:
mdt_fs_cleanup(env, m);
err_tgt:
+ /* keep recoverable clients */
+ obd->obd_fail = 1;
+ target_recovery_fini(obd);
+ obd_exports_barrier(obd);
+ obd_zombie_barrier();
tgt_fini(env, &m->mdt_lut);
err_free_ns:
ldlm_namespace_free(m->mdt_namespace, NULL, 0);
}
/* stop rebalance thread */
- qmt_stop_reba_thread(qmt);
+ if (!qmt->qmt_child->dd_rdonly)
+ qmt_stop_reba_thread(qmt);
/* disconnect from OSD */
if (qmt->qmt_child_exp != NULL) {
GOTO(out, rc);
/* set up and start rebalance thread */
- thread_set_flags(&qmt->qmt_reba_thread, SVC_STOPPED);
+ thread_set_flags(&qmt->qmt_reba_thread, SVC_STARTING);
init_waitqueue_head(&qmt->qmt_reba_thread.t_ctl_waitq);
INIT_LIST_HEAD(&qmt->qmt_reba_list);
spin_lock_init(&qmt->qmt_reba_lock);
ENTRY;
OBD_ALLOC_PTR(env);
- if (env == NULL)
+ if (env == NULL) {
+ thread_set_flags(thread, SVC_STOPPED);
RETURN(-ENOMEM);
+ }
rc = lu_env_init(env, LCT_MD_THREAD);
if (rc) {
CERROR("%s: failed to init env.", qmt->qmt_svname);
+ thread_set_flags(thread, SVC_STOPPED);
OBD_FREE_PTR(env);
RETURN(rc);
}
}
run_test 5g "handle missing debugfs"
+test_5h() {
+ setup
+
+ stop mds1
+ #define OBD_FAIL_MDS_FS_SETUP 0x135
+ do_facet mds1 "$LCTL set_param fail_loc=0x80000135"
+ start_mdt 1 && error "start mdt should fail"
+ start_mdt 1 || error "start mdt failed"
+ client_up || error "client_up failed"
+ cleanup
+}
+run_test 5h "start mdt failure at mdt_fs_setup()"
+
+test_5i() {
+ setup
+
+ stop mds1
+ #define OBD_FAIL_QUOTA_INIT 0xA05
+ do_facet mds1 "$LCTL set_param fail_loc=0x80000A05"
+ start_mdt 1 && error "start mdt should fail"
+ start_mdt 1 || error "start mdt failed"
+ client_up || error "client_up failed"
+ cleanup
+}
+run_test 5i "start mdt failure at mdt_quota_init()"
+
test_6() {
setup
manual_umount_client