The MDS unload happen when type of MDT has no reference.
The MDT drop it during obd_cleanup. So race window located
between obd_cleanup and server_stop_servers.
Lustre can lost MDS obd_device during server_start_targets
between MDS checking and taking the type reference, if another MDT
stops.
The patch takes one more reference for a MDT type at
server_start_targets, and put it at server_stop_servers.
This patch adds sanity test 278. It reproduces the next race
started cleanup of MDT01
started cleanup of MDT00
finished cleanup of MDT00
started MDT00 mount, checked MDS exist
finished cleanup of MDT01, and cleanup of MDS also
asserted during MDT00 initialization
Cray-bug-id: LUS-7275
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I9ae3bc2ec1d23c8d436f143d12e26209fdb6b083
Reviewed-on: https://review.whamcloud.com/35652
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
own backing_dev_info */
struct list_head lsi_lwp_list;
spinlock_t lsi_lwp_lock;
own backing_dev_info */
struct list_head lsi_lwp_list;
spinlock_t lsi_lwp_lock;
- unsigned long lsi_lwp_started:1;
+ unsigned long lsi_lwp_started:1,
+ lsi_server_started:1;
};
#define LSI_UMOUNT_FAILOVER 0x00200000
};
#define LSI_UMOUNT_FAILOVER 0x00200000
#define OBD_FAIL_OBD_NO_LRU 0x609
#define OBD_FAIL_OBDCLASS_MODULE_LOAD 0x60a
#define OBD_FAIL_OBD_ZERO_NLINK_RACE 0x60b
#define OBD_FAIL_OBD_NO_LRU 0x609
#define OBD_FAIL_OBDCLASS_MODULE_LOAD 0x60a
#define OBD_FAIL_OBD_ZERO_NLINK_RACE 0x60b
+#define OBD_FAIL_OBD_STOP_MDS_RACE 0x60c
#define OBD_FAIL_TGT_REPLY_NET 0x700
#define OBD_FAIL_TGT_CONN_RACE 0x701
#define OBD_FAIL_TGT_REPLY_NET 0x700
#define OBD_FAIL_TGT_CONN_RACE 0x701
#endif
#include <linux/statfs.h>
#include <linux/version.h>
#endif
#include <linux/statfs.h>
#include <linux/version.h>
+#include <linux/delay.h>
#include <llog_swab.h>
#include <lustre_disk.h>
#include <llog_swab.h>
#include <lustre_disk.h>
struct obd_device *obd = NULL;
struct obd_type *type = NULL;
int rc = 0;
struct obd_device *obd = NULL;
struct obd_type *type = NULL;
int rc = 0;
ENTRY;
mutex_lock(&server_start_lock);
ENTRY;
mutex_lock(&server_start_lock);
/* if this was an MDT, and there are no more MDT's, clean up the MDS */
if (lsiflags & LDD_F_SV_TYPE_MDT) {
obd = class_name2obd(LUSTRE_MDS_OBDNAME);
/* if this was an MDT, and there are no more MDT's, clean up the MDS */
if (lsiflags & LDD_F_SV_TYPE_MDT) {
obd = class_name2obd(LUSTRE_MDS_OBDNAME);
- if (obd != NULL)
- type = class_search_type(LUSTRE_MDT_NAME);
- }
-
+ type = class_search_type(LUSTRE_MDT_NAME);
+ } else if (lsiflags & LDD_F_SV_TYPE_OST) {
/* if this was an OST, and there are no more OST's, clean up the OSS */
/* if this was an OST, and there are no more OST's, clean up the OSS */
- if (lsiflags & LDD_F_SV_TYPE_OST) {
obd = class_name2obd(LUSTRE_OSS_OBDNAME);
obd = class_name2obd(LUSTRE_OSS_OBDNAME);
- if (obd != NULL)
- type = class_search_type(LUSTRE_OST_NAME);
+ type = class_search_type(LUSTRE_OST_NAME);
- if (obd != NULL && (type == NULL || type->typ_refcnt == 0)) {
+ /* server_stop_servers is a pair of server_start_targets
+ * Here we put type which was taken at server_start_targets.
+ * If type is NULL then there is a wrong logic around type or
+ * type reference. */
+ LASSERTF(type, "Server flags %d, obd %s\n", lsiflags,
+ obd ? obd->obd_name : "NULL");
+
+ type_last = (type->typ_refcnt == 1);
+
+ class_put_type(type);
+ if (obd != NULL && type_last) {
obd->obd_force = 1;
/* obd_fail doesn't mean much on a server obd */
rc = class_manual_cleanup(obd);
obd->obd_force = 1;
/* obd_fail doesn't mean much on a server obd */
rc = class_manual_cleanup(obd);
struct config_llog_instance cfg;
struct lu_env mgc_env;
struct lu_device *dev;
struct config_llog_instance cfg;
struct lu_env mgc_env;
struct lu_device *dev;
+ char *name_service, *obd_name_service = NULL;
+ struct obd_type *type = NULL;
int rc;
ENTRY;
CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_svname);
int rc;
ENTRY;
CDEBUG(D_MOUNT, "starting target %s\n", lsi->lsi_svname);
+ LASSERTF(IS_MDT(lsi) || IS_OST(lsi), "designed for MDT or OST only\n");
+
- /* make sure the MDS is started */
- mutex_lock(&server_start_lock);
- obd = class_name2obd(LUSTRE_MDS_OBDNAME);
- if (!obd) {
- rc = lustre_start_simple(LUSTRE_MDS_OBDNAME,
- LUSTRE_MDS_NAME,
- LUSTRE_MDS_OBDNAME"_uuid",
- NULL, NULL, NULL, NULL);
- if (rc) {
- mutex_unlock(&server_start_lock);
- CERROR("failed to start MDS: %d\n", rc);
- RETURN(rc);
- }
- }
- mutex_unlock(&server_start_lock);
+ obd_name_service = LUSTRE_MDS_OBDNAME;
+ name_service = LUSTRE_MDS_NAME;
+ } else {
+ obd_name_service = LUSTRE_OSS_OBDNAME;
+ name_service = LUSTRE_OSS_NAME;
- /* If we're an OST, make sure the global OSS is running */
- if (IS_OST(lsi)) {
- /* make sure OSS is started */
- mutex_lock(&server_start_lock);
- obd = class_name2obd(LUSTRE_OSS_OBDNAME);
- if (!obd) {
- rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
- LUSTRE_OSS_NAME,
- LUSTRE_OSS_OBDNAME"_uuid",
- NULL, NULL, NULL, NULL);
- if (rc) {
- mutex_unlock(&server_start_lock);
- CERROR("failed to start OSS: %d\n", rc);
- RETURN(rc);
- }
+ /* make sure MDS/OSS is started */
+ mutex_lock(&server_start_lock);
+ obd = class_name2obd(obd_name_service);
+ if (!obd) {
+ rc = lustre_start_simple(obd_name_service, name_service,
+ (IS_MDT(lsi) ?
+ LUSTRE_MDS_OBDNAME"_uuid" :
+ LUSTRE_OSS_OBDNAME"_uuid"),
+ NULL, NULL, NULL, NULL);
+ if (rc) {
+ mutex_unlock(&server_start_lock);
+ CERROR("failed to start %s: %d\n",
+ obd_name_service, rc);
+ RETURN(rc);
- mutex_unlock(&server_start_lock);
+ }
+ /* hold a type reference and put it at server_stop_servers */
+ type = class_get_type(IS_MDT(lsi) ?
+ LUSTRE_MDT_NAME : LUSTRE_OST_NAME);
+ lsi->lsi_server_started = 1;
+ mutex_unlock(&server_start_lock);
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_OBD_STOP_MDS_RACE) &&
+ IS_MDT(lsi)) {
+ OBD_RACE(OBD_FAIL_OBD_STOP_MDS_RACE);
+ msleep(2 * MSEC_PER_SEC);
}
rc = lu_env_init(&mgc_env, LCT_MG_THREAD);
}
rc = lu_env_init(&mgc_env, LCT_MG_THREAD);
out_env:
lu_env_fini(&mgc_env);
out_stop_service:
out_env:
lu_env_fini(&mgc_env);
out_stop_service:
- if (rc != 0)
- server_stop_servers(lsi->lsi_flags);
+ /* in case of error upper function call
+ * server_put_super->server_stop_servers()
+ */
char *tmpname, *extraname = NULL;
int tmpname_sz;
int lsiflags = lsi->lsi_flags;
char *tmpname, *extraname = NULL;
int tmpname_sz;
int lsiflags = lsi->lsi_flags;
+ bool stop_servers = lsi->lsi_server_started;
ENTRY;
LASSERT(IS_SERVER(lsi));
ENTRY;
LASSERT(IS_SERVER(lsi));
* to .put_super, so we better make sure we clean up! */
obd->obd_force = 1;
class_manual_cleanup(obd);
* to .put_super, so we better make sure we clean up! */
obd->obd_force = 1;
class_manual_cleanup(obd);
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_OBD_STOP_MDS_RACE)) {
+ int idx;
+ server_name2index(lsi->lsi_svname, &idx, NULL);
+ /* sleeping for MDT0001 */
+ if (idx == 1)
+ OBD_RACE(OBD_FAIL_OBD_STOP_MDS_RACE);
+ }
} else {
CERROR("no obd %s\n", lsi->lsi_svname);
server_deregister_mount(lsi->lsi_svname);
} else {
CERROR("no obd %s\n", lsi->lsi_svname);
server_deregister_mount(lsi->lsi_svname);
/* Stop the servers (MDS, OSS) if no longer needed. We must wait
until the target is really gone so that our type refcount check
is right. */
/* Stop the servers (MDS, OSS) if no longer needed. We must wait
until the target is really gone so that our type refcount check
is right. */
- server_stop_servers(lsiflags);
+ if (stop_servers)
+ server_stop_servers(lsiflags);
/* In case of startup or cleanup err, stop related obds */
if (extraname) {
/* In case of startup or cleanup err, stop related obds */
if (extraname) {
}
run_test 277 "Direct IO shall drop page cache"
}
run_test 277 "Direct IO shall drop page cache"
+test_278() {
+ [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+ [[ "$(facet_host mds1)" != "$(facet_host mds2)" ]] &&
+ skip "needs the same host for mdt1 mdt2" && return
+
+ local pid1
+ local pid2
+
+#define OBD_FAIL_OBD_STOP_MDS_RACE 0x60b
+ do_facet mds2 $LCTL set_param fail_loc=0x8000060c
+ stop mds2 &
+ pid2=$!
+
+ stop mds1
+
+ echo "Starting MDTs"
+ start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS
+ wait $pid2
+#For the error assertion will happen. lu_env_get_key(..., &mdt_thread_key)
+#will return NULL
+ do_facet mds2 $LCTL set_param fail_loc=0
+
+ start mds2 $(mdsdevname 2) $MDS_MOUNT_OPTS
+}
+run_test 278 "Race starting MDS between MDTs stop/start"
+
cleanup_test_300() {
trap 0
umask $SAVE_UMASK
cleanup_test_300() {
trap 0
umask $SAVE_UMASK