From d841e34f661b243cdc52797b5c5377688dbac587 Mon Sep 17 00:00:00 2001 From: nathan Date: Wed, 23 Nov 2005 19:15:54 +0000 Subject: [PATCH] Branch b1_4_mountconf b=8007 turn off initial recovery on mgc so connect can time out --- lustre/include/linux/lustre_import.h | 2 +- lustre/ldlm/ldlm_lib.c | 1 + lustre/mgc/mgc_request.c | 31 +++++++++++++ lustre/obdclass/obd_mount.c | 88 +++++++++++++++++------------------- lustre/utils/mount_lustre.c | 4 +- 5 files changed, 76 insertions(+), 50 deletions(-) diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index aefa287..8639bef 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -90,7 +90,7 @@ struct obd_import { imp_dlm_fake:1, imp_server_timeout:1, imp_initial_recov:1, imp_force_verify:1, imp_pingable:1, imp_resend_replay:1, - imp_deactive:1; + imp_deactive:1, imp_initial_recov_bk:1; __u32 imp_connect_op; struct obd_connect_data imp_connect_data; }; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 542dd76..0344aa4 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -290,6 +290,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) imp->imp_connect_op = connect_op; imp->imp_generation = 0; imp->imp_initial_recov = 1; + imp->imp_initial_recov_bk = 0; INIT_LIST_HEAD(&imp->imp_pinger_chain); memcpy(imp->imp_target_uuid.uuid, lustre_cfg_buf(lcfg, 1), LUSTRE_CFG_BUFLEN(lcfg, 1)); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index dcfd7c4..57041ee 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -390,6 +390,36 @@ out: return rc; } +#define INIT_RECOV_BACKUP "init_recov_bk" +int mgc_set_info(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val) +{ + struct obd_import *imp = class_exp2cliimp(exp); + int rc = -EINVAL; + + /* Try to "recover" the initial connection; i.e. retry */ + if (keylen == strlen("initial_recov") && + memcmp(key, "initial_recov", strlen("initial_recov")) == 0) { + if (vallen != sizeof(int)) + RETURN(-EINVAL); + imp->imp_initial_recov = *(int *)val; + CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", + exp->exp_obd->obd_name, imp->imp_initial_recov); + RETURN(0); + } + /* Turn off initial_recov after we try all backup servers once */ + if (keylen == strlen(INIT_RECOV_BACKUP) && + memcmp(key, INIT_RECOV_BACKUP, strlen(INIT_RECOV_BACKUP)) == 0) { + if (vallen != sizeof(int)) + RETURN(-EINVAL); + imp->imp_initial_recov_bk = *(int *)val; + CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n", + exp->exp_obd->obd_name, imp->imp_initial_recov_bk); + RETURN(0); + } + return(rc); +} + static int mgc_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) @@ -465,6 +495,7 @@ struct obd_ops mgc_obd_ops = { .o_connect = client_connect_import, .o_disconnect = client_disconnect_export, .o_iocontrol = mgc_iocontrol, + .o_set_info = mgc_set_info, .o_import_event = mgc_import_event, .o_llog_init = mgc_llog_init, .o_llog_finish = mgc_llog_finish, diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 1fada17..b6c7f43 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -335,8 +335,8 @@ out: return(err); } -/* Get the log "profile" from a MGS and process it. MGS might be remote - or local. This func should work for both clients and servers */ +/* Get the log "profile" from a MGS and process it. + This func should work for both clients and servers */ int lustre_get_process_log(struct super_block *sb, char *profile, struct config_llog_instance *cfg) { @@ -344,65 +344,61 @@ int lustre_get_process_log(struct super_block *sb, char *profile, struct obd_device *mgc = lsi->lsi_mgc; struct lustre_handle mgc_conn = {0, }; struct obd_export *exp = NULL; - struct llog_ctxt *ctxt; - int err, rc; + struct llog_ctxt *rctxt, *lctxt; + int allow_recov = 0; + int rc; LASSERT(mgc); CDEBUG(D_MOUNT, "parsing config log %s\n", profile); - err = obd_connect(&mgc_conn, mgc, &(mgc->obd_uuid), NULL); - if (!err) { - exp = class_conn2export(&mgc_conn); - ctxt = llog_get_context(exp->exp_obd, LLOG_CONFIG_REPL_CTXT); - } else { - /* If we couldn't connect to the MGS, try reading a copy - of the config log stored locally on disk */ - CERROR("cannot connect to MGS: rc = %d\n" - "Will try local log\n", err); - /* FIXME set up local originator with mgc_fs_setup - could use ioctl (can't call directly because of layering) - */ - ctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT); - /* FIXME set this up anyhow, and copy the mgs remote log - to the local disk */ + rctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT); + lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT); + if (!lctxt || !rctxt) { + CERROR("missing llog context\n"); + return(-EINVAL); } + + /* FIXME set up local llog originator with mgc_fs_setup + could use ioctl (can't call directly because of layering). */ - if (!ctxt) { - CERROR("no config llog context\n"); - GOTO(out, rc = -EINVAL); + /* Don't retry if connect fails */ + rc = obd_set_info(mgc->obd_self_export, + strlen("initial_recov"), "initial_recov", + sizeof(allow_recov), &allow_recov); + if (rc) + goto out; + + rc = obd_connect(&mgc_conn, mgc, &(mgc->obd_uuid), NULL); + if (rc) { + CERROR("connect failed %d\n", rc); + goto out; } + exp = class_conn2export(&mgc_conn); + LASSERT(exp->exp_obd == mgc); + + //FIXME Copy the mgs remote log to the local disk #if 0 /* For debugging, it's useful to just dump the log */ - rc = class_config_dump_llog(ctxt, profile, cfg); + class_config_dump_llog(rctxt, profile, cfg); #endif - rc = class_config_parse_llog(ctxt, profile, cfg); - - //FIXME cleanup local originator with mgc_fs_cleanup (if necessary) - - switch (rc) { - case 0: - break; - case -EINVAL: + rc = class_config_parse_llog(rctxt, profile, cfg); + obd_disconnect(exp); + if (rc) { LCONSOLE_ERROR("%s: The configuration '%s' could not be read " - "from the MGS. Make sure this node and the " - "MGS are running compatible versions of " - "Lustre.\n", - mgc->obd_name, profile); - /* fall through */ - default: - CERROR("class_config_parse_llog failed: rc = %d\n", rc); - break; + "from the MGS (%d). Trying local log.\n", + mgc->obd_name, profile, rc); + /* If we couldn't connect to the MGS, try reading a copy + of the config log stored locally on disk */ + rc = class_config_parse_llog(lctxt, profile, cfg); + if (rc) { + LCONSOLE_ERROR("%s: Can't read the local config (%d)\n", + mgc->obd_name, rc); + } } out: - /* We don't so much care about errors in cleaning up the config llog - * connection, as we have already read the config by this point. */ - if (exp) { - err = obd_disconnect(exp); - if (err) - CERROR("disconnect failed: rc = %d\n", err); - } + //FIXME cleanup local originator with mgc_fs_cleanup return (rc); } diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 1057ced..5aefe32 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -123,7 +123,7 @@ update_mtab_entry(char *spec, char *mtpt, char *type, char *opts, #define MAXNIDSTR 1024 static char *convert_hostnames(char *s1) { - char *converted, *s2, *c; + char *converted, *s2 = 0, *c; int left = MAXNIDSTR; lnet_nid_t nid; @@ -218,11 +218,9 @@ static int parse_one_option(const char *check, int *flagp) int parse_options(char *orig_options, int *flagp) { - int val; char *options, *opt, *nextopt; options = calloc(strlen(orig_options) + 1, 1); - *flagp = 0; nextopt = orig_options; while ((opt = strsep(&nextopt, ","))) { -- 1.8.3.1