From 5990e1de526565d454e6ad384b9c80e3b03cd7d4 Mon Sep 17 00:00:00 2001 From: nathan Date: Tue, 14 Feb 2006 22:46:11 +0000 Subject: [PATCH] Branch b1_4_mountconf b=9866, 9843 9866 mount ost exclusion list: mount -o exclude=lustre-OST0000:lustre-OST0001 -t lustre uml1:/lustre-client /mnt/lustre will start a client with selected osc's in "inactive" state, allowing nonblocking (degraded) filesystem usage. Also adds foundation for 9843 skipping of so-marked config commands. --- lustre/include/linux/lustre_cfg.h | 1 + lustre/include/linux/lustre_disk.h | 16 ++--- lustre/include/linux/lustre_idl.h | 1 + lustre/include/linux/obd_class.h | 16 +++-- lustre/llite/llite_lib.c | 2 +- lustre/lov/lov_obd.c | 86 +++++++---------------- lustre/mgc/mgc_request.c | 17 +++-- lustre/mgs/mgs_llog.c | 37 ++++------ lustre/obdclass/obd_config.c | 59 ++++++++++++++-- lustre/obdclass/obd_mount.c | 135 ++++++++++++++++++++++++++++++++++--- lustre/utils/mkfs_lustre.c | 97 ++++++++++++++------------ lustre/utils/mount_lustre.c | 3 +- 12 files changed, 306 insertions(+), 164 deletions(-) diff --git a/lustre/include/linux/lustre_cfg.h b/lustre/include/linux/lustre_cfg.h index 971f2f1..0791d37 100644 --- a/lustre/include/linux/lustre_cfg.h +++ b/lustre/include/linux/lustre_cfg.h @@ -55,6 +55,7 @@ enum lcfg_command_type { LCFG_MARKER = 0x00ce010, LCFG_LOG_START = 0x00ce011, LCFG_LOG_END = 0x00ce012, + LCFG_LOV_ADD_INA = 0x00ce013, }; struct lustre_cfg_bufs { diff --git a/lustre/include/linux/lustre_disk.h b/lustre/include/linux/lustre_disk.h index acd02c5..3d34b5f 100644 --- a/lustre/include/linux/lustre_disk.h +++ b/lustre/include/linux/lustre_disk.h @@ -86,11 +86,11 @@ struct lustre_disk_data { __u32 ldd_config_ver; /* config rewrite count - not used */ __u32 ldd_flags; /* LDD_SV_TYPE */ + __u32 ldd_svindex; /* server index (0001), must match + svname */ enum ldd_mount_type ldd_mount_type; /* target fs type LDD_MT_* */ char ldd_fsname[64]; /* filesystem this server is part of */ char 
ldd_svname[64]; /* this server's name (lustre-mdt0001)*/ - __u16 ldd_svindex; /* server index (0001), must match - svname */ __u16 ldd_mgsnid_count; __u16 ldd_failnid_count; /* server failover nid count */ lnet_nid_t ldd_mgsnid[MTI_NIDS_MAX]; /* mgs nid list; lmd can @@ -119,7 +119,8 @@ struct lustre_disk_data { #define MT_STR(data) mt_str((data)->ldd_mount_type) /* Make the mdt/ost server obd name based on the filesystem name */ -static inline int sv_make_name(__u32 flags, __u16 index, char *fs, char *name) +static inline int server_make_name(__u32 flags, __u16 index, char *fs, + char *name) { if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { sprintf(name, "%.8s-%s%04x", fs, @@ -134,11 +135,8 @@ static inline int sv_make_name(__u32 flags, __u16 index, char *fs, char *name) return 0; } -static inline void ldd_make_sv_name(struct lustre_disk_data *ldd) -{ - sv_make_name(ldd->ldd_flags, ldd->ldd_svindex, - ldd->ldd_fsname, ldd->ldd_svname); -} +/* Get the index from the obd name */ +int server_name2index(char *svname, unsigned long *idx, char **endptr); /****************** mount command *********************/ @@ -158,6 +156,8 @@ struct lustre_mount_data { char *lmd_dev; /* device or file system name */ char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ + __u32 *lmd_exclude; /* array of OSTs to ignore */ + int lmd_exclude_count; /* number of valid entries in array */ }; #define LMD_FLG_CLIENT 0x0002 /* Mounting a client only */ diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 1d0c738..fde3354 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -1016,6 +1016,7 @@ extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); #define CM_START 0x01 #define CM_END 0x02 #define CM_SKIP 0x04 +#define CM_START_SKIP (CM_START | CM_SKIP) struct cfg_marker { __u32 cm_step; /* aka config version */ diff --git a/lustre/include/linux/obd_class.h 
b/lustre/include/linux/obd_class.h index 99e557f..a966a55 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -104,11 +104,20 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg); int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg); void class_decref(struct obd_device *obd); +#define CFG_F_START 0x01 /* Set when we start updating from a log */ +#define CFG_F_MARKER 0x02 /* We are within a marker */ +#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */ +#define CFG_F_COMPAT146 0x08 /* Translation to new obd names required */ +#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */ + + /* Passed as data param to class_config_parse_llog */ struct config_llog_instance { - char * cfg_instance; - struct obd_uuid cfg_uuid; - int cfg_last_idx; /* for partial llog processing */ + char * cfg_instance; + struct super_block *cfg_sb; + struct obd_uuid cfg_uuid; + int cfg_last_idx; /* for partial llog processing */ + int cfg_flags; }; int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, struct config_llog_instance *cfg); @@ -118,7 +127,6 @@ int class_config_dump_llog(struct llog_ctxt *ctxt, char *name, /* list of active configuration logs */ struct config_llog_data { char *cld_logname; - struct super_block *cld_sb; struct ldlm_res_id cld_resid; struct config_llog_instance cld_cfg; struct list_head cld_list_chain; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index d0fb2e3..6281c92 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -546,7 +546,7 @@ void ll_options(char *options, int *flags) return; } - CERROR("Parsing opts %s\n", options); + CDEBUG(D_CONFIG, "Parsing opts %s\n", options); #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) for (this_char = strtok (options, ","); this_char != NULL; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index f36cebe..cbaff42 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -202,10 +202,7 @@
static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) { struct lov_obd *lov = &obd->u.lov; - struct lov_tgt_desc *tgt; - struct obd_export *exp; - __u64 connect_flags = data ? data->ocd_connect_flags : 0; - int rc, rc2, i; + int rc; ENTRY; lov->ocd.ocd_connect_flags = OBD_CONNECT_EMPTY; @@ -213,49 +210,12 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, lov->ocd = *data; rc = class_connect(conn, obd, cluuid); - if (rc) - RETURN(rc); - - exp = class_conn2export(conn); - - /* We don't want to actually do the underlying connections more than - * once, so keep track. */ - lov->connects++; - if (lov->connects > 1) { - class_export_put(exp); - RETURN(0); - } + if (!rc) + lov->connects++; + CDEBUG(D_CONFIG, "connect #%d\n", lov->connects); - for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { - if (obd_uuid_empty(&tgt->uuid)) - continue; - rc = lov_connect_obd(obd, tgt, 0, data); - if (rc) - GOTO(out_disc, rc); - if (data) - connect_flags &= data->ocd_connect_flags; - } + /* target connects are done in lov_add_target */ - if (data) - data->ocd_connect_flags = connect_flags; - - class_export_put(exp); - RETURN (0); - - out_disc: - while (i-- > 0) { - struct obd_uuid uuid; - --tgt; - --lov->desc.ld_active_tgt_count; - tgt->active = 0; - /* save for CERROR below; (we know it's terminated) */ - uuid = tgt->uuid; - rc2 = obd_disconnect(tgt->ltd_exp); - if (rc2) - CERROR("error: LOV target %s disconnect on OST idx %d: " - "rc = %d\n", uuid.uuid, i, rc2); - } - class_disconnect(exp); RETURN (rc); } @@ -331,8 +291,11 @@ static int lov_disconnect(struct obd_export *exp) /* Only disconnect the underlying layers on the final disconnect. */ lov->connects--; - if (lov->connects != 0) + if (lov->connects != 0) { + /* why should there be more than 1 connect? 
*/ + CERROR("disconnect #%d\n", lov->connects); RETURN(rc); + } /* Let's hold another reference so lov_del_obd doesn't spin through putref every time */ @@ -460,7 +423,7 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, } static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, - int index, int gen) + int index, int gen, int active) { struct lov_obd *lov = &obd->u.lov; struct lov_tgt_desc *tgt; @@ -469,8 +432,8 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, int rc; ENTRY; - CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d\n", - uuidp->uuid, index, gen); + CDEBUG(D_CONFIG, "uuid:%s idx:%d gen:%d active:%d\n", + uuidp->uuid, index, gen, active); if (index < 0) { CERROR("request to add OBD %s at invalid index: %d\n", @@ -522,11 +485,6 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", index, tgt->ltd_gen, lov->desc.ld_tgt_count); - if (lov->connects == 0) - /* lov_connect hasn't been called yet. So we'll do the - lov_connect_obd on this obd when that fn first runs. */ - RETURN(0); - if (tgt->ltd_exp) { struct obd_device *osc_obd; @@ -542,14 +500,15 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, RETURN(-ENOMEM); *ocd = lov->ocd; } - rc = lov_connect_obd(obd, tgt, 1, ocd); + rc = lov_connect_obd(obd, tgt, active, ocd); if (ocd) OBD_FREE(ocd, sizeof(*ocd)); if (rc) GOTO(out, rc); idx = index; - rc = lov_notify(obd, tgt->ltd_exp->exp_obd, OBD_NOTIFY_ACTIVE, + rc = lov_notify(obd, tgt->ltd_exp->exp_obd, + active ? 
OBD_NOTIFY_ACTIVE : OBD_NOTIFY_INACTIVE, (void *)&idx); out: @@ -801,6 +760,7 @@ static int lov_process_config(struct obd_device *obd, obd_count len, void *buf) switch(cmd = lcfg->lcfg_command) { case LCFG_LOV_ADD_OBD: + case LCFG_LOV_ADD_INA: case LCFG_LOV_DEL_OBD: { /* lov_modify_tgts add 0:lov_mdsA 1:ost1_UUID 2:0 3:1 */ if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) @@ -813,7 +773,9 @@ static int lov_process_config(struct obd_device *obd, obd_count len, void *buf) if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", &gen) != 1) GOTO(out, rc = -EINVAL); if (cmd == LCFG_LOV_ADD_OBD) - rc = lov_add_target(obd, &obd_uuid, index, gen); + rc = lov_add_target(obd, &obd_uuid, index, gen, 1); + else if (cmd == LCFG_LOV_ADD_INA) + rc = lov_add_target(obd, &obd_uuid, index, gen, 0); else rc = lov_del_target(obd, &obd_uuid, index, gen); GOTO(out, rc); @@ -1558,14 +1520,16 @@ int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, { struct lov_obd *lov = &exp->exp_obd->u.lov; struct lov_async_page *lap; - int rc; + int rc = 0; ENTRY; - if (!page) + if (!page) { + while (!lov->tgts[rc].ltd_exp) + rc++; return size_round(sizeof(*lap)) + - obd_prep_async_page(lov->tgts[0].ltd_exp, NULL, NULL, + obd_prep_async_page(lov->tgts[rc].ltd_exp, NULL, NULL, NULL, 0, NULL, NULL, NULL); - + } ASSERT_LSM_MAGIC(lsm); LASSERT(loi == NULL); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index f6a4073..af3abec 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -141,9 +141,10 @@ static int config_log_add(char *logname, struct config_llog_instance *cfg, GOTO(out, rc = -ENOMEM); } strcpy(cld->cld_logname, logname); - cld->cld_sb = sb; cld->cld_cfg = *cfg; cld->cld_cfg.cfg_last_idx = 0; + cld->cld_cfg.cfg_flags = 0; + cld->cld_cfg.cfg_sb = sb; if (cfg->cfg_instance != NULL) { OBD_ALLOC(cld->cld_cfg.cfg_instance, strlen(cfg->cfg_instance) + 1); @@ -372,7 +373,7 @@ static int mgc_async_requeue(void *data) /* re-send server info every time, in 
case MGS needs to regen its logs */ - server_register_target(cld->cld_sb); + server_register_target(cld->cld_cfg.cfg_sb); rc = mgc_process_log(the_mgc, cld); class_export_put(the_mgc->obd_self_export); @@ -834,7 +835,7 @@ static int mgc_process_log(struct obd_device *mgc, struct lustre_handle lockh; struct client_obd *cli = &mgc->u.cli; struct lvfs_run_ctxt saved; - struct lustre_sb_info *lsi = s2lsi(cld->cld_sb); + struct lustre_sb_info *lsi = s2lsi(cld->cld_cfg.cfg_sb); int rc, rcl, flags = 0, must_pop = 0; ENTRY; @@ -949,10 +950,16 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf) config_log_add(logname, cfg, sb); cld = config_log_get(logname, cfg); - if (IS_ERR(cld)) + if (IS_ERR(cld)) { rc = PTR_ERR(cld); - else + } else { + /* COMPAT_146 */ + /* For old logs, there was no start marker. */ + /* FIXME only set this for old logs! */ + cld->cld_cfg.cfg_flags |= CFG_F_MARKER; + rc = mgc_process_log(obd, cld); + } config_log_put(); break; } diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index e4fd902..a5bdb50 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -47,18 +47,6 @@ #include "mgs_internal.h" -static inline int sv_name2index(char *svname, unsigned long *idx) -{ - char *dash = strchr(svname, '-'); - if (!dash) { - CERROR("Can't understand server name %s\n", svname); - return(-EINVAL); - } - *idx = simple_strtoul(dash + 4, NULL, 16); - return 0; -} - - /******************** DB functions *********************/ /* from the (client) config log, figure out: @@ -110,8 +98,9 @@ static int mgsdb_handler(struct llog_handle *llh, struct llog_rec_hdr *rec, /* attach 0:MDC_uml1_mdsA_MNT_client 1:mdc 2:1d834_MNT_client_03f */ if ((lcfg->lcfg_command == LCFG_ATTACH) && (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) { - rc = sv_name2index(lustre_cfg_string(lcfg, 0), &index); - if (rc) { + rc = server_name2index(lustre_cfg_string(lcfg, 0), + &index, NULL); + if (rc != LDD_F_SV_TYPE_MDT) { 
CWARN("Unparsable MDC name %s, assuming index 0\n", lustre_cfg_string(lcfg, 0)); index = 0; @@ -411,8 +400,8 @@ int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti) } set_bit(mti->mti_stripe_index, imap); - sv_make_name(mti->mti_flags, mti->mti_stripe_index, - mti->mti_fsname, mti->mti_svname); + server_make_name(mti->mti_flags, mti->mti_stripe_index, + mti->mti_fsname, mti->mti_svname); CDEBUG(D_MGS, "Set new index for %s to %d\n", mti->mti_svname, mti->mti_stripe_index); @@ -686,10 +675,10 @@ static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *db, /* This should always be the first entry in a log. rc = mgs_clear_log(obd, logname); */ rc = record_start_log(obd, &llh, logname); - rc = record_marker(obd, llh, db, CM_START, mti->mti_svname,"lov setup"); + rc = record_marker(obd, llh, db, CM_START, lovname, "lov setup"); rc = record_attach(obd, llh, lovname, "lov", uuid); rc = record_lov_setup(obd, llh, lovname, lovdesc); - rc = record_marker(obd, llh, db, CM_END, mti->mti_svname, "lov setup"); + rc = record_marker(obd, llh, db, CM_END, lovname, "lov setup"); rc = record_end_log(obd, &llh); OBD_FREE(lovdesc, sizeof(*lovdesc)); @@ -1059,8 +1048,8 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *db, CDEBUG(D_MGS, "Upgrade MDT\n"); /* Need to set the mdsuuid first */ mti->mti_stripe_index = 0; - sv_make_name(mti->mti_flags, mti->mti_stripe_index, - mti->mti_fsname, mti->mti_svname); + server_make_name(mti->mti_flags, mti->mti_stripe_index, + mti->mti_fsname, mti->mti_svname); sprintf(mti->mti_uuid, "mdsA_UUID"); if (mgs_log_is_empty(obd, mti->mti_svname)) { CERROR("The MDT log %s is missing.\n", mti->mti_svname); @@ -1081,8 +1070,8 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *db, omti.mti_flags |= LDD_F_SV_TYPE_OST; omti.mti_flags &= ~(LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_MGS); omti.mti_stripe_index = 0; - sv_make_name(omti.mti_flags, omti.mti_stripe_index, - omti.mti_fsname, omti.mti_svname); + 
server_make_name(omti.mti_flags, omti.mti_stripe_index, + omti.mti_fsname, omti.mti_svname); sprintf(omti.mti_uuid, "ost1_UUID"); if (!mgs_log_is_empty(obd, omti.mti_svname)) { CERROR("The OST log %s already exists.\n", @@ -1092,8 +1081,8 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *db, } omti.mti_stripe_index = 1; - sv_make_name(omti.mti_flags, omti.mti_stripe_index, - omti.mti_fsname, omti.mti_svname); + server_make_name(omti.mti_flags, omti.mti_stripe_index, + omti.mti_fsname, omti.mti_svname); sprintf(omti.mti_uuid, "ost2_UUID"); if (!mgs_log_is_empty(obd, omti.mti_svname)) { CERROR("The OST log %s already exists.\n", diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index b1ebf84..eca87c8 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -711,6 +711,12 @@ out: int class_config_dump_handler(struct llog_handle * handle, struct llog_rec_hdr *rec, void *data); +#ifdef __KERNEL__ +extern int lustre_check_exclusion(struct super_block *sb, char *svname); +#else +#define lustre_check_exclusion(a,b) 0 +#endif + static int class_config_llog_handler(struct llog_handle * handle, struct llog_rec_hdr *rec, void *data) { @@ -739,16 +745,57 @@ static int class_config_llog_handler(struct llog_handle * handle, if (rc) GOTO(out, rc); - /* FIXME check cm_flags for skip - must save state, - probably in handle + /* Figure out config state info */ if (lcfg->lcfg_command == LCFG_MARKER) { struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); + CDEBUG(D_CONFIG|D_WARNING, "Marker, cfg_flg=%#x\n", + cfg->cfg_flags); + if (marker->cm_flags & CM_START) { + /* all previous flags off */ + cfg->cfg_flags = CFG_F_MARKER; + if (marker->cm_flags & CM_SKIP) { + cfg->cfg_flags |= CFG_F_SKIP; + // FIXME warning + CDEBUG(D_CONFIG|D_WARNING, "SKIP %d\n", + marker->cm_step); + } + if (lustre_check_exclusion(cfg->cfg_sb, + marker->cm_svname)) { + cfg->cfg_flags |= CFG_F_EXCLUDE; + // FIXME warning + CDEBUG(D_CONFIG|D_WARNING, 
"EXCLUDE %d\n", + marker->cm_step); + } + } else if (marker->cm_flags & CM_END) { + cfg->cfg_flags = 0; + } } - */ + /* A config command without a start marker before it is + illegal (1.4.6. compat must set it artificially) */ + if (!(cfg->cfg_flags & CFG_F_MARKER) && + (lcfg->lcfg_command != LCFG_MARKER)) { + CWARN("Config not inside markers, ignoring! (%#x)\n", + cfg->cfg_flags); + cfg->cfg_flags |= CFG_F_SKIP; + } + + if (cfg->cfg_flags & CFG_F_SKIP) { + // FIXME warning + CDEBUG(D_CONFIG|D_WARNING, "skipping %#x\n", + cfg->cfg_flags); + rc = 0; + /* No processing! */ + break; + } + + if ((cfg->cfg_flags & CFG_F_EXCLUDE) && + (lcfg->lcfg_command == LCFG_LOV_ADD_OBD)) + /* Add inactive instead */ + lcfg->lcfg_command = LCFG_LOV_ADD_INA; lustre_cfg_bufs_init(&bufs, lcfg); - if (cfg && cfg->cfg_instance && LUSTRE_CFG_BUFLEN(lcfg, 0) > 0) { + if (cfg && cfg->cfg_instance && LUSTRE_CFG_BUFLEN(lcfg, 0) > 0){ inst = 1; inst_len = LUSTRE_CFG_BUFLEN(lcfg, 0) + strlen(cfg->cfg_instance) + 1; @@ -896,9 +943,9 @@ int class_config_dump_handler(struct llog_handle * handle, } if (lcfg->lcfg_command == LCFG_MARKER) { struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); - ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)'%s'", + ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)%s '%s'", marker->cm_step, marker->cm_flags, - marker->cm_comment); + marker->cm_svname, marker->cm_comment); } else { for (i = 0; i < lcfg->lcfg_bufcount; i++) { ptr += snprintf(ptr, end-ptr, "%d:%s ", i, diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index ae01049..80c185d 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -37,6 +37,7 @@ #include #include #include +#include static int (*client_fill_super)(struct super_block *sb) = NULL; @@ -970,7 +971,7 @@ out_servers: RETURN(rc); } -/***************** mount **************/ +/***************** lustre superblock **************/ struct lustre_sb_info *lustre_init_lsi(struct super_block *sb) { @@ -986,6 +987,7 @@ 
struct lustre_sb_info *lustre_init_lsi(struct super_block *sb) RETURN(NULL); } + lsi->lsi_lmd->lmd_exclude_count = 0; s2lsi_nocast(sb) = lsi; /* we take 1 extra ref for our setup */ atomic_set(&lsi->lsi_mounts, 1); @@ -1015,6 +1017,10 @@ static int lustre_free_lsi(struct super_block *sb) if (lsi->lsi_lmd->lmd_opts != NULL) OBD_FREE(lsi->lsi_lmd->lmd_opts, strlen(lsi->lsi_lmd->lmd_opts) + 1); + if (lsi->lsi_lmd->lmd_exclude_count) + OBD_FREE(lsi->lsi_lmd->lmd_exclude, + sizeof(lsi->lsi_lmd->lmd_exclude[0]) * + lsi->lsi_lmd->lmd_exclude_count); OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd)); } @@ -1365,6 +1371,28 @@ out: RETURN(rc); } +/* Get the index from the obd name. + rc = server type, or + rc < 0 on error */ +int server_name2index(char *svname, unsigned long *idx, char **endptr) +{ + int rc; + char *dash = strchr(svname, '-'); + if (!dash) { + CERROR("Can't understand server name %s\n", svname); + return(-EINVAL); + } + + if (strncmp(dash + 1, "MDT", 3) == 0) + rc = LDD_F_SV_TYPE_MDT; + else if (strncmp(dash + 1, "OST", 3) == 0) + rc = LDD_F_SV_TYPE_OST; + else + return(-EINVAL); + + *idx = simple_strtoul(dash + 4, endptr, 16); + return rc; +} /*************** mount common betweeen server and client ***************/ @@ -1408,12 +1436,94 @@ static void lmd_print(struct lustre_mount_data *lmd) PRINT_CMD(PRINT_MASK, "flags: %x\n", lmd->lmd_flags); if (lmd->lmd_opts) PRINT_CMD(PRINT_MASK, "options: %s\n", lmd->lmd_opts); + for (i = 0; i < lmd->lmd_exclude_count; i++) { + PRINT_CMD(PRINT_MASK, "exclude %d: OST%04x\n", i, + lmd->lmd_exclude[i]); + } } +/* Is this server on the exclusion list */ +int lustre_check_exclusion(struct super_block *sb, char *svname) +{ + struct lustre_sb_info *lsi = s2lsi(sb); + struct lustre_mount_data *lmd = lsi->lsi_lmd; + unsigned long index; + int i, rc; + ENTRY; + + rc = server_name2index(svname, &index, NULL); + if (rc != LDD_F_SV_TYPE_OST) + RETURN(0); + + CDEBUG(D_MOUNT, "Check exclusion %s (%ld) in %d of %s\n", svname, + index, 
lmd->lmd_exclude_count, lmd->lmd_dev); + + for(i = 0; i < lmd->lmd_exclude_count; i++) { + if (index == lmd->lmd_exclude[i]) { + CWARN("Excluding %s (on exclusion list)\n", svname); + RETURN(1); + } + } + RETURN(0); +} + +/* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */ +static int lmd_make_exclusion(struct lustre_mount_data *lmd, char *ptr) +{ + char *s1 = ptr, *s2; + unsigned long index, *exclude_list; + int rc = 0; + ENTRY; + + /* temp storage until we figure out how many we have */ + OBD_ALLOC(exclude_list, sizeof(index) * MAX_OBD_DEVICES); + if (!exclude_list) + RETURN(-ENOMEM); + + /* we enter this fn pointing at the '=' */ + while (*s1 && *s1 != ' ' && *s1 != ',') { + s1++; + rc = server_name2index(s1, &index, &s2); + if (rc < 0) { + CERROR("Can't parse %s\n", s1); + break; + } + if (rc == LDD_F_SV_TYPE_OST) + exclude_list[lmd->lmd_exclude_count++] = index; + else + CDEBUG(D_MOUNT, "ignoring exclude %.7s\n", s1); + s1 = s2; + /* now we are pointing at ':' (next exclude) + or ',' (end of excludes) */ + + if (lmd->lmd_exclude_count >= MAX_OBD_DEVICES) + break; + } + if (rc >= 0) /* non-err */ + rc = 0; + + if (lmd->lmd_exclude_count) { + /* permanent, freed in lustre_free_lsi */ + OBD_ALLOC(lmd->lmd_exclude, sizeof(index) * + lmd->lmd_exclude_count); + if (lmd->lmd_exclude) { + memcpy(lmd->lmd_exclude, exclude_list, + sizeof(index) * lmd->lmd_exclude_count); + } else { + rc = -ENOMEM; + lmd->lmd_exclude_count = 0; + } + } + OBD_FREE(exclude_list, sizeof(index) * MAX_OBD_DEVICES); + RETURN(rc); +} + +/* mount -v -t lustre uml1:uml2:/lustre-client /mnt/lustre */ static int lmd_parse(char *options, struct lustre_mount_data *lmd) { char *s1, *s2, *devname = NULL; struct lustre_mount_data *raw = (struct lustre_mount_data *)options; + int rc = 0; ENTRY; LASSERT(lmd); @@ -1426,8 +1536,8 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) /* Options should be a string - try to detect old lmd data */ if ((raw->lmd_magic & 
0xffffff00) == (LMD_MAGIC & 0xffffff00)) { LCONSOLE_ERROR("You're using an old version of " - "/sbin/mount.lustre. Please install version " - "1.%d\n", LMD_MAGIC & 0xFF); + "/sbin/mount.lustre. Please install version " + "%s\n", LUSTRE_VERSION_STRING); RETURN(-EINVAL); } lmd->lmd_magic = LMD_MAGIC; @@ -1436,9 +1546,11 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) lmd->lmd_flags |= LMD_FLG_RECOVER; s1 = options; - while(*s1) { + while (*s1) { + /* Skip whitespace and extra commas */ while (*s1 == ' ' || *s1 == ',') s1++; + /* Client options are parsed in ll_options: eg. flock, user_xattr, acl */ @@ -1450,11 +1562,11 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) else if (strncmp(s1, "nosvc", 5) == 0) lmd->lmd_flags |= LMD_FLG_NOSVC; + /* ost exclusion list */ else if (strncmp(s1, "exclude=", 8) == 0) { - CERROR("Exclude: %s\n", s1); - /* FIXME implement */ - /* store exlusion list in lmd_exclude, mdt & client - must check */ + rc = lmd_make_exclusion(lmd, s1 + 7); + if (rc) + goto invalid; } /* Linux 2.4 doesn't pass the device, so we stuck it at the @@ -1465,7 +1577,9 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) must be the last one. 
*/ *s1 = 0; } - s2 = strstr(s1, ","); + + /* Find next opt */ + s2 = strchr(s1, ','); if (s2 == NULL) break; s1 = s2 + 1; @@ -1533,7 +1647,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd) lmd->lmd_magic = LMD_MAGIC; lmd_print(lmd); - RETURN(0); + RETURN(rc); invalid: CERROR("Bad mount options %s\n", options); @@ -1666,5 +1780,6 @@ EXPORT_SYMBOL(lustre_end_log); EXPORT_SYMBOL(server_get_mount); EXPORT_SYMBOL(server_put_mount); EXPORT_SYMBOL(server_register_target); +EXPORT_SYMBOL(server_name2index); diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 4dc3795a..f7d748d 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -147,29 +147,37 @@ int get_os_version() int run_command(char *cmd) { - int rc; - - if (verbose > 1) - printf("cmd: %s\n", cmd); - - strcat(cmd, " >/tmp/mkfs.log 2>&1"); - - /* Can't use popen because we need the rv of the command */ - rc = system(cmd); - if (rc) { - char buf[128]; - FILE *fp; - fp = fopen("/tmp/mkfs.log", "r"); - if (fp) { - while (fgets(buf, sizeof(buf), fp) != NULL) { - if (rc || verbose > 2) - printf(" %s", buf); - } - fclose(fp); - } - } - return rc; -} + char log[] = "/tmp/mkfs_logXXXXXX"; + int fd, rc; + + if (verbose > 1) + printf("cmd: %s\n", cmd); + + if ((fd = mkstemp(log)) >= 0) { + close(fd); + strcat(cmd, " >"); + strcat(cmd, log); + } + strcat(cmd, " 2>&1"); + + /* Can't use popen because we need the rv of the command */ + rc = system(cmd); + if (rc && fd >= 0) { + char buf[128]; + FILE *fp; + fp = fopen(log, "r"); + if (fp) { + while (fgets(buf, sizeof(buf), fp) != NULL) { + if (rc || verbose > 2) + printf(" %s", buf); + } + fclose(fp); + } + } + if (fd >= 0) + remove(log); + return rc; +} /*============ disk dev functions ===================*/ @@ -765,7 +773,7 @@ void set_defaults(struct mkfs_opts *mop) else mop->mo_ldd.ldd_mount_type = LDD_MT_LDISKFS; - mop->mo_ldd.ldd_svindex = -1; + mop->mo_ldd.ldd_svindex = 0xFFFF; mop->mo_ldd.ldd_stripe_count = 
1; mop->mo_ldd.ldd_stripe_sz = 1024 * 1024; mop->mo_ldd.ldd_stripe_pattern = 0; @@ -871,7 +879,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, return 1; case 'i': if (IS_MDT(&mop->mo_ldd) || IS_OST(&mop->mo_ldd)) { - mop->mo_ldd.ldd_svindex = atoi(optarg); + mop->mo_ldd.ldd_svindex = atol(optarg); mop->mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX; } else { badopt(long_opt[longidx].name, "MDT,OST"); @@ -977,6 +985,7 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop, int main(int argc, char *const argv[]) { struct mkfs_opts mop; + struct lustre_disk_data *ldd; char *mountopts = NULL; char always_mountopts[512] = ""; char default_mountopts[512] = ""; @@ -1046,8 +1055,8 @@ int main(int argc, char *const argv[]) if (ret) goto out; - if (!(IS_MDT(&mop.mo_ldd) || IS_OST(&mop.mo_ldd) || - IS_MGS(&mop.mo_ldd))) { + ldd = &mop.mo_ldd; + if (!(IS_MDT(ldd) || IS_OST(ldd) || IS_MGS(ldd))) { fatal(); fprintf(stderr, "must set target type :{mdt,ost,mgs}\n"); usage(stderr); @@ -1055,14 +1064,13 @@ int main(int argc, char *const argv[]) goto out; } - if (IS_MDT(&mop.mo_ldd) && !IS_MGS(&mop.mo_ldd) && - mop.mo_ldd.ldd_mgsnid_count == 0) { + if (IS_MDT(ldd) && !IS_MGS(ldd) && ldd->ldd_mgsnid_count == 0) { vprint("No management node specified, adding MGS to this " "MDT\n"); - mop.mo_ldd.ldd_flags |= LDD_F_SV_TYPE_MGS; + ldd->ldd_flags |= LDD_F_SV_TYPE_MGS; } - if (!IS_MGS(&mop.mo_ldd) && (mop.mo_ldd.ldd_mgsnid_count == 0)) { + if (!IS_MGS(ldd) && (ldd->ldd_mgsnid_count == 0)) { fatal(); fprintf(stderr, "Must specify either --mgs or --mgsnid\n"); usage(stderr); @@ -1070,20 +1078,20 @@ int main(int argc, char *const argv[]) } /* These are the permanent mount options (always included) */ - switch (mop.mo_ldd.ldd_mount_type) { + switch (ldd->ldd_mount_type) { case LDD_MT_EXT3: case LDD_MT_LDISKFS: { sprintf(always_mountopts, "errors=remount-ro"); - if (IS_MDT(&mop.mo_ldd) || IS_MGS(&mop.mo_ldd)) + if (IS_MDT(ldd) || IS_MGS(ldd)) 
strcat(always_mountopts, ",iopen_nopriv,user_xattr"); - if ((get_os_version() == 24) && IS_OST(&mop.mo_ldd)) + if ((get_os_version() == 24) && IS_OST(ldd)) strcat(always_mountopts, ",asyncdel"); #if 0 /* Files created while extents are enabled cannot be read if mounted with a kernel that doesn't include the CFS patches.*/ - if (IS_OST(&mop.mo_ldd) && - mop.mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) { + if (IS_OST(ldd) && + ldd->ldd_mount_type == LDD_MT_LDISKFS) { strcat(default_mountopts, ",extents,mballoc"); } #endif @@ -1098,8 +1106,8 @@ int main(int argc, char *const argv[]) default: { fatal(); fprintf(stderr, "unknown fs type %d '%s'\n", - mop.mo_ldd.ldd_mount_type, - MT_STR(&mop.mo_ldd)); + ldd->ldd_mount_type, + MT_STR(ldd)); ret = EINVAL; goto out; } @@ -1108,27 +1116,28 @@ int main(int argc, char *const argv[]) if (mountopts) { /* If user specifies mount opts, don't use defaults, but always use always_mountopts */ - sprintf(mop.mo_ldd.ldd_mount_opts, "%s,%s", + sprintf(ldd->ldd_mount_opts, "%s,%s", always_mountopts, mountopts); } else { #ifdef TUNEFS - if (*mop.mo_ldd.ldd_mount_opts == 0) + if (ldd->ldd_mount_opts[0] == 0) /* use the defaults unless old opts exist */ #endif { if (default_mountopts[0]) - sprintf(mop.mo_ldd.ldd_mount_opts, "%s,%s", + sprintf(ldd->ldd_mount_opts, "%s,%s", always_mountopts, default_mountopts); else - strcpy(mop.mo_ldd.ldd_mount_opts, + strcpy(ldd->ldd_mount_opts, always_mountopts); } } - ldd_make_sv_name(&(mop.mo_ldd)); + server_make_name(ldd->ldd_flags, ldd->ldd_svindex, + ldd->ldd_fsname, ldd->ldd_svname); if (verbose > 0) - print_ldd("Permanent disk data", &(mop.mo_ldd)); + print_ldd("Permanent disk data", ldd); if (print_only) { printf("exiting before disk write.\n"); diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index d25fa68..d01016c 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -61,7 +61,8 @@ void usage(FILE *out) "\t: one or more comma separated of:\n" 
"\t\t(no)flock,(no)user_xattr,(no)acl\n" "\t\tnosvc: only start MGC/MGS obds\n" - "\t\texclude=<ostname>[:<ostname>] : colon-separated list of inactive OSTs\n" + "\t\texclude=<ostname>[:<ostname>] : colon-separated list of " + "inactive OSTs (e.g. lustre-OST0001)\n" ); exit((out != stdout) ? EINVAL : 0); } -- 1.8.3.1