From 060503ee5573c39431eee76b107ff9c51055ccf2 Mon Sep 17 00:00:00 2001 From: nathan Date: Fri, 17 Feb 2006 00:51:21 +0000 Subject: [PATCH] Branch b1_4_mountconf b=9860 upgrading from 1.4.6 - don't copy the old client log, just the old mdt log. Regen client log as osts/mdt re-register after upgrade. This means that new clients won't know about all the old osc's that the mdt knows about, so don't start them until after all servers are upgraded. But this also means no frankenstein hybrid old/new client log, and we don't need to know the client name. Use the old uuids parsed out of last_rcvd for the upgrading servers. We can go back to 1.4.6 with no action required. --- lustre/include/linux/lustre_idl.h | 7 +-- lustre/mgc/mgc_request.c | 8 +++ lustre/mgs/mgs_handler.c | 3 +- lustre/mgs/mgs_llog.c | 103 +++++++++++++++++++------------------- lustre/obdclass/obd_config.c | 11 ++-- lustre/obdclass/obd_mount.c | 4 +- lustre/utils/llog_reader.c | 17 +++++-- lustre/utils/mkfs_lustre.c | 9 ++-- 8 files changed, 90 insertions(+), 72 deletions(-) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index d12e1de..ea692b4 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -1013,9 +1013,10 @@ struct mgs_target_info { extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); -#define CM_START 0x01 -#define CM_END 0x02 -#define CM_SKIP 0x04 +#define CM_START 0x01 +#define CM_END 0x02 +#define CM_SKIP 0x04 +#define CM_UPGRADE146 0x08 #define CM_START_SKIP (CM_START | CM_SKIP) struct cfg_marker { diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 167ba59..48e46ef 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -352,6 +352,9 @@ static int mgc_async_requeue(void *data) int rc; ENTRY; + if (!data) + RETURN(-EINVAL); + lock_kernel(); ptlrpc_daemonize(); SIGNAL_MASK_LOCK(current, flags); @@ -415,6 +418,11 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, 
struct ldlm_lock_desc *desc, break; } + if (!data) { + CERROR("missing data, won't requeue\n"); + break; + } + /* Reenque the lock in a separate thread, because we must return from this fn before that lock can be taken. */ rc = kernel_thread(mgc_async_requeue, data, diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 553876a..e7126a0 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -382,7 +382,8 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) } mti->mti_flags &= ~LDD_F_UPGRADE14; - //mti->mti_flags |= LDD_F_REWRITE_LDD; + /* Turn off the upgrade flag permanently */ + mti->mti_flags |= LDD_F_REWRITE_LDD; } /* end COMPAT_146 */ diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 4951c5a..1adc4e9 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -400,6 +400,7 @@ int mgs_set_index(struct obd_device *obd, struct mgs_target_info *mti) } set_bit(mti->mti_stripe_index, imap); + fsdb->fsdb_flags &= ~FSDB_EMPTY; server_make_name(mti->mti_flags, mti->mti_stripe_index, mti->mti_fsname, mti->mti_svname); @@ -546,7 +547,7 @@ static int record_marker(struct obd_device *obd, struct llog_handle *llh, struct lustre_cfg *lcfg; int rc; - CDEBUG(D_MGS, "lcfg marker\n"); + CDEBUG(D_MGS, "marker %#x %s\n", flags, comment); if (flags & CM_START) fsdb->fsdb_gen++; @@ -661,7 +662,6 @@ static int mgs_write_log_lov(struct obd_device *obd, struct fs_db *fsdb, OBD_ALLOC(lovdesc, sizeof(*lovdesc)); if (lovdesc == NULL) RETURN(-ENOMEM); - /* Use defaults here, will fix them later with LCFG_PARAM */ lovdesc->ld_magic = LOV_DESC_MAGIC; lovdesc->ld_tgt_count = 0; lovdesc->ld_default_stripe_count = mti->mti_stripe_count; @@ -745,6 +745,7 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, name_create("lov_", oldname, &lovname); CERROR("lov name: %s\n", lovname); } else { + /* Make up our own uuid and lov name */ snprintf(mti->mti_uuid, sizeof(mti->mti_uuid), "%s_UUID", mti->mti_svname); 
name_create(mti->mti_fsname, "-mdtlov", &lovname); @@ -777,8 +778,8 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, OBD_IOC_LOV_SETSTRIPE / LL_IOC_LOV_SETSTRIPE. Or, heck, just make them use lfs setstripe on the root... */ if (!first_log) { - /* Fix lov settings if they were set by something other - than the MDT */ + /* Fix default lov settings if they were set by something other + than the MDT. */ OBD_ALLOC(s1, 256); if (s1) { s2 = sprintf(s1, "default_stripe_size="LPU64, @@ -787,8 +788,8 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, mti->mti_stripe_count) + s2 + 1; s4 = sprintf(s3, "default_stripe_offset="LPU64, mti->mti_stripe_offset) + s3 + 1; - s5 = sprintf(s4, "default_stripe_pattern=%u", - mti->mti_stripe_pattern) + s4 + 1; + s5 = sprintf(s4, "default_stripe_pattern=%u", + mti->mti_stripe_pattern) + s4 + 1; LASSERT(s5 - s1 < 256); record_param(obd, llh, lovname, s1, s2, s3, s4); } @@ -803,17 +804,14 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add mdt"); rc = record_end_log(obd, &llh); -#if 0 /* this would be for trying to update an old client log */ - if (mti->mti_flags & LDD_F_UPGRADE14) - /* If we're upgrading, the client log is done. */ - GOTO(out_nocli, rc); -#endif - /* Append the mdt info to the client log */ name_create(mti->mti_fsname, "-client", &cliname); name_destroy(lovname); name_create(mti->mti_fsname, "-clilov", &lovname); - if (first_log) { + if (first_log || + /* If we're upgrading, the MDT log will exist but not the client. 
*/ + ((mti->mti_flags & LDD_F_UPGRADE14) && + mgs_log_is_empty(obd, cliname))) { /* Start client log */ rc = mgs_write_log_lov(obd, fsdb, mti, cliname, lovname); } @@ -851,7 +849,6 @@ static int mgs_write_log_mdt(struct obd_device *obd, struct fs_db *fsdb, name_destroy(mdcname); name_destroy(nodeuuid); name_destroy(cliname); -out_nocli: name_destroy(lovname); RETURN(rc); } @@ -859,7 +856,7 @@ out_nocli: /* Add the ost info to the client/mdt lov */ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti, - char *logname, char *lovname) + char *logname, char *lovname, int flags) { struct llog_handle *llh = NULL; char *nodeuuid, *oscname, *oscuuid, *lovuuid; @@ -867,7 +864,8 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, int i, rc; if (mgs_log_is_empty(obd, logname)) { - /* The first time an osc is added, setup the lov */ + /* The first item in the log must be the lov, so we have + somewhere to add our osc. */ rc = mgs_write_log_lov(obd, fsdb, mti, logname, lovname); } @@ -891,7 +889,8 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, #08 L lov_modify_tgts add 0:lov1 1:ost1_UUID 2(index):0 3(gen):1 */ rc = record_start_log(obd, &llh, logname); - rc = record_marker(obd, llh, fsdb, CM_START, mti->mti_svname,"add osc"); + rc = record_marker(obd, llh, fsdb, CM_START | flags, mti->mti_svname, + "add osc"); for (i = 0; i < mti->mti_nid_count; i++) { CDEBUG(D_MGS, "add nid %s\n", libcfs_nid2str(mti->mti_nids[i])); rc = record_add_uuid(obd, llh, mti->mti_nids[i], nodeuuid); @@ -901,7 +900,8 @@ static int mgs_write_log_osc(struct obd_device *obd, struct fs_db *fsdb, rc = mgs_write_log_failnids(obd, mti, llh, oscname); snprintf(index, sizeof(index), "%d", mti->mti_stripe_index); rc = record_lov_add(obd, llh, lovname, mti->mti_uuid, index, "1"); - rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add osc"); + rc = record_marker(obd, llh, fsdb, CM_END | flags, 
mti->mti_svname, + "add osc"); rc = record_end_log(obd, &llh); name_destroy(lovuuid); @@ -916,7 +916,7 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, { struct llog_handle *llh = NULL; char *logname, *lovname; - int rc; + int rc, flags = 0; ENTRY; CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname); @@ -955,27 +955,26 @@ static int mgs_write_log_ost(struct obd_device *obd, struct fs_db *fsdb, rc = record_marker(obd, llh, fsdb, CM_END, mti->mti_svname, "add ost"); rc = record_end_log(obd, &llh); -#if 0 /* this would be for trying to update an old client log */ - if (mti->mti_flags & LDD_F_UPGRADE14) - /* If we're upgrading, the client log is done. */ - RETURN(rc); -#endif - /* We also have to update the other logs where this osc is part of the lov */ + /* Append ost info to mdt log */ + if (mti->mti_flags & LDD_F_UPGRADE14) + /* If we're upgrading, the old mdt log already has our + entry. Let's do a fake one for fun. */ + flags = CM_SKIP | CM_UPGRADE146; /* FIXME add to all mdt logs for CMD */ // FIXME need real mdt name -- but MDT may not have registered yet! name_create(mti->mti_fsname, "-MDT0000", &logname); name_create(mti->mti_fsname, "-mdtlov", &lovname); - mgs_write_log_osc(obd, fsdb, mti, logname, lovname); + mgs_write_log_osc(obd, fsdb, mti, logname, lovname, flags); name_destroy(lovname); name_destroy(logname); - + /* Append ost info to the client log */ name_create(mti->mti_fsname, "-client", &logname); name_create(mti->mti_fsname, "-clilov", &lovname); - mgs_write_log_osc(obd, fsdb, mti, logname, lovname); + mgs_write_log_osc(obd, fsdb, mti, logname, lovname, 0); name_destroy(lovname); name_destroy(logname); @@ -1012,10 +1011,6 @@ int mgs_write_log_target(struct obd_device *obd, } up(&fsdb->fsdb_sem); - if (!rc) - /* I know the db isn't empty if we succeeded in writing. 
*/ - fsdb->fsdb_flags &= ~FSDB_EMPTY; - return rc; } @@ -1023,6 +1018,7 @@ int mgs_write_log_target(struct obd_device *obd, /* COMPAT_146 */ /***************** upgrade pre-mountconf logs to mountconf *****************/ +#if 0 int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *fsdb, struct mgs_target_info *mti) { @@ -1052,16 +1048,6 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *fsdb, */ - /* - Why not just create logs normally, as servers register? - That way logs are modern (except have old uuids) - - Old clients can continue to use upgraded OSTs - - New clients will only start with upgraded OSTs - - MDT won't know about old OSTs, only upgraded :( - Don't need 1,2,3 above (normal server gen at first mount), uuid from - old last_rcvd - - */ /* old mdt log: old osc's were part of old lov, mount opt connects mdt to lov @@ -1079,13 +1065,11 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *fsdb, } /* Append the MDT startup sequence to the old log (lconf used to start the MDT directly) */ - /* FIXME Old logs already have an old mount opt - which we should drop */ rc = mgs_write_log_mdt(obd, fsdb, mti); if (rc) RETURN(rc); -#if 0 /* this would be for trying to update an old client log */ + /* this would be for trying to update an old client log */ struct llog_handle *llh = NULL; char *cliname; CDEBUG(D_MGS, "Upgrade client\n"); @@ -1104,7 +1088,6 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *fsdb, "upgrade to 1.6"); rc = record_end_log(obd, &llh); name_destroy(cliname); -#endif } if ((mti->mti_flags & LDD_F_SV_TYPE_OST)) { @@ -1115,6 +1098,7 @@ int mgs_upgrade_logs_14(struct obd_device *obd, struct fs_db *fsdb, RETURN(rc); } +#endif /* first connect of upgraded servers */ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) @@ -1123,6 +1107,15 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) int rc = 0; ENTRY; + /* Create client and ost log normally, as 
servers register. + That way logs are modern (except have old uuids (from last_rcvd)) + - Old clients can continue to use upgraded OSTs + - New clients will only start with upgraded OSTs + - MDT won't know about old OSTs, only upgraded, so we need the old + MDT log in order for old clients to work. (Old clients connect to + the MDT, not the MGS, for their logs, and will therefore receive + the old client log from the MDT /LOGS dir.) */ + CDEBUG(D_MGS, "upgrading server %s from pre-1.6\n", mti->mti_svname); server_mti_print("upgrade", mti); @@ -1132,18 +1125,26 @@ int mgs_upgrade_sv_14(struct obd_device *obd, struct mgs_target_info *mti) RETURN(rc); if (fsdb->fsdb_flags & FSDB_EMPTY) - CWARN("missing client log\n"); + /* First server to upgrade sees this */ + CWARN("info: missing client log\n"); if (!(fsdb->fsdb_flags & FSDB_EMPTY) && (fsdb->fsdb_gen == 0)) { /* There were no markers in the client log, meaning we have not updated the logs for this fs */ - CWARN("Old log, not updated yet\n"); + CWARN("info: found old, unupdated client log\n"); } - rc = mgs_upgrade_logs_14(obd, fsdb, mti); - if (rc) - RETURN(rc); + if ((mti->mti_flags & LDD_F_SV_TYPE_MDT) && + mgs_log_is_empty(obd, mti->mti_svname)) { + LCONSOLE_ERROR("The old MDT log %s is missing. 
Was " + "tunefs.lustre successful?\n", + mti->mti_svname); + RETURN(-ENOENT); + } + /* FIXME Old MDT log already has an old mount opt + which we should drop */ + rc = mgs_write_log_target(obd, mti); RETURN(rc); } /* end COMPAT_146 */ diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index eca87c8..a240921 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -756,15 +756,14 @@ static int class_config_llog_handler(struct llog_handle * handle, if (marker->cm_flags & CM_SKIP) { cfg->cfg_flags |= CFG_F_SKIP; // FIXME warning - CDEBUG(D_CONFIG|D_WARNING, "SKIP %d\n", + CDEBUG(D_CONFIG|D_WARNING, "SKIP #%d\n", marker->cm_step); - } - if (lustre_check_exclusion(cfg->cfg_sb, - marker->cm_svname)) { + } else if (lustre_check_exclusion(cfg->cfg_sb, + marker->cm_svname)) { cfg->cfg_flags |= CFG_F_EXCLUDE; // FIXME warning - CDEBUG(D_CONFIG|D_WARNING, "EXCLUDE %d\n", - marker->cm_step); + CDEBUG(D_CONFIG|D_WARNING, + "EXCLUDE %d\n", marker->cm_step); } } else if (marker->cm_flags & CM_END) { cfg->cfg_flags = 0; diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 534f93b..04efe95 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -871,6 +871,9 @@ int server_register_target(struct super_block *sb) GOTO(out, rc); } + /* Always update our flags */ + ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD; + /* If this flag is set, it means the MGS wants us to change our on-disk data. (So far this means just the index.) */ if (mti->mti_flags & LDD_F_REWRITE_LDD) { @@ -882,7 +885,6 @@ int server_register_target(struct super_block *sb) strncpy(ldd->ldd_svname, mti->mti_svname, sizeof(ldd->ldd_svname)); /* or ldd_make_sv_name(ldd); */ - ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD; ldd_write(&mgc->obd_lvfs_ctxt, ldd); /* FIXME write last_rcvd?, disk label? 
*/ diff --git a/lustre/utils/llog_reader.c b/lustre/utils/llog_reader.c index 116d01a..a6c60ba 100644 --- a/lustre/utils/llog_reader.c +++ b/lustre/utils/llog_reader.c @@ -245,10 +245,13 @@ static void print_setup_cfg(struct lustre_cfg *lcfg) return; } -void print_lustre_cfg(struct lustre_cfg *lcfg) +void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip) { enum lcfg_command_type cmd = le32_to_cpu(lcfg->lcfg_command); + if (*skip > 0) + printf("SKIP "); + switch(cmd){ case(LCFG_ATTACH):{ printf("attach "); @@ -326,6 +329,12 @@ void print_lustre_cfg(struct lustre_cfg *lcfg) } case(LCFG_MARKER):{ struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); + if (marker->cm_flags & CM_SKIP) { + if (marker->cm_flags & CM_START) + (*skip)++; + if (marker->cm_flags & CM_END) + (*skip)--; + } printf("marker %d (flags=%#x) %.16s '%s'", marker->cm_step, marker->cm_flags, marker->cm_svname, marker->cm_comment); break; @@ -340,9 +349,9 @@ void print_lustre_cfg(struct lustre_cfg *lcfg) void print_records(struct llog_rec_hdr** recs,int rec_number) { __u32 lopt; - int i; + int i, skip = 0; - for(i=0;i<rec_number;i++){ + for(i=0;i<rec_number;i++){ printf("#%.2d ", le32_to_cpu(recs[i]->lrh_index)); @@ -353,7 +362,7 @@ void print_records(struct llog_rec_hdr** recs,int rec_number) printf("L "); lcfg = (struct lustre_cfg *) ((char*)(recs[i]) + sizeof(struct llog_rec_hdr)); - print_lustre_cfg(lcfg); + print_lustre_cfg(lcfg, &skip); } if (lopt == PTL_CFG_REC){ diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index d48c302..fb220a4 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -55,7 +55,7 @@ command_t cmdlist[] = { #define INDEX_UNASSIGNED 0xFFFF static char *progname; -static int verbose = 0; +static int verbose = 1; static int print_only = 0; @@ -559,7 +559,6 @@ int write_local_files(struct mkfs_opts *mop) int ret = 0; /* Mount this device temporarily in order to write these files */ - vprint("mounting backing device\n"); if (!mkdtemp(mntpt)) { fprintf(stderr, "%s: Can't create temp mount point %s: %s\n", progname, mntpt,
strerror(errno)); @@ -670,7 +669,6 @@ int write_local_files(struct mkfs_opts *mop) out_umnt: - vprint("unmounting backing device\n"); umount(mntpt); out_rmdir: rmdir(mntpt); @@ -686,7 +684,6 @@ int read_local_files(struct mkfs_opts *mop) int ret = 0; /* Mount this device temporarily in order to read these files */ - vprint("mounting backing device\n"); if (!mkdtemp(mntpt)) { fprintf(stderr, "%s: Can't create temp mount point %s: %s\n", progname, mntpt, strerror(errno)); @@ -732,6 +729,7 @@ int read_local_files(struct mkfs_opts *mop) if (ret) goto out_close; } + ret = 0; if (lsd.lsd_feature_compat & OBD_COMPAT_OST) { mop->mo_ldd.ldd_flags = LDD_F_SV_TYPE_OST; mop->mo_ldd.ldd_svindex = lsd.lsd_ost_index; @@ -774,7 +772,6 @@ out_close: fclose(filep); out_umnt: - vprint("unmounting backing device\n"); umount(mntpt); out_rmdir: rmdir(mntpt); @@ -1159,7 +1156,7 @@ int main(int argc, char *const argv[]) server_make_name(ldd->ldd_flags, ldd->ldd_svindex, ldd->ldd_fsname, ldd->ldd_svname); - if (verbose >= 0) + if (verbose > 0) print_ldd("Permanent disk data", ldd); if (print_only) { -- 1.8.3.1