From 79d06b6fac3d18f2e5755a940e8afa42e70f3ba2 Mon Sep 17 00:00:00 2001 From: wangdi Date: Tue, 19 Nov 2013 06:48:38 -0800 Subject: [PATCH 1/1] LU-2240 mds: Assign special fid sequence to root. In the current implementation, we still use IGIF as the ROOT FID, but ZFS does not have IGIF at all, so this patch moves the ROOT FID to a new sequence. However, for compatibility with clients that are mounted during the upgrade, it will still keep the old IGIF root FID. Signed-off-by: Wang Di Change-Id: I2e6b81cd359436d2e5b96c8a315a56c415f8e631 Reviewed-on: http://review.whamcloud.com/5257 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: Mike Pershin --- lustre/fld/fld_handler.c | 7 +-- lustre/fld/fld_index.c | 80 ++++++++++++++++++++----------- lustre/include/lustre/lustre_idl.h | 67 ++++++++++++++++++-------- lustre/include/lustre_fid.h | 13 ++++- lustre/lmv/lmv_fld.c | 14 ++---- lustre/lod/lod_dev.c | 2 +- lustre/mdd/mdd_device.c | 97 ++++++++++++++++++++++++-------------- lustre/mdd/mdd_internal.h | 5 ++ lustre/osd-ldiskfs/osd_handler.c | 14 ++---- lustre/osd-ldiskfs/osd_oi.c | 4 +- 10 files changed, 193 insertions(+), 110 deletions(-) diff --git a/lustre/fld/fld_handler.c b/lustre/fld/fld_handler.c index 9094843..7fe5d7e 100644 --- a/lustre/fld/fld_handler.c +++ b/lustre/fld/fld_handler.c @@ -72,7 +72,7 @@ LU_KEY_INIT_FINI(fld, struct fld_thread_info); /* context key: fld_thread_key */ -LU_CONTEXT_KEY_DEFINE(fld, LCT_MD_THREAD|LCT_DT_THREAD); +LU_CONTEXT_KEY_DEFINE(fld, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD); cfs_proc_dir_entry_t *fld_type_proc_dir = NULL; @@ -145,8 +145,9 @@ int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld, int rc; ENTRY; - info = lu_context_key_get(&env->le_ctx, &fld_thread_key); - erange = &info->fti_lrange; + info = lu_context_key_get(&env->le_ctx, &fld_thread_key); + LASSERT(info != NULL); + erange = &info->fti_lrange; /* Lookup it in the cache. 
*/ rc = fld_cache_lookup(fld->lsf_cache, seq, erange); diff --git a/lustre/fld/fld_index.c b/lustre/fld/fld_index.c index c0295ab..f8bbfba 100644 --- a/lustre/fld/fld_index.c +++ b/lustre/fld/fld_index.c @@ -71,6 +71,13 @@ static const struct lu_seq_range IGIF_FLD_RANGE = { .lsr_flags = LU_SEQ_RANGE_MDT }; +static const struct lu_seq_range ROOT_FLD_RANGE = { + .lsr_start = FID_SEQ_ROOT, + .lsr_end = FID_SEQ_ROOT + 1, + .lsr_index = 0, + .lsr_flags = LU_SEQ_RANGE_MDT +}; + const struct dt_index_features fld_index_features = { .dif_flags = DT_IND_UPDATE, .dif_keysize_min = sizeof(seqno_t), @@ -255,8 +262,9 @@ int fld_index_lookup(const struct lu_env *env, struct lu_server_fld *fld, RETURN(rc); } -static int fld_insert_igif_fld(struct lu_server_fld *fld, - const struct lu_env *env) +static int fld_insert_entry(const struct lu_env *env, + struct lu_server_fld *fld, + const struct lu_seq_range *range) { struct thandle *th; int rc; @@ -266,7 +274,7 @@ static int fld_insert_igif_fld(struct lu_server_fld *fld, if (IS_ERR(th)) RETURN(PTR_ERR(th)); - rc = fld_declare_index_create(env, fld, &IGIF_FLD_RANGE, th); + rc = fld_declare_index_create(env, fld, range, th); if (rc != 0) { if (rc == -EEXIST) rc = 0; @@ -278,7 +286,7 @@ static int fld_insert_igif_fld(struct lu_server_fld *fld, if (rc) GOTO(out, rc); - rc = fld_index_create(env, fld, &IGIF_FLD_RANGE, th); + rc = fld_index_create(env, fld, range, th); if (rc == -EEXIST) rc = 0; out: @@ -286,6 +294,20 @@ out: RETURN(rc); } +static int fld_insert_special_entries(const struct lu_env *env, + struct lu_server_fld *fld) +{ + int rc; + + rc = fld_insert_entry(env, fld, &IGIF_FLD_RANGE); + if (rc != 0) + RETURN(rc); + + rc = fld_insert_entry(env, fld, &ROOT_FLD_RANGE); + + RETURN(rc); +} + int fld_index_init(const struct lu_env *env, struct lu_server_fld *fld, struct dt_device *dt) { @@ -325,16 +347,7 @@ int fld_index_init(const struct lu_env *env, struct lu_server_fld *fld, fld->lsf_obj = dt_obj; rc = 
dt_obj->do_ops->do_index_try(env, dt_obj, &fld_index_features); - if (rc == 0) { - LASSERT(dt_obj->do_index_ops != NULL); - mutex_lock(&fld->lsf_lock); - rc = fld_insert_igif_fld(fld, env); - mutex_unlock(&fld->lsf_lock); - if (rc != 0) { - CERROR("insert igif in fld! = %d\n", rc); - GOTO(out, rc); - } - } else { + if (rc != 0) { CERROR("%s: File \"%s\" is not an index: rc = %d!\n", fld->lsf_name, fld_index_name, rc); GOTO(out, rc); @@ -351,21 +364,34 @@ int fld_index_init(const struct lu_env *env, struct lu_server_fld *fld, if (rc < 0) GOTO(out_it_fini, rc); - do { - rc = iops->rec(env, it, (struct dt_rec *)range, 0); - if (rc != 0) - GOTO(out_it_fini, rc); - - LASSERT(range != NULL); - range_be_to_cpu(range, range); - rc = fld_cache_insert(fld->lsf_cache, range); - if (rc != 0) - GOTO(out_it_fini, rc); - rc = iops->next(env, it); + if (rc > 0) { + /* Load FLD entry into server cache */ + do { + rc = iops->rec(env, it, (struct dt_rec *)range, 0); + if (rc != 0) + GOTO(out_it_put, rc); + LASSERT(range != NULL); + range_be_to_cpu(range, range); + rc = fld_cache_insert(fld->lsf_cache, range); + if (rc != 0) + GOTO(out_it_put, rc); + rc = iops->next(env, it); + } while (rc == 0); + } - } while (rc == 0); - rc = 0; + /* Note: fld_insert_entry will detect whether these + * special entries already exist inside FLDB */ + mutex_lock(&fld->lsf_lock); + rc = fld_insert_special_entries(env, fld); + mutex_unlock(&fld->lsf_lock); + if (rc != 0) { + CERROR("%s: insert special entries failed!: rc = %d\n", + fld->lsf_name, rc); + GOTO(out_it_put, rc); + } +out_it_put: + iops->put(env, it); out_it_fini: iops->fini(env, it); out: diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 553d6cc..9330f95 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -399,29 +399,34 @@ static inline obd_id fid_ver_oid(const struct lu_fid *fid) * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0 */ enum 
fid_seq { - FID_SEQ_OST_MDT0 = 0, - FID_SEQ_LLOG = 1, - FID_SEQ_ECHO = 2, - FID_SEQ_OST_MDT1 = 3, - FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */ - FID_SEQ_RSVD = 11, - FID_SEQ_IGIF = 12, - FID_SEQ_IGIF_MAX = 0x0ffffffffULL, - FID_SEQ_IDIF = 0x100000000ULL, - FID_SEQ_IDIF_MAX = 0x1ffffffffULL, - /* Normal FID sequence starts from this value, i.e. 1<<33 */ - FID_SEQ_START = 0x200000000ULL, + FID_SEQ_OST_MDT0 = 0, + FID_SEQ_LLOG = 1, + FID_SEQ_ECHO = 2, + FID_SEQ_OST_MDT1 = 3, + FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */ + FID_SEQ_RSVD = 11, + FID_SEQ_IGIF = 12, + FID_SEQ_IGIF_MAX = 0x0ffffffffULL, + FID_SEQ_IDIF = 0x100000000ULL, + FID_SEQ_IDIF_MAX = 0x1ffffffffULL, + /* Normal FID sequence starts from this value, i.e. 1<<33 */ + FID_SEQ_START = 0x200000000ULL, /* sequence for local pre-defined FIDs listed in local_oid */ - FID_SEQ_LOCAL_FILE = 0x200000001ULL, - FID_SEQ_DOT_LUSTRE = 0x200000002ULL, + FID_SEQ_LOCAL_FILE = 0x200000001ULL, + FID_SEQ_DOT_LUSTRE = 0x200000002ULL, /* sequence is used for local named objects FIDs generated * by local_object_storage library */ - FID_SEQ_LOCAL_NAME = 0x200000003ULL, - FID_SEQ_SPECIAL = 0x200000004ULL, - FID_SEQ_QUOTA = 0x200000005ULL, - FID_SEQ_QUOTA_GLB = 0x200000006ULL, - FID_SEQ_NORMAL = 0x200000400ULL, - FID_SEQ_LOV_DEFAULT= 0xffffffffffffffffULL + FID_SEQ_LOCAL_NAME = 0x200000003ULL, + /* Because current FLD will only cache the fid sequence, instead + * of oid on the client side, if the FID needs to be exposed to + * clients sides, it needs to make sure all of fids under one + * sequence will be located in one MDT. 
*/ + FID_SEQ_SPECIAL = 0x200000004ULL, + FID_SEQ_QUOTA = 0x200000005ULL, + FID_SEQ_QUOTA_GLB = 0x200000006ULL, + FID_SEQ_ROOT = 0x200000007ULL, /* Located on MDT0 */ + FID_SEQ_NORMAL = 0x200000400ULL, + FID_SEQ_LOV_DEFAULT = 0xffffffffffffffffULL }; #define OBIF_OID_MAX_BITS 32 @@ -478,11 +483,28 @@ static inline int fid_seq_is_rsvd(const __u64 seq) return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD); }; +static inline int fid_seq_is_special(const __u64 seq) +{ + return seq == FID_SEQ_SPECIAL; +}; + +static inline int fid_seq_is_local_file(const __u64 seq) +{ + return seq == FID_SEQ_LOCAL_FILE; +}; + static inline int fid_is_mdt0(const struct lu_fid *fid) { return fid_seq_is_mdt0(fid_seq(fid)); } +static inline void lu_root_fid(struct lu_fid *fid) +{ + fid->f_seq = FID_SEQ_ROOT; + fid->f_oid = 1; + fid->f_ver = 0; +} + /** * Check if a fid is igif or not. * \param fid the fid to be tested. @@ -518,6 +540,11 @@ struct ost_id { obd_seq oi_seq; }; +static inline int fid_is_local_file(const struct lu_fid *fid) +{ + return fid_seq_is_local_file(fid_seq(fid)); +} + static inline int fid_seq_is_norm(const __u64 seq) { return (seq >= FID_SEQ_NORMAL); diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index 539e829..0deb5db 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -249,10 +249,13 @@ static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid) fid->f_ver = 0; } +/* For new FS (>= 2.4), the root FID will be changed to + * [FID_SEQ_ROOT:1:0], for existing FS, (upgraded to 2.4), + * the root FID will still be IGIF */ static inline int fid_is_root(const struct lu_fid *fid) { - return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE && - fid_oid(fid) == MDD_ROOT_INDEX_OID); + return unlikely((fid_seq(fid) == FID_SEQ_ROOT && + fid_oid(fid) == 1)); } static inline int fid_is_dot_lustre(const struct lu_fid *fid) @@ -296,6 +299,12 @@ static inline int fid_is_client_visible(const struct lu_fid *fid) return 
fid_is_client_mdt_visible(fid) || fid_is_idif(fid); } +static inline int fid_seq_in_fldb(__u64 seq) +{ + return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) || + seq == FID_SEQ_ROOT; +} + static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq) { if (fid_seq_is_mdt0(seq)) { diff --git a/lustre/lmv/lmv_fld.c b/lustre/lmv/lmv_fld.c index b630bbc26..fc7f4bd 100644 --- a/lustre/lmv/lmv_fld.c +++ b/lustre/lmv/lmv_fld.c @@ -64,16 +64,12 @@ int lmv_fld_lookup(struct lmv_obd *lmv, int rc; ENTRY; - LASSERTF(fid_is_sane(fid), DFID" is insane!\n", PFID(fid)); - /* FIXME: Because ZFS still use LOCAL fid sequence for root, - * and root will always be in MDT0, for local fid, it will - * return 0 directly. And it should be removed once the root - * FID has been assigned with special sequence */ - if (fid_seq(fid) == FID_SEQ_LOCAL_FILE) { - *mds = 0; - RETURN(0); - } + /* FIXME: Currently ZFS still use local seq for ROOT unfortunately, and + * this fid_is_local check should be removed once LU-2240 is fixed */ + LASSERTF((fid_seq_in_fldb(fid_seq(fid)) || + fid_seq_is_local_file(fid_seq(fid))) && + fid_is_sane(fid), DFID" is insane!\n", PFID(fid)); rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds, LU_SEQ_RANGE_MDT, NULL); diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 13e24cf..17dca0c 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -72,7 +72,7 @@ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, RETURN(rc); } - if (!lod->lod_initialized || !fid_is_norm(fid)) { + if (!lod->lod_initialized || (!fid_seq_in_fldb(fid_seq(fid)))) { LASSERT(lu_site2seq(lod2lu_dev(lod)->ld_site) != NULL); *tgt = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id; RETURN(rc); diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 0388758..e9d57c5 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -1161,46 +1161,83 @@ static int mdd_recovery_complete(const struct lu_env *env, RETURN(rc); } +static int 
mdd_find_or_create_root(const struct lu_env *env, + struct mdd_device *mdd) +{ + struct dt_object *root; + struct md_object *mroot; + struct lu_fid *fid = &mdd_env_info(env)->mti_fid; + int rc = 0; + + ENTRY; + + /* Check if the "ROOT" entry exists already */ + root = dt_store_open(env, mdd->mdd_child, "", mdd_root_dir_name, + fid); + if (!IS_ERR(root)) { + lu_object_put(env, &root->do_lu); + GOTO(out, rc = 0); + } + + lu_root_fid(fid); + /* New Filesystem, create /ROOT */ + mroot = llo_store_create_index(env, &mdd->mdd_md_dev, mdd->mdd_bottom, + "", mdd_root_dir_name, fid, + &dt_directory_features); + if (IS_ERR(mroot)) + GOTO(out, rc = PTR_ERR(mroot)); + + lu_object_put(env, &mroot->mo_lu); +out: + if (rc == 0) + mdd->mdd_root_fid = *fid; + + RETURN(rc); +} + static int mdd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *cdev) { - struct mdd_device *mdd = lu2mdd_dev(cdev); - struct lu_device *next = &mdd->mdd_child->dd_lu_dev; - struct dt_object *root; - struct lu_fid fid; - int rc; + struct mdd_device *mdd = lu2mdd_dev(cdev); + struct lu_device *next = &mdd->mdd_child->dd_lu_dev; + struct dt_object *root; + struct lu_fid *fid = &mdd_env_info(env)->mti_fid; + int rc; ENTRY; - rc = next->ld_ops->ldo_prepare(env, cdev, next); - if (rc) - GOTO(out, rc); + + rc = next->ld_ops->ldo_prepare(env, cdev, next); + if (rc) + GOTO(out, rc); rc = dt_root_get(env, mdd->mdd_child, &mdd->mdd_local_root_fid); if (rc != 0) GOTO(out, rc); - root = dt_store_open(env, mdd->mdd_child, "", mdd_root_dir_name, - &mdd->mdd_root_fid); - if (!IS_ERR(root)) { - LASSERT(root != NULL); - lu_object_put(env, &root->do_lu); - rc = orph_index_init(env, mdd); - } else { - rc = PTR_ERR(root); - } - if (rc) - GOTO(out, rc); + if (mdd_seq_site(mdd)->ss_node_id == 0) { + rc = mdd_find_or_create_root(env, mdd); + if (rc != 0) { + CERROR("%s: create root fid failed: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + GOTO(out, rc); + } - rc = mdd_dot_lustre_setup(env, mdd); - if 
(rc) { - CERROR("Error(%d) initializing .lustre objects\n", rc); + rc = mdd_dot_lustre_setup(env, mdd); + if (rc != 0) { + CERROR("%s: initializing .lustre failed: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + GOTO(out, rc); + } + } + + rc = orph_index_init(env, mdd); + if (rc != 0) GOTO(out, rc); - } /* we use capa file to declare llog changes, * will be fixed with new llog in 2.3 */ - root = dt_store_open(env, mdd->mdd_child, "", CAPA_KEYS, &fid); + root = dt_store_open(env, mdd->mdd_child, "", CAPA_KEYS, fid); if (IS_ERR(root)) GOTO(out, rc = PTR_ERR(root)); @@ -1214,9 +1251,6 @@ static int mdd_prepare(const struct lu_env *env, if (rc != 0) CERROR("%s: failed to initialize lfsck: rc = %d\n", mdd2obd_dev(mdd)->obd_name, rc); - - GOTO(out, rc); - out: return rc; } @@ -1733,13 +1767,6 @@ static struct lu_local_obj_desc llod_mdd_orphan = { .llod_feat = &dt_directory_features, }; -static struct lu_local_obj_desc llod_mdd_root = { - .llod_name = mdd_root_dir_name, - .llod_oid = MDD_ROOT_INDEX_OID, - .llod_is_index = 1, - .llod_feat = &dt_directory_features, -}; - static struct lu_local_obj_desc llod_lfsck_bookmark = { .llod_name = lfsck_bookmark_name, .llod_oid = LFSCK_BOOKMARK_OID, @@ -1771,7 +1798,6 @@ static int __init mdd_mod_init(void) llo_local_obj_register(&llod_capa_key); llo_local_obj_register(&llod_mdd_orphan); - llo_local_obj_register(&llod_mdd_root); llo_local_obj_register(&llod_lfsck_bookmark); llo_local_obj_register(&llod_lfsck_namespace); @@ -1786,7 +1812,6 @@ static void __exit mdd_mod_exit(void) { llo_local_obj_unregister(&llod_capa_key); llo_local_obj_unregister(&llod_mdd_orphan); - llo_local_obj_unregister(&llod_mdd_root); llo_local_obj_unregister(&llod_lfsck_bookmark); llo_local_obj_unregister(&llod_lfsck_namespace); diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 58fb060..97f6cb6 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -583,6 +583,11 @@ static inline const struct lu_fid 
*mdd_object_fid(struct mdd_object *obj) return lu_object_fid(mdd2lu_obj(obj)); } +static inline struct seq_server_site *mdd_seq_site(struct mdd_device *mdd) +{ + return mdd2lu_dev(mdd)->ld_site->ld_seq_site; +} + static inline struct lustre_capa *mdd_object_capa(const struct lu_env *env, const struct mdd_object *obj) { diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 86a3dc9..d589556 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1363,7 +1363,8 @@ static void osd_inode_getattr(const struct lu_env *env, { attr->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | LA_SIZE | LA_BLOCKS | LA_UID | LA_GID | - LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE; + LA_FLAGS | LA_NLINK | LA_RDEV | LA_BLKSIZE | + LA_TYPE; attr->la_atime = LTIME_S(inode->i_atime); attr->la_mtime = LTIME_S(inode->i_mtime); @@ -1973,19 +1974,13 @@ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, struct seq_server_site *ss = osd_seq_site(osd); int rc; - if (fid_is_igif(fid)) { - range->lsr_flags = LU_SEQ_RANGE_MDT; - range->lsr_index = 0; - return 0; - } - if (fid_is_idif(fid)) { range->lsr_flags = LU_SEQ_RANGE_OST; range->lsr_index = fid_idif_ost_idx(fid); return 0; } - if (!fid_is_norm(fid)) { + if (!fid_seq_in_fldb(fid_seq(fid))) { range->lsr_flags = LU_SEQ_RANGE_MDT; if (ss != NULL) /* FIXME: If ss is NULL, it suppose not get lsr_index @@ -3166,7 +3161,8 @@ static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, int rc; ENTRY; - if ((!fid_is_norm(fid) && !fid_is_igif(fid)) || ss == NULL) + /* Those FID seqs, which are not in FLDB, must be local seq */ + if (unlikely(!fid_seq_in_fldb(fid_seq(fid)) || ss == NULL)) RETURN(0); rc = osd_fld_lookup(env, osd, fid, range); diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index 8f9dbc4..118359a 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -467,7 +467,7 @@ int fid_is_on_ost(struct 
osd_thread_info *info, struct osd_device *osd, int rc; ENTRY; - if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE)) + if (unlikely(fid_is_local_file(fid) || fid_is_igif(fid))) RETURN(0); if (fid_is_idif(fid) || fid_is_last_id(fid)) @@ -515,8 +515,6 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, return osd_obj_spec_lookup(info, osd, fid, id); if ((check_fld && fid_is_on_ost(info, osd, fid)) || fid_is_llog(fid)) - /* old OSD obj id */ - /* FIXME: actually for all of the OST object */ return osd_obj_map_lookup(info, osd, fid, id); if (fid_is_fs_root(fid)) { -- 1.8.3.1