From: wang di Date: Wed, 7 Aug 2013 07:02:21 +0000 (-0700) Subject: LU-3126 osd: remove fld lookup during configuration X-Git-Tag: 2.4.92~46 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e0702769f267dd009a6287bbc9da2760079a101d;hp=2dc1bb8b7a53077fce8632aabe65b2ce8048a550 LU-3126 osd: remove fld lookup during configuration Remove the FLD lookup during configuration, so as to avoid accessing the FLDB on MDT0 before MDT0 is set up. 1. Add od_is_ost to check whether a FID is on an OST, instead of looking it up in the FLDB. 2. Add oic_dev to the OI cache, so that an OI cache lookup also matches the device; this avoids retrieving the ino of an agent inode from the OI cache. Then osd_remote_fid, which would trigger an FLD lookup, can be removed from the osd index lookup. Signed-off-by: wang di Change-Id: I85649431f47ad8aa8bd1e46a7b074e15b080bb1d Reviewed-on: http://review.whamcloud.com/7266 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Mike Pershin Reviewed-by: John L. Hammond Reviewed-by: Fan Yong Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/fld/fld_handler.c b/lustre/fld/fld_handler.c index 53694ef..d54a21a 100644 --- a/lustre/fld/fld_handler.c +++ b/lustre/fld/fld_handler.c @@ -145,7 +145,11 @@ int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld, fld->lsf_name, seq, -EIO); RETURN(-EIO); } else { - LASSERT(fld->lsf_control_exp); + if (fld->lsf_control_exp == NULL) { + CERROR("%s: lookup "LPX64", but not connects to MDT0" + "yet: rc = %d.\n", fld->lsf_name, seq, -EIO); + RETURN(-EIO); + } /* send request to mdt0 i.e. super seq. controller. * This is temporary solution, long term solution is fld * replication on all mdt servers. 
diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 3ee0c7e..8bff3f0 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -611,10 +611,15 @@ static inline obd_id fid_idif_id(obd_seq seq, __u32 oid, __u32 ver) return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid; } +static inline __u32 idif_ost_idx(obd_seq seq) +{ + return (seq >> 16) & 0xffff; +} + /* extract ost index from IDIF FID */ static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid) { - return (fid_seq(fid) >> 16) & 0xffff; + return idif_ost_idx(fid_seq(fid)); } /* extract OST sequence (group) from a wire ost_id (id/seq) pair */ diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index de1ec28..c67d604 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -532,6 +532,7 @@ int server_name2fsname(const char *svname, char *fsname, const char **endptr); int server_name2index(const char *svname, __u32 *idx, const char **endptr); int server_name2svname(const char *label, char *svname, const char **endptr, size_t svsize); +int server_name_is_ost(const char *svname); int lustre_put_lsi(struct super_block *sb); int lustre_start_simple(char *obdname, char *type, char *uuid, diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 922e877..dab4140 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -716,6 +716,26 @@ int server_name2svname(const char *label, char *svname, const char **endptr, } EXPORT_SYMBOL(server_name2svname); +/** + * check server name is OST. + **/ +int server_name_is_ost(const char *svname) +{ + const char *dash; + int rc; + + /* We use server_name2fsname() just for parsing */ + rc = server_name2fsname(svname, NULL, &dash); + if (rc != 0) + return rc; + + dash++; + + if (strncmp(dash, "OST", 3) == 0) + return 1; + return 0; +} +EXPORT_SYMBOL(server_name_is_ost); /* Get the index from the obd name. 
rc = server type, or diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index a66d90e..d9f0a40 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -393,7 +393,8 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, RETURN(-ENOENT); /* Search order: 1. per-thread cache. */ - if (lu_fid_eq(fid, &oic->oic_fid)) { + if (lu_fid_eq(fid, &oic->oic_fid) && + likely(oic->oic_dev == dev)) { id = &oic->oic_lid; goto iget; } @@ -2096,18 +2097,18 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, } int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range) + obd_seq seq, struct lu_seq_range *range) { struct seq_server_site *ss = osd_seq_site(osd); int rc; - if (fid_is_idif(fid)) { + if (fid_seq_is_idif(seq)) { fld_range_set_ost(range); - range->lsr_index = fid_idif_ost_idx(fid); + range->lsr_index = idif_ost_idx(seq); return 0; } - if (!fid_seq_in_fldb(fid_seq(fid))) { + if (!fid_seq_in_fldb(seq)) { fld_range_set_mdt(range); if (ss != NULL) /* FIXME: If ss is NULL, it suppose not get lsr_index @@ -2118,10 +2119,10 @@ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, LASSERT(ss != NULL); fld_range_set_any(range); - rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range); + rc = fld_server_lookup(env, ss->ss_server_fld, seq, range); if (rc != 0) { - CERROR("%s: cannot find FLD range for "DFID": rc = %d\n", - osd_name(osd), PFID(fid), rc); + CERROR("%s: cannot find FLD range for "LPX64": rc = %d\n", + osd_name(osd), seq, rc); } return rc; } @@ -2181,7 +2182,7 @@ static int osd_declare_object_create(const struct lu_env *env, if (fid_is_norm(lu_object_fid(&dt->do_lu)) && !fid_is_last_id(lu_object_fid(&dt->do_lu))) osd_fld_lookup(env, osd_dt_dev(handle->th_dev), - lu_object_fid(&dt->do_lu), range); + fid_seq(lu_object_fid(&dt->do_lu)), range); RETURN(rc); @@ -3322,26 
+3323,45 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de, return rc; } -static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, - struct lu_fid *fid) +static int osd_mdt_seq_exists(const struct lu_env *env, + struct osd_device *osd, obd_seq seq) { struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; struct seq_server_site *ss = osd_seq_site(osd); int rc; ENTRY; - /* Those FID seqs, which are not in FLDB, must be local seq */ - if (unlikely(!fid_seq_in_fldb(fid_seq(fid)) || ss == NULL)) - RETURN(0); + if (ss == NULL) + RETURN(1); - rc = osd_fld_lookup(env, osd, fid, range); + /* XXX: currently, each MDT only store avaible sequence on disk, and no + * allocated sequences information on disk, so we have to lookup FLDB, + * but it probably makes more sense also store allocated sequence + * locally, so we do not need do remote FLDB lookup in OSD */ + rc = osd_fld_lookup(env, osd, seq, range); if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd_name(osd), PFID(fid)); - RETURN(rc); + CERROR("%s: Can not lookup fld for "LPX64"\n", + osd_name(osd), seq); + RETURN(0); } - RETURN(ss->ss_node_id != range->lsr_index); + RETURN(ss->ss_node_id == range->lsr_index); +} + +static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, + struct lu_fid *fid) +{ + ENTRY; + + /* FID seqs not in FLDB, must be local seq */ + if (unlikely(!fid_seq_in_fldb(fid_seq(fid)))) + RETURN(0); + + /* Currently only check this for FID on MDT */ + if (osd_mdt_seq_exists(env, osd, fid_seq(fid))) + RETURN(0); + + RETURN(1); } /** @@ -3880,6 +3900,7 @@ int osd_add_oi_cache(struct osd_thread_info *info, struct osd_device *osd, id->oii_ino, id->oii_gen, info); info->oti_cache.oic_lid = *id; info->oti_cache.oic_fid = *fid; + info->oti_cache.oic_dev = osd; return 0; } @@ -3942,7 +3963,7 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, rc = osd_ea_fid_get(env, obj, ino, fid, id); else 
osd_id_gen(id, ino, OSD_OII_NOGEN); - if (rc != 0 || osd_remote_fid(env, dev, fid)) { + if (rc != 0) { fid_zero(&oic->oic_fid); GOTO(out, rc); } @@ -5538,6 +5559,9 @@ static int osd_device_init0(const struct lu_env *env, GOTO(out_mnt, rc); } + if (server_name_is_ost(o->od_svname)) + o->od_is_ost = 1; + rc = osd_obj_map_init(env, o); if (rc != 0) GOTO(out_mnt, rc); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 3daf5b2..7022073 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -243,7 +243,8 @@ struct osd_device { od_noscrub:1, od_dirent_journal:1, od_igif_inoi:1, - od_check_ff:1; + od_check_ff:1, + od_is_ost:1; unsigned long od_capa_timeout; __u32 od_capa_alg; @@ -683,7 +684,7 @@ int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, int osd_scrub_dump(struct osd_device *dev, char *buf, int len); int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range); + obd_seq seq, struct lu_seq_range *range); int osd_delete_from_remote_parent(const struct lu_env *env, struct osd_device *osd, @@ -696,6 +697,8 @@ int osd_lookup_in_remote_parent(struct osd_thread_info *oti, const struct lu_fid *fid, struct osd_inode_id *id); +int osd_ost_seq_exists(struct osd_thread_info *info, struct osd_device *osd, + __u64 seq); /* osd_quota_fmt.c */ int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj, int type, uint blk, int depth, uint index, @@ -841,7 +844,6 @@ static inline char *osd_name(struct osd_device *osd) return osd->od_dt_dev.dd_lu_dev.ld_obd->obd_name; } - extern const struct dt_body_operations osd_body_ops; extern struct lu_context_key osd_key; diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index 230f015..8eff0a3 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -470,8 +470,6 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, int 
fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd, const struct lu_fid *fid, enum oi_check_flags flags) { - struct lu_seq_range *range = &info->oti_seq_range; - int rc; ENTRY; if (flags & OI_KNOWN_ON_OST) @@ -487,17 +485,7 @@ int fid_is_on_ost(struct osd_thread_info *info, struct osd_device *osd, if (!(flags & OI_CHECK_FLD)) RETURN(0); - rc = osd_fld_lookup(info->oti_env, osd, fid, range); - if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd_name(osd), PFID(fid)); - RETURN(rc); - } - - CDEBUG(D_INFO, "fid "DFID" range "DRANGE"\n", PFID(fid), - PRANGE(range)); - - if (fld_range_is_ost(range)) + if (osd->od_is_ost) RETURN(1); RETURN(0); diff --git a/lustre/osd-ldiskfs/osd_oi.h b/lustre/osd-ldiskfs/osd_oi.h index 65e85ac..5ddf162 100644 --- a/lustre/osd-ldiskfs/osd_oi.h +++ b/lustre/osd-ldiskfs/osd_oi.h @@ -87,6 +87,7 @@ struct osd_inode_id { struct osd_idmap_cache { struct lu_fid oic_fid; struct osd_inode_id oic_lid; + struct osd_device *oic_dev; }; static inline void osd_id_pack(struct osd_inode_id *tgt, diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 9944e78..f41a9dd 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -529,6 +529,9 @@ static int osd_mount(const struct lu_env *env, strncpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname) - 1); + if (server_name_is_ost(o->od_svname)) + o->od_is_ost = 1; + rc = -udmu_objset_open(o->od_mntdev, &o->od_objset); if (rc) { CERROR("can't open objset %s: %d\n", o->od_mntdev, rc); diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index 6ebd0ab..29df71d 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -472,25 +472,44 @@ static inline void osd_object_put(const struct lu_env *env, lu_object_put(env, &obj->oo_dt.do_lu); } -static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, - struct lu_fid *fid) +static int osd_mdt_seq_exists(const struct lu_env *env, 
struct osd_device *osd, + obd_seq seq) { struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; struct seq_server_site *ss = osd_seq_site(osd); int rc; ENTRY; - if (!fid_is_norm(fid) && !fid_is_root(fid)) - RETURN(0); + if (ss == NULL) + RETURN(1); - rc = osd_fld_lookup(env, osd, fid, range); + /* XXX: currently, each MDT only store avaible sequence on disk, + * and no allocated sequences information on disk, so it has to + * lookup FLDB. It probably makes more sense also store allocated + * sequence locally, so we do not need do remote FLDB lookup in OSD */ + rc = osd_fld_lookup(env, osd, seq, range); if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd_name(osd), PFID(fid)); - RETURN(rc); + CERROR("%s: Can not lookup fld for "LPX64"\n", + osd_name(osd), seq); + RETURN(0); } - RETURN(ss->ss_node_id != range->lsr_index); + RETURN(ss->ss_node_id == range->lsr_index); +} + +static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, + struct lu_fid *fid) +{ + ENTRY; + + if (!fid_is_norm(fid) && !fid_is_root(fid)) + RETURN(0); + + /* Currently, it only used to check FID on MDT */ + if (osd_mdt_seq_exists(env, osd, fid_seq(fid))) + RETURN(0); + + RETURN(1); } /** diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index 201077d..37f351c 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -245,13 +245,16 @@ struct osd_device { struct lprocfs_stats *od_stats; uint64_t od_root; + uint64_t od_O_id; struct osd_oi **od_oi_table; unsigned int od_oi_count; struct osd_seq_list od_seq_list; unsigned int od_rdonly:1, od_xattr_in_sa:1, - od_quota_iused_est:1; + od_quota_iused_est:1, + od_is_ost:1; + char od_mntdev[128]; char od_svname[128]; @@ -433,12 +436,13 @@ uint64_t osd_get_name_n_idx(const struct lu_env *env, struct osd_device *osd, int osd_options_init(void); int osd_convert_root_to_new_seq(const struct lu_env *env, struct osd_device *o); - +int osd_ost_seq_exists(const struct 
lu_env *env, struct osd_device *osd, + __u64 seq); /* osd_index.c */ int osd_index_try(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat); int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range); + obd_seq seq, struct lu_seq_range *range); /* osd_xattr.c */ int __osd_xattr_load(udmu_objset_t *uos, uint64_t dnode, nvlist_t **sa_xattr); diff --git a/lustre/osd-zfs/osd_oi.c b/lustre/osd-zfs/osd_oi.c index 706b9e4..be873b3 100644 --- a/lustre/osd-zfs/osd_oi.c +++ b/lustre/osd-zfs/osd_oi.c @@ -208,18 +208,18 @@ osd_oi_find_or_create(const struct lu_env *env, struct osd_device *o, * the object is located (tgt index) and it is MDT or OST object. */ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, - const struct lu_fid *fid, struct lu_seq_range *range) + obd_seq seq, struct lu_seq_range *range) { struct seq_server_site *ss = osd_seq_site(osd); int rc; - if (fid_is_idif(fid)) { + if (fid_seq_is_idif(seq)) { fld_range_set_ost(range); - range->lsr_index = fid_idif_ost_idx(fid); + range->lsr_index = idif_ost_idx(seq); return 0; } - if (!fid_seq_in_fldb(fid_seq(fid))) { + if (!fid_seq_in_fldb(seq)) { fld_range_set_mdt(range); if (ss != NULL) /* FIXME: If ss is NULL, it suppose not get lsr_index @@ -230,34 +230,22 @@ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, LASSERT(ss != NULL); fld_range_set_any(range); - rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), range); + rc = fld_server_lookup(env, ss->ss_server_fld, seq, range); if (rc != 0) - CERROR("%s: cannot find FLD range for "DFID": rc = %d\n", - osd_name(osd), PFID(fid), rc); + CERROR("%s: cannot find FLD range for "LPX64": rc = %d\n", + osd_name(osd), seq, rc); return rc; } int fid_is_on_ost(const struct lu_env *env, struct osd_device *osd, const struct lu_fid *fid) { - struct lu_seq_range *range = &osd_oti_get(env)->oti_seq_range; - int rc; ENTRY; if 
(fid_is_idif(fid)) RETURN(1); - rc = osd_fld_lookup(env, osd, fid, range); - if (rc != 0) { - CERROR("%s: Can not lookup fld for "DFID"\n", - osd_name(osd), PFID(fid)); - RETURN(rc); - } - - CDEBUG(D_INFO, "fid "DFID" range "DRANGE"\n", PFID(fid), - PRANGE(range)); - - if (fld_range_is_ost(range)) + if (osd->od_is_ost) RETURN(1); RETURN(0); @@ -326,12 +314,7 @@ static struct osd_seq *osd_find_or_add_seq(const struct lu_env *env, if (osd_seq->os_compat_dirs == NULL) GOTO(out, rc = -ENOMEM); - rc = osd_oi_lookup(env, osd, osd->od_root, "O", &oi); - if (rc != 0) { - CERROR("%s: Can not find O: rc = %d\n", osd_name(osd), rc); - GOTO(out, rc); - } - + oi.oi_zapid = osd->od_O_id; sprintf(seq_name, (fid_seq_is_rsvd(seq) || fid_seq_is_mdt0(seq)) ? LPU64 : LPX64i, fid_seq_is_idif(seq) ? 0 : seq); @@ -655,6 +638,8 @@ osd_oi_init_compat(const struct lu_env *env, struct osd_device *o) if (rc) RETURN(rc); + o->od_O_id = sdb; + osd_ost_seq_init(env, o); /* Create on-disk indexes to maintain per-UID/GID inode usage. * Those new indexes are created in the top-level ZAP outside the