From 64218d0b47731c039234b16b8343d8ee3b407281 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Tue, 23 Mar 2010 11:36:58 -0700 Subject: [PATCH] b=17670 enable FID_in_Dirent feature for OSD. this support is added to ext3 and ext4 based ldiskfs. following patch use those APIs so that Lustre OSD can make use of it. i=rahul.deshmukh i=girish i=andreas.dilger --- .../series/ldiskfs-2.6-rhel5-ext4.series | 1 + .../kernel_patches/series/ldiskfs-2.6-rhel5.series | 1 + lustre/fid/fid_lib.c | 2 +- lustre/include/lustre_fid.h | 3 + lustre/osd/osd_handler.c | 129 ++++++++++++++++----- lustre/osd/osd_internal.h | 13 ++- lustre/utils/mkfs_lustre.c | 8 +- 7 files changed, 125 insertions(+), 32 deletions(-) diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series index ac5f884..b24c812 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series @@ -30,3 +30,4 @@ ext4-kill-dx_root.patch ext4-extents-mount-option-rhel5.patch ext4-fiemap-2.6-rhel5.patch ext4-mballoc-pa_free-mismatch.patch +ext4_data_in_dirent.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series index 1325e08..28e3460 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series @@ -36,3 +36,4 @@ ext3-corrupted-orphans-2.6.patch ext3-kill-dx_root.patch ext3-fiemap-2.6-rhel5.patch ext3-mballoc-pa_free-mismatch.patch +ext3_data_in_dirent.patch diff --git a/lustre/fid/fid_lib.c b/lustre/fid/fid_lib.c index ab6422c..ec50951 100644 --- a/lustre/fid/fid_lib.c +++ b/lustre/fid/fid_lib.c @@ -73,7 +73,7 @@ * FID_SEQ_START + 2 is for .lustre directory and its objects */ const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = { - FID_SEQ_START + 0x400ULL, + FID_SEQ_DISTRIBUTED_START, (__u64)~0ULL }; 
EXPORT_SYMBOL(LUSTRE_SEQ_SPACE_RANGE); diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index 926e3e6..6e952b5 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -92,6 +92,9 @@ enum { * used sparingly until ldiskfs-based MDT backends and/or IGIF FIDs * have been completely removed. */ +/** fid sequence for distributed fs objects */ +#define FID_SEQ_DISTRIBUTED_START (FID_SEQ_START + 0x400ULL) + /** special OID for local objects */ enum { /** \see osd_oi_index_create */ diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 535d829..56b2266 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -38,6 +38,7 @@ * Top-level entry points into osd module * * Author: Nikita Danilov + * Pravin Shelar : Added fid in dirent */ #ifndef EXPORT_SYMTAB @@ -1723,7 +1724,7 @@ static inline void osd_igif_get(const struct lu_env *env, struct inode *inode, } /** - * Helper function to pack the fid + * Helper function to pack the fid, ldiskfs stores fid in packed format. */ void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid, struct lu_fid *befider) @@ -1733,6 +1734,24 @@ void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid, pack->fp_len = sizeof(*befider) + 1; } +/** + * ldiskfs supports fid in dirent, it is passed in dentry->d_fsdata. + * lustre 1.8 also uses d_fsdata for passing other info to ldiskfs. + * To have compatibility with 1.8 ldiskfs driver we need to have + * magic number at start of fid data. + * \ldiskfs_dentry_param is used only to pass fid from osd to ldiskfs. + * It is an in-memory API. 
+ */ +void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param, + const struct dt_rec *fid) +{ + param->edp_magic = LDISKFS_LUFID_MAGIC; + param->edp_len = sizeof(struct lu_fid) + 1; + + fid_cpu_to_be((struct lu_fid *)param->edp_data, + (struct lu_fid *)fid); +} + int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack) { int result; @@ -1805,7 +1824,6 @@ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj, rc = 0; } iput(inode); - out: RETURN(rc); } @@ -1828,7 +1846,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, struct osd_object *obj = osd_dt_obj(dt); struct osd_thread_info *info = osd_oti_get(env); int result; - int is_root = 0; ENTRY; @@ -1839,11 +1856,8 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, result = __osd_object_create(info, obj, attr, hint, dof, th); - if (hint && hint->dah_parent) - is_root = osd_object_is_root(osd_dt_obj(hint->dah_parent)); - /* objects under osd root shld have igif fid, so dont add fid EA */ - if (result == 0 && is_root == 0) + if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START) result = osd_ea_fid_set(env, dt, fid); if (result == 0) @@ -2435,6 +2449,19 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } +static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de, + struct dt_rec *fid) +{ + struct osd_fid_pack *rec; + int rc = -ENODATA; + + if (de->file_type & LDISKFS_DIRENT_LUFID) { + rec = (struct osd_fid_pack *) (de->name + de->name_len + 1); + rc = osd_fid_unpack((struct lu_fid *)fid, rec); + } + RETURN(rc); +} + /** * Index delete function for interoperability mode (b11826). * It will remove the directory entry added by osd_index_ea_insert(). 
@@ -2541,7 +2568,7 @@ static int osd_index_iam_lookup(const struct lu_env *env, struct dt_object *dt, rc = iam_it_get(it, (struct iam_key *)key); if (rc >= 0) { if (S_ISDIR(obj->oo_inode->i_mode)) - iam_rec = (struct iam_rec *)oti->oti_fid_packed; + iam_rec = (struct iam_rec *)oti->oti_ldp; else iam_rec = (struct iam_rec *) rec; @@ -2583,7 +2610,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, cfs_cap_t save = current->cap_effective; #endif struct osd_thread_info *oti = osd_oti_get(env); - struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_fid_packed; + struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp; int rc; ENTRY; @@ -2633,13 +2660,14 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, */ static int __osd_ea_add_rec(struct osd_thread_info *info, struct osd_object *pobj, - struct osd_object *cobj, + struct inode *cinode, const char *name, + const struct dt_rec *fid, struct thandle *th) { + struct ldiskfs_dentry_param *ldp; struct dentry *child; struct osd_thandle *oth; - struct inode *cinode = cobj->oo_inode; int rc; oth = container_of(th, struct osd_thandle, ot_super); @@ -2647,6 +2675,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, LASSERT(oth->ot_handle->h_transaction != NULL); child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name)); + + if (fid_is_igif((struct lu_fid *)fid) || + fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) { + ldp = (struct ldiskfs_dentry_param *)info->oti_ldp; + osd_get_ldiskfs_dirent_param(ldp, fid); + child->d_fsdata = (void*) ldp; + } else + child->d_fsdata = NULL; rc = ldiskfs_add_entry(oth->ot_handle, child, cinode); RETURN(rc); @@ -2666,11 +2702,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info, */ static int osd_add_dot_dotdot(struct osd_thread_info *info, struct osd_object *dir, - struct osd_object *obj, const char *name, + struct inode *parent_dir, const char *name, + const struct dt_rec *dot_fid, + 
const struct dt_rec *dot_dot_fid, struct thandle *th) { - struct inode *parent_dir = obj->oo_inode; struct inode *inode = dir->oo_inode; + struct ldiskfs_dentry_param *dot_ldp; + struct ldiskfs_dentry_param *dot_dot_ldp; struct osd_thandle *oth; int result = 0; @@ -2682,17 +2721,31 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, if (dir->oo_compat_dot_created) { result = -EEXIST; } else { - LASSERT(obj == dir); + LASSERT(inode == parent_dir); dir->oo_compat_dot_created = 1; result = 0; } } else if(strcmp(name, dotdot) == 0) { + dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp; + dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2; + if (!dir->oo_compat_dot_created) return -EINVAL; - if (dir->oo_compat_dotdot_created) - return __osd_ea_add_rec(info, dir, obj, name, th); + if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) { + osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid); + osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid); + } else { + dot_ldp = NULL; + dot_dot_ldp = NULL; + } + /* in case of rename, dotdot is already created */ + if (dir->oo_compat_dotdot_created) { + return __osd_ea_add_rec(info, dir, parent_dir, name, + dot_dot_fid, th); + } - result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode); + result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode, + dot_ldp, dot_dot_ldp); if (result == 0) dir->oo_compat_dotdot_created = 1; } @@ -2707,8 +2760,9 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info, */ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, - struct osd_object *cobj, + struct inode *cinode, const char *name, + const struct dt_rec *fid, struct thandle *th) { struct osd_thread_info *info = osd_oti_get(env); @@ -2716,9 +2770,11 @@ static int osd_ea_add_rec(const struct lu_env *env, if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' 
&& name[2] =='\0'))) - rc = osd_add_dot_dotdot(info, pobj, cobj, name, th); + rc = osd_add_dot_dotdot(info, pobj, cinode, name, + (struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu), + fid, th); else - rc = __osd_ea_add_rec(info, pobj, cobj, name, th); + rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, th); return rc; } @@ -2751,8 +2807,12 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, bh = ll_ldiskfs_find_entry(dir, dentry, &de); if (bh) { ino = le32_to_cpu(de->inode); + rc = osd_get_fid_from_dentry(de, rec); + + /* done with de, release bh */ brelse(bh); - rc = osd_ea_fid_get(env, obj, ino, fid); + if (rc != 0) + rc = osd_ea_fid_get(env, obj, ino, fid); } else rc = -ENOENT; @@ -2866,7 +2926,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt, current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK; #endif cfs_down_write(&obj->oo_ext_idx_sem); - rc = osd_ea_add_rec(env, obj, child, name, th); + rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th); cfs_up_write(&obj->oo_ext_idx_sem); #ifdef HAVE_QUOTA_SUPPORT current->cap_effective = save; @@ -3238,8 +3298,10 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, loff_t offset, __u64 ino, unsigned d_type) { - struct osd_it_ea *it = (struct osd_it_ea *)buf; - struct osd_it_ea_dirent *ent = it->oie_dirent; + struct osd_it_ea *it = (struct osd_it_ea *)buf; + struct osd_it_ea_dirent *ent = it->oie_dirent; + struct lu_fid *fid = &ent->oied_fid; + struct osd_fid_pack *rec; ENTRY; /* this should never happen */ @@ -3252,6 +3314,17 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen, OSD_IT_EA_BUFSIZE) RETURN(1); + if (d_type & LDISKFS_DIRENT_LUFID) { + rec = (struct osd_fid_pack*) (name + namelen + 1); + + if (osd_fid_unpack(fid, rec) != 0) + fid_zero(fid); + + d_type &= ~LDISKFS_DIRENT_LUFID; + } else { + fid_zero(fid); + } + ent->oied_ino = ino; ent->oied_off = offset; ent->oied_namelen = namelen; @@ 
-3383,13 +3456,13 @@ static inline int osd_it_ea_rec(const struct lu_env *env, { struct osd_it_ea *it = (struct osd_it_ea *)di; struct osd_object *obj = it->oie_obj; - struct osd_thread_info *info = osd_oti_get(env); - struct lu_fid *fid = &info->oti_fid; - int rc; + struct lu_fid *fid = &it->oie_dirent->oied_fid; + int rc = 0; ENTRY; - rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid); + if (!fid_is_sane(fid)) + rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid); if (rc == 0) osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off, diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h index b3da83a..9c5db4a 100644 --- a/lustre/osd/osd_internal.h +++ b/lustre/osd/osd_internal.h @@ -147,6 +147,7 @@ struct osd_fid_pack { }; struct osd_it_ea_dirent { + struct lu_fid oied_fid; __u64 oied_ino; __u64 oied_off; unsigned short oied_namelen; @@ -154,7 +155,14 @@ struct osd_it_ea_dirent { char oied_name[0]; } __attribute__((packed)); -#define OSD_IT_EA_BUFSIZE CFS_PAGE_SIZE +/** + * The osd_it_ea_dirent struct (in-memory dirent struct for osd) is larger + * than the lu_dirent struct, so osd readdir reads fewer dirents than are + * required for an mdd dir page. The buffer size needs to be increased so + * that there is one ext3 readdir for every mdd readdir page. 
+ */ + +#define OSD_IT_EA_BUFSIZE (CFS_PAGE_SIZE + CFS_PAGE_SIZE/4) /** * This is iterator's in-memory data structure in interoperability @@ -256,7 +264,8 @@ struct osd_thread_info { #endif struct lu_env oti_obj_delete_tx_env; #define OSD_FID_REC_SZ 32 - char oti_fid_packed[OSD_FID_REC_SZ]; + char oti_ldp[OSD_FID_REC_SZ]; + char oti_ldp2[OSD_FID_REC_SZ]; }; #ifdef LPROCFS diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index 1e59966..cbc33db 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -526,7 +526,13 @@ static void enable_default_backfs_features(struct mkfs_opts *mop) int maj_high, maj_low, min; int ret; - strscat(mop->mo_mkfsopts, " -O dir_index,extents", sizeof(mop->mo_mkfsopts)); + if (IS_MDT(&mop->mo_ldd)) + strscat(mop->mo_mkfsopts, " -O dir_index,extents,dirdata", + sizeof(mop->mo_mkfsopts)); + else + strscat(mop->mo_mkfsopts, " -O dir_index,extents", + sizeof(mop->mo_mkfsopts)); + /* Upstream e2fsprogs called our uninit_groups feature uninit_bg, * check for both of them when testing e2fsprogs features. */ -- 1.8.3.1