Whamcloud - gitweb
b=17670
authorpravin shelar <pravin.shelar@sun.com>
Tue, 23 Mar 2010 18:36:58 +0000 (11:36 -0700)
committerRobert Read <rread@sun.com>
Tue, 23 Mar 2010 18:36:58 +0000 (11:36 -0700)
Enable the FID_in_Dirent feature for OSD. Support for it has been added to
the ext3- and ext4-based ldiskfs; this patch uses those APIs so that the
Lustre OSD can make use of it.

i=rahul.deshmukh
i=girish
i=andreas.dilger

ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series
lustre/fid/fid_lib.c
lustre/include/lustre_fid.h
lustre/osd/osd_handler.c
lustre/osd/osd_internal.h
lustre/utils/mkfs_lustre.c

index ac5f884..b24c812 100644 (file)
@@ -30,3 +30,4 @@ ext4-kill-dx_root.patch
 ext4-extents-mount-option-rhel5.patch
 ext4-fiemap-2.6-rhel5.patch
 ext4-mballoc-pa_free-mismatch.patch
+ext4_data_in_dirent.patch
index 1325e08..28e3460 100644 (file)
@@ -36,3 +36,4 @@ ext3-corrupted-orphans-2.6.patch
 ext3-kill-dx_root.patch
 ext3-fiemap-2.6-rhel5.patch
 ext3-mballoc-pa_free-mismatch.patch
+ext3_data_in_dirent.patch
index ab6422c..ec50951 100644 (file)
@@ -73,7 +73,7 @@
  * FID_SEQ_START + 2 is for .lustre directory and its objects
  */
 const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = {
-        FID_SEQ_START + 0x400ULL,
+        FID_SEQ_DISTRIBUTED_START,
         (__u64)~0ULL
 };
 EXPORT_SYMBOL(LUSTRE_SEQ_SPACE_RANGE);
index 926e3e6..6e952b5 100644 (file)
@@ -92,6 +92,9 @@ enum {
  * used sparingly until ldiskfs-based MDT backends and/or IGIF FIDs 
  * have been completely removed. */
 
+/** fid sequence for distributed fs objects */
+#define FID_SEQ_DISTRIBUTED_START     (FID_SEQ_START + 0x400ULL)
+
 /** special OID for local objects */
 enum {
         /** \see osd_oi_index_create */
index 535d829..56b2266 100644 (file)
@@ -38,6 +38,7 @@
  * Top-level entry points into osd module
  *
  * Author: Nikita Danilov <nikita@clusterfs.com>
+ *         Pravin Shelar <pravin.shelar@sun.com> : Added fid in dirent
  */
 
 #ifndef EXPORT_SYMTAB
@@ -1723,7 +1724,7 @@ static inline void osd_igif_get(const struct lu_env *env, struct inode  *inode,
 }
 
 /**
- * Helper function to pack the fid
+ * Helper function to pack the fid, ldiskfs stores fid in packed format.
  */
 void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid,
                   struct lu_fid *befider)
@@ -1733,6 +1734,24 @@ void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid,
         pack->fp_len =  sizeof(*befider) + 1;
 }
 
+/**
+ * Fill \a param with \a fid so that it can be handed to ldiskfs, which
+ * supports storing the fid in the dirent; it is passed in dentry->d_fsdata.
+ * Lustre 1.8 also uses d_fsdata for passing other info to ldiskfs, so to
+ * stay compatible with the 1.8 ldiskfs driver the data must start with a
+ * magic number.
+ * struct ldiskfs_dentry_param is used only to pass the fid from osd to
+ * ldiskfs; it is an in-memory API.
+ *
+ * \param param  buffer to fill; it must have room for a struct lu_fid
+ *               after the header (callers in this file pass the
+ *               OSD_FID_REC_SZ-sized oti_ldp / oti_ldp2 buffers)
+ * \param fid    fid to store; it is treated as a struct lu_fid
+ */
+void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
+                                  const struct dt_rec *fid)
+{
+        param->edp_magic = LDISKFS_LUFID_MAGIC;
+        /* same "sizeof fid + 1" length convention as fp_len in osd_fid_pack() */
+        param->edp_len =  sizeof(struct lu_fid) + 1;
+
+        /* the fid is written into edp_data through fid_cpu_to_be() */
+        fid_cpu_to_be((struct lu_fid *)param->edp_data,
+                      (struct lu_fid *)fid);
+}
+
 int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack)
 {
         int result;
@@ -1805,7 +1824,6 @@ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj,
                 rc = 0;
         }
         iput(inode);
-
 out:
         RETURN(rc);
 }
@@ -1828,7 +1846,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         struct osd_object      *obj    = osd_dt_obj(dt);
         struct osd_thread_info *info   = osd_oti_get(env);
         int result;
-        int is_root = 0;
 
         ENTRY;
 
@@ -1839,11 +1856,8 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
 
-        if (hint && hint->dah_parent)
-                is_root = osd_object_is_root(osd_dt_obj(hint->dah_parent));
-
         /* objects under osd root shld have igif fid, so dont add fid EA */
-        if (result == 0 && is_root == 0)
+        if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START)
                 result = osd_ea_fid_set(env, dt, fid);
 
         if (result == 0)
@@ -2435,6 +2449,19 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt,
         RETURN(rc);
 }
 
+/**
+ * Extract the fid stored in a directory entry, if the entry carries one.
+ *
+ * When LDISKFS_DIRENT_LUFID is set in de->file_type, the packed fid is read
+ * from just past the name (de->name + de->name_len + 1) and unpacked into
+ * \a fid.
+ *
+ * \param de   directory entry to inspect
+ * \param fid  output record; it is treated as a struct lu_fid
+ *
+ * \retval -ENODATA  LDISKFS_DIRENT_LUFID is not set, so there is no fid
+ * \retval otherwise the result of osd_fid_unpack()
+ */
+static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de,
+                                          struct dt_rec *fid)
+{
+        struct osd_fid_pack *rec;
+        int rc = -ENODATA;
+
+        if (de->file_type & LDISKFS_DIRENT_LUFID) {
+                rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
+                rc = osd_fid_unpack((struct lu_fid *)fid, rec);
+        }
+        /* this helper has no ENTRY, so leave with a plain return, not RETURN */
+        return rc;
+}
+
 /**
  * Index delete function for interoperability mode (b11826).
  * It will remove the directory entry added by osd_index_ea_insert().
@@ -2541,7 +2568,7 @@ static int osd_index_iam_lookup(const struct lu_env *env, struct dt_object *dt,
         rc = iam_it_get(it, (struct iam_key *)key);
         if (rc >= 0) {
                 if (S_ISDIR(obj->oo_inode->i_mode))
-                        iam_rec = (struct iam_rec *)oti->oti_fid_packed;
+                        iam_rec = (struct iam_rec *)oti->oti_ldp;
                 else
                         iam_rec = (struct iam_rec *) rec;
 
@@ -2583,7 +2610,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         cfs_cap_t              save = current->cap_effective;
 #endif
         struct osd_thread_info *oti = osd_oti_get(env);
-        struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_fid_packed;
+        struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp;
         int rc;
 
         ENTRY;
@@ -2633,13 +2660,14 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
  */
 static int __osd_ea_add_rec(struct osd_thread_info *info,
                             struct osd_object *pobj,
-                            struct osd_object *cobj,
+                            struct inode  *cinode,
                             const char *name,
+                            const struct dt_rec *fid,
                             struct thandle *th)
 {
+        struct ldiskfs_dentry_param *ldp;
         struct dentry      *child;
         struct osd_thandle *oth;
-        struct inode       *cinode  = cobj->oo_inode;
         int rc;
 
         oth = container_of(th, struct osd_thandle, ot_super);
@@ -2647,6 +2675,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
         LASSERT(oth->ot_handle->h_transaction != NULL);
 
         child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
+
+        if (fid_is_igif((struct lu_fid *)fid) ||
+            fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) {
+                ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
+                osd_get_ldiskfs_dirent_param(ldp, fid);
+                child->d_fsdata = (void*) ldp;
+        } else
+                child->d_fsdata = NULL;
         rc = ldiskfs_add_entry(oth->ot_handle, child, cinode);
 
         RETURN(rc);
@@ -2666,11 +2702,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
  */
 static int osd_add_dot_dotdot(struct osd_thread_info *info,
                               struct osd_object *dir,
-                              struct osd_object *obj, const char *name,
+                              struct inode  *parent_dir, const char *name,
+                              const struct dt_rec *dot_fid,
+                              const struct dt_rec *dot_dot_fid,
                               struct thandle *th)
 {
-        struct inode            *parent_dir   = obj->oo_inode;
         struct inode            *inode  = dir->oo_inode;
+        struct ldiskfs_dentry_param *dot_ldp;
+        struct ldiskfs_dentry_param *dot_dot_ldp;
         struct osd_thandle      *oth;
         int result = 0;
 
@@ -2682,17 +2721,31 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
                 if (dir->oo_compat_dot_created) {
                         result = -EEXIST;
                 } else {
-                        LASSERT(obj == dir);
+                        LASSERT(inode == parent_dir);
                         dir->oo_compat_dot_created = 1;
                         result = 0;
                 }
         } else if(strcmp(name, dotdot) == 0) {
+                dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
+                dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2;
+
                 if (!dir->oo_compat_dot_created)
                         return -EINVAL;
-                if (dir->oo_compat_dotdot_created)
-                        return __osd_ea_add_rec(info, dir, obj, name, th);
+                if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) {
+                        osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
+                        osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
+                } else {
+                        dot_ldp = NULL;
+                        dot_dot_ldp = NULL;
+                }
+                /* in case of rename, dotdot is already created */
+                if (dir->oo_compat_dotdot_created) {
+                        return __osd_ea_add_rec(info, dir, parent_dir, name,
+                                                dot_dot_fid, th);
+                }
 
-                result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode);
+                result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode,
+                                                dot_ldp, dot_dot_ldp);
                 if (result == 0)
                        dir->oo_compat_dotdot_created = 1;
         }
@@ -2707,8 +2760,9 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
  */
 static int osd_ea_add_rec(const struct lu_env *env,
                           struct osd_object *pobj,
-                          struct osd_object *cobj,
+                          struct inode *cinode,
                           const char *name,
+                          const struct dt_rec *fid,
                           struct thandle *th)
 {
         struct osd_thread_info    *info   = osd_oti_get(env);
@@ -2716,9 +2770,11 @@ static int osd_ea_add_rec(const struct lu_env *env,
 
         if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' &&
                                                    name[2] =='\0')))
-                rc = osd_add_dot_dotdot(info, pobj, cobj, name, th);
+                rc = osd_add_dot_dotdot(info, pobj, cinode, name,
+                     (struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu),
+                                        fid, th);
         else
-                rc = __osd_ea_add_rec(info, pobj, cobj, name, th);
+                rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, th);
 
         return rc;
 }
@@ -2751,8 +2807,12 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj,
         bh = ll_ldiskfs_find_entry(dir, dentry, &de);
         if (bh) {
                 ino = le32_to_cpu(de->inode);
+                rc = osd_get_fid_from_dentry(de, rec);
+
+                /* done with de, release bh */
                 brelse(bh);
-                rc = osd_ea_fid_get(env, obj, ino, fid);
+                if (rc != 0)
+                        rc = osd_ea_fid_get(env, obj, ino, fid);
         } else
                 rc = -ENOENT;
 
@@ -2866,7 +2926,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
                         current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
 #endif
                 cfs_down_write(&obj->oo_ext_idx_sem);
-                rc = osd_ea_add_rec(env, obj, child, name, th);
+                rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th);
                 cfs_up_write(&obj->oo_ext_idx_sem);
 #ifdef HAVE_QUOTA_SUPPORT
                 current->cap_effective = save;
@@ -3238,8 +3298,10 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen,
                                loff_t offset, __u64 ino,
                                unsigned d_type)
 {
-        struct osd_it_ea        *it = (struct osd_it_ea *)buf;
-        struct osd_it_ea_dirent *ent = it->oie_dirent;
+        struct osd_it_ea        *it   = (struct osd_it_ea *)buf;
+        struct osd_it_ea_dirent *ent  = it->oie_dirent;
+        struct lu_fid           *fid  = &ent->oied_fid;
+        struct osd_fid_pack     *rec;
         ENTRY;
 
         /* this should never happen */
@@ -3252,6 +3314,17 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen,
             OSD_IT_EA_BUFSIZE)
                 RETURN(1);
 
+        if (d_type & LDISKFS_DIRENT_LUFID) {
+                rec = (struct osd_fid_pack*) (name + namelen + 1);
+
+                if (osd_fid_unpack(fid, rec) != 0)
+                        fid_zero(fid);
+
+                d_type &= ~LDISKFS_DIRENT_LUFID;
+        } else {
+                fid_zero(fid);
+        }
+
         ent->oied_ino     = ino;
         ent->oied_off     = offset;
         ent->oied_namelen = namelen;
@@ -3383,13 +3456,13 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
 {
         struct osd_it_ea        *it     = (struct osd_it_ea *)di;
         struct osd_object       *obj    = it->oie_obj;
-        struct osd_thread_info  *info   = osd_oti_get(env);
-        struct lu_fid           *fid       = &info->oti_fid;
-        int                      rc;
+        struct lu_fid           *fid    = &it->oie_dirent->oied_fid;
+        int    rc = 0;
 
         ENTRY;
 
-        rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid);
+        if (!fid_is_sane(fid))
+                rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid);
 
         if (rc == 0)
                 osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off,
index b3da83a..9c5db4a 100644 (file)
@@ -147,6 +147,7 @@ struct osd_fid_pack {
 };
 
 struct osd_it_ea_dirent {
+        struct lu_fid   oied_fid;
         __u64           oied_ino;
         __u64           oied_off;
         unsigned short  oied_namelen;
@@ -154,7 +155,14 @@ struct osd_it_ea_dirent {
         char            oied_name[0];
 } __attribute__((packed));
 
-#define OSD_IT_EA_BUFSIZE       CFS_PAGE_SIZE
+/**
+ * osd_it_ea_dirent (the in-memory dirent struct for osd) is larger than
+ * struct lu_dirent, so an osd readdir reads fewer dirents than are needed
+ * to fill an mdd dir page. The buffer size is therefore increased so that
+ * there is one ext3 readdir for every mdd readdir page.
+ */
+
+#define OSD_IT_EA_BUFSIZE       (CFS_PAGE_SIZE + CFS_PAGE_SIZE/4)
 
 /**
  * This is iterator's in-memory data structure in interoperability
@@ -256,7 +264,8 @@ struct osd_thread_info {
 #endif
         struct lu_env          oti_obj_delete_tx_env;
 #define OSD_FID_REC_SZ 32
-        char                   oti_fid_packed[OSD_FID_REC_SZ];
+        char                   oti_ldp[OSD_FID_REC_SZ];
+        char                   oti_ldp2[OSD_FID_REC_SZ];
 };
 
 #ifdef LPROCFS
index 1e59966..cbc33db 100644 (file)
@@ -526,7 +526,13 @@ static void enable_default_backfs_features(struct mkfs_opts *mop)
         int maj_high, maj_low, min;
         int ret;
 
-        strscat(mop->mo_mkfsopts, " -O dir_index,extents", sizeof(mop->mo_mkfsopts));
+        if (IS_MDT(&mop->mo_ldd))
+                strscat(mop->mo_mkfsopts, " -O dir_index,extents,dirdata",
+                                sizeof(mop->mo_mkfsopts));
+        else
+                strscat(mop->mo_mkfsopts, " -O dir_index,extents",
+                                sizeof(mop->mo_mkfsopts));
+
 
         /* Upstream e2fsprogs called our uninit_groups feature uninit_bg,
          * check for both of them when testing e2fsprogs features. */