Whamcloud - gitweb
LU-376 Positive LL_DIR_END_OFF to indicate the tail of dir hash/offset
author nasf <yong.fan@whamcloud.com>
Sat, 4 Jun 2011 14:43:36 +0000 (22:43 +0800)
committer Johann Lombardi <johann@whamcloud.com>
Mon, 6 Jun 2011 09:07:12 +0000 (02:07 -0700)
1) Keep 'MDS_DIR_END_OFF' unchanged (0xfffffffffffffffeULL) to simplify
   dir hash/offset related interoperability issues.
2) Introduce a positive "LL_DIR_END_OFF" (0x7fffffffffffffffULL) on the
   client to indicate the tail of dir hash/offset to upper-layer callers
   such as llseek(), readdir(), and so on.
3) Allow a 1.8 client to talk to an old 2.0 server that uses a 32-bit hash.

Change-Id: I126ddb170b9ee24d1ae0610ac6343c9b0f5e4c70
Signed-off-by: nasf <yong.fan@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/887
Tested-by: Hudson
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
lustre/include/lustre/lustre_idl.h
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c

index ece8717..fc8984a 100644 (file)
@@ -768,6 +768,7 @@ enum lu_dirent_attrs {
         LUDA_TYPE   = 0x0002,
 };
 
+#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
 
 extern void lustre_swab_ll_fid (struct ll_fid *fid);
 
index 576f880..3154d32 100644 (file)
@@ -440,12 +440,11 @@ static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
 {
 }
 
-static inline unsigned long hash_x_index(__u64 hash)
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
 {
 #ifdef __KERNEL__
-# if BITS_PER_LONG == 32
-        hash >>= 32;
-# endif
+        if (BITS_PER_LONG == 32 && hash64)
+                hash >>= 32;
 #endif
         return ~0UL - hash;
 }
@@ -579,9 +578,6 @@ static inline int lu_dirent_size(struct lu_dirent *ent)
         return le16_to_cpu(ent->lde_reclen);
 }
 
-#define DIR_END_OFF              0xfffffffffffffffeULL
-#define DIR_END_OFF_32BIT        0xfffffffeUL
-
 #ifdef HAVE_RW_TREE_LOCK
 #define TREE_READ_LOCK_IRQ(mapping)     read_lock_irq(&(mapping)->tree_lock)
 #define TREE_READ_UNLOCK_IRQ(mapping) read_unlock_irq(&(mapping)->tree_lock)
@@ -646,13 +642,14 @@ static void ll_check_page(struct inode *dir, struct page *page)
 static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
                                        __u64 *start, __u64 *end)
 {
+        int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
         struct address_space *mapping = dir->i_mapping;
         /*
          * Complement of hash is used as an index so that
          * radix_tree_gang_lookup() can be used to find a page with starting
          * hash _smaller_ than one we are looking for.
          */
-        unsigned long offset = hash_x_index(*hash);
+        unsigned long offset = hash_x_index(*hash, hash64);
         struct page *page;
         int found;
         ENTRY;
@@ -676,14 +673,14 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
                 wait_on_page(page);
                 if (PageUptodate(page)) {
                         dp = kmap(page);
-#if BITS_PER_LONG == 32
-                        *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-                        *end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-                        *hash  = *hash >> 32;
-#else
-                        *start = le64_to_cpu(dp->ldp_hash_start);
-                        *end   = le64_to_cpu(dp->ldp_hash_end);
-#endif
+                        if (BITS_PER_LONG == 32 && hash64) {
+                                *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+                                *end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+                                *hash  = *hash >> 32;
+                        } else {
+                                *start = le64_to_cpu(dp->ldp_hash_start);
+                                *end   = le64_to_cpu(dp->ldp_hash_end);
+                        }
                         LASSERTF(*start <= *hash, "start = "LPX64",end = "
                                  LPX64",hash = "LPX64"\n", *start, *end, *hash);
                         if (*hash > *end || (*end != *start && *hash == *end)) {
@@ -722,6 +719,7 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
         __u64 start = 0;
         __u64 end = 0;
         __u64 lhash = hash;
+        int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
         ENTRY;
  
         fid_build_reg_res_name(ll_inode_lu_fid(dir), &res_id);
@@ -787,7 +785,7 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
                 }
         }
 
-        page = read_cache_page(mapping, hash_x_index(hash),
+        page = read_cache_page(mapping, hash_x_index(hash, hash64),
                                (filler_t*)ll_dir_readpage_20, filp);
         if (IS_ERR(page))
                 GOTO(out_unlock, page);
@@ -803,23 +801,23 @@ static struct page *ll_get_dir_page_20(struct file *filp, struct inode *dir,
 hash_collision:
         dp = page_address(page);
 
-#if BITS_PER_LONG == 32
-        start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-        end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-        lhash = hash >> 32;
-#else
-        start = le64_to_cpu(dp->ldp_hash_start);
-        end   = le64_to_cpu(dp->ldp_hash_end);
-        lhash = hash;
-#endif
+        if (BITS_PER_LONG == 32 && hash64) {
+                start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+                end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+                lhash = hash >> 32;
+        } else {
+                start = le64_to_cpu(dp->ldp_hash_start);
+                end   = le64_to_cpu(dp->ldp_hash_end);
+                lhash = hash;
+        }
         if (end == start) {
                 LASSERT(start == lhash);
                 CWARN("Page-wide hash collision: "LPU64"\n", end);
-#if BITS_PER_LONG == 32
-                CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with "
-                      "hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start),
-                      le64_to_cpu(dp->ldp_hash_end), hash);
-#endif
+                if (BITS_PER_LONG == 32 && hash64)
+                        CWARN("Real page-wide hash collision at ["LPU64" "LPU64
+                              "] with hash "LPU64"\n",
+                              le64_to_cpu(dp->ldp_hash_start),
+                              le64_to_cpu(dp->ldp_hash_end), hash);
                 /*
                  * Fetch whole overflow chain...
                  *
@@ -843,21 +841,20 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
         struct ll_sb_info    *sbi   = ll_i2sbi(inode);
         struct ll_file_data  *fd    = LUSTRE_FPRIVATE(filp);
         __u64                 pos   = fd->fd_dir.lfd_pos;
+        int                   api32 = ll_need_32bit_api(sbi);
+        int                   hash64= sbi->ll_flags & LL_SBI_64BIT_HASH;
         struct page          *page;
         struct ll_dir_chain   chain;
-        int rc;
-        int done;
-        int shift,need_32bit;
-        __u16 type;
+        int                   rc;
+        int                   done;
+        int                   shift;
         ENTRY;
 
-        need_32bit = ll_need_32bit_api(sbi);
-
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
                inode->i_ino, inode->i_generation, inode,
-               (unsigned long)pos, i_size_read(inode), need_32bit);
+               (unsigned long)pos, i_size_read(inode), api32);
 
-        if (pos == DIR_END_OFF)
+        if (pos == MDS_DIR_END_OFF)
                 /*
                  * end-of-file.
                  */
@@ -881,17 +878,17 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
                          * If page is empty (end of directoryis reached),
                          * use this value. 
                          */
-                        __u64 hash = DIR_END_OFF;
+                        __u64 hash = MDS_DIR_END_OFF;
                         __u64 next;
 
                         dp = page_address(page);
                         for (ent = lu_dirent_start(dp); ent != NULL && !done;
                              ent = lu_dirent_next(ent)) {
-                                char          *name;
+                                __u16          type;
                                 int            namelen;
                                 struct lu_fid  fid;
-                                __u64          ino;
                                 __u64          lhash;
+                                __u64          ino;
 
                                 hash = le64_to_cpu(ent->lde_hash);
                                 if (hash < pos)
@@ -908,25 +905,22 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
                                          */
                                         continue;
 
-                                name = ent->lde_name;
                                 fid_le_to_cpu(&fid, &ent->lde_fid);
-                                if (need_32bit) {
+                                ino = ll_fid_build_ino((struct ll_fid *)&fid,
+                                                       api32);
+                                if (api32 && hash64)
                                         lhash = hash >> 32;
-                                        ino = ll_fid_build_ino32((struct ll_fid *)&fid);
-                                } else {
+                                else
                                         lhash = hash;
-                                        ino = ll_fid_build_ino((struct ll_fid *)&fid);
-                                }
-
                                 type = ll_dirent_type_get(ent);
-                                done = filldir(cookie, name, namelen,
+                                done = filldir(cookie, ent->lde_name, namelen,
                                                lhash, ino, type);
                         }
                         next = le64_to_cpu(dp->ldp_hash_end);
                         ll_put_page(page);
                         if (!done) {
                                 pos = next;
-                                if (pos == DIR_END_OFF) {
+                                if (pos == MDS_DIR_END_OFF) {
                                         /*
                                          * End of directory reached.
                                          */
@@ -957,13 +951,16 @@ static int ll_readdir_20(struct file *filp, void *cookie, filldir_t filldir)
         }
 
         fd->fd_dir.lfd_pos = pos;
-        if (need_32bit) {
-                if (pos == DIR_END_OFF)
-                        filp->f_pos = DIR_END_OFF_32BIT;
+        if (pos == MDS_DIR_END_OFF) {
+                if (api32)
+                        filp->f_pos = LL_DIR_END_OFF_32BIT;
                 else
-                        filp->f_pos = pos >> 32;
+                        filp->f_pos = LL_DIR_END_OFF;
         } else {
-                filp->f_pos = pos;
+                if (api32 && hash64)
+                        filp->f_pos = pos >> 32;
+                else
+                        filp->f_pos = pos;
         }
         filp->f_version = inode->i_version;
         touch_atime(filp->f_vfsmnt, filp->f_dentry);
@@ -1667,9 +1664,9 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
 static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 {
         struct inode *inode = file->f_mapping->host;
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        int need_32bit = ll_need_32bit_api(sbi);
         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int api32 = ll_need_32bit_api(sbi);
         loff_t ret = -EINVAL;
         ENTRY;
 
@@ -1678,40 +1675,40 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 
         mutex_lock(&inode->i_mutex);
         switch (origin) {
-                case 2:
-                        offset += inode->i_size;
+                case SEEK_SET:
                         break;
-                case 1:
-                        if ((need_32bit && file->f_pos == DIR_END_OFF_32BIT) ||
-                            (!need_32bit && file->f_pos == DIR_END_OFF)) {
-                                if (offset == 0)
-                                        GOTO(out, ret = file->f_pos);
-                                else if (offset > 0)
-                                        GOTO(out, ret);
-                        }
+                case SEEK_CUR:
                         offset += file->f_pos;
                         break;
+                case SEEK_END:
+                        if (offset > 0)
+                                GOTO(out, ret);
+                        if (api32)
+                                offset += LL_DIR_END_OFF_32BIT;
+                        else
+                                offset += LL_DIR_END_OFF;
+                        break;
+                default:
+                        GOTO(out, ret);
         }
 
-        if (need_32bit && offset >= 0 && offset <= DIR_END_OFF_32BIT) {
+        if (offset >= 0 &&
+            ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
+             (!api32 && offset <= LL_DIR_END_OFF))) {
                 if (offset != file->f_pos) {
-                        if (offset == DIR_END_OFF_32BIT)
-                                fd->fd_dir.lfd_pos = DIR_END_OFF;
-                        else
+                        if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
+                            (!api32 && offset == LL_DIR_END_OFF))
+                                fd->fd_dir.lfd_pos = MDS_DIR_END_OFF;
+                        else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
                                 fd->fd_dir.lfd_pos = offset << 32;
-                        file->f_pos = offset;
-                        file->f_version = 0;
-                }
-                ret = offset;
-        } else if (!need_32bit && (offset >= 0 || offset == DIR_END_OFF)) {
-                if (offset != file->f_pos) {
-                        fd->fd_dir.lfd_pos = offset;
+                        else
+                                fd->fd_dir.lfd_pos = offset;
                         file->f_pos = offset;
                         file->f_version = 0;
                 }
                 ret = offset;
         }
-        EXIT;
+        GOTO(out, ret);
 
 out:
         mutex_unlock(&inode->i_mutex);
index 4858a16..ff9e06f 100644 (file)
@@ -3448,18 +3448,19 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                   struct lookup_intent *it, struct kstat *stat)
 {
         struct inode *inode = de->d_inode;
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_inode_info *lli = ll_i2info(inode);
         int res = 0;
 
         res = ll_inode_revalidate_it(de, it);
-        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1);
+        ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1);
 
         if (res)
                 return res;
 
         stat->dev = inode->i_sb->s_dev;
-        if (cfs_curproc_is_32bit())
-                stat->ino = ll_fid_build_ino32((struct ll_fid *)&lli->lli_fid);
+        if (ll_need_32bit_api(sbi))
+                stat->ino = ll_fid_build_ino((struct ll_fid *)&lli->lli_fid, 1);
         else
                 stat->ino = inode->i_ino;
         stat->mode = inode->i_mode;
index 5bf3b3e..c1fd4b6 100644 (file)
@@ -70,6 +70,10 @@ struct lustre_intent_data {
 #define FMODE_EXEC 0
 #endif
 
+/** Only used on client-side for indicating the tail of dir hash/offset. */
+#define LL_DIR_END_OFF          0x7fffffffffffffffULL
+#define LL_DIR_END_OFF_32BIT    0x7fffffffUL
+
 #ifndef DCACHE_LUSTRE_INVALID
 #define DCACHE_LUSTRE_INVALID 0x100
 #endif
@@ -304,6 +308,7 @@ enum stats_track_type {
 #define LL_SBI_LLITE_CHECKSUM  0x100 /* checksum each page in memory */
 #define LL_SBI_LAZYSTATFS      0x200 /* lazystatfs mount option */
 #define LL_SBI_32BIT_API       0x400 /* generate 32 bit inodes. */
+#define LL_SBI_64BIT_HASH      0x800 /* support 64-bits dir hash/offset */
 
 /* default value for ll_sb_info->contention_time */
 #define SBI_DEFAULT_CONTENTION_SECONDS     60
@@ -1204,8 +1209,7 @@ enum llioc_iter ll_iocontrol_call(struct inode *inode, struct file *file,
 void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
 void ll_iocontrol_unregister(void *magic);
 
-__u64 ll_fid_build_ino(const struct ll_fid *fid);
-__u32 ll_fid_build_ino32(const struct ll_fid *fid);
+__u64 ll_fid_build_ino(const struct ll_fid *fid, int api32);
 __u32 ll_fid_build_gen(struct ll_sb_info *sbi,
                        struct ll_fid *fid);
 
index 393eef4..6a93def 100644 (file)
@@ -384,6 +384,11 @@ static int client_common_fill_super(struct super_block *sb,
         if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
                 sbi->ll_flags |= LL_SBI_JOIN;
 
+        if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) {
+                LCONSOLE_INFO("client supports 64-bits dir hash/offset!\n");
+                sbi->ll_flags |= LL_SBI_64BIT_HASH;
+        }
+
         obd = class_name2obd(osc);
         if (!obd) {
                 CERROR("OSC %s: not setup or attached\n", osc);
@@ -504,7 +509,7 @@ static int client_common_fill_super(struct super_block *sb,
         }
 
         LASSERT(sbi->ll_rootino != 0);
-        root = ll_iget(sb, ll_fid_build_ino(&rootfid), &md);
+        root = ll_iget(sb, ll_fid_build_ino(&rootfid, 0), &md);
 
         ptlrpc_req_finished(request);
 
@@ -1959,7 +1964,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         }
 #endif
 
-        inode->i_ino = ll_fid_build_ino(&body->fid1);
+        inode->i_ino = ll_fid_build_ino(&body->fid1, 0);
         inode->i_generation = ll_fid_build_gen(sbi, &body->fid1);
         *ll_inode_lu_fid(inode) = *((struct lu_fid*)&md->body->fid1);
 
@@ -2294,7 +2299,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode,
                 /** hashing VFS inode by FIDs.
                  * IGIF will be used for for compatibility if needed.
                  */
-                *inode =ll_iget(sb, ll_fid_build_ino(&md.body->fid1), &md);
+                *inode =ll_iget(sb, ll_fid_build_ino(&md.body->fid1, 0), &md);
                 if (*inode == NULL || is_bad_inode(*inode)) {
                         mdc_free_lustre_md(exp, &md);
                         rc = -ENOMEM;
index 3e79725..87849b3 100644 (file)
@@ -115,22 +115,13 @@ static inline __u32 fid_flatten32(const struct lu_fid *fid)
 }
 
 /**
- * for 32 bit inode numbers directly map seq+oid to 32bit number.
- */
-__u32 ll_fid_build_ino32(const struct ll_fid *fid)
-{
-        RETURN(fid_flatten32((struct lu_fid *)fid));
-}
-
-/**
  * build inode number from passed @fid */
-__u64 ll_fid_build_ino(const struct ll_fid *fid)
+__u64 ll_fid_build_ino(const struct ll_fid *fid, int api32)
 {
-#if BITS_PER_LONG == 32
-        RETURN(fid_flatten32((struct lu_fid *)fid));
-#else
-        RETURN(fid_flatten((struct lu_fid *)fid));
-#endif
+        if (BITS_PER_LONG == 32 || api32)
+                RETURN(fid_flatten32((struct lu_fid *)fid));
+        else
+                RETURN(fid_flatten((struct lu_fid *)fid));
 }
 
 __u32 ll_fid_build_gen(struct ll_sb_info *sbi, struct ll_fid *fid)