Whamcloud - gitweb
LU-376 Positive LL_DIR_END_OFF to indicate the tail of dir hash/offset
author nasf <yong.fan@whamcloud.com>
Thu, 2 Jun 2011 06:44:32 +0000 (14:44 +0800)
committer Oleg Drokin <green@whamcloud.com>
Tue, 7 Jun 2011 19:14:28 +0000 (12:14 -0700)
1) Keep 'MDS_DIR_END_OFF' unchanged (0xfffffffffffffffeULL) to simplify
   dir hash/offset related interoperability issues.
2) Introduce a positive "LL_DIR_END_OFF" (0x7fffffffffffffffULL) on the
   client to indicate the tail of dir hash/offset for upper-layer callers,
   such as llseek(), readdir(), and so on.
3) Allow a new 2.x client to talk to an old 2.0 server that uses a 32-bit hash.

Change-Id: I74c148482f91ed91dfffd608f98971566f1257ad
Signed-off-by: nasf <yong.fan@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/886
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Hudson
Reviewed-by: Oleg Drokin <green@whamcloud.com>
14 files changed:
lustre/include/lclient.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_lite.h
lustre/lclient/lcommon_cl.c
lustre/liblustre/dir.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/statahead.c
lustre/lmv/lmv_obd.c
lustre/mdd/mdd_object.c
lustre/ptlrpc/wiretest.c
lustre/utils/wiretest.c

index e937e00..87361eb 100644 (file)
@@ -361,7 +361,7 @@ void cl_inode_fini(struct inode *inode);
 int cl_local_size(struct inode *inode);
 
 __u16 ll_dirent_type_get(struct lu_dirent *ent);
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit);
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
 __u32 cl_fid_build_gen(const struct lu_fid *fid);
 
 #ifdef INVARIANT_CHECK
index 8b268f3..e9add3b 100644 (file)
@@ -901,8 +901,7 @@ static inline int lu_dirent_size(struct lu_dirent *ent)
         return le16_to_cpu(ent->lde_reclen);
 }
 
-#define DIR_END_OFF              0x7fffffffffffffffULL
-#define DIR_END_OFF_32BIT        0x7fffffffUL
+#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
 
 /** @} lu_dir */
 
index ac71d69..7fa9937 100644 (file)
@@ -150,12 +150,11 @@ static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
 {
 }
 
-static inline unsigned long hash_x_index(__u64 hash)
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
 {
 #ifdef __KERNEL__
-# if BITS_PER_LONG == 32
-        hash >>= 32;
-# endif
+        if (BITS_PER_LONG == 32 && hash64)
+                hash >>= 32;
 #endif
         return ~0UL - hash;
 }
index fede3b4..b083a89 100644 (file)
@@ -1305,16 +1305,12 @@ __u16 ll_dirent_type_get(struct lu_dirent *ent)
 
 /**
  * build inode number from passed @fid */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit)
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
 {
-#if BITS_PER_LONG == 32
-        RETURN(fid_flatten32(fid));
-#else
-        if (need_32bit)
+        if (BITS_PER_LONG == 32 || api32)
                 RETURN(fid_flatten32(fid));
         else
                 RETURN(fid_flatten(fid));
-#endif
 }
 
 /**
index 02a2802..3faa795 100644 (file)
@@ -100,7 +100,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page)
         }
         ldlm_lock_dump_handle(D_OTHER, &lockh);
 
-        offset = (__u64)hash_x_index(page->index);
+        offset = (__u64)hash_x_index(page->index, 0);
         rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
                          offset, page, &request);
         if (!rc) {
@@ -129,7 +129,7 @@ static cfs_page_t *llu_dir_read_page(struct inode *ino, __u64 hash,
         OBD_PAGE_ALLOC(page, 0);
         if (!page)
                 RETURN(ERR_PTR(-ENOMEM));
-        page->index = hash_x_index(hash);
+        page->index = hash_x_index(hash, 0);
 
         rc = llu_dir_do_readpage(ino, page);
         if (rc) {
@@ -208,7 +208,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep,
                 RETURN(0);
         }
 
-        if (pos == DIR_END_OFF)
+        if (pos == MDS_DIR_END_OFF)
                 /*
                  * end-of-file.
                  */
@@ -229,7 +229,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep,
                          * If page is empty (end of directoryis reached),
                          * use this value.
                          */
-                        __u64 hash = DIR_END_OFF;
+                        __u64 hash = MDS_DIR_END_OFF;
                         __u64 next;
 
                         dp = page->addr;
@@ -269,7 +269,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep,
                         OBD_PAGE_FREE(page);
                         if (!done) {
                                 pos = next;
-                                if (pos == DIR_END_OFF)
+                                if (pos == MDS_DIR_END_OFF)
                                         /*
                                          * End of directory reached.
                                          */
index 6c91602..ff0e92d 100644 (file)
@@ -232,13 +232,14 @@ static void ll_release_page(struct page *page, __u64 hash,
 static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
                                        __u64 *start, __u64 *end)
 {
+        int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
         struct address_space *mapping = dir->i_mapping;
         /*
          * Complement of hash is used as an index so that
          * radix_tree_gang_lookup() can be used to find a page with starting
          * hash _smaller_ than one we are looking for.
          */
-        unsigned long offset = hash_x_index(*hash);
+        unsigned long offset = hash_x_index(*hash, hash64);
         struct page *page;
         int found;
 
@@ -261,14 +262,14 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
                 wait_on_page(page);
                 if (PageUptodate(page)) {
                         dp = kmap(page);
-#if BITS_PER_LONG == 32
-                        *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-                        *end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-                        *hash  = *hash >> 32;
-#else
-                        *start = le64_to_cpu(dp->ldp_hash_start);
-                        *end   = le64_to_cpu(dp->ldp_hash_end);
-#endif
+                        if (BITS_PER_LONG == 32 && hash64) {
+                                *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+                                *end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+                                *hash  = *hash >> 32;
+                        } else {
+                                *start = le64_to_cpu(dp->ldp_hash_start);
+                                *end   = le64_to_cpu(dp->ldp_hash_end);
+                        }
                         LASSERTF(*start <= *hash, "start = "LPX64",end = "
                                  LPX64",hash = "LPX64"\n", *start, *end, *hash);
                         if (*hash > *end || (*end != *start && *hash == *end)) {
@@ -301,6 +302,7 @@ struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash,
         __u64 end = 0;
         __u64 lhash = hash;
         struct ll_inode_info *lli = ll_i2info(dir);
+        int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
 
         mode = LCK_PR;
         rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
@@ -378,7 +380,7 @@ struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash,
                 }
         }
 
-        page = read_cache_page(mapping, hash_x_index(hash),
+        page = read_cache_page(mapping, hash_x_index(hash, hash64),
                                (filler_t*)mapping->a_ops->readpage, filp);
         if (IS_ERR(page)) {
                 CERROR("read cache page: "DFID" at "LPU64": rc %ld\n",
@@ -402,23 +404,23 @@ struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash,
         }
 hash_collision:
         dp = page_address(page);
-#if BITS_PER_LONG == 32
-        start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-        end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-        lhash = hash >> 32;
-#else
-        start = le64_to_cpu(dp->ldp_hash_start);
-        end   = le64_to_cpu(dp->ldp_hash_end);
-        lhash = hash;
-#endif
+        if (BITS_PER_LONG == 32 && hash64) {
+                start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+                end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+                lhash = hash >> 32;
+        } else {
+                start = le64_to_cpu(dp->ldp_hash_start);
+                end   = le64_to_cpu(dp->ldp_hash_end);
+                lhash = hash;
+        }
         if (end == start) {
                 LASSERT(start == lhash);
                 CWARN("Page-wide hash collision: "LPU64"\n", end);
-#if BITS_PER_LONG == 32
-                CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with "
-                      "hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start),
-                      le64_to_cpu(dp->ldp_hash_end), hash);
-#endif
+                if (BITS_PER_LONG == 32 && hash64)
+                        CWARN("Real page-wide hash collision at ["LPU64" "LPU64
+                              "] with hash "LPU64"\n",
+                              le64_to_cpu(dp->ldp_hash_start),
+                              le64_to_cpu(dp->ldp_hash_end), hash);
                 /*
                  * Fetch whole overflow chain...
                  *
@@ -444,7 +446,8 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
         struct ll_sb_info    *sbi        = ll_i2sbi(inode);
         struct ll_file_data  *fd         = LUSTRE_FPRIVATE(filp);
         __u64                 pos        = fd->fd_dir.lfd_pos;
-        int                   need_32bit = ll_need_32bit_api(sbi);
+        int                   api32      = ll_need_32bit_api(sbi);
+        int                   hash64     = sbi->ll_flags & LL_SBI_64BIT_HASH;
         struct page          *page;
         struct ll_dir_chain   chain;
         int                   done;
@@ -454,9 +457,9 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
                inode->i_ino, inode->i_generation, inode,
-               (unsigned long)pos, i_size_read(inode), need_32bit);
+               (unsigned long)pos, i_size_read(inode), api32);
 
-        if (pos == DIR_END_OFF)
+        if (pos == MDS_DIR_END_OFF)
                 /*
                  * end-of-file.
                  */
@@ -479,7 +482,7 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                          * If page is empty (end of directory is reached),
                          * use this value.
                          */
-                        __u64 hash = DIR_END_OFF;
+                        __u64 hash = MDS_DIR_END_OFF;
                         __u64 next;
 
                         dp = page_address(page);
@@ -510,12 +513,12 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                                          */
                                         continue;
 
-                                if (need_32bit)
+                                if (api32 && hash64)
                                         lhash = hash >> 32;
                                 else
                                         lhash = hash;
                                 fid_le_to_cpu(&fid, &ent->lde_fid);
-                                ino = cl_fid_build_ino(&fid,need_32bit);
+                                ino = cl_fid_build_ino(&fid, api32);
                                 type = ll_dirent_type_get(ent);
                                 /* For 'll_nfs_get_name_filldir()', it will try
                                  * to access the 'ent' through its 'lde_name',
@@ -528,7 +531,7 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                         ll_put_page(page);
                         if (!done) {
                                 pos = next;
-                                if (pos == DIR_END_OFF) {
+                                if (pos == MDS_DIR_END_OFF) {
                                         /*
                                          * End of directory reached.
                                          */
@@ -557,13 +560,16 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
         }
 
         fd->fd_dir.lfd_pos = pos;
-        if (need_32bit) {
-                if (pos == DIR_END_OFF)
-                        filp->f_pos = DIR_END_OFF_32BIT;
+        if (pos == MDS_DIR_END_OFF) {
+                if (api32)
+                        filp->f_pos = LL_DIR_END_OFF_32BIT;
                 else
-                        filp->f_pos = pos >> 32;
+                        filp->f_pos = LL_DIR_END_OFF;
         } else {
-                filp->f_pos = pos;
+                if (api32 && hash64)
+                        filp->f_pos = pos >> 32;
+                else
+                        filp->f_pos = pos;
         }
         filp->f_version = inode->i_version;
         touch_atime(filp->f_vfsmnt, filp->f_dentry);
@@ -1377,7 +1383,8 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
 {
         struct inode *inode = file->f_mapping->host;
         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-        int need_32bit = ll_need_32bit_api(ll_i2sbi(inode));
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int api32 = ll_need_32bit_api(sbi);
         loff_t ret = -EINVAL;
         ENTRY;
 
@@ -1391,32 +1398,32 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
                 case SEEK_END:
                         if (offset > 0)
                                 GOTO(out, ret);
-                        if (need_32bit)
-                                offset += DIR_END_OFF_32BIT;
+                        if (api32)
+                                offset += LL_DIR_END_OFF_32BIT;
                         else
-                                offset += DIR_END_OFF;
+                                offset += LL_DIR_END_OFF;
                         break;
                 default:
                         GOTO(out, ret);
         }
 
         if (offset >= 0 &&
-            ((need_32bit && offset <= DIR_END_OFF_32BIT) || !need_32bit)) {
+            ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
+             (!api32 && offset <= LL_DIR_END_OFF))) {
                 if (offset != file->f_pos) {
-                        if (need_32bit) {
-                                if (offset == DIR_END_OFF_32BIT)
-                                        fd->fd_dir.lfd_pos = DIR_END_OFF;
-                                else
-                                        fd->fd_dir.lfd_pos = offset << 32;
-                        } else {
+                        if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
+                            (!api32 && offset == LL_DIR_END_OFF))
+                                fd->fd_dir.lfd_pos = MDS_DIR_END_OFF;
+                        else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
+                                fd->fd_dir.lfd_pos = offset << 32;
+                        else
                                 fd->fd_dir.lfd_pos = offset;
-                        }
                         file->f_pos = offset;
                         file->f_version = 0;
                 }
                 ret = offset;
         }
-        EXIT;
+        GOTO(out, ret);
 
 out:
         cfs_mutex_unlock(&inode->i_mutex);
index 07bd889..b6062ed 100644 (file)
@@ -2314,7 +2314,10 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                 return res;
 
         stat->dev = inode->i_sb->s_dev;
-        stat->ino = cl_fid_build_ino(&lli->lli_fid, ll_need_32bit_api(sbi));
+        if (ll_need_32bit_api(sbi))
+                stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
+        else
+                stat->ino = inode->i_ino;
         stat->mode = inode->i_mode;
         stat->nlink = inode->i_nlink;
         stat->uid = inode->i_uid;
index f1db013..766fd29 100644 (file)
 #define FMODE_EXEC 0
 #endif
 
+/** Only used on client-side for indicating the tail of dir hash/offset. */
+#define LL_DIR_END_OFF          0x7fffffffffffffffULL
+#define LL_DIR_END_OFF_32BIT    0x7fffffffUL
+
 #ifndef DCACHE_LUSTRE_INVALID
 #define DCACHE_LUSTRE_INVALID 0x4000000
 #endif
@@ -312,8 +316,9 @@ enum stats_track_type {
 #define LL_SBI_LOCALFLOCK       0x200 /* Local flocks support by kernel */
 #define LL_SBI_LRU_RESIZE       0x400 /* lru resize support */
 #define LL_SBI_LAZYSTATFS       0x800 /* lazystatfs mount option */
-#define LL_SBI_SOM_PREVIEW      0x1000 /* SOM preview mount option */
-#define LL_SBI_32BIT_API        0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_SOM_PREVIEW     0x1000 /* SOM preview mount option */
+#define LL_SBI_32BIT_API       0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_64BIT_HASH      0x4000 /* support 64-bits dir hash/offset */
 
 /* default value for ll_sb_info->contention_time */
 #define SBI_DEFAULT_CONTENTION_SECONDS     60
index fd6e976..6a86df3 100644 (file)
@@ -339,6 +339,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                 sbi->ll_flags |= LL_SBI_OSS_CAPA;
         }
 
+        if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
+                sbi->ll_flags |= LL_SBI_64BIT_HASH;
+
         obd = class_name2obd(dt);
         if (!obd) {
                 CERROR("DT %s: not setup or attached\n", dt);
index 315e762..0385649 100644 (file)
@@ -861,7 +861,7 @@ keep_de:
                 }
                 pos = le64_to_cpu(dp->ldp_hash_end);
                 ll_put_page(page);
-                if (pos == DIR_END_OFF) {
+                if (pos == MDS_DIR_END_OFF) {
                         /*
                          * End of directory reached.
                          */
@@ -1050,7 +1050,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
                 }
                 pos = le64_to_cpu(dp->ldp_hash_end);
                 ll_put_page(page);
-                if (pos == DIR_END_OFF) {
+                if (pos == MDS_DIR_END_OFF) {
                         /*
                          * End of directory reached.
                          */
index 95ee481..cd4ab9d 100644 (file)
@@ -2320,7 +2320,7 @@ static void lmv_hash_adjust(__u64 *hash, __u64 hash_adj)
         val = le64_to_cpu(*hash);
         if (val < hash_adj)
                 val += MAX_HASH_SIZE;
-        if (val != DIR_END_OFF)
+        if (val != MDS_DIR_END_OFF)
                 *hash = cpu_to_le64(val - hash_adj);
 }
 
@@ -2453,7 +2453,7 @@ static int lmv_readpage(struct obd_export *exp, const struct lu_fid *fid,
                         __u64 end;
 
                         end = le64_to_cpu(dp->ldp_hash_end);
-                        if (end == DIR_END_OFF) {
+                        if (end == MDS_DIR_END_OFF) {
                                 dp->ldp_hash_end = cpu_to_le32(seg_size *
                                                                (tgt0_idx + 1));
                                 CDEBUG(D_INODE,
index 10fea55..4fe42ff 100644 (file)
@@ -2345,7 +2345,7 @@ static int __mdd_readpage(const struct lu_env *env, struct mdd_object *obj,
                 /*
                  * end of directory.
                  */
-                hash_end = DIR_END_OFF;
+                hash_end = MDS_DIR_END_OFF;
                 rc = 0;
         }
         if (rc == 0) {
@@ -2402,7 +2402,7 @@ int mdd_readpage(const struct lu_env *env, struct md_object *obj,
                 dp = (struct lu_dirpage*)cfs_kmap(pg);
                 memset(dp, 0 , sizeof(struct lu_dirpage));
                 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
-                dp->ldp_hash_end   = cpu_to_le64(DIR_END_OFF);
+                dp->ldp_hash_end   = cpu_to_le64(MDS_DIR_END_OFF);
                 dp->ldp_flags |= LDF_EMPTY;
                 dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
                 cfs_kunmap(pg);
index f1f1f81..452327c 100644 (file)
@@ -2697,4 +2697,3 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
                  (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
 }
-
index 526e5ee..6b74c32 100644 (file)
@@ -2694,4 +2694,3 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
                  (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
 }
-