From dea1dfafba827572dc1be042de4332e8962f1c14 Mon Sep 17 00:00:00 2001 From: nasf Date: Thu, 2 Jun 2011 14:44:32 +0800 Subject: [PATCH] LU-376 Positive LL_DIR_END_OFF to indicate the tail of dir hash/offset 1) Keep 'MDS_DIR_END_OFF' unchanged (0xfffffffffffffffeULL) to simplify dir hash/offset related interoperability issues. 2) Introduce positive "LL_DIR_END_OFF" (0x7fffffffffffffffULL) on client to indicate the tail of dir hash/offset for upper-layer callers such as llseek() and readdir(). 3) Allow a new 2.x client to talk to an old 2.0 server that uses a 32-bit hash. Change-Id: I74c148482f91ed91dfffd608f98971566f1257ad Signed-off-by: nasf Reviewed-on: http://review.whamcloud.com/886 Reviewed-by: Andreas Dilger Tested-by: Hudson Reviewed-by: Oleg Drokin --- lustre/include/lclient.h | 2 +- lustre/include/lustre/lustre_idl.h | 3 +- lustre/include/lustre_lite.h | 7 ++- lustre/lclient/lcommon_cl.c | 8 +-- lustre/liblustre/dir.c | 10 ++-- lustre/llite/dir.c | 105 ++++++++++++++++++++----------------- lustre/llite/file.c | 5 +- lustre/llite/llite_internal.h | 9 +++- lustre/llite/llite_lib.c | 3 ++ lustre/llite/statahead.c | 4 +- lustre/lmv/lmv_obd.c | 4 +- lustre/mdd/mdd_object.c | 4 +- lustre/ptlrpc/wiretest.c | 1 - lustre/utils/wiretest.c | 1 - 14 files changed, 88 insertions(+), 78 deletions(-) diff --git a/lustre/include/lclient.h b/lustre/include/lclient.h index e937e00..87361eb 100644 --- a/lustre/include/lclient.h +++ b/lustre/include/lclient.h @@ -361,7 +361,7 @@ void cl_inode_fini(struct inode *inode); int cl_local_size(struct inode *inode); __u16 ll_dirent_type_get(struct lu_dirent *ent); -__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit); +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); __u32 cl_fid_build_gen(const struct lu_fid *fid); #ifdef INVARIANT_CHECK diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 8b268f3..e9add3b 100644 --- a/lustre/include/lustre/lustre_idl.h +++ 
b/lustre/include/lustre/lustre_idl.h @@ -901,8 +901,7 @@ static inline int lu_dirent_size(struct lu_dirent *ent) return le16_to_cpu(ent->lde_reclen); } -#define DIR_END_OFF 0x7fffffffffffffffULL -#define DIR_END_OFF_32BIT 0x7fffffffUL +#define MDS_DIR_END_OFF 0xfffffffffffffffeULL /** @} lu_dir */ diff --git a/lustre/include/lustre_lite.h b/lustre/include/lustre_lite.h index ac71d69..7fa9937 100644 --- a/lustre/include/lustre_lite.h +++ b/lustre/include/lustre_lite.h @@ -150,12 +150,11 @@ static inline void ll_dir_chain_fini(struct ll_dir_chain *chain) { } -static inline unsigned long hash_x_index(__u64 hash) +static inline unsigned long hash_x_index(__u64 hash, int hash64) { #ifdef __KERNEL__ -# if BITS_PER_LONG == 32 - hash >>= 32; -# endif + if (BITS_PER_LONG == 32 && hash64) + hash >>= 32; #endif return ~0UL - hash; } diff --git a/lustre/lclient/lcommon_cl.c b/lustre/lclient/lcommon_cl.c index fede3b4..b083a89 100644 --- a/lustre/lclient/lcommon_cl.c +++ b/lustre/lclient/lcommon_cl.c @@ -1305,16 +1305,12 @@ __u16 ll_dirent_type_get(struct lu_dirent *ent) /** * build inode number from passed @fid */ -__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit) +__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) { -#if BITS_PER_LONG == 32 - RETURN(fid_flatten32(fid)); -#else - if (need_32bit) + if (BITS_PER_LONG == 32 || api32) RETURN(fid_flatten32(fid)); else RETURN(fid_flatten(fid)); -#endif } /** diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index 02a2802..3faa795 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -100,7 +100,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) } ldlm_lock_dump_handle(D_OTHER, &lockh); - offset = (__u64)hash_x_index(page->index); + offset = (__u64)hash_x_index(page->index, 0); rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL, offset, page, &request); if (!rc) { @@ -129,7 +129,7 @@ static cfs_page_t *llu_dir_read_page(struct inode *ino, __u64 hash, 
OBD_PAGE_ALLOC(page, 0); if (!page) RETURN(ERR_PTR(-ENOMEM)); - page->index = hash_x_index(hash); + page->index = hash_x_index(hash, 0); rc = llu_dir_do_readpage(ino, page); if (rc) { @@ -208,7 +208,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep, RETURN(0); } - if (pos == DIR_END_OFF) + if (pos == MDS_DIR_END_OFF) /* * end-of-file. */ @@ -229,7 +229,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep, * If page is empty (end of directoryis reached), * use this value. */ - __u64 hash = DIR_END_OFF; + __u64 hash = MDS_DIR_END_OFF; __u64 next; dp = page->addr; @@ -269,7 +269,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep, OBD_PAGE_FREE(page); if (!done) { pos = next; - if (pos == DIR_END_OFF) + if (pos == MDS_DIR_END_OFF) /* * End of directory reached. */ diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 6c91602..ff0e92d 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -232,13 +232,14 @@ static void ll_release_page(struct page *page, __u64 hash, static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, __u64 *start, __u64 *end) { + int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH; struct address_space *mapping = dir->i_mapping; /* * Complement of hash is used as an index so that * radix_tree_gang_lookup() can be used to find a page with starting * hash _smaller_ than one we are looking for. 
*/ - unsigned long offset = hash_x_index(*hash); + unsigned long offset = hash_x_index(*hash, hash64); struct page *page; int found; @@ -261,14 +262,14 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash, wait_on_page(page); if (PageUptodate(page)) { dp = kmap(page); -#if BITS_PER_LONG == 32 - *start = le64_to_cpu(dp->ldp_hash_start) >> 32; - *end = le64_to_cpu(dp->ldp_hash_end) >> 32; - *hash = *hash >> 32; -#else - *start = le64_to_cpu(dp->ldp_hash_start); - *end = le64_to_cpu(dp->ldp_hash_end); -#endif + if (BITS_PER_LONG == 32 && hash64) { + *start = le64_to_cpu(dp->ldp_hash_start) >> 32; + *end = le64_to_cpu(dp->ldp_hash_end) >> 32; + *hash = *hash >> 32; + } else { + *start = le64_to_cpu(dp->ldp_hash_start); + *end = le64_to_cpu(dp->ldp_hash_end); + } LASSERTF(*start <= *hash, "start = "LPX64",end = " LPX64",hash = "LPX64"\n", *start, *end, *hash); if (*hash > *end || (*end != *start && *hash == *end)) { @@ -301,6 +302,7 @@ struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash, __u64 end = 0; __u64 lhash = hash; struct ll_inode_info *lli = ll_i2info(dir); + int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH; mode = LCK_PR; rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED, @@ -378,7 +380,7 @@ struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash, } } - page = read_cache_page(mapping, hash_x_index(hash), + page = read_cache_page(mapping, hash_x_index(hash, hash64), (filler_t*)mapping->a_ops->readpage, filp); if (IS_ERR(page)) { CERROR("read cache page: "DFID" at "LPU64": rc %ld\n", @@ -402,23 +404,23 @@ struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash, } hash_collision: dp = page_address(page); -#if BITS_PER_LONG == 32 - start = le64_to_cpu(dp->ldp_hash_start) >> 32; - end = le64_to_cpu(dp->ldp_hash_end) >> 32; - lhash = hash >> 32; -#else - start = le64_to_cpu(dp->ldp_hash_start); - end = le64_to_cpu(dp->ldp_hash_end); - lhash = hash; 
-#endif + if (BITS_PER_LONG == 32 && hash64) { + start = le64_to_cpu(dp->ldp_hash_start) >> 32; + end = le64_to_cpu(dp->ldp_hash_end) >> 32; + lhash = hash >> 32; + } else { + start = le64_to_cpu(dp->ldp_hash_start); + end = le64_to_cpu(dp->ldp_hash_end); + lhash = hash; + } if (end == start) { LASSERT(start == lhash); CWARN("Page-wide hash collision: "LPU64"\n", end); -#if BITS_PER_LONG == 32 - CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with " - "hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start), - le64_to_cpu(dp->ldp_hash_end), hash); -#endif + if (BITS_PER_LONG == 32 && hash64) + CWARN("Real page-wide hash collision at ["LPU64" "LPU64 + "] with hash "LPU64"\n", + le64_to_cpu(dp->ldp_hash_start), + le64_to_cpu(dp->ldp_hash_end), hash); /* * Fetch whole overflow chain... * @@ -444,7 +446,8 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); __u64 pos = fd->fd_dir.lfd_pos; - int need_32bit = ll_need_32bit_api(sbi); + int api32 = ll_need_32bit_api(sbi); + int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; struct page *page; struct ll_dir_chain chain; int done; @@ -454,9 +457,9 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n", inode->i_ino, inode->i_generation, inode, - (unsigned long)pos, i_size_read(inode), need_32bit); + (unsigned long)pos, i_size_read(inode), api32); - if (pos == DIR_END_OFF) + if (pos == MDS_DIR_END_OFF) /* * end-of-file. */ @@ -479,7 +482,7 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) * If page is empty (end of directory is reached), * use this value. 
*/ - __u64 hash = DIR_END_OFF; + __u64 hash = MDS_DIR_END_OFF; __u64 next; dp = page_address(page); @@ -510,12 +513,12 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) */ continue; - if (need_32bit) + if (api32 && hash64) lhash = hash >> 32; else lhash = hash; fid_le_to_cpu(&fid, &ent->lde_fid); - ino = cl_fid_build_ino(&fid,need_32bit); + ino = cl_fid_build_ino(&fid, api32); type = ll_dirent_type_get(ent); /* For 'll_nfs_get_name_filldir()', it will try * to access the 'ent' through its 'lde_name', @@ -528,7 +531,7 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) ll_put_page(page); if (!done) { pos = next; - if (pos == DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * End of directory reached. */ @@ -557,13 +560,16 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) } fd->fd_dir.lfd_pos = pos; - if (need_32bit) { - if (pos == DIR_END_OFF) - filp->f_pos = DIR_END_OFF_32BIT; + if (pos == MDS_DIR_END_OFF) { + if (api32) + filp->f_pos = LL_DIR_END_OFF_32BIT; else - filp->f_pos = pos >> 32; + filp->f_pos = LL_DIR_END_OFF; } else { - filp->f_pos = pos; + if (api32 && hash64) + filp->f_pos = pos >> 32; + else + filp->f_pos = pos; } filp->f_version = inode->i_version; touch_atime(filp->f_vfsmnt, filp->f_dentry); @@ -1377,7 +1383,8 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_mapping->host; struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - int need_32bit = ll_need_32bit_api(ll_i2sbi(inode)); + struct ll_sb_info *sbi = ll_i2sbi(inode); + int api32 = ll_need_32bit_api(sbi); loff_t ret = -EINVAL; ENTRY; @@ -1391,32 +1398,32 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) case SEEK_END: if (offset > 0) GOTO(out, ret); - if (need_32bit) - offset += DIR_END_OFF_32BIT; + if (api32) + offset += LL_DIR_END_OFF_32BIT; else - offset += DIR_END_OFF; + offset += LL_DIR_END_OFF; break; default: GOTO(out, ret); } if (offset >= 0 && - 
((need_32bit && offset <= DIR_END_OFF_32BIT) || !need_32bit)) { + ((api32 && offset <= LL_DIR_END_OFF_32BIT) || + (!api32 && offset <= LL_DIR_END_OFF))) { if (offset != file->f_pos) { - if (need_32bit) { - if (offset == DIR_END_OFF_32BIT) - fd->fd_dir.lfd_pos = DIR_END_OFF; - else - fd->fd_dir.lfd_pos = offset << 32; - } else { + if ((api32 && offset == LL_DIR_END_OFF_32BIT) || + (!api32 && offset == LL_DIR_END_OFF)) + fd->fd_dir.lfd_pos = MDS_DIR_END_OFF; + else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH) + fd->fd_dir.lfd_pos = offset << 32; + else fd->fd_dir.lfd_pos = offset; - } file->f_pos = offset; file->f_version = 0; } ret = offset; } - EXIT; + GOTO(out, ret); out: cfs_mutex_unlock(&inode->i_mutex); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 07bd889..b6062ed 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2314,7 +2314,10 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, return res; stat->dev = inode->i_sb->s_dev; - stat->ino = cl_fid_build_ino(&lli->lli_fid, ll_need_32bit_api(sbi)); + if (ll_need_32bit_api(sbi)) + stat->ino = cl_fid_build_ino(&lli->lli_fid, 1); + else + stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index f1db013..766fd29 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -52,6 +52,10 @@ #define FMODE_EXEC 0 #endif +/** Only used on client-side for indicating the tail of dir hash/offset. 
*/ +#define LL_DIR_END_OFF 0x7fffffffffffffffULL +#define LL_DIR_END_OFF_32BIT 0x7fffffffUL + #ifndef DCACHE_LUSTRE_INVALID #define DCACHE_LUSTRE_INVALID 0x4000000 #endif @@ -312,8 +316,9 @@ enum stats_track_type { #define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */ #define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */ #define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */ -#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */ -#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */ +#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */ +#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */ +#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */ /* default value for ll_sb_info->contention_time */ #define SBI_DEFAULT_CONTENTION_SECONDS 60 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index fd6e976..6a86df3 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -339,6 +339,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) sbi->ll_flags |= LL_SBI_OSS_CAPA; } + if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) + sbi->ll_flags |= LL_SBI_64BIT_HASH; + obd = class_name2obd(dt); if (!obd) { CERROR("DT %s: not setup or attached\n", dt); diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index 315e762..0385649 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -861,7 +861,7 @@ keep_de: } pos = le64_to_cpu(dp->ldp_hash_end); ll_put_page(page); - if (pos == DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * End of directory reached. */ @@ -1050,7 +1050,7 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry) } pos = le64_to_cpu(dp->ldp_hash_end); ll_put_page(page); - if (pos == DIR_END_OFF) { + if (pos == MDS_DIR_END_OFF) { /* * End of directory reached. 
*/ diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 95ee481..cd4ab9d 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -2320,7 +2320,7 @@ static void lmv_hash_adjust(__u64 *hash, __u64 hash_adj) val = le64_to_cpu(*hash); if (val < hash_adj) val += MAX_HASH_SIZE; - if (val != DIR_END_OFF) + if (val != MDS_DIR_END_OFF) *hash = cpu_to_le64(val - hash_adj); } @@ -2453,7 +2453,7 @@ static int lmv_readpage(struct obd_export *exp, const struct lu_fid *fid, __u64 end; end = le64_to_cpu(dp->ldp_hash_end); - if (end == DIR_END_OFF) { + if (end == MDS_DIR_END_OFF) { dp->ldp_hash_end = cpu_to_le32(seg_size * (tgt0_idx + 1)); CDEBUG(D_INODE, diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 10fea55..4fe42ff 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -2345,7 +2345,7 @@ static int __mdd_readpage(const struct lu_env *env, struct mdd_object *obj, /* * end of directory. */ - hash_end = DIR_END_OFF; + hash_end = MDS_DIR_END_OFF; rc = 0; } if (rc == 0) { @@ -2402,7 +2402,7 @@ int mdd_readpage(const struct lu_env *env, struct md_object *obj, dp = (struct lu_dirpage*)cfs_kmap(pg); memset(dp, 0 , sizeof(struct lu_dirpage)); dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash); - dp->ldp_hash_end = cpu_to_le64(DIR_END_OFF); + dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF); dp->ldp_flags |= LDF_EMPTY; dp->ldp_flags = cpu_to_le32(dp->ldp_flags); cfs_kunmap(pg); diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index f1f1f81..452327c 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -2697,4 +2697,3 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n", (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name)); } - diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 526e5ee..6b74c32 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -2694,4 +2694,3 @@ void 
lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n", (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name)); } - -- 1.8.3.1