1) Keep 'MDS_DIR_END_OFF' unchanged (0xfffffffffffffffeULL) to avoid
dir hash/offset related interoperability issues.
2) Introduce a positive "LL_DIR_END_OFF" (0x7fffffffffffffffULL) on the
client to indicate the tail of the dir hash/offset to upper-layer callers,
such as llseek() and readdir().
3) Allow a new 2.x client to talk to an old 2.0 server that uses a 32-bit hash.
Change-Id: I74c148482f91ed91dfffd608f98971566f1257ad
Signed-off-by: nasf <yong.fan@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/886
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Hudson
Reviewed-by: Oleg Drokin <green@whamcloud.com>
14 files changed:
int cl_local_size(struct inode *inode);
__u16 ll_dirent_type_get(struct lu_dirent *ent);
int cl_local_size(struct inode *inode);
__u16 ll_dirent_type_get(struct lu_dirent *ent);
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit);
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32);
__u32 cl_fid_build_gen(const struct lu_fid *fid);
#ifdef INVARIANT_CHECK
__u32 cl_fid_build_gen(const struct lu_fid *fid);
#ifdef INVARIANT_CHECK
return le16_to_cpu(ent->lde_reclen);
}
return le16_to_cpu(ent->lde_reclen);
}
-#define DIR_END_OFF 0x7fffffffffffffffULL
-#define DIR_END_OFF_32BIT 0x7fffffffUL
+#define MDS_DIR_END_OFF 0xfffffffffffffffeULL
-static inline unsigned long hash_x_index(__u64 hash)
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
-# if BITS_PER_LONG == 32
- hash >>= 32;
-# endif
+ if (BITS_PER_LONG == 32 && hash64)
+ hash >>= 32;
#endif
return ~0UL - hash;
}
#endif
return ~0UL - hash;
}
/**
* build inode number from passed @fid */
/**
* build inode number from passed @fid */
-__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit)
+__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32)
-#if BITS_PER_LONG == 32
- RETURN(fid_flatten32(fid));
-#else
- if (need_32bit)
+ if (BITS_PER_LONG == 32 || api32)
RETURN(fid_flatten32(fid));
else
RETURN(fid_flatten(fid));
RETURN(fid_flatten32(fid));
else
RETURN(fid_flatten(fid));
}
ldlm_lock_dump_handle(D_OTHER, &lockh);
}
ldlm_lock_dump_handle(D_OTHER, &lockh);
- offset = (__u64)hash_x_index(page->index);
+ offset = (__u64)hash_x_index(page->index, 0);
rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
offset, page, &request);
if (!rc) {
rc = md_readpage(sbi->ll_md_exp, &lli->lli_fid, NULL,
offset, page, &request);
if (!rc) {
OBD_PAGE_ALLOC(page, 0);
if (!page)
RETURN(ERR_PTR(-ENOMEM));
OBD_PAGE_ALLOC(page, 0);
if (!page)
RETURN(ERR_PTR(-ENOMEM));
- page->index = hash_x_index(hash);
+ page->index = hash_x_index(hash, 0);
rc = llu_dir_do_readpage(ino, page);
if (rc) {
rc = llu_dir_do_readpage(ino, page);
if (rc) {
- if (pos == DIR_END_OFF)
+ if (pos == MDS_DIR_END_OFF)
* If page is empty (end of directory is reached),
* use this value.
*/
* If page is empty (end of directory is reached),
* use this value.
*/
- __u64 hash = DIR_END_OFF;
+ __u64 hash = MDS_DIR_END_OFF;
__u64 next;
dp = page->addr;
__u64 next;
dp = page->addr;
OBD_PAGE_FREE(page);
if (!done) {
pos = next;
OBD_PAGE_FREE(page);
if (!done) {
pos = next;
- if (pos == DIR_END_OFF)
+ if (pos == MDS_DIR_END_OFF)
/*
* End of directory reached.
*/
/*
* End of directory reached.
*/
static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
__u64 *start, __u64 *end)
{
static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
__u64 *start, __u64 *end)
{
+ int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
struct address_space *mapping = dir->i_mapping;
/*
* Complement of hash is used as an index so that
* radix_tree_gang_lookup() can be used to find a page with starting
* hash _smaller_ than one we are looking for.
*/
struct address_space *mapping = dir->i_mapping;
/*
* Complement of hash is used as an index so that
* radix_tree_gang_lookup() can be used to find a page with starting
* hash _smaller_ than one we are looking for.
*/
- unsigned long offset = hash_x_index(*hash);
+ unsigned long offset = hash_x_index(*hash, hash64);
struct page *page;
int found;
struct page *page;
int found;
wait_on_page(page);
if (PageUptodate(page)) {
dp = kmap(page);
wait_on_page(page);
if (PageUptodate(page)) {
dp = kmap(page);
-#if BITS_PER_LONG == 32
- *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
- *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
- *hash = *hash >> 32;
-#else
- *start = le64_to_cpu(dp->ldp_hash_start);
- *end = le64_to_cpu(dp->ldp_hash_end);
-#endif
+ if (BITS_PER_LONG == 32 && hash64) {
+ *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+ *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+ *hash = *hash >> 32;
+ } else {
+ *start = le64_to_cpu(dp->ldp_hash_start);
+ *end = le64_to_cpu(dp->ldp_hash_end);
+ }
LASSERTF(*start <= *hash, "start = "LPX64",end = "
LPX64",hash = "LPX64"\n", *start, *end, *hash);
if (*hash > *end || (*end != *start && *hash == *end)) {
LASSERTF(*start <= *hash, "start = "LPX64",end = "
LPX64",hash = "LPX64"\n", *start, *end, *hash);
if (*hash > *end || (*end != *start && *hash == *end)) {
__u64 end = 0;
__u64 lhash = hash;
struct ll_inode_info *lli = ll_i2info(dir);
__u64 end = 0;
__u64 lhash = hash;
struct ll_inode_info *lli = ll_i2info(dir);
+ int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
mode = LCK_PR;
rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
mode = LCK_PR;
rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
- page = read_cache_page(mapping, hash_x_index(hash),
+ page = read_cache_page(mapping, hash_x_index(hash, hash64),
(filler_t*)mapping->a_ops->readpage, filp);
if (IS_ERR(page)) {
CERROR("read cache page: "DFID" at "LPU64": rc %ld\n",
(filler_t*)mapping->a_ops->readpage, filp);
if (IS_ERR(page)) {
CERROR("read cache page: "DFID" at "LPU64": rc %ld\n",
}
hash_collision:
dp = page_address(page);
}
hash_collision:
dp = page_address(page);
-#if BITS_PER_LONG == 32
- start = le64_to_cpu(dp->ldp_hash_start) >> 32;
- end = le64_to_cpu(dp->ldp_hash_end) >> 32;
- lhash = hash >> 32;
-#else
- start = le64_to_cpu(dp->ldp_hash_start);
- end = le64_to_cpu(dp->ldp_hash_end);
- lhash = hash;
-#endif
+ if (BITS_PER_LONG == 32 && hash64) {
+ start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+ end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+ lhash = hash >> 32;
+ } else {
+ start = le64_to_cpu(dp->ldp_hash_start);
+ end = le64_to_cpu(dp->ldp_hash_end);
+ lhash = hash;
+ }
if (end == start) {
LASSERT(start == lhash);
CWARN("Page-wide hash collision: "LPU64"\n", end);
if (end == start) {
LASSERT(start == lhash);
CWARN("Page-wide hash collision: "LPU64"\n", end);
-#if BITS_PER_LONG == 32
- CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with "
- "hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start),
- le64_to_cpu(dp->ldp_hash_end), hash);
-#endif
+ if (BITS_PER_LONG == 32 && hash64)
+ CWARN("Real page-wide hash collision at ["LPU64" "LPU64
+ "] with hash "LPU64"\n",
+ le64_to_cpu(dp->ldp_hash_start),
+ le64_to_cpu(dp->ldp_hash_end), hash);
/*
* Fetch whole overflow chain...
*
/*
* Fetch whole overflow chain...
*
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
__u64 pos = fd->fd_dir.lfd_pos;
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
__u64 pos = fd->fd_dir.lfd_pos;
- int need_32bit = ll_need_32bit_api(sbi);
+ int api32 = ll_need_32bit_api(sbi);
+ int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
struct page *page;
struct ll_dir_chain chain;
int done;
struct page *page;
struct ll_dir_chain chain;
int done;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
inode->i_ino, inode->i_generation, inode,
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
inode->i_ino, inode->i_generation, inode,
- (unsigned long)pos, i_size_read(inode), need_32bit);
+ (unsigned long)pos, i_size_read(inode), api32);
- if (pos == DIR_END_OFF)
+ if (pos == MDS_DIR_END_OFF)
* If page is empty (end of directory is reached),
* use this value.
*/
* If page is empty (end of directory is reached),
* use this value.
*/
- __u64 hash = DIR_END_OFF;
+ __u64 hash = MDS_DIR_END_OFF;
__u64 next;
dp = page_address(page);
__u64 next;
dp = page_address(page);
lhash = hash >> 32;
else
lhash = hash;
fid_le_to_cpu(&fid, &ent->lde_fid);
lhash = hash >> 32;
else
lhash = hash;
fid_le_to_cpu(&fid, &ent->lde_fid);
- ino = cl_fid_build_ino(&fid,need_32bit);
+ ino = cl_fid_build_ino(&fid, api32);
type = ll_dirent_type_get(ent);
/* For 'll_nfs_get_name_filldir()', it will try
* to access the 'ent' through its 'lde_name',
type = ll_dirent_type_get(ent);
/* For 'll_nfs_get_name_filldir()', it will try
* to access the 'ent' through its 'lde_name',
ll_put_page(page);
if (!done) {
pos = next;
ll_put_page(page);
if (!done) {
pos = next;
- if (pos == DIR_END_OFF) {
+ if (pos == MDS_DIR_END_OFF) {
/*
* End of directory reached.
*/
/*
* End of directory reached.
*/
}
fd->fd_dir.lfd_pos = pos;
}
fd->fd_dir.lfd_pos = pos;
- if (need_32bit) {
- if (pos == DIR_END_OFF)
- filp->f_pos = DIR_END_OFF_32BIT;
+ if (pos == MDS_DIR_END_OFF) {
+ if (api32)
+ filp->f_pos = LL_DIR_END_OFF_32BIT;
- filp->f_pos = pos >> 32;
+ filp->f_pos = LL_DIR_END_OFF;
+ if (api32 && hash64)
+ filp->f_pos = pos >> 32;
+ else
+ filp->f_pos = pos;
}
filp->f_version = inode->i_version;
touch_atime(filp->f_vfsmnt, filp->f_dentry);
}
filp->f_version = inode->i_version;
touch_atime(filp->f_vfsmnt, filp->f_dentry);
{
struct inode *inode = file->f_mapping->host;
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
{
struct inode *inode = file->f_mapping->host;
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- int need_32bit = ll_need_32bit_api(ll_i2sbi(inode));
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
ENTRY;
loff_t ret = -EINVAL;
ENTRY;
case SEEK_END:
if (offset > 0)
GOTO(out, ret);
case SEEK_END:
if (offset > 0)
GOTO(out, ret);
- if (need_32bit)
- offset += DIR_END_OFF_32BIT;
+ if (api32)
+ offset += LL_DIR_END_OFF_32BIT;
+ offset += LL_DIR_END_OFF;
break;
default:
GOTO(out, ret);
}
if (offset >= 0 &&
break;
default:
GOTO(out, ret);
}
if (offset >= 0 &&
- ((need_32bit && offset <= DIR_END_OFF_32BIT) || !need_32bit)) {
+ ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
+ (!api32 && offset <= LL_DIR_END_OFF))) {
if (offset != file->f_pos) {
if (offset != file->f_pos) {
- if (need_32bit) {
- if (offset == DIR_END_OFF_32BIT)
- fd->fd_dir.lfd_pos = DIR_END_OFF;
- else
- fd->fd_dir.lfd_pos = offset << 32;
- } else {
+ if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
+ (!api32 && offset == LL_DIR_END_OFF))
+ fd->fd_dir.lfd_pos = MDS_DIR_END_OFF;
+ else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
+ fd->fd_dir.lfd_pos = offset << 32;
+ else
fd->fd_dir.lfd_pos = offset;
fd->fd_dir.lfd_pos = offset;
file->f_pos = offset;
file->f_version = 0;
}
ret = offset;
}
file->f_pos = offset;
file->f_version = 0;
}
ret = offset;
}
out:
cfs_mutex_unlock(&inode->i_mutex);
out:
cfs_mutex_unlock(&inode->i_mutex);
return res;
stat->dev = inode->i_sb->s_dev;
return res;
stat->dev = inode->i_sb->s_dev;
- stat->ino = cl_fid_build_ino(&lli->lli_fid, ll_need_32bit_api(sbi));
+ if (ll_need_32bit_api(sbi))
+ stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
+ else
+ stat->ino = inode->i_ino;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
stat->uid = inode->i_uid;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
stat->uid = inode->i_uid;
#define FMODE_EXEC 0
#endif
#define FMODE_EXEC 0
#endif
+/** Only used on client-side for indicating the tail of dir hash/offset. */
+#define LL_DIR_END_OFF 0x7fffffffffffffffULL
+#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
+
#ifndef DCACHE_LUSTRE_INVALID
#define DCACHE_LUSTRE_INVALID 0x4000000
#endif
#ifndef DCACHE_LUSTRE_INVALID
#define DCACHE_LUSTRE_INVALID 0x4000000
#endif
#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
-#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
-#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
+#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
sbi->ll_flags |= LL_SBI_OSS_CAPA;
}
sbi->ll_flags |= LL_SBI_OSS_CAPA;
}
+ if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
+ sbi->ll_flags |= LL_SBI_64BIT_HASH;
+
obd = class_name2obd(dt);
if (!obd) {
CERROR("DT %s: not setup or attached\n", dt);
obd = class_name2obd(dt);
if (!obd) {
CERROR("DT %s: not setup or attached\n", dt);
}
pos = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
}
pos = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
- if (pos == DIR_END_OFF) {
+ if (pos == MDS_DIR_END_OFF) {
/*
* End of directory reached.
*/
/*
* End of directory reached.
*/
}
pos = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
}
pos = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
- if (pos == DIR_END_OFF) {
+ if (pos == MDS_DIR_END_OFF) {
/*
* End of directory reached.
*/
/*
* End of directory reached.
*/
val = le64_to_cpu(*hash);
if (val < hash_adj)
val += MAX_HASH_SIZE;
val = le64_to_cpu(*hash);
if (val < hash_adj)
val += MAX_HASH_SIZE;
- if (val != DIR_END_OFF)
+ if (val != MDS_DIR_END_OFF)
*hash = cpu_to_le64(val - hash_adj);
}
*hash = cpu_to_le64(val - hash_adj);
}
__u64 end;
end = le64_to_cpu(dp->ldp_hash_end);
__u64 end;
end = le64_to_cpu(dp->ldp_hash_end);
- if (end == DIR_END_OFF) {
+ if (end == MDS_DIR_END_OFF) {
dp->ldp_hash_end = cpu_to_le32(seg_size *
(tgt0_idx + 1));
CDEBUG(D_INODE,
dp->ldp_hash_end = cpu_to_le32(seg_size *
(tgt0_idx + 1));
CDEBUG(D_INODE,
/*
* end of directory.
*/
/*
* end of directory.
*/
- hash_end = DIR_END_OFF;
+ hash_end = MDS_DIR_END_OFF;
dp = (struct lu_dirpage*)cfs_kmap(pg);
memset(dp, 0 , sizeof(struct lu_dirpage));
dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
dp = (struct lu_dirpage*)cfs_kmap(pg);
memset(dp, 0 , sizeof(struct lu_dirpage));
dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
- dp->ldp_hash_end = cpu_to_le64(DIR_END_OFF);
+ dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
dp->ldp_flags |= LDF_EMPTY;
dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
cfs_kunmap(pg);
dp->ldp_flags |= LDF_EMPTY;
dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
cfs_kunmap(pg);
LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
(long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
}
LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
(long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
}
LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
(long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
}
LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
(long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
}