From: nasf Date: Wed, 25 May 2011 12:31:02 +0000 (+0800) Subject: LU-163 MDS returns 32/64-bit dir name hash according to client type X-Git-Tag: 2.0.62.0~25 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=96a5daa0c08d7b42ec368080a2a7f0dfb110ef98 LU-163 MDS returns 32/64-bit dir name hash according to client type 1) liblustre client, 1.8.5 or older client do not support 64-bit dir name hash. 2) register Lustre version 'get_name()' for NFS processing. 3) 'll_dir_seek()' processes name hash-based dir seek operation. 4) re-define "DIR_END_OFF" as "0x7fffffffffffffffULL" to always return positive dir name hash for successful seek. 5) small cleanup for 'll_dops_init()' to drop unnecessary statahead process. Signed-off-by: nasf Change-Id: I0e3177c98c6e8814c39551ab54edfa3f861cbc8c Reviewed-on: http://review.whamcloud.com/432 Tested-by: Hudson Reviewed-by: Oleg Drokin --- diff --git a/ldiskfs/kernel_patches/patches/ext3-export-64bit-name-hash.patch b/ldiskfs/kernel_patches/patches/ext3-export-64bit-name-hash.patch index b5d5254..9cc7294 100644 --- a/ldiskfs/kernel_patches/patches/ext3-export-64bit-name-hash.patch +++ b/ldiskfs/kernel_patches/patches/ext3-export-64bit-name-hash.patch @@ -1,8 +1,20 @@ -Index: linux-2.6.18-194.17.1-ext3/fs/ext3/dir.c +Index: linux-stage/fs/ext3/dir.c =================================================================== ---- linux-2.6.18-194.17.1-ext3.orig/fs/ext3/dir.c 2010-11-30 22:46:09.000000000 +0300 -+++ linux-2.6.18-194.17.1-ext3/fs/ext3/dir.c 2010-12-16 00:10:12.000000000 +0300 -@@ -240,19 +240,34 @@ out: +--- linux-stage.orig/fs/ext3/dir.c 2011-04-19 01:39:47.000000000 +0800 ++++ linux-stage/fs/ext3/dir.c 2011-04-19 01:44:19.000000000 +0800 +@@ -237,22 +237,50 @@ + } + + #ifdef CONFIG_EXT3_INDEX ++static inline int is_32bit_api(void) ++{ ++#ifdef HAVE_IS_COMPAT_TASK ++ return is_compat_task(); ++#else ++ return (BITS_PER_LONG == 32); ++#endif ++} ++ /* * These functions convert from the major/minor hash to an f_pos * value. @@ -16,128 +28,108 @@ Index: linux-2.6.18-194.17.1-ext3/fs/ext3/dir.c - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir - * cookie. Sigh. + * -+ * Whether 64-bit or 32-bit hash value is exported as file pos is -+ * controlled by "64bithash" mount option. ++ * Up layer (OSD) should specify O_32BITHASH or O_64BITHASH explicitly. ++ * On the other hand, we allow ldiskfs to be mounted directly on both 32-bit ++ * and 64-bit nodes, under such case, neither O_32BITHASH nor O_64BITHASH is ++ * specified. */ -#define hash2pos(major, minor) (major >> 1) -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -#define pos2min_hash(pos) (0) -+ -+static inline loff_t hash2pos(struct super_block *sb, __u32 major, __u32 minor) ++static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) +{ -+ if (test_opt(sb, 64BITHASH)) -+ return (((__u64)(major >> 1) << 32) | (__u64)minor); -+ else ++ if ((filp->f_flags & O_32BITHASH) || ++ (!(filp->f_flags & O_64BITHASH) && is_32bit_api())) + return (major >> 1); ++ else ++ return (((__u64)(major >> 1) << 32) | (__u64)minor); +} + -+static inline __u32 pos2maj_hash(struct super_block *sb, loff_t pos) ++static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) +{ -+ if (test_opt(sb, 64BITHASH)) -+ return (((pos >> 32) << 1) & 0xffffffff); -+ else ++ if ((filp->f_flags & O_32BITHASH) || ++ (!(filp->f_flags & O_64BITHASH) && is_32bit_api())) + return ((pos << 1) & 0xffffffff); ++ else ++ return (((pos >> 32) << 1) & 0xffffffff); +} + -+static inline __u32 pos2min_hash(struct super_block *sb, loff_t pos) ++static inline __u32 pos2min_hash(struct file *filp, loff_t pos) +{ -+ if (test_opt(sb, 64BITHASH)) -+ return (pos & 0xffffffff); -+ else ++ if ((filp->f_flags & O_32BITHASH) || ++ (!(filp->f_flags & O_64BITHASH) && is_32bit_api())) + return (0); ++ else ++ return (pos & 0xffffffff); +} /* * This structure holds the nodes of the red-black tree used to store -@@ -314,7 +329,7 @@ static void free_rb_tree_fname(struct rb +@@ -314,7 +342,7 @@ } -static struct dir_private_info *create_dir_info(loff_t pos) -+static struct dir_private_info *create_dir_info(struct super_block *sb, loff_t pos) ++static struct dir_private_info *create_dir_info(struct file* filp, loff_t pos) { struct dir_private_info *p; -@@ -325,8 +340,8 @@ static struct dir_private_info *create_d +@@ -325,8 +353,8 @@ p->curr_node = NULL; p->extra_fname = NULL; p->last_pos = 0; - p->curr_hash = pos2maj_hash(pos); - p->curr_minor_hash = pos2min_hash(pos); -+ p->curr_hash = pos2maj_hash(sb, pos); -+ p->curr_minor_hash = pos2min_hash(sb, pos); ++ p->curr_hash = pos2maj_hash(filp, pos); ++ p->curr_minor_hash = pos2min_hash(filp, pos); p->next_hash = 0; return p; } -@@ -422,7 +437,7 @@ static int call_filldir(struct file * fi +@@ -422,7 +450,7 @@ printk("call_filldir: called with null fname?!?\n"); return 0; } - curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ curr_pos = hash2pos(sb, fname->hash, fname->minor_hash); ++ curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); while (fname) { error = filldir(dirent, fname->name, fname->name_len, curr_pos, -@@ -447,7 +462,7 @@ static int ext3_dx_readdir(struct file * +@@ -447,7 +475,7 @@ int ret; if (!info) { - info = create_dir_info(filp->f_pos); -+ info = create_dir_info(inode->i_sb, filp->f_pos); ++ info = create_dir_info(filp, filp->f_pos); if (!info) return -ENOMEM; filp->private_data = info; -@@ -461,8 +476,8 @@ static int ext3_dx_readdir(struct file * +@@ -461,8 +489,8 @@ free_rb_tree_fname(&info->root); info->curr_node = NULL; info->extra_fname = NULL; - info->curr_hash = pos2maj_hash(filp->f_pos); - info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ info->curr_hash = pos2maj_hash(inode->i_sb, filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(inode->i_sb, filp->f_pos); ++ info->curr_hash = pos2maj_hash(filp, filp->f_pos); ++ info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); } /* -Index: linux-2.6.18-194.17.1-ext3/fs/ext3/super.c +Index: linux-stage/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.18-194.17.1-ext3.orig/fs/ext3/super.c 2010-11-30 22:48:01.000000000 +0300 -+++ linux-2.6.18-194.17.1-ext3/fs/ext3/super.c 2010-12-16 00:11:59.000000000 +0300 -@@ -742,6 +742,7 @@ enum { - Opt_grpquota, - Opt_extents, Opt_noextents, Opt_bigendian_extents, Opt_extdebug, - Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_maxdirsize, Opt_force_over_8tb, -+ Opt_64bithash, - }; - - static match_table_t tokens = { -@@ -808,6 +809,7 @@ static match_table_t tokens = { - {Opt_force_over_8tb, "force_over_8tb"}, - {Opt_resize, "resize"}, - {Opt_maxdirsize, "maxdirsize=%u"}, -+ {Opt_64bithash, "64bithash"}, - {Opt_err, NULL} - }; - -@@ -1195,6 +1197,9 @@ clear_qf_name: - case Opt_force_over_8tb: - force_over_8tb = 1; - break; -+ case Opt_64bithash: -+ set_opt(sbi->s_mount_opt, 64BITHASH); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -Index: linux-2.6.18-194.17.1-ext3/include/linux/ext3_fs.h -=================================================================== ---- linux-2.6.18-194.17.1-ext3.orig/include/linux/ext3_fs.h 2010-11-30 22:52:58.000000000 +0300 -+++ linux-2.6.18-194.17.1-ext3/include/linux/ext3_fs.h 2010-12-16 00:12:45.000000000 +0300 -@@ -483,6 +483,8 @@ do { \ - #define EXT3_MOUNT_JOURNAL_ASYNC_COMMIT 0x20000000 /* Journal Async Commit */ +--- linux-stage.orig/include/linux/ext3_fs.h 2011-04-19 01:39:47.000000000 +0800 ++++ linux-stage/include/linux/ext3_fs.h 2011-04-19 01:45:21.000000000 +0800 +@@ -54,6 +54,14 @@ + #define ext3_debug(f, a...) do {} while (0) #endif -+#define EXT3_MOUNT_64BITHASH 0x40000000 /* export 64-bit name hash */ ++#ifndef O_32BITHASH ++# define O_32BITHASH 0x10000000 ++#endif ++ ++#ifndef O_64BITHASH ++# define O_64BITHASH 0x20000000 ++#endif + - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt + #define EXT3_MULTIBLOCK_ALLOCATOR 1 + + #define EXT3_MB_HINT_MERGE 1 /* prefer goal again. length */ diff --git a/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash-rhel6.patch deleted file mode 100644 index d261d92..0000000 --- a/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash-rhel6.patch +++ /dev/null @@ -1,141 +0,0 @@ -Index: linux-stage/fs/ext4/dir.c -=================================================================== ---- linux-stage.orig/fs/ext4/dir.c 2011-03-31 10:35:49.000000000 +0800 -+++ linux-stage/fs/ext4/dir.c 2011-04-01 09:33:58.706267179 +0800 -@@ -249,19 +249,32 @@ - /* - * These functions convert from the major/minor hash to an f_pos - * value. -- * -- * Currently we only use major hash numer. This is unfortunate, but -- * on 32-bit machines, the same VFS interface is used for lseek and -- * llseek, so if we use the 64 bit offset, then the 32-bit versions of -- * lseek/telldir/seekdir will blow out spectacularly, and from within -- * the ext2 low-level routine, we don't know if we're being called by -- * a 64-bit version of the system call or the 32-bit version of the -- * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -- * cookie. Sigh. -+ * Whether 64-bit or 32-bit hash value is exported as file pos is -+ * controlled by "64bithash" mount option. - */ --#define hash2pos(major, minor) (major >> 1) --#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) --#define pos2min_hash(pos) (0) -+static inline loff_t hash2pos(struct super_block *sb, __u32 major, __u32 minor) -+{ -+ if (test_opt(sb, 64BITHASH)) -+ return (((__u64)(major >> 1) << 32) | (__u64)minor); -+ else -+ return (major >> 1); -+} -+ -+static inline __u32 pos2maj_hash(struct super_block *sb, loff_t pos) -+{ -+ if (test_opt(sb, 64BITHASH)) -+ return (((pos >> 32) << 1) & 0xffffffff); -+ else -+ return ((pos << 1) & 0xffffffff); -+} -+ -+static inline __u32 pos2min_hash(struct super_block *sb, loff_t pos) -+{ -+ if (test_opt(sb, 64BITHASH)) -+ return (pos & 0xffffffff); -+ else -+ return (0); -+} - - /* - * This structure holds the nodes of the red-black tree used to store -@@ -322,15 +335,16 @@ - } - - --static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) -+static struct dir_private_info *ext4_htree_create_dir_info( -+ struct super_block *sb, loff_t pos) - { - struct dir_private_info *p; - - p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); - if (!p) - return NULL; -- p->curr_hash = pos2maj_hash(pos); -- p->curr_minor_hash = pos2min_hash(pos); -+ p->curr_hash = pos2maj_hash(sb, pos); -+ p->curr_minor_hash = pos2min_hash(sb, pos); - return p; - } - -@@ -426,7 +440,7 @@ - "null fname?!?\n"); - return 0; - } -- curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ curr_pos = hash2pos(sb, fname->hash, fname->minor_hash); - while (fname) { - error = filldir(dirent, fname->name, - fname->name_len, curr_pos, -@@ -451,7 +465,7 @@ - int ret; - - if (!info) { -- info = ext4_htree_create_dir_info(filp->f_pos); -+ info = ext4_htree_create_dir_info(inode->i_sb, filp->f_pos); - if (!info) - return -ENOMEM; - filp->private_data = info; -@@ -465,8 +479,8 @@ - free_rb_tree_fname(&info->root); - info->curr_node = NULL; - info->extra_fname = NULL; -- info->curr_hash = pos2maj_hash(filp->f_pos); -- info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ info->curr_hash = pos2maj_hash(inode->i_sb, filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(inode->i_sb, filp->f_pos); - } - - /* -Index: linux-stage/fs/ext4/ext4.h -=================================================================== ---- linux-stage.orig/fs/ext4/ext4.h 2011-03-31 10:35:50.000000000 +0800 -+++ linux-stage/fs/ext4/ext4.h 2011-04-01 09:33:58.740267284 +0800 -@@ -785,6 +785,7 @@ - #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ - #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ - #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ -+#define EXT4_MOUNT_64BITHASH 0x4000000 /* export 64-bit name hash */ - #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ - #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ - #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ -Index: linux-stage/fs/ext4/super.c -=================================================================== ---- linux-stage.orig/fs/ext4/super.c 2011-03-31 10:35:50.000000000 +0800 -+++ linux-stage/fs/ext4/super.c 2011-04-01 09:35:00.251453404 +0800 -@@ -1540,7 +1540,7 @@ - Opt_inode_readahead_blks, Opt_journal_ioprio, - Opt_discard, Opt_nodiscard, - Opt_mballoc, Opt_bigendian_extents, Opt_force_over_16tb, -- Opt_no_mbcache, -+ Opt_no_mbcache, Opt_64bithash, - Opt_extents, Opt_noextents, - }; - -@@ -1614,6 +1614,7 @@ - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_no_mbcache, "no_mbcache"}, -+ {Opt_64bithash, "64bithash"}, - {Opt_extents, "extents"}, - {Opt_noextents, "noextents"}, - {Opt_err, NULL}, -@@ -2092,6 +2093,9 @@ - case Opt_no_mbcache: - set_opt(sbi->s_mount_opt, NO_MBCACHE); - break; -+ case Opt_64bithash: -+ set_opt(sbi->s_mount_opt, 64BITHASH); -+ break; - default: - ext4_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" " diff --git a/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash.patch b/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash.patch index e920e4e..c7e01f4 100644 --- a/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash.patch +++ b/ldiskfs/kernel_patches/patches/ext4-export-64bit-name-hash.patch @@ -1,12 +1,24 @@ -Index: linux-2.6.18-194.17.1-ext4/fs/ext4/dir.c +Index: linux-stage/fs/ext4/dir.c =================================================================== ---- linux-2.6.18-194.17.1-ext4.orig/fs/ext4/dir.c 2010-12-02 16:37:05.000000000 +0300 -+++ linux-2.6.18-194.17.1-ext4/fs/ext4/dir.c 2010-12-16 00:06:49.000000000 +0300 -@@ -245,19 +245,32 @@ out: +--- linux-stage.orig/fs/ext4/dir.c 2011-04-19 01:02:34.000000000 +0800 ++++ linux-stage/fs/ext4/dir.c 2011-04-19 01:24:36.000000000 +0800 +@@ -242,22 +242,50 @@ + return ret; + } + ++static inline int is_32bit_api(void) ++{ ++#ifdef HAVE_IS_COMPAT_TASK ++ return is_compat_task(); ++#else ++ return (BITS_PER_LONG == 32); ++#endif ++} ++ /* * These functions convert from the major/minor hash to an f_pos * value. -- * + * - * Currently we only use major hash numer. This is unfortunate, but - * on 32-bit machines, the same VFS interface is used for lseek and - * llseek, so if we use the 64 bit offset, then the 32-bit versions of @@ -15,45 +27,50 @@ Index: linux-2.6.18-194.17.1-ext4/fs/ext4/dir.c - * a 64-bit version of the system call or the 32-bit version of the - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir - * cookie. Sigh. -+ * Whether 64-bit or 32-bit hash value is exported as file pos is -+ * controlled by "64bithash" mount option. ++ * Up layer (OSD) should specify O_32BITHASH or O_64BITHASH explicitly. ++ * On the other hand, we allow ldiskfs to be mounted directly on both 32-bit ++ * and 64-bit nodes, under such case, neither O_32BITHASH nor O_64BITHASH is ++ * specified. */ -#define hash2pos(major, minor) (major >> 1) -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -#define pos2min_hash(pos) (0) -+static inline loff_t hash2pos(struct super_block *sb, __u32 major, __u32 minor) ++static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) +{ -+ if (test_opt(sb, 64BITHASH)) -+ return (((__u64)(major >> 1) << 32) | (__u64)minor); -+ else ++ if ((filp->f_flags & O_32BITHASH) || ++ (!(filp->f_flags & O_64BITHASH) && is_32bit_api())) + return (major >> 1); ++ else ++ return (((__u64)(major >> 1) << 32) | (__u64)minor); +} + -+static inline __u32 pos2maj_hash(struct super_block *sb, loff_t pos) ++static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) +{ -+ if (test_opt(sb, 64BITHASH)) -+ return (((pos >> 32) << 1) & 0xffffffff); -+ else ++ if ((filp->f_flags & O_32BITHASH) || ++ (!(filp->f_flags & O_64BITHASH) && is_32bit_api())) + return ((pos << 1) & 0xffffffff); ++ else ++ return (((pos >> 32) << 1) & 0xffffffff); +} + -+static inline __u32 pos2min_hash(struct super_block *sb, loff_t pos) ++static inline __u32 pos2min_hash(struct file *filp, loff_t pos) +{ -+ if (test_opt(sb, 64BITHASH)) -+ return (pos & 0xffffffff); -+ else ++ if ((filp->f_flags & O_32BITHASH) || ++ (!(filp->f_flags & O_64BITHASH) && is_32bit_api())) + return (0); ++ else ++ return (pos & 0xffffffff); +} /* * This structure holds the nodes of the red-black tree used to store -@@ -318,15 +331,16 @@ static void free_rb_tree_fname(struct rb +@@ -318,15 +346,16 @@ } -static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) -+static struct dir_private_info *ext4_htree_create_dir_info( -+ struct super_block *sb, loff_t pos) ++static struct dir_private_info * ++ext4_htree_create_dir_info(struct file *filp, loff_t pos) { struct dir_private_info *p; @@ -62,79 +79,56 @@ Index: linux-2.6.18-194.17.1-ext4/fs/ext4/dir.c return NULL; - p->curr_hash = pos2maj_hash(pos); - p->curr_minor_hash = pos2min_hash(pos); -+ p->curr_hash = pos2maj_hash(sb, pos); -+ p->curr_minor_hash = pos2min_hash(sb, pos); ++ p->curr_hash = pos2maj_hash(filp, pos); ++ p->curr_minor_hash = pos2min_hash(filp, pos); return p; } -@@ -422,7 +436,7 @@ static int call_filldir(struct file *fil +@@ -422,7 +451,7 @@ "null fname?!?\n"); return 0; } - curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ curr_pos = hash2pos(sb, fname->hash, fname->minor_hash); ++ curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); while (fname) { error = filldir(dirent, fname->name, fname->name_len, curr_pos, -@@ -447,7 +461,7 @@ static int ext4_dx_readdir(struct file * +@@ -447,7 +476,7 @@ int ret; if (!info) { - info = ext4_htree_create_dir_info(filp->f_pos); -+ info = ext4_htree_create_dir_info(inode->i_sb, filp->f_pos); ++ info = ext4_htree_create_dir_info(filp, filp->f_pos); if (!info) return -ENOMEM; filp->private_data = info; -@@ -461,8 +475,8 @@ static int ext4_dx_readdir(struct file * +@@ -461,8 +490,8 @@ free_rb_tree_fname(&info->root); info->curr_node = NULL; info->extra_fname = NULL; - info->curr_hash = pos2maj_hash(filp->f_pos); - info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ info->curr_hash = pos2maj_hash(inode->i_sb, filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(inode->i_sb, filp->f_pos); ++ info->curr_hash = pos2maj_hash(filp, filp->f_pos); ++ info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); } /* -Index: linux-2.6.18-194.17.1-ext4/fs/ext4/ext4.h -=================================================================== ---- linux-2.6.18-194.17.1-ext4.orig/fs/ext4/ext4.h 2010-12-03 11:05:04.000000000 +0300 -+++ linux-2.6.18-194.17.1-ext4/fs/ext4/ext4.h 2010-12-16 00:13:32.000000000 +0300 -@@ -741,6 +741,7 @@ struct ext4_inode_info { - #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ - #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ - #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ -+#define EXT4_MOUNT_64BITHASH 0x4000000 /* export 64-bit name hash */ - #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ - #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ - #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ -Index: linux-2.6.18-194.17.1-ext4/fs/ext4/super.c +Index: linux-stage/fs/ext4/ext4.h =================================================================== ---- linux-2.6.18-194.17.1-ext4.orig/fs/ext4/super.c 2010-12-02 21:10:39.000000000 +0300 -+++ linux-2.6.18-194.17.1-ext4/fs/ext4/super.c 2010-12-15 23:57:43.000000000 +0300 -@@ -1479,6 +1479,7 @@ enum { - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_bigendian_extents, - Opt_force_over_16tb, - Opt_no_mbcache, -+ Opt_64bithash, - }; +--- linux-stage.orig/fs/ext4/ext4.h 2011-04-19 01:02:34.000000000 +0800 ++++ linux-stage/fs/ext4/ext4.h 2011-04-19 01:02:34.000000000 +0800 +@@ -55,6 +55,14 @@ + #define ext4_debug(f, a...) do {} while (0) + #endif - static match_table_t tokens = { -@@ -1552,6 +1553,7 @@ static match_table_t tokens = { - {Opt_bigendian_extents, "bigendian_extents"}, - {Opt_force_over_16tb, "force_over_16tb"}, - {Opt_no_mbcache, "no_mbcache"}, -+ {Opt_64bithash, "64bithash"}, - {Opt_err, NULL}, - }; ++#ifndef O_32BITHASH ++# define O_32BITHASH 0x10000000 ++#endif ++ ++#ifndef O_64BITHASH ++# define O_64BITHASH 0x20000000 ++#endif ++ + #define HAVE_DISK_INODE_VERSION -@@ -2004,6 +2006,9 @@ set_qf_format: - case Opt_no_mbcache: - set_opt(sbi->s_mount_opt, NO_MBCACHE); - break; -+ case Opt_64bithash: -+ set_opt(sbi->s_mount_opt, 64BITHASH); -+ break; - default: - ext4_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" " + /* data type for block offset of block group */ diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series index 992478f..eda1320a 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series @@ -28,4 +28,4 @@ ext4_data_in_dirent-rhel6.patch ext4-disable-mb-cache-rhel6.patch ext4-back-dquot-to-rhel6.patch ext4-nocmtime-2.6-rhel5.patch -ext4-export-64bit-name-hash-rhel6.patch +ext4-export-64bit-name-hash.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series index f08ee3a..9f36eb7 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series @@ -32,3 +32,4 @@ ext4-dynlocks-2.6-rhel5.patch ext4-hash-indexed-dir-dotdot-update.patch ext4-disable-write-bar-by-default.patch ext4-mballoc-pa_free-mismatch.patch +ext4-export-64bit-name-hash.patch diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index a45cd28..e6c6103 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -457,6 +457,7 @@ struct dt_index_operations { */ struct dt_it *(*init)(const struct lu_env *env, struct dt_object *dt, + __u32 attr, struct lustre_capa *capa); void (*fini)(const struct lu_env *env, struct dt_it *di); diff --git a/lustre/include/lclient.h b/lustre/include/lclient.h index 1f5d908..e937e00 100644 --- a/lustre/include/lclient.h +++ b/lustre/include/lclient.h @@ -361,8 +361,7 @@ void cl_inode_fini(struct inode *inode); int cl_local_size(struct inode *inode); __u16 ll_dirent_type_get(struct lu_dirent *ent); -__u64 cl_fid_build_ino(const struct lu_fid *fid); -__u32 cl_fid_build_ino32(const struct lu_fid *fid); +__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit); __u32 cl_fid_build_gen(const struct lu_fid *fid); #ifdef INVARIANT_CHECK diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index db013f2..9df5862 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -48,6 +48,14 @@ #include +/* Some old kernels (like 2.6.9) may not define such SEEK_XXX. So the + * definition allows to compile lustre client on more OS platforms. */ +#ifndef SEEK_SET + #define SEEK_SET 0 + #define SEEK_CUR 1 + #define SEEK_END 2 +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) struct ll_iattr { struct iattr iattr; diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index a139e998..8b268f3 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -790,8 +790,9 @@ static inline int lu_fid_cmp(const struct lu_fid *f0, * enumeration. */ enum lu_dirent_attrs { - LUDA_FID = 0x0001, - LUDA_TYPE = 0x0002, + LUDA_FID = 0x0001, + LUDA_TYPE = 0x0002, + LUDA_64BITHASH = 0x0004, }; /** @@ -900,7 +901,8 @@ static inline int lu_dirent_size(struct lu_dirent *ent) return le16_to_cpu(ent->lde_reclen); } -#define DIR_END_OFF 0xfffffffffffffffeULL +#define DIR_END_OFF 0x7fffffffffffffffULL +#define DIR_END_OFF_32BIT 0x7fffffffUL /** @} lu_dir */ @@ -1070,6 +1072,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_MAX_EASIZE 0x800000000ULL /* preserved for large EA */ #define OBD_CONNECT_FULL20 0x1000000000ULL /* it is 2.0 client */ #define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client supports layout lock */ +#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits + * directory hash */ /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ @@ -1090,7 +1094,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID | \ LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_VBR | \ OBD_CONNECT_LOV_V3 | OBD_CONNECT_SOM | \ - OBD_CONNECT_FULL20) + OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ diff --git a/lustre/lclient/lcommon_cl.c b/lustre/lclient/lcommon_cl.c index 6bb311a..fede3b4 100644 --- a/lustre/lclient/lcommon_cl.c +++ b/lustre/lclient/lcommon_cl.c @@ -1304,21 +1304,16 @@ __u16 ll_dirent_type_get(struct lu_dirent *ent) } /** - * for 32 bit inode numbers directly map seq+oid to 32bit number. - */ -__u32 cl_fid_build_ino32(const struct lu_fid *fid) -{ - RETURN(fid_flatten32(fid)); -} - -/** * build inode number from passed @fid */ -__u64 cl_fid_build_ino(const struct lu_fid *fid) +__u64 cl_fid_build_ino(const struct lu_fid *fid, int need_32bit) { #if BITS_PER_LONG == 32 RETURN(fid_flatten32(fid)); #else - RETURN(fid_flatten(fid)); + if (need_32bit) + RETURN(fid_flatten32(fid)); + else + RETURN(fid_flatten(fid)); #endif } diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index fb33c35..02a2802 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -259,7 +259,7 @@ ssize_t llu_iop_filldirentries(struct inode *dir, _SYSIO_OFF_T *basep, fid = ent->lde_fid; name = ent->lde_name; fid_le_to_cpu(&fid, &fid); - ino = cl_fid_build_ino(&fid); + ino = cl_fid_build_ino(&fid, 0); type = ll_dirent_type_get(ent); done = filldir(buf, nbytes, name, namelen, (loff_t)hash, ino, type, diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 8ab16d7..3f202c9 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -200,7 +200,7 @@ static int ll_set_dd(struct dentry *de) RETURN(0); } -int ll_dops_init(struct dentry *de, int block) +int ll_dops_init(struct dentry *de, int block, int init_sa) { struct ll_dentry_data *lld = ll_d2d(de); int rc = 0; @@ -213,7 +213,7 @@ int ll_dops_init(struct dentry *de, int block) lld = ll_d2d(de); } - if (lld != NULL) + if (lld != NULL && init_sa != 0) lld->lld_sa_generation = 0; de->d_op = &ll_d_ops; diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 84e93e4..6c91602 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -439,20 +439,19 @@ fail: int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) { - struct inode *inode = filp->f_dentry->d_inode; - struct ll_inode_info *info = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); - __u64 pos = fd->fd_dir.lfd_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct ll_inode_info *info = ll_i2info(inode); + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); + __u64 pos = fd->fd_dir.lfd_pos; + int need_32bit = ll_need_32bit_api(sbi); struct page *page; struct ll_dir_chain chain; - int rc, need_32bit; - int done; - int shift; - __u16 type; + int done; + int shift; + int rc; ENTRY; - need_32bit = ll_need_32bit_api(sbi); CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n", inode->i_ino, inode->i_generation, inode, (unsigned long)pos, i_size_read(inode), need_32bit); @@ -486,11 +485,11 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) dp = page_address(page); for (ent = lu_dirent_start(dp); ent != NULL && !done; ent = lu_dirent_next(ent)) { - char *name; + __u16 type; int namelen; struct lu_fid fid; - __u64 ino; __u64 lhash; + __u64 ino; /* * XXX: implement correct swabbing here. @@ -511,17 +510,18 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) */ continue; - name = ent->lde_name; - fid_le_to_cpu(&fid, &ent->lde_fid); - if (need_32bit) { + if (need_32bit) lhash = hash >> 32; - ino = cl_fid_build_ino32(&fid); - } else { + else lhash = hash; - ino = cl_fid_build_ino(&fid); - } + fid_le_to_cpu(&fid, &ent->lde_fid); + ino = cl_fid_build_ino(&fid,need_32bit); type = ll_dirent_type_get(ent); - done = filldir(cookie, name, namelen, + /* For 'll_nfs_get_name_filldir()', it will try + * to access the 'ent' through its 'lde_name', + * so the parameter 'name' for 'filldir()' must + * be part of the 'ent'. */ + done = filldir(cookie, ent->lde_name, namelen, lhash, ino, type); } next = le64_to_cpu(dp->ldp_hash_end); @@ -557,10 +557,14 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) } fd->fd_dir.lfd_pos = pos; - if (need_32bit) - filp->f_pos = pos >> 32; - else + if (need_32bit) { + if (pos == DIR_END_OFF) + filp->f_pos = DIR_END_OFF_32BIT; + else + filp->f_pos = pos >> 32; + } else { filp->f_pos = pos; + } filp->f_version = inode->i_version; touch_atime(filp->f_vfsmnt, filp->f_dentry); @@ -1371,33 +1375,52 @@ out_free: static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) { + struct inode *inode = file->f_mapping->host; struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - loff_t pos = file->f_pos; - loff_t ret; + int need_32bit = ll_need_32bit_api(ll_i2sbi(inode)); + loff_t ret = -EINVAL; ENTRY; - if (origin == 1 && offset >= 0 && file->f_pos == DIR_END_OFF) { - CWARN("end of dir hash, DIR_END_OFF(-2) is returned\n"); - RETURN(DIR_END_OFF); + cfs_mutex_lock(&inode->i_mutex); + switch (origin) { + case SEEK_SET: + break; + case SEEK_CUR: + offset += file->f_pos; + break; + case SEEK_END: + if (offset > 0) + GOTO(out, ret); + if (need_32bit) + offset += DIR_END_OFF_32BIT; + else + offset += DIR_END_OFF; + break; + default: + GOTO(out, ret); } - ret = default_llseek(file, offset, origin); - if (ret >= 0) { - struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode); - - if (ll_need_32bit_api(sbi)) { - if (file->f_pos >> 32) { - /* hash overflow, simple revert */ - file->f_pos = pos; - RETURN(-EOVERFLOW); + if (offset >= 0 && + ((need_32bit && offset <= DIR_END_OFF_32BIT) || !need_32bit)) { + if (offset != file->f_pos) { + if (need_32bit) { + if (offset == DIR_END_OFF_32BIT) + fd->fd_dir.lfd_pos = DIR_END_OFF; + else + fd->fd_dir.lfd_pos = offset << 32; } else { - fd->fd_dir.lfd_pos = file->f_pos << 32; + fd->fd_dir.lfd_pos = offset; } - } else { - fd->fd_dir.lfd_pos = file->f_pos; + file->f_pos = offset; + file->f_version = 0; } + ret = offset; } - RETURN(ret); + EXIT; + +out: + cfs_mutex_unlock(&inode->i_mutex); + return ret; } int ll_dir_open(struct inode *inode, struct file *file) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 22f5958a..07bd889 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2302,22 +2302,19 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat) { struct inode *inode = de->d_inode; + struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_inode_info *lli = ll_i2info(inode); int res = 0; res = ll_inode_revalidate_it(de, it, MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETATTR, 1); + ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); if (res) return res; stat->dev = inode->i_sb->s_dev; - if (ll_need_32bit_api(ll_i2sbi(inode))) - stat->ino = cl_fid_build_ino32(&lli->lli_fid); - else - stat->ino = inode->i_ino; - + stat->ino = cl_fid_build_ino(&lli->lli_fid, ll_need_32bit_api(sbi)); stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; stat->uid = inode->i_uid; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 43186e9..f1db013 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -78,6 +78,12 @@ extern struct file_operations ll_pgcache_seq_fops; /* remote client permission cache */ #define REMOTE_PERM_HASHSIZE 16 +struct ll_getname_data { + char *lgd_name; /* points to a buffer with NAME_MAX+1 size */ + struct lu_fid lgd_fid; /* target fid we are looking for */ + int lgd_found; /* inode matched? */ +}; + /* llite setxid/access permission for user on remote client */ struct ll_remote_perm { cfs_hlist_node_t lrp_list; @@ -581,6 +587,7 @@ extern struct file_operations ll_dir_operations; extern struct inode_operations ll_dir_inode_operations; struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash, int exact, struct ll_dir_chain *chain); +int ll_readdir(struct file *filp, void *cookie, filldir_t filldir); int ll_get_mdt_idx(struct inode *inode); /* llite/namei.c */ @@ -679,7 +686,7 @@ int ll_fid2path(struct obd_export *exp, void *arg); /** * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases */ -int ll_dops_init(struct dentry *de, int block); +int ll_dops_init(struct dentry *de, int block, int init_sa); extern cfs_spinlock_t ll_lookup_lock; extern struct dentry_operations ll_d_ops; void ll_intent_drop_lock(struct lookup_intent *); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index a67031e..cd33607 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -202,7 +202,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET| OBD_CONNECT_FID | OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 | OBD_CONNECT_RMT_CLIENT | - OBD_CONNECT_VBR | OBD_CONNECT_FULL20; + OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | + OBD_CONNECT_64BITHASH; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -460,7 +461,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) } LASSERT(fid_is_sane(&sbi->ll_root_fid)); - root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid), &lmd); + root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, 0), &lmd); md_free_lustre_md(sbi->ll_md_exp, &lmd); ptlrpc_req_finished(request); @@ -1552,7 +1553,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) cfs_spin_unlock(&lli->lli_lock); } #endif - inode->i_ino = cl_fid_build_ino(&body->fid1); + inode->i_ino = cl_fid_build_ino(&body->fid1, 0); inode->i_generation = cl_fid_build_gen(&body->fid1); if (body->valid & OBD_MD_FLATIME) { @@ -1977,7 +1978,7 @@ int ll_prep_inode(struct inode **inode, */ LASSERT(fid_is_sane(&md.body->fid1)); - *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1), &md); + *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1, 0), &md); if (*inode == NULL || IS_ERR(*inode)) { if (md.lsm) obd_free_memmd(sbi->ll_dt_exp, &md.lsm); diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 40b8513..ffb8a07 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -73,7 +73,7 @@ static struct inode *search_inode_for_lustre(struct super_block *sb, struct ptlrpc_request *req = NULL; struct inode *inode = NULL; int eadatalen = 0; - unsigned long hash = (unsigned long) cl_fid_build_ino(fid); + unsigned long hash = (unsigned long) cl_fid_build_ino(fid, 0); struct md_op_data *op_data; int rc; ENTRY; @@ -141,7 +141,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, if (!result) RETURN(ERR_PTR(-ENOMEM)); - ll_dops_init(result, 1); + ll_dops_init(result, 1, 0); RETURN(result); } @@ -184,6 +184,63 @@ static int ll_encode_fh(struct dentry *de, __u32 *fh, int *plen, RETURN(LUSTRE_NFS_FID); } +static int ll_nfs_get_name_filldir(void *cookie, const char *name, int namelen, + loff_t hash, u64 ino, unsigned type) +{ + /* It is hack to access lde_fid for comparison with lgd_fid. + * So the input 'name' must be part of the 'lu_dirent'. */ + struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name); + struct ll_getname_data *lgd = cookie; + struct lu_fid fid; + + fid_le_to_cpu(&fid, &lde->lde_fid); + if (lu_fid_eq(&fid, &lgd->lgd_fid)) { + memcpy(lgd->lgd_name, name, namelen); + lgd->lgd_name[namelen] = 0; + lgd->lgd_found = 1; + } + return lgd->lgd_found; +} + +static int ll_get_name(struct dentry *dentry, char *name, + struct dentry *child) +{ + struct inode *dir = dentry->d_inode; + struct file *filp; + struct ll_getname_data lgd; + int rc; + ENTRY; + + if (!dir || !S_ISDIR(dir->i_mode)) + GOTO(out, rc = -ENOTDIR); + + if (!dir->i_fop) + GOTO(out, rc = -EINVAL); + + filp = ll_dentry_open(dget(dentry), NULL, O_RDONLY, current_cred()); + if (IS_ERR(filp)) + GOTO(out, rc = PTR_ERR(filp)); + + if (!filp->f_op->readdir) + GOTO(out_close, rc = -EINVAL); + + lgd.lgd_name = name; + lgd.lgd_fid = ll_i2info(child->d_inode)->lli_fid; + lgd.lgd_found = 0; + + cfs_mutex_lock(&dir->i_mutex); + rc = ll_readdir(filp, &lgd, ll_nfs_get_name_filldir); + cfs_mutex_unlock(&dir->i_mutex); + if (!rc && !lgd.lgd_found) + rc = -ENOENT; + EXIT; + +out_close: + fput(filp); +out: + return rc; +} + #ifdef HAVE_FH_TO_DENTRY static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) @@ -290,6 +347,7 @@ static struct dentry *ll_get_parent(struct dentry *dchild) struct export_operations lustre_export_operations = { .get_parent = ll_get_parent, .encode_fh = ll_encode_fh, + .get_name = ll_get_name, #ifdef HAVE_FH_TO_DENTRY .fh_to_dentry = ll_fh_to_dentry, .fh_to_parent = ll_fh_to_parent, diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index f9f9bc7..9e4898c 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -388,7 +388,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) lock_dentry(dentry); __d_drop(dentry); unlock_dentry(dentry); - ll_dops_init(dentry, 0); + ll_dops_init(dentry, 0, 1); d_rehash_cond(dentry, 0); /* avoid taking dcache_lock inside */ spin_unlock(&dcache_lock); cfs_spin_unlock(&ll_lookup_lock); @@ -409,7 +409,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) unlock_dentry(last_discon); spin_unlock(&dcache_lock); cfs_spin_unlock(&ll_lookup_lock); - ll_dops_init(last_discon, 1); + ll_dops_init(last_discon, 1, 1); d_rehash(de); d_move(last_discon, de); iput(inode); @@ -460,14 +460,14 @@ int ll_lookup_it_finish(struct ptlrpc_request *request, Everybody else who needs correct file size would call cl_glimpse_size or some equivalent themselves anyway. Also see bug 7198. */ - ll_dops_init(*de, 1); + ll_dops_init(*de, 1, 1); *de = ll_find_alias(inode, *de); if (*de != save) { struct ll_dentry_data *lld = ll_d2d(*de); /* just make sure the ll_dentry_data is ready */ if (unlikely(lld == NULL)) - ll_dops_init(*de, 1); + ll_dops_init(*de, 1, 1); } /* we have lookup look - unhide dentry */ if (bits & MDS_INODELOCK_LOOKUP) { @@ -476,7 +476,7 @@ int ll_lookup_it_finish(struct ptlrpc_request *request, unlock_dentry(*de); } } else { - ll_dops_init(*de, 1); + ll_dops_init(*de, 1, 1); /* Check that parent has UPDATE lock. If there is none, we cannot afford to hash this dentry (done by ll_d_add) as it might get picked up later when UPDATE lock will appear */ @@ -650,7 +650,7 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, if ((nd->flags & LOOKUP_CREATE ) && !(nd->flags & LOOKUP_OPEN)) { /* We are sure this is new dentry, so we need to create our private data and set the dentry ops */ - ll_dops_init(dentry, 1); + ll_dops_init(dentry, 1, 1); RETURN(NULL); } it = ll_convert_intent(&nd->intent.open, nd->flags); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index f0efe38..f9d0c7f 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -250,7 +250,7 @@ static int mdd_dir_is_empty(const struct lu_env *env, RETURN(-ENOTDIR); iops = &obj->do_index_ops->dio_it; - it = iops->init(env, obj, BYPASS_CAPA); + it = iops->init(env, obj, LUDA_64BITHASH, BYPASS_CAPA); if (!IS_ERR(it)) { result = iops->get(env, it, (const void *)""); if (result > 0) { diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 581b876..8f8a424 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -2290,7 +2290,7 @@ static int __mdd_readpage(const struct lu_env *env, struct mdd_object *obj, * iterate through directory and fill pages from @rdpg */ iops = &next->do_index_ops->dio_it; - it = iops->init(env, next, mdd_object_capa(env, obj)); + it = iops->init(env, next, rdpg->rp_attrs, mdd_object_capa(env, obj)); if (IS_ERR(it)) return PTR_ERR(it); diff --git a/lustre/mdd/mdd_orphans.c b/lustre/mdd/mdd_orphans.c index 6c29665..8c9a994 100644 --- a/lustre/mdd/mdd_orphans.c +++ b/lustre/mdd/mdd_orphans.c @@ -390,7 +390,7 @@ static int orph_index_iterate(const struct lu_env *env, /* In recovery phase, do not need for any lock here */ iops = &dor->do_index_ops->dio_it; - it = iops->init(env, dor, BYPASS_CAPA); + it = iops->init(env, dor, LUDA_64BITHASH, BYPASS_CAPA); if (!IS_ERR(it)) { result = iops->load(env, it, 0); if (result > 0) { diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 5d08c4e..094c6d2 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1489,6 +1489,8 @@ static int mdt_readpage(struct mdt_thread_info *info) } rdpg->rp_attrs = reqbody->mode; + if (info->mti_exp->exp_connect_flags & OBD_CONNECT_64BITHASH) + rdpg->rp_attrs |= LUDA_64BITHASH; rdpg->rp_count = reqbody->nlink; rdpg->rp_npages = (rdpg->rp_count + CFS_PAGE_SIZE - 1)>>CFS_PAGE_SHIFT; OBD_ALLOC(rdpg->rp_pages, rdpg->rp_npages * sizeof rdpg->rp_pages[0]); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 67351d7..bb870a0 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -805,6 +805,7 @@ static const char *obd_connect_names[] = { "large_ea", "full20", "layout_lock", + "64bithash", NULL }; diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index a1401bb..537a6b7 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1311,7 +1311,6 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) unsigned long page, s_flags; struct page *__page; int rc; - int len; ENTRY; OBD_ALLOC(ldd, sizeof(*ldd)); @@ -1364,18 +1363,11 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb) /* Glom up mount options */ memset(options, 0, CFS_PAGE_SIZE); - if (IS_MDT(ldd)) { - /* enable 64bithash for MDS by force */ - strcpy(options, "64bithash,"); - len = CFS_PAGE_SIZE - strlen(options) - 2; - strncat(options, ldd->ldd_mount_opts, len); - } else { - strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2); - } + strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2); /* Add in any mount-line options */ if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) { - len = CFS_PAGE_SIZE - strlen(options) - 2; + int len = CFS_PAGE_SIZE - strlen(options) - 2; if (*options != 0) strcat(options, ","); strncat(options, lmd->lmd_opts, len); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index c474d4c..cd4bb1a 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -3182,8 +3182,9 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt, */ static struct dt_it *osd_it_iam_init(const struct lu_env *env, - struct dt_object *dt, - struct lustre_capa *capa) + struct dt_object *dt, + __u32 unused, + struct lustre_capa *capa) { struct osd_it_iam *it; struct osd_thread_info *oti = osd_oti_get(env); @@ -3435,6 +3436,7 @@ static const struct dt_index_operations osd_index_iam_ops = { */ static struct dt_it *osd_it_ea_init(const struct lu_env *env, struct dt_object *dt, + __u32 attr, struct lustre_capa *capa) { struct osd_object *obj = osd_dt_obj(dt); @@ -3456,6 +3458,10 @@ static struct dt_it *osd_it_ea_init(const struct lu_env *env, it->oie_obj = obj; it->oie_file.f_pos = 0; it->oie_file.f_dentry = obj_dentry; + if (attr & LUDA_64BITHASH) + it->oie_file.f_flags = O_64BITHASH; + else + it->oie_file.f_flags = O_32BITHASH; it->oie_file.f_mapping = obj->oo_inode->i_mapping; it->oie_file.f_op = obj->oo_inode->i_fop; it->oie_file.private_data = NULL; diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index b5da020..f1f1f81 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -493,6 +493,8 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL); CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL); CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL); + CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL); + CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n", diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 34a659b..e3e6b6c 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -222,6 +222,8 @@ static void check_obd_connect_data(void) CHECK_CDEFINE(OBD_CONNECT_VBR); CHECK_CDEFINE(OBD_CONNECT_SKIP_ORPHAN); CHECK_CDEFINE(OBD_CONNECT_FULL20); + CHECK_CDEFINE(OBD_CONNECT_LAYOUTLOCK); + CHECK_CDEFINE(OBD_CONNECT_64BITHASH); } static void diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 9679c69..526e5ee 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -490,6 +490,8 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL); CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL); CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL); + CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL); + CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",