Whamcloud - gitweb
b=20581 MDS returns full hash for readdir to decrease hash collision
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext4-export-64bit-name-hash.patch
1 Index: linux-2.6.18-194.17.1-ext4/fs/ext4/dir.c
2 ===================================================================
3 --- linux-2.6.18-194.17.1-ext4.orig/fs/ext4/dir.c       2010-12-02 16:37:05.000000000 +0300
4 +++ linux-2.6.18-194.17.1-ext4/fs/ext4/dir.c    2010-12-16 00:06:49.000000000 +0300
5 @@ -245,19 +245,32 @@ out:
6  /*
7   * These functions convert from the major/minor hash to an f_pos
8   * value.
9 - *
10 - * Currently we only use major hash numer.  This is unfortunate, but
11 - * on 32-bit machines, the same VFS interface is used for lseek and
12 - * llseek, so if we use the 64 bit offset, then the 32-bit versions of
13 - * lseek/telldir/seekdir will blow out spectacularly, and from within
14 - * the ext2 low-level routine, we don't know if we're being called by
15 - * a 64-bit version of the system call or the 32-bit version of the
16 - * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
17 - * cookie.  Sigh.
18 + * Whether the full 64-bit hash (major and minor) or only the 32-bit
19 + * major hash is exported as f_pos depends on the "64bithash" mount option.
20   */
21 -#define hash2pos(major, minor) (major >> 1)
22 -#define pos2maj_hash(pos)      ((pos << 1) & 0xffffffff)
23 -#define pos2min_hash(pos)      (0)
24 +static inline loff_t hash2pos(struct super_block *sb, __u32 major, __u32 minor)
25 +{
26 +       __u64 half = major >> 1;
27 +
28 +       /* 64bithash: shifted major in the upper word, minor in the lower */
29 +       return test_opt(sb, 64BITHASH) ? ((half << 32) | minor) : half;
30 +}
31 +
32 +static inline __u32 pos2maj_hash(struct super_block *sb, loff_t pos)
33 +{
34 +       /* with 64bithash the shifted major hash sits in the upper 32 bits */
35 +       if (test_opt(sb, 64BITHASH))
36 +               pos >>= 32;
37 +       return (pos << 1) & 0xffffffff;
38 +}
39 +
40 +static inline __u32 pos2min_hash(struct super_block *sb, loff_t pos)
41 +{
42 +       /* the minor hash is only carried in the position with 64bithash */
43 +       if (!test_opt(sb, 64BITHASH))
44 +               return 0;
45 +       return pos & 0xffffffff;
46 +}
47  
48  /*
49   * This structure holds the nodes of the red-black tree used to store
50 @@ -318,15 +331,16 @@ static void free_rb_tree_fname(struct rb
51  }
52  
53  
54 -static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
55 +static struct dir_private_info *ext4_htree_create_dir_info(
56 +        struct super_block *sb, loff_t pos)
57  {
58         struct dir_private_info *p;
59  
60         p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
61         if (!p)
62                 return NULL;
63 -       p->curr_hash = pos2maj_hash(pos);
64 -       p->curr_minor_hash = pos2min_hash(pos);
65 +       p->curr_hash = pos2maj_hash(sb, pos);
66 +       p->curr_minor_hash = pos2min_hash(sb, pos);
67         return p;
68  }
69  
70 @@ -422,7 +436,7 @@ static int call_filldir(struct file *fil
71                        "null fname?!?\n");
72                 return 0;
73         }
74 -       curr_pos = hash2pos(fname->hash, fname->minor_hash);
75 +       curr_pos = hash2pos(sb, fname->hash, fname->minor_hash);
76         while (fname) {
77                 error = filldir(dirent, fname->name,
78                                 fname->name_len, curr_pos,
79 @@ -447,7 +461,7 @@ static int ext4_dx_readdir(struct file *
80         int     ret;
81  
82         if (!info) {
83 -               info = ext4_htree_create_dir_info(filp->f_pos);
84 +               info = ext4_htree_create_dir_info(inode->i_sb, filp->f_pos);
85                 if (!info)
86                         return -ENOMEM;
87                 filp->private_data = info;
88 @@ -461,8 +475,8 @@ static int ext4_dx_readdir(struct file *
89                 free_rb_tree_fname(&info->root);
90                 info->curr_node = NULL;
91                 info->extra_fname = NULL;
92 -               info->curr_hash = pos2maj_hash(filp->f_pos);
93 -               info->curr_minor_hash = pos2min_hash(filp->f_pos);
94 +               info->curr_hash = pos2maj_hash(inode->i_sb, filp->f_pos);
95 +               info->curr_minor_hash = pos2min_hash(inode->i_sb, filp->f_pos);
96         }
97  
98         /*
99 Index: linux-2.6.18-194.17.1-ext4/fs/ext4/ext4.h
100 ===================================================================
101 --- linux-2.6.18-194.17.1-ext4.orig/fs/ext4/ext4.h      2010-12-03 11:05:04.000000000 +0300
102 +++ linux-2.6.18-194.17.1-ext4/fs/ext4/ext4.h   2010-12-16 00:13:32.000000000 +0300
103 @@ -741,6 +741,7 @@ struct ext4_inode_info {
104  #define EXT4_MOUNT_JOURNAL_CHECKSUM    0x800000 /* Journal checksums */
105  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT        0x1000000 /* Journal Async Commit */
106  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
107 +#define EXT4_MOUNT_64BITHASH           0x4000000 /* export 64-bit name hash */
108  #define EXT4_MOUNT_DELALLOC            0x8000000 /* Delalloc support */
109  #define EXT4_MOUNT_DATA_ERR_ABORT      0x10000000 /* Abort on file data write */
110  #define EXT4_MOUNT_BLOCK_VALIDITY      0x20000000 /* Block validity checking */
111 Index: linux-2.6.18-194.17.1-ext4/fs/ext4/super.c
112 ===================================================================
113 --- linux-2.6.18-194.17.1-ext4.orig/fs/ext4/super.c     2010-12-02 21:10:39.000000000 +0300
114 +++ linux-2.6.18-194.17.1-ext4/fs/ext4/super.c  2010-12-15 23:57:43.000000000 +0300
115 @@ -1479,6 +1479,7 @@ enum {
116         Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_bigendian_extents,
117         Opt_force_over_16tb,
118         Opt_no_mbcache,
119 +       Opt_64bithash,
120  };
121  
122  static match_table_t tokens = {
123 @@ -1552,6 +1553,7 @@ static match_table_t tokens = {
124         {Opt_bigendian_extents, "bigendian_extents"},
125         {Opt_force_over_16tb, "force_over_16tb"},
126         {Opt_no_mbcache, "no_mbcache"},
127 +       {Opt_64bithash, "64bithash"},
128         {Opt_err, NULL},
129  };
130  
131 @@ -2004,6 +2006,9 @@ set_qf_format:
132                 case Opt_no_mbcache:
133                         set_opt(sbi->s_mount_opt, NO_MBCACHE);
134                         break;
135 +               case Opt_64bithash:
136 +                       set_opt(sbi->s_mount_opt, 64BITHASH);
137 +                       break;
138                 default:
139                         ext4_msg(sb, KERN_ERR,
140                                "Unrecognized mount option \"%s\" "