Whamcloud - gitweb
b=20581 MDS returns full hash for readdir to decrease hash collision
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext3-export-64bit-name-hash.patch
1 Index: linux-2.6.18-194.17.1-ext3/fs/ext3/dir.c
2 ===================================================================
3 --- linux-2.6.18-194.17.1-ext3.orig/fs/ext3/dir.c       2010-11-30 22:46:09.000000000 +0300
4 +++ linux-2.6.18-194.17.1-ext3/fs/ext3/dir.c    2010-12-16 00:10:12.000000000 +0300
5 @@ -240,19 +240,34 @@ out:
6  /*
7   * These functions convert from the major/minor hash to an f_pos
8   * value.
9 - * 
10 - * Currently we only use major hash numer.  This is unfortunate, but
11 - * on 32-bit machines, the same VFS interface is used for lseek and
12 - * llseek, so if we use the 64 bit offset, then the 32-bit versions of
13 - * lseek/telldir/seekdir will blow out spectacularly, and from within
14 - * the ext2 low-level routine, we don't know if we're being called by
15 - * a 64-bit version of the system call or the 32-bit version of the
16 - * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
17 - * cookie.  Sigh.
18 + *
19 + * Whether a 64-bit or a 32-bit hash value is exported as the file
20 + * position is controlled by the "64bithash" mount option.
21   */
22 -#define hash2pos(major, minor) (major >> 1)
23 -#define pos2maj_hash(pos)      ((pos << 1) & 0xffffffff)
24 -#define pos2min_hash(pos)      (0)
25 +
26 +static inline loff_t hash2pos(struct super_block *sb, __u32 major, __u32 minor)
27 +{ /* build the f_pos value for a (major, minor) hash pair */
28 +       if (test_opt(sb, 64BITHASH)) /* 64BITHASH mount flag is set on sb */
29 +               return (((__u64)(major >> 1) << 32) | (__u64)minor); /* major >> 1 in the upper 32 bits, minor in the lower 32 */
30 +       else
31 +               return (major >> 1); /* 32-bit form: only major >> 1, minor is dropped */
32 +}
33 +
34 +static inline __u32 pos2maj_hash(struct super_block *sb, loff_t pos)
35 +{ /* recover the major hash from an f_pos value; undoes the >> 1 applied by hash2pos(), so bit 0 comes back as zero */
36 +       if (test_opt(sb, 64BITHASH)) /* 64BITHASH mount flag is set on sb */
37 +               return (((pos >> 32) << 1) & 0xffffffff); /* upper 32 bits of pos, shifted left by one */
38 +       else
39 +               return ((pos << 1) & 0xffffffff); /* pos shifted left by one, truncated to 32 bits */
40 +}
41 +
42 +static inline __u32 pos2min_hash(struct super_block *sb, loff_t pos)
43 +{ /* recover the minor hash from an f_pos value */
44 +       if (test_opt(sb, 64BITHASH)) /* 64BITHASH mount flag is set on sb */
45 +               return (pos & 0xffffffff); /* low 32 bits of pos, where hash2pos() stored minor */
46 +       else
47 +               return (0); /* 32-bit positions carry no minor hash */
48 +}
49  
50  /*
51   * This structure holds the nodes of the red-black tree used to store
52 @@ -314,7 +329,7 @@ static void free_rb_tree_fname(struct rb
53  }
54  
55  
56 -static struct dir_private_info *create_dir_info(loff_t pos)
57 +static struct dir_private_info *create_dir_info(struct super_block *sb, loff_t pos)
58  {
59         struct dir_private_info *p;
60  
61 @@ -325,8 +340,8 @@ static struct dir_private_info *create_d
62         p->curr_node = NULL;
63         p->extra_fname = NULL;
64         p->last_pos = 0;
65 -       p->curr_hash = pos2maj_hash(pos);
66 -       p->curr_minor_hash = pos2min_hash(pos);
67 +       p->curr_hash = pos2maj_hash(sb, pos);
68 +       p->curr_minor_hash = pos2min_hash(sb, pos);
69         p->next_hash = 0;
70         return p;
71  }
72 @@ -422,7 +437,7 @@ static int call_filldir(struct file * fi
73                 printk("call_filldir: called with null fname?!?\n");
74                 return 0;
75         }
76 -       curr_pos = hash2pos(fname->hash, fname->minor_hash);
77 +       curr_pos = hash2pos(sb, fname->hash, fname->minor_hash);
78         while (fname) {
79                 error = filldir(dirent, fname->name,
80                                 fname->name_len, curr_pos, 
81 @@ -447,7 +462,7 @@ static int ext3_dx_readdir(struct file *
82         int     ret;
83  
84         if (!info) {
85 -               info = create_dir_info(filp->f_pos);
86 +               info = create_dir_info(inode->i_sb, filp->f_pos);
87                 if (!info)
88                         return -ENOMEM;
89                 filp->private_data = info;
90 @@ -461,8 +476,8 @@ static int ext3_dx_readdir(struct file *
91                 free_rb_tree_fname(&info->root);
92                 info->curr_node = NULL;
93                 info->extra_fname = NULL;
94 -               info->curr_hash = pos2maj_hash(filp->f_pos);
95 -               info->curr_minor_hash = pos2min_hash(filp->f_pos);
96 +               info->curr_hash = pos2maj_hash(inode->i_sb, filp->f_pos);
97 +               info->curr_minor_hash = pos2min_hash(inode->i_sb, filp->f_pos);
98         }
99  
100         /*
101 Index: linux-2.6.18-194.17.1-ext3/fs/ext3/super.c
102 ===================================================================
103 --- linux-2.6.18-194.17.1-ext3.orig/fs/ext3/super.c     2010-11-30 22:48:01.000000000 +0300
104 +++ linux-2.6.18-194.17.1-ext3/fs/ext3/super.c  2010-12-16 00:11:59.000000000 +0300
105 @@ -742,6 +742,7 @@ enum {
106         Opt_grpquota,
107         Opt_extents, Opt_noextents, Opt_bigendian_extents, Opt_extdebug,
108         Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_maxdirsize, Opt_force_over_8tb,
109 +       Opt_64bithash,
110  };
111  
112  static match_table_t tokens = {
113 @@ -808,6 +809,7 @@ static match_table_t tokens = {
114         {Opt_force_over_8tb, "force_over_8tb"},
115         {Opt_resize, "resize"},
116         {Opt_maxdirsize, "maxdirsize=%u"},
117 +       {Opt_64bithash, "64bithash"},
118         {Opt_err, NULL}
119  };
120  
121 @@ -1195,6 +1197,9 @@ clear_qf_name:
122                 case Opt_force_over_8tb:
123                         force_over_8tb = 1;
124                         break;
125 +               case Opt_64bithash:
126 +                       set_opt(sbi->s_mount_opt, 64BITHASH);
127 +                       break;
128                 default:
129                         printk (KERN_ERR
130                                 "EXT3-fs: Unrecognized mount option \"%s\" "
131 Index: linux-2.6.18-194.17.1-ext3/include/linux/ext3_fs.h
132 ===================================================================
133 --- linux-2.6.18-194.17.1-ext3.orig/include/linux/ext3_fs.h     2010-11-30 22:52:58.000000000 +0300
134 +++ linux-2.6.18-194.17.1-ext3/include/linux/ext3_fs.h  2010-12-16 00:12:45.000000000 +0300
135 @@ -483,6 +483,8 @@ do {                                                                               \
136  #define EXT3_MOUNT_JOURNAL_ASYNC_COMMIT 0x20000000 /* Journal Async Commit */
137  #endif
138  
139 +#define EXT3_MOUNT_64BITHASH            0x40000000 /* export 64-bit name hash */
140 +
141  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
142  #ifndef clear_opt
143  #define clear_opt(o, opt)              o &= ~EXT3_MOUNT_##opt