Whamcloud - gitweb
0d9342165bb8639e15c97de74370f0b45a5eb714
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel6.3 / ext4-large-eas.patch
1 Index: linux-stage/fs/ext4/ext4.h
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/ext4.h
4 +++ linux-stage/fs/ext4/ext4.h
5 @@ -1329,6 +1329,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
6  #define EXT4_FEATURE_INCOMPAT_64BIT            0x0080
7  #define EXT4_FEATURE_INCOMPAT_MMP               0x0100
8  #define EXT4_FEATURE_INCOMPAT_FLEX_BG          0x0200
9 +#define EXT4_FEATURE_INCOMPAT_EA_INODE         0x0400
10  #define EXT4_FEATURE_INCOMPAT_DIRDATA          0x1000
11  
12  #define EXT4_FEATURE_COMPAT_SUPP       EXT2_FEATURE_COMPAT_EXT_ATTR
13 @@ -1338,6 +1339,7 @@ EXT4_INODE_BIT_FNS(state, state_flags)
14                                          EXT4_FEATURE_INCOMPAT_EXTENTS| \
15                                          EXT4_FEATURE_INCOMPAT_64BIT| \
16                                          EXT4_FEATURE_INCOMPAT_FLEX_BG| \
17 +                                        EXT4_FEATURE_INCOMPAT_EA_INODE| \
18                                          EXT4_FEATURE_INCOMPAT_MMP| \
19                                          EXT4_FEATURE_INCOMPAT_DIRDATA)
20  
21 @@ -1695,6 +1697,12 @@ struct mmpd_data {
22  #define EXT4_MMP_MAX_CHECK_INTERVAL    300UL
23  
24  /*
25 + * Maximum size of an xattr value with FEATURE_INCOMPAT_EA_INODE: 1 MiB.
26 + * This limit is arbitrary, but is reasonable for the xattr API.
27 + */
28 +#define EXT4_XATTR_MAX_LARGE_EA_SIZE    (1024 * 1024)
29 +
30 +/*
31   * Function prototypes
32   */
33  
34 @@ -1706,6 +1714,10 @@ struct mmpd_data {
35  # define ATTRIB_NORET  __attribute__((noreturn))
36  # define NORET_AND     noreturn,
37  
38 +struct ext4_xattr_ino_array {
39 +       unsigned int xia_count;         /* # of used items in the array */
40 +       unsigned int xia_inodes[0];     /* inode numbers, xia_count valid */
41 +};
42  /* bitmap.c */
43  extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
44  
45 Index: linux-stage/fs/ext4/xattr.c
46 ===================================================================
47 --- linux-stage.orig/fs/ext4/xattr.c
48 +++ linux-stage/fs/ext4/xattr.c
49 @@ -168,19 +168,26 @@ ext4_xattr_check_block(struct buffer_hea
50  }
51  
52  static inline int
53 -ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
54 +ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size,
55 +                      struct inode *inode)
56  {
57         size_t value_size = le32_to_cpu(entry->e_value_size);
58  
59 -       if (entry->e_value_block != 0 || value_size > size ||
60 -           le16_to_cpu(entry->e_value_offs) + value_size > size)
61 +       if (entry->e_value_inum == 0 && (value_size > size || /* wrap guard */
62 +           le16_to_cpu(entry->e_value_offs) + value_size > size))
63 +               return -EIO;
64 +       if (entry->e_value_inum != 0 &&
65 +           (le32_to_cpu(entry->e_value_inum) < EXT4_FIRST_INO(inode->i_sb) ||
66 +            le32_to_cpu(entry->e_value_inum) >
67 +            le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_inodes_count)))
68                 return -EIO;
69         return 0;
70  }
71  
72  static int
73  ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
74 -                     const char *name, size_t size, int sorted)
75 +                     const char *name, size_t size, int sorted,
76 +                     struct inode *inode)
77  {
78         struct ext4_xattr_entry *entry;
79         size_t name_len;
80 @@ -200,11 +207,104 @@ ext4_xattr_find_entry(struct ext4_xattr_
81                         break;
82         }
83         *pentry = entry;
84 -       if (!cmp && ext4_xattr_check_entry(entry, size))
85 +       if (!cmp && ext4_xattr_check_entry(entry, size, inode))
86                         return -EIO;
87         return cmp ? -ENODATA : 0;
88  }
89  
90 +/*
91 + * Read *size bytes of EA value from @ea_inode into @buf, one block at a
92 + * time.  *size is updated to the number of bytes actually copied.
93 + * Returns 0 on success, -errno on failure (-EIO if the value has a hole).
94 + */
95 +static int
96 +ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t *size)
97 +{
98 +       size_t blocksize = ea_inode->i_sb->s_blocksize;
99 +       size_t csize, ret_size = 0;
100 +       struct buffer_head *bh;
101 +       unsigned long block = 0;
102 +       int err = 0;
103 +
104 +       while (ret_size < *size) {
105 +               csize = (*size - ret_size) > blocksize ? blocksize :
106 +                                                       *size - ret_size;
107 +               bh = ext4_bread(NULL, ea_inode, block, 0, &err);
108 +               if (!bh) {
109 +                       /*
110 +                        * NULL with err == 0 is taken to mean an unmapped
111 +                        * block; never report a short read as success.
112 +                        */
113 +                       *size = ret_size;
114 +                       return err ? err : -EIO;
115 +               }
116 +               memcpy(buf, bh->b_data, csize);
117 +               brelse(bh);
118 +
119 +               buf += csize;
120 +               block++;
121 +               ret_size += csize;
122 +       }
123 +       *size = ret_size;
124 +       return err;
125 +}
126 +
127 +/* Get and verify EA inode @ea_ino of @parent; NULL and *err on failure. */
128 +struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, int *err)
129 +{
130 +       struct inode *ea_inode = ext4_iget(parent->i_sb, ea_ino);
131 +       int bad = !IS_ERR(ea_inode) && is_bad_inode(ea_inode);
132 +
133 +       if (IS_ERR(ea_inode) || bad) {
134 +               int rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0;
135 +               ext4_error(parent->i_sb, "error while reading EA inode %lu "
136 +                          "/ %d %d", ea_ino, rc, bad);
137 +               *err = rc != 0 ? rc : -EIO;
138 +               if (bad)        /* a bad inode still holds our reference */
139 +                       iput(ea_inode);
140 +               return NULL;
141 +       }
142 +
143 +       *err = -EINVAL;         /* result of either failed check below */
144 +       if (ea_inode->i_xattr_inode_parent != parent->i_ino ||
145 +           ea_inode->i_generation != parent->i_generation) {
146 +               ext4_error(parent->i_sb, "Backpointer from EA inode %lu "
147 +                          "to parent invalid.", ea_ino);
148 +               goto error;
149 +       }
150 +       if (!(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL)) {
151 +               ext4_error(parent->i_sb, "EA inode %lu does not have "
152 +                          "EXT4_EA_INODE_FL flag set.\n", ea_ino);
153 +               goto error;
154 +       }
155 +       *err = 0;
156 +       return ea_inode;
157 +
158 +error:
159 +       iput(ea_inode);
160 +       return NULL;
161 +}
162 +
163 +/*
164 + * Fetch the EA value kept in EA inode @ea_ino of @inode into @buffer.
165 + * @size is handed on to ext4_xattr_inode_read().  Returns 0 or an error.
166 + */
167 +static int
168 +ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer,
169 +                    size_t *size)
170 +{
171 +       int rc;
172 +       struct inode *value_inode;
173 +
174 +       value_inode = ext4_xattr_inode_iget(inode, ea_ino, &rc);
175 +       if (rc != 0)
176 +               return rc;
177 +
178 +       rc = ext4_xattr_inode_read(value_inode, buffer, size);
179 +       iput(value_inode);
180 +       return rc;
181 +}
182 +
183  static int
184  ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
185                      void *buffer, size_t buffer_size)
186 @@ -236,7 +335,8 @@ bad_block:
187         }
188         ext4_xattr_cache_insert(bh);
189         entry = BFIRST(bh);
190 -       error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
191 +       error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1,
192 +                                     inode);
193         if (error == -EIO)
194                 goto bad_block;
195         if (error)
196 @@ -246,8 +346,16 @@ bad_block:
197                 error = -ERANGE;
198                 if (size > buffer_size)
199                         goto cleanup;
200 -               memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
201 -                      size);
202 +               if (entry->e_value_inum != 0) {
203 +                       error = ext4_xattr_inode_get(inode,
204 +                                            le32_to_cpu(entry->e_value_inum),
205 +                                            buffer, &size);
206 +                       if (error)
207 +                               goto cleanup;
208 +               } else {
209 +                       memcpy(buffer, bh->b_data +
210 +                              le16_to_cpu(entry->e_value_offs), size);
211 +               }
212         }
213         error = size;
214  
215 @@ -281,7 +389,7 @@ ext4_xattr_ibody_get(struct inode *inode
216         if (error)
217                 goto cleanup;
218         error = ext4_xattr_find_entry(&entry, name_index, name,
219 -                                     end - (void *)entry, 0);
220 +                                     end - (void *)entry, 0, inode);
221         if (error)
222                 goto cleanup;
223         size = le32_to_cpu(entry->e_value_size);
224 @@ -289,8 +397,16 @@ ext4_xattr_ibody_get(struct inode *inode
225                 error = -ERANGE;
226                 if (size > buffer_size)
227                         goto cleanup;
228 -               memcpy(buffer, (void *)IFIRST(header) +
229 -                      le16_to_cpu(entry->e_value_offs), size);
230 +               if (entry->e_value_inum != 0) {
231 +                       error = ext4_xattr_inode_get(inode,
232 +                                            le32_to_cpu(entry->e_value_inum),
233 +                                            buffer, &size);
234 +                       if (error)
235 +                               goto cleanup;
236 +               } else {
237 +                       memcpy(buffer, (void *)IFIRST(header) +
238 +                              le16_to_cpu(entry->e_value_offs), size);
239 +               }
240         }
241         error = size;
242  
243 @@ -513,7 +629,7 @@ static size_t ext4_xattr_free_space(stru
244  {
245         for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
246                 *total += EXT4_XATTR_LEN(last->e_name_len);
247 -               if (!last->e_value_block && last->e_value_size) {
248 +               if (last->e_value_inum == 0 && last->e_value_size > 0) {
249                         size_t offs = le16_to_cpu(last->e_value_offs);
250                         if (offs < *min_offs)
251                                 *min_offs = offs;
252 @@ -522,11 +638,159 @@ static size_t ext4_xattr_free_space(stru
253         return (*min_offs - ((void *)last - base) - sizeof(__u32));
254  }
255  
256 +/* Write @bufsize bytes from @buf into @ea_inode: allocate the blocks, copy
257 + * the data through the journal, then set the inode size to the bytes written.
258 + * Returns 0, or the error from the helper that failed. */
259 +static int
260 +ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
261 +                      const void *buf, int bufsize)
262 +{
263 +       struct buffer_head *bh = NULL, dummy;
264 +       unsigned long block = 0;
265 +       unsigned blocksize = ea_inode->i_sb->s_blocksize;
266 +       unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
267 +       int csize, wsize = 0;
268 +       int ret = 0;
269 +       int retries = 0;
270 +
271 +retry:
272 +       while (ret >= 0 && ret < max_blocks) {  /* pass 1: allocate blocks */
273 +               block += ret;           /* step over what the last call did */
274 +               max_blocks -= ret;
275 +
276 +               ret = ext4_get_blocks(handle, ea_inode, block, max_blocks,
277 +                                     &dummy, EXT4_GET_BLOCKS_CREATE);
278 +               if (ret <= 0) {
279 +                       ext4_mark_inode_dirty(handle, ea_inode);
280 +                       if (ret == -ENOSPC &&
281 +                           ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
282 +                               ret = 0;        /* so block/max_blocks stay put */
283 +                               goto retry;
284 +                       }
285 +                       break;
286 +               }
287 +       }
288 +
289 +       if (ret < 0)
290 +               return ret;
291 +       /* pass 2: copy the value in, one journalled buffer per block */
292 +       block = 0;
293 +       while (wsize < bufsize) {
294 +               if (bh != NULL)         /* buffer of the previous iteration */
295 +                       brelse(bh);
296 +               csize = (bufsize - wsize) > blocksize ? blocksize :
297 +                                                               bufsize - wsize;
298 +               bh = ext4_getblk(handle, ea_inode, block, 0, &ret);
299 +               if (!bh)
300 +                       goto out;
301 +               ret = ext4_journal_get_write_access(handle, bh);
302 +               if (ret)
303 +                       goto out;
304 +
305 +               memcpy(bh->b_data, buf, csize);
306 +               set_buffer_uptodate(bh);
307 +               ext4_handle_dirty_metadata(handle, ea_inode, bh);
308 +
309 +               buf += csize;
310 +               wsize += csize;
311 +               block += 1;
312 +       }
313 +
314 +       i_size_write(ea_inode, wsize);  /* file size == bytes written */
315 +       ext4_update_i_disksize(ea_inode, wsize);
316 +
317 +       ext4_mark_inode_dirty(handle, ea_inode);
318 +
319 +out:
320 +       brelse(bh);     /* last buffer, or NULL if ext4_getblk() failed */
321 +
322 +       return ret;
323 +}
324 +
325 +/*
326 + * Allocate a new regular-file inode that will hold the value of a large
327 + * EA of @inode, flag it as an EA inode and link it back to @inode.
328 + * Returns the new inode, or the ERR_PTR handed back by ext4_new_inode().
329 + */
330 +static struct inode *
331 +ext4_xattr_inode_create(handle_t *handle, struct inode *inode)
332 +{
333 +       struct inode *value_inode;
334 +
335 +       /*
336 +        * Use the inode following @inode as the allocation goal so the EA
337 +        * inode lands in the same group, or one close to it.
338 +        */
339 +       value_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
340 +                                    S_IFREG|0600, NULL, inode->i_ino + 1);
341 +       if (IS_ERR(value_inode))
342 +               return value_inode;
343 +
344 +       value_inode->i_op = &ext4_file_inode_operations;
345 +       value_inode->i_fop = &ext4_file_operations;
346 +       ext4_set_aops(value_inode);
347 +       value_inode->i_generation = inode->i_generation;
348 +       EXT4_I(value_inode)->i_flags |= EXT4_EA_INODE_FL;
349 +       /*
350 +        * Record the parent in the EA inode; e2fsck can use this pointer.
351 +        */
352 +       value_inode->i_xattr_inode_parent = inode->i_ino;
353 +       unlock_new_inode(value_inode);
354 +
355 +       return value_inode;
356 +}
357 +
358 +/*
359 + * Drop EA inode @ea_ino of @inode: zero its link count and release it.
360 + * Returns 0, or the error reported by ext4_xattr_inode_iget().
361 + */
362 +int
363 +ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino)
364 +{
365 +       int rc;
366 +       struct inode *victim = ext4_xattr_inode_iget(inode, ea_ino, &rc);
367 +
368 +       if (rc != 0)
369 +               return rc;
370 +
371 +       victim->i_nlink = 0;
372 +       iput(victim);
373 +
374 +       return 0;
375 +}
376 +
377 +/*
378 + * Store @value (@value_len bytes) in a newly created EA inode of @inode.
379 + * On success *ea_ino is set to the new inode number; on failure it is left
380 + * untouched.  Returns 0 or -errno.
381 + */
382 +static int
383 +ext4_xattr_inode_set(handle_t *handle, struct inode *inode, unsigned long *ea_ino,
384 +                    const void *value, size_t value_len)
385 +{
386 +       struct inode *ea_inode;
387 +       int err;
388 +
389 +       ea_inode = ext4_xattr_inode_create(handle, inode);
390 +       if (IS_ERR(ea_inode))
391 +               return PTR_ERR(ea_inode);       /* propagate the real errno */
392 +
393 +       err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
394 +       if (err)
395 +               ea_inode->i_nlink = 0;  /* write failed: drop the only link */
396 +       else
397 +               *ea_ino = ea_inode->i_ino;
398 +
399 +       iput(ea_inode);
400 +       return err;
401 +}
402 +
403  struct ext4_xattr_info {
404 -       int name_index;
405         const char *name;
406         const void *value;
407         size_t value_len;
408 +       int name_index;
409 +       int in_inode;
410  };
411  
412  struct ext4_xattr_search {
413 @@ -538,15 +802,23 @@ struct ext4_xattr_search {
414  };
415  
416  static int
417 -ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
418 +ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s,
419 +                    handle_t *handle, struct inode *inode)
420  {
421         struct ext4_xattr_entry *last;
422         size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
423 +       int in_inode = i->in_inode;
424 +
425 +       if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
426 +                EXT4_FEATURE_INCOMPAT_EA_INODE) &&
427 +           (EXT4_XATTR_SIZE(i->value_len) >
428 +            EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
429 +               in_inode = 1;
430  
431         /* Compute min_offs and last. */
432         last = s->first;
433         for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
434 -               if (!last->e_value_block && last->e_value_size) {
435 +               if (last->e_value_inum == 0 && last->e_value_size > 0) {
436                         size_t offs = le16_to_cpu(last->e_value_offs);
437                         if (offs < min_offs)
438                                 min_offs = offs;
439 @@ -554,16 +826,21 @@ ext4_xattr_set_entry(struct ext4_xattr_i
440         }
441         free = min_offs - ((void *)last - s->base) - sizeof(__u32);
442         if (!s->not_found) {
443 -               if (!s->here->e_value_block && s->here->e_value_size) {
444 +               if (!in_inode && s->here->e_value_inum == 0 &&
445 +                   s->here->e_value_size > 0) {
446                         size_t size = le32_to_cpu(s->here->e_value_size);
447                         free += EXT4_XATTR_SIZE(size);
448                 }
449                 free += EXT4_XATTR_LEN(name_len);
450         }
451         if (i->value) {
452 -               if (free < EXT4_XATTR_SIZE(i->value_len) ||
453 -                   free < EXT4_XATTR_LEN(name_len) +
454 -                          EXT4_XATTR_SIZE(i->value_len))
455 +               size_t value_len = EXT4_XATTR_SIZE(i->value_len);
456 +
457 +               if (in_inode)
458 +                       value_len = 0;
459 +
460 +               if (free < value_len ||
461 +                   free < EXT4_XATTR_LEN(name_len) + value_len)
462                         return -ENOSPC;
463         }
464  
465 @@ -577,7 +854,8 @@ ext4_xattr_set_entry(struct ext4_xattr_i
466                 s->here->e_name_len = name_len;
467                 memcpy(s->here->e_name, i->name, name_len);
468         } else {
469 -               if (!s->here->e_value_block && s->here->e_value_size) {
470 +               if (s->here->e_value_offs > 0 && s->here->e_value_inum == 0 &&
471 +                   s->here->e_value_size > 0) {
472                         void *first_val = s->base + min_offs;
473                         size_t offs = le16_to_cpu(s->here->e_value_offs);
474                         void *val = s->base + offs;
475 @@ -606,13 +884,18 @@ ext4_xattr_set_entry(struct ext4_xattr_i
476                         last = s->first;
477                         while (!IS_LAST_ENTRY(last)) {
478                                 size_t o = le16_to_cpu(last->e_value_offs);
479 -                               if (!last->e_value_block &&
480 +                               if (!last->e_value_inum &&
481                                     last->e_value_size && o < offs)
482                                         last->e_value_offs =
483                                                 cpu_to_le16(o + size);
484                                 last = EXT4_XATTR_NEXT(last);
485                         }
486                 }
487 +               if (s->here->e_value_inum != 0) {
488 +                       ext4_xattr_inode_unlink(inode,
489 +                                       le32_to_cpu(s->here->e_value_inum));
490 +                       s->here->e_value_inum = 0;
491 +               }
492                 if (!i->value) {
493                         /* Remove the old name. */
494                         size_t size = EXT4_XATTR_LEN(name_len);
495 @@ -626,10 +908,17 @@ ext4_xattr_set_entry(struct ext4_xattr_i
496         if (i->value) {
497                 /* Insert the new value. */
498                 s->here->e_value_size = cpu_to_le32(i->value_len);
499 -               if (i->value_len) {
500 +               if (in_inode) {
501 +                       unsigned long ea_ino = le32_to_cpu(s->here->e_value_inum);
502 +                       ext4_xattr_inode_set(handle, inode, &ea_ino, i->value,
503 +                                            i->value_len);
504 +                       s->here->e_value_inum = cpu_to_le32(ea_ino);
505 +                       s->here->e_value_offs = 0;
506 +               } else if (i->value_len) {
507                         size_t size = EXT4_XATTR_SIZE(i->value_len);
508                         void *val = s->base + min_offs - size;
509                         s->here->e_value_offs = cpu_to_le16(min_offs - size);
510 +                       s->here->e_value_inum = 0;
511                         memset(val + size - EXT4_XATTR_PAD, 0,
512                                EXT4_XATTR_PAD); /* Clear the pad bytes. */
513                         memcpy(val, i->value, i->value_len);
514 @@ -674,7 +963,7 @@ ext4_xattr_block_find(struct inode *inod
515                 bs->s.end = bs->bh->b_data + bs->bh->b_size;
516                 bs->s.here = bs->s.first;
517                 error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
518 -                                             i->name, bs->bh->b_size, 1);
519 +                                            i->name, bs->bh->b_size, 1, inode);
520                 if (error && error != -ENODATA)
521                         goto cleanup;
522                 bs->s.not_found = error;
523 @@ -698,8 +987,6 @@ ext4_xattr_block_set(handle_t *handle, s
524  
525  #define header(x) ((struct ext4_xattr_header *)(x))
526  
527 -       if (i->value && i->value_len > sb->s_blocksize)
528 -               return -ENOSPC;
529         if (s->base) {
530                 ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
531                                         bs->bh->b_blocknr);
532 @@ -714,7 +1001,7 @@ ext4_xattr_block_set(handle_t *handle, s
533                                 ce = NULL;
534                         }
535                         ea_bdebug(bs->bh, "modifying in-place");
536 -                       error = ext4_xattr_set_entry(i, s);
537 +                       error = ext4_xattr_set_entry(i, s, handle, inode);
538                         if (!error) {
539                                 if (!IS_LAST_ENTRY(s->first))
540                                         ext4_xattr_rehash(header(s->base),
541 @@ -766,7 +1053,7 @@ ext4_xattr_block_set(handle_t *handle, s
542                 s->end = s->base + sb->s_blocksize;
543         }
544  
545 -       error = ext4_xattr_set_entry(i, s);
546 +       error = ext4_xattr_set_entry(i, s, handle, inode);
547         if (error == -EIO)
548                 goto bad_block;
549         if (error)
550 @@ -917,7 +1204,7 @@ ext4_xattr_ibody_find(struct inode *inod
551                 /* Find the named attribute. */
552                 error = ext4_xattr_find_entry(&is->s.here, i->name_index,
553                                               i->name, is->s.end -
554 -                                             (void *)is->s.base, 0);
555 +                                             (void *)is->s.base, 0, inode);
556                 if (error && error != -ENODATA)
557                         return error;
558                 is->s.not_found = error;
559 @@ -936,7 +1223,7 @@ ext4_xattr_ibody_set(handle_t *handle, s
560  
561         if (EXT4_I(inode)->i_extra_isize == 0)
562                 return -ENOSPC;
563 -       error = ext4_xattr_set_entry(i, s);
564 +       error = ext4_xattr_set_entry(i, s, handle, inode);
565         if (error)
566                 return error;
567         header = IHDR(inode, ext4_raw_inode(&is->iloc));
568 @@ -972,7 +1259,7 @@ ext4_xattr_set_handle(handle_t *handle, 
569                 .name = name,
570                 .value = value,
571                 .value_len = value_len,
572 -
573 +               .in_inode = 0,
574         };
575         struct ext4_xattr_ibody_find is = {
576                 .s = { .not_found = -ENODATA, },
577 @@ -1041,6 +1328,15 @@ ext4_xattr_set_handle(handle_t *handle, 
578                                         goto cleanup;
579                         }
580                         error = ext4_xattr_block_set(handle, inode, &i, &bs);
581 +                       if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
582 +                                       EXT4_FEATURE_INCOMPAT_EA_INODE) &&
583 +                           error == -ENOSPC) {
584 +                               /* xattr does not fit in the block; store its
585 +                                * value in an external inode */
586 +                               i.in_inode = 1;
587 +                               error = ext4_xattr_ibody_set(handle, inode,
588 +                                                            &i, &is);
589 +                       }
590                         if (error)
591                                 goto cleanup;
592                         if (!is.s.not_found) {
593 @@ -1087,10 +1383,25 @@ ext4_xattr_set(struct inode *inode, int 
594                const void *value, size_t value_len, int flags)
595  {
596         handle_t *handle;
597 +       struct super_block *sb = inode->i_sb;
598 +       int buffer_credits;
599         int error, retries = 0;
600  
601 +       buffer_credits = EXT4_DATA_TRANS_BLOCKS(sb);
602 +       if ((value_len >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) &&
603 +           EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EA_INODE)) {
604 +               int nrblocks = (value_len + sb->s_blocksize - 1) >>
605 +                                       sb->s_blocksize_bits;
606 +
607 +               /* For new inode */
608 +               buffer_credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3;
609 +
610 +               /* For data blocks of EA inode */
611 +               buffer_credits += ext4_meta_trans_blocks(inode, nrblocks, 0);
612 +       }
613 +
614  retry:
615 -       handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
616 +       handle = ext4_journal_start(inode, buffer_credits);
617         if (IS_ERR(handle)) {
618                 error = PTR_ERR(handle);
619         } else {
620 @@ -1100,7 +1411,7 @@ retry:
621                                               value, value_len, flags);
622                 error2 = ext4_journal_stop(handle);
623                 if (error == -ENOSPC &&
624 -                   ext4_should_retry_alloc(inode->i_sb, &retries))
625 +                   ext4_should_retry_alloc(sb, &retries))
626                         goto retry;
627                 if (error == 0)
628                         error = error2;
629 @@ -1122,7 +1433,7 @@ static void ext4_xattr_shift_entries(str
630  
631         /* Adjust the value offsets of the entries */
632         for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
633 -               if (!last->e_value_block && last->e_value_size) {
634 +               if (last->e_value_inum == 0 && last->e_value_size > 0) {
635                         new_offs = le16_to_cpu(last->e_value_offs) +
636                                                         value_offs_shift;
637                         BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
638 @@ -1355,22 +1666,135 @@ cleanup:
639         return error;
640  }
641  
642 +#define EIA_INCR 16 /* must be 2^n */
643 +#define EIA_MASK (EIA_INCR - 1)
644 +/*
645 + * Append EA inode number @ino to *@lea_ino_array so it can be deleted later.
646 + * The array is allocated on first use (15 slots) and re-allocated with 16
647 + * more slots, old contents copied, whenever it fills up.
648 + * Returns 0, or -ENOMEM (the existing array is then left unchanged).
649 + */
650 +static int
651 +ext4_expand_ino_array(struct ext4_xattr_ino_array **lea_ino_array, __u32 ino)
652 +{
653 +       struct ext4_xattr_ino_array *cur = *lea_ino_array;
654 +       struct ext4_xattr_ino_array *grown;
655 +       int used;
656 +
657 +       if (cur == NULL) {
658 +               /* header plus 15 slots: fits a power-of-two allocation */
659 +               cur = kmalloc(offsetof(struct ext4_xattr_ino_array,
660 +                                      xia_inodes[EIA_MASK]),
661 +                             GFP_NOFS);
662 +               if (cur == NULL)
663 +                       return -ENOMEM;
664 +               cur->xia_count = 0;
665 +               *lea_ino_array = cur;
666 +       } else if ((cur->xia_count & EIA_MASK) == EIA_MASK) {
667 +               /* all 15 + n * 16 slots are in use: move to a bigger array */
668 +               used = cur->xia_count;
669 +               grown = kmalloc(offsetof(struct ext4_xattr_ino_array,
670 +                                        xia_inodes[used + EIA_INCR]),
671 +                               GFP_NOFS);
672 +               if (grown == NULL)
673 +                       return -ENOMEM;
674 +               memcpy(grown, cur, offsetof(struct ext4_xattr_ino_array,
675 +                                           xia_inodes[used]));
676 +               kfree(cur);
677 +               cur = grown;
678 +               *lea_ino_array = cur;
679 +       }
680 +
681 +       cur->xia_inodes[cur->xia_count++] = ino;
682 +       return 0;
683 +}
684  
685 +/*
686 + * Put every EA inode listed in @lea_ino_array (may be NULL) on the orphan
687 + * list, growing @handle by @credits when low.  EA inodes that cannot be
688 + * loaded are skipped.  Returns 0 or the error from growing the handle.
689 + */
690 +static int
691 +ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode,
692 +                       int credits, struct ext4_xattr_ino_array *lea_ino_array)
693 +{
694 +       struct inode *ea_inode;
695 +       int i, rc = 0;
696 +
697 +       if (!lea_ino_array)
698 +               return 0;
699 +
700 +       for (i = 0; i < lea_ino_array->xia_count; i++) {
701 +               if (!ext4_handle_has_enough_credits(handle, credits)) {
702 +                       rc = ext4_journal_extend(handle, credits);
703 +                       if (rc > 0)
704 +                               rc = ext4_journal_restart(handle, credits);
705 +                       if (rc) {
706 +                               ext4_warning(inode->i_sb,
707 +                                       "couldn't extend journal "
708 +                                       "(err %d)", rc);
709 +                               return rc;
710 +                       }
711 +               }
712 +               ea_inode = ext4_xattr_inode_iget(inode,
713 +                               lea_ino_array->xia_inodes[i], &rc);
714 +               /* on success the reference is kept; the caller drops it */
715 +               if (!rc)
716 +                       ext4_orphan_add(handle, ea_inode);
717 +       }
718 +
719 +       return 0;
720 +}
721  
722  /*
723   * ext4_xattr_delete_inode()
724   *
725 - * Free extended attribute resources associated with this inode. This
726 + * Free extended attribute resources associated with this inode. Traverse
727 + * all entries and unlink any xattr inodes associated with this inode. This
728   * is called immediately before an inode is freed. We have exclusive
729 - * access to the inode.
730 + * access to the inode. If an orphan inode is deleted it will also delete any
731 + * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget()
732 + * to ensure they belong to the parent inode and were not deleted already.
733   */
734 -void
735 -ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
736 +int
737 +ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
738 +                       struct ext4_xattr_ino_array **lea_ino_array)
739  {
740         struct buffer_head *bh = NULL;
741 +       struct ext4_xattr_ibody_header *header;
742 +       struct ext4_inode *raw_inode;
743 +       struct ext4_iloc iloc;
744 +       struct ext4_xattr_entry *entry;
745 +       int credits = 3, error = 0;
746  
747 -       if (!EXT4_I(inode)->i_file_acl)
748 +       if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
749 +               goto delete_external_ea;
750 +
751 +       error = ext4_get_inode_loc(inode, &iloc);
752 +       if (error)
753 +               goto cleanup;
754 +       raw_inode = ext4_raw_inode(&iloc);
755 +       header = IHDR(inode, raw_inode);
756 +       entry = IFIRST(header);
757 +       for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
758 +               if (entry->e_value_inum == 0)
759 +                       continue;
760 +               error = ext4_expand_ino_array(lea_ino_array,
761 +                                             le32_to_cpu(entry->e_value_inum));
762 +               if (error != 0)
763 +                       break;
764 +               entry->e_value_inum = 0;
765 +       }
766 +       brelse(iloc.bh);
767 +       if (error != 0)
768 +               goto cleanup;
769 +delete_external_ea:
770 +       if (!EXT4_I(inode)->i_file_acl) {
771 +               /* no xattr block: orphan the collected inodes, report failure */
772 +               error = ext4_xattr_inode_orphan_add(handle, inode, credits,
773 +                                               *lea_ino_array);
774                 goto cleanup;
775 +       }
776         bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
777         if (!bh) {
778                 ext4_error(inode->i_sb, "inode %lu: block %llu read error",
779 @@ -1383,11 +1807,71 @@ ext4_xattr_delete_inode(handle_t *handle
780                            inode->i_ino, EXT4_I(inode)->i_file_acl);
781                 goto cleanup;
782         }
783 +
784 +       entry = BFIRST(bh);
785 +       for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
786 +               if (entry->e_value_inum == 0)
787 +                       continue;
788 +               error = ext4_expand_ino_array(lea_ino_array,
789 +                                             le32_to_cpu(entry->e_value_inum));
790 +               if (error != 0)
791 +                       goto cleanup;
792 +               entry->e_value_inum = 0;
793 +       }
794 +       /* add xattr inode to orphan list */
795 +       error = ext4_xattr_inode_orphan_add(handle, inode, credits,
796 +                                       *lea_ino_array);
797 +       if (error != 0)
798 +               goto cleanup;
799 +
800 +       if (!IS_NOQUOTA(inode))
801 +               credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
802 +
803 +       if (!ext4_handle_has_enough_credits(handle, credits)) {
804 +               error = ext4_journal_extend(handle, credits);
805 +               if (error > 0)
806 +                       error = ext4_journal_restart(handle, credits);
807 +               if (error != 0) {
808 +                       ext4_warning(inode->i_sb,
809 +                               "couldn't extend journal (err %d)", error);
810 +                       goto cleanup;
811 +               }
812 +       }
813 +
814         ext4_xattr_release_block(handle, inode, bh);
815         EXT4_I(inode)->i_file_acl = 0;
816  
817  cleanup:
818         brelse(bh);
819 +
820 +       return error;
821 +}
822 +/* Clear nlink on and release each inode in lea_ino_array, then free it. */
823 +void
824 +ext4_xattr_inode_array_free(struct inode *inode,
825 +                           struct ext4_xattr_ino_array *lea_ino_array)
826 +{
827 +       struct inode    *ea_inode = NULL;
828 +       int             idx = 0;
829 +       int             err;
830 +
831 +       if (lea_ino_array == NULL)
832 +               return;
833 +
834 +       for (; idx < lea_ino_array->xia_count; ++idx) {
835 +               ea_inode = ext4_xattr_inode_iget(inode,
836 +                               lea_ino_array->xia_inodes[idx], &err);
837 +               if (err)
838 +                       continue;
839 +
840 +               /* orphaned: drop the ref kept by ext4_xattr_delete_inode() */
841 +               if (!list_empty(&EXT4_I(ea_inode)->i_orphan))
842 +                       iput(ea_inode);
843 +               /* NOTE(review): presumably this iput() pairs with the iget above */
844 +               ea_inode->i_nlink = 0;
845 +               iput(ea_inode);
846 +       }
847 +       kfree(lea_ino_array);
848  }
849  
850  /*
851 @@ -1457,10 +1941,9 @@ ext4_xattr_cmp(struct ext4_xattr_header 
852                     entry1->e_name_index != entry2->e_name_index ||
853                     entry1->e_name_len != entry2->e_name_len ||
854                     entry1->e_value_size != entry2->e_value_size ||
855 +                   entry1->e_value_inum != entry2->e_value_inum ||
856                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
857                         return 1;
858 -               if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
859 -                       return -EIO;
860                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
861                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
862                            le32_to_cpu(entry1->e_value_size)))
863 @@ -1545,7 +2028,7 @@ static inline void ext4_xattr_hash_entry
864                        *name++;
865         }
866  
867 -       if (entry->e_value_block == 0 && entry->e_value_size != 0) {
868 +       if (entry->e_value_inum == 0 && entry->e_value_size != 0) {
869                 __le32 *value = (__le32 *)((char *)header +
870                         le16_to_cpu(entry->e_value_offs));
871                 for (n = (le32_to_cpu(entry->e_value_size) +
872 Index: linux-stage/fs/ext4/xattr.h
873 ===================================================================
874 --- linux-stage.orig/fs/ext4/xattr.h
875 +++ linux-stage/fs/ext4/xattr.h
876 @@ -38,7 +38,7 @@ struct ext4_xattr_entry {
877         __u8    e_name_len;     /* length of name */
878         __u8    e_name_index;   /* attribute name index */
879         __le16  e_value_offs;   /* offset in disk block of value */
880 -       __le32  e_value_block;  /* disk block attribute is stored on (n/i) */
881 +       __le32  e_value_inum;   /* inode in which the value is stored */
882         __le32  e_value_size;   /* size of attribute value */
883         __le32  e_hash;         /* hash value of name and value */
884         char    e_name[0];      /* attribute name */
885 @@ -63,6 +63,15 @@ struct ext4_xattr_entry {
886                 EXT4_I(inode)->i_extra_isize))
887  #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
888  
889 +#define i_xattr_inode_parent i_mtime.tv_sec
890 +
891 +/*
892 + * The minimum size of an EA value that is stored in an external inode:
893 + * block size - size of header - size of one entry - 4 null bytes
894 + */
895 +#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b)                                        \
896 +       ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
897 +
898  # ifdef CONFIG_EXT4_FS_XATTR
899  
900  extern struct xattr_handler ext4_xattr_user_handler;
901 @@ -77,7 +86,13 @@ extern int ext4_xattr_get(struct inode *
902  extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
903  extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
904  
905 -extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
906 +extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
907 +                                          int *err);
908 +extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
909 +extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
910 +                                  struct ext4_xattr_ino_array **array);
911 +extern void ext4_xattr_inode_array_free(struct inode *inode,
912 +                                       struct ext4_xattr_ino_array *array);
913  extern void ext4_xattr_put_super(struct super_block *);
914  
915  extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
916 @@ -111,9 +126,11 @@ ext4_xattr_set_handle(handle_t *handle, 
917         return -EOPNOTSUPP;
918  }
919  
920 -static inline void
921 -ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
922 +static inline int
923 +ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
924 +                       struct ext4_xattr_ino_array **array)
925  {
926 +       return 0;       /* xattrs compiled out: nothing to release */
927  }
928  
929  static inline void
930 Index: linux-stage/fs/ext4/inode.c
931 ===================================================================
932 --- linux-stage.orig/fs/ext4/inode.c
933 +++ linux-stage/fs/ext4/inode.c
934 @@ -222,6 +222,8 @@ void ext4_delete_inode(struct inode *ino
935  {
936         handle_t *handle;
937         int err;
938 +       int extra_credits = 3;
939 +       struct ext4_xattr_ino_array *lea_ino_array = NULL;
940  
941         if (ext4_should_order_data(inode))
942                 ext4_begin_ordered_truncate(inode, 0);
943 @@ -235,7 +237,8 @@ void ext4_delete_inode(struct inode *ino
944          * protection against it
945          */
946         sb_start_intwrite(inode->i_sb);
947 -       handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
948 +
949 +       handle = ext4_journal_start(inode, extra_credits);
950         if (IS_ERR(handle)) {
951                 ext4_std_error(inode->i_sb, PTR_ERR(handle));
952                 /*
953 @@ -247,9 +250,36 @@ void ext4_delete_inode(struct inode *ino
954                 sb_end_intwrite(inode->i_sb);
955                 goto no_delete;
956         }
957 -
958         if (IS_SYNC(inode))
959                 ext4_handle_sync(handle);
960 +
961 +       /*
962 +        * Delete xattr inode before deleting the main inode.
963 +        */
964 +       err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
965 +       if (err) {
966 +               ext4_warning(inode->i_sb,
967 +                            "couldn't delete inode's xattr (err %d)", err);
968 +               goto stop_handle;
969 +       }
970 +
971 +       if (!IS_NOQUOTA(inode))
972 +               extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
973 +
974 +       if (!ext4_handle_has_enough_credits(handle,
975 +                               blocks_for_truncate(inode) + extra_credits)) {
976 +               err = ext4_journal_extend(handle,
977 +                               blocks_for_truncate(inode) + extra_credits);
978 +               if (err > 0)
979 +                       err = ext4_journal_restart(handle,
980 +                               blocks_for_truncate(inode) + extra_credits);
981 +               if (err != 0) {
982 +                       ext4_warning(inode->i_sb,
983 +                                    "couldn't extend journal (err %d)", err);
984 +                       goto stop_handle;
985 +               }
986 +       }
987 +
988         inode->i_size = 0;
989         err = ext4_mark_inode_dirty(handle, inode);
990         if (err) {
991 @@ -303,8 +333,12 @@ void ext4_delete_inode(struct inode *ino
992                 clear_inode(inode);
993         else
994                 ext4_free_inode(handle, inode);
995 +
996         ext4_journal_stop(handle);
997         sb_end_intwrite(inode->i_sb);
998 +
999 +       if (lea_ino_array != NULL)
1000 +               ext4_xattr_inode_array_free(inode, lea_ino_array);
1001         return;
1002  no_delete:
1003         clear_inode(inode);     /* We must guarantee clearing of inode... */
1004 Index: linux-stage/fs/ext4/ialloc.c
1005 ===================================================================
1006 --- linux-stage.orig/fs/ext4/ialloc.c
1007 +++ linux-stage/fs/ext4/ialloc.c
1008 @@ -219,7 +219,6 @@ void ext4_free_inode(handle_t *handle, s
1009          * as writing the quota to disk may need the lock as well.
1010          */
1011         vfs_dq_init(inode);
1012 -       ext4_xattr_delete_inode(handle, inode);
1013         vfs_dq_free_inode(inode);
1014         vfs_dq_drop(inode);
1015