Whamcloud - gitweb
Branch b1_6
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre administrative quota format.
5  *
6  *  from
7  *  linux/fs/quota_v2.c
8  */
9
10 #ifndef EXPORT_SYMTAB
11 # define EXPORT_SYMTAB
12 #endif
13
14 #include <linux/errno.h>
15 #include <linux/fs.h>
16 #include <linux/mount.h>
17 #include <linux/kernel.h>
18 #include <linux/init.h>
19 #include <linux/module.h>
20 #include <linux/slab.h>
21 #include <linux/quotaio_v1.h>
22
23 #include <asm/byteorder.h>
24 #include <asm/uaccess.h>
25
26 #include <lustre_quota.h>
27 #include <obd_support.h>
28 #include "lustre_quota_fmt.h"
29
/* Expected header version numbers: first index is the on-disk quota format
 * version, second index is the quota type (MAXQUOTAS entries per format).
 * check_quota_file() compares dqh_version against these. */
static const uint lustre_initqversions[][MAXQUOTAS] = {
        [LUSTRE_QUOTA_V1] = LUSTRE_INITQVERSIONS,
        [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
};

/* Number of dquot entry slots scanned per data block, per format version. */
static const int lustre_dqstrinblk[] = {
        [LUSTRE_QUOTA_V1] = LUSTRE_DQSTRINBLK,
        [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
};

/* Size in bytes of one on-disk dquot entry, per format version. */
static const int lustre_disk_dqblk_sz[] = {
        [LUSTRE_QUOTA_V1] = sizeof(struct lustre_disk_dqblk),
        [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
};
44
45 int check_quota_file(struct file *f, struct inode *inode, int type, 
46                      lustre_quota_version_t version)
47 {
48         struct lustre_disk_dqheader dqhead;
49         mm_segment_t fs;
50         ssize_t size;
51         loff_t offset = 0;
52         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
53         const uint *quota_versions = lustre_initqversions[version];
54
55         if (f) {
56                 fs = get_fs();
57                 set_fs(KERNEL_DS);
58                 size = f->f_op->read(f, (char *)&dqhead,
59                                      sizeof(struct lustre_disk_dqheader), 
60                                      &offset);
61                 set_fs(fs);
62         } else { 
63 #ifndef KERNEL_SUPPORTS_QUOTA_READ
64                 size = 0;
65 #else
66                 struct super_block *sb = inode->i_sb;
67                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
68                                             sizeof(struct lustre_disk_dqheader), 0);
69 #endif
70         }
71         if (size != sizeof(struct lustre_disk_dqheader))
72                 return -EINVAL;
73         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
74             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
75                 return -EINVAL;
76         return 0;
77 }
78
79 /* Check whether given file is really lustre admin quotafile */
80 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
81 {
82         struct file *f = lqi->qi_files[type];
83         return check_quota_file(f, NULL, type, lqi->qi_version);
84 }
85
86 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
87 {
88         mm_segment_t fs;
89         struct lustre_disk_dqinfo dinfo;
90         ssize_t size;
91         loff_t offset = LUSTRE_DQINFOOFF;
92
93         fs = get_fs();
94         set_fs(KERNEL_DS);
95         size = f->f_op->read(f, (char *)&dinfo, 
96                              sizeof(struct lustre_disk_dqinfo), &offset);
97         set_fs(fs);
98         if (size != sizeof(struct lustre_disk_dqinfo)) {
99                 CERROR("Can't read info structure on device %s.\n",
100                        f->f_vfsmnt->mnt_sb->s_id);
101                 return -EINVAL;
102         }
103         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
104         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
105         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
106         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
107         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
108         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
109         return 0;
110 }
111
112 /* Read information header from quota file */
113 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
114 {
115         return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]);
116 }
117
118 /* Write information header to quota file */
119 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
120 {
121         mm_segment_t fs;
122         struct lustre_disk_dqinfo dinfo;
123         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
124         struct file *f = lqi->qi_files[type];
125         ssize_t size;
126         loff_t offset = LUSTRE_DQINFOOFF;
127
128         info->dqi_flags &= ~DQF_INFO_DIRTY;
129         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
130         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
131         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
132         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
133         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
134         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
135         fs = get_fs();
136         set_fs(KERNEL_DS);
137         size = f->f_op->write(f, (char *)&dinfo, 
138                               sizeof(struct lustre_disk_dqinfo), &offset);
139         set_fs(fs);
140         if (size != sizeof(struct lustre_disk_dqinfo)) {
141                 printk(KERN_WARNING
142                        "Can't write info structure on device %s.\n",
143                        f->f_vfsmnt->mnt_sb->s_id);
144                 return -1;
145         }
146         return 0;
147 }
148
/*
 * Field accessors for on-disk dquot entries.
 *
 * The v1 and v2 entry structures share field names but not field widths, so
 * the byte-order conversion is chosen from sizeof() of the addressed field.
 * Both arms of each ?: are compiled; only the one matching the field width
 * is evaluated.  All macro arguments are parenthesized so that compound
 * expressions can be passed safely.
 */

/* Disk (little-endian) field value -> CPU value. */
#define DQ2MQ(v) ((sizeof(v) == sizeof(__u64)) ? \
                le64_to_cpu(v) : le32_to_cpu(v))

/* Store CPU value newv into disk field v in little-endian order. */
#define MQ2DQ(v,newv) ((sizeof(v) == sizeof(__u64)) ? \
                ((v) = cpu_to_le64((__u64)(newv))) : \
                ((v) = cpu_to_le32((__u32)(newv))))

/* Read 'field' from the entry at 'var', laid out per format version 'ver'. */
#define DQF_GET(var,ver,field) (((ver) == LUSTRE_QUOTA_V1)?\
                DQ2MQ(((struct lustre_disk_dqblk*)(var))->field):\
                DQ2MQ(((struct lustre_disk_dqblk_v2*)(var))->field))

/* Write 'val' to 'field' of the entry at 'var', laid out per version 'ver'. */
#define DQF_PUT(var,ver,field,val) (((ver) == LUSTRE_QUOTA_V1)?\
                MQ2DQ(((struct lustre_disk_dqblk*)(var))->field, (val)):\
                MQ2DQ(((struct lustre_disk_dqblk_v2*)(var))->field, (val)))
162
163 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
164                  lustre_quota_version_t version)
165 {
166         m->dqb_ihardlimit = DQF_GET(d, version, dqb_ihardlimit);
167         m->dqb_isoftlimit = DQF_GET(d, version, dqb_isoftlimit);
168         m->dqb_curinodes = DQF_GET(d, version, dqb_curinodes);
169         m->dqb_itime = DQF_GET(d, version, dqb_itime);
170         m->dqb_bhardlimit = DQF_GET(d, version, dqb_bhardlimit);
171         m->dqb_bsoftlimit = DQF_GET(d, version, dqb_bsoftlimit);
172         m->dqb_curspace = DQF_GET(d, version, dqb_curspace);
173         m->dqb_btime = DQF_GET(d, version, dqb_btime);
174 }
175
176 static int check_quota_bounds(struct lustre_mem_dqblk *m, 
177                               lustre_quota_version_t version)
178 {
179         return (version == LUSTRE_QUOTA_V1  &&
180                 m->dqb_ihardlimit <= MAX_UL &&
181                 m->dqb_isoftlimit <= MAX_UL &&
182                 m->dqb_curinodes <= MAX_UL  &&
183                 m->dqb_bhardlimit <= MAX_UL &&
184                 m->dqb_bsoftlimit <= MAX_UL) ||
185                 version != LUSTRE_QUOTA_V1;
186 }
187
188 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
189                        qid_t id, lustre_quota_version_t version)
190 {
191         if (!check_quota_bounds(m, version))
192                 return -EINVAL;
193
194         DQF_PUT(d, version, dqb_ihardlimit, m->dqb_ihardlimit);
195         DQF_PUT(d, version, dqb_isoftlimit, m->dqb_isoftlimit);
196         DQF_PUT(d, version, dqb_curinodes, m->dqb_curinodes);
197         DQF_PUT(d, version, dqb_itime, m->dqb_itime);
198         DQF_PUT(d, version, dqb_bhardlimit, m->dqb_bhardlimit);
199         DQF_PUT(d, version, dqb_bsoftlimit, m->dqb_bsoftlimit);
200         DQF_PUT(d, version, dqb_curspace, m->dqb_curspace);
201         DQF_PUT(d, version, dqb_btime, m->dqb_btime);
202         DQF_PUT(d, version, dqb_id, id);
203
204         return 0;
205 }
206
207 dqbuf_t getdqbuf(void)
208 {
209         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
210         if (!buf)
211                 printk(KERN_WARNING
212                        "VFS: Not enough memory for quota buffers.\n");
213         return buf;
214 }
215
216 void freedqbuf(dqbuf_t buf)
217 {
218         kfree(buf);
219 }
220
221 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
222 {
223         mm_segment_t fs;
224         ssize_t ret;
225         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
226
227         memset(buf, 0, LUSTRE_DQBLKSIZE);
228         fs = get_fs();
229         set_fs(KERNEL_DS);
230         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
231         set_fs(fs);
232         return ret;
233 }
234
235 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
236 {
237         mm_segment_t fs;
238         ssize_t ret;
239         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
240
241         fs = get_fs();
242         set_fs(KERNEL_DS);
243         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
244         set_fs(fs);
245         return ret;
246 }
247
248 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
249 {
250         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
251 }
252
253 /* Remove empty block from list and return it */
254 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
255 {
256         dqbuf_t buf = getdqbuf();
257         struct lustre_disk_dqdbheader *dh =
258             (struct lustre_disk_dqdbheader *)buf;
259         int ret, blk;
260
261         if (!buf)
262                 return -ENOMEM;
263         if (info->dqi_free_blk) {
264                 blk = info->dqi_free_blk;
265                 if ((ret = read_blk(filp, blk, buf)) < 0)
266                         goto out_buf;
267                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
268         } else {
269                 memset(buf, 0, LUSTRE_DQBLKSIZE);
270                 /* Assure block allocation... */
271                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
272                         goto out_buf;
273                 blk = info->dqi_blocks++;
274         }
275         lustre_mark_info_dirty(info);
276         ret = blk;
277 out_buf:
278         freedqbuf(buf);
279         return ret;
280 }
281
282 /* Insert empty block to the list */
283 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
284                    dqbuf_t buf, uint blk)
285 {
286         struct lustre_disk_dqdbheader *dh =
287             (struct lustre_disk_dqdbheader *)buf;
288         int err;
289
290         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
291         dh->dqdh_prev_free = cpu_to_le32(0);
292         dh->dqdh_entries = cpu_to_le16(0);
293         info->dqi_free_blk = blk;
294         lustre_mark_info_dirty(info);
295         if ((err = write_blk(filp, blk, buf)) < 0)
296                 /* Some strange block. We had better leave it... */
297                 return err;
298         return 0;
299 }
300
301 /* Remove given block from the list of blocks with free entries */
302 int remove_free_dqentry(struct file *filp,
303                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
304                         uint blk)
305 {
306         dqbuf_t tmpbuf = getdqbuf();
307         struct lustre_disk_dqdbheader *dh =
308             (struct lustre_disk_dqdbheader *)buf;
309         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
310             le32_to_cpu(dh->dqdh_prev_free);
311         int err;
312
313         if (!tmpbuf)
314                 return -ENOMEM;
315         if (nextblk) {
316                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
317                         goto out_buf;
318                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
319                     dh->dqdh_prev_free;
320                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
321                         goto out_buf;
322         }
323         if (prevblk) {
324                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
325                         goto out_buf;
326                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
327                     dh->dqdh_next_free;
328                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
329                         goto out_buf;
330         } else {
331                 info->dqi_free_entry = nextblk;
332                 lustre_mark_info_dirty(info);
333         }
334         freedqbuf(tmpbuf);
335         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
336         err = write_blk(filp, blk, buf);
337         if (err < 0)      /* No matter whether write succeeds block is out of list */
338                 CERROR("VFS: Can't write block (%u) with "
339                        "free entries (rc=%d).\n", blk, err);
340         return 0;
341 out_buf:
342         freedqbuf(tmpbuf);
343         return err;
344 }
345
346 /* Insert given block to the beginning of list with free entries */
347 int insert_free_dqentry(struct file *filp,
348                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
349                         uint blk)
350 {
351         dqbuf_t tmpbuf = getdqbuf();
352         struct lustre_disk_dqdbheader *dh =
353             (struct lustre_disk_dqdbheader *)buf;
354         int err;
355
356         if (!tmpbuf)
357                 return -ENOMEM;
358         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
359         dh->dqdh_prev_free = cpu_to_le32(0);
360         if ((err = write_blk(filp, blk, buf)) < 0)
361                 goto out_buf;
362         if (info->dqi_free_entry) {
363                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
364                         goto out_buf;
365                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
366                     cpu_to_le32(blk);
367                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
368                         goto out_buf;
369         }
370         freedqbuf(tmpbuf);
371         info->dqi_free_entry = blk;
372         lustre_mark_info_dirty(info);
373         return 0;
374 out_buf:
375         freedqbuf(tmpbuf);
376         return err;
377 }
378
379
380
381 /* Find space for dquot */
382 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
383                               lustre_quota_version_t version)
384 {
385         struct lustre_quota_info *lqi = dquot->dq_info;
386         struct file *filp = lqi->qi_files[dquot->dq_type];
387         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
388         uint blk, i;
389         struct lustre_disk_dqdbheader *dh;
390         void *ddquot;
391         int dqblk_sz = lustre_disk_dqblk_sz[version];
392         int dqstrinblk = lustre_dqstrinblk[version];
393         char fakedquot[dqblk_sz];
394         dqbuf_t buf;
395
396         *err = 0;
397         if (!(buf = getdqbuf())) {
398                 *err = -ENOMEM;
399                 return 0;
400         }
401         dh = (struct lustre_disk_dqdbheader *)buf;
402         ddquot = GETENTRIES(buf, version);
403         if (info->dqi_free_entry) {
404                 blk = info->dqi_free_entry;
405                 if ((*err = read_blk(filp, blk, buf)) < 0)
406                         goto out_buf;
407         } else {
408                 blk = get_free_dqblk(filp, info);
409                 if ((int)blk < 0) {
410                         *err = blk;
411                         freedqbuf(buf);
412                         return 0;
413                 }
414                 memset(buf, 0, LUSTRE_DQBLKSIZE);
415                 info->dqi_free_entry = blk; /* This is enough as block is 
416                                                already zeroed and entry list
417                                                is empty... */
418                 lustre_mark_info_dirty(info);
419         }
420
421         /* Will block be full */
422         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
423                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
424                         CERROR("VFS: Can't remove block %u"
425                                " from entry free list.\n", blk);
426                         goto out_buf;
427                 }
428         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
429         memset(fakedquot, 0, dqblk_sz);
430         /* Find free structure in block */
431         for (i = 0; i < dqstrinblk &&
432              memcmp(fakedquot, (char*)ddquot + i * dqblk_sz, 
433                     sizeof(fakedquot)); i++);
434
435         if (i == dqstrinblk) {
436                 CERROR("VFS: Data block full but it shouldn't.\n");
437                 *err = -EIO;
438                 goto out_buf;
439         }
440
441         if ((*err = write_blk(filp, blk, buf)) < 0) {
442                 CERROR("VFS: Can't write quota data block %u.\n", blk);
443                 goto out_buf;
444         }
445         dquot->dq_off =
446             (blk << LUSTRE_DQBLKSIZE_BITS) +
447             sizeof(struct lustre_disk_dqdbheader) +
448             i * dqblk_sz;
449         freedqbuf(buf);
450         return blk;
451 out_buf:
452         freedqbuf(buf);
453         return 0;
454 }
455
456 /* Insert reference to structure into the trie */
457 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
458                           lustre_quota_version_t version)
459 {
460         struct lustre_quota_info *lqi = dquot->dq_info;
461         struct file *filp = lqi->qi_files[dquot->dq_type];
462         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
463         dqbuf_t buf;
464         int ret = 0, newson = 0, newact = 0;
465         u32 *ref;
466         uint newblk;
467
468         if (!(buf = getdqbuf()))
469                 return -ENOMEM;
470         if (!*treeblk) {
471                 ret = get_free_dqblk(filp, info);
472                 if (ret < 0)
473                         goto out_buf;
474                 *treeblk = ret;
475                 memset(buf, 0, LUSTRE_DQBLKSIZE);
476                 newact = 1;
477         } else {
478                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
479                         CERROR("VFS: Can't read tree quota block %u.\n",
480                                *treeblk);
481                         goto out_buf;
482                 }
483         }
484         ref = (u32 *) buf;
485         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
486         if (!newblk)
487                 newson = 1;
488         if (depth == LUSTRE_DQTREEDEPTH - 1) {
489
490                 if (newblk) {
491                         CERROR("VFS: Inserting already present quota entry "
492                                "(block %u).\n", 
493                                ref[GETIDINDEX(dquot->dq_id, depth)]);
494                         ret = -EIO;
495                         goto out_buf;
496                 }
497
498                 newblk = find_free_dqentry(dquot, &ret, version);
499         } else
500                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
501         if (newson && ret >= 0) {
502                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
503                 ret = write_blk(filp, *treeblk, buf);
504         } else if (newact && ret < 0)
505                 put_free_dqblk(filp, info, buf, *treeblk);
506 out_buf:
507         freedqbuf(buf);
508         return ret;
509 }
510
511 /* Wrapper for inserting quota structure into tree */
512 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
513                                  lustre_quota_version_t version)
514 {
515         int tmp = LUSTRE_DQTREEOFF;
516         return do_insert_tree(dquot, &tmp, 0, version);
517 }
518
519 /*
520  *  We don't have to be afraid of deadlocks as we never have quotas on quota files...
521  */
522 static int lustre_write_dquot(struct lustre_dquot *dquot, 
523                               lustre_quota_version_t version)
524 {
525         int type = dquot->dq_type;
526         struct file *filp;
527         mm_segment_t fs;
528         loff_t offset;
529         ssize_t ret;
530         int dqblk_sz = lustre_disk_dqblk_sz[version];
531         char ddquot[dqblk_sz], empty[dqblk_sz];
532
533         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
534         if (ret < 0)
535                 return ret;
536
537         if (!dquot->dq_off)
538                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
539                         CERROR("VFS: Error %Zd occurred while creating quota.\n",
540                                ret);
541                         return ret;
542                 }
543         filp = dquot->dq_info->qi_files[type];
544         offset = dquot->dq_off;
545         /* Argh... We may need to write structure full of zeroes but that would be
546          * treated as an empty place by the rest of the code. Format change would
547          * be definitely cleaner but the problems probably are not worth it */
548         memset(empty, 0, dqblk_sz);
549         if (!memcmp(empty, ddquot, dqblk_sz))
550                 DQF_PUT(ddquot, version, dqb_itime, 1);
551         fs = get_fs();
552         set_fs(KERNEL_DS);
553         ret = filp->f_op->write(filp, ddquot,
554                                 dqblk_sz, &offset);
555         set_fs(fs);
556         if (ret != dqblk_sz) {
557                 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
558                        filp->f_dentry->d_sb->s_id);
559                 if (ret >= 0)
560                         ret = -ENOSPC;
561         } else
562                 ret = 0;
563
564         return ret;
565 }
566
567 /* Free dquot entry in data block */
568 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
569                         lustre_quota_version_t version)
570 {
571         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
572         struct lustre_mem_dqinfo *info =
573             &dquot->dq_info->qi_info[dquot->dq_type];
574         struct lustre_disk_dqdbheader *dh;
575         dqbuf_t buf = getdqbuf();
576         int dqstrinblk = lustre_dqstrinblk[version];
577         int ret = 0;
578
579         if (!buf)
580                 return -ENOMEM;
581         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
582                 CERROR("VFS: Quota structure has offset to other block (%u) "
583                        "than it should (%u).\n", blk, 
584                        (uint)(dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
585                 goto out_buf;
586         }
587         if ((ret = read_blk(filp, blk, buf)) < 0) {
588                 CERROR("VFS: Can't read quota data block %u\n", blk);
589                 goto out_buf;
590         }
591         dh = (struct lustre_disk_dqdbheader *)buf;
592         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
593         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
594                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
595                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
596                         CERROR("VFS: Can't move quota data block (%u) "
597                                "to free list.\n", blk);
598                         goto out_buf;
599                 }
600         } else {
601                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
602                        0, lustre_disk_dqblk_sz[version]);
603                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
604                         /* Insert will write block itself */
605                         if ((ret =
606                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
607                                 CERROR("VFS: Can't insert quota data block (%u) "
608                                        "to free entry list.\n", blk);
609                                 goto out_buf;
610                         }
611                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
612                         CERROR("VFS: Can't write quota data block %u\n", blk);
613                         goto out_buf;
614                 }
615         }
616         dquot->dq_off = 0;      /* Quota is now unattached */
617 out_buf:
618         freedqbuf(buf);
619         return ret;
620 }
621
622 /* Remove reference to dquot from tree */
623 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
624                        lustre_quota_version_t version)
625 {
626         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
627         struct lustre_mem_dqinfo *info =
628             &dquot->dq_info->qi_info[dquot->dq_type];
629         dqbuf_t buf = getdqbuf();
630         int ret = 0;
631         uint newblk;
632         u32 *ref = (u32 *) buf;
633
634         if (!buf)
635                 return -ENOMEM;
636         if ((ret = read_blk(filp, *blk, buf)) < 0) {
637                 CERROR("VFS: Can't read quota data block %u\n", *blk);
638                 goto out_buf;
639         }
640         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
641         if (depth == LUSTRE_DQTREEDEPTH - 1) {
642                 ret = free_dqentry(dquot, newblk, version);
643                 newblk = 0;
644         } else
645                 ret = remove_tree(dquot, &newblk, depth + 1, version);
646         if (ret >= 0 && !newblk) {
647                 int i;
648                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
649                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
650                         /* Block got empty? */ ;
651                 /* don't put the root block into free blk list! */
652                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
653                         put_free_dqblk(filp, info, buf, *blk);
654                         *blk = 0;
655                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
656                         CERROR("VFS: Can't write quota tree block %u.\n", *blk);
657         }
658 out_buf:
659         freedqbuf(buf);
660         return ret;
661 }
662
663 /* Delete dquot from tree */
664 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
665                                 lustre_quota_version_t version)
666 {
667         uint tmp = LUSTRE_DQTREEOFF;
668
669         if (!dquot->dq_off)     /* Even not allocated? */
670                 return 0;
671         return remove_tree(dquot, &tmp, 0, version);
672 }
673
674 /* Find entry in block */
675 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
676                                  lustre_quota_version_t version)
677 {
678         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
679         dqbuf_t buf = getdqbuf();
680         loff_t ret = 0;
681         int i;
682         char *ddquot = GETENTRIES(buf, version);
683         int dqblk_sz = lustre_disk_dqblk_sz[version];
684         int dqstrinblk = lustre_dqstrinblk[version];
685
686         if (!buf)
687                 return -ENOMEM;
688         if ((ret = read_blk(filp, blk, buf)) < 0) {
689                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
690                 goto out_buf;
691         }
692         if (dquot->dq_id)
693                 for (i = 0; i < dqstrinblk && 
694                      DQF_GET(ddquot+i*dqblk_sz, version, dqb_id) != dquot->dq_id;
695                      i++) ;
696         else {                  /* ID 0 as a bit more complicated searching... */
697                 char fakedquot[dqblk_sz];
698
699                 memset(fakedquot, 0, sizeof(fakedquot));
700                 for (i = 0; i < dqstrinblk; i++)
701                         if (!DQF_GET(ddquot + i*dqblk_sz, version, dqb_id)
702                             && memcmp(fakedquot, ddquot + i*dqblk_sz,
703                                       dqblk_sz))
704                                 break;
705         }
706         if (i == dqstrinblk) {
707                 CERROR("VFS: Quota for id %u referenced but not present.\n",
708                        dquot->dq_id);
709                 ret = -EIO;
710                 goto out_buf;
711         } else
712                 ret =
713                     (blk << LUSTRE_DQBLKSIZE_BITS) +
714                     sizeof(struct lustre_disk_dqdbheader) +
715                     i * dqblk_sz;
716 out_buf:
717         freedqbuf(buf);
718         return ret;
719 }
720
721 /* Find entry for given id in the tree */
722 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
723                                 lustre_quota_version_t version)
724 {
725         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
726         dqbuf_t buf = getdqbuf();
727         loff_t ret = 0;
728         u32 *ref = (u32 *) buf;
729
730         if (!buf)
731                 return -ENOMEM;
732         if ((ret = read_blk(filp, blk, buf)) < 0) {
733                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
734                 goto out_buf;
735         }
736         ret = 0;
737         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
738         if (!blk)               /* No reference? */
739                 goto out_buf;
740         if (depth < LUSTRE_DQTREEDEPTH - 1)
741                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
742         else
743                 ret = find_block_dqentry(dquot, blk, version);
744 out_buf:
745         freedqbuf(buf);
746         return ret;
747 }
748
749 /* Find entry for given id in the tree - wrapper function */
750 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
751                                   lustre_quota_version_t version)
752 {
753         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
754 }
755
756 int lustre_read_dquot(struct lustre_dquot *dquot)
757 {
758         int type = dquot->dq_type;
759         struct file *filp;
760         mm_segment_t fs;
761         loff_t offset;
762         int ret = 0, dqblk_sz;
763         lustre_quota_version_t version;
764
765         /* Invalidated quota? */
766         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
767                 CERROR("VFS: Quota invalidated while reading!\n");
768                 return -EIO;
769         }
770
771         version = dquot->dq_info->qi_version;
772         dqblk_sz = lustre_disk_dqblk_sz[version];
773
774         offset = find_dqentry(dquot, version);
775         if (offset <= 0) {      /* Entry not present? */
776                 if (offset < 0)
777                         CERROR("VFS: Can't read quota structure for id %u.\n",
778                                dquot->dq_id);
779                 dquot->dq_off = 0;
780                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
781                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
782                 ret = offset;
783         } else {
784                 char ddquot[dqblk_sz], empty[dqblk_sz];
785
786                 dquot->dq_off = offset;
787                 fs = get_fs();
788                 set_fs(KERNEL_DS);
789                 if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
790                     dqblk_sz) {
791                         if (ret >= 0)
792                                 ret = -EIO;
793                         CERROR("VFS: Error while reading quota structure "
794                                "for id %u.\n", dquot->dq_id);
795                         memset(ddquot, 0, dqblk_sz);
796                 } else {
797                         ret = 0;
798                         /* We need to escape back all-zero structure */
799                         memset(empty, 0, dqblk_sz);
800                         DQF_PUT(empty, version, dqb_itime, 1);
801                         if (!memcmp(empty, ddquot, dqblk_sz))
802                                 DQF_PUT(ddquot, version, dqb_itime, 0);
803                 }
804                 set_fs(fs);
805                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
806         }
807
808         return ret;
809 }
810
811 /* Commit changes of dquot to disk - it might also mean deleting it when quota became fake */
812 int lustre_commit_dquot(struct lustre_dquot *dquot)
813 {
814         int rc = 0;
815         lustre_quota_version_t version = dquot->dq_info->qi_version;
816
817         /* always clear the flag so we don't loop on an IO error... */
818         clear_bit(DQ_MOD_B, &dquot->dq_flags);
819
820         /* The block/inode usage in admin quotafile isn't the real usage
821          * over all cluster, so keep the fake dquot entry on disk is
822          * meaningless, just remove it */
823         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
824                 rc = lustre_delete_dquot(dquot, version);
825         else
826                 rc = lustre_write_dquot(dquot, version);
827
828         if (rc < 0)
829                 return rc;
830
831         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
832                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
833
834         return rc;
835 }
836
837 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
838 {
839         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
840         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
841         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
842         struct lustre_disk_dqheader dqhead;
843         ssize_t size;
844         loff_t offset = 0;
845         struct file *fp = lqi->qi_files[type];
846         int rc = 0;
847
848         /* write quotafile header */
849         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
850                                        fake_magics[type] : quota_magics[type]);
851         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
852         size = fp->f_op->write(fp, (char *)&dqhead,
853                                sizeof(struct lustre_disk_dqheader), &offset);
854
855         if (size != sizeof(struct lustre_disk_dqheader)) {
856                 CERROR("error writing quotafile header (rc:%d)\n", rc);
857                 rc = size;
858         }
859
860         return rc;
861 }
862
863 /* We need to export this function to initialize quotafile, because we haven't
864  * user level check utility */
865 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
866                                    int fakemagics)
867 {
868         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
869         int rc;
870
871         rc = lustre_init_quota_header(lqi, type, fakemagics);
872         if (rc)
873                 return rc;
874
875         /* write init quota info */
876         memset(dqinfo, 0, sizeof(*dqinfo));
877         dqinfo->dqi_bgrace = MAX_DQ_TIME;
878         dqinfo->dqi_igrace = MAX_IQ_TIME;
879         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
880
881         return lustre_write_quota_info(lqi, type);
882 }
883
/* Initialise the quota file of the given type with the regular
 * (non-fake) magic numbers. */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int use_fake_magics = 0;

        return lustre_init_quota_info_generic(lqi, type, use_fake_magics);
}
888
889 ssize_t quota_read(struct file *file, struct inode *inode, int type,
890                    uint blk, dqbuf_t buf)
891 {
892         if (file) {
893                 return read_blk(file, blk, buf);
894         } else {
895 #ifndef KERNEL_SUPPORTS_QUOTA_READ
896                 return -ENOTSUPP;
897 #else
898                 struct super_block *sb = inode->i_sb;
899                 memset(buf, 0, LUSTRE_DQBLKSIZE);
900                 return sb->s_op->quota_read(sb, type, (char *)buf,
901                                             LUSTRE_DQBLKSIZE, 
902                                             blk << LUSTRE_DQBLKSIZE_BITS);
903 #endif
904         }
905 }
906
907 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
908                               uint blk, struct list_head *list)
909 {
910         dqbuf_t buf = getdqbuf();
911         loff_t ret = 0;
912         struct lustre_disk_dqdbheader *dqhead =
913             (struct lustre_disk_dqdbheader *)buf;
914         struct dqblk *blk_item;
915         struct dqblk *pos;
916         struct list_head *tmp;
917
918         if (!buf)
919                 return -ENOMEM;
920         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
921                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
922                 goto out_buf;
923         }
924         ret = 0;
925
926         if (!le32_to_cpu(dqhead->dqdh_entries))
927                 goto out_buf;
928
929         if (list_empty(list)) {
930                 tmp = list;
931                 goto done;
932         }
933
934         list_for_each_entry(pos, list, link) {
935                 if (blk == pos->blk)    /* we got this blk already */
936                         goto out_buf;
937                 if (blk > pos->blk)
938                         continue;
939                 break;
940         }
941         tmp = &pos->link;
942 done:
943         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
944         if (!blk_item) {
945                 ret = -ENOMEM;
946                 goto out_buf;
947         }
948         blk_item->blk = blk;
949         INIT_LIST_HEAD(&blk_item->link);
950
951         list_add_tail(&blk_item->link, tmp);
952
953 out_buf:
954         freedqbuf(buf);
955         return ret;
956 }
957
958 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
959                       uint blk, int depth, struct list_head *list)
960 {
961         dqbuf_t buf = getdqbuf();
962         loff_t ret = 0;
963         int index;
964         u32 *ref = (u32 *) buf;
965
966         if (!buf)
967                 return -ENOMEM;
968         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
969                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
970                 goto out_buf;
971         }
972         ret = 0;
973
974         for (index = 0; index <= 0xff && !ret; index++) {
975                 blk = le32_to_cpu(ref[index]);
976                 if (!blk)       /* No reference */
977                         continue;
978
979                 if (depth < LUSTRE_DQTREEDEPTH - 1)
980                         ret = walk_tree_dqentry(filp, inode, type, blk,
981                                                 depth + 1, list);
982                 else
983                         ret = walk_block_dqentry(filp, inode, type, blk, list);
984         }
985 out_buf:
986         freedqbuf(buf);
987         return ret;
988 }
989
990 /* Walk through the quota file (v2 format) to get all ids with quota limit */
991 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
992                     struct list_head *list)
993 {
994         struct list_head blk_list;
995         struct dqblk *blk_item, *tmp;
996         dqbuf_t buf = NULL;
997         char *ddquot;
998         int rc;
999         lustre_quota_version_t version;
1000
1001         ENTRY;
1002
1003         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V1) == 0)
1004                 version = LUSTRE_QUOTA_V1;
1005         else if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1006                 version = LUSTRE_QUOTA_V2;
1007         else {
1008                 CERROR("unknown quota file format!\n");
1009                 RETURN(-EINVAL);
1010         }
1011
1012         if (!list_empty(list)) {
1013                 CERROR("not empty list\n");
1014                 RETURN(-EINVAL);
1015         }
1016
1017         INIT_LIST_HEAD(&blk_list);
1018         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1019         if (rc) {
1020                 CERROR("walk through quota file failed!(%d)\n", rc);
1021                 GOTO(out_free, rc);
1022         }
1023         if (list_empty(&blk_list))
1024                 RETURN(0);
1025
1026         buf = getdqbuf();
1027         if (!buf)
1028                 RETURN(-ENOMEM);
1029         ddquot = GETENTRIES(buf, version);
1030
1031         list_for_each_entry(blk_item, &blk_list, link) {
1032                 loff_t ret = 0;
1033                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1034                 char fakedquot[dqblk_sz];
1035
1036                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1037                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1038                         CERROR("VFS: Can't read quota tree block %u.\n",
1039                                blk_item->blk);
1040                         GOTO(out_free, rc = ret);
1041                 }
1042
1043                 memset(fakedquot, 0, dqblk_sz);
1044                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1045                         struct dquot_id *dqid;
1046                         /* skip empty entry */
1047                         if (!memcmp(fakedquot, ddquot + i*dqblk_sz, dqblk_sz))
1048                                 continue;
1049
1050                         dqid = kmalloc(sizeof(*dqid), GFP_NOFS);
1051                         if (!dqid) 
1052                                 GOTO(out_free, rc = -ENOMEM);
1053
1054                         dqid->di_id = DQF_GET(ddquot + i * dqblk_sz, 
1055                                               version, dqb_id);
1056                         INIT_LIST_HEAD(&dqid->di_link);
1057                         list_add(&dqid->di_link, list);
1058                 }
1059         }
1060
1061 out_free:
1062         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1063                 list_del_init(&blk_item->link);
1064                 kfree(blk_item);
1065         }
1066         if (buf)
1067                 freedqbuf(buf);
1068
1069         RETURN(rc);
1070 }
1071
1072
1073 EXPORT_SYMBOL(lustre_read_quota_info);
1074 EXPORT_SYMBOL(lustre_write_quota_info);
1075 EXPORT_SYMBOL(lustre_check_quota_file);
1076 EXPORT_SYMBOL(lustre_read_dquot);
1077 EXPORT_SYMBOL(lustre_commit_dquot);
1078 EXPORT_SYMBOL(lustre_init_quota_info);
1079 EXPORT_SYMBOL(lustre_get_qids);