Whamcloud - gitweb
b=16890
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #include <linux/quotaio_v1.h>
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
64 static const uint lustre_initqversions[][MAXQUOTAS] = {
65         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
66 };
67
68 static const int lustre_dqstrinblk[] = {
69         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
70 };
71
72 static const int lustre_disk_dqblk_sz[] = {
73         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
74 };
75
76 static const union
77 {
78         struct lustre_disk_dqblk_v2 r1;
79 } fakedquot[] = {
80         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
81 };
82
83 static const union
84 {
85         struct lustre_disk_dqblk_v2 r1;
86 } emptydquot[] = {
87         [LUSTRE_QUOTA_V2] = {.r1 = { 0 } }
88 };
89
90 int check_quota_file(struct file *f, struct inode *inode, int type, 
91                      lustre_quota_version_t version)
92 {
93         struct lustre_disk_dqheader dqhead;
94         mm_segment_t fs;
95         ssize_t size;
96         loff_t offset = 0;
97         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
98         const uint *quota_versions = lustre_initqversions[version];
99
100         if (f) {
101                 fs = get_fs();
102                 set_fs(KERNEL_DS);
103                 size = f->f_op->read(f, (char *)&dqhead,
104                                      sizeof(struct lustre_disk_dqheader), 
105                                      &offset);
106                 set_fs(fs);
107         } else { 
108 #ifndef KERNEL_SUPPORTS_QUOTA_READ
109                 size = 0;
110 #else
111                 struct super_block *sb = inode->i_sb;
112                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
113                                             sizeof(struct lustre_disk_dqheader), 0);
114 #endif
115         }
116         if (size != sizeof(struct lustre_disk_dqheader))
117                 return -EINVAL;
118         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
119             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
120                 return -EINVAL;
121         return 0;
122 }
123
124 /**
125  * Check whether given file is really lustre admin quotafile
126  */
127 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
128 {
129         struct file *f = lqi->qi_files[type];
130         return check_quota_file(f, NULL, type, lqi->qi_version);
131 }
132
133 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
134 {
135         mm_segment_t fs;
136         struct lustre_disk_dqinfo dinfo;
137         ssize_t size;
138         loff_t offset = LUSTRE_DQINFOOFF;
139
140         fs = get_fs();
141         set_fs(KERNEL_DS);
142         size = f->f_op->read(f, (char *)&dinfo, 
143                              sizeof(struct lustre_disk_dqinfo), &offset);
144         set_fs(fs);
145         if (size != sizeof(struct lustre_disk_dqinfo)) {
146                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
147                        f->f_vfsmnt->mnt_sb->s_id);
148                 return -EINVAL;
149         }
150         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
151         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
152         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
153         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
154         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
155         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
156         return 0;
157 }
158
159 /**
160  * Read information header from quota file
161  */
162 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
163 {
164         return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]);
165 }
166
167 /**
168  * Write information header to quota file
169  */
170 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
171 {
172         mm_segment_t fs;
173         struct lustre_disk_dqinfo dinfo;
174         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
175         struct file *f = lqi->qi_files[type];
176         ssize_t size;
177         loff_t offset = LUSTRE_DQINFOOFF;
178
179         info->dqi_flags &= ~DQF_INFO_DIRTY;
180         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
181         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
182         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
183         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
184         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
185         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
186         fs = get_fs();
187         set_fs(KERNEL_DS);
188         size = f->f_op->write(f, (char *)&dinfo, 
189                               sizeof(struct lustre_disk_dqinfo), &offset);
190         set_fs(fs);
191         if (size != sizeof(struct lustre_disk_dqinfo)) {
192                 CDEBUG(D_WARNING, 
193                        "Can't write info structure on device %s.\n",
194                        f->f_vfsmnt->mnt_sb->s_id);
195                 return -1;
196         }
197         return 0;
198 }
199
200 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
201                  lustre_quota_version_t version)
202 {
203         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
204
205         LASSERT(version == LUSTRE_QUOTA_V2);
206
207         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
208         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
209         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
210         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
211         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
212         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
213         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
214         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
215 }
216
217 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
218                        qid_t id, lustre_quota_version_t version)
219 {
220         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
221
222         LASSERT(version == LUSTRE_QUOTA_V2);
223
224         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
225         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
226         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
227         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
228         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
229         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
230         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
231         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
232         dqblk->dqb_id = cpu_to_le32(id);
233
234         return 0;
235 }
236
237 dqbuf_t getdqbuf(void)
238 {
239         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
240         if (!buf)
241                 CDEBUG(D_WARNING, 
242                        "VFS: Not enough memory for quota buffers.\n");
243         return buf;
244 }
245
246 void freedqbuf(dqbuf_t buf)
247 {
248         kfree(buf);
249 }
250
251 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
252 {
253         mm_segment_t fs;
254         ssize_t ret;
255         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
256
257         memset(buf, 0, LUSTRE_DQBLKSIZE);
258         fs = get_fs();
259         set_fs(KERNEL_DS);
260         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
261         set_fs(fs);
262         return ret;
263 }
264
265 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
266 {
267         mm_segment_t fs;
268         ssize_t ret;
269         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
270
271         fs = get_fs();
272         set_fs(KERNEL_DS);
273         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
274         set_fs(fs);
275         return ret;
276 }
277
278 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
279 {
280         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
281 }
282
283 /**
284  * Remove empty block from list and return it
285  */
286 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
287 {
288         dqbuf_t buf = getdqbuf();
289         struct lustre_disk_dqdbheader *dh =
290             (struct lustre_disk_dqdbheader *)buf;
291         int ret, blk;
292
293         if (!buf)
294                 return -ENOMEM;
295         if (info->dqi_free_blk) {
296                 blk = info->dqi_free_blk;
297                 if ((ret = read_blk(filp, blk, buf)) < 0)
298                         goto out_buf;
299                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
300         } else {
301                 memset(buf, 0, LUSTRE_DQBLKSIZE);
302                 /* Assure block allocation... */
303                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
304                         goto out_buf;
305                 blk = info->dqi_blocks++;
306         }
307         lustre_mark_info_dirty(info);
308         ret = blk;
309 out_buf:
310         freedqbuf(buf);
311         return ret;
312 }
313
314 /**
315  * Insert empty block to the list
316  */
317 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
318                    dqbuf_t buf, uint blk)
319 {
320         struct lustre_disk_dqdbheader *dh =
321             (struct lustre_disk_dqdbheader *)buf;
322         int err;
323
324         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
325         dh->dqdh_prev_free = cpu_to_le32(0);
326         dh->dqdh_entries = cpu_to_le16(0);
327         info->dqi_free_blk = blk;
328         lustre_mark_info_dirty(info);
329         if ((err = write_blk(filp, blk, buf)) < 0)
330                 /* Some strange block. We had better leave it... */
331                 return err;
332         return 0;
333 }
334
335 /**
336  * Remove given block from the list of blocks with free entries
337  */
338 int remove_free_dqentry(struct file *filp,
339                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
340                         uint blk)
341 {
342         dqbuf_t tmpbuf = getdqbuf();
343         struct lustre_disk_dqdbheader *dh =
344             (struct lustre_disk_dqdbheader *)buf;
345         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
346             le32_to_cpu(dh->dqdh_prev_free);
347         int err;
348
349         if (!tmpbuf)
350                 return -ENOMEM;
351         if (nextblk) {
352                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
353                         goto out_buf;
354                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
355                     dh->dqdh_prev_free;
356                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
357                         goto out_buf;
358         }
359         if (prevblk) {
360                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
361                         goto out_buf;
362                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
363                     dh->dqdh_next_free;
364                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
365                         goto out_buf;
366         } else {
367                 info->dqi_free_entry = nextblk;
368                 lustre_mark_info_dirty(info);
369         }
370         freedqbuf(tmpbuf);
371         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
372         if (write_blk(filp, blk, buf) < 0)
373                 /* No matter whether write succeeds block is out of list */
374                 CDEBUG(D_ERROR, 
375                        "VFS: Can't write block (%u) with free entries.\n", blk);
376         return 0;
377 out_buf:
378         freedqbuf(tmpbuf);
379         return err;
380 }
381
382 /**
383  * Insert given block to the beginning of list with free entries
384  */
385 int insert_free_dqentry(struct file *filp,
386                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
387                         uint blk)
388 {
389         dqbuf_t tmpbuf = getdqbuf();
390         struct lustre_disk_dqdbheader *dh =
391             (struct lustre_disk_dqdbheader *)buf;
392         int err;
393
394         if (!tmpbuf)
395                 return -ENOMEM;
396         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
397         dh->dqdh_prev_free = cpu_to_le32(0);
398         if ((err = write_blk(filp, blk, buf)) < 0)
399                 goto out_buf;
400         if (info->dqi_free_entry) {
401                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
402                         goto out_buf;
403                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
404                     cpu_to_le32(blk);
405                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
406                         goto out_buf;
407         }
408         freedqbuf(tmpbuf);
409         info->dqi_free_entry = blk;
410         lustre_mark_info_dirty(info);
411         return 0;
412 out_buf:
413         freedqbuf(tmpbuf);
414         return err;
415 }
416
417
418
419 /**
420  * Find space for dquot
421  */
422 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
423                               lustre_quota_version_t version)
424 {
425         struct lustre_quota_info *lqi = dquot->dq_info;
426         struct file *filp = lqi->qi_files[dquot->dq_type];
427         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
428         uint blk, i;
429         struct lustre_disk_dqdbheader *dh;
430         void *ddquot;
431         int dqblk_sz = lustre_disk_dqblk_sz[version];
432         int dqstrinblk = lustre_dqstrinblk[version];
433         dqbuf_t buf;
434
435         *err = 0;
436         if (!(buf = getdqbuf())) {
437                 *err = -ENOMEM;
438                 return 0;
439         }
440         dh = (struct lustre_disk_dqdbheader *)buf;
441         ddquot = GETENTRIES(buf, version);
442         if (info->dqi_free_entry) {
443                 blk = info->dqi_free_entry;
444                 if ((*err = read_blk(filp, blk, buf)) < 0)
445                         goto out_buf;
446         } else {
447                 blk = get_free_dqblk(filp, info);
448                 if ((int)blk < 0) {
449                         *err = blk;
450                         freedqbuf(buf);
451                         return 0;
452                 }
453                 memset(buf, 0, LUSTRE_DQBLKSIZE);
454                 info->dqi_free_entry = blk; /* This is enough as block is 
455                                                already zeroed and entry list
456                                                is empty... */
457                 lustre_mark_info_dirty(info);
458         }
459
460         /* Will block be full */
461         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
462                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
463                         CDEBUG(D_ERROR, 
464                                "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n",
465                                blk);
466                         goto out_buf;
467                 }
468         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
469         /* Find free structure in block */
470         for (i = 0; i < dqstrinblk &&
471              memcmp((char *)&emptydquot[version],
472                     (char*)ddquot + i * dqblk_sz,
473                     dqblk_sz); i++);
474
475         if (i == dqstrinblk) {
476                 CDEBUG(D_ERROR, 
477                        "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
478                 *err = -EIO;
479                 goto out_buf;
480         }
481
482         if ((*err = write_blk(filp, blk, buf)) < 0) {
483                 CDEBUG(D_ERROR,
484                        "VFS: find_free_dqentry(): Can't write quota data block %u.\n",
485                        blk);
486                 goto out_buf;
487         }
488         dquot->dq_off =
489             (blk << LUSTRE_DQBLKSIZE_BITS) +
490             sizeof(struct lustre_disk_dqdbheader) +
491             i * dqblk_sz;
492         freedqbuf(buf);
493         return blk;
494 out_buf:
495         freedqbuf(buf);
496         return 0;
497 }
498
499 /**
500  * Insert reference to structure into the trie
501  */
502 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
503                           lustre_quota_version_t version)
504 {
505         struct lustre_quota_info *lqi = dquot->dq_info;
506         struct file *filp = lqi->qi_files[dquot->dq_type];
507         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
508         dqbuf_t buf;
509         int ret = 0, newson = 0, newact = 0;
510         u32 *ref;
511         uint newblk;
512
513         if (!(buf = getdqbuf()))
514                 return -ENOMEM;
515         if (!*treeblk) {
516                 ret = get_free_dqblk(filp, info);
517                 if (ret < 0)
518                         goto out_buf;
519                 *treeblk = ret;
520                 memset(buf, 0, LUSTRE_DQBLKSIZE);
521                 newact = 1;
522         } else {
523                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
524                         CDEBUG(D_ERROR,
525                                "VFS: Can't read tree quota block %u.\n",
526                                *treeblk);
527                         goto out_buf;
528                 }
529         }
530         ref = (u32 *) buf;
531         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
532         if (!newblk)
533                 newson = 1;
534         if (depth == LUSTRE_DQTREEDEPTH - 1) {
535
536                 if (newblk) {
537                         CDEBUG(D_ERROR, 
538                                "VFS: Inserting already present quota entry (block %u).\n",
539                                ref[GETIDINDEX(dquot->dq_id, depth)]);
540                         ret = -EIO;
541                         goto out_buf;
542                 }
543
544                 newblk = find_free_dqentry(dquot, &ret, version);
545         } else
546                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
547         if (newson && ret >= 0) {
548                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
549                 ret = write_blk(filp, *treeblk, buf);
550         } else if (newact && ret < 0)
551                 put_free_dqblk(filp, info, buf, *treeblk);
552 out_buf:
553         freedqbuf(buf);
554         return ret;
555 }
556
557 /**
558  * Wrapper for inserting quota structure into tree
559  */
560 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
561                                  lustre_quota_version_t version)
562 {
563         int tmp = LUSTRE_DQTREEOFF;
564         return do_insert_tree(dquot, &tmp, 0, version);
565 }
566
567 /**
568  * We don't have to be afraid of deadlocks as we never have quotas on
569  * quota files...
570  */
571 static int lustre_write_dquot(struct lustre_dquot *dquot, 
572                               lustre_quota_version_t version)
573 {
574         int type = dquot->dq_type;
575         struct file *filp;
576         mm_segment_t fs;
577         loff_t offset;
578         ssize_t ret;
579         int dqblk_sz = lustre_disk_dqblk_sz[version];
580         char ddquot[dqblk_sz];
581
582         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
583         if (ret < 0)
584                 return ret;
585
586         if (!dquot->dq_off)
587                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
588                         CDEBUG(D_ERROR,
589                                "VFS: Error %Zd occurred while creating quota.\n",
590                                ret);
591                         return ret;
592                 }
593         filp = dquot->dq_info->qi_files[type];
594         offset = dquot->dq_off;
595         /* Argh... We may need to write structure full of zeroes but that would be
596          * treated as an empty place by the rest of the code. Format change would
597          * be definitely cleaner but the problems probably are not worth it */
598         if (!memcmp((char *)&emptydquot[version], ddquot, dqblk_sz))
599                 ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime = cpu_to_le64(1);
600         fs = get_fs();
601         set_fs(KERNEL_DS);
602         ret = filp->f_op->write(filp, ddquot,
603                                 dqblk_sz, &offset);
604         set_fs(fs);
605         if (ret != dqblk_sz) {
606                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
607                        filp->f_dentry->d_sb->s_id);
608                 if (ret >= 0)
609                         ret = -ENOSPC;
610         } else
611                 ret = 0;
612
613         return ret;
614 }
615
616 /**
617  * Free dquot entry in data block
618  */
619 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
620                         lustre_quota_version_t version)
621 {
622         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
623         struct lustre_mem_dqinfo *info =
624             &dquot->dq_info->qi_info[dquot->dq_type];
625         struct lustre_disk_dqdbheader *dh;
626         dqbuf_t buf = getdqbuf();
627         int dqstrinblk = lustre_dqstrinblk[version];
628         int ret = 0;
629
630         if (!buf)
631                 return -ENOMEM;
632         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
633                 CDEBUG(D_ERROR,
634                        "VFS: Quota structure has offset to other block (%u) than it should (%u).\n",
635                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
636                 goto out_buf;
637         }
638         if ((ret = read_blk(filp, blk, buf)) < 0) {
639                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
640                 goto out_buf;
641         }
642         dh = (struct lustre_disk_dqdbheader *)buf;
643         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
644         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
645                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
646                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
647                         CDEBUG(D_ERROR,
648                                "VFS: Can't move quota data block (%u) to free list.\n",
649                                blk);
650                         goto out_buf;
651                 }
652         } else {
653                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
654                        0, lustre_disk_dqblk_sz[version]);
655                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
656                         /* Insert will write block itself */
657                         if ((ret =
658                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
659                                 CDEBUG(D_ERROR,
660                                        "VFS: Can't insert quota data block (%u) to free entry list.\n",
661                                        blk);
662                                 goto out_buf;
663                         }
664                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
665                         CDEBUG(D_ERROR,
666                                "VFS: Can't write quota data block %u\n", blk);
667                         goto out_buf;
668                 }
669         }
670         dquot->dq_off = 0;      /* Quota is now unattached */
671 out_buf:
672         freedqbuf(buf);
673         return ret;
674 }
675
676 /**
677  * Remove reference to dquot from tree
678  */
679 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
680                        lustre_quota_version_t version)
681 {
682         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
683         struct lustre_mem_dqinfo *info =
684             &dquot->dq_info->qi_info[dquot->dq_type];
685         dqbuf_t buf = getdqbuf();
686         int ret = 0;
687         uint newblk;
688         u32 *ref = (u32 *) buf;
689
690         if (!buf)
691                 return -ENOMEM;
692         if ((ret = read_blk(filp, *blk, buf)) < 0) {
693                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", *blk);
694                 goto out_buf;
695         }
696         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
697         if (depth == LUSTRE_DQTREEDEPTH - 1) {
698                 ret = free_dqentry(dquot, newblk, version);
699                 newblk = 0;
700         } else
701                 ret = remove_tree(dquot, &newblk, depth + 1, version);
702         if (ret >= 0 && !newblk) {
703                 int i;
704                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
705                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
706                         /* Block got empty? */ ;
707                 /* don't put the root block into free blk list! */
708                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
709                         put_free_dqblk(filp, info, buf, *blk);
710                         *blk = 0;
711                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
712                         CDEBUG(D_ERROR,
713                                "VFS: Can't write quota tree block %u.\n", *blk);
714         }
715 out_buf:
716         freedqbuf(buf);
717         return ret;
718 }
719
720 /**
721  * Delete dquot from tree
722  */
723 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
724                                 lustre_quota_version_t version)
725 {
726         uint tmp = LUSTRE_DQTREEOFF;
727
728         if (!dquot->dq_off)     /* Even not allocated? */
729                 return 0;
730         return remove_tree(dquot, &tmp, 0, version);
731 }
732
733 /**
734  * Find entry in block
735  */
736 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
737                                  lustre_quota_version_t version)
738 {
739         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
740         dqbuf_t buf = getdqbuf();
741         loff_t ret = 0;
742         int i;
743         struct lustre_disk_dqblk_v2 *ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
744         int dqblk_sz = lustre_disk_dqblk_sz[version];
745         int dqstrinblk = lustre_dqstrinblk[version];
746
747         LASSERT(version == LUSTRE_QUOTA_V2);
748
749         if (!buf)
750                 return -ENOMEM;
751         if ((ret = read_blk(filp, blk, buf)) < 0) {
752                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
753                 goto out_buf;
754         }
755         if (dquot->dq_id)
756                 for (i = 0; i < dqstrinblk && 
757                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
758                      i++) ;
759         else {                  /* ID 0 as a bit more complicated searching... */
760                 for (i = 0; i < dqstrinblk; i++)
761                         if (!le32_to_cpu(ddquot[i].dqb_id)
762                             && memcmp((char *)&emptydquot[version],
763                                       ddquot + i*dqblk_sz,
764                                       dqblk_sz))
765                                 break;
766         }
767         if (i == dqstrinblk) {
768                 CDEBUG(D_ERROR,
769                        "VFS: Quota for id %u referenced but not present.\n",
770                        dquot->dq_id);
771                 ret = -EIO;
772                 goto out_buf;
773         } else
774                 ret =
775                     (blk << LUSTRE_DQBLKSIZE_BITS) +
776                     sizeof(struct lustre_disk_dqdbheader) +
777                     i * dqblk_sz;
778 out_buf:
779         freedqbuf(buf);
780         return ret;
781 }
782
783 /**
784  * Find entry for given id in the tree
785  */
786 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
787                                 lustre_quota_version_t version)
788 {
789         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
790         dqbuf_t buf = getdqbuf();
791         loff_t ret = 0;
792         u32 *ref = (u32 *) buf;
793
794         if (!buf)
795                 return -ENOMEM;
796         if ((ret = read_blk(filp, blk, buf)) < 0) {
797                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
798                 goto out_buf;
799         }
800         ret = 0;
801         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
802         if (!blk)               /* No reference? */
803                 goto out_buf;
804         if (depth < LUSTRE_DQTREEDEPTH - 1)
805                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
806         else
807                 ret = find_block_dqentry(dquot, blk, version);
808 out_buf:
809         freedqbuf(buf);
810         return ret;
811 }
812
813 /**
814  * Find entry for given id in the tree - wrapper function
815  */
816 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
817                                   lustre_quota_version_t version)
818 {
819         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
820 }
821
822 int lustre_read_dquot(struct lustre_dquot *dquot)
823 {
824         int type = dquot->dq_type;
825         struct file *filp;
826         mm_segment_t fs;
827         loff_t offset;
828         int ret = 0, dqblk_sz;
829         lustre_quota_version_t version;
830
831         /* Invalidated quota? */
832         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
833                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
834                 return -EIO;
835         }
836
837         version = dquot->dq_info->qi_version;
838         LASSERT(version == LUSTRE_QUOTA_V2);
839         dqblk_sz = lustre_disk_dqblk_sz[version];
840
841         offset = find_dqentry(dquot, version);
842         if (offset <= 0) {      /* Entry not present? */
843                 if (offset < 0)
844                         CDEBUG(D_ERROR,
845                                "VFS: Can't read quota structure for id %u.\n",
846                                dquot->dq_id);
847                 dquot->dq_off = 0;
848                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
849                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
850                 ret = offset;
851         } else {
852                 char ddquot[dqblk_sz];
853
854                 dquot->dq_off = offset;
855                 fs = get_fs();
856                 set_fs(KERNEL_DS);
857                 if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
858                     dqblk_sz) {
859                         if (ret >= 0)
860                                 ret = -EIO;
861                         CDEBUG(D_ERROR,
862                                "VFS: Error while reading quota structure for id %u.\n",
863                                dquot->dq_id);
864                         memset(ddquot, 0, dqblk_sz);
865                 } else {
866                         ret = 0;
867                         /* We need to escape back all-zero structure */
868                         if (!memcmp((char *)&fakedquot[version],
869                                     ddquot, dqblk_sz))
870                                 ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime = cpu_to_le64(0);
871                 }
872                 set_fs(fs);
873                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
874         }
875
876         return ret;
877 }
878
879 /**
880  * Commit changes of dquot to disk - it might also mean deleting
881  * it when quota became fake.
882  */
883 int lustre_commit_dquot(struct lustre_dquot *dquot)
884 {
885         int rc = 0;
886         lustre_quota_version_t version = dquot->dq_info->qi_version;
887
888         /* always clear the flag so we don't loop on an IO error... */
889         clear_bit(DQ_MOD_B, &dquot->dq_flags);
890
891         /* The block/inode usage in admin quotafile isn't the real usage
892          * over all cluster, so keep the fake dquot entry on disk is
893          * meaningless, just remove it */
894         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
895                 rc = lustre_delete_dquot(dquot, version);
896         else
897                 rc = lustre_write_dquot(dquot, version);
898
899         if (rc < 0)
900                 return rc;
901
902         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
903                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
904
905         return rc;
906 }
907
908 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
909 {
910         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
911         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
912         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
913         struct lustre_disk_dqheader dqhead;
914         ssize_t size;
915         loff_t offset = 0;
916         struct file *fp = lqi->qi_files[type];
917         int rc = 0;
918
919         /* write quotafile header */
920         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
921                                        fake_magics[type] : quota_magics[type]);
922         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
923         size = fp->f_op->write(fp, (char *)&dqhead,
924                                sizeof(struct lustre_disk_dqheader), &offset);
925
926         if (size != sizeof(struct lustre_disk_dqheader)) {
927                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
928                 rc = size;
929         }
930
931         return rc;
932 }
933
934 /**
935  * We need to export this function to initialize quotafile, because we haven't
936  * user level check utility
937  */
938 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
939                                    int fakemagics)
940 {
941         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
942         int rc;
943
944         rc = lustre_init_quota_header(lqi, type, fakemagics);
945         if (rc)
946                 return rc;
947
948         /* write init quota info */
949         memset(dqinfo, 0, sizeof(*dqinfo));
950         dqinfo->dqi_bgrace = MAX_DQ_TIME;
951         dqinfo->dqi_igrace = MAX_IQ_TIME;
952         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
953
954         return lustre_write_quota_info(lqi, type);
955 }
956
/**
 * Initialize the quota file of the given \a type using the regular
 * (non-fake) magic.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
961
962 ssize_t quota_read(struct file *file, struct inode *inode, int type,
963                    uint blk, dqbuf_t buf)
964 {
965         if (file) {
966                 return read_blk(file, blk, buf);
967         } else {
968 #ifndef KERNEL_SUPPORTS_QUOTA_READ
969                 return -ENOTSUPP;
970 #else
971                 struct super_block *sb = inode->i_sb;
972                 memset(buf, 0, LUSTRE_DQBLKSIZE);
973                 return sb->s_op->quota_read(sb, type, (char *)buf,
974                                             LUSTRE_DQBLKSIZE, 
975                                             blk << LUSTRE_DQBLKSIZE_BITS);
976 #endif
977         }
978 }
979
980 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
981                               uint blk, struct list_head *list)
982 {
983         dqbuf_t buf = getdqbuf();
984         loff_t ret = 0;
985         struct lustre_disk_dqdbheader *dqhead =
986             (struct lustre_disk_dqdbheader *)buf;
987         struct dqblk *blk_item;
988         struct dqblk *pos;
989         struct list_head *tmp;
990
991         if (!buf)
992                 return -ENOMEM;
993         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
994                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
995                 goto out_buf;
996         }
997         ret = 0;
998
999         if (!le32_to_cpu(dqhead->dqdh_entries))
1000                 goto out_buf;
1001
1002         if (list_empty(list)) {
1003                 tmp = list;
1004                 goto done;
1005         }
1006
1007         list_for_each_entry(pos, list, link) {
1008                 if (blk == pos->blk)    /* we got this blk already */
1009                         goto out_buf;
1010                 if (blk > pos->blk)
1011                         continue;
1012                 break;
1013         }
1014         tmp = &pos->link;
1015 done:
1016         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
1017         if (!blk_item) {
1018                 ret = -ENOMEM;
1019                 goto out_buf;
1020         }
1021         blk_item->blk = blk;
1022         INIT_LIST_HEAD(&blk_item->link);
1023
1024         list_add_tail(&blk_item->link, tmp);
1025
1026 out_buf:
1027         freedqbuf(buf);
1028         return ret;
1029 }
1030
1031 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1032                       uint blk, int depth, struct list_head *list)
1033 {
1034         dqbuf_t buf = getdqbuf();
1035         loff_t ret = 0;
1036         int index;
1037         u32 *ref = (u32 *) buf;
1038
1039         if (!buf)
1040                 return -ENOMEM;
1041         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1042                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1043                 goto out_buf;
1044         }
1045         ret = 0;
1046
1047         for (index = 0; index <= 0xff && !ret; index++) {
1048                 blk = le32_to_cpu(ref[index]);
1049                 if (!blk)       /* No reference */
1050                         continue;
1051
1052                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1053                         ret = walk_tree_dqentry(filp, inode, type, blk,
1054                                                 depth + 1, list);
1055                 else
1056                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1057         }
1058 out_buf:
1059         freedqbuf(buf);
1060         return ret;
1061 }
1062
1063 /**
1064  * Walk through the quota file (v2 format) to get all ids with quota limit
1065  */
1066 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1067                     struct list_head *list)
1068 {
1069         struct list_head blk_list;
1070         struct dqblk *blk_item, *tmp;
1071         dqbuf_t buf = NULL;
1072         struct lustre_disk_dqblk_v2 *ddquot;
1073         int rc;
1074         lustre_quota_version_t version;
1075
1076         ENTRY;
1077
1078         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1079                 version = LUSTRE_QUOTA_V2;
1080         else {
1081                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1082                 RETURN(-EINVAL);
1083         }
1084
1085         if (!list_empty(list)) {
1086                 CDEBUG(D_ERROR, "not empty list\n");
1087                 RETURN(-EINVAL);
1088         }
1089
1090         INIT_LIST_HEAD(&blk_list);
1091         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1092         if (rc) {
1093                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1094                 GOTO(out_free, rc);
1095         }
1096         if (list_empty(&blk_list))
1097                 RETURN(0);
1098
1099         buf = getdqbuf();
1100         if (!buf)
1101                 RETURN(-ENOMEM);
1102         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1103
1104         list_for_each_entry(blk_item, &blk_list, link) {
1105                 loff_t ret = 0;
1106                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1107
1108                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1109                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1110                         CDEBUG(D_ERROR,
1111                                "VFS: Can't read quota tree block %u.\n",
1112                                blk_item->blk);
1113                         GOTO(out_free, rc = ret);
1114                 }
1115
1116                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1117                         struct dquot_id *dqid;
1118                         /* skip empty entry */
1119                         if (!memcmp((char *)&emptydquot[version],
1120                                     ddquot + i*dqblk_sz, dqblk_sz))
1121                                 continue;
1122
1123                         dqid = kmalloc(sizeof(*dqid), GFP_NOFS);
1124                         if (!dqid) 
1125                                 GOTO(out_free, rc = -ENOMEM);
1126
1127                         dqid->di_id = le32_to_cpu(ddquot[i].dqb_id);
1128                         INIT_LIST_HEAD(&dqid->di_link);
1129                         list_add(&dqid->di_link, list);
1130                 }
1131         }
1132
1133 out_free:
1134         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1135                 list_del_init(&blk_item->link);
1136                 kfree(blk_item);
1137         }
1138         if (buf)
1139                 freedqbuf(buf);
1140
1141         RETURN(rc);
1142 }
1143
1144
/* Symbols made available to other modules */

/* quota file and quota info handling */
EXPORT_SYMBOL(lustre_check_quota_file);
EXPORT_SYMBOL(lustre_init_quota_info);
EXPORT_SYMBOL(lustre_read_quota_info);
EXPORT_SYMBOL(lustre_write_quota_info);

/* per-id quota entries */
EXPORT_SYMBOL(lustre_read_dquot);
EXPORT_SYMBOL(lustre_commit_dquot);
EXPORT_SYMBOL(lustre_get_qids);
1152 #endif