Whamcloud - gitweb
LU-56 libcfs: NUMA allocator and code cleanup
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * lustre/lvfs/lustre_quota_fmt.c
35  *
36  * Lustre administrative quota format.
37  * from linux/fs/quota_v2.c
38  */
39
40 #ifndef EXPORT_SYMTAB
41 # define EXPORT_SYMTAB
42 #endif
43
44 #include <linux/errno.h>
45 #include <linux/fs.h>
46 #include <linux/mount.h>
47 #include <linux/kernel.h>
48 #include <linux/init.h>
49 #include <linux/module.h>
50 #include <linux/slab.h>
51 #ifdef HAVE_QUOTAIO_V1_H
52 # include <linux/quotaio_v1.h>
53 #endif
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
64 static const uint lustre_initqversions[][MAXQUOTAS] = {
65         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
66 };
67
68 static const int lustre_dqstrinblk[] = {
69         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
70 };
71
72 static const int lustre_disk_dqblk_sz[] = {
73         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
74 };
75
76 static const union
77 {
78         struct lustre_disk_dqblk_v2 r1;
79 } fakedquot[] = {
80         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
81 };
82
83 static const union
84 {
85         struct lustre_disk_dqblk_v2 r1;
86 } emptydquot[] = {
87         [LUSTRE_QUOTA_V2] = {.r1 = { 0 }}
88 };
89
90 extern void *lustre_quota_journal_start(struct inode *inode, int delete);
91 extern void lustre_quota_journal_stop(void *handle);
92 extern ssize_t lustre_read_quota(struct file *f, struct inode *inode, int type,
93                                  char *buf, int count, loff_t pos);
94 extern ssize_t lustre_write_quota(struct file *f, char *buf, int count, loff_t pos);
95
96 int check_quota_file(struct file *f, struct inode *inode, int type,
97                      lustre_quota_version_t version)
98 {
99         struct lustre_disk_dqheader dqhead;
100         ssize_t size;
101         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
102         const uint *quota_versions = lustre_initqversions[version];
103
104         size = lustre_read_quota(f, inode, type, (char *)&dqhead,
105                                  sizeof(struct lustre_disk_dqheader), 0);
106         if (size != sizeof(struct lustre_disk_dqheader))
107                 return -EINVAL;
108         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
109             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
110                 return -EINVAL;
111         return 0;
112 }
113
114 /**
115  * Check whether given file is really lustre admin quotafile
116  */
117 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
118 {
119         struct file *f = lqi->qi_files[type];
120         return check_quota_file(f, NULL, type, lqi->qi_version);
121 }
122
123 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
124 {
125         struct lustre_disk_dqinfo dinfo;
126         ssize_t size;
127
128         size = lustre_read_quota(f, NULL, 0, (char *)&dinfo,
129                                  sizeof(struct lustre_disk_dqinfo),
130                                  LUSTRE_DQINFOOFF);
131
132         if (size != sizeof(struct lustre_disk_dqinfo)) {
133                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
134                        f->f_vfsmnt->mnt_sb->s_id);
135                 return -EINVAL;
136         }
137         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
138         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
139         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
140         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
141         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
142         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
143         return 0;
144 }
145
146 /**
147  * Read information header from quota file
148  */
149 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
150 {
151         return lustre_read_quota_file_info(lqi->qi_files[type],
152                                            &lqi->qi_info[type]);
153 }
154
155 /**
156  * Write information header to quota file
157  */
158 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
159 {
160         struct lustre_disk_dqinfo dinfo;
161         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
162         struct file *f = lqi->qi_files[type];
163         ssize_t size;
164
165         info->dqi_flags &= ~DQF_INFO_DIRTY;
166         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
167         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
168         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
169         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
170         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
171         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
172
173         size = lustre_write_quota(f, (char *)&dinfo,
174                                   sizeof(struct lustre_disk_dqinfo),
175                                   LUSTRE_DQINFOOFF);
176
177         if (size != sizeof(struct lustre_disk_dqinfo)) {
178                 CDEBUG(D_WARNING, 
179                        "Can't write info structure on device %s.\n",
180                        f->f_vfsmnt->mnt_sb->s_id);
181                 return -1;
182         }
183         return 0;
184 }
185
186 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
187                  lustre_quota_version_t version)
188 {
189         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
190
191         LASSERT(version == LUSTRE_QUOTA_V2);
192
193         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
194         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
195         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
196         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
197         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
198         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
199         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
200         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
201 }
202
203 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
204                        qid_t id, lustre_quota_version_t version)
205 {
206         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
207
208         LASSERT(version == LUSTRE_QUOTA_V2);
209
210         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
211         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
212         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
213         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
214         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
215         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
216         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
217         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
218         dqblk->dqb_id = cpu_to_le32(id);
219
220         return 0;
221 }
222
223 dqbuf_t getdqbuf(void)
224 {
225         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
226         if (!buf)
227                 CDEBUG(D_WARNING, 
228                        "VFS: Not enough memory for quota buffers.\n");
229         return buf;
230 }
231
/**
 * Release a block buffer obtained from getdqbuf().
 */
void freedqbuf(dqbuf_t buf)
{
        kfree(buf);
}
236
237 ssize_t read_blk(struct file *filp, struct inode *inode, int type,
238                  uint blk, dqbuf_t buf)
239 {
240         ssize_t ret;
241
242         memset(buf, 0, LUSTRE_DQBLKSIZE);
243         ret = lustre_read_quota(filp, inode, type, (char *)buf, LUSTRE_DQBLKSIZE,
244                                 blk << LUSTRE_DQBLKSIZE_BITS);
245
246         /* Reading past EOF just returns a block of zeros */
247         if (ret == -EBADR)
248                 ret = 0;
249
250         return ret;
251 }
252
253 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
254 {
255         ssize_t ret;
256
257         ret = lustre_write_quota(filp, (char *)buf, LUSTRE_DQBLKSIZE,
258                                  blk << LUSTRE_DQBLKSIZE_BITS);
259
260         return ret;
261 }
262
/**
 * Flag the in-memory quota info as modified by setting bit
 * DQF_INFO_DIRTY_B in info->dqi_flags.
 */
void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
{
        cfs_set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
267
268 /**
269  * Remove empty block from list and return it
270  */
271 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
272 {
273         dqbuf_t buf = getdqbuf();
274         struct lustre_disk_dqdbheader *dh =
275             (struct lustre_disk_dqdbheader *)buf;
276         int ret, blk;
277
278         if (!buf)
279                 return -ENOMEM;
280         if (info->dqi_free_blk) {
281                 blk = info->dqi_free_blk;
282                 if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0)
283                         goto out_buf;
284                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
285         } else {
286                 memset(buf, 0, LUSTRE_DQBLKSIZE);
287                 /* Assure block allocation... */
288                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
289                         goto out_buf;
290                 blk = info->dqi_blocks++;
291         }
292         lustre_mark_info_dirty(info);
293         ret = blk;
294 out_buf:
295         freedqbuf(buf);
296         return ret;
297 }
298
299 /**
300  * Insert empty block to the list
301  */
302 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
303                    dqbuf_t buf, uint blk)
304 {
305         struct lustre_disk_dqdbheader *dh =
306             (struct lustre_disk_dqdbheader *)buf;
307         int err;
308
309         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
310         dh->dqdh_prev_free = cpu_to_le32(0);
311         dh->dqdh_entries = cpu_to_le16(0);
312         info->dqi_free_blk = blk;
313         lustre_mark_info_dirty(info);
314         if ((err = write_blk(filp, blk, buf)) < 0)
315                 /* Some strange block. We had better leave it... */
316                 return err;
317         return 0;
318 }
319
320 /**
321  * Remove given block from the list of blocks with free entries
322  */
323 int remove_free_dqentry(struct file *filp,
324                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
325                         uint blk)
326 {
327         dqbuf_t tmpbuf = getdqbuf();
328         struct lustre_disk_dqdbheader *dh =
329             (struct lustre_disk_dqdbheader *)buf;
330         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
331             le32_to_cpu(dh->dqdh_prev_free);
332         int err;
333
334         if (!tmpbuf)
335                 return -ENOMEM;
336         if (nextblk) {
337                 if ((err = read_blk(filp, NULL, 0, nextblk, tmpbuf)) < 0)
338                         goto out_buf;
339                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
340                     dh->dqdh_prev_free;
341                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
342                         goto out_buf;
343         }
344         if (prevblk) {
345                 if ((err = read_blk(filp, NULL, 0, prevblk, tmpbuf)) < 0)
346                         goto out_buf;
347                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
348                     dh->dqdh_next_free;
349                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
350                         goto out_buf;
351         } else {
352                 info->dqi_free_entry = nextblk;
353                 lustre_mark_info_dirty(info);
354         }
355         freedqbuf(tmpbuf);
356         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
357         if (write_blk(filp, blk, buf) < 0)
358                 /* No matter whether write succeeds block is out of list */
359                 CDEBUG(D_ERROR, 
360                        "VFS: Can't write block (%u) with free entries.\n", blk);
361         return 0;
362 out_buf:
363         freedqbuf(tmpbuf);
364         return err;
365 }
366
367 /**
368  * Insert given block to the beginning of list with free entries
369  */
370 int insert_free_dqentry(struct file *filp,
371                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
372                         uint blk)
373 {
374         dqbuf_t tmpbuf = getdqbuf();
375         struct lustre_disk_dqdbheader *dh =
376             (struct lustre_disk_dqdbheader *)buf;
377         int err;
378
379         if (!tmpbuf)
380                 return -ENOMEM;
381         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
382         dh->dqdh_prev_free = cpu_to_le32(0);
383         if ((err = write_blk(filp, blk, buf)) < 0)
384                 goto out_buf;
385         if (info->dqi_free_entry) {
386                 if ((err = read_blk(filp, NULL, 0, info->dqi_free_entry, tmpbuf)) < 0)
387                         goto out_buf;
388                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
389                     cpu_to_le32(blk);
390                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
391                         goto out_buf;
392         }
393         freedqbuf(tmpbuf);
394         info->dqi_free_entry = blk;
395         lustre_mark_info_dirty(info);
396         return 0;
397 out_buf:
398         freedqbuf(tmpbuf);
399         return err;
400 }
401
402
403
404 /**
405  * Find space for dquot
406  */
407 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
408                               lustre_quota_version_t version)
409 {
410         struct lustre_quota_info *lqi = dquot->dq_info;
411         struct file *filp = lqi->qi_files[dquot->dq_type];
412         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
413         uint blk, i;
414         struct lustre_disk_dqdbheader *dh;
415         void *ddquot;
416         int dqblk_sz = lustre_disk_dqblk_sz[version];
417         int dqstrinblk = lustre_dqstrinblk[version];
418         dqbuf_t buf;
419
420         *err = 0;
421         if (!(buf = getdqbuf())) {
422                 *err = -ENOMEM;
423                 return 0;
424         }
425         dh = (struct lustre_disk_dqdbheader *)buf;
426         ddquot = GETENTRIES(buf, version);
427         if (info->dqi_free_entry) {
428                 blk = info->dqi_free_entry;
429                 if ((*err = read_blk(filp, NULL, 0, blk, buf)) < 0)
430                         goto out_buf;
431         } else {
432                 blk = get_free_dqblk(filp, info);
433                 if ((int)blk < 0) {
434                         *err = blk;
435                         freedqbuf(buf);
436                         return 0;
437                 }
438                 memset(buf, 0, LUSTRE_DQBLKSIZE);
439                 info->dqi_free_entry = blk; /* This is enough as block is 
440                                                already zeroed and entry list
441                                                is empty... */
442                 lustre_mark_info_dirty(info);
443         }
444
445         /* Will block be full */
446         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
447                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
448                         CDEBUG(D_ERROR, 
449                                "VFS: find_free_dqentry(): Can't remove block "
450                                "(%u) from entry free list.\n", blk);
451                         goto out_buf;
452                 }
453         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
454         /* Find free structure in block */
455         for (i = 0; i < dqstrinblk &&
456              memcmp((char *)&emptydquot[version],
457                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
458              i++);
459
460         if (i == dqstrinblk) {
461                 CDEBUG(D_ERROR, 
462                        "VFS: find_free_dqentry(): Data block full but it "
463                        "shouldn't.\n");
464                 *err = -EIO;
465                 goto out_buf;
466         }
467
468         if ((*err = write_blk(filp, blk, buf)) < 0) {
469                 CDEBUG(D_ERROR,
470                        "VFS: find_free_dqentry(): Can't write quota data "
471                        "block %u.\n", blk);
472                 goto out_buf;
473         }
474         dquot->dq_off =
475             (blk << LUSTRE_DQBLKSIZE_BITS) +
476             sizeof(struct lustre_disk_dqdbheader) +
477             i * dqblk_sz;
478         freedqbuf(buf);
479         return blk;
480 out_buf:
481         freedqbuf(buf);
482         return 0;
483 }
484
485 /**
486  * Insert reference to structure into the trie
487  */
488 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
489                           lustre_quota_version_t version)
490 {
491         struct lustre_quota_info *lqi = dquot->dq_info;
492         struct file *filp = lqi->qi_files[dquot->dq_type];
493         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
494         dqbuf_t buf;
495         int ret = 0, newson = 0, newact = 0;
496         u32 *ref;
497         uint newblk;
498
499         if (!(buf = getdqbuf()))
500                 return -ENOMEM;
501         if (!*treeblk) {
502                 ret = get_free_dqblk(filp, info);
503                 if (ret < 0)
504                         goto out_buf;
505                 *treeblk = ret;
506                 memset(buf, 0, LUSTRE_DQBLKSIZE);
507                 newact = 1;
508         } else {
509                 if ((ret = read_blk(filp, NULL, 0, *treeblk, buf)) < 0) {
510                         CERROR("VFS: Can't read tree quota block %u.\n",
511                                *treeblk);
512                         goto out_buf;
513                 }
514         }
515         ref = (u32 *) buf;
516         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
517         if (!newblk)
518                 newson = 1;
519         if (depth == LUSTRE_DQTREEDEPTH - 1) {
520
521                 if (newblk) {
522                         CDEBUG(D_ERROR, 
523                                "VFS: Inserting already present quota entry "
524                                "(block %u).\n",
525                                ref[GETIDINDEX(dquot->dq_id, depth)]);
526                         ret = -EIO;
527                         goto out_buf;
528                 }
529
530                 newblk = find_free_dqentry(dquot, &ret, version);
531         } else
532                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
533         if (newson && ret >= 0) {
534                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
535                 ret = write_blk(filp, *treeblk, buf);
536         } else if (newact && ret < 0)
537                 put_free_dqblk(filp, info, buf, *treeblk);
538 out_buf:
539         freedqbuf(buf);
540         return ret;
541 }
542
543 /**
544  * Wrapper for inserting quota structure into tree
545  */
546 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
547                                  lustre_quota_version_t version)
548 {
549         int tmp = LUSTRE_DQTREEOFF;
550         return do_insert_tree(dquot, &tmp, 0, version);
551 }
552
553 /**
554  * We don't have to be afraid of deadlocks as we never have quotas on
555  * quota files...
556  */
557 static int lustre_write_dquot(struct lustre_dquot *dquot, 
558                               lustre_quota_version_t version)
559 {
560         int type = dquot->dq_type;
561         struct file *filp;
562         loff_t offset;
563         ssize_t ret;
564         int dqblk_sz = lustre_disk_dqblk_sz[version];
565         struct lustre_disk_dqblk_v2 ddquot;
566
567         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
568         if (ret < 0)
569                 return ret;
570
571         if (!dquot->dq_off)
572                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
573                         CDEBUG(D_ERROR,
574                                "VFS: Error %Zd occurred while creating "
575                                "quota.\n", ret);
576                         return ret;
577                 }
578         filp = dquot->dq_info->qi_files[type];
579         offset = dquot->dq_off;
580         /* Argh... We may need to write structure full of zeroes but that would
581          * be treated as an empty place by the rest of the code. Format change
582          * would be definitely cleaner but the problems probably are not worth
583          * it */
584         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
585                 ddquot.dqb_itime = cpu_to_le64(1);
586
587         ret = lustre_write_quota(filp, (char *)&ddquot, dqblk_sz, offset);
588         if (ret != dqblk_sz) {
589                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
590                        filp->f_dentry->d_sb->s_id);
591                 if (ret >= 0)
592                         ret = -ENOSPC;
593         } else
594                 ret = 0;
595
596         return ret;
597 }
598
599 /**
600  * Free dquot entry in data block
601  */
602 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
603                         lustre_quota_version_t version)
604 {
605         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
606         struct lustre_mem_dqinfo *info =
607             &dquot->dq_info->qi_info[dquot->dq_type];
608         struct lustre_disk_dqdbheader *dh;
609         dqbuf_t buf = getdqbuf();
610         int dqstrinblk = lustre_dqstrinblk[version];
611         int ret = 0;
612
613         if (!buf)
614                 return -ENOMEM;
615         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
616                 CDEBUG(D_ERROR,
617                        "VFS: Quota structure has offset to other block (%u) "
618                        "than it should (%u).\n",
619                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
620                 goto out_buf;
621         }
622         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
623                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
624                 goto out_buf;
625         }
626         dh = (struct lustre_disk_dqdbheader *)buf;
627         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
628         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
629                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
630                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
631                         CDEBUG(D_ERROR,
632                                "VFS: Can't move quota data block (%u) to free "
633                                "list.\n", blk);
634                         goto out_buf;
635                 }
636         } else {
637                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
638                        0, lustre_disk_dqblk_sz[version]);
639                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
640                         /* Insert will write block itself */
641                         if ((ret =
642                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
643                                 CDEBUG(D_ERROR,
644                                        "VFS: Can't insert quota data block "
645                                        "(%u) to free entry list.\n", blk);
646                                 goto out_buf;
647                         }
648                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
649                         CDEBUG(D_ERROR,
650                                "VFS: Can't write quota data block %u\n", blk);
651                         goto out_buf;
652                 }
653         }
654         dquot->dq_off = 0;      /* Quota is now unattached */
655 out_buf:
656         freedqbuf(buf);
657         return ret;
658 }
659
660 /**
661  * Remove reference to dquot from tree
662  */
663 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
664                        lustre_quota_version_t version)
665 {
666         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
667         struct lustre_mem_dqinfo *info =
668             &dquot->dq_info->qi_info[dquot->dq_type];
669         dqbuf_t buf = getdqbuf();
670         int ret = 0;
671         uint newblk;
672         u32 *ref = (u32 *) buf;
673
674         if (!buf)
675                 return -ENOMEM;
676         if ((ret = read_blk(filp, NULL, 0, *blk, buf)) < 0) {
677                 CERROR("VFS: Can't read quota data block %u\n", *blk);
678                 goto out_buf;
679         }
680         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
681         if (depth == LUSTRE_DQTREEDEPTH - 1) {
682                 ret = free_dqentry(dquot, newblk, version);
683                 newblk = 0;
684         } else
685                 ret = remove_tree(dquot, &newblk, depth + 1, version);
686         if (ret >= 0 && !newblk) {
687                 int i;
688                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
689                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
690                         /* Block got empty? */ ;
691                 /* don't put the root block into free blk list! */
692                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
693                         put_free_dqblk(filp, info, buf, *blk);
694                         *blk = 0;
695                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
696                         CDEBUG(D_ERROR,
697                                "VFS: Can't write quota tree block %u.\n", *blk);
698         }
699 out_buf:
700         freedqbuf(buf);
701         return ret;
702 }
703
704 /**
705  * Delete dquot from tree
706  */
707 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
708                                 lustre_quota_version_t version)
709 {
710         uint tmp = LUSTRE_DQTREEOFF;
711
712         if (!dquot->dq_off)     /* Even not allocated? */
713                 return 0;
714         return remove_tree(dquot, &tmp, 0, version);
715 }
716
717 /**
718  * Find entry in block
719  */
720 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
721                                  lustre_quota_version_t version)
722 {
723         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
724         dqbuf_t buf = getdqbuf();
725         loff_t ret = 0;
726         int i;
727         struct lustre_disk_dqblk_v2 *ddquot =
728                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
729         int dqblk_sz = lustre_disk_dqblk_sz[version];
730         int dqstrinblk = lustre_dqstrinblk[version];
731
732         LASSERT(version == LUSTRE_QUOTA_V2);
733
734         if (!buf)
735                 return -ENOMEM;
736         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
737                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
738                 goto out_buf;
739         }
740         if (dquot->dq_id)
741                 for (i = 0; i < dqstrinblk && 
742                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
743                      i++) ;
744         else {                  /* ID 0 as a bit more complicated searching... */
745                 for (i = 0; i < dqstrinblk; i++)
746                         if (!le32_to_cpu(ddquot[i].dqb_id)
747                             && memcmp((char *)&emptydquot[version],
748                                       (char *)&ddquot[i], dqblk_sz))
749                                 break;
750         }
751         if (i == dqstrinblk) {
752                 CDEBUG(D_ERROR,
753                        "VFS: Quota for id %u referenced but not present.\n",
754                        dquot->dq_id);
755                 ret = -EIO;
756                 goto out_buf;
757         } else
758                 ret =
759                     (blk << LUSTRE_DQBLKSIZE_BITS) +
760                     sizeof(struct lustre_disk_dqdbheader) +
761                     i * dqblk_sz;
762 out_buf:
763         freedqbuf(buf);
764         return ret;
765 }
766
767 /**
768  * Find entry for given id in the tree
769  */
770 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
771                                 lustre_quota_version_t version)
772 {
773         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
774         dqbuf_t buf = getdqbuf();
775         loff_t ret = 0;
776         u32 *ref = (u32 *) buf;
777
778         if (!buf)
779                 return -ENOMEM;
780         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
781                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
782                 goto out_buf;
783         }
784         ret = 0;
785         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
786         if (!blk)               /* No reference? */
787                 goto out_buf;
788         if (depth < LUSTRE_DQTREEDEPTH - 1)
789                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
790         else
791                 ret = find_block_dqentry(dquot, blk, version);
792 out_buf:
793         freedqbuf(buf);
794         return ret;
795 }
796
/**
 * Find entry for given id in the tree - wrapper function
 *
 * Starts the lookup at the root block LUSTRE_DQTREEOFF, depth 0, and
 * returns whatever find_tree_dqentry() returns.
 */
static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
                                  lustre_quota_version_t version)
{
        return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
}
805
806 int lustre_read_dquot(struct lustre_dquot *dquot)
807 {
808         int type = dquot->dq_type;
809         struct file *filp;
810         loff_t offset;
811         int ret = 0, dqblk_sz;
812         lustre_quota_version_t version;
813
814         /* Invalidated quota? */
815         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
816                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
817                 return -ESRCH;
818         }
819
820         version = dquot->dq_info->qi_version;
821         LASSERT(version == LUSTRE_QUOTA_V2);
822         dqblk_sz = lustre_disk_dqblk_sz[version];
823
824         offset = find_dqentry(dquot, version);
825         if (offset <= 0) {      /* Entry not present? */
826                 if (offset < 0)
827                         CDEBUG(D_ERROR,
828                                "VFS: Can't read quota structure for id %u.\n",
829                                dquot->dq_id);
830                 dquot->dq_off = 0;
831                 cfs_set_bit(DQ_FAKE_B, &dquot->dq_flags);
832                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
833                 ret = offset;
834         } else {
835                 struct lustre_disk_dqblk_v2 ddquot;
836
837                 dquot->dq_off = offset;
838                 if ((ret = lustre_read_quota(filp, NULL, type, (char *)&ddquot,
839                                              dqblk_sz, offset)) != dqblk_sz) {
840                         if (ret >= 0)
841                                 ret = -EIO;
842                         CDEBUG(D_ERROR,
843                                "VFS: Error while reading quota structure for id "
844                                "%u.\n", dquot->dq_id);
845                         memset((char *)&ddquot, 0, dqblk_sz);
846                 } else {
847                         ret = 0;
848                         /* We need to escape back all-zero structure */
849                         if (!memcmp((char *)&fakedquot[version],
850                                     (char *)&ddquot, dqblk_sz))
851                                 ddquot.dqb_itime = cpu_to_le64(0);
852                 }
853                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
854         }
855
856         return ret;
857 }
858
859 /**
860  * Commit changes of dquot to disk - it might also mean deleting
861  * it when quota became fake.
862  */
863 int lustre_commit_dquot(struct lustre_dquot *dquot)
864 {
865         int rc = 0;
866         lustre_quota_version_t version = dquot->dq_info->qi_version;
867         void *handle;
868         struct inode *inode = dquot->dq_info->qi_files[dquot->dq_type]->f_dentry->d_inode;
869         int delete = 0;
870
871         /* always clear the flag so we don't loop on an IO error... */
872         cfs_clear_bit(DQ_MOD_B, &dquot->dq_flags);
873
874         /* The block/inode usage in admin quotafile isn't the real usage
875          * over all cluster, so keep the fake dquot entry on disk is
876          * meaningless, just remove it */
877         if (cfs_test_bit(DQ_FAKE_B, &dquot->dq_flags))
878                 delete = 1;
879         handle = lustre_quota_journal_start(inode, delete);
880         if (unlikely(IS_ERR(handle))) {
881                 rc = PTR_ERR(handle);
882                 CERROR("fail to lustre_quota_journal_start: rc = %d\n", rc);
883                 return rc;
884         }
885
886         if (delete)
887                 rc = lustre_delete_dquot(dquot, version);
888         else
889                 rc = lustre_write_dquot(dquot, version);
890         lustre_quota_journal_stop(handle);
891
892         if (rc < 0)
893                 return rc;
894
895         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
896                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
897
898         return rc;
899 }
900
901 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
902                              int fakemagics)
903 {
904         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
905         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
906         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
907         struct lustre_disk_dqheader dqhead;
908         ssize_t size;
909         struct file *fp = lqi->qi_files[type];
910         int rc = 0;
911
912         /* write quotafile header */
913         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
914                                        fake_magics[type] : quota_magics[type]);
915         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
916         size = lustre_write_quota(fp, (char *)&dqhead,
917                                   sizeof(struct lustre_disk_dqheader), 0);
918
919         if (size != sizeof(struct lustre_disk_dqheader)) {
920                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
921                 rc = size;
922         }
923
924         return rc;
925 }
926
927 /**
928  * We need to export this function to initialize quotafile, because we haven't
929  * user level check utility
930  */
931 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
932                                    int fakemagics)
933 {
934         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
935         int rc;
936
937         rc = lustre_init_quota_header(lqi, type, fakemagics);
938         if (rc)
939                 return rc;
940
941         /* write init quota info */
942         memset(dqinfo, 0, sizeof(*dqinfo));
943         dqinfo->dqi_bgrace = MAX_DQ_TIME;
944         dqinfo->dqi_igrace = MAX_IQ_TIME;
945         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
946
947         return lustre_write_quota_info(lqi, type);
948 }
949
/**
 * Initialize the quota file for \a type with the regular (non-fake) magic.
 * Returns whatever lustre_init_quota_info_generic() returns.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int use_fake_magics = 0;

        return lustre_init_quota_info_generic(lqi, type, use_fake_magics);
}
954
955 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
956                               uint blk, cfs_list_t *list)
957 {
958         dqbuf_t buf = getdqbuf();
959         loff_t ret = 0;
960         struct lustre_disk_dqdbheader *dqhead =
961             (struct lustre_disk_dqdbheader *)buf;
962         struct dqblk *blk_item;
963         struct dqblk *pos;
964         cfs_list_t *tmp;
965
966         if (!buf)
967                 return -ENOMEM;
968         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
969                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
970                 goto out_buf;
971         }
972         ret = 0;
973
974         if (!le32_to_cpu(dqhead->dqdh_entries))
975                 goto out_buf;
976
977         if (cfs_list_empty(list)) {
978                 tmp = list;
979                 goto done;
980         }
981
982         cfs_list_for_each_entry(pos, list, link) {
983                 if (blk == pos->blk)    /* we got this blk already */
984                         goto out_buf;
985                 if (blk > pos->blk)
986                         continue;
987                 break;
988         }
989         tmp = &pos->link;
990 done:
991         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
992         if (!blk_item) {
993                 ret = -ENOMEM;
994                 goto out_buf;
995         }
996         blk_item->blk = blk;
997         CFS_INIT_LIST_HEAD(&blk_item->link);
998
999         cfs_list_add_tail(&blk_item->link, tmp);
1000
1001 out_buf:
1002         freedqbuf(buf);
1003         return ret;
1004 }
1005
1006 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1007                       uint blk, int depth, cfs_list_t *list)
1008 {
1009         dqbuf_t buf = getdqbuf();
1010         loff_t ret = 0;
1011         int index;
1012         u32 *ref = (u32 *) buf;
1013
1014         if (!buf)
1015                 return -ENOMEM;
1016         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
1017                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
1018                 goto out_buf;
1019         }
1020         ret = 0;
1021
1022         for (index = 0; index <= 0xff && !ret; index++) {
1023                 blk = le32_to_cpu(ref[index]);
1024                 if (!blk)       /* No reference */
1025                         continue;
1026
1027                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1028                         ret = walk_tree_dqentry(filp, inode, type, blk,
1029                                                 depth + 1, list);
1030                 else
1031                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1032         }
1033 out_buf:
1034         freedqbuf(buf);
1035         return ret;
1036 }
1037
1038 /**
1039  * Walk through the quota file (v2 format) to get all ids with quota limit
1040  */
1041 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1042                     cfs_list_t *list)
1043 {
1044         cfs_list_t blk_list;
1045         struct dqblk *blk_item, *tmp;
1046         dqbuf_t buf = NULL;
1047         struct lustre_disk_dqblk_v2 *ddquot;
1048         int rc;
1049         lustre_quota_version_t version;
1050
1051         ENTRY;
1052
1053         LASSERT(ergo(fp == NULL, inode != NULL));
1054
1055         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1056                 version = LUSTRE_QUOTA_V2;
1057         else {
1058                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1059                 RETURN(-EINVAL);
1060         }
1061
1062         if (!cfs_list_empty(list)) {
1063                 CDEBUG(D_ERROR, "not empty list\n");
1064                 RETURN(-EINVAL);
1065         }
1066
1067         CFS_INIT_LIST_HEAD(&blk_list);
1068         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1069         if (rc) {
1070                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1071                 GOTO(out_free, rc);
1072         }
1073         if (cfs_list_empty(&blk_list))
1074                 RETURN(0);
1075
1076         buf = getdqbuf();
1077         if (!buf)
1078                 RETURN(-ENOMEM);
1079         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1080
1081         cfs_list_for_each_entry(blk_item, &blk_list, link) {
1082                 loff_t ret = 0;
1083                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1084
1085                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1086                 if ((ret = read_blk(fp, inode, type, blk_item->blk, buf)) < 0) {
1087                         CERROR("VFS: Can't read quota tree block %u.\n",
1088                                blk_item->blk);
1089                         GOTO(out_free, rc = ret);
1090                 }
1091
1092                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1093                         struct dquot_id *dqid;
1094                         /* skip empty entry */
1095                         if (!memcmp((char *)&emptydquot[version],
1096                                     (char *)&ddquot[i], dqblk_sz))
1097                                 continue;
1098
1099                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1100                         if (!dqid)
1101                                 GOTO(out_free, rc = -ENOMEM);
1102
1103                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1104                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1105                                          QI_SET : 0;
1106                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1107                                          QB_SET : 0;
1108
1109                         CFS_INIT_LIST_HEAD(&dqid->di_link);
1110                         cfs_list_add(&dqid->di_link, list);
1111                 }
1112         }
1113
1114 out_free:
1115         cfs_list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1116                 cfs_list_del_init(&blk_item->link);
1117                 kfree(blk_item);
1118         }
1119         if (buf)
1120                 freedqbuf(buf);
1121
1122         RETURN(rc);
1123 }
1124
1125
/* Quota file level entry points made available to other modules */
EXPORT_SYMBOL(lustre_check_quota_file);
EXPORT_SYMBOL(lustre_init_quota_info);
EXPORT_SYMBOL(lustre_read_quota_info);
EXPORT_SYMBOL(lustre_write_quota_info);
EXPORT_SYMBOL(lustre_get_qids);

/* Per-dquot entry points made available to other modules */
EXPORT_SYMBOL(lustre_read_dquot);
EXPORT_SYMBOL(lustre_commit_dquot);
1133 #endif