Whamcloud - gitweb
b=16466 add the support for journaled admin quota file
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #ifdef HAVE_QUOTAIO_V1_H
54 # include <linux/quotaio_v1.h>
55 #endif
56
57 #include <asm/byteorder.h>
58 #include <asm/uaccess.h>
59
60 #include <lustre_quota.h>
61 #include <obd_support.h>
62 #include "lustre_quota_fmt.h"
63
64 #ifdef HAVE_QUOTA_SUPPORT
65
/* Header version numbers expected in a quota file, indexed first by quota
 * format version and then by quota type; check_quota_file() compares the
 * on-disk dqh_version against these. */
static const uint lustre_initqversions[][MAXQUOTAS] = {
        [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
};
69
/* Number of dquot entries held by one data block, indexed by quota format
 * version; used as the loop bound when scanning the entries of a block. */
static const int lustre_dqstrinblk[] = {
        [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
};
73
/* Size in bytes of one on-disk dquot entry, indexed by quota format version */
static const int lustre_disk_dqblk_sz[] = {
        [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
};
77
/* On-disk image that stands in for an all-zero dquot: every field is zero
 * except dqb_itime, which is 1.  lustre_read_dquot() compares an entry read
 * from disk against this image and resets dqb_itime to 0 when they match. */
static const union
{
        struct lustre_disk_dqblk_v2 r1;
} fakedquot[] = {
        [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
};
84
/* All-zero entry image; an entry slot in a data block that compares equal to
 * it is treated as unused by the search and allocation code below. */
static const union
{
        struct lustre_disk_dqblk_v2 r1;
} emptydquot[] = {
        [LUSTRE_QUOTA_V2] = {.r1 = { 0 }}
};
91
/* Helpers implemented outside this file.  The code below performs all of its
 * quota file I/O through lustre_read_quota() and lustre_write_quota(); both
 * are compared against the requested byte count by their callers here.
 * NOTE(review): the journal start/stop pair is presumably used by the commit
 * path further down the file -- confirm against the callers. */
extern void *lustre_quota_journal_start(struct inode *inode, int delete);
extern void lustre_quota_journal_stop(void *handle);
extern ssize_t lustre_read_quota(struct file *f, struct inode *inode, int type,
                                 char *buf, int count, loff_t pos);
extern ssize_t lustre_write_quota(struct file *f, char *buf, int count, loff_t pos);
97
98 int check_quota_file(struct file *f, struct inode *inode, int type,
99                      lustre_quota_version_t version)
100 {
101         struct lustre_disk_dqheader dqhead;
102         ssize_t size;
103         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
104         const uint *quota_versions = lustre_initqversions[version];
105
106         if (!f && !inode) {
107                 CERROR("check_quota_file failed!\n");
108                 libcfs_debug_dumpstack(NULL);
109                 return -EINVAL;
110         }
111
112         size = lustre_read_quota(f, inode, type, (char *)&dqhead,
113                                  sizeof(struct lustre_disk_dqheader), 0);
114
115         if (size != sizeof(struct lustre_disk_dqheader))
116                 return -EINVAL;
117         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
118             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
119                 return -EINVAL;
120         return 0;
121 }
122
123 /**
124  * Check whether given file is really lustre admin quotafile
125  */
126 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
127 {
128         struct file *f = lqi->qi_files[type];
129         return check_quota_file(f, NULL, type, lqi->qi_version);
130 }
131
132 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
133 {
134         struct lustre_disk_dqinfo dinfo;
135         ssize_t size;
136
137         size = lustre_read_quota(f, NULL, 0, (char *)&dinfo,
138                                  sizeof(struct lustre_disk_dqinfo),
139                                  LUSTRE_DQINFOOFF);
140
141         if (size != sizeof(struct lustre_disk_dqinfo)) {
142                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
143                        f->f_vfsmnt->mnt_sb->s_id);
144                 return -EINVAL;
145         }
146         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
147         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
148         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
149         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
150         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
151         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
152         return 0;
153 }
154
155 /**
156  * Read information header from quota file
157  */
158 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
159 {
160         return lustre_read_quota_file_info(lqi->qi_files[type],
161                                            &lqi->qi_info[type]);
162 }
163
164 /**
165  * Write information header to quota file
166  */
167 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
168 {
169         struct lustre_disk_dqinfo dinfo;
170         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
171         struct file *f = lqi->qi_files[type];
172         ssize_t size;
173
174         info->dqi_flags &= ~DQF_INFO_DIRTY;
175         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
176         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
177         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
178         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
179         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
180         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
181
182         size = lustre_write_quota(f, (char *)&dinfo,
183                                   sizeof(struct lustre_disk_dqinfo),
184                                   LUSTRE_DQINFOOFF);
185
186         if (size != sizeof(struct lustre_disk_dqinfo)) {
187                 CDEBUG(D_WARNING, 
188                        "Can't write info structure on device %s.\n",
189                        f->f_vfsmnt->mnt_sb->s_id);
190                 return -1;
191         }
192         return 0;
193 }
194
195 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
196                  lustre_quota_version_t version)
197 {
198         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
199
200         LASSERT(version == LUSTRE_QUOTA_V2);
201
202         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
203         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
204         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
205         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
206         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
207         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
208         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
209         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
210 }
211
212 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
213                        qid_t id, lustre_quota_version_t version)
214 {
215         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
216
217         LASSERT(version == LUSTRE_QUOTA_V2);
218
219         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
220         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
221         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
222         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
223         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
224         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
225         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
226         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
227         dqblk->dqb_id = cpu_to_le32(id);
228
229         return 0;
230 }
231
232 dqbuf_t getdqbuf(void)
233 {
234         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
235         if (!buf)
236                 CDEBUG(D_WARNING, 
237                        "VFS: Not enough memory for quota buffers.\n");
238         return buf;
239 }
240
/**
 * Release a buffer obtained from getdqbuf() by passing it to kfree().
 */
void freedqbuf(dqbuf_t buf)
{
        kfree(buf);
}
245
246 ssize_t read_blk(struct file *filp, struct inode *inode, int type,
247                  uint blk, dqbuf_t buf)
248 {
249         ssize_t ret;
250
251         memset(buf, 0, LUSTRE_DQBLKSIZE);
252         ret = lustre_read_quota(filp, inode, type, (char *)buf, LUSTRE_DQBLKSIZE,
253                                 blk << LUSTRE_DQBLKSIZE_BITS);
254
255         /* Reading past EOF just returns a block of zeros */
256         if (ret == -EBADR)
257                 ret = 0;
258
259         return ret;
260 }
261
262 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
263 {
264         ssize_t ret;
265
266         ret = lustre_write_quota(filp, (char *)buf, LUSTRE_DQBLKSIZE,
267                                  blk << LUSTRE_DQBLKSIZE_BITS);
268
269         return ret;
270 }
271
/**
 * Flag the in-memory quota info as modified by setting bit DQF_INFO_DIRTY_B
 * in its dqi_flags.
 */
void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
{
        set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
276
277 /**
278  * Remove empty block from list and return it
279  */
280 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
281 {
282         dqbuf_t buf = getdqbuf();
283         struct lustre_disk_dqdbheader *dh =
284             (struct lustre_disk_dqdbheader *)buf;
285         int ret, blk;
286
287         if (!buf)
288                 return -ENOMEM;
289         if (info->dqi_free_blk) {
290                 blk = info->dqi_free_blk;
291                 if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0)
292                         goto out_buf;
293                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
294         } else {
295                 memset(buf, 0, LUSTRE_DQBLKSIZE);
296                 /* Assure block allocation... */
297                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
298                         goto out_buf;
299                 blk = info->dqi_blocks++;
300         }
301         lustre_mark_info_dirty(info);
302         ret = blk;
303 out_buf:
304         freedqbuf(buf);
305         return ret;
306 }
307
308 /**
309  * Insert empty block to the list
310  */
311 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
312                    dqbuf_t buf, uint blk)
313 {
314         struct lustre_disk_dqdbheader *dh =
315             (struct lustre_disk_dqdbheader *)buf;
316         int err;
317
318         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
319         dh->dqdh_prev_free = cpu_to_le32(0);
320         dh->dqdh_entries = cpu_to_le16(0);
321         info->dqi_free_blk = blk;
322         lustre_mark_info_dirty(info);
323         if ((err = write_blk(filp, blk, buf)) < 0)
324                 /* Some strange block. We had better leave it... */
325                 return err;
326         return 0;
327 }
328
329 /**
330  * Remove given block from the list of blocks with free entries
331  */
332 int remove_free_dqentry(struct file *filp,
333                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
334                         uint blk)
335 {
336         dqbuf_t tmpbuf = getdqbuf();
337         struct lustre_disk_dqdbheader *dh =
338             (struct lustre_disk_dqdbheader *)buf;
339         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
340             le32_to_cpu(dh->dqdh_prev_free);
341         int err;
342
343         if (!tmpbuf)
344                 return -ENOMEM;
345         if (nextblk) {
346                 if ((err = read_blk(filp, NULL, 0, nextblk, tmpbuf)) < 0)
347                         goto out_buf;
348                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
349                     dh->dqdh_prev_free;
350                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
351                         goto out_buf;
352         }
353         if (prevblk) {
354                 if ((err = read_blk(filp, NULL, 0, prevblk, tmpbuf)) < 0)
355                         goto out_buf;
356                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
357                     dh->dqdh_next_free;
358                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
359                         goto out_buf;
360         } else {
361                 info->dqi_free_entry = nextblk;
362                 lustre_mark_info_dirty(info);
363         }
364         freedqbuf(tmpbuf);
365         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
366         if (write_blk(filp, blk, buf) < 0)
367                 /* No matter whether write succeeds block is out of list */
368                 CDEBUG(D_ERROR, 
369                        "VFS: Can't write block (%u) with free entries.\n", blk);
370         return 0;
371 out_buf:
372         freedqbuf(tmpbuf);
373         return err;
374 }
375
376 /**
377  * Insert given block to the beginning of list with free entries
378  */
379 int insert_free_dqentry(struct file *filp,
380                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
381                         uint blk)
382 {
383         dqbuf_t tmpbuf = getdqbuf();
384         struct lustre_disk_dqdbheader *dh =
385             (struct lustre_disk_dqdbheader *)buf;
386         int err;
387
388         if (!tmpbuf)
389                 return -ENOMEM;
390         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
391         dh->dqdh_prev_free = cpu_to_le32(0);
392         if ((err = write_blk(filp, blk, buf)) < 0)
393                 goto out_buf;
394         if (info->dqi_free_entry) {
395                 if ((err = read_blk(filp, NULL, 0, info->dqi_free_entry, tmpbuf)) < 0)
396                         goto out_buf;
397                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
398                     cpu_to_le32(blk);
399                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
400                         goto out_buf;
401         }
402         freedqbuf(tmpbuf);
403         info->dqi_free_entry = blk;
404         lustre_mark_info_dirty(info);
405         return 0;
406 out_buf:
407         freedqbuf(tmpbuf);
408         return err;
409 }
410
411
412
413 /**
414  * Find space for dquot
415  */
416 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
417                               lustre_quota_version_t version)
418 {
419         struct lustre_quota_info *lqi = dquot->dq_info;
420         struct file *filp = lqi->qi_files[dquot->dq_type];
421         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
422         uint blk, i;
423         struct lustre_disk_dqdbheader *dh;
424         void *ddquot;
425         int dqblk_sz = lustre_disk_dqblk_sz[version];
426         int dqstrinblk = lustre_dqstrinblk[version];
427         dqbuf_t buf;
428
429         *err = 0;
430         if (!(buf = getdqbuf())) {
431                 *err = -ENOMEM;
432                 return 0;
433         }
434         dh = (struct lustre_disk_dqdbheader *)buf;
435         ddquot = GETENTRIES(buf, version);
436         if (info->dqi_free_entry) {
437                 blk = info->dqi_free_entry;
438                 if ((*err = read_blk(filp, NULL, 0, blk, buf)) < 0)
439                         goto out_buf;
440         } else {
441                 blk = get_free_dqblk(filp, info);
442                 if ((int)blk < 0) {
443                         *err = blk;
444                         freedqbuf(buf);
445                         return 0;
446                 }
447                 memset(buf, 0, LUSTRE_DQBLKSIZE);
448                 info->dqi_free_entry = blk; /* This is enough as block is 
449                                                already zeroed and entry list
450                                                is empty... */
451                 lustre_mark_info_dirty(info);
452         }
453
454         /* Will block be full */
455         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
456                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
457                         CDEBUG(D_ERROR, 
458                                "VFS: find_free_dqentry(): Can't remove block "
459                                "(%u) from entry free list.\n", blk);
460                         goto out_buf;
461                 }
462         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
463         /* Find free structure in block */
464         for (i = 0; i < dqstrinblk &&
465              memcmp((char *)&emptydquot[version],
466                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
467              i++);
468
469         if (i == dqstrinblk) {
470                 CDEBUG(D_ERROR, 
471                        "VFS: find_free_dqentry(): Data block full but it "
472                        "shouldn't.\n");
473                 *err = -EIO;
474                 goto out_buf;
475         }
476
477         if ((*err = write_blk(filp, blk, buf)) < 0) {
478                 CDEBUG(D_ERROR,
479                        "VFS: find_free_dqentry(): Can't write quota data "
480                        "block %u.\n", blk);
481                 goto out_buf;
482         }
483         dquot->dq_off =
484             (blk << LUSTRE_DQBLKSIZE_BITS) +
485             sizeof(struct lustre_disk_dqdbheader) +
486             i * dqblk_sz;
487         freedqbuf(buf);
488         return blk;
489 out_buf:
490         freedqbuf(buf);
491         return 0;
492 }
493
494 /**
495  * Insert reference to structure into the trie
496  */
497 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
498                           lustre_quota_version_t version)
499 {
500         struct lustre_quota_info *lqi = dquot->dq_info;
501         struct file *filp = lqi->qi_files[dquot->dq_type];
502         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
503         dqbuf_t buf;
504         int ret = 0, newson = 0, newact = 0;
505         u32 *ref;
506         uint newblk;
507
508         if (!(buf = getdqbuf()))
509                 return -ENOMEM;
510         if (!*treeblk) {
511                 ret = get_free_dqblk(filp, info);
512                 if (ret < 0)
513                         goto out_buf;
514                 *treeblk = ret;
515                 memset(buf, 0, LUSTRE_DQBLKSIZE);
516                 newact = 1;
517         } else {
518                 if ((ret = read_blk(filp, NULL, 0, *treeblk, buf)) < 0) {
519                         CERROR("VFS: Can't read tree quota block %u.\n",
520                                *treeblk);
521                         goto out_buf;
522                 }
523         }
524         ref = (u32 *) buf;
525         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
526         if (!newblk)
527                 newson = 1;
528         if (depth == LUSTRE_DQTREEDEPTH - 1) {
529
530                 if (newblk) {
531                         CDEBUG(D_ERROR, 
532                                "VFS: Inserting already present quota entry "
533                                "(block %u).\n",
534                                ref[GETIDINDEX(dquot->dq_id, depth)]);
535                         ret = -EIO;
536                         goto out_buf;
537                 }
538
539                 newblk = find_free_dqentry(dquot, &ret, version);
540         } else
541                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
542         if (newson && ret >= 0) {
543                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
544                 ret = write_blk(filp, *treeblk, buf);
545         } else if (newact && ret < 0)
546                 put_free_dqblk(filp, info, buf, *treeblk);
547 out_buf:
548         freedqbuf(buf);
549         return ret;
550 }
551
552 /**
553  * Wrapper for inserting quota structure into tree
554  */
555 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
556                                  lustre_quota_version_t version)
557 {
558         int tmp = LUSTRE_DQTREEOFF;
559         return do_insert_tree(dquot, &tmp, 0, version);
560 }
561
562 /**
563  * We don't have to be afraid of deadlocks as we never have quotas on
564  * quota files...
565  */
566 static int lustre_write_dquot(struct lustre_dquot *dquot, 
567                               lustre_quota_version_t version)
568 {
569         int type = dquot->dq_type;
570         struct file *filp;
571         loff_t offset;
572         ssize_t ret;
573         int dqblk_sz = lustre_disk_dqblk_sz[version];
574         struct lustre_disk_dqblk_v2 ddquot;
575
576         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
577         if (ret < 0)
578                 return ret;
579
580         if (!dquot->dq_off)
581                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
582                         CDEBUG(D_ERROR,
583                                "VFS: Error %Zd occurred while creating "
584                                "quota.\n", ret);
585                         return ret;
586                 }
587         filp = dquot->dq_info->qi_files[type];
588         offset = dquot->dq_off;
589         /* Argh... We may need to write structure full of zeroes but that would
590          * be treated as an empty place by the rest of the code. Format change
591          * would be definitely cleaner but the problems probably are not worth
592          * it */
593         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
594                 ddquot.dqb_itime = cpu_to_le64(1);
595
596         ret = lustre_write_quota(filp, (char *)&ddquot, dqblk_sz, offset);
597         if (ret != dqblk_sz) {
598                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
599                        filp->f_dentry->d_sb->s_id);
600                 if (ret >= 0)
601                         ret = -ENOSPC;
602         } else
603                 ret = 0;
604
605         return ret;
606 }
607
608 /**
609  * Free dquot entry in data block
610  */
611 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
612                         lustre_quota_version_t version)
613 {
614         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
615         struct lustre_mem_dqinfo *info =
616             &dquot->dq_info->qi_info[dquot->dq_type];
617         struct lustre_disk_dqdbheader *dh;
618         dqbuf_t buf = getdqbuf();
619         int dqstrinblk = lustre_dqstrinblk[version];
620         int ret = 0;
621
622         if (!buf)
623                 return -ENOMEM;
624         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
625                 CDEBUG(D_ERROR,
626                        "VFS: Quota structure has offset to other block (%u) "
627                        "than it should (%u).\n",
628                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
629                 goto out_buf;
630         }
631         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
632                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
633                 goto out_buf;
634         }
635         dh = (struct lustre_disk_dqdbheader *)buf;
636         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
637         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
638                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
639                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
640                         CDEBUG(D_ERROR,
641                                "VFS: Can't move quota data block (%u) to free "
642                                "list.\n", blk);
643                         goto out_buf;
644                 }
645         } else {
646                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
647                        0, lustre_disk_dqblk_sz[version]);
648                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
649                         /* Insert will write block itself */
650                         if ((ret =
651                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
652                                 CDEBUG(D_ERROR,
653                                        "VFS: Can't insert quota data block "
654                                        "(%u) to free entry list.\n", blk);
655                                 goto out_buf;
656                         }
657                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
658                         CDEBUG(D_ERROR,
659                                "VFS: Can't write quota data block %u\n", blk);
660                         goto out_buf;
661                 }
662         }
663         dquot->dq_off = 0;      /* Quota is now unattached */
664 out_buf:
665         freedqbuf(buf);
666         return ret;
667 }
668
/**
 * Remove reference to dquot from tree
 *
 * Reads tree block *blk and follows the reference selected by dq_id at
 * level \a depth.  At the last tree level the referenced data entry is
 * released with free_dqentry(); at other levels the function recurses.
 * When the lower level succeeded and left no block behind, the reference
 * is cleared; if the tree block is then entirely zero and is not the root,
 * it is handed to put_free_dqblk() and *blk is set to 0, otherwise the
 * block is written back.
 *
 * \retval -ENOMEM when no buffer could be allocated, otherwise the result
 *         of the last read/free/recursion/write performed
 */
static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
                       lustre_quota_version_t version)
{
        struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
        struct lustre_mem_dqinfo *info =
            &dquot->dq_info->qi_info[dquot->dq_type];
        dqbuf_t buf = getdqbuf();
        int ret = 0;
        uint newblk;
        u32 *ref = (u32 *) buf;

        if (!buf)
                return -ENOMEM;
        if ((ret = read_blk(filp, NULL, 0, *blk, buf)) < 0) {
                CERROR("VFS: Can't read quota data block %u\n", *blk);
                goto out_buf;
        }
        newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
        if (depth == LUSTRE_DQTREEDEPTH - 1) {
                ret = free_dqentry(dquot, newblk, version);
                /* at the last level the reference is always dropped below
                 * (provided free_dqentry() did not fail) */
                newblk = 0;
        } else
                ret = remove_tree(dquot, &newblk, depth + 1, version);
        if (ret >= 0 && !newblk) {
                int i;
                ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
                for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
                        /* Block got empty? */ ;
                /* don't put the root block into free blk list! */
                if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
                        /* return value of put_free_dqblk() is not checked */
                        put_free_dqblk(filp, info, buf, *blk);
                        *blk = 0;
                } else if ((ret = write_blk(filp, *blk, buf)) < 0)
                        CDEBUG(D_ERROR,
                               "VFS: Can't write quota tree block %u.\n", *blk);
        }
out_buf:
        freedqbuf(buf);
        return ret;
}
712
713 /**
714  * Delete dquot from tree
715  */
716 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
717                                 lustre_quota_version_t version)
718 {
719         uint tmp = LUSTRE_DQTREEOFF;
720
721         if (!dquot->dq_off)     /* Even not allocated? */
722                 return 0;
723         return remove_tree(dquot, &tmp, 0, version);
724 }
725
726 /**
727  * Find entry in block
728  */
729 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
730                                  lustre_quota_version_t version)
731 {
732         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
733         dqbuf_t buf = getdqbuf();
734         loff_t ret = 0;
735         int i;
736         struct lustre_disk_dqblk_v2 *ddquot =
737                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
738         int dqblk_sz = lustre_disk_dqblk_sz[version];
739         int dqstrinblk = lustre_dqstrinblk[version];
740
741         LASSERT(version == LUSTRE_QUOTA_V2);
742
743         if (!buf)
744                 return -ENOMEM;
745         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
746                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
747                 goto out_buf;
748         }
749         if (dquot->dq_id)
750                 for (i = 0; i < dqstrinblk && 
751                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
752                      i++) ;
753         else {                  /* ID 0 as a bit more complicated searching... */
754                 for (i = 0; i < dqstrinblk; i++)
755                         if (!le32_to_cpu(ddquot[i].dqb_id)
756                             && memcmp((char *)&emptydquot[version],
757                                       (char *)&ddquot[i], dqblk_sz))
758                                 break;
759         }
760         if (i == dqstrinblk) {
761                 CDEBUG(D_ERROR,
762                        "VFS: Quota for id %u referenced but not present.\n",
763                        dquot->dq_id);
764                 ret = -EIO;
765                 goto out_buf;
766         } else
767                 ret =
768                     (blk << LUSTRE_DQBLKSIZE_BITS) +
769                     sizeof(struct lustre_disk_dqdbheader) +
770                     i * dqblk_sz;
771 out_buf:
772         freedqbuf(buf);
773         return ret;
774 }
775
776 /**
777  * Find entry for given id in the tree
778  */
779 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
780                                 lustre_quota_version_t version)
781 {
782         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
783         dqbuf_t buf = getdqbuf();
784         loff_t ret = 0;
785         u32 *ref = (u32 *) buf;
786
787         if (!buf)
788                 return -ENOMEM;
789         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
790                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
791                 goto out_buf;
792         }
793         ret = 0;
794         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
795         if (!blk)               /* No reference? */
796                 goto out_buf;
797         if (depth < LUSTRE_DQTREEDEPTH - 1)
798                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
799         else
800                 ret = find_block_dqentry(dquot, blk, version);
801 out_buf:
802         freedqbuf(buf);
803         return ret;
804 }
805
/**
 * Find entry for given id in the tree - wrapper function
 *
 * Starts the lookup at the tree root block LUSTRE_DQTREEOFF, level 0, and
 * returns whatever find_tree_dqentry() returns.
 */
static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
                                  lustre_quota_version_t version)
{
        return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
}
814
815 int lustre_read_dquot(struct lustre_dquot *dquot)
816 {
817         int type = dquot->dq_type;
818         struct file *filp;
819         loff_t offset;
820         int ret = 0, dqblk_sz;
821         lustre_quota_version_t version;
822
823         /* Invalidated quota? */
824         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
825                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
826                 return -ESRCH;
827         }
828
829         version = dquot->dq_info->qi_version;
830         LASSERT(version == LUSTRE_QUOTA_V2);
831         dqblk_sz = lustre_disk_dqblk_sz[version];
832
833         offset = find_dqentry(dquot, version);
834         if (offset <= 0) {      /* Entry not present? */
835                 if (offset < 0)
836                         CDEBUG(D_ERROR,
837                                "VFS: Can't read quota structure for id %u.\n",
838                                dquot->dq_id);
839                 dquot->dq_off = 0;
840                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
841                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
842                 ret = offset;
843         } else {
844                 struct lustre_disk_dqblk_v2 ddquot;
845
846                 dquot->dq_off = offset;
847                 if ((ret = lustre_read_quota(filp, NULL, type, (char *)&ddquot,
848                                              dqblk_sz, offset)) != dqblk_sz) {
849                         if (ret >= 0)
850                                 ret = -EIO;
851                         CDEBUG(D_ERROR,
852                                "VFS: Error while reading quota structure for id "
853                                "%u.\n", dquot->dq_id);
854                         memset((char *)&ddquot, 0, dqblk_sz);
855                 } else {
856                         ret = 0;
857                         /* We need to escape back all-zero structure */
858                         if (!memcmp((char *)&fakedquot[version],
859                                     (char *)&ddquot, dqblk_sz))
860                                 ddquot.dqb_itime = cpu_to_le64(0);
861                 }
862                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
863         }
864
865         return ret;
866 }
867
868 /**
869  * Commit changes of dquot to disk - it might also mean deleting
870  * it when quota became fake.
871  */
872 int lustre_commit_dquot(struct lustre_dquot *dquot)
873 {
874         int rc = 0;
875         lustre_quota_version_t version = dquot->dq_info->qi_version;
876         void *handle;
877         struct inode *inode = dquot->dq_info->qi_files[dquot->dq_type]->f_dentry->d_inode;
878
879         /* always clear the flag so we don't loop on an IO error... */
880         clear_bit(DQ_MOD_B, &dquot->dq_flags);
881
882         /* The block/inode usage in admin quotafile isn't the real usage
883          * over all cluster, so keep the fake dquot entry on disk is
884          * meaningless, just remove it */
885         if (test_bit(DQ_FAKE_B, &dquot->dq_flags)) {
886                 handle = lustre_quota_journal_start(inode, 1);
887                 rc = lustre_delete_dquot(dquot, version);
888                 lustre_quota_journal_stop(handle);
889         } else {
890                 handle = lustre_quota_journal_start(inode, 0);
891                 rc = lustre_write_dquot(dquot, version);
892                 lustre_quota_journal_stop(handle);
893         }
894
895         if (rc < 0)
896                 return rc;
897
898         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
899                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
900
901         return rc;
902 }
903
904 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
905                              int fakemagics)
906 {
907         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
908         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
909         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
910         struct lustre_disk_dqheader dqhead;
911         ssize_t size;
912         struct file *fp = lqi->qi_files[type];
913         int rc = 0;
914
915         /* write quotafile header */
916         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
917                                        fake_magics[type] : quota_magics[type]);
918         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
919         size = lustre_write_quota(fp, (char *)&dqhead,
920                                   sizeof(struct lustre_disk_dqheader), 0);
921
922         if (size != sizeof(struct lustre_disk_dqheader)) {
923                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
924                 rc = size;
925         }
926
927         return rc;
928 }
929
930 /**
931  * We need to export this function to initialize quotafile, because we haven't
932  * user level check utility
933  */
934 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
935                                    int fakemagics)
936 {
937         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
938         int rc;
939
940         rc = lustre_init_quota_header(lqi, type, fakemagics);
941         if (rc)
942                 return rc;
943
944         /* write init quota info */
945         memset(dqinfo, 0, sizeof(*dqinfo));
946         dqinfo->dqi_bgrace = MAX_DQ_TIME;
947         dqinfo->dqi_igrace = MAX_IQ_TIME;
948         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
949
950         return lustre_write_quota_info(lqi, type);
951 }
952
/**
 * Initialize the quota file of the given type with the regular magic, i.e.
 * lustre_init_quota_info_generic() with fakemagics set to 0.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
957
958 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
959                               uint blk, struct list_head *list)
960 {
961         dqbuf_t buf = getdqbuf();
962         loff_t ret = 0;
963         struct lustre_disk_dqdbheader *dqhead =
964             (struct lustre_disk_dqdbheader *)buf;
965         struct dqblk *blk_item;
966         struct dqblk *pos;
967         struct list_head *tmp;
968
969         if (!buf)
970                 return -ENOMEM;
971         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
972                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
973                 goto out_buf;
974         }
975         ret = 0;
976
977         if (!le32_to_cpu(dqhead->dqdh_entries))
978                 goto out_buf;
979
980         if (list_empty(list)) {
981                 tmp = list;
982                 goto done;
983         }
984
985         list_for_each_entry(pos, list, link) {
986                 if (blk == pos->blk)    /* we got this blk already */
987                         goto out_buf;
988                 if (blk > pos->blk)
989                         continue;
990                 break;
991         }
992         tmp = &pos->link;
993 done:
994         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
995         if (!blk_item) {
996                 ret = -ENOMEM;
997                 goto out_buf;
998         }
999         blk_item->blk = blk;
1000         INIT_LIST_HEAD(&blk_item->link);
1001
1002         list_add_tail(&blk_item->link, tmp);
1003
1004 out_buf:
1005         freedqbuf(buf);
1006         return ret;
1007 }
1008
1009 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1010                       uint blk, int depth, struct list_head *list)
1011 {
1012         dqbuf_t buf = getdqbuf();
1013         loff_t ret = 0;
1014         int index;
1015         u32 *ref = (u32 *) buf;
1016
1017         if (!buf)
1018                 return -ENOMEM;
1019         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
1020                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
1021                 goto out_buf;
1022         }
1023         ret = 0;
1024
1025         for (index = 0; index <= 0xff && !ret; index++) {
1026                 blk = le32_to_cpu(ref[index]);
1027                 if (!blk)       /* No reference */
1028                         continue;
1029
1030                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1031                         ret = walk_tree_dqentry(filp, inode, type, blk,
1032                                                 depth + 1, list);
1033                 else
1034                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1035         }
1036 out_buf:
1037         freedqbuf(buf);
1038         return ret;
1039 }
1040
1041 /**
1042  * Walk through the quota file (v2 format) to get all ids with quota limit
1043  */
1044 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1045                     struct list_head *list)
1046 {
1047         struct list_head blk_list;
1048         struct dqblk *blk_item, *tmp;
1049         dqbuf_t buf = NULL;
1050         struct lustre_disk_dqblk_v2 *ddquot;
1051         int rc;
1052         lustre_quota_version_t version;
1053
1054         ENTRY;
1055
1056         LASSERT(ergo(fp == NULL, inode != NULL));
1057
1058         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1059                 version = LUSTRE_QUOTA_V2;
1060         else {
1061                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1062                 RETURN(-EINVAL);
1063         }
1064
1065         if (!list_empty(list)) {
1066                 CDEBUG(D_ERROR, "not empty list\n");
1067                 RETURN(-EINVAL);
1068         }
1069
1070         INIT_LIST_HEAD(&blk_list);
1071         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1072         if (rc) {
1073                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1074                 GOTO(out_free, rc);
1075         }
1076         if (list_empty(&blk_list))
1077                 RETURN(0);
1078
1079         buf = getdqbuf();
1080         if (!buf)
1081                 RETURN(-ENOMEM);
1082         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1083
1084         list_for_each_entry(blk_item, &blk_list, link) {
1085                 loff_t ret = 0;
1086                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1087
1088                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1089                 if ((ret = read_blk(fp, inode, type, blk_item->blk, buf)) < 0) {
1090                         CERROR("VFS: Can't read quota tree block %u.\n",
1091                                blk_item->blk);
1092                         GOTO(out_free, rc = ret);
1093                 }
1094
1095                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1096                         struct dquot_id *dqid;
1097                         /* skip empty entry */
1098                         if (!memcmp((char *)&emptydquot[version],
1099                                     (char *)&ddquot[i], dqblk_sz))
1100                                 continue;
1101
1102                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1103                         if (!dqid)
1104                                 GOTO(out_free, rc = -ENOMEM);
1105
1106                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1107                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1108                                          QI_SET : 0;
1109                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1110                                          QB_SET : 0;
1111
1112                         INIT_LIST_HEAD(&dqid->di_link);
1113                         list_add(&dqid->di_link, list);
1114                 }
1115         }
1116
1117 out_free:
1118         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1119                 list_del_init(&blk_item->link);
1120                 kfree(blk_item);
1121         }
1122         if (buf)
1123                 freedqbuf(buf);
1124
1125         RETURN(rc);
1126 }
1127
1128
/* Quota file entry points made available to other kernel modules */
EXPORT_SYMBOL(lustre_read_quota_info);
EXPORT_SYMBOL(lustre_write_quota_info);
EXPORT_SYMBOL(lustre_check_quota_file);
EXPORT_SYMBOL(lustre_read_dquot);
EXPORT_SYMBOL(lustre_commit_dquot);
EXPORT_SYMBOL(lustre_init_quota_info);
EXPORT_SYMBOL(lustre_get_qids);
1136 #endif