Whamcloud - gitweb
b=22634 hold "mds_qonoff_sem" when call "lustre_read_quota()", and check parameter...
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #ifdef HAVE_QUOTAIO_V1_H
54 # include <linux/quotaio_v1.h>
55 #endif
56
57 #include <asm/byteorder.h>
58 #include <asm/uaccess.h>
59
60 #include <lustre_quota.h>
61 #include <obd_support.h>
62 #include "lustre_quota_fmt.h"
63
64 #ifdef HAVE_QUOTA_SUPPORT
65
66 static const uint lustre_initqversions[][MAXQUOTAS] = {
67         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
68 };
69
70 static const int lustre_dqstrinblk[] = {
71         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
72 };
73
74 static const int lustre_disk_dqblk_sz[] = {
75         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
76 };
77
78 static const union
79 {
80         struct lustre_disk_dqblk_v2 r1;
81 } fakedquot[] = {
82         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
83 };
84
85 static const union
86 {
87         struct lustre_disk_dqblk_v2 r1;
88 } emptydquot[] = {
89         [LUSTRE_QUOTA_V2] = {.r1 = { 0 }}
90 };
91
92 extern void *lustre_quota_journal_start(struct inode *inode, int delete);
93 extern void lustre_quota_journal_stop(void *handle);
94 extern ssize_t lustre_read_quota(struct file *f, struct inode *inode, int type,
95                                  char *buf, int count, loff_t pos);
96 extern ssize_t lustre_write_quota(struct file *f, char *buf, int count, loff_t pos);
97
98 int check_quota_file(struct file *f, struct inode *inode, int type,
99                      lustre_quota_version_t version)
100 {
101         struct lustre_disk_dqheader dqhead;
102         ssize_t size;
103         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
104         const uint *quota_versions = lustre_initqversions[version];
105
106         size = lustre_read_quota(f, inode, type, (char *)&dqhead,
107                                  sizeof(struct lustre_disk_dqheader), 0);
108         if (size != sizeof(struct lustre_disk_dqheader))
109                 return -EINVAL;
110         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
111             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
112                 return -EINVAL;
113         return 0;
114 }
115
116 /**
117  * Check whether given file is really lustre admin quotafile
118  */
119 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
120 {
121         struct file *f = lqi->qi_files[type];
122         return check_quota_file(f, NULL, type, lqi->qi_version);
123 }
124
125 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
126 {
127         struct lustre_disk_dqinfo dinfo;
128         ssize_t size;
129
130         size = lustre_read_quota(f, NULL, 0, (char *)&dinfo,
131                                  sizeof(struct lustre_disk_dqinfo),
132                                  LUSTRE_DQINFOOFF);
133
134         if (size != sizeof(struct lustre_disk_dqinfo)) {
135                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
136                        f->f_vfsmnt->mnt_sb->s_id);
137                 return -EINVAL;
138         }
139         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
140         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
141         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
142         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
143         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
144         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
145         return 0;
146 }
147
148 /**
149  * Read information header from quota file
150  */
151 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
152 {
153         return lustre_read_quota_file_info(lqi->qi_files[type],
154                                            &lqi->qi_info[type]);
155 }
156
157 /**
158  * Write information header to quota file
159  */
160 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
161 {
162         struct lustre_disk_dqinfo dinfo;
163         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
164         struct file *f = lqi->qi_files[type];
165         ssize_t size;
166
167         info->dqi_flags &= ~DQF_INFO_DIRTY;
168         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
169         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
170         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
171         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
172         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
173         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
174
175         size = lustre_write_quota(f, (char *)&dinfo,
176                                   sizeof(struct lustre_disk_dqinfo),
177                                   LUSTRE_DQINFOOFF);
178
179         if (size != sizeof(struct lustre_disk_dqinfo)) {
180                 CDEBUG(D_WARNING, 
181                        "Can't write info structure on device %s.\n",
182                        f->f_vfsmnt->mnt_sb->s_id);
183                 return -1;
184         }
185         return 0;
186 }
187
188 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
189                  lustre_quota_version_t version)
190 {
191         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
192
193         LASSERT(version == LUSTRE_QUOTA_V2);
194
195         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
196         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
197         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
198         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
199         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
200         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
201         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
202         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
203 }
204
205 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
206                        qid_t id, lustre_quota_version_t version)
207 {
208         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
209
210         LASSERT(version == LUSTRE_QUOTA_V2);
211
212         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
213         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
214         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
215         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
216         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
217         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
218         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
219         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
220         dqblk->dqb_id = cpu_to_le32(id);
221
222         return 0;
223 }
224
225 dqbuf_t getdqbuf(void)
226 {
227         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
228         if (!buf)
229                 CDEBUG(D_WARNING, 
230                        "VFS: Not enough memory for quota buffers.\n");
231         return buf;
232 }
233
234 void freedqbuf(dqbuf_t buf)
235 {
236         kfree(buf);
237 }
238
239 ssize_t read_blk(struct file *filp, struct inode *inode, int type,
240                  uint blk, dqbuf_t buf)
241 {
242         ssize_t ret;
243
244         memset(buf, 0, LUSTRE_DQBLKSIZE);
245         ret = lustre_read_quota(filp, inode, type, (char *)buf, LUSTRE_DQBLKSIZE,
246                                 blk << LUSTRE_DQBLKSIZE_BITS);
247
248         /* Reading past EOF just returns a block of zeros */
249         if (ret == -EBADR)
250                 ret = 0;
251
252         return ret;
253 }
254
255 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
256 {
257         ssize_t ret;
258
259         ret = lustre_write_quota(filp, (char *)buf, LUSTRE_DQBLKSIZE,
260                                  blk << LUSTRE_DQBLKSIZE_BITS);
261
262         return ret;
263 }
264
265 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
266 {
267         cfs_set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
268 }
269
270 /**
271  * Remove empty block from list and return it
272  */
273 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
274 {
275         dqbuf_t buf = getdqbuf();
276         struct lustre_disk_dqdbheader *dh =
277             (struct lustre_disk_dqdbheader *)buf;
278         int ret, blk;
279
280         if (!buf)
281                 return -ENOMEM;
282         if (info->dqi_free_blk) {
283                 blk = info->dqi_free_blk;
284                 if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0)
285                         goto out_buf;
286                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
287         } else {
288                 memset(buf, 0, LUSTRE_DQBLKSIZE);
289                 /* Assure block allocation... */
290                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
291                         goto out_buf;
292                 blk = info->dqi_blocks++;
293         }
294         lustre_mark_info_dirty(info);
295         ret = blk;
296 out_buf:
297         freedqbuf(buf);
298         return ret;
299 }
300
301 /**
302  * Insert empty block to the list
303  */
304 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
305                    dqbuf_t buf, uint blk)
306 {
307         struct lustre_disk_dqdbheader *dh =
308             (struct lustre_disk_dqdbheader *)buf;
309         int err;
310
311         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
312         dh->dqdh_prev_free = cpu_to_le32(0);
313         dh->dqdh_entries = cpu_to_le16(0);
314         info->dqi_free_blk = blk;
315         lustre_mark_info_dirty(info);
316         if ((err = write_blk(filp, blk, buf)) < 0)
317                 /* Some strange block. We had better leave it... */
318                 return err;
319         return 0;
320 }
321
322 /**
323  * Remove given block from the list of blocks with free entries
324  */
325 int remove_free_dqentry(struct file *filp,
326                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
327                         uint blk)
328 {
329         dqbuf_t tmpbuf = getdqbuf();
330         struct lustre_disk_dqdbheader *dh =
331             (struct lustre_disk_dqdbheader *)buf;
332         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
333             le32_to_cpu(dh->dqdh_prev_free);
334         int err;
335
336         if (!tmpbuf)
337                 return -ENOMEM;
338         if (nextblk) {
339                 if ((err = read_blk(filp, NULL, 0, nextblk, tmpbuf)) < 0)
340                         goto out_buf;
341                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
342                     dh->dqdh_prev_free;
343                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
344                         goto out_buf;
345         }
346         if (prevblk) {
347                 if ((err = read_blk(filp, NULL, 0, prevblk, tmpbuf)) < 0)
348                         goto out_buf;
349                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
350                     dh->dqdh_next_free;
351                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
352                         goto out_buf;
353         } else {
354                 info->dqi_free_entry = nextblk;
355                 lustre_mark_info_dirty(info);
356         }
357         freedqbuf(tmpbuf);
358         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
359         if (write_blk(filp, blk, buf) < 0)
360                 /* No matter whether write succeeds block is out of list */
361                 CDEBUG(D_ERROR, 
362                        "VFS: Can't write block (%u) with free entries.\n", blk);
363         return 0;
364 out_buf:
365         freedqbuf(tmpbuf);
366         return err;
367 }
368
369 /**
370  * Insert given block to the beginning of list with free entries
371  */
372 int insert_free_dqentry(struct file *filp,
373                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
374                         uint blk)
375 {
376         dqbuf_t tmpbuf = getdqbuf();
377         struct lustre_disk_dqdbheader *dh =
378             (struct lustre_disk_dqdbheader *)buf;
379         int err;
380
381         if (!tmpbuf)
382                 return -ENOMEM;
383         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
384         dh->dqdh_prev_free = cpu_to_le32(0);
385         if ((err = write_blk(filp, blk, buf)) < 0)
386                 goto out_buf;
387         if (info->dqi_free_entry) {
388                 if ((err = read_blk(filp, NULL, 0, info->dqi_free_entry, tmpbuf)) < 0)
389                         goto out_buf;
390                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
391                     cpu_to_le32(blk);
392                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
393                         goto out_buf;
394         }
395         freedqbuf(tmpbuf);
396         info->dqi_free_entry = blk;
397         lustre_mark_info_dirty(info);
398         return 0;
399 out_buf:
400         freedqbuf(tmpbuf);
401         return err;
402 }
403
404
405
406 /**
407  * Find space for dquot
408  */
409 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
410                               lustre_quota_version_t version)
411 {
412         struct lustre_quota_info *lqi = dquot->dq_info;
413         struct file *filp = lqi->qi_files[dquot->dq_type];
414         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
415         uint blk, i;
416         struct lustre_disk_dqdbheader *dh;
417         void *ddquot;
418         int dqblk_sz = lustre_disk_dqblk_sz[version];
419         int dqstrinblk = lustre_dqstrinblk[version];
420         dqbuf_t buf;
421
422         *err = 0;
423         if (!(buf = getdqbuf())) {
424                 *err = -ENOMEM;
425                 return 0;
426         }
427         dh = (struct lustre_disk_dqdbheader *)buf;
428         ddquot = GETENTRIES(buf, version);
429         if (info->dqi_free_entry) {
430                 blk = info->dqi_free_entry;
431                 if ((*err = read_blk(filp, NULL, 0, blk, buf)) < 0)
432                         goto out_buf;
433         } else {
434                 blk = get_free_dqblk(filp, info);
435                 if ((int)blk < 0) {
436                         *err = blk;
437                         freedqbuf(buf);
438                         return 0;
439                 }
440                 memset(buf, 0, LUSTRE_DQBLKSIZE);
441                 info->dqi_free_entry = blk; /* This is enough as block is 
442                                                already zeroed and entry list
443                                                is empty... */
444                 lustre_mark_info_dirty(info);
445         }
446
447         /* Will block be full */
448         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
449                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
450                         CDEBUG(D_ERROR, 
451                                "VFS: find_free_dqentry(): Can't remove block "
452                                "(%u) from entry free list.\n", blk);
453                         goto out_buf;
454                 }
455         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
456         /* Find free structure in block */
457         for (i = 0; i < dqstrinblk &&
458              memcmp((char *)&emptydquot[version],
459                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
460              i++);
461
462         if (i == dqstrinblk) {
463                 CDEBUG(D_ERROR, 
464                        "VFS: find_free_dqentry(): Data block full but it "
465                        "shouldn't.\n");
466                 *err = -EIO;
467                 goto out_buf;
468         }
469
470         if ((*err = write_blk(filp, blk, buf)) < 0) {
471                 CDEBUG(D_ERROR,
472                        "VFS: find_free_dqentry(): Can't write quota data "
473                        "block %u.\n", blk);
474                 goto out_buf;
475         }
476         dquot->dq_off =
477             (blk << LUSTRE_DQBLKSIZE_BITS) +
478             sizeof(struct lustre_disk_dqdbheader) +
479             i * dqblk_sz;
480         freedqbuf(buf);
481         return blk;
482 out_buf:
483         freedqbuf(buf);
484         return 0;
485 }
486
487 /**
488  * Insert reference to structure into the trie
489  */
490 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
491                           lustre_quota_version_t version)
492 {
493         struct lustre_quota_info *lqi = dquot->dq_info;
494         struct file *filp = lqi->qi_files[dquot->dq_type];
495         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
496         dqbuf_t buf;
497         int ret = 0, newson = 0, newact = 0;
498         u32 *ref;
499         uint newblk;
500
501         if (!(buf = getdqbuf()))
502                 return -ENOMEM;
503         if (!*treeblk) {
504                 ret = get_free_dqblk(filp, info);
505                 if (ret < 0)
506                         goto out_buf;
507                 *treeblk = ret;
508                 memset(buf, 0, LUSTRE_DQBLKSIZE);
509                 newact = 1;
510         } else {
511                 if ((ret = read_blk(filp, NULL, 0, *treeblk, buf)) < 0) {
512                         CERROR("VFS: Can't read tree quota block %u.\n",
513                                *treeblk);
514                         goto out_buf;
515                 }
516         }
517         ref = (u32 *) buf;
518         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
519         if (!newblk)
520                 newson = 1;
521         if (depth == LUSTRE_DQTREEDEPTH - 1) {
522
523                 if (newblk) {
524                         CDEBUG(D_ERROR, 
525                                "VFS: Inserting already present quota entry "
526                                "(block %u).\n",
527                                ref[GETIDINDEX(dquot->dq_id, depth)]);
528                         ret = -EIO;
529                         goto out_buf;
530                 }
531
532                 newblk = find_free_dqentry(dquot, &ret, version);
533         } else
534                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
535         if (newson && ret >= 0) {
536                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
537                 ret = write_blk(filp, *treeblk, buf);
538         } else if (newact && ret < 0)
539                 put_free_dqblk(filp, info, buf, *treeblk);
540 out_buf:
541         freedqbuf(buf);
542         return ret;
543 }
544
545 /**
546  * Wrapper for inserting quota structure into tree
547  */
548 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
549                                  lustre_quota_version_t version)
550 {
551         int tmp = LUSTRE_DQTREEOFF;
552         return do_insert_tree(dquot, &tmp, 0, version);
553 }
554
555 /**
556  * We don't have to be afraid of deadlocks as we never have quotas on
557  * quota files...
558  */
559 static int lustre_write_dquot(struct lustre_dquot *dquot, 
560                               lustre_quota_version_t version)
561 {
562         int type = dquot->dq_type;
563         struct file *filp;
564         loff_t offset;
565         ssize_t ret;
566         int dqblk_sz = lustre_disk_dqblk_sz[version];
567         struct lustre_disk_dqblk_v2 ddquot;
568
569         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
570         if (ret < 0)
571                 return ret;
572
573         if (!dquot->dq_off)
574                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
575                         CDEBUG(D_ERROR,
576                                "VFS: Error %Zd occurred while creating "
577                                "quota.\n", ret);
578                         return ret;
579                 }
580         filp = dquot->dq_info->qi_files[type];
581         offset = dquot->dq_off;
582         /* Argh... We may need to write structure full of zeroes but that would
583          * be treated as an empty place by the rest of the code. Format change
584          * would be definitely cleaner but the problems probably are not worth
585          * it */
586         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
587                 ddquot.dqb_itime = cpu_to_le64(1);
588
589         ret = lustre_write_quota(filp, (char *)&ddquot, dqblk_sz, offset);
590         if (ret != dqblk_sz) {
591                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
592                        filp->f_dentry->d_sb->s_id);
593                 if (ret >= 0)
594                         ret = -ENOSPC;
595         } else
596                 ret = 0;
597
598         return ret;
599 }
600
601 /**
602  * Free dquot entry in data block
603  */
604 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
605                         lustre_quota_version_t version)
606 {
607         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
608         struct lustre_mem_dqinfo *info =
609             &dquot->dq_info->qi_info[dquot->dq_type];
610         struct lustre_disk_dqdbheader *dh;
611         dqbuf_t buf = getdqbuf();
612         int dqstrinblk = lustre_dqstrinblk[version];
613         int ret = 0;
614
615         if (!buf)
616                 return -ENOMEM;
617         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
618                 CDEBUG(D_ERROR,
619                        "VFS: Quota structure has offset to other block (%u) "
620                        "than it should (%u).\n",
621                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
622                 goto out_buf;
623         }
624         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
625                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
626                 goto out_buf;
627         }
628         dh = (struct lustre_disk_dqdbheader *)buf;
629         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
630         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
631                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
632                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
633                         CDEBUG(D_ERROR,
634                                "VFS: Can't move quota data block (%u) to free "
635                                "list.\n", blk);
636                         goto out_buf;
637                 }
638         } else {
639                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
640                        0, lustre_disk_dqblk_sz[version]);
641                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
642                         /* Insert will write block itself */
643                         if ((ret =
644                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
645                                 CDEBUG(D_ERROR,
646                                        "VFS: Can't insert quota data block "
647                                        "(%u) to free entry list.\n", blk);
648                                 goto out_buf;
649                         }
650                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
651                         CDEBUG(D_ERROR,
652                                "VFS: Can't write quota data block %u\n", blk);
653                         goto out_buf;
654                 }
655         }
656         dquot->dq_off = 0;      /* Quota is now unattached */
657 out_buf:
658         freedqbuf(buf);
659         return ret;
660 }
661
662 /**
663  * Remove reference to dquot from tree
664  */
665 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
666                        lustre_quota_version_t version)
667 {
668         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
669         struct lustre_mem_dqinfo *info =
670             &dquot->dq_info->qi_info[dquot->dq_type];
671         dqbuf_t buf = getdqbuf();
672         int ret = 0;
673         uint newblk;
674         u32 *ref = (u32 *) buf;
675
676         if (!buf)
677                 return -ENOMEM;
678         if ((ret = read_blk(filp, NULL, 0, *blk, buf)) < 0) {
679                 CERROR("VFS: Can't read quota data block %u\n", *blk);
680                 goto out_buf;
681         }
682         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
683         if (depth == LUSTRE_DQTREEDEPTH - 1) {
684                 ret = free_dqentry(dquot, newblk, version);
685                 newblk = 0;
686         } else
687                 ret = remove_tree(dquot, &newblk, depth + 1, version);
688         if (ret >= 0 && !newblk) {
689                 int i;
690                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
691                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
692                         /* Block got empty? */ ;
693                 /* don't put the root block into free blk list! */
694                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
695                         put_free_dqblk(filp, info, buf, *blk);
696                         *blk = 0;
697                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
698                         CDEBUG(D_ERROR,
699                                "VFS: Can't write quota tree block %u.\n", *blk);
700         }
701 out_buf:
702         freedqbuf(buf);
703         return ret;
704 }
705
706 /**
707  * Delete dquot from tree
708  */
709 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
710                                 lustre_quota_version_t version)
711 {
712         uint tmp = LUSTRE_DQTREEOFF;
713
714         if (!dquot->dq_off)     /* Even not allocated? */
715                 return 0;
716         return remove_tree(dquot, &tmp, 0, version);
717 }
718
719 /**
720  * Find entry in block
721  */
722 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
723                                  lustre_quota_version_t version)
724 {
725         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
726         dqbuf_t buf = getdqbuf();
727         loff_t ret = 0;
728         int i;
729         struct lustre_disk_dqblk_v2 *ddquot =
730                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
731         int dqblk_sz = lustre_disk_dqblk_sz[version];
732         int dqstrinblk = lustre_dqstrinblk[version];
733
734         LASSERT(version == LUSTRE_QUOTA_V2);
735
736         if (!buf)
737                 return -ENOMEM;
738         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
739                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
740                 goto out_buf;
741         }
742         if (dquot->dq_id)
743                 for (i = 0; i < dqstrinblk && 
744                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
745                      i++) ;
746         else {                  /* ID 0 as a bit more complicated searching... */
747                 for (i = 0; i < dqstrinblk; i++)
748                         if (!le32_to_cpu(ddquot[i].dqb_id)
749                             && memcmp((char *)&emptydquot[version],
750                                       (char *)&ddquot[i], dqblk_sz))
751                                 break;
752         }
753         if (i == dqstrinblk) {
754                 CDEBUG(D_ERROR,
755                        "VFS: Quota for id %u referenced but not present.\n",
756                        dquot->dq_id);
757                 ret = -EIO;
758                 goto out_buf;
759         } else
760                 ret =
761                     (blk << LUSTRE_DQBLKSIZE_BITS) +
762                     sizeof(struct lustre_disk_dqdbheader) +
763                     i * dqblk_sz;
764 out_buf:
765         freedqbuf(buf);
766         return ret;
767 }
768
769 /**
770  * Find entry for given id in the tree
771  */
772 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
773                                 lustre_quota_version_t version)
774 {
775         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
776         dqbuf_t buf = getdqbuf();
777         loff_t ret = 0;
778         u32 *ref = (u32 *) buf;
779
780         if (!buf)
781                 return -ENOMEM;
782         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
783                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
784                 goto out_buf;
785         }
786         ret = 0;
787         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
788         if (!blk)               /* No reference? */
789                 goto out_buf;
790         if (depth < LUSTRE_DQTREEDEPTH - 1)
791                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
792         else
793                 ret = find_block_dqentry(dquot, blk, version);
794 out_buf:
795         freedqbuf(buf);
796         return ret;
797 }
798
799 /**
800  * Find entry for given id in the tree - wrapper function
801  */
802 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
803                                   lustre_quota_version_t version)
804 {
805         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
806 }
807
808 int lustre_read_dquot(struct lustre_dquot *dquot)
809 {
810         int type = dquot->dq_type;
811         struct file *filp;
812         loff_t offset;
813         int ret = 0, dqblk_sz;
814         lustre_quota_version_t version;
815
816         /* Invalidated quota? */
817         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
818                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
819                 return -ESRCH;
820         }
821
822         version = dquot->dq_info->qi_version;
823         LASSERT(version == LUSTRE_QUOTA_V2);
824         dqblk_sz = lustre_disk_dqblk_sz[version];
825
826         offset = find_dqentry(dquot, version);
827         if (offset <= 0) {      /* Entry not present? */
828                 if (offset < 0)
829                         CDEBUG(D_ERROR,
830                                "VFS: Can't read quota structure for id %u.\n",
831                                dquot->dq_id);
832                 dquot->dq_off = 0;
833                 cfs_set_bit(DQ_FAKE_B, &dquot->dq_flags);
834                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
835                 ret = offset;
836         } else {
837                 struct lustre_disk_dqblk_v2 ddquot;
838
839                 dquot->dq_off = offset;
840                 if ((ret = lustre_read_quota(filp, NULL, type, (char *)&ddquot,
841                                              dqblk_sz, offset)) != dqblk_sz) {
842                         if (ret >= 0)
843                                 ret = -EIO;
844                         CDEBUG(D_ERROR,
845                                "VFS: Error while reading quota structure for id "
846                                "%u.\n", dquot->dq_id);
847                         memset((char *)&ddquot, 0, dqblk_sz);
848                 } else {
849                         ret = 0;
850                         /* We need to escape back all-zero structure */
851                         if (!memcmp((char *)&fakedquot[version],
852                                     (char *)&ddquot, dqblk_sz))
853                                 ddquot.dqb_itime = cpu_to_le64(0);
854                 }
855                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
856         }
857
858         return ret;
859 }
860
861 /**
862  * Commit changes of dquot to disk - it might also mean deleting
863  * it when quota became fake.
864  */
865 int lustre_commit_dquot(struct lustre_dquot *dquot)
866 {
867         int rc = 0;
868         lustre_quota_version_t version = dquot->dq_info->qi_version;
869         void *handle;
870         struct inode *inode = dquot->dq_info->qi_files[dquot->dq_type]->f_dentry->d_inode;
871         int delete = 0;
872
873         /* always clear the flag so we don't loop on an IO error... */
874         cfs_clear_bit(DQ_MOD_B, &dquot->dq_flags);
875
876         /* The block/inode usage in admin quotafile isn't the real usage
877          * over all cluster, so keep the fake dquot entry on disk is
878          * meaningless, just remove it */
879         if (cfs_test_bit(DQ_FAKE_B, &dquot->dq_flags))
880                 delete = 1;
881         handle = lustre_quota_journal_start(inode, delete);
882         if (unlikely(IS_ERR(handle))) {
883                 rc = PTR_ERR(handle);
884                 CERROR("fail to lustre_quota_journal_start: rc = %d\n", rc);
885                 return rc;
886         }
887
888         if (delete)
889                 rc = lustre_delete_dquot(dquot, version);
890         else
891                 rc = lustre_write_dquot(dquot, version);
892         lustre_quota_journal_stop(handle);
893
894         if (rc < 0)
895                 return rc;
896
897         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
898                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
899
900         return rc;
901 }
902
903 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
904                              int fakemagics)
905 {
906         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
907         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
908         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
909         struct lustre_disk_dqheader dqhead;
910         ssize_t size;
911         struct file *fp = lqi->qi_files[type];
912         int rc = 0;
913
914         /* write quotafile header */
915         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
916                                        fake_magics[type] : quota_magics[type]);
917         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
918         size = lustre_write_quota(fp, (char *)&dqhead,
919                                   sizeof(struct lustre_disk_dqheader), 0);
920
921         if (size != sizeof(struct lustre_disk_dqheader)) {
922                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
923                 rc = size;
924         }
925
926         return rc;
927 }
928
929 /**
930  * We need to export this function to initialize quotafile, because we haven't
931  * user level check utility
932  */
933 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
934                                    int fakemagics)
935 {
936         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
937         int rc;
938
939         rc = lustre_init_quota_header(lqi, type, fakemagics);
940         if (rc)
941                 return rc;
942
943         /* write init quota info */
944         memset(dqinfo, 0, sizeof(*dqinfo));
945         dqinfo->dqi_bgrace = MAX_DQ_TIME;
946         dqinfo->dqi_igrace = MAX_IQ_TIME;
947         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
948
949         return lustre_write_quota_info(lqi, type);
950 }
951
/**
 * Initialize a quota file with the regular (non-fake) magic.
 *
 * \param lqi   quota info describing the file
 * \param type  quota type
 *
 * \return the result of lustre_init_quota_info_generic() unchanged
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int use_fake_magics = 0;

        return lustre_init_quota_info_generic(lqi, type, use_fake_magics);
}
956
957 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
958                               uint blk, cfs_list_t *list)
959 {
960         dqbuf_t buf = getdqbuf();
961         loff_t ret = 0;
962         struct lustre_disk_dqdbheader *dqhead =
963             (struct lustre_disk_dqdbheader *)buf;
964         struct dqblk *blk_item;
965         struct dqblk *pos;
966         cfs_list_t *tmp;
967
968         if (!buf)
969                 return -ENOMEM;
970         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
971                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
972                 goto out_buf;
973         }
974         ret = 0;
975
976         if (!le32_to_cpu(dqhead->dqdh_entries))
977                 goto out_buf;
978
979         if (cfs_list_empty(list)) {
980                 tmp = list;
981                 goto done;
982         }
983
984         cfs_list_for_each_entry(pos, list, link) {
985                 if (blk == pos->blk)    /* we got this blk already */
986                         goto out_buf;
987                 if (blk > pos->blk)
988                         continue;
989                 break;
990         }
991         tmp = &pos->link;
992 done:
993         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
994         if (!blk_item) {
995                 ret = -ENOMEM;
996                 goto out_buf;
997         }
998         blk_item->blk = blk;
999         CFS_INIT_LIST_HEAD(&blk_item->link);
1000
1001         cfs_list_add_tail(&blk_item->link, tmp);
1002
1003 out_buf:
1004         freedqbuf(buf);
1005         return ret;
1006 }
1007
1008 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1009                       uint blk, int depth, cfs_list_t *list)
1010 {
1011         dqbuf_t buf = getdqbuf();
1012         loff_t ret = 0;
1013         int index;
1014         u32 *ref = (u32 *) buf;
1015
1016         if (!buf)
1017                 return -ENOMEM;
1018         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
1019                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
1020                 goto out_buf;
1021         }
1022         ret = 0;
1023
1024         for (index = 0; index <= 0xff && !ret; index++) {
1025                 blk = le32_to_cpu(ref[index]);
1026                 if (!blk)       /* No reference */
1027                         continue;
1028
1029                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1030                         ret = walk_tree_dqentry(filp, inode, type, blk,
1031                                                 depth + 1, list);
1032                 else
1033                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1034         }
1035 out_buf:
1036         freedqbuf(buf);
1037         return ret;
1038 }
1039
1040 /**
1041  * Walk through the quota file (v2 format) to get all ids with quota limit
1042  */
1043 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1044                     cfs_list_t *list)
1045 {
1046         cfs_list_t blk_list;
1047         struct dqblk *blk_item, *tmp;
1048         dqbuf_t buf = NULL;
1049         struct lustre_disk_dqblk_v2 *ddquot;
1050         int rc;
1051         lustre_quota_version_t version;
1052
1053         ENTRY;
1054
1055         LASSERT(ergo(fp == NULL, inode != NULL));
1056
1057         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1058                 version = LUSTRE_QUOTA_V2;
1059         else {
1060                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1061                 RETURN(-EINVAL);
1062         }
1063
1064         if (!cfs_list_empty(list)) {
1065                 CDEBUG(D_ERROR, "not empty list\n");
1066                 RETURN(-EINVAL);
1067         }
1068
1069         CFS_INIT_LIST_HEAD(&blk_list);
1070         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1071         if (rc) {
1072                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1073                 GOTO(out_free, rc);
1074         }
1075         if (cfs_list_empty(&blk_list))
1076                 RETURN(0);
1077
1078         buf = getdqbuf();
1079         if (!buf)
1080                 RETURN(-ENOMEM);
1081         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1082
1083         cfs_list_for_each_entry(blk_item, &blk_list, link) {
1084                 loff_t ret = 0;
1085                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1086
1087                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1088                 if ((ret = read_blk(fp, inode, type, blk_item->blk, buf)) < 0) {
1089                         CERROR("VFS: Can't read quota tree block %u.\n",
1090                                blk_item->blk);
1091                         GOTO(out_free, rc = ret);
1092                 }
1093
1094                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1095                         struct dquot_id *dqid;
1096                         /* skip empty entry */
1097                         if (!memcmp((char *)&emptydquot[version],
1098                                     (char *)&ddquot[i], dqblk_sz))
1099                                 continue;
1100
1101                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1102                         if (!dqid)
1103                                 GOTO(out_free, rc = -ENOMEM);
1104
1105                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1106                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1107                                          QI_SET : 0;
1108                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1109                                          QB_SET : 0;
1110
1111                         CFS_INIT_LIST_HEAD(&dqid->di_link);
1112                         cfs_list_add(&dqid->di_link, list);
1113                 }
1114         }
1115
1116 out_free:
1117         cfs_list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1118                 cfs_list_del_init(&blk_item->link);
1119                 kfree(blk_item);
1120         }
1121         if (buf)
1122                 freedqbuf(buf);
1123
1124         RETURN(rc);
1125 }
1126
1127
1128 EXPORT_SYMBOL(lustre_read_quota_info);
1129 EXPORT_SYMBOL(lustre_write_quota_info);
1130 EXPORT_SYMBOL(lustre_check_quota_file);
1131 EXPORT_SYMBOL(lustre_read_dquot);
1132 EXPORT_SYMBOL(lustre_commit_dquot);
1133 EXPORT_SYMBOL(lustre_init_quota_info);
1134 EXPORT_SYMBOL(lustre_get_qids);
1135 #endif