Whamcloud - gitweb
b=14929 a tiny fix for mkfs build
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #ifdef HAVE_QUOTAIO_V1_H
54 # include <linux/quotaio_v1.h>
55 #endif
56
57 #include <asm/byteorder.h>
58 #include <asm/uaccess.h>
59
60 #include <lustre_quota.h>
61 #include <obd_support.h>
62 #include "lustre_quota_fmt.h"
63
64 #ifdef HAVE_QUOTA_SUPPORT
65
66 static const uint lustre_initqversions[][MAXQUOTAS] = {
67         [LUSTRE_QUOTA_V1] = LUSTRE_INITQVERSIONS_V1,
68         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
69 };
70
71 static const int lustre_dqstrinblk[] = {
72         [LUSTRE_QUOTA_V1] = LUSTRE_DQSTRINBLK,
73         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
74 };
75
76 static const int lustre_disk_dqblk_sz[] = {
77         [LUSTRE_QUOTA_V1] = sizeof(struct lustre_disk_dqblk),
78         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
79 };
80
81 static const union
82 {
83         struct lustre_disk_dqblk    r0;
84         struct lustre_disk_dqblk_v2 r1;
85 } fakedquot[] = {
86         [LUSTRE_QUOTA_V1] = {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}},
87         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
88 };
89
90 static const union
91 {
92         struct lustre_disk_dqblk    r0;
93         struct lustre_disk_dqblk_v2 r1;
94 } emptydquot[] = {
95         [LUSTRE_QUOTA_V1] = {.r0 = { 0 }},
96         [LUSTRE_QUOTA_V2] = {.r1 = { 0 }}
97 };
98
99 extern void *lustre_quota_journal_start(struct inode *inode, int delete);
100 extern void lustre_quota_journal_stop(void *handle);
101 extern ssize_t lustre_read_quota(struct file *f, struct inode *inode, int type,
102                                  char *buf, int count, loff_t pos);
103 extern ssize_t lustre_write_quota(struct file *f, char *buf, int count, loff_t pos);
104
105 int check_quota_file(struct file *f, struct inode *inode, int type,
106                      lustre_quota_version_t version)
107 {
108         struct lustre_disk_dqheader dqhead;
109         ssize_t size;
110         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
111         const uint *quota_versions = lustre_initqversions[version];
112
113         if (!f && !inode) {
114                 CERROR("check_quota_file failed!\n");
115                 libcfs_debug_dumpstack(NULL);
116                 return -EINVAL;
117         }
118
119         size = lustre_read_quota(f, inode, type, (char *)&dqhead,
120                                  sizeof(struct lustre_disk_dqheader), 0);
121
122         if (size != sizeof(struct lustre_disk_dqheader))
123                 return -EINVAL;
124         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
125             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
126                 return -EINVAL;
127         return 0;
128 }
129
130 /* Check whether given file is really lustre admin quotafile */
131 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
132 {
133         struct file *f = lqi->qi_files[type];
134         return check_quota_file(f, NULL, type, lqi->qi_version);
135 }
136
137 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
138 {
139         struct lustre_disk_dqinfo dinfo;
140         ssize_t size;
141
142         size = lustre_read_quota(f, NULL, 0, (char *)&dinfo,
143                                  sizeof(struct lustre_disk_dqinfo),
144                                  LUSTRE_DQINFOOFF);
145
146         if (size != sizeof(struct lustre_disk_dqinfo)) {
147                 CERROR("Can't read info structure on device %s.\n",
148                        f->f_vfsmnt->mnt_sb->s_id);
149                 return -EINVAL;
150         }
151         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
152         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
153         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
154         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
155         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
156         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
157         return 0;
158 }
159
160 /* Read information header from quota file */
161 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
162 {
163         return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]);
164 }
165
166 /* Write information header to quota file */
167 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
168 {
169         struct lustre_disk_dqinfo dinfo;
170         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
171         struct file *f = lqi->qi_files[type];
172         ssize_t size;
173
174         info->dqi_flags &= ~DQF_INFO_DIRTY;
175         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
176         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
177         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
178         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
179         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
180         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
181
182         size = lustre_write_quota(f, (char *)&dinfo,
183                                   sizeof(struct lustre_disk_dqinfo),
184                                   LUSTRE_DQINFOOFF);
185
186         if (size != sizeof(struct lustre_disk_dqinfo)) {
187                 CWARN("Can't write info structure on device %s.\n",
188                       f->f_vfsmnt->mnt_sb->s_id);
189                 return -1;
190         }
191         return 0;
192 }
193
/*
 * Accessors for fields of an on-disk dquot entry.
 *
 * DQ2MQ(v)      - read little-endian field v in CPU order; the width
 *                 (32 or 64 bit) is taken from sizeof(v).
 * MQ2DQ(v,newv) - store newv into field v in little-endian order, again
 *                 choosing the width from sizeof(v).
 * DQF_GET/DQF_PUT(var,ver,field[,val]) - same, but on the entry that
 *                 var points to, interpreted as struct lustre_disk_dqblk
 *                 when ver is LUSTRE_QUOTA_V1 and as
 *                 struct lustre_disk_dqblk_v2 otherwise.
 *
 * Every argument is parenthesized so that expression arguments (e.g. a
 * conditional for ver, or arithmetic for val) expand as a unit.
 */
#define DQ2MQ(v) ((sizeof(v) == sizeof(__u64)) ? \
                le64_to_cpu(v) : le32_to_cpu(v))

#define MQ2DQ(v,newv) ((sizeof(v) == sizeof(__u64)) ? \
                ((v) = cpu_to_le64((__u64)(newv))) : \
                ((v) = cpu_to_le32((__u32)(newv))))

#define DQF_GET(var,ver,field) (((ver) == LUSTRE_QUOTA_V1) ? \
                DQ2MQ(((struct lustre_disk_dqblk *)(var))->field) : \
                DQ2MQ(((struct lustre_disk_dqblk_v2 *)(var))->field))

#define DQF_PUT(var,ver,field,val) (((ver) == LUSTRE_QUOTA_V1) ? \
                MQ2DQ(((struct lustre_disk_dqblk *)(var))->field, (val)) : \
                MQ2DQ(((struct lustre_disk_dqblk_v2 *)(var))->field, (val)))
207
208 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
209                  lustre_quota_version_t version)
210 {
211         m->dqb_ihardlimit = DQF_GET(d, version, dqb_ihardlimit);
212         m->dqb_isoftlimit = DQF_GET(d, version, dqb_isoftlimit);
213         m->dqb_curinodes = DQF_GET(d, version, dqb_curinodes);
214         m->dqb_itime = DQF_GET(d, version, dqb_itime);
215         m->dqb_bhardlimit = DQF_GET(d, version, dqb_bhardlimit);
216         m->dqb_bsoftlimit = DQF_GET(d, version, dqb_bsoftlimit);
217         m->dqb_curspace = DQF_GET(d, version, dqb_curspace);
218         m->dqb_btime = DQF_GET(d, version, dqb_btime);
219 }
220
221 static int check_quota_bounds(struct lustre_mem_dqblk *m, 
222                               lustre_quota_version_t version)
223 {
224         return (version == LUSTRE_QUOTA_V1  &&
225                 m->dqb_ihardlimit <= MAX_UL &&
226                 m->dqb_isoftlimit <= MAX_UL &&
227                 m->dqb_curinodes <= MAX_UL  &&
228                 m->dqb_bhardlimit <= MAX_UL &&
229                 m->dqb_bsoftlimit <= MAX_UL) ||
230                 version != LUSTRE_QUOTA_V1;
231 }
232
233 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
234                        qid_t id, lustre_quota_version_t version)
235 {
236         if (!check_quota_bounds(m, version))
237                 return -EINVAL;
238
239         DQF_PUT(d, version, dqb_ihardlimit, m->dqb_ihardlimit);
240         DQF_PUT(d, version, dqb_isoftlimit, m->dqb_isoftlimit);
241         DQF_PUT(d, version, dqb_curinodes, m->dqb_curinodes);
242         DQF_PUT(d, version, dqb_itime, m->dqb_itime);
243         DQF_PUT(d, version, dqb_bhardlimit, m->dqb_bhardlimit);
244         DQF_PUT(d, version, dqb_bsoftlimit, m->dqb_bsoftlimit);
245         DQF_PUT(d, version, dqb_curspace, m->dqb_curspace);
246         DQF_PUT(d, version, dqb_btime, m->dqb_btime);
247         DQF_PUT(d, version, dqb_id, id);
248
249         return 0;
250 }
251
252 dqbuf_t getdqbuf(void)
253 {
254         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
255         if (!buf)
256                 CWARN("VFS: Not enough memory for quota buffers.\n");
257         return buf;
258 }
259
260 void freedqbuf(dqbuf_t buf)
261 {
262         kfree(buf);
263 }
264
265 ssize_t read_blk(struct file *filp, struct inode *inode, int type,
266                  uint blk, dqbuf_t buf)
267 {
268         ssize_t ret;
269
270         memset(buf, 0, LUSTRE_DQBLKSIZE);
271         ret = lustre_read_quota(filp, inode, type, (char *)buf, LUSTRE_DQBLKSIZE,
272                                 blk << LUSTRE_DQBLKSIZE_BITS);
273
274         /* Reading past EOF just returns a block of zeros */
275         if (ret == -EBADR)
276                 ret = 0;
277
278         return ret;
279 }
280
281 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
282 {
283         ssize_t ret;
284
285         ret = lustre_write_quota(filp, (char *)buf, LUSTRE_DQBLKSIZE,
286                                  blk << LUSTRE_DQBLKSIZE_BITS);
287
288         return ret;
289 }
290
291 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
292 {
293         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
294 }
295
296 /* Remove empty block from list and return it */
297 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
298 {
299         dqbuf_t buf = getdqbuf();
300         struct lustre_disk_dqdbheader *dh =
301             (struct lustre_disk_dqdbheader *)buf;
302         int ret, blk;
303
304         if (!buf)
305                 return -ENOMEM;
306         if (info->dqi_free_blk) {
307                 blk = info->dqi_free_blk;
308                 if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0)
309                         goto out_buf;
310                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
311         } else {
312                 memset(buf, 0, LUSTRE_DQBLKSIZE);
313                 /* Assure block allocation... */
314                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
315                         goto out_buf;
316                 blk = info->dqi_blocks++;
317         }
318         lustre_mark_info_dirty(info);
319         ret = blk;
320 out_buf:
321         freedqbuf(buf);
322         return ret;
323 }
324
325 /* Insert empty block to the list */
326 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
327                    dqbuf_t buf, uint blk)
328 {
329         struct lustre_disk_dqdbheader *dh =
330             (struct lustre_disk_dqdbheader *)buf;
331         int err;
332
333         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
334         dh->dqdh_prev_free = cpu_to_le32(0);
335         dh->dqdh_entries = cpu_to_le16(0);
336         info->dqi_free_blk = blk;
337         lustre_mark_info_dirty(info);
338         if ((err = write_blk(filp, blk, buf)) < 0)
339                 /* Some strange block. We had better leave it... */
340                 return err;
341         return 0;
342 }
343
344 /* Remove given block from the list of blocks with free entries */
345 int remove_free_dqentry(struct file *filp,
346                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
347                         uint blk)
348 {
349         dqbuf_t tmpbuf = getdqbuf();
350         struct lustre_disk_dqdbheader *dh =
351             (struct lustre_disk_dqdbheader *)buf;
352         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
353             le32_to_cpu(dh->dqdh_prev_free);
354         int err;
355
356         if (!tmpbuf)
357                 return -ENOMEM;
358         if (nextblk) {
359                 if ((err = read_blk(filp, NULL, 0, nextblk, tmpbuf)) < 0)
360                         goto out_buf;
361                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
362                     dh->dqdh_prev_free;
363                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
364                         goto out_buf;
365         }
366         if (prevblk) {
367                 if ((err = read_blk(filp, NULL, 0, prevblk, tmpbuf)) < 0)
368                         goto out_buf;
369                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
370                     dh->dqdh_next_free;
371                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
372                         goto out_buf;
373         } else {
374                 info->dqi_free_entry = nextblk;
375                 lustre_mark_info_dirty(info);
376         }
377         freedqbuf(tmpbuf);
378         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
379         err = write_blk(filp, blk, buf);
380         if (err < 0)      /* No matter whether write succeeds block is out of list */
381                 CERROR("VFS: Can't write block (%u) with "
382                        "free entries (rc=%d).\n", blk, err);
383         return 0;
384 out_buf:
385         freedqbuf(tmpbuf);
386         return err;
387 }
388
389 /* Insert given block to the beginning of list with free entries */
390 int insert_free_dqentry(struct file *filp,
391                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
392                         uint blk)
393 {
394         dqbuf_t tmpbuf = getdqbuf();
395         struct lustre_disk_dqdbheader *dh =
396             (struct lustre_disk_dqdbheader *)buf;
397         int err;
398
399         if (!tmpbuf)
400                 return -ENOMEM;
401         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
402         dh->dqdh_prev_free = cpu_to_le32(0);
403         if ((err = write_blk(filp, blk, buf)) < 0)
404                 goto out_buf;
405         if (info->dqi_free_entry) {
406                 if ((err = read_blk(filp, NULL, 0, info->dqi_free_entry, tmpbuf)) < 0)
407                         goto out_buf;
408                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
409                     cpu_to_le32(blk);
410                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
411                         goto out_buf;
412         }
413         freedqbuf(tmpbuf);
414         info->dqi_free_entry = blk;
415         lustre_mark_info_dirty(info);
416         return 0;
417 out_buf:
418         freedqbuf(tmpbuf);
419         return err;
420 }
421
422
423
424 /* Find space for dquot */
425 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
426                               lustre_quota_version_t version)
427 {
428         struct lustre_quota_info *lqi = dquot->dq_info;
429         struct file *filp = lqi->qi_files[dquot->dq_type];
430         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
431         uint blk, i;
432         struct lustre_disk_dqdbheader *dh;
433         void *ddquot;
434         int dqblk_sz = lustre_disk_dqblk_sz[version];
435         int dqstrinblk = lustre_dqstrinblk[version];
436         dqbuf_t buf;
437
438         *err = 0;
439         if (!(buf = getdqbuf())) {
440                 *err = -ENOMEM;
441                 return 0;
442         }
443         dh = (struct lustre_disk_dqdbheader *)buf;
444         ddquot = GETENTRIES(buf, version);
445         if (info->dqi_free_entry) {
446                 blk = info->dqi_free_entry;
447                 if ((*err = read_blk(filp, NULL, 0, blk, buf)) < 0)
448                         goto out_buf;
449         } else {
450                 blk = get_free_dqblk(filp, info);
451                 if ((int)blk < 0) {
452                         *err = blk;
453                         freedqbuf(buf);
454                         return 0;
455                 }
456                 memset(buf, 0, LUSTRE_DQBLKSIZE);
457                 info->dqi_free_entry = blk; /* This is enough as block is 
458                                                already zeroed and entry list
459                                                is empty... */
460                 lustre_mark_info_dirty(info);
461         }
462
463         /* Will block be full */
464         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
465                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
466                         CERROR("VFS: Can't remove block %u"
467                                " from entry free list.\n", blk);
468                         goto out_buf;
469                 }
470         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
471         /* Find free structure in block */
472         for (i = 0; i < dqstrinblk &&
473              memcmp((char *)&emptydquot[version],
474                     (char*)ddquot + i * dqblk_sz,
475                     dqblk_sz); i++);
476
477         if (i == dqstrinblk) {
478                 CERROR("VFS: Data block full but it shouldn't.\n");
479                 *err = -EIO;
480                 goto out_buf;
481         }
482
483         if ((*err = write_blk(filp, blk, buf)) < 0) {
484                 CERROR("VFS: Can't write quota data block %u.\n", blk);
485                 goto out_buf;
486         }
487         dquot->dq_off =
488             (blk << LUSTRE_DQBLKSIZE_BITS) +
489             sizeof(struct lustre_disk_dqdbheader) +
490             i * dqblk_sz;
491         freedqbuf(buf);
492         return blk;
493 out_buf:
494         freedqbuf(buf);
495         return 0;
496 }
497
498 /* Insert reference to structure into the trie */
499 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
500                           lustre_quota_version_t version)
501 {
502         struct lustre_quota_info *lqi = dquot->dq_info;
503         struct file *filp = lqi->qi_files[dquot->dq_type];
504         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
505         dqbuf_t buf;
506         int ret = 0, newson = 0, newact = 0;
507         u32 *ref;
508         uint newblk;
509
510         if (!(buf = getdqbuf()))
511                 return -ENOMEM;
512         if (!*treeblk) {
513                 ret = get_free_dqblk(filp, info);
514                 if (ret < 0)
515                         goto out_buf;
516                 *treeblk = ret;
517                 memset(buf, 0, LUSTRE_DQBLKSIZE);
518                 newact = 1;
519         } else {
520                 if ((ret = read_blk(filp, NULL, 0, *treeblk, buf)) < 0) {
521                         CERROR("VFS: Can't read tree quota block %u.\n",
522                                *treeblk);
523                         goto out_buf;
524                 }
525         }
526         ref = (u32 *) buf;
527         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
528         if (!newblk)
529                 newson = 1;
530         if (depth == LUSTRE_DQTREEDEPTH - 1) {
531
532                 if (newblk) {
533                         CERROR("VFS: Inserting already present quota entry "
534                                "(block %u).\n", 
535                                ref[GETIDINDEX(dquot->dq_id, depth)]);
536                         ret = -EIO;
537                         goto out_buf;
538                 }
539
540                 newblk = find_free_dqentry(dquot, &ret, version);
541         } else
542                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
543         if (newson && ret >= 0) {
544                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
545                 ret = write_blk(filp, *treeblk, buf);
546         } else if (newact && ret < 0)
547                 put_free_dqblk(filp, info, buf, *treeblk);
548 out_buf:
549         freedqbuf(buf);
550         return ret;
551 }
552
553 /* Wrapper for inserting quota structure into tree */
554 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
555                                  lustre_quota_version_t version)
556 {
557         int tmp = LUSTRE_DQTREEOFF;
558         return do_insert_tree(dquot, &tmp, 0, version);
559 }
560
561 /*
562  *  We don't have to be afraid of deadlocks as we never have quotas on quota files...
563  */
564 static int lustre_write_dquot(struct lustre_dquot *dquot, 
565                               lustre_quota_version_t version)
566 {
567         int type = dquot->dq_type;
568         struct file *filp;
569         loff_t offset;
570         ssize_t ret;
571         int dqblk_sz = lustre_disk_dqblk_sz[version];
572         char ddquot[sizeof(union lustre_disk_dqblk_un)];
573
574         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
575         if (ret < 0)
576                 return ret;
577
578         if (!dquot->dq_off)
579                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
580                         CERROR("VFS: Error %Zd occurred while creating quota.\n",
581                                ret);
582                         return ret;
583                 }
584         filp = dquot->dq_info->qi_files[type];
585         offset = dquot->dq_off;
586         /* Argh... We may need to write structure full of zeroes but that would be
587          * treated as an empty place by the rest of the code. Format change would
588          * be definitely cleaner but the problems probably are not worth it */
589         if (!memcmp((char *)&emptydquot[version], ddquot, dqblk_sz))
590                 DQF_PUT(ddquot, version, dqb_itime, 1);
591
592         ret = lustre_write_quota(filp, ddquot,
593                                  dqblk_sz, offset);
594         if (ret != dqblk_sz) {
595                 CWARN("VFS: dquota write failed on dev %s\n",
596                       filp->f_dentry->d_sb->s_id);
597                 if (ret >= 0)
598                         ret = -ENOSPC;
599         } else
600                 ret = 0;
601
602         return ret;
603 }
604
605 /* Free dquot entry in data block */
606 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
607                         lustre_quota_version_t version)
608 {
609         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
610         struct lustre_mem_dqinfo *info =
611             &dquot->dq_info->qi_info[dquot->dq_type];
612         struct lustre_disk_dqdbheader *dh;
613         dqbuf_t buf = getdqbuf();
614         int dqstrinblk = lustre_dqstrinblk[version];
615         int ret = 0;
616
617         if (!buf)
618                 return -ENOMEM;
619         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
620                 CERROR("VFS: Quota structure has offset to other block (%u) "
621                        "than it should (%u).\n", blk, 
622                        (uint)(dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
623                 goto out_buf;
624         }
625         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
626                 CERROR("VFS: Can't read quota data block %u\n", blk);
627                 goto out_buf;
628         }
629         dh = (struct lustre_disk_dqdbheader *)buf;
630         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
631         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
632                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
633                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
634                         CERROR("VFS: Can't move quota data block (%u) "
635                                "to free list.\n", blk);
636                         goto out_buf;
637                 }
638         } else {
639                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
640                        0, lustre_disk_dqblk_sz[version]);
641                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
642                         /* Insert will write block itself */
643                         if ((ret =
644                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
645                                 CERROR("VFS: Can't insert quota data block (%u) "
646                                        "to free entry list.\n", blk);
647                                 goto out_buf;
648                         }
649                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
650                         CERROR("VFS: Can't write quota data block %u\n", blk);
651                         goto out_buf;
652                 }
653         }
654         dquot->dq_off = 0;      /* Quota is now unattached */
655 out_buf:
656         freedqbuf(buf);
657         return ret;
658 }
659
660 /* Remove reference to dquot from tree */
661 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
662                        lustre_quota_version_t version)
663 {
664         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
665         struct lustre_mem_dqinfo *info =
666             &dquot->dq_info->qi_info[dquot->dq_type];
667         dqbuf_t buf = getdqbuf();
668         int ret = 0;
669         uint newblk;
670         u32 *ref = (u32 *) buf;
671
672         if (!buf)
673                 return -ENOMEM;
674         if ((ret = read_blk(filp, NULL, 0, *blk, buf)) < 0) {
675                 CERROR("VFS: Can't read quota data block %u\n", *blk);
676                 goto out_buf;
677         }
678         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
679         if (depth == LUSTRE_DQTREEDEPTH - 1) {
680                 ret = free_dqentry(dquot, newblk, version);
681                 newblk = 0;
682         } else
683                 ret = remove_tree(dquot, &newblk, depth + 1, version);
684         if (ret >= 0 && !newblk) {
685                 int i;
686                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
687                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
688                         /* Block got empty? */ ;
689                 /* don't put the root block into free blk list! */
690                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
691                         put_free_dqblk(filp, info, buf, *blk);
692                         *blk = 0;
693                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
694                         CERROR("VFS: Can't write quota tree block %u.\n", *blk);
695         }
696 out_buf:
697         freedqbuf(buf);
698         return ret;
699 }
700
701 /* Delete dquot from tree */
702 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
703                                 lustre_quota_version_t version)
704 {
705         uint tmp = LUSTRE_DQTREEOFF;
706
707         if (!dquot->dq_off)     /* Even not allocated? */
708                 return 0;
709         return remove_tree(dquot, &tmp, 0, version);
710 }
711
712 /* Find entry in block */
713 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
714                                  lustre_quota_version_t version)
715 {
716         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
717         dqbuf_t buf = getdqbuf();
718         loff_t ret = 0;
719         int i;
720         char *ddquot = GETENTRIES(buf, version);
721         int dqblk_sz = lustre_disk_dqblk_sz[version];
722         int dqstrinblk = lustre_dqstrinblk[version];
723
724         if (!buf)
725                 return -ENOMEM;
726         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
727                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
728                 goto out_buf;
729         }
730         if (dquot->dq_id)
731                 for (i = 0; i < dqstrinblk && 
732                      DQF_GET(ddquot+i*dqblk_sz, version, dqb_id) != dquot->dq_id;
733                      i++) ;
734         else {                  /* ID 0 as a bit more complicated searching... */
735                 for (i = 0; i < dqstrinblk; i++)
736                         if (!DQF_GET(ddquot + i*dqblk_sz, version, dqb_id)
737                             && memcmp((char *)&emptydquot[version],
738                                       ddquot + i*dqblk_sz,
739                                       dqblk_sz))
740                                 break;
741         }
742         if (i == dqstrinblk) {
743                 CERROR("VFS: Quota for id %u referenced but not present.\n",
744                        dquot->dq_id);
745                 ret = -EIO;
746                 goto out_buf;
747         } else
748                 ret =
749                     (blk << LUSTRE_DQBLKSIZE_BITS) +
750                     sizeof(struct lustre_disk_dqdbheader) +
751                     i * dqblk_sz;
752 out_buf:
753         freedqbuf(buf);
754         return ret;
755 }
756
757 /* Find entry for given id in the tree */
758 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
759                                 lustre_quota_version_t version)
760 {
761         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
762         dqbuf_t buf = getdqbuf();
763         loff_t ret = 0;
764         u32 *ref = (u32 *) buf;
765
766         if (!buf)
767                 return -ENOMEM;
768         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
769                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
770                 goto out_buf;
771         }
772         ret = 0;
773         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
774         if (!blk)               /* No reference? */
775                 goto out_buf;
776         if (depth < LUSTRE_DQTREEDEPTH - 1)
777                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
778         else
779                 ret = find_block_dqentry(dquot, blk, version);
780 out_buf:
781         freedqbuf(buf);
782         return ret;
783 }
784
785 /* Find entry for given id in the tree - wrapper function */
786 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
787                                   lustre_quota_version_t version)
788 {
789         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
790 }
791
792
793 int lustre_read_dquot(struct lustre_dquot *dquot)
794 {
795         int type = dquot->dq_type;
796         struct file *filp;
797         loff_t offset;
798         int ret = 0, dqblk_sz;
799         lustre_quota_version_t version;
800
801         /* Invalidated quota? */
802         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
803                 CERROR("VFS: Quota invalidated while reading!\n");
804                 return -EIO;
805         }
806
807         version = dquot->dq_info->qi_version;
808         dqblk_sz = lustre_disk_dqblk_sz[version];
809
810         offset = find_dqentry(dquot, version);
811         if (offset <= 0) {      /* Entry not present? */
812                 if (offset < 0)
813                         CERROR("VFS: Can't read quota structure for id %u.\n",
814                                dquot->dq_id);
815                 dquot->dq_off = 0;
816                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
817                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
818                 ret = offset;
819         } else {
820                 char ddquot[sizeof(union lustre_disk_dqblk_un)];
821
822                 dquot->dq_off = offset;
823                 if ((ret = lustre_read_quota(filp, NULL, type, ddquot,
824                                              dqblk_sz, offset)) != dqblk_sz) {
825                         if (ret >= 0)
826                                 ret = -EIO;
827                         CERROR("VFS: Error while reading quota structure "
828                                "for id %u.\n", dquot->dq_id);
829                         memset(ddquot, 0, dqblk_sz);
830                 } else {
831                         ret = 0;
832                         /* We need to escape back all-zero structure */
833                         if (!memcmp((char *)&fakedquot[version],
834                                     ddquot, dqblk_sz))
835                                 DQF_PUT(ddquot, version, dqb_itime, 0);
836                 }
837                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
838         }
839
840         return ret;
841 }
842
843 /* Commit changes of dquot to disk - it might also mean deleting it when quota became fake */
844 int lustre_commit_dquot(struct lustre_dquot *dquot)
845 {
846         int rc = 0;
847         lustre_quota_version_t version = dquot->dq_info->qi_version;
848         void *handle;
849         struct inode *inode = dquot->dq_info->qi_files[dquot->dq_type]->f_dentry->d_inode;
850
851         /* always clear the flag so we don't loop on an IO error... */
852         clear_bit(DQ_MOD_B, &dquot->dq_flags);
853
854         /* The block/inode usage in admin quotafile isn't the real usage
855          * over all cluster, so keep the fake dquot entry on disk is
856          * meaningless, just remove it */
857         if (test_bit(DQ_FAKE_B, &dquot->dq_flags)) {
858                 handle = lustre_quota_journal_start(inode, 1);
859                 rc = lustre_delete_dquot(dquot, version);
860                 lustre_quota_journal_stop(handle);
861         } else {
862                 handle = lustre_quota_journal_start(inode, 0);
863                 rc = lustre_write_dquot(dquot, version);
864                 lustre_quota_journal_stop(handle);
865         }
866
867         if (rc < 0)
868                 return rc;
869
870         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
871                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
872
873         return rc;
874 }
875
876 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
877 {
878         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
879         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
880         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
881         struct lustre_disk_dqheader dqhead;
882         ssize_t size;
883         struct file *fp = lqi->qi_files[type];
884         int rc = 0;
885
886         /* write quotafile header */
887         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
888                                        fake_magics[type] : quota_magics[type]);
889         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
890         size = lustre_write_quota(fp, (char *)&dqhead,
891                                   sizeof(struct lustre_disk_dqheader), 0);
892
893         if (size != sizeof(struct lustre_disk_dqheader)) {
894                 CERROR("error writing quotafile header (rc:%d)\n", rc);
895                 rc = size;
896         }
897
898         return rc;
899 }
900
901 /* We need to export this function to initialize quotafile, because we haven't
902  * user level check utility */
903 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
904                                    int fakemagics)
905 {
906         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
907         int rc;
908
909         rc = lustre_init_quota_header(lqi, type, fakemagics);
910         if (rc)
911                 return rc;
912
913         /* write init quota info */
914         memset(dqinfo, 0, sizeof(*dqinfo));
915         dqinfo->dqi_bgrace = MAX_DQ_TIME;
916         dqinfo->dqi_igrace = MAX_IQ_TIME;
917         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
918
919         return lustre_write_quota_info(lqi, type);
920 }
921
/*
 * Initialize a quota file with the regular magic numbers; thin wrapper
 * around lustre_init_quota_info_generic() with fakemagics set to 0.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
926
927 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
928                               uint blk, struct list_head *list)
929 {
930         dqbuf_t buf = getdqbuf();
931         loff_t ret = 0;
932         struct lustre_disk_dqdbheader *dqhead =
933             (struct lustre_disk_dqdbheader *)buf;
934         struct dqblk *blk_item;
935         struct dqblk *pos;
936         struct list_head *tmp;
937
938         if (!buf)
939                 return -ENOMEM;
940         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
941                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
942                 goto out_buf;
943         }
944         ret = 0;
945
946         if (!le32_to_cpu(dqhead->dqdh_entries))
947                 goto out_buf;
948
949         if (list_empty(list)) {
950                 tmp = list;
951                 goto done;
952         }
953
954         list_for_each_entry(pos, list, link) {
955                 if (blk == pos->blk)    /* we got this blk already */
956                         goto out_buf;
957                 if (blk > pos->blk)
958                         continue;
959                 break;
960         }
961         tmp = &pos->link;
962 done:
963         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
964         if (!blk_item) {
965                 ret = -ENOMEM;
966                 goto out_buf;
967         }
968         blk_item->blk = blk;
969         INIT_LIST_HEAD(&blk_item->link);
970
971         list_add_tail(&blk_item->link, tmp);
972
973 out_buf:
974         freedqbuf(buf);
975         return ret;
976 }
977
978 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
979                       uint blk, int depth, struct list_head *list)
980 {
981         dqbuf_t buf = getdqbuf();
982         loff_t ret = 0;
983         int index;
984         u32 *ref = (u32 *) buf;
985
986         if (!buf)
987                 return -ENOMEM;
988         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
989                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
990                 goto out_buf;
991         }
992         ret = 0;
993
994         for (index = 0; index <= 0xff && !ret; index++) {
995                 blk = le32_to_cpu(ref[index]);
996                 if (!blk)       /* No reference */
997                         continue;
998
999                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1000                         ret = walk_tree_dqentry(filp, inode, type, blk,
1001                                                 depth + 1, list);
1002                 else
1003                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1004         }
1005 out_buf:
1006         freedqbuf(buf);
1007         return ret;
1008 }
1009
1010 /* Walk through the quota file (v2 format) to get all ids with quota limit */
1011 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1012                     struct list_head *list)
1013 {
1014         struct list_head blk_list;
1015         struct dqblk *blk_item, *tmp;
1016         dqbuf_t buf = NULL;
1017         char *ddquot;
1018         int rc;
1019         lustre_quota_version_t version;
1020
1021         ENTRY;
1022
1023         LASSERT(ergo(fp == NULL, inode != NULL));
1024
1025         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V1) == 0)
1026                 version = LUSTRE_QUOTA_V1;
1027         else if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1028                 version = LUSTRE_QUOTA_V2;
1029         else {
1030                 CERROR("unknown quota file format!\n");
1031                 RETURN(-EINVAL);
1032         }
1033
1034         if (!list_empty(list)) {
1035                 CERROR("not empty list\n");
1036                 RETURN(-EINVAL);
1037         }
1038
1039         INIT_LIST_HEAD(&blk_list);
1040         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1041         if (rc) {
1042                 CERROR("walk through quota file failed!(%d)\n", rc);
1043                 GOTO(out_free, rc);
1044         }
1045         if (list_empty(&blk_list))
1046                 RETURN(0);
1047
1048         buf = getdqbuf();
1049         if (!buf)
1050                 RETURN(-ENOMEM);
1051         ddquot = GETENTRIES(buf, version);
1052
1053         list_for_each_entry(blk_item, &blk_list, link) {
1054                 loff_t ret = 0;
1055                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1056
1057                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1058                 if ((ret = read_blk(fp, inode, type, blk_item->blk, buf)) < 0) {
1059                         CERROR("VFS: Can't read quota tree block %u.\n",
1060                                blk_item->blk);
1061                         GOTO(out_free, rc = ret);
1062                 }
1063
1064                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1065                         struct dquot_id *dqid;
1066                         /* skip empty entry */
1067                         if (!memcmp((char *)&emptydquot[version],
1068                                     ddquot + i*dqblk_sz, dqblk_sz))
1069                                 continue;
1070
1071                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1072                         if (!dqid)
1073                                 GOTO(out_free, rc = -ENOMEM);
1074
1075                         dqid->di_id = DQF_GET(ddquot + i * dqblk_sz,
1076                                               version, dqb_id);
1077                         dqid->di_flag = DQF_GET(ddquot + i * dqblk_sz, version,
1078                                                 dqb_ihardlimit) ? QI_SET : 0;
1079                         dqid->di_flag |= DQF_GET(ddquot + i * dqblk_sz, version,
1080                                                  dqb_bhardlimit) ? QB_SET : 0;
1081                         INIT_LIST_HEAD(&dqid->di_link);
1082                         list_add(&dqid->di_link, list);
1083                 }
1084         }
1085
1086 out_free:
1087         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1088                 list_del_init(&blk_item->link);
1089                 kfree(blk_item);
1090         }
1091         if (buf)
1092                 freedqbuf(buf);
1093
1094         RETURN(rc);
1095 }
1096
1097
/* Symbols made available to other modules */

/* quota file info and format check */
EXPORT_SYMBOL(lustre_check_quota_file);
EXPORT_SYMBOL(lustre_init_quota_info);
EXPORT_SYMBOL(lustre_read_quota_info);
EXPORT_SYMBOL(lustre_write_quota_info);

/* per-id quota entries */
EXPORT_SYMBOL(lustre_read_dquot);
EXPORT_SYMBOL(lustre_commit_dquot);
EXPORT_SYMBOL(lustre_get_qids);
1105 #endif