Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #include <linux/quotaio_v1.h>
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
64 static const uint lustre_initqversions[][MAXQUOTAS] = {
65         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
66 };
67
68 static const int lustre_dqstrinblk[] = {
69         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
70 };
71
72 static const int lustre_disk_dqblk_sz[] = {
73         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
74 };
75
76 static const union
77 {
78         struct lustre_disk_dqblk_v2 r1;
79 } fakedquot[] = {
80         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
81 };
82
83 static const union
84 {
85         struct lustre_disk_dqblk_v2 r1;
86 } emptydquot[] = {
87         [LUSTRE_QUOTA_V2] = {.r1 = { 0 } }
88 };
89
90 int check_quota_file(struct file *f, struct inode *inode, int type, 
91                      lustre_quota_version_t version)
92 {
93         struct lustre_disk_dqheader dqhead;
94         mm_segment_t fs;
95         ssize_t size;
96         loff_t offset = 0;
97         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
98         const uint *quota_versions = lustre_initqversions[version];
99
100         if (!inode && !f) {
101                 CERROR("check_quota_file failed!\n");
102                 libcfs_debug_dumpstack(NULL);
103                 return -EINVAL;
104         }
105
106         if (f) {
107                 fs = get_fs();
108                 set_fs(KERNEL_DS);
109                 size = f->f_op->read(f, (char *)&dqhead,
110                                      sizeof(struct lustre_disk_dqheader), 
111                                      &offset);
112                 set_fs(fs);
113         } else { 
114 #ifndef KERNEL_SUPPORTS_QUOTA_READ
115                 size = 0;
116 #else
117                 struct super_block *sb = inode->i_sb;
118                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
119                                             sizeof(struct lustre_disk_dqheader),
120                                             0);
121 #endif
122         }
123         if (size != sizeof(struct lustre_disk_dqheader))
124                 return -EINVAL;
125         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
126             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
127                 return -EINVAL;
128         return 0;
129 }
130
131 /**
132  * Check whether given file is really lustre admin quotafile
133  */
134 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
135 {
136         struct file *f = lqi->qi_files[type];
137         return check_quota_file(f, NULL, type, lqi->qi_version);
138 }
139
140 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
141 {
142         mm_segment_t fs;
143         struct lustre_disk_dqinfo dinfo;
144         ssize_t size;
145         loff_t offset = LUSTRE_DQINFOOFF;
146
147         fs = get_fs();
148         set_fs(KERNEL_DS);
149         size = f->f_op->read(f, (char *)&dinfo, 
150                              sizeof(struct lustre_disk_dqinfo), &offset);
151         set_fs(fs);
152         if (size != sizeof(struct lustre_disk_dqinfo)) {
153                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
154                        f->f_vfsmnt->mnt_sb->s_id);
155                 return -EINVAL;
156         }
157         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
158         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
159         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
160         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
161         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
162         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
163         return 0;
164 }
165
166 /**
167  * Read information header from quota file
168  */
169 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
170 {
171         return lustre_read_quota_file_info(lqi->qi_files[type],
172                                            &lqi->qi_info[type]);
173 }
174
175 /**
176  * Write information header to quota file
177  */
178 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
179 {
180         mm_segment_t fs;
181         struct lustre_disk_dqinfo dinfo;
182         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
183         struct file *f = lqi->qi_files[type];
184         ssize_t size;
185         loff_t offset = LUSTRE_DQINFOOFF;
186
187         info->dqi_flags &= ~DQF_INFO_DIRTY;
188         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
189         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
190         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
191         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
192         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
193         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
194         fs = get_fs();
195         set_fs(KERNEL_DS);
196         size = f->f_op->write(f, (char *)&dinfo, 
197                               sizeof(struct lustre_disk_dqinfo), &offset);
198         set_fs(fs);
199         if (size != sizeof(struct lustre_disk_dqinfo)) {
200                 CDEBUG(D_WARNING, 
201                        "Can't write info structure on device %s.\n",
202                        f->f_vfsmnt->mnt_sb->s_id);
203                 return -1;
204         }
205         return 0;
206 }
207
208 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
209                  lustre_quota_version_t version)
210 {
211         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
212
213         LASSERT(version == LUSTRE_QUOTA_V2);
214
215         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
216         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
217         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
218         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
219         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
220         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
221         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
222         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
223 }
224
225 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
226                        qid_t id, lustre_quota_version_t version)
227 {
228         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
229
230         LASSERT(version == LUSTRE_QUOTA_V2);
231
232         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
233         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
234         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
235         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
236         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
237         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
238         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
239         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
240         dqblk->dqb_id = cpu_to_le32(id);
241
242         return 0;
243 }
244
245 dqbuf_t getdqbuf(void)
246 {
247         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
248         if (!buf)
249                 CDEBUG(D_WARNING, 
250                        "VFS: Not enough memory for quota buffers.\n");
251         return buf;
252 }
253
254 void freedqbuf(dqbuf_t buf)
255 {
256         kfree(buf);
257 }
258
259 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
260 {
261         mm_segment_t fs;
262         ssize_t ret;
263         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
264
265         memset(buf, 0, LUSTRE_DQBLKSIZE);
266         fs = get_fs();
267         set_fs(KERNEL_DS);
268         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
269         set_fs(fs);
270         return ret;
271 }
272
273 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
274 {
275         mm_segment_t fs;
276         ssize_t ret;
277         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
278
279         fs = get_fs();
280         set_fs(KERNEL_DS);
281         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
282         set_fs(fs);
283         return ret;
284 }
285
286 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
287 {
288         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
289 }
290
291 /**
292  * Remove empty block from list and return it
293  */
294 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
295 {
296         dqbuf_t buf = getdqbuf();
297         struct lustre_disk_dqdbheader *dh =
298             (struct lustre_disk_dqdbheader *)buf;
299         int ret, blk;
300
301         if (!buf)
302                 return -ENOMEM;
303         if (info->dqi_free_blk) {
304                 blk = info->dqi_free_blk;
305                 if ((ret = read_blk(filp, blk, buf)) < 0)
306                         goto out_buf;
307                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
308         } else {
309                 memset(buf, 0, LUSTRE_DQBLKSIZE);
310                 /* Assure block allocation... */
311                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
312                         goto out_buf;
313                 blk = info->dqi_blocks++;
314         }
315         lustre_mark_info_dirty(info);
316         ret = blk;
317 out_buf:
318         freedqbuf(buf);
319         return ret;
320 }
321
322 /**
323  * Insert empty block to the list
324  */
325 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
326                    dqbuf_t buf, uint blk)
327 {
328         struct lustre_disk_dqdbheader *dh =
329             (struct lustre_disk_dqdbheader *)buf;
330         int err;
331
332         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
333         dh->dqdh_prev_free = cpu_to_le32(0);
334         dh->dqdh_entries = cpu_to_le16(0);
335         info->dqi_free_blk = blk;
336         lustre_mark_info_dirty(info);
337         if ((err = write_blk(filp, blk, buf)) < 0)
338                 /* Some strange block. We had better leave it... */
339                 return err;
340         return 0;
341 }
342
343 /**
344  * Remove given block from the list of blocks with free entries
345  */
346 int remove_free_dqentry(struct file *filp,
347                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
348                         uint blk)
349 {
350         dqbuf_t tmpbuf = getdqbuf();
351         struct lustre_disk_dqdbheader *dh =
352             (struct lustre_disk_dqdbheader *)buf;
353         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
354             le32_to_cpu(dh->dqdh_prev_free);
355         int err;
356
357         if (!tmpbuf)
358                 return -ENOMEM;
359         if (nextblk) {
360                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
361                         goto out_buf;
362                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
363                     dh->dqdh_prev_free;
364                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
365                         goto out_buf;
366         }
367         if (prevblk) {
368                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
369                         goto out_buf;
370                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
371                     dh->dqdh_next_free;
372                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
373                         goto out_buf;
374         } else {
375                 info->dqi_free_entry = nextblk;
376                 lustre_mark_info_dirty(info);
377         }
378         freedqbuf(tmpbuf);
379         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
380         if (write_blk(filp, blk, buf) < 0)
381                 /* No matter whether write succeeds block is out of list */
382                 CDEBUG(D_ERROR, 
383                        "VFS: Can't write block (%u) with free entries.\n", blk);
384         return 0;
385 out_buf:
386         freedqbuf(tmpbuf);
387         return err;
388 }
389
390 /**
391  * Insert given block to the beginning of list with free entries
392  */
393 int insert_free_dqentry(struct file *filp,
394                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
395                         uint blk)
396 {
397         dqbuf_t tmpbuf = getdqbuf();
398         struct lustre_disk_dqdbheader *dh =
399             (struct lustre_disk_dqdbheader *)buf;
400         int err;
401
402         if (!tmpbuf)
403                 return -ENOMEM;
404         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
405         dh->dqdh_prev_free = cpu_to_le32(0);
406         if ((err = write_blk(filp, blk, buf)) < 0)
407                 goto out_buf;
408         if (info->dqi_free_entry) {
409                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
410                         goto out_buf;
411                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
412                     cpu_to_le32(blk);
413                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
414                         goto out_buf;
415         }
416         freedqbuf(tmpbuf);
417         info->dqi_free_entry = blk;
418         lustre_mark_info_dirty(info);
419         return 0;
420 out_buf:
421         freedqbuf(tmpbuf);
422         return err;
423 }
424
425
426
427 /**
428  * Find space for dquot
429  */
430 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
431                               lustre_quota_version_t version)
432 {
433         struct lustre_quota_info *lqi = dquot->dq_info;
434         struct file *filp = lqi->qi_files[dquot->dq_type];
435         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
436         uint blk, i;
437         struct lustre_disk_dqdbheader *dh;
438         void *ddquot;
439         int dqblk_sz = lustre_disk_dqblk_sz[version];
440         int dqstrinblk = lustre_dqstrinblk[version];
441         dqbuf_t buf;
442
443         *err = 0;
444         if (!(buf = getdqbuf())) {
445                 *err = -ENOMEM;
446                 return 0;
447         }
448         dh = (struct lustre_disk_dqdbheader *)buf;
449         ddquot = GETENTRIES(buf, version);
450         if (info->dqi_free_entry) {
451                 blk = info->dqi_free_entry;
452                 if ((*err = read_blk(filp, blk, buf)) < 0)
453                         goto out_buf;
454         } else {
455                 blk = get_free_dqblk(filp, info);
456                 if ((int)blk < 0) {
457                         *err = blk;
458                         freedqbuf(buf);
459                         return 0;
460                 }
461                 memset(buf, 0, LUSTRE_DQBLKSIZE);
462                 info->dqi_free_entry = blk; /* This is enough as block is 
463                                                already zeroed and entry list
464                                                is empty... */
465                 lustre_mark_info_dirty(info);
466         }
467
468         /* Will block be full */
469         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
470                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
471                         CDEBUG(D_ERROR, 
472                                "VFS: find_free_dqentry(): Can't remove block "
473                                "(%u) from entry free list.\n", blk);
474                         goto out_buf;
475                 }
476         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
477         /* Find free structure in block */
478         for (i = 0; i < dqstrinblk &&
479              memcmp((char *)&emptydquot[version],
480                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
481              i++);
482
483         if (i == dqstrinblk) {
484                 CDEBUG(D_ERROR, 
485                        "VFS: find_free_dqentry(): Data block full but it "
486                        "shouldn't.\n");
487                 *err = -EIO;
488                 goto out_buf;
489         }
490
491         if ((*err = write_blk(filp, blk, buf)) < 0) {
492                 CDEBUG(D_ERROR,
493                        "VFS: find_free_dqentry(): Can't write quota data "
494                        "block %u.\n", blk);
495                 goto out_buf;
496         }
497         dquot->dq_off =
498             (blk << LUSTRE_DQBLKSIZE_BITS) +
499             sizeof(struct lustre_disk_dqdbheader) +
500             i * dqblk_sz;
501         freedqbuf(buf);
502         return blk;
503 out_buf:
504         freedqbuf(buf);
505         return 0;
506 }
507
508 /**
509  * Insert reference to structure into the trie
510  */
511 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
512                           lustre_quota_version_t version)
513 {
514         struct lustre_quota_info *lqi = dquot->dq_info;
515         struct file *filp = lqi->qi_files[dquot->dq_type];
516         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
517         dqbuf_t buf;
518         int ret = 0, newson = 0, newact = 0;
519         u32 *ref;
520         uint newblk;
521
522         if (!(buf = getdqbuf()))
523                 return -ENOMEM;
524         if (!*treeblk) {
525                 ret = get_free_dqblk(filp, info);
526                 if (ret < 0)
527                         goto out_buf;
528                 *treeblk = ret;
529                 memset(buf, 0, LUSTRE_DQBLKSIZE);
530                 newact = 1;
531         } else {
532                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
533                         CDEBUG(D_ERROR,
534                                "VFS: Can't read tree quota block %u.\n",
535                                *treeblk);
536                         goto out_buf;
537                 }
538         }
539         ref = (u32 *) buf;
540         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
541         if (!newblk)
542                 newson = 1;
543         if (depth == LUSTRE_DQTREEDEPTH - 1) {
544
545                 if (newblk) {
546                         CDEBUG(D_ERROR, 
547                                "VFS: Inserting already present quota entry "
548                                "(block %u).\n",
549                                ref[GETIDINDEX(dquot->dq_id, depth)]);
550                         ret = -EIO;
551                         goto out_buf;
552                 }
553
554                 newblk = find_free_dqentry(dquot, &ret, version);
555         } else
556                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
557         if (newson && ret >= 0) {
558                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
559                 ret = write_blk(filp, *treeblk, buf);
560         } else if (newact && ret < 0)
561                 put_free_dqblk(filp, info, buf, *treeblk);
562 out_buf:
563         freedqbuf(buf);
564         return ret;
565 }
566
567 /**
568  * Wrapper for inserting quota structure into tree
569  */
570 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
571                                  lustre_quota_version_t version)
572 {
573         int tmp = LUSTRE_DQTREEOFF;
574         return do_insert_tree(dquot, &tmp, 0, version);
575 }
576
577 /**
578  * We don't have to be afraid of deadlocks as we never have quotas on
579  * quota files...
580  */
581 static int lustre_write_dquot(struct lustre_dquot *dquot, 
582                               lustre_quota_version_t version)
583 {
584         int type = dquot->dq_type;
585         struct file *filp;
586         mm_segment_t fs;
587         loff_t offset;
588         ssize_t ret;
589         int dqblk_sz = lustre_disk_dqblk_sz[version];
590         struct lustre_disk_dqblk_v2 ddquot;
591
592         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
593         if (ret < 0)
594                 return ret;
595
596         if (!dquot->dq_off)
597                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
598                         CDEBUG(D_ERROR,
599                                "VFS: Error %Zd occurred while creating "
600                                "quota.\n", ret);
601                         return ret;
602                 }
603         filp = dquot->dq_info->qi_files[type];
604         offset = dquot->dq_off;
605         /* Argh... We may need to write structure full of zeroes but that would
606          * be treated as an empty place by the rest of the code. Format change
607          * would be definitely cleaner but the problems probably are not worth
608          * it */
609         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
610                 ddquot.dqb_itime = cpu_to_le64(1);
611         fs = get_fs();
612         set_fs(KERNEL_DS);
613         ret = filp->f_op->write(filp, (char *)&ddquot,
614                                 dqblk_sz, &offset);
615         set_fs(fs);
616         if (ret != dqblk_sz) {
617                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
618                        filp->f_dentry->d_sb->s_id);
619                 if (ret >= 0)
620                         ret = -ENOSPC;
621         } else
622                 ret = 0;
623
624         return ret;
625 }
626
627 /**
628  * Free dquot entry in data block
629  */
630 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
631                         lustre_quota_version_t version)
632 {
633         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
634         struct lustre_mem_dqinfo *info =
635             &dquot->dq_info->qi_info[dquot->dq_type];
636         struct lustre_disk_dqdbheader *dh;
637         dqbuf_t buf = getdqbuf();
638         int dqstrinblk = lustre_dqstrinblk[version];
639         int ret = 0;
640
641         if (!buf)
642                 return -ENOMEM;
643         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
644                 CDEBUG(D_ERROR,
645                        "VFS: Quota structure has offset to other block (%u) "
646                        "than it should (%u).\n",
647                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
648                 goto out_buf;
649         }
650         if ((ret = read_blk(filp, blk, buf)) < 0) {
651                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
652                 goto out_buf;
653         }
654         dh = (struct lustre_disk_dqdbheader *)buf;
655         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
656         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
657                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
658                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
659                         CDEBUG(D_ERROR,
660                                "VFS: Can't move quota data block (%u) to free "
661                                "list.\n", blk);
662                         goto out_buf;
663                 }
664         } else {
665                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
666                        0, lustre_disk_dqblk_sz[version]);
667                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
668                         /* Insert will write block itself */
669                         if ((ret =
670                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
671                                 CDEBUG(D_ERROR,
672                                        "VFS: Can't insert quota data block "
673                                        "(%u) to free entry list.\n", blk);
674                                 goto out_buf;
675                         }
676                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
677                         CDEBUG(D_ERROR,
678                                "VFS: Can't write quota data block %u\n", blk);
679                         goto out_buf;
680                 }
681         }
682         dquot->dq_off = 0;      /* Quota is now unattached */
683 out_buf:
684         freedqbuf(buf);
685         return ret;
686 }
687
688 /**
689  * Remove reference to dquot from tree
690  */
691 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
692                        lustre_quota_version_t version)
693 {
694         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
695         struct lustre_mem_dqinfo *info =
696             &dquot->dq_info->qi_info[dquot->dq_type];
697         dqbuf_t buf = getdqbuf();
698         int ret = 0;
699         uint newblk;
700         u32 *ref = (u32 *) buf;
701
702         if (!buf)
703                 return -ENOMEM;
704         if ((ret = read_blk(filp, *blk, buf)) < 0) {
705                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", *blk);
706                 goto out_buf;
707         }
708         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
709         if (depth == LUSTRE_DQTREEDEPTH - 1) {
710                 ret = free_dqentry(dquot, newblk, version);
711                 newblk = 0;
712         } else
713                 ret = remove_tree(dquot, &newblk, depth + 1, version);
714         if (ret >= 0 && !newblk) {
715                 int i;
716                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
717                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
718                         /* Block got empty? */ ;
719                 /* don't put the root block into free blk list! */
720                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
721                         put_free_dqblk(filp, info, buf, *blk);
722                         *blk = 0;
723                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
724                         CDEBUG(D_ERROR,
725                                "VFS: Can't write quota tree block %u.\n", *blk);
726         }
727 out_buf:
728         freedqbuf(buf);
729         return ret;
730 }
731
732 /**
733  * Delete dquot from tree
734  */
735 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
736                                 lustre_quota_version_t version)
737 {
738         uint tmp = LUSTRE_DQTREEOFF;
739
740         if (!dquot->dq_off)     /* Even not allocated? */
741                 return 0;
742         return remove_tree(dquot, &tmp, 0, version);
743 }
744
745 /**
746  * Find entry in block
747  */
748 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
749                                  lustre_quota_version_t version)
750 {
751         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
752         dqbuf_t buf = getdqbuf();
753         loff_t ret = 0;
754         int i;
755         struct lustre_disk_dqblk_v2 *ddquot =
756                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
757         int dqblk_sz = lustre_disk_dqblk_sz[version];
758         int dqstrinblk = lustre_dqstrinblk[version];
759
760         LASSERT(version == LUSTRE_QUOTA_V2);
761
762         if (!buf)
763                 return -ENOMEM;
764         if ((ret = read_blk(filp, blk, buf)) < 0) {
765                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
766                 goto out_buf;
767         }
768         if (dquot->dq_id)
769                 for (i = 0; i < dqstrinblk && 
770                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
771                      i++) ;
772         else {                  /* ID 0 as a bit more complicated searching... */
773                 for (i = 0; i < dqstrinblk; i++)
774                         if (!le32_to_cpu(ddquot[i].dqb_id)
775                             && memcmp((char *)&emptydquot[version],
776                                       (char *)&ddquot[i], dqblk_sz))
777                                 break;
778         }
779         if (i == dqstrinblk) {
780                 CDEBUG(D_ERROR,
781                        "VFS: Quota for id %u referenced but not present.\n",
782                        dquot->dq_id);
783                 ret = -EIO;
784                 goto out_buf;
785         } else
786                 ret =
787                     (blk << LUSTRE_DQBLKSIZE_BITS) +
788                     sizeof(struct lustre_disk_dqdbheader) +
789                     i * dqblk_sz;
790 out_buf:
791         freedqbuf(buf);
792         return ret;
793 }
794
795 /**
796  * Find entry for given id in the tree
797  */
798 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
799                                 lustre_quota_version_t version)
800 {
801         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
802         dqbuf_t buf = getdqbuf();
803         loff_t ret = 0;
804         u32 *ref = (u32 *) buf;
805
806         if (!buf)
807                 return -ENOMEM;
808         if ((ret = read_blk(filp, blk, buf)) < 0) {
809                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
810                 goto out_buf;
811         }
812         ret = 0;
813         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
814         if (!blk)               /* No reference? */
815                 goto out_buf;
816         if (depth < LUSTRE_DQTREEDEPTH - 1)
817                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
818         else
819                 ret = find_block_dqentry(dquot, blk, version);
820 out_buf:
821         freedqbuf(buf);
822         return ret;
823 }
824
825 /**
826  * Find entry for given id in the tree - wrapper function
827  */
828 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
829                                   lustre_quota_version_t version)
830 {
831         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
832 }
833
834 int lustre_read_dquot(struct lustre_dquot *dquot)
835 {
836         int type = dquot->dq_type;
837         struct file *filp;
838         mm_segment_t fs;
839         loff_t offset;
840         int ret = 0, dqblk_sz;
841         lustre_quota_version_t version;
842
843         /* Invalidated quota? */
844         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
845                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
846                 return -EIO;
847         }
848
849         version = dquot->dq_info->qi_version;
850         LASSERT(version == LUSTRE_QUOTA_V2);
851         dqblk_sz = lustre_disk_dqblk_sz[version];
852
853         offset = find_dqentry(dquot, version);
854         if (offset <= 0) {      /* Entry not present? */
855                 if (offset < 0)
856                         CDEBUG(D_ERROR,
857                                "VFS: Can't read quota structure for id %u.\n",
858                                dquot->dq_id);
859                 dquot->dq_off = 0;
860                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
861                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
862                 ret = offset;
863         } else {
864                 struct lustre_disk_dqblk_v2 ddquot;
865
866                 dquot->dq_off = offset;
867                 fs = get_fs();
868                 set_fs(KERNEL_DS);
869                 if ((ret = filp->f_op->read(filp, (char *)&ddquot,
870                                             dqblk_sz, &offset)) != dqblk_sz) {
871                         if (ret >= 0)
872                                 ret = -EIO;
873                         CDEBUG(D_ERROR,
874                                "VFS: Error while reading quota structure for id "
875                                "%u.\n", dquot->dq_id);
876                         memset((char *)&ddquot, 0, dqblk_sz);
877                 } else {
878                         ret = 0;
879                         /* We need to escape back all-zero structure */
880                         if (!memcmp((char *)&fakedquot[version],
881                                     (char *)&ddquot, dqblk_sz))
882                                 ddquot.dqb_itime = cpu_to_le64(0);
883                 }
884                 set_fs(fs);
885                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
886         }
887
888         return ret;
889 }
890
891 /**
892  * Commit changes of dquot to disk - it might also mean deleting
893  * it when quota became fake.
894  */
895 int lustre_commit_dquot(struct lustre_dquot *dquot)
896 {
897         int rc = 0;
898         lustre_quota_version_t version = dquot->dq_info->qi_version;
899
900         /* always clear the flag so we don't loop on an IO error... */
901         clear_bit(DQ_MOD_B, &dquot->dq_flags);
902
903         /* The block/inode usage in admin quotafile isn't the real usage
904          * over all cluster, so keep the fake dquot entry on disk is
905          * meaningless, just remove it */
906         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
907                 rc = lustre_delete_dquot(dquot, version);
908         else
909                 rc = lustre_write_dquot(dquot, version);
910
911         if (rc < 0)
912                 return rc;
913
914         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
915                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
916
917         return rc;
918 }
919
920 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
921                              int fakemagics)
922 {
923         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
924         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
925         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
926         struct lustre_disk_dqheader dqhead;
927         ssize_t size;
928         loff_t offset = 0;
929         struct file *fp = lqi->qi_files[type];
930         int rc = 0;
931
932         /* write quotafile header */
933         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
934                                        fake_magics[type] : quota_magics[type]);
935         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
936         size = fp->f_op->write(fp, (char *)&dqhead,
937                                sizeof(struct lustre_disk_dqheader), &offset);
938
939         if (size != sizeof(struct lustre_disk_dqheader)) {
940                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
941                 rc = size;
942         }
943
944         return rc;
945 }
946
947 /**
948  * We need to export this function to initialize quotafile, because we haven't
949  * user level check utility
950  */
951 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
952                                    int fakemagics)
953 {
954         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
955         int rc;
956
957         rc = lustre_init_quota_header(lqi, type, fakemagics);
958         if (rc)
959                 return rc;
960
961         /* write init quota info */
962         memset(dqinfo, 0, sizeof(*dqinfo));
963         dqinfo->dqi_bgrace = MAX_DQ_TIME;
964         dqinfo->dqi_igrace = MAX_IQ_TIME;
965         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
966
967         return lustre_write_quota_info(lqi, type);
968 }
969
/**
 * Initialize the quota file of the given \a type with the regular
 * (non-fake) magic.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
974
975 ssize_t quota_read(struct file *file, struct inode *inode, int type,
976                    uint blk, dqbuf_t buf)
977 {
978         if (file) {
979                 return read_blk(file, blk, buf);
980         } else {
981 #ifndef KERNEL_SUPPORTS_QUOTA_READ
982                 return -ENOTSUPP;
983 #else
984                 struct super_block *sb = inode->i_sb;
985                 memset(buf, 0, LUSTRE_DQBLKSIZE);
986                 return sb->s_op->quota_read(sb, type, (char *)buf,
987                                             LUSTRE_DQBLKSIZE, 
988                                             blk << LUSTRE_DQBLKSIZE_BITS);
989 #endif
990         }
991 }
992
993 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
994                               uint blk, struct list_head *list)
995 {
996         dqbuf_t buf = getdqbuf();
997         loff_t ret = 0;
998         struct lustre_disk_dqdbheader *dqhead =
999             (struct lustre_disk_dqdbheader *)buf;
1000         struct dqblk *blk_item;
1001         struct dqblk *pos;
1002         struct list_head *tmp;
1003
1004         if (!buf)
1005                 return -ENOMEM;
1006         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1007                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1008                 goto out_buf;
1009         }
1010         ret = 0;
1011
1012         if (!le32_to_cpu(dqhead->dqdh_entries))
1013                 goto out_buf;
1014
1015         if (list_empty(list)) {
1016                 tmp = list;
1017                 goto done;
1018         }
1019
1020         list_for_each_entry(pos, list, link) {
1021                 if (blk == pos->blk)    /* we got this blk already */
1022                         goto out_buf;
1023                 if (blk > pos->blk)
1024                         continue;
1025                 break;
1026         }
1027         tmp = &pos->link;
1028 done:
1029         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
1030         if (!blk_item) {
1031                 ret = -ENOMEM;
1032                 goto out_buf;
1033         }
1034         blk_item->blk = blk;
1035         INIT_LIST_HEAD(&blk_item->link);
1036
1037         list_add_tail(&blk_item->link, tmp);
1038
1039 out_buf:
1040         freedqbuf(buf);
1041         return ret;
1042 }
1043
1044 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1045                       uint blk, int depth, struct list_head *list)
1046 {
1047         dqbuf_t buf = getdqbuf();
1048         loff_t ret = 0;
1049         int index;
1050         u32 *ref = (u32 *) buf;
1051
1052         if (!buf)
1053                 return -ENOMEM;
1054         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1055                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1056                 goto out_buf;
1057         }
1058         ret = 0;
1059
1060         for (index = 0; index <= 0xff && !ret; index++) {
1061                 blk = le32_to_cpu(ref[index]);
1062                 if (!blk)       /* No reference */
1063                         continue;
1064
1065                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1066                         ret = walk_tree_dqentry(filp, inode, type, blk,
1067                                                 depth + 1, list);
1068                 else
1069                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1070         }
1071 out_buf:
1072         freedqbuf(buf);
1073         return ret;
1074 }
1075
1076 /**
1077  * Walk through the quota file (v2 format) to get all ids with quota limit
1078  */
1079 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1080                     struct list_head *list)
1081 {
1082         struct list_head blk_list;
1083         struct dqblk *blk_item, *tmp;
1084         dqbuf_t buf = NULL;
1085         struct lustre_disk_dqblk_v2 *ddquot;
1086         int rc;
1087         lustre_quota_version_t version;
1088
1089         ENTRY;
1090
1091         LASSERT(ergo(fp == NULL, inode != NULL));
1092
1093         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1094                 version = LUSTRE_QUOTA_V2;
1095         else {
1096                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1097                 RETURN(-EINVAL);
1098         }
1099
1100         if (!list_empty(list)) {
1101                 CDEBUG(D_ERROR, "not empty list\n");
1102                 RETURN(-EINVAL);
1103         }
1104
1105         INIT_LIST_HEAD(&blk_list);
1106         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1107         if (rc) {
1108                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1109                 GOTO(out_free, rc);
1110         }
1111         if (list_empty(&blk_list))
1112                 RETURN(0);
1113
1114         buf = getdqbuf();
1115         if (!buf)
1116                 RETURN(-ENOMEM);
1117         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1118
1119         list_for_each_entry(blk_item, &blk_list, link) {
1120                 loff_t ret = 0;
1121                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1122
1123                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1124                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1125                         CDEBUG(D_ERROR,
1126                                "VFS: Can't read quota tree block %u.\n",
1127                                blk_item->blk);
1128                         GOTO(out_free, rc = ret);
1129                 }
1130
1131                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1132                         struct dquot_id *dqid;
1133                         /* skip empty entry */
1134                         if (!memcmp((char *)&emptydquot[version],
1135                                     (char *)&ddquot[i], dqblk_sz))
1136                                 continue;
1137
1138                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1139                         if (!dqid)
1140                                 GOTO(out_free, rc = -ENOMEM);
1141
1142                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1143                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1144                                          QI_SET : 0;
1145                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1146                                          QB_SET : 0;
1147
1148                         INIT_LIST_HEAD(&dqid->di_link);
1149                         list_add(&dqid->di_link, list);
1150                 }
1151         }
1152
1153 out_free:
1154         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1155                 list_del_init(&blk_item->link);
1156                 kfree(blk_item);
1157         }
1158         if (buf)
1159                 freedqbuf(buf);
1160
1161         RETURN(rc);
1162 }
1163
1164
1165 EXPORT_SYMBOL(lustre_read_quota_info);
1166 EXPORT_SYMBOL(lustre_write_quota_info);
1167 EXPORT_SYMBOL(lustre_check_quota_file);
1168 EXPORT_SYMBOL(lustre_read_dquot);
1169 EXPORT_SYMBOL(lustre_commit_dquot);
1170 EXPORT_SYMBOL(lustre_init_quota_info);
1171 EXPORT_SYMBOL(lustre_get_qids);
1172 #endif