Whamcloud - gitweb
9508437a477aa1f2383931040240ae1759faa0bc
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #ifdef HAVE_QUOTAIO_V1_H
54 # include <linux/quotaio_v1.h>
55 #endif
56
57 #include <asm/byteorder.h>
58 #include <asm/uaccess.h>
59
60 #include <lustre_quota.h>
61 #include <obd_support.h>
62 #include "lustre_quota_fmt.h"
63
64 #ifdef HAVE_QUOTA_SUPPORT
65
/*
 * Expected on-disk version number for every quota type, indexed by quota
 * format version. check_quota_file() compares the file header against the
 * row selected by the caller's version.
 */
static const uint lustre_initqversions[][MAXQUOTAS] = {
        [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
};

/*
 * Number of dquot entries held by one data block, indexed by quota format
 * version. Used as the scan bound when searching a data block.
 */
static const int lustre_dqstrinblk[] = {
        [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
};

/* Size in bytes of one on-disk dquot entry, indexed by quota format version. */
static const int lustre_disk_dqblk_sz[] = {
        [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
};

/*
 * On-disk entry whose only non-zero field is dqb_itime (little-endian 1).
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably the image of an "all limits zero" dquot, matching the
 * dqb_itime = 1 substitution done in lustre_write_dquot() -- confirm.
 */
static const union
{
        struct lustre_disk_dqblk_v2 r1;
} fakedquot[] = {
        [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
};

/*
 * All-zero on-disk entry. Slots in a data block are compared against it with
 * memcmp() to tell unused slots from occupied ones.
 */
static const union
{
        struct lustre_disk_dqblk_v2 r1;
} emptydquot[] = {
        [LUSTRE_QUOTA_V2] = {.r1 = { 0 } }
};
91
92 int check_quota_file(struct file *f, struct inode *inode, int type, 
93                      lustre_quota_version_t version)
94 {
95         struct lustre_disk_dqheader dqhead;
96         mm_segment_t fs;
97         ssize_t size;
98         loff_t offset = 0;
99         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
100         const uint *quota_versions = lustre_initqversions[version];
101
102         if (!inode && !f) {
103                 CERROR("check_quota_file failed!\n");
104                 libcfs_debug_dumpstack(NULL);
105                 return -EINVAL;
106         }
107
108         if (f) {
109                 fs = get_fs();
110                 set_fs(KERNEL_DS);
111                 size = f->f_op->read(f, (char *)&dqhead,
112                                      sizeof(struct lustre_disk_dqheader), 
113                                      &offset);
114                 set_fs(fs);
115         } else { 
116 #ifndef KERNEL_SUPPORTS_QUOTA_READ
117                 size = 0;
118 #else
119                 struct super_block *sb = inode->i_sb;
120                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
121                                             sizeof(struct lustre_disk_dqheader),
122                                             0);
123 #endif
124         }
125         if (size != sizeof(struct lustre_disk_dqheader))
126                 return -EINVAL;
127         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
128             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
129                 return -EINVAL;
130         return 0;
131 }
132
133 /**
134  * Check whether given file is really lustre admin quotafile
135  */
136 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
137 {
138         struct file *f = lqi->qi_files[type];
139         return check_quota_file(f, NULL, type, lqi->qi_version);
140 }
141
142 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
143 {
144         mm_segment_t fs;
145         struct lustre_disk_dqinfo dinfo;
146         ssize_t size;
147         loff_t offset = LUSTRE_DQINFOOFF;
148
149         fs = get_fs();
150         set_fs(KERNEL_DS);
151         size = f->f_op->read(f, (char *)&dinfo, 
152                              sizeof(struct lustre_disk_dqinfo), &offset);
153         set_fs(fs);
154         if (size != sizeof(struct lustre_disk_dqinfo)) {
155                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
156                        f->f_vfsmnt->mnt_sb->s_id);
157                 return -EINVAL;
158         }
159         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
160         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
161         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
162         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
163         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
164         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
165         return 0;
166 }
167
168 /**
169  * Read information header from quota file
170  */
171 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
172 {
173         return lustre_read_quota_file_info(lqi->qi_files[type],
174                                            &lqi->qi_info[type]);
175 }
176
177 /**
178  * Write information header to quota file
179  */
180 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
181 {
182         mm_segment_t fs;
183         struct lustre_disk_dqinfo dinfo;
184         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
185         struct file *f = lqi->qi_files[type];
186         ssize_t size;
187         loff_t offset = LUSTRE_DQINFOOFF;
188
189         info->dqi_flags &= ~DQF_INFO_DIRTY;
190         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
191         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
192         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
193         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
194         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
195         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
196         fs = get_fs();
197         set_fs(KERNEL_DS);
198         size = f->f_op->write(f, (char *)&dinfo, 
199                               sizeof(struct lustre_disk_dqinfo), &offset);
200         set_fs(fs);
201         if (size != sizeof(struct lustre_disk_dqinfo)) {
202                 CDEBUG(D_WARNING, 
203                        "Can't write info structure on device %s.\n",
204                        f->f_vfsmnt->mnt_sb->s_id);
205                 return -1;
206         }
207         return 0;
208 }
209
210 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
211                  lustre_quota_version_t version)
212 {
213         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
214
215         LASSERT(version == LUSTRE_QUOTA_V2);
216
217         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
218         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
219         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
220         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
221         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
222         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
223         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
224         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
225 }
226
227 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
228                        qid_t id, lustre_quota_version_t version)
229 {
230         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
231
232         LASSERT(version == LUSTRE_QUOTA_V2);
233
234         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
235         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
236         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
237         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
238         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
239         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
240         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
241         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
242         dqblk->dqb_id = cpu_to_le32(id);
243
244         return 0;
245 }
246
247 dqbuf_t getdqbuf(void)
248 {
249         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
250         if (!buf)
251                 CDEBUG(D_WARNING, 
252                        "VFS: Not enough memory for quota buffers.\n");
253         return buf;
254 }
255
256 void freedqbuf(dqbuf_t buf)
257 {
258         kfree(buf);
259 }
260
261 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
262 {
263         mm_segment_t fs;
264         ssize_t ret;
265         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
266
267         memset(buf, 0, LUSTRE_DQBLKSIZE);
268         fs = get_fs();
269         set_fs(KERNEL_DS);
270         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
271         set_fs(fs);
272         return ret;
273 }
274
275 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
276 {
277         mm_segment_t fs;
278         ssize_t ret;
279         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
280
281         fs = get_fs();
282         set_fs(KERNEL_DS);
283         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
284         set_fs(fs);
285         return ret;
286 }
287
288 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
289 {
290         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
291 }
292
293 /**
294  * Remove empty block from list and return it
295  */
296 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
297 {
298         dqbuf_t buf = getdqbuf();
299         struct lustre_disk_dqdbheader *dh =
300             (struct lustre_disk_dqdbheader *)buf;
301         int ret, blk;
302
303         if (!buf)
304                 return -ENOMEM;
305         if (info->dqi_free_blk) {
306                 blk = info->dqi_free_blk;
307                 if ((ret = read_blk(filp, blk, buf)) < 0)
308                         goto out_buf;
309                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
310         } else {
311                 memset(buf, 0, LUSTRE_DQBLKSIZE);
312                 /* Assure block allocation... */
313                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
314                         goto out_buf;
315                 blk = info->dqi_blocks++;
316         }
317         lustre_mark_info_dirty(info);
318         ret = blk;
319 out_buf:
320         freedqbuf(buf);
321         return ret;
322 }
323
324 /**
325  * Insert empty block to the list
326  */
327 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
328                    dqbuf_t buf, uint blk)
329 {
330         struct lustre_disk_dqdbheader *dh =
331             (struct lustre_disk_dqdbheader *)buf;
332         int err;
333
334         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
335         dh->dqdh_prev_free = cpu_to_le32(0);
336         dh->dqdh_entries = cpu_to_le16(0);
337         info->dqi_free_blk = blk;
338         lustre_mark_info_dirty(info);
339         if ((err = write_blk(filp, blk, buf)) < 0)
340                 /* Some strange block. We had better leave it... */
341                 return err;
342         return 0;
343 }
344
345 /**
346  * Remove given block from the list of blocks with free entries
347  */
348 int remove_free_dqentry(struct file *filp,
349                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
350                         uint blk)
351 {
352         dqbuf_t tmpbuf = getdqbuf();
353         struct lustre_disk_dqdbheader *dh =
354             (struct lustre_disk_dqdbheader *)buf;
355         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
356             le32_to_cpu(dh->dqdh_prev_free);
357         int err;
358
359         if (!tmpbuf)
360                 return -ENOMEM;
361         if (nextblk) {
362                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
363                         goto out_buf;
364                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
365                     dh->dqdh_prev_free;
366                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
367                         goto out_buf;
368         }
369         if (prevblk) {
370                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
371                         goto out_buf;
372                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
373                     dh->dqdh_next_free;
374                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
375                         goto out_buf;
376         } else {
377                 info->dqi_free_entry = nextblk;
378                 lustre_mark_info_dirty(info);
379         }
380         freedqbuf(tmpbuf);
381         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
382         if (write_blk(filp, blk, buf) < 0)
383                 /* No matter whether write succeeds block is out of list */
384                 CDEBUG(D_ERROR, 
385                        "VFS: Can't write block (%u) with free entries.\n", blk);
386         return 0;
387 out_buf:
388         freedqbuf(tmpbuf);
389         return err;
390 }
391
392 /**
393  * Insert given block to the beginning of list with free entries
394  */
395 int insert_free_dqentry(struct file *filp,
396                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
397                         uint blk)
398 {
399         dqbuf_t tmpbuf = getdqbuf();
400         struct lustre_disk_dqdbheader *dh =
401             (struct lustre_disk_dqdbheader *)buf;
402         int err;
403
404         if (!tmpbuf)
405                 return -ENOMEM;
406         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
407         dh->dqdh_prev_free = cpu_to_le32(0);
408         if ((err = write_blk(filp, blk, buf)) < 0)
409                 goto out_buf;
410         if (info->dqi_free_entry) {
411                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
412                         goto out_buf;
413                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
414                     cpu_to_le32(blk);
415                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
416                         goto out_buf;
417         }
418         freedqbuf(tmpbuf);
419         info->dqi_free_entry = blk;
420         lustre_mark_info_dirty(info);
421         return 0;
422 out_buf:
423         freedqbuf(tmpbuf);
424         return err;
425 }
426
427
428
429 /**
430  * Find space for dquot
431  */
432 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
433                               lustre_quota_version_t version)
434 {
435         struct lustre_quota_info *lqi = dquot->dq_info;
436         struct file *filp = lqi->qi_files[dquot->dq_type];
437         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
438         uint blk, i;
439         struct lustre_disk_dqdbheader *dh;
440         void *ddquot;
441         int dqblk_sz = lustre_disk_dqblk_sz[version];
442         int dqstrinblk = lustre_dqstrinblk[version];
443         dqbuf_t buf;
444
445         *err = 0;
446         if (!(buf = getdqbuf())) {
447                 *err = -ENOMEM;
448                 return 0;
449         }
450         dh = (struct lustre_disk_dqdbheader *)buf;
451         ddquot = GETENTRIES(buf, version);
452         if (info->dqi_free_entry) {
453                 blk = info->dqi_free_entry;
454                 if ((*err = read_blk(filp, blk, buf)) < 0)
455                         goto out_buf;
456         } else {
457                 blk = get_free_dqblk(filp, info);
458                 if ((int)blk < 0) {
459                         *err = blk;
460                         freedqbuf(buf);
461                         return 0;
462                 }
463                 memset(buf, 0, LUSTRE_DQBLKSIZE);
464                 info->dqi_free_entry = blk; /* This is enough as block is 
465                                                already zeroed and entry list
466                                                is empty... */
467                 lustre_mark_info_dirty(info);
468         }
469
470         /* Will block be full */
471         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
472                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
473                         CDEBUG(D_ERROR, 
474                                "VFS: find_free_dqentry(): Can't remove block "
475                                "(%u) from entry free list.\n", blk);
476                         goto out_buf;
477                 }
478         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
479         /* Find free structure in block */
480         for (i = 0; i < dqstrinblk &&
481              memcmp((char *)&emptydquot[version],
482                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
483              i++);
484
485         if (i == dqstrinblk) {
486                 CDEBUG(D_ERROR, 
487                        "VFS: find_free_dqentry(): Data block full but it "
488                        "shouldn't.\n");
489                 *err = -EIO;
490                 goto out_buf;
491         }
492
493         if ((*err = write_blk(filp, blk, buf)) < 0) {
494                 CDEBUG(D_ERROR,
495                        "VFS: find_free_dqentry(): Can't write quota data "
496                        "block %u.\n", blk);
497                 goto out_buf;
498         }
499         dquot->dq_off =
500             (blk << LUSTRE_DQBLKSIZE_BITS) +
501             sizeof(struct lustre_disk_dqdbheader) +
502             i * dqblk_sz;
503         freedqbuf(buf);
504         return blk;
505 out_buf:
506         freedqbuf(buf);
507         return 0;
508 }
509
510 /**
511  * Insert reference to structure into the trie
512  */
513 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
514                           lustre_quota_version_t version)
515 {
516         struct lustre_quota_info *lqi = dquot->dq_info;
517         struct file *filp = lqi->qi_files[dquot->dq_type];
518         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
519         dqbuf_t buf;
520         int ret = 0, newson = 0, newact = 0;
521         u32 *ref;
522         uint newblk;
523
524         if (!(buf = getdqbuf()))
525                 return -ENOMEM;
526         if (!*treeblk) {
527                 ret = get_free_dqblk(filp, info);
528                 if (ret < 0)
529                         goto out_buf;
530                 *treeblk = ret;
531                 memset(buf, 0, LUSTRE_DQBLKSIZE);
532                 newact = 1;
533         } else {
534                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
535                         CDEBUG(D_ERROR,
536                                "VFS: Can't read tree quota block %u.\n",
537                                *treeblk);
538                         goto out_buf;
539                 }
540         }
541         ref = (u32 *) buf;
542         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
543         if (!newblk)
544                 newson = 1;
545         if (depth == LUSTRE_DQTREEDEPTH - 1) {
546
547                 if (newblk) {
548                         CDEBUG(D_ERROR, 
549                                "VFS: Inserting already present quota entry "
550                                "(block %u).\n",
551                                ref[GETIDINDEX(dquot->dq_id, depth)]);
552                         ret = -EIO;
553                         goto out_buf;
554                 }
555
556                 newblk = find_free_dqentry(dquot, &ret, version);
557         } else
558                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
559         if (newson && ret >= 0) {
560                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
561                 ret = write_blk(filp, *treeblk, buf);
562         } else if (newact && ret < 0)
563                 put_free_dqblk(filp, info, buf, *treeblk);
564 out_buf:
565         freedqbuf(buf);
566         return ret;
567 }
568
569 /**
570  * Wrapper for inserting quota structure into tree
571  */
572 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
573                                  lustre_quota_version_t version)
574 {
575         int tmp = LUSTRE_DQTREEOFF;
576         return do_insert_tree(dquot, &tmp, 0, version);
577 }
578
579 /**
580  * We don't have to be afraid of deadlocks as we never have quotas on
581  * quota files...
582  */
583 static int lustre_write_dquot(struct lustre_dquot *dquot, 
584                               lustre_quota_version_t version)
585 {
586         int type = dquot->dq_type;
587         struct file *filp;
588         mm_segment_t fs;
589         loff_t offset;
590         ssize_t ret;
591         int dqblk_sz = lustre_disk_dqblk_sz[version];
592         struct lustre_disk_dqblk_v2 ddquot;
593
594         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
595         if (ret < 0)
596                 return ret;
597
598         if (!dquot->dq_off)
599                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
600                         CDEBUG(D_ERROR,
601                                "VFS: Error %Zd occurred while creating "
602                                "quota.\n", ret);
603                         return ret;
604                 }
605         filp = dquot->dq_info->qi_files[type];
606         offset = dquot->dq_off;
607         /* Argh... We may need to write structure full of zeroes but that would
608          * be treated as an empty place by the rest of the code. Format change
609          * would be definitely cleaner but the problems probably are not worth
610          * it */
611         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
612                 ddquot.dqb_itime = cpu_to_le64(1);
613         fs = get_fs();
614         set_fs(KERNEL_DS);
615         ret = filp->f_op->write(filp, (char *)&ddquot,
616                                 dqblk_sz, &offset);
617         set_fs(fs);
618         if (ret != dqblk_sz) {
619                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
620                        filp->f_dentry->d_sb->s_id);
621                 if (ret >= 0)
622                         ret = -ENOSPC;
623         } else
624                 ret = 0;
625
626         return ret;
627 }
628
629 /**
630  * Free dquot entry in data block
631  */
632 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
633                         lustre_quota_version_t version)
634 {
635         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
636         struct lustre_mem_dqinfo *info =
637             &dquot->dq_info->qi_info[dquot->dq_type];
638         struct lustre_disk_dqdbheader *dh;
639         dqbuf_t buf = getdqbuf();
640         int dqstrinblk = lustre_dqstrinblk[version];
641         int ret = 0;
642
643         if (!buf)
644                 return -ENOMEM;
645         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
646                 CDEBUG(D_ERROR,
647                        "VFS: Quota structure has offset to other block (%u) "
648                        "than it should (%u).\n",
649                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
650                 goto out_buf;
651         }
652         if ((ret = read_blk(filp, blk, buf)) < 0) {
653                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
654                 goto out_buf;
655         }
656         dh = (struct lustre_disk_dqdbheader *)buf;
657         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
658         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
659                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
660                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
661                         CDEBUG(D_ERROR,
662                                "VFS: Can't move quota data block (%u) to free "
663                                "list.\n", blk);
664                         goto out_buf;
665                 }
666         } else {
667                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
668                        0, lustre_disk_dqblk_sz[version]);
669                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
670                         /* Insert will write block itself */
671                         if ((ret =
672                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
673                                 CDEBUG(D_ERROR,
674                                        "VFS: Can't insert quota data block "
675                                        "(%u) to free entry list.\n", blk);
676                                 goto out_buf;
677                         }
678                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
679                         CDEBUG(D_ERROR,
680                                "VFS: Can't write quota data block %u\n", blk);
681                         goto out_buf;
682                 }
683         }
684         dquot->dq_off = 0;      /* Quota is now unattached */
685 out_buf:
686         freedqbuf(buf);
687         return ret;
688 }
689
690 /**
691  * Remove reference to dquot from tree
692  */
693 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
694                        lustre_quota_version_t version)
695 {
696         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
697         struct lustre_mem_dqinfo *info =
698             &dquot->dq_info->qi_info[dquot->dq_type];
699         dqbuf_t buf = getdqbuf();
700         int ret = 0;
701         uint newblk;
702         u32 *ref = (u32 *) buf;
703
704         if (!buf)
705                 return -ENOMEM;
706         if ((ret = read_blk(filp, *blk, buf)) < 0) {
707                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", *blk);
708                 goto out_buf;
709         }
710         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
711         if (depth == LUSTRE_DQTREEDEPTH - 1) {
712                 ret = free_dqentry(dquot, newblk, version);
713                 newblk = 0;
714         } else
715                 ret = remove_tree(dquot, &newblk, depth + 1, version);
716         if (ret >= 0 && !newblk) {
717                 int i;
718                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
719                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
720                         /* Block got empty? */ ;
721                 /* don't put the root block into free blk list! */
722                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
723                         put_free_dqblk(filp, info, buf, *blk);
724                         *blk = 0;
725                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
726                         CDEBUG(D_ERROR,
727                                "VFS: Can't write quota tree block %u.\n", *blk);
728         }
729 out_buf:
730         freedqbuf(buf);
731         return ret;
732 }
733
734 /**
735  * Delete dquot from tree
736  */
737 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
738                                 lustre_quota_version_t version)
739 {
740         uint tmp = LUSTRE_DQTREEOFF;
741
742         if (!dquot->dq_off)     /* Even not allocated? */
743                 return 0;
744         return remove_tree(dquot, &tmp, 0, version);
745 }
746
747 /**
748  * Find entry in block
749  */
750 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
751                                  lustre_quota_version_t version)
752 {
753         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
754         dqbuf_t buf = getdqbuf();
755         loff_t ret = 0;
756         int i;
757         struct lustre_disk_dqblk_v2 *ddquot =
758                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
759         int dqblk_sz = lustre_disk_dqblk_sz[version];
760         int dqstrinblk = lustre_dqstrinblk[version];
761
762         LASSERT(version == LUSTRE_QUOTA_V2);
763
764         if (!buf)
765                 return -ENOMEM;
766         if ((ret = read_blk(filp, blk, buf)) < 0) {
767                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
768                 goto out_buf;
769         }
770         if (dquot->dq_id)
771                 for (i = 0; i < dqstrinblk && 
772                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
773                      i++) ;
774         else {                  /* ID 0 as a bit more complicated searching... */
775                 for (i = 0; i < dqstrinblk; i++)
776                         if (!le32_to_cpu(ddquot[i].dqb_id)
777                             && memcmp((char *)&emptydquot[version],
778                                       (char *)&ddquot[i], dqblk_sz))
779                                 break;
780         }
781         if (i == dqstrinblk) {
782                 CDEBUG(D_ERROR,
783                        "VFS: Quota for id %u referenced but not present.\n",
784                        dquot->dq_id);
785                 ret = -EIO;
786                 goto out_buf;
787         } else
788                 ret =
789                     (blk << LUSTRE_DQBLKSIZE_BITS) +
790                     sizeof(struct lustre_disk_dqdbheader) +
791                     i * dqblk_sz;
792 out_buf:
793         freedqbuf(buf);
794         return ret;
795 }
796
797 /**
798  * Find entry for given id in the tree
799  */
800 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
801                                 lustre_quota_version_t version)
802 {
803         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
804         dqbuf_t buf = getdqbuf();
805         loff_t ret = 0;
806         u32 *ref = (u32 *) buf;
807
808         if (!buf)
809                 return -ENOMEM;
810         if ((ret = read_blk(filp, blk, buf)) < 0) {
811                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
812                 goto out_buf;
813         }
814         ret = 0;
815         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
816         if (!blk)               /* No reference? */
817                 goto out_buf;
818         if (depth < LUSTRE_DQTREEDEPTH - 1)
819                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
820         else
821                 ret = find_block_dqentry(dquot, blk, version);
822 out_buf:
823         freedqbuf(buf);
824         return ret;
825 }
826
827 /**
828  * Find entry for given id in the tree - wrapper function
829  */
830 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
831                                   lustre_quota_version_t version)
832 {
833         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
834 }
835
836 int lustre_read_dquot(struct lustre_dquot *dquot)
837 {
838         int type = dquot->dq_type;
839         struct file *filp;
840         mm_segment_t fs;
841         loff_t offset;
842         int ret = 0, dqblk_sz;
843         lustre_quota_version_t version;
844
845         /* Invalidated quota? */
846         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
847                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
848                 return -ESRCH;
849         }
850
851         version = dquot->dq_info->qi_version;
852         LASSERT(version == LUSTRE_QUOTA_V2);
853         dqblk_sz = lustre_disk_dqblk_sz[version];
854
855         offset = find_dqentry(dquot, version);
856         if (offset <= 0) {      /* Entry not present? */
857                 if (offset < 0)
858                         CDEBUG(D_ERROR,
859                                "VFS: Can't read quota structure for id %u.\n",
860                                dquot->dq_id);
861                 dquot->dq_off = 0;
862                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
863                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
864                 ret = offset;
865         } else {
866                 struct lustre_disk_dqblk_v2 ddquot;
867
868                 dquot->dq_off = offset;
869                 fs = get_fs();
870                 set_fs(KERNEL_DS);
871                 if ((ret = filp->f_op->read(filp, (char *)&ddquot,
872                                             dqblk_sz, &offset)) != dqblk_sz) {
873                         if (ret >= 0)
874                                 ret = -EIO;
875                         CDEBUG(D_ERROR,
876                                "VFS: Error while reading quota structure for id "
877                                "%u.\n", dquot->dq_id);
878                         memset((char *)&ddquot, 0, dqblk_sz);
879                 } else {
880                         ret = 0;
881                         /* We need to escape back all-zero structure */
882                         if (!memcmp((char *)&fakedquot[version],
883                                     (char *)&ddquot, dqblk_sz))
884                                 ddquot.dqb_itime = cpu_to_le64(0);
885                 }
886                 set_fs(fs);
887                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
888         }
889
890         return ret;
891 }
892
893 /**
894  * Commit changes of dquot to disk - it might also mean deleting
895  * it when quota became fake.
896  */
897 int lustre_commit_dquot(struct lustre_dquot *dquot)
898 {
899         int rc = 0;
900         lustre_quota_version_t version = dquot->dq_info->qi_version;
901
902         /* always clear the flag so we don't loop on an IO error... */
903         clear_bit(DQ_MOD_B, &dquot->dq_flags);
904
905         /* The block/inode usage in admin quotafile isn't the real usage
906          * over all cluster, so keep the fake dquot entry on disk is
907          * meaningless, just remove it */
908         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
909                 rc = lustre_delete_dquot(dquot, version);
910         else
911                 rc = lustre_write_dquot(dquot, version);
912
913         if (rc < 0)
914                 return rc;
915
916         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
917                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
918
919         return rc;
920 }
921
922 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
923                              int fakemagics)
924 {
925         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
926         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
927         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
928         struct lustre_disk_dqheader dqhead;
929         ssize_t size;
930         loff_t offset = 0;
931         struct file *fp = lqi->qi_files[type];
932         int rc = 0;
933
934         /* write quotafile header */
935         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
936                                        fake_magics[type] : quota_magics[type]);
937         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
938         size = fp->f_op->write(fp, (char *)&dqhead,
939                                sizeof(struct lustre_disk_dqheader), &offset);
940
941         if (size != sizeof(struct lustre_disk_dqheader)) {
942                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
943                 rc = size;
944         }
945
946         return rc;
947 }
948
949 /**
950  * We need to export this function to initialize quotafile, because we haven't
951  * user level check utility
952  */
953 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
954                                    int fakemagics)
955 {
956         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
957         int rc;
958
959         rc = lustre_init_quota_header(lqi, type, fakemagics);
960         if (rc)
961                 return rc;
962
963         /* write init quota info */
964         memset(dqinfo, 0, sizeof(*dqinfo));
965         dqinfo->dqi_bgrace = MAX_DQ_TIME;
966         dqinfo->dqi_igrace = MAX_IQ_TIME;
967         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
968
969         return lustre_write_quota_info(lqi, type);
970 }
971
/**
 * Initialize the quota file for \a type with the regular (non-fake) magic.
 *
 * Returns whatever lustre_init_quota_info_generic() returns.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int use_fake_magics = 0;

        return lustre_init_quota_info_generic(lqi, type, use_fake_magics);
}
976
977 ssize_t quota_read(struct file *file, struct inode *inode, int type,
978                    uint blk, dqbuf_t buf)
979 {
980         if (file) {
981                 return read_blk(file, blk, buf);
982         } else {
983 #ifndef KERNEL_SUPPORTS_QUOTA_READ
984                 return -ENOTSUPP;
985 #else
986                 struct super_block *sb = inode->i_sb;
987                 memset(buf, 0, LUSTRE_DQBLKSIZE);
988                 return sb->s_op->quota_read(sb, type, (char *)buf,
989                                             LUSTRE_DQBLKSIZE, 
990                                             blk << LUSTRE_DQBLKSIZE_BITS);
991 #endif
992         }
993 }
994
995 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
996                               uint blk, struct list_head *list)
997 {
998         dqbuf_t buf = getdqbuf();
999         loff_t ret = 0;
1000         struct lustre_disk_dqdbheader *dqhead =
1001             (struct lustre_disk_dqdbheader *)buf;
1002         struct dqblk *blk_item;
1003         struct dqblk *pos;
1004         struct list_head *tmp;
1005
1006         if (!buf)
1007                 return -ENOMEM;
1008         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1009                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1010                 goto out_buf;
1011         }
1012         ret = 0;
1013
1014         if (!le32_to_cpu(dqhead->dqdh_entries))
1015                 goto out_buf;
1016
1017         if (list_empty(list)) {
1018                 tmp = list;
1019                 goto done;
1020         }
1021
1022         list_for_each_entry(pos, list, link) {
1023                 if (blk == pos->blk)    /* we got this blk already */
1024                         goto out_buf;
1025                 if (blk > pos->blk)
1026                         continue;
1027                 break;
1028         }
1029         tmp = &pos->link;
1030 done:
1031         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
1032         if (!blk_item) {
1033                 ret = -ENOMEM;
1034                 goto out_buf;
1035         }
1036         blk_item->blk = blk;
1037         INIT_LIST_HEAD(&blk_item->link);
1038
1039         list_add_tail(&blk_item->link, tmp);
1040
1041 out_buf:
1042         freedqbuf(buf);
1043         return ret;
1044 }
1045
1046 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1047                       uint blk, int depth, struct list_head *list)
1048 {
1049         dqbuf_t buf = getdqbuf();
1050         loff_t ret = 0;
1051         int index;
1052         u32 *ref = (u32 *) buf;
1053
1054         if (!buf)
1055                 return -ENOMEM;
1056         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1057                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1058                 goto out_buf;
1059         }
1060         ret = 0;
1061
1062         for (index = 0; index <= 0xff && !ret; index++) {
1063                 blk = le32_to_cpu(ref[index]);
1064                 if (!blk)       /* No reference */
1065                         continue;
1066
1067                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1068                         ret = walk_tree_dqentry(filp, inode, type, blk,
1069                                                 depth + 1, list);
1070                 else
1071                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1072         }
1073 out_buf:
1074         freedqbuf(buf);
1075         return ret;
1076 }
1077
1078 /**
1079  * Walk through the quota file (v2 format) to get all ids with quota limit
1080  */
1081 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1082                     struct list_head *list)
1083 {
1084         struct list_head blk_list;
1085         struct dqblk *blk_item, *tmp;
1086         dqbuf_t buf = NULL;
1087         struct lustre_disk_dqblk_v2 *ddquot;
1088         int rc;
1089         lustre_quota_version_t version;
1090
1091         ENTRY;
1092
1093         LASSERT(ergo(fp == NULL, inode != NULL));
1094
1095         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1096                 version = LUSTRE_QUOTA_V2;
1097         else {
1098                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1099                 RETURN(-EINVAL);
1100         }
1101
1102         if (!list_empty(list)) {
1103                 CDEBUG(D_ERROR, "not empty list\n");
1104                 RETURN(-EINVAL);
1105         }
1106
1107         INIT_LIST_HEAD(&blk_list);
1108         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1109         if (rc) {
1110                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1111                 GOTO(out_free, rc);
1112         }
1113         if (list_empty(&blk_list))
1114                 RETURN(0);
1115
1116         buf = getdqbuf();
1117         if (!buf)
1118                 RETURN(-ENOMEM);
1119         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1120
1121         list_for_each_entry(blk_item, &blk_list, link) {
1122                 loff_t ret = 0;
1123                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1124
1125                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1126                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1127                         CDEBUG(D_ERROR,
1128                                "VFS: Can't read quota tree block %u.\n",
1129                                blk_item->blk);
1130                         GOTO(out_free, rc = ret);
1131                 }
1132
1133                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1134                         struct dquot_id *dqid;
1135                         /* skip empty entry */
1136                         if (!memcmp((char *)&emptydquot[version],
1137                                     (char *)&ddquot[i], dqblk_sz))
1138                                 continue;
1139
1140                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1141                         if (!dqid)
1142                                 GOTO(out_free, rc = -ENOMEM);
1143
1144                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1145                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1146                                          QI_SET : 0;
1147                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1148                                          QB_SET : 0;
1149
1150                         INIT_LIST_HEAD(&dqid->di_link);
1151                         list_add(&dqid->di_link, list);
1152                 }
1153         }
1154
1155 out_free:
1156         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1157                 list_del_init(&blk_item->link);
1158                 kfree(blk_item);
1159         }
1160         if (buf)
1161                 freedqbuf(buf);
1162
1163         RETURN(rc);
1164 }
1165
1166
1167 EXPORT_SYMBOL(lustre_read_quota_info);
1168 EXPORT_SYMBOL(lustre_write_quota_info);
1169 EXPORT_SYMBOL(lustre_check_quota_file);
1170 EXPORT_SYMBOL(lustre_read_dquot);
1171 EXPORT_SYMBOL(lustre_commit_dquot);
1172 EXPORT_SYMBOL(lustre_init_quota_info);
1173 EXPORT_SYMBOL(lustre_get_qids);
1174 #endif