Whamcloud - gitweb
Fix typo in the b=17839 commit (introduced while porting from HEAD)
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #include <linux/quotaio_v1.h>
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
64 static const uint lustre_initqversions[][MAXQUOTAS] = {
65         [LUSTRE_QUOTA_V1] = LUSTRE_INITQVERSIONS,
66         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
67 };
68
69 static const int lustre_dqstrinblk[] = {
70         [LUSTRE_QUOTA_V1] = LUSTRE_DQSTRINBLK,
71         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
72 };
73
74 static const int lustre_disk_dqblk_sz[] = {
75         [LUSTRE_QUOTA_V1] = sizeof(struct lustre_disk_dqblk),
76         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
77 };
78
79 int check_quota_file(struct file *f, struct inode *inode, int type, 
80                      lustre_quota_version_t version)
81 {
82         struct lustre_disk_dqheader dqhead;
83         mm_segment_t fs;
84         ssize_t size;
85         loff_t offset = 0;
86         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
87         const uint *quota_versions = lustre_initqversions[version];
88
89         if (f) {
90                 fs = get_fs();
91                 set_fs(KERNEL_DS);
92                 size = f->f_op->read(f, (char *)&dqhead,
93                                      sizeof(struct lustre_disk_dqheader), 
94                                      &offset);
95                 set_fs(fs);
96         } else { 
97 #ifndef KERNEL_SUPPORTS_QUOTA_READ
98                 size = 0;
99 #else
100                 struct super_block *sb = inode->i_sb;
101                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
102                                             sizeof(struct lustre_disk_dqheader), 0);
103 #endif
104         }
105         if (size != sizeof(struct lustre_disk_dqheader))
106                 return -EINVAL;
107         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
108             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
109                 return -EINVAL;
110         return 0;
111 }
112
113 /* Check whether given file is really lustre admin quotafile */
114 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
115 {
116         struct file *f = lqi->qi_files[type];
117         return check_quota_file(f, NULL, type, lqi->qi_version);
118 }
119
120 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
121 {
122         mm_segment_t fs;
123         struct lustre_disk_dqinfo dinfo;
124         ssize_t size;
125         loff_t offset = LUSTRE_DQINFOOFF;
126
127         fs = get_fs();
128         set_fs(KERNEL_DS);
129         size = f->f_op->read(f, (char *)&dinfo, 
130                              sizeof(struct lustre_disk_dqinfo), &offset);
131         set_fs(fs);
132         if (size != sizeof(struct lustre_disk_dqinfo)) {
133                 CERROR("Can't read info structure on device %s.\n",
134                        f->f_vfsmnt->mnt_sb->s_id);
135                 return -EINVAL;
136         }
137         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
138         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
139         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
140         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
141         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
142         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
143         return 0;
144 }
145
146 /* Read information header from quota file */
147 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
148 {
149         return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]);
150 }
151
152 /* Write information header to quota file */
153 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
154 {
155         mm_segment_t fs;
156         struct lustre_disk_dqinfo dinfo;
157         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
158         struct file *f = lqi->qi_files[type];
159         ssize_t size;
160         loff_t offset = LUSTRE_DQINFOOFF;
161
162         info->dqi_flags &= ~DQF_INFO_DIRTY;
163         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
164         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
165         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
166         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
167         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
168         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
169         fs = get_fs();
170         set_fs(KERNEL_DS);
171         size = f->f_op->write(f, (char *)&dinfo, 
172                               sizeof(struct lustre_disk_dqinfo), &offset);
173         set_fs(fs);
174         if (size != sizeof(struct lustre_disk_dqinfo)) {
175                 CWARN("Can't write info structure on device %s.\n",
176                       f->f_vfsmnt->mnt_sb->s_id);
177                 return -1;
178         }
179         return 0;
180 }
181
/*
 * Endianness helpers for on-disk dquot records whose fields are 32 or 64
 * bits wide depending on the format version.
 *
 * DQ2MQ(v)       - convert the little-endian field v to CPU order, picking
 *                  the 64- or 32-bit conversion from sizeof(v).
 * MQ2DQ(v, newv) - store newv into field v in little-endian order, using the
 *                  width of v.
 * DQF_GET/DQF_PUT - read/write a field of the record at var, interpreting it
 *                  as struct lustre_disk_dqblk for LUSTRE_QUOTA_V1 and as
 *                  struct lustre_disk_dqblk_v2 otherwise.
 *
 * Every macro argument is parenthesized so that expression arguments (for
 * example a shifted value or a conditional version) expand as one operand.
 */
#define DQ2MQ(v) ((sizeof(v) == sizeof(__u64)) ? \
                le64_to_cpu(v) : le32_to_cpu(v))

#define MQ2DQ(v,newv) ((sizeof(v) == sizeof(__u64)) ? \
                ((v) = cpu_to_le64((__u64)(newv))) : \
                ((v) = cpu_to_le32((__u32)(newv))))

#define DQF_GET(var,ver,field) (((ver) == LUSTRE_QUOTA_V1) ? \
                DQ2MQ(((struct lustre_disk_dqblk*)(var))->field) : \
                DQ2MQ(((struct lustre_disk_dqblk_v2*)(var))->field))

#define DQF_PUT(var,ver,field,val) (((ver) == LUSTRE_QUOTA_V1) ? \
                MQ2DQ(((struct lustre_disk_dqblk*)(var))->field, (val)) : \
                MQ2DQ(((struct lustre_disk_dqblk_v2*)(var))->field, (val)))
195
196 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
197                  lustre_quota_version_t version)
198 {
199         m->dqb_ihardlimit = DQF_GET(d, version, dqb_ihardlimit);
200         m->dqb_isoftlimit = DQF_GET(d, version, dqb_isoftlimit);
201         m->dqb_curinodes = DQF_GET(d, version, dqb_curinodes);
202         m->dqb_itime = DQF_GET(d, version, dqb_itime);
203         m->dqb_bhardlimit = DQF_GET(d, version, dqb_bhardlimit);
204         m->dqb_bsoftlimit = DQF_GET(d, version, dqb_bsoftlimit);
205         m->dqb_curspace = DQF_GET(d, version, dqb_curspace);
206         m->dqb_btime = DQF_GET(d, version, dqb_btime);
207 }
208
209 static int check_quota_bounds(struct lustre_mem_dqblk *m, 
210                               lustre_quota_version_t version)
211 {
212         return (version == LUSTRE_QUOTA_V1  &&
213                 m->dqb_ihardlimit <= MAX_UL &&
214                 m->dqb_isoftlimit <= MAX_UL &&
215                 m->dqb_curinodes <= MAX_UL  &&
216                 m->dqb_bhardlimit <= MAX_UL &&
217                 m->dqb_bsoftlimit <= MAX_UL) ||
218                 version != LUSTRE_QUOTA_V1;
219 }
220
221 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
222                        qid_t id, lustre_quota_version_t version)
223 {
224         if (!check_quota_bounds(m, version))
225                 return -EINVAL;
226
227         DQF_PUT(d, version, dqb_ihardlimit, m->dqb_ihardlimit);
228         DQF_PUT(d, version, dqb_isoftlimit, m->dqb_isoftlimit);
229         DQF_PUT(d, version, dqb_curinodes, m->dqb_curinodes);
230         DQF_PUT(d, version, dqb_itime, m->dqb_itime);
231         DQF_PUT(d, version, dqb_bhardlimit, m->dqb_bhardlimit);
232         DQF_PUT(d, version, dqb_bsoftlimit, m->dqb_bsoftlimit);
233         DQF_PUT(d, version, dqb_curspace, m->dqb_curspace);
234         DQF_PUT(d, version, dqb_btime, m->dqb_btime);
235         DQF_PUT(d, version, dqb_id, id);
236
237         return 0;
238 }
239
240 dqbuf_t getdqbuf(void)
241 {
242         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
243         if (!buf)
244                 CWARN("VFS: Not enough memory for quota buffers.\n");
245         return buf;
246 }
247
248 void freedqbuf(dqbuf_t buf)
249 {
250         kfree(buf);
251 }
252
253 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
254 {
255         mm_segment_t fs;
256         ssize_t ret;
257         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
258
259         memset(buf, 0, LUSTRE_DQBLKSIZE);
260         fs = get_fs();
261         set_fs(KERNEL_DS);
262         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
263         set_fs(fs);
264         return ret;
265 }
266
267 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
268 {
269         mm_segment_t fs;
270         ssize_t ret;
271         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
272
273         fs = get_fs();
274         set_fs(KERNEL_DS);
275         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
276         set_fs(fs);
277         return ret;
278 }
279
280 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
281 {
282         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
283 }
284
285 /* Remove empty block from list and return it */
286 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
287 {
288         dqbuf_t buf = getdqbuf();
289         struct lustre_disk_dqdbheader *dh =
290             (struct lustre_disk_dqdbheader *)buf;
291         int ret, blk;
292
293         if (!buf)
294                 return -ENOMEM;
295         if (info->dqi_free_blk) {
296                 blk = info->dqi_free_blk;
297                 if ((ret = read_blk(filp, blk, buf)) < 0)
298                         goto out_buf;
299                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
300         } else {
301                 memset(buf, 0, LUSTRE_DQBLKSIZE);
302                 /* Assure block allocation... */
303                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
304                         goto out_buf;
305                 blk = info->dqi_blocks++;
306         }
307         lustre_mark_info_dirty(info);
308         ret = blk;
309 out_buf:
310         freedqbuf(buf);
311         return ret;
312 }
313
314 /* Insert empty block to the list */
315 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
316                    dqbuf_t buf, uint blk)
317 {
318         struct lustre_disk_dqdbheader *dh =
319             (struct lustre_disk_dqdbheader *)buf;
320         int err;
321
322         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
323         dh->dqdh_prev_free = cpu_to_le32(0);
324         dh->dqdh_entries = cpu_to_le16(0);
325         info->dqi_free_blk = blk;
326         lustre_mark_info_dirty(info);
327         if ((err = write_blk(filp, blk, buf)) < 0)
328                 /* Some strange block. We had better leave it... */
329                 return err;
330         return 0;
331 }
332
333 /* Remove given block from the list of blocks with free entries */
334 int remove_free_dqentry(struct file *filp,
335                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
336                         uint blk)
337 {
338         dqbuf_t tmpbuf = getdqbuf();
339         struct lustre_disk_dqdbheader *dh =
340             (struct lustre_disk_dqdbheader *)buf;
341         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
342             le32_to_cpu(dh->dqdh_prev_free);
343         int err;
344
345         if (!tmpbuf)
346                 return -ENOMEM;
347         if (nextblk) {
348                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
349                         goto out_buf;
350                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
351                     dh->dqdh_prev_free;
352                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
353                         goto out_buf;
354         }
355         if (prevblk) {
356                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
357                         goto out_buf;
358                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
359                     dh->dqdh_next_free;
360                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
361                         goto out_buf;
362         } else {
363                 info->dqi_free_entry = nextblk;
364                 lustre_mark_info_dirty(info);
365         }
366         freedqbuf(tmpbuf);
367         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
368         err = write_blk(filp, blk, buf);
369         if (err < 0)      /* No matter whether write succeeds block is out of list */
370                 CERROR("VFS: Can't write block (%u) with "
371                        "free entries (rc=%d).\n", blk, err);
372         return 0;
373 out_buf:
374         freedqbuf(tmpbuf);
375         return err;
376 }
377
378 /* Insert given block to the beginning of list with free entries */
379 int insert_free_dqentry(struct file *filp,
380                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
381                         uint blk)
382 {
383         dqbuf_t tmpbuf = getdqbuf();
384         struct lustre_disk_dqdbheader *dh =
385             (struct lustre_disk_dqdbheader *)buf;
386         int err;
387
388         if (!tmpbuf)
389                 return -ENOMEM;
390         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
391         dh->dqdh_prev_free = cpu_to_le32(0);
392         if ((err = write_blk(filp, blk, buf)) < 0)
393                 goto out_buf;
394         if (info->dqi_free_entry) {
395                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
396                         goto out_buf;
397                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
398                     cpu_to_le32(blk);
399                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
400                         goto out_buf;
401         }
402         freedqbuf(tmpbuf);
403         info->dqi_free_entry = blk;
404         lustre_mark_info_dirty(info);
405         return 0;
406 out_buf:
407         freedqbuf(tmpbuf);
408         return err;
409 }
410
411
412
413 /* Find space for dquot */
414 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
415                               lustre_quota_version_t version)
416 {
417         struct lustre_quota_info *lqi = dquot->dq_info;
418         struct file *filp = lqi->qi_files[dquot->dq_type];
419         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
420         uint blk, i;
421         struct lustre_disk_dqdbheader *dh;
422         void *ddquot;
423         int dqblk_sz = lustre_disk_dqblk_sz[version];
424         int dqstrinblk = lustre_dqstrinblk[version];
425         char fakedquot[dqblk_sz];
426         dqbuf_t buf;
427
428         *err = 0;
429         if (!(buf = getdqbuf())) {
430                 *err = -ENOMEM;
431                 return 0;
432         }
433         dh = (struct lustre_disk_dqdbheader *)buf;
434         ddquot = GETENTRIES(buf, version);
435         if (info->dqi_free_entry) {
436                 blk = info->dqi_free_entry;
437                 if ((*err = read_blk(filp, blk, buf)) < 0)
438                         goto out_buf;
439         } else {
440                 blk = get_free_dqblk(filp, info);
441                 if ((int)blk < 0) {
442                         *err = blk;
443                         freedqbuf(buf);
444                         return 0;
445                 }
446                 memset(buf, 0, LUSTRE_DQBLKSIZE);
447                 info->dqi_free_entry = blk; /* This is enough as block is 
448                                                already zeroed and entry list
449                                                is empty... */
450                 lustre_mark_info_dirty(info);
451         }
452
453         /* Will block be full */
454         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
455                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
456                         CERROR("VFS: Can't remove block %u"
457                                " from entry free list.\n", blk);
458                         goto out_buf;
459                 }
460         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
461         memset(fakedquot, 0, dqblk_sz);
462         /* Find free structure in block */
463         for (i = 0; i < dqstrinblk &&
464              memcmp(fakedquot, (char*)ddquot + i * dqblk_sz, 
465                     sizeof(fakedquot)); i++);
466
467         if (i == dqstrinblk) {
468                 CERROR("VFS: Data block full but it shouldn't.\n");
469                 *err = -EIO;
470                 goto out_buf;
471         }
472
473         if ((*err = write_blk(filp, blk, buf)) < 0) {
474                 CERROR("VFS: Can't write quota data block %u.\n", blk);
475                 goto out_buf;
476         }
477         dquot->dq_off =
478             (blk << LUSTRE_DQBLKSIZE_BITS) +
479             sizeof(struct lustre_disk_dqdbheader) +
480             i * dqblk_sz;
481         freedqbuf(buf);
482         return blk;
483 out_buf:
484         freedqbuf(buf);
485         return 0;
486 }
487
488 /* Insert reference to structure into the trie */
489 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
490                           lustre_quota_version_t version)
491 {
492         struct lustre_quota_info *lqi = dquot->dq_info;
493         struct file *filp = lqi->qi_files[dquot->dq_type];
494         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
495         dqbuf_t buf;
496         int ret = 0, newson = 0, newact = 0;
497         u32 *ref;
498         uint newblk;
499
500         if (!(buf = getdqbuf()))
501                 return -ENOMEM;
502         if (!*treeblk) {
503                 ret = get_free_dqblk(filp, info);
504                 if (ret < 0)
505                         goto out_buf;
506                 *treeblk = ret;
507                 memset(buf, 0, LUSTRE_DQBLKSIZE);
508                 newact = 1;
509         } else {
510                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
511                         CERROR("VFS: Can't read tree quota block %u.\n",
512                                *treeblk);
513                         goto out_buf;
514                 }
515         }
516         ref = (u32 *) buf;
517         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
518         if (!newblk)
519                 newson = 1;
520         if (depth == LUSTRE_DQTREEDEPTH - 1) {
521
522                 if (newblk) {
523                         CERROR("VFS: Inserting already present quota entry "
524                                "(block %u).\n", 
525                                ref[GETIDINDEX(dquot->dq_id, depth)]);
526                         ret = -EIO;
527                         goto out_buf;
528                 }
529
530                 newblk = find_free_dqentry(dquot, &ret, version);
531         } else
532                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
533         if (newson && ret >= 0) {
534                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
535                 ret = write_blk(filp, *treeblk, buf);
536         } else if (newact && ret < 0)
537                 put_free_dqblk(filp, info, buf, *treeblk);
538 out_buf:
539         freedqbuf(buf);
540         return ret;
541 }
542
543 /* Wrapper for inserting quota structure into tree */
544 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
545                                  lustre_quota_version_t version)
546 {
547         int tmp = LUSTRE_DQTREEOFF;
548         return do_insert_tree(dquot, &tmp, 0, version);
549 }
550
551 /*
552  *  We don't have to be afraid of deadlocks as we never have quotas on quota files...
553  */
554 static int lustre_write_dquot(struct lustre_dquot *dquot, 
555                               lustre_quota_version_t version)
556 {
557         int type = dquot->dq_type;
558         struct file *filp;
559         mm_segment_t fs;
560         loff_t offset;
561         ssize_t ret;
562         int dqblk_sz = lustre_disk_dqblk_sz[version];
563         char ddquot[dqblk_sz], empty[dqblk_sz];
564
565         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
566         if (ret < 0)
567                 return ret;
568
569         if (!dquot->dq_off)
570                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
571                         CERROR("VFS: Error %Zd occurred while creating quota.\n",
572                                ret);
573                         return ret;
574                 }
575         filp = dquot->dq_info->qi_files[type];
576         offset = dquot->dq_off;
577         /* Argh... We may need to write structure full of zeroes but that would be
578          * treated as an empty place by the rest of the code. Format change would
579          * be definitely cleaner but the problems probably are not worth it */
580         memset(empty, 0, dqblk_sz);
581         if (!memcmp(empty, ddquot, dqblk_sz))
582                 DQF_PUT(ddquot, version, dqb_itime, 1);
583         fs = get_fs();
584         set_fs(KERNEL_DS);
585         ret = filp->f_op->write(filp, ddquot,
586                                 dqblk_sz, &offset);
587         set_fs(fs);
588         if (ret != dqblk_sz) {
589                 CWARN("VFS: dquota write failed on dev %s\n",
590                       filp->f_dentry->d_sb->s_id);
591                 if (ret >= 0)
592                         ret = -ENOSPC;
593         } else
594                 ret = 0;
595
596         return ret;
597 }
598
599 /* Free dquot entry in data block */
600 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
601                         lustre_quota_version_t version)
602 {
603         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
604         struct lustre_mem_dqinfo *info =
605             &dquot->dq_info->qi_info[dquot->dq_type];
606         struct lustre_disk_dqdbheader *dh;
607         dqbuf_t buf = getdqbuf();
608         int dqstrinblk = lustre_dqstrinblk[version];
609         int ret = 0;
610
611         if (!buf)
612                 return -ENOMEM;
613         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
614                 CERROR("VFS: Quota structure has offset to other block (%u) "
615                        "than it should (%u).\n", blk, 
616                        (uint)(dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
617                 goto out_buf;
618         }
619         if ((ret = read_blk(filp, blk, buf)) < 0) {
620                 CERROR("VFS: Can't read quota data block %u\n", blk);
621                 goto out_buf;
622         }
623         dh = (struct lustre_disk_dqdbheader *)buf;
624         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
625         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
626                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
627                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
628                         CERROR("VFS: Can't move quota data block (%u) "
629                                "to free list.\n", blk);
630                         goto out_buf;
631                 }
632         } else {
633                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
634                        0, lustre_disk_dqblk_sz[version]);
635                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
636                         /* Insert will write block itself */
637                         if ((ret =
638                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
639                                 CERROR("VFS: Can't insert quota data block (%u) "
640                                        "to free entry list.\n", blk);
641                                 goto out_buf;
642                         }
643                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
644                         CERROR("VFS: Can't write quota data block %u\n", blk);
645                         goto out_buf;
646                 }
647         }
648         dquot->dq_off = 0;      /* Quota is now unattached */
649 out_buf:
650         freedqbuf(buf);
651         return ret;
652 }
653
654 /* Remove reference to dquot from tree */
655 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
656                        lustre_quota_version_t version)
657 {
658         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
659         struct lustre_mem_dqinfo *info =
660             &dquot->dq_info->qi_info[dquot->dq_type];
661         dqbuf_t buf = getdqbuf();
662         int ret = 0;
663         uint newblk;
664         u32 *ref = (u32 *) buf;
665
666         if (!buf)
667                 return -ENOMEM;
668         if ((ret = read_blk(filp, *blk, buf)) < 0) {
669                 CERROR("VFS: Can't read quota data block %u\n", *blk);
670                 goto out_buf;
671         }
672         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
673         if (depth == LUSTRE_DQTREEDEPTH - 1) {
674                 ret = free_dqentry(dquot, newblk, version);
675                 newblk = 0;
676         } else
677                 ret = remove_tree(dquot, &newblk, depth + 1, version);
678         if (ret >= 0 && !newblk) {
679                 int i;
680                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
681                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
682                         /* Block got empty? */ ;
683                 /* don't put the root block into free blk list! */
684                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
685                         put_free_dqblk(filp, info, buf, *blk);
686                         *blk = 0;
687                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
688                         CERROR("VFS: Can't write quota tree block %u.\n", *blk);
689         }
690 out_buf:
691         freedqbuf(buf);
692         return ret;
693 }
694
695 /* Delete dquot from tree */
696 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
697                                 lustre_quota_version_t version)
698 {
699         uint tmp = LUSTRE_DQTREEOFF;
700
701         if (!dquot->dq_off)     /* Even not allocated? */
702                 return 0;
703         return remove_tree(dquot, &tmp, 0, version);
704 }
705
706 /* Find entry in block */
707 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
708                                  lustre_quota_version_t version)
709 {
710         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
711         dqbuf_t buf = getdqbuf();
712         loff_t ret = 0;
713         int i;
714         char *ddquot = GETENTRIES(buf, version);
715         int dqblk_sz = lustre_disk_dqblk_sz[version];
716         int dqstrinblk = lustre_dqstrinblk[version];
717
718         if (!buf)
719                 return -ENOMEM;
720         if ((ret = read_blk(filp, blk, buf)) < 0) {
721                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
722                 goto out_buf;
723         }
724         if (dquot->dq_id)
725                 for (i = 0; i < dqstrinblk && 
726                      DQF_GET(ddquot+i*dqblk_sz, version, dqb_id) != dquot->dq_id;
727                      i++) ;
728         else {                  /* ID 0 as a bit more complicated searching... */
729                 char fakedquot[dqblk_sz];
730
731                 memset(fakedquot, 0, sizeof(fakedquot));
732                 for (i = 0; i < dqstrinblk; i++)
733                         if (!DQF_GET(ddquot + i*dqblk_sz, version, dqb_id)
734                             && memcmp(fakedquot, ddquot + i*dqblk_sz,
735                                       dqblk_sz))
736                                 break;
737         }
738         if (i == dqstrinblk) {
739                 CERROR("VFS: Quota for id %u referenced but not present.\n",
740                        dquot->dq_id);
741                 ret = -EIO;
742                 goto out_buf;
743         } else
744                 ret =
745                     (blk << LUSTRE_DQBLKSIZE_BITS) +
746                     sizeof(struct lustre_disk_dqdbheader) +
747                     i * dqblk_sz;
748 out_buf:
749         freedqbuf(buf);
750         return ret;
751 }
752
753 /* Find entry for given id in the tree */
754 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
755                                 lustre_quota_version_t version)
756 {
757         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
758         dqbuf_t buf = getdqbuf();
759         loff_t ret = 0;
760         u32 *ref = (u32 *) buf;
761
762         if (!buf)
763                 return -ENOMEM;
764         if ((ret = read_blk(filp, blk, buf)) < 0) {
765                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
766                 goto out_buf;
767         }
768         ret = 0;
769         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
770         if (!blk)               /* No reference? */
771                 goto out_buf;
772         if (depth < LUSTRE_DQTREEDEPTH - 1)
773                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
774         else
775                 ret = find_block_dqentry(dquot, blk, version);
776 out_buf:
777         freedqbuf(buf);
778         return ret;
779 }
780
781 /* Find entry for given id in the tree - wrapper function */
782 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
783                                   lustre_quota_version_t version)
784 {
785         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
786 }
787
788 int lustre_read_dquot(struct lustre_dquot *dquot)
789 {
790         int type = dquot->dq_type;
791         struct file *filp;
792         mm_segment_t fs;
793         loff_t offset;
794         int ret = 0, dqblk_sz;
795         lustre_quota_version_t version;
796
797         /* Invalidated quota? */
798         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
799                 CERROR("VFS: Quota invalidated while reading!\n");
800                 return -EIO;
801         }
802
803         version = dquot->dq_info->qi_version;
804         dqblk_sz = lustre_disk_dqblk_sz[version];
805
806         offset = find_dqentry(dquot, version);
807         if (offset <= 0) {      /* Entry not present? */
808                 if (offset < 0)
809                         CERROR("VFS: Can't read quota structure for id %u.\n",
810                                dquot->dq_id);
811                 dquot->dq_off = 0;
812                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
813                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
814                 ret = offset;
815         } else {
816                 char ddquot[dqblk_sz], empty[dqblk_sz];
817
818                 dquot->dq_off = offset;
819                 fs = get_fs();
820                 set_fs(KERNEL_DS);
821                 if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
822                     dqblk_sz) {
823                         if (ret >= 0)
824                                 ret = -EIO;
825                         CERROR("VFS: Error while reading quota structure "
826                                "for id %u.\n", dquot->dq_id);
827                         memset(ddquot, 0, dqblk_sz);
828                 } else {
829                         ret = 0;
830                         /* We need to escape back all-zero structure */
831                         memset(empty, 0, dqblk_sz);
832                         DQF_PUT(empty, version, dqb_itime, 1);
833                         if (!memcmp(empty, ddquot, dqblk_sz))
834                                 DQF_PUT(ddquot, version, dqb_itime, 0);
835                 }
836                 set_fs(fs);
837                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
838         }
839
840         return ret;
841 }
842
843 /* Commit changes of dquot to disk - it might also mean deleting it when quota became fake */
844 int lustre_commit_dquot(struct lustre_dquot *dquot)
845 {
846         int rc = 0;
847         lustre_quota_version_t version = dquot->dq_info->qi_version;
848
849         /* always clear the flag so we don't loop on an IO error... */
850         clear_bit(DQ_MOD_B, &dquot->dq_flags);
851
852         /* The block/inode usage in admin quotafile isn't the real usage
853          * over all cluster, so keep the fake dquot entry on disk is
854          * meaningless, just remove it */
855         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
856                 rc = lustre_delete_dquot(dquot, version);
857         else
858                 rc = lustre_write_dquot(dquot, version);
859
860         if (rc < 0)
861                 return rc;
862
863         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
864                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
865
866         return rc;
867 }
868
869 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
870 {
871         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
872         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
873         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
874         struct lustre_disk_dqheader dqhead;
875         ssize_t size;
876         loff_t offset = 0;
877         struct file *fp = lqi->qi_files[type];
878         int rc = 0;
879
880         /* write quotafile header */
881         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
882                                        fake_magics[type] : quota_magics[type]);
883         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
884         size = fp->f_op->write(fp, (char *)&dqhead,
885                                sizeof(struct lustre_disk_dqheader), &offset);
886
887         if (size != sizeof(struct lustre_disk_dqheader)) {
888                 CERROR("error writing quotafile header (rc:%d)\n", rc);
889                 rc = size;
890         }
891
892         return rc;
893 }
894
895 /* We need to export this function to initialize quotafile, because we haven't
896  * user level check utility */
897 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
898                                    int fakemagics)
899 {
900         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
901         int rc;
902
903         rc = lustre_init_quota_header(lqi, type, fakemagics);
904         if (rc)
905                 return rc;
906
907         /* write init quota info */
908         memset(dqinfo, 0, sizeof(*dqinfo));
909         dqinfo->dqi_bgrace = MAX_DQ_TIME;
910         dqinfo->dqi_igrace = MAX_IQ_TIME;
911         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
912
913         return lustre_write_quota_info(lqi, type);
914 }
915
/*
 * Initialize the quota file of @type; same as
 * lustre_init_quota_info_generic() with fakemagics == 0.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
920
921 ssize_t quota_read(struct file *file, struct inode *inode, int type,
922                    uint blk, dqbuf_t buf)
923 {
924         if (file) {
925                 return read_blk(file, blk, buf);
926         } else {
927 #ifndef KERNEL_SUPPORTS_QUOTA_READ
928                 return -ENOTSUPP;
929 #else
930                 struct super_block *sb = inode->i_sb;
931                 memset(buf, 0, LUSTRE_DQBLKSIZE);
932                 return sb->s_op->quota_read(sb, type, (char *)buf,
933                                             LUSTRE_DQBLKSIZE, 
934                                             blk << LUSTRE_DQBLKSIZE_BITS);
935 #endif
936         }
937 }
938
939 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
940                               uint blk, struct list_head *list)
941 {
942         dqbuf_t buf = getdqbuf();
943         loff_t ret = 0;
944         struct lustre_disk_dqdbheader *dqhead =
945             (struct lustre_disk_dqdbheader *)buf;
946         struct dqblk *blk_item;
947         struct dqblk *pos;
948         struct list_head *tmp;
949
950         if (!buf)
951                 return -ENOMEM;
952         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
953                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
954                 goto out_buf;
955         }
956         ret = 0;
957
958         if (!le32_to_cpu(dqhead->dqdh_entries))
959                 goto out_buf;
960
961         if (list_empty(list)) {
962                 tmp = list;
963                 goto done;
964         }
965
966         list_for_each_entry(pos, list, link) {
967                 if (blk == pos->blk)    /* we got this blk already */
968                         goto out_buf;
969                 if (blk > pos->blk)
970                         continue;
971                 break;
972         }
973         tmp = &pos->link;
974 done:
975         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
976         if (!blk_item) {
977                 ret = -ENOMEM;
978                 goto out_buf;
979         }
980         blk_item->blk = blk;
981         INIT_LIST_HEAD(&blk_item->link);
982
983         list_add_tail(&blk_item->link, tmp);
984
985 out_buf:
986         freedqbuf(buf);
987         return ret;
988 }
989
990 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
991                       uint blk, int depth, struct list_head *list)
992 {
993         dqbuf_t buf = getdqbuf();
994         loff_t ret = 0;
995         int index;
996         u32 *ref = (u32 *) buf;
997
998         if (!buf)
999                 return -ENOMEM;
1000         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1001                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
1002                 goto out_buf;
1003         }
1004         ret = 0;
1005
1006         for (index = 0; index <= 0xff && !ret; index++) {
1007                 blk = le32_to_cpu(ref[index]);
1008                 if (!blk)       /* No reference */
1009                         continue;
1010
1011                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1012                         ret = walk_tree_dqentry(filp, inode, type, blk,
1013                                                 depth + 1, list);
1014                 else
1015                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1016         }
1017 out_buf:
1018         freedqbuf(buf);
1019         return ret;
1020 }
1021
1022 /* Walk through the quota file (v2 format) to get all ids with quota limit */
1023 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1024                     struct list_head *list)
1025 {
1026         struct list_head blk_list;
1027         struct dqblk *blk_item, *tmp;
1028         dqbuf_t buf = NULL;
1029         char *ddquot;
1030         int rc;
1031         lustre_quota_version_t version;
1032
1033         ENTRY;
1034
1035         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V1) == 0)
1036                 version = LUSTRE_QUOTA_V1;
1037         else if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1038                 version = LUSTRE_QUOTA_V2;
1039         else {
1040                 CERROR("unknown quota file format!\n");
1041                 RETURN(-EINVAL);
1042         }
1043
1044         if (!list_empty(list)) {
1045                 CERROR("not empty list\n");
1046                 RETURN(-EINVAL);
1047         }
1048
1049         INIT_LIST_HEAD(&blk_list);
1050         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1051         if (rc) {
1052                 CERROR("walk through quota file failed!(%d)\n", rc);
1053                 GOTO(out_free, rc);
1054         }
1055         if (list_empty(&blk_list))
1056                 RETURN(0);
1057
1058         buf = getdqbuf();
1059         if (!buf)
1060                 RETURN(-ENOMEM);
1061         ddquot = GETENTRIES(buf, version);
1062
1063         list_for_each_entry(blk_item, &blk_list, link) {
1064                 loff_t ret = 0;
1065                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1066                 char fakedquot[dqblk_sz];
1067
1068                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1069                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1070                         CERROR("VFS: Can't read quota tree block %u.\n",
1071                                blk_item->blk);
1072                         GOTO(out_free, rc = ret);
1073                 }
1074
1075                 memset(fakedquot, 0, dqblk_sz);
1076                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1077                         struct dquot_id *dqid;
1078                         /* skip empty entry */
1079                         if (!memcmp(fakedquot, ddquot + i*dqblk_sz, dqblk_sz))
1080                                 continue;
1081
1082                         dqid = kmalloc(sizeof(*dqid), GFP_NOFS);
1083                         if (!dqid) 
1084                                 GOTO(out_free, rc = -ENOMEM);
1085
1086                         dqid->di_id = DQF_GET(ddquot + i * dqblk_sz, 
1087                                               version, dqb_id);
1088                         INIT_LIST_HEAD(&dqid->di_link);
1089                         list_add(&dqid->di_link, list);
1090                 }
1091         }
1092
1093 out_free:
1094         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1095                 list_del_init(&blk_item->link);
1096                 kfree(blk_item);
1097         }
1098         if (buf)
1099                 freedqbuf(buf);
1100
1101         RETURN(rc);
1102 }
1103
1104
1105 EXPORT_SYMBOL(lustre_read_quota_info);
1106 EXPORT_SYMBOL(lustre_write_quota_info);
1107 EXPORT_SYMBOL(lustre_check_quota_file);
1108 EXPORT_SYMBOL(lustre_read_dquot);
1109 EXPORT_SYMBOL(lustre_commit_dquot);
1110 EXPORT_SYMBOL(lustre_init_quota_info);
1111 EXPORT_SYMBOL(lustre_get_qids);
1112 #endif