Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #include <linux/quotaio_v1.h>
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
/*
 * Per-version format tables. Each table is indexed by the quota format
 * version; LUSTRE_QUOTA_V2 is the only version with an entry here.
 */

/* Expected header version numbers: first index is the format version,
 * second index is the quota type (see check_quota_file()). */
64 static const uint lustre_initqversions[][MAXQUOTAS] = {
65         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
66 };
67
/* Number of dquot entries per data block; used as the scan bound when
 * searching a block for an entry. */
68 static const int lustre_dqstrinblk[] = {
69         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
70 };
71
/* Size in bytes of one on-disk dquot entry. */
72 static const int lustre_disk_dqblk_sz[] = {
73         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
74 };
75
/* Reference image of an entry whose only non-zero field is dqb_itime == 1;
 * lustre_write_dquot() stores that marker in place of an all-zero entry. */
76 static const union
77 {
78         struct lustre_disk_dqblk_v2 r1;
79 } fakedquot[] = {
80         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
81 };
82
/* All-zero entry image; memcmp() against it identifies an unused slot. */
83 static const union
84 {
85         struct lustre_disk_dqblk_v2 r1;
86 } emptydquot[] = {
87         [LUSTRE_QUOTA_V2] = {.r1 = { 0 } }
88 };
89
90 int check_quota_file(struct file *f, struct inode *inode, int type, 
91                      lustre_quota_version_t version)
92 {
93         struct lustre_disk_dqheader dqhead;
94         mm_segment_t fs;
95         ssize_t size;
96         loff_t offset = 0;
97         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
98         const uint *quota_versions = lustre_initqversions[version];
99
100         if (f) {
101                 fs = get_fs();
102                 set_fs(KERNEL_DS);
103                 size = f->f_op->read(f, (char *)&dqhead,
104                                      sizeof(struct lustre_disk_dqheader), 
105                                      &offset);
106                 set_fs(fs);
107         } else { 
108 #ifndef KERNEL_SUPPORTS_QUOTA_READ
109                 size = 0;
110 #else
111                 struct super_block *sb = inode->i_sb;
112                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
113                                             sizeof(struct lustre_disk_dqheader),
114                                             0);
115 #endif
116         }
117         if (size != sizeof(struct lustre_disk_dqheader))
118                 return -EINVAL;
119         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
120             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
121                 return -EINVAL;
122         return 0;
123 }
124
125 /**
126  * Check whether given file is really lustre admin quotafile
127  */
128 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
129 {
130         struct file *f = lqi->qi_files[type];
131         return check_quota_file(f, NULL, type, lqi->qi_version);
132 }
133
134 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
135 {
136         mm_segment_t fs;
137         struct lustre_disk_dqinfo dinfo;
138         ssize_t size;
139         loff_t offset = LUSTRE_DQINFOOFF;
140
141         fs = get_fs();
142         set_fs(KERNEL_DS);
143         size = f->f_op->read(f, (char *)&dinfo, 
144                              sizeof(struct lustre_disk_dqinfo), &offset);
145         set_fs(fs);
146         if (size != sizeof(struct lustre_disk_dqinfo)) {
147                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
148                        f->f_vfsmnt->mnt_sb->s_id);
149                 return -EINVAL;
150         }
151         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
152         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
153         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
154         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
155         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
156         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
157         return 0;
158 }
159
160 /**
161  * Read information header from quota file
162  */
163 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
164 {
165         return lustre_read_quota_file_info(lqi->qi_files[type],
166                                            &lqi->qi_info[type]);
167 }
168
169 /**
170  * Write information header to quota file
171  */
172 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
173 {
174         mm_segment_t fs;
175         struct lustre_disk_dqinfo dinfo;
176         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
177         struct file *f = lqi->qi_files[type];
178         ssize_t size;
179         loff_t offset = LUSTRE_DQINFOOFF;
180
181         info->dqi_flags &= ~DQF_INFO_DIRTY;
182         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
183         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
184         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
185         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
186         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
187         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
188         fs = get_fs();
189         set_fs(KERNEL_DS);
190         size = f->f_op->write(f, (char *)&dinfo, 
191                               sizeof(struct lustre_disk_dqinfo), &offset);
192         set_fs(fs);
193         if (size != sizeof(struct lustre_disk_dqinfo)) {
194                 CDEBUG(D_WARNING, 
195                        "Can't write info structure on device %s.\n",
196                        f->f_vfsmnt->mnt_sb->s_id);
197                 return -1;
198         }
199         return 0;
200 }
201
202 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
203                  lustre_quota_version_t version)
204 {
205         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
206
207         LASSERT(version == LUSTRE_QUOTA_V2);
208
209         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
210         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
211         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
212         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
213         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
214         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
215         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
216         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
217 }
218
219 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
220                        qid_t id, lustre_quota_version_t version)
221 {
222         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
223
224         LASSERT(version == LUSTRE_QUOTA_V2);
225
226         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
227         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
228         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
229         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
230         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
231         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
232         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
233         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
234         dqblk->dqb_id = cpu_to_le32(id);
235
236         return 0;
237 }
238
239 dqbuf_t getdqbuf(void)
240 {
241         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
242         if (!buf)
243                 CDEBUG(D_WARNING, 
244                        "VFS: Not enough memory for quota buffers.\n");
245         return buf;
246 }
247
248 void freedqbuf(dqbuf_t buf)
249 {
250         kfree(buf);
251 }
252
253 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
254 {
255         mm_segment_t fs;
256         ssize_t ret;
257         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
258
259         memset(buf, 0, LUSTRE_DQBLKSIZE);
260         fs = get_fs();
261         set_fs(KERNEL_DS);
262         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
263         set_fs(fs);
264         return ret;
265 }
266
267 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
268 {
269         mm_segment_t fs;
270         ssize_t ret;
271         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
272
273         fs = get_fs();
274         set_fs(KERNEL_DS);
275         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
276         set_fs(fs);
277         return ret;
278 }
279
280 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
281 {
282         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
283 }
284
285 /**
286  * Remove empty block from list and return it
287  */
288 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
289 {
290         dqbuf_t buf = getdqbuf();
291         struct lustre_disk_dqdbheader *dh =
292             (struct lustre_disk_dqdbheader *)buf;
293         int ret, blk;
294
295         if (!buf)
296                 return -ENOMEM;
297         if (info->dqi_free_blk) {
298                 blk = info->dqi_free_blk;
299                 if ((ret = read_blk(filp, blk, buf)) < 0)
300                         goto out_buf;
301                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
302         } else {
303                 memset(buf, 0, LUSTRE_DQBLKSIZE);
304                 /* Assure block allocation... */
305                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
306                         goto out_buf;
307                 blk = info->dqi_blocks++;
308         }
309         lustre_mark_info_dirty(info);
310         ret = blk;
311 out_buf:
312         freedqbuf(buf);
313         return ret;
314 }
315
316 /**
317  * Insert empty block to the list
318  */
319 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
320                    dqbuf_t buf, uint blk)
321 {
322         struct lustre_disk_dqdbheader *dh =
323             (struct lustre_disk_dqdbheader *)buf;
324         int err;
325
326         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
327         dh->dqdh_prev_free = cpu_to_le32(0);
328         dh->dqdh_entries = cpu_to_le16(0);
329         info->dqi_free_blk = blk;
330         lustre_mark_info_dirty(info);
331         if ((err = write_blk(filp, blk, buf)) < 0)
332                 /* Some strange block. We had better leave it... */
333                 return err;
334         return 0;
335 }
336
337 /**
338  * Remove given block from the list of blocks with free entries
339  */
340 int remove_free_dqentry(struct file *filp,
341                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
342                         uint blk)
343 {
344         dqbuf_t tmpbuf = getdqbuf();
345         struct lustre_disk_dqdbheader *dh =
346             (struct lustre_disk_dqdbheader *)buf;
347         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
348             le32_to_cpu(dh->dqdh_prev_free);
349         int err;
350
351         if (!tmpbuf)
352                 return -ENOMEM;
353         if (nextblk) {
354                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
355                         goto out_buf;
356                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
357                     dh->dqdh_prev_free;
358                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
359                         goto out_buf;
360         }
361         if (prevblk) {
362                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
363                         goto out_buf;
364                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
365                     dh->dqdh_next_free;
366                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
367                         goto out_buf;
368         } else {
369                 info->dqi_free_entry = nextblk;
370                 lustre_mark_info_dirty(info);
371         }
372         freedqbuf(tmpbuf);
373         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
374         if (write_blk(filp, blk, buf) < 0)
375                 /* No matter whether write succeeds block is out of list */
376                 CDEBUG(D_ERROR, 
377                        "VFS: Can't write block (%u) with free entries.\n", blk);
378         return 0;
379 out_buf:
380         freedqbuf(tmpbuf);
381         return err;
382 }
383
384 /**
385  * Insert given block to the beginning of list with free entries
386  */
387 int insert_free_dqentry(struct file *filp,
388                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
389                         uint blk)
390 {
391         dqbuf_t tmpbuf = getdqbuf();
392         struct lustre_disk_dqdbheader *dh =
393             (struct lustre_disk_dqdbheader *)buf;
394         int err;
395
396         if (!tmpbuf)
397                 return -ENOMEM;
398         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
399         dh->dqdh_prev_free = cpu_to_le32(0);
400         if ((err = write_blk(filp, blk, buf)) < 0)
401                 goto out_buf;
402         if (info->dqi_free_entry) {
403                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
404                         goto out_buf;
405                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
406                     cpu_to_le32(blk);
407                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
408                         goto out_buf;
409         }
410         freedqbuf(tmpbuf);
411         info->dqi_free_entry = blk;
412         lustre_mark_info_dirty(info);
413         return 0;
414 out_buf:
415         freedqbuf(tmpbuf);
416         return err;
417 }
418
419
420
421 /**
422  * Find space for dquot
423  */
424 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
425                               lustre_quota_version_t version)
426 {
427         struct lustre_quota_info *lqi = dquot->dq_info;
428         struct file *filp = lqi->qi_files[dquot->dq_type];
429         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
430         uint blk, i;
431         struct lustre_disk_dqdbheader *dh;
432         void *ddquot;
433         int dqblk_sz = lustre_disk_dqblk_sz[version];
434         int dqstrinblk = lustre_dqstrinblk[version];
435         dqbuf_t buf;
436
437         *err = 0;
438         if (!(buf = getdqbuf())) {
439                 *err = -ENOMEM;
440                 return 0;
441         }
442         dh = (struct lustre_disk_dqdbheader *)buf;
443         ddquot = GETENTRIES(buf, version);
444         if (info->dqi_free_entry) {
445                 blk = info->dqi_free_entry;
446                 if ((*err = read_blk(filp, blk, buf)) < 0)
447                         goto out_buf;
448         } else {
449                 blk = get_free_dqblk(filp, info);
450                 if ((int)blk < 0) {
451                         *err = blk;
452                         freedqbuf(buf);
453                         return 0;
454                 }
455                 memset(buf, 0, LUSTRE_DQBLKSIZE);
456                 info->dqi_free_entry = blk; /* This is enough as block is 
457                                                already zeroed and entry list
458                                                is empty... */
459                 lustre_mark_info_dirty(info);
460         }
461
462         /* Will block be full */
463         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
464                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
465                         CDEBUG(D_ERROR, 
466                                "VFS: find_free_dqentry(): Can't remove block "
467                                "(%u) from entry free list.\n", blk);
468                         goto out_buf;
469                 }
470         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
471         /* Find free structure in block */
472         for (i = 0; i < dqstrinblk &&
473              memcmp((char *)&emptydquot[version],
474                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
475              i++);
476
477         if (i == dqstrinblk) {
478                 CDEBUG(D_ERROR, 
479                        "VFS: find_free_dqentry(): Data block full but it "
480                        "shouldn't.\n");
481                 *err = -EIO;
482                 goto out_buf;
483         }
484
485         if ((*err = write_blk(filp, blk, buf)) < 0) {
486                 CDEBUG(D_ERROR,
487                        "VFS: find_free_dqentry(): Can't write quota data "
488                        "block %u.\n", blk);
489                 goto out_buf;
490         }
491         dquot->dq_off =
492             (blk << LUSTRE_DQBLKSIZE_BITS) +
493             sizeof(struct lustre_disk_dqdbheader) +
494             i * dqblk_sz;
495         freedqbuf(buf);
496         return blk;
497 out_buf:
498         freedqbuf(buf);
499         return 0;
500 }
501
502 /**
503  * Insert reference to structure into the trie
504  */
505 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
506                           lustre_quota_version_t version)
507 {
508         struct lustre_quota_info *lqi = dquot->dq_info;
509         struct file *filp = lqi->qi_files[dquot->dq_type];
510         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
511         dqbuf_t buf;
512         int ret = 0, newson = 0, newact = 0;
513         u32 *ref;
514         uint newblk;
515
516         if (!(buf = getdqbuf()))
517                 return -ENOMEM;
518         if (!*treeblk) {
519                 ret = get_free_dqblk(filp, info);
520                 if (ret < 0)
521                         goto out_buf;
522                 *treeblk = ret;
523                 memset(buf, 0, LUSTRE_DQBLKSIZE);
524                 newact = 1;
525         } else {
526                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
527                         CDEBUG(D_ERROR,
528                                "VFS: Can't read tree quota block %u.\n",
529                                *treeblk);
530                         goto out_buf;
531                 }
532         }
533         ref = (u32 *) buf;
534         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
535         if (!newblk)
536                 newson = 1;
537         if (depth == LUSTRE_DQTREEDEPTH - 1) {
538
539                 if (newblk) {
540                         CDEBUG(D_ERROR, 
541                                "VFS: Inserting already present quota entry "
542                                "(block %u).\n",
543                                ref[GETIDINDEX(dquot->dq_id, depth)]);
544                         ret = -EIO;
545                         goto out_buf;
546                 }
547
548                 newblk = find_free_dqentry(dquot, &ret, version);
549         } else
550                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
551         if (newson && ret >= 0) {
552                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
553                 ret = write_blk(filp, *treeblk, buf);
554         } else if (newact && ret < 0)
555                 put_free_dqblk(filp, info, buf, *treeblk);
556 out_buf:
557         freedqbuf(buf);
558         return ret;
559 }
560
561 /**
562  * Wrapper for inserting quota structure into tree
563  */
564 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
565                                  lustre_quota_version_t version)
566 {
567         int tmp = LUSTRE_DQTREEOFF;
568         return do_insert_tree(dquot, &tmp, 0, version);
569 }
570
571 /**
572  * We don't have to be afraid of deadlocks as we never have quotas on
573  * quota files...
574  */
575 static int lustre_write_dquot(struct lustre_dquot *dquot, 
576                               lustre_quota_version_t version)
577 {
578         int type = dquot->dq_type;
579         struct file *filp;
580         mm_segment_t fs;
581         loff_t offset;
582         ssize_t ret;
583         int dqblk_sz = lustre_disk_dqblk_sz[version];
584         char ddquot[dqblk_sz];
585
586         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
587         if (ret < 0)
588                 return ret;
589
590         if (!dquot->dq_off)
591                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
592                         CDEBUG(D_ERROR,
593                                "VFS: Error %Zd occurred while creating "
594                                "quota.\n", ret);
595                         return ret;
596                 }
597         filp = dquot->dq_info->qi_files[type];
598         offset = dquot->dq_off;
599         /* Argh... We may need to write structure full of zeroes but that would
600          * be treated as an empty place by the rest of the code. Format change
601          * would be definitely cleaner but the problems probably are not worth
602          * it */
603         if (!memcmp((char *)&emptydquot[version], ddquot, dqblk_sz))
604                 ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime =
605                                                                 cpu_to_le64(1);
606         fs = get_fs();
607         set_fs(KERNEL_DS);
608         ret = filp->f_op->write(filp, ddquot,
609                                 dqblk_sz, &offset);
610         set_fs(fs);
611         if (ret != dqblk_sz) {
612                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
613                        filp->f_dentry->d_sb->s_id);
614                 if (ret >= 0)
615                         ret = -ENOSPC;
616         } else
617                 ret = 0;
618
619         return ret;
620 }
621
622 /**
623  * Free dquot entry in data block
624  */
625 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
626                         lustre_quota_version_t version)
627 {
628         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
629         struct lustre_mem_dqinfo *info =
630             &dquot->dq_info->qi_info[dquot->dq_type];
631         struct lustre_disk_dqdbheader *dh;
632         dqbuf_t buf = getdqbuf();
633         int dqstrinblk = lustre_dqstrinblk[version];
634         int ret = 0;
635
636         if (!buf)
637                 return -ENOMEM;
638         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
639                 CDEBUG(D_ERROR,
640                        "VFS: Quota structure has offset to other block (%u) "
641                        "than it should (%u).\n",
642                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
643                 goto out_buf;
644         }
645         if ((ret = read_blk(filp, blk, buf)) < 0) {
646                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
647                 goto out_buf;
648         }
649         dh = (struct lustre_disk_dqdbheader *)buf;
650         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
651         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
652                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
653                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
654                         CDEBUG(D_ERROR,
655                                "VFS: Can't move quota data block (%u) to free "
656                                "list.\n", blk);
657                         goto out_buf;
658                 }
659         } else {
660                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
661                        0, lustre_disk_dqblk_sz[version]);
662                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
663                         /* Insert will write block itself */
664                         if ((ret =
665                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
666                                 CDEBUG(D_ERROR,
667                                        "VFS: Can't insert quota data block "
668                                        "(%u) to free entry list.\n", blk);
669                                 goto out_buf;
670                         }
671                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
672                         CDEBUG(D_ERROR,
673                                "VFS: Can't write quota data block %u\n", blk);
674                         goto out_buf;
675                 }
676         }
677         dquot->dq_off = 0;      /* Quota is now unattached */
678 out_buf:
679         freedqbuf(buf);
680         return ret;
681 }
682
683 /**
684  * Remove reference to dquot from tree
685  */
686 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
687                        lustre_quota_version_t version)
688 {
689         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
690         struct lustre_mem_dqinfo *info =
691             &dquot->dq_info->qi_info[dquot->dq_type];
692         dqbuf_t buf = getdqbuf();
693         int ret = 0;
694         uint newblk;
695         u32 *ref = (u32 *) buf;
696
697         if (!buf)
698                 return -ENOMEM;
699         if ((ret = read_blk(filp, *blk, buf)) < 0) {
700                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", *blk);
701                 goto out_buf;
702         }
703         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
704         if (depth == LUSTRE_DQTREEDEPTH - 1) {
705                 ret = free_dqentry(dquot, newblk, version);
706                 newblk = 0;
707         } else
708                 ret = remove_tree(dquot, &newblk, depth + 1, version);
709         if (ret >= 0 && !newblk) {
710                 int i;
711                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
712                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
713                         /* Block got empty? */ ;
714                 /* don't put the root block into free blk list! */
715                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
716                         put_free_dqblk(filp, info, buf, *blk);
717                         *blk = 0;
718                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
719                         CDEBUG(D_ERROR,
720                                "VFS: Can't write quota tree block %u.\n", *blk);
721         }
722 out_buf:
723         freedqbuf(buf);
724         return ret;
725 }
726
727 /**
728  * Delete dquot from tree
729  */
730 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
731                                 lustre_quota_version_t version)
732 {
733         uint tmp = LUSTRE_DQTREEOFF;
734
735         if (!dquot->dq_off)     /* Even not allocated? */
736                 return 0;
737         return remove_tree(dquot, &tmp, 0, version);
738 }
739
740 /**
741  * Find entry in block
742  */
743 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
744                                  lustre_quota_version_t version)
745 {
746         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
747         dqbuf_t buf = getdqbuf();
748         loff_t ret = 0;
749         int i;
750         struct lustre_disk_dqblk_v2 *ddquot =
751                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
752         int dqblk_sz = lustre_disk_dqblk_sz[version];
753         int dqstrinblk = lustre_dqstrinblk[version];
754
755         LASSERT(version == LUSTRE_QUOTA_V2);
756
757         if (!buf)
758                 return -ENOMEM;
759         if ((ret = read_blk(filp, blk, buf)) < 0) {
760                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
761                 goto out_buf;
762         }
763         if (dquot->dq_id)
764                 for (i = 0; i < dqstrinblk && 
765                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
766                      i++) ;
767         else {                  /* ID 0 as a bit more complicated searching... */
768                 for (i = 0; i < dqstrinblk; i++)
769                         if (!le32_to_cpu(ddquot[i].dqb_id)
770                             && memcmp((char *)&emptydquot[version],
771                                       (char *)&ddquot[i], dqblk_sz))
772                                 break;
773         }
774         if (i == dqstrinblk) {
775                 CDEBUG(D_ERROR,
776                        "VFS: Quota for id %u referenced but not present.\n",
777                        dquot->dq_id);
778                 ret = -EIO;
779                 goto out_buf;
780         } else
781                 ret =
782                     (blk << LUSTRE_DQBLKSIZE_BITS) +
783                     sizeof(struct lustre_disk_dqdbheader) +
784                     i * dqblk_sz;
785 out_buf:
786         freedqbuf(buf);
787         return ret;
788 }
789
790 /**
791  * Find entry for given id in the tree
792  */
793 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
794                                 lustre_quota_version_t version)
795 {
796         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
797         dqbuf_t buf = getdqbuf();
798         loff_t ret = 0;
799         u32 *ref = (u32 *) buf;
800
801         if (!buf)
802                 return -ENOMEM;
803         if ((ret = read_blk(filp, blk, buf)) < 0) {
804                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
805                 goto out_buf;
806         }
807         ret = 0;
808         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
809         if (!blk)               /* No reference? */
810                 goto out_buf;
811         if (depth < LUSTRE_DQTREEDEPTH - 1)
812                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
813         else
814                 ret = find_block_dqentry(dquot, blk, version);
815 out_buf:
816         freedqbuf(buf);
817         return ret;
818 }
819
820 /**
821  * Find entry for given id in the tree - wrapper function
822  */
823 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
824                                   lustre_quota_version_t version)
825 {
826         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
827 }
828
829 int lustre_read_dquot(struct lustre_dquot *dquot)
830 {
831         int type = dquot->dq_type;
832         struct file *filp;
833         mm_segment_t fs;
834         loff_t offset;
835         int ret = 0, dqblk_sz;
836         lustre_quota_version_t version;
837
838         /* Invalidated quota? */
839         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
840                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
841                 return -EIO;
842         }
843
844         version = dquot->dq_info->qi_version;
845         LASSERT(version == LUSTRE_QUOTA_V2);
846         dqblk_sz = lustre_disk_dqblk_sz[version];
847
848         offset = find_dqentry(dquot, version);
849         if (offset <= 0) {      /* Entry not present? */
850                 if (offset < 0)
851                         CDEBUG(D_ERROR,
852                                "VFS: Can't read quota structure for id %u.\n",
853                                dquot->dq_id);
854                 dquot->dq_off = 0;
855                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
856                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
857                 ret = offset;
858         } else {
859                 char ddquot[dqblk_sz];
860
861                 dquot->dq_off = offset;
862                 fs = get_fs();
863                 set_fs(KERNEL_DS);
864                 if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
865                     dqblk_sz) {
866                         if (ret >= 0)
867                                 ret = -EIO;
868                         CDEBUG(D_ERROR,
869                                "VFS: Error while reading quota structure for id "
870                                "%u.\n", dquot->dq_id);
871                         memset(ddquot, 0, dqblk_sz);
872                 } else {
873                         ret = 0;
874                         /* We need to escape back all-zero structure */
875                         if (!memcmp((char *)&fakedquot[version],
876                                     ddquot, dqblk_sz))
877                            ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime =
878                                                                 cpu_to_le64(0);
879                 }
880                 set_fs(fs);
881                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
882         }
883
884         return ret;
885 }
886
887 /**
888  * Commit changes of dquot to disk - it might also mean deleting
889  * it when quota became fake.
890  */
891 int lustre_commit_dquot(struct lustre_dquot *dquot)
892 {
893         int rc = 0;
894         lustre_quota_version_t version = dquot->dq_info->qi_version;
895
896         /* always clear the flag so we don't loop on an IO error... */
897         clear_bit(DQ_MOD_B, &dquot->dq_flags);
898
899         /* The block/inode usage in admin quotafile isn't the real usage
900          * over all cluster, so keep the fake dquot entry on disk is
901          * meaningless, just remove it */
902         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
903                 rc = lustre_delete_dquot(dquot, version);
904         else
905                 rc = lustre_write_dquot(dquot, version);
906
907         if (rc < 0)
908                 return rc;
909
910         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
911                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
912
913         return rc;
914 }
915
916 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
917                              int fakemagics)
918 {
919         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
920         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
921         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
922         struct lustre_disk_dqheader dqhead;
923         ssize_t size;
924         loff_t offset = 0;
925         struct file *fp = lqi->qi_files[type];
926         int rc = 0;
927
928         /* write quotafile header */
929         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
930                                        fake_magics[type] : quota_magics[type]);
931         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
932         size = fp->f_op->write(fp, (char *)&dqhead,
933                                sizeof(struct lustre_disk_dqheader), &offset);
934
935         if (size != sizeof(struct lustre_disk_dqheader)) {
936                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
937                 rc = size;
938         }
939
940         return rc;
941 }
942
943 /**
944  * We need to export this function to initialize quotafile, because we haven't
945  * user level check utility
946  */
947 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
948                                    int fakemagics)
949 {
950         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
951         int rc;
952
953         rc = lustre_init_quota_header(lqi, type, fakemagics);
954         if (rc)
955                 return rc;
956
957         /* write init quota info */
958         memset(dqinfo, 0, sizeof(*dqinfo));
959         dqinfo->dqi_bgrace = MAX_DQ_TIME;
960         dqinfo->dqi_igrace = MAX_IQ_TIME;
961         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
962
963         return lustre_write_quota_info(lqi, type);
964 }
965
/**
 * Initialize the quota file of \a type with the regular (non-fake) magic;
 * same as lustre_init_quota_info_generic() with fakemagics == 0.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
970
971 ssize_t quota_read(struct file *file, struct inode *inode, int type,
972                    uint blk, dqbuf_t buf)
973 {
974         if (file) {
975                 return read_blk(file, blk, buf);
976         } else {
977 #ifndef KERNEL_SUPPORTS_QUOTA_READ
978                 return -ENOTSUPP;
979 #else
980                 struct super_block *sb = inode->i_sb;
981                 memset(buf, 0, LUSTRE_DQBLKSIZE);
982                 return sb->s_op->quota_read(sb, type, (char *)buf,
983                                             LUSTRE_DQBLKSIZE, 
984                                             blk << LUSTRE_DQBLKSIZE_BITS);
985 #endif
986         }
987 }
988
989 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
990                               uint blk, struct list_head *list)
991 {
992         dqbuf_t buf = getdqbuf();
993         loff_t ret = 0;
994         struct lustre_disk_dqdbheader *dqhead =
995             (struct lustre_disk_dqdbheader *)buf;
996         struct dqblk *blk_item;
997         struct dqblk *pos;
998         struct list_head *tmp;
999
1000         if (!buf)
1001                 return -ENOMEM;
1002         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1003                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1004                 goto out_buf;
1005         }
1006         ret = 0;
1007
1008         if (!le32_to_cpu(dqhead->dqdh_entries))
1009                 goto out_buf;
1010
1011         if (list_empty(list)) {
1012                 tmp = list;
1013                 goto done;
1014         }
1015
1016         list_for_each_entry(pos, list, link) {
1017                 if (blk == pos->blk)    /* we got this blk already */
1018                         goto out_buf;
1019                 if (blk > pos->blk)
1020                         continue;
1021                 break;
1022         }
1023         tmp = &pos->link;
1024 done:
1025         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
1026         if (!blk_item) {
1027                 ret = -ENOMEM;
1028                 goto out_buf;
1029         }
1030         blk_item->blk = blk;
1031         INIT_LIST_HEAD(&blk_item->link);
1032
1033         list_add_tail(&blk_item->link, tmp);
1034
1035 out_buf:
1036         freedqbuf(buf);
1037         return ret;
1038 }
1039
1040 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1041                       uint blk, int depth, struct list_head *list)
1042 {
1043         dqbuf_t buf = getdqbuf();
1044         loff_t ret = 0;
1045         int index;
1046         u32 *ref = (u32 *) buf;
1047
1048         if (!buf)
1049                 return -ENOMEM;
1050         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1051                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1052                 goto out_buf;
1053         }
1054         ret = 0;
1055
1056         for (index = 0; index <= 0xff && !ret; index++) {
1057                 blk = le32_to_cpu(ref[index]);
1058                 if (!blk)       /* No reference */
1059                         continue;
1060
1061                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1062                         ret = walk_tree_dqentry(filp, inode, type, blk,
1063                                                 depth + 1, list);
1064                 else
1065                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1066         }
1067 out_buf:
1068         freedqbuf(buf);
1069         return ret;
1070 }
1071
1072 /**
1073  * Walk through the quota file (v2 format) to get all ids with quota limit
1074  */
1075 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1076                     struct list_head *list)
1077 {
1078         struct list_head blk_list;
1079         struct dqblk *blk_item, *tmp;
1080         dqbuf_t buf = NULL;
1081         struct lustre_disk_dqblk_v2 *ddquot;
1082         int rc;
1083         lustre_quota_version_t version;
1084
1085         ENTRY;
1086
1087         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1088                 version = LUSTRE_QUOTA_V2;
1089         else {
1090                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1091                 RETURN(-EINVAL);
1092         }
1093
1094         if (!list_empty(list)) {
1095                 CDEBUG(D_ERROR, "not empty list\n");
1096                 RETURN(-EINVAL);
1097         }
1098
1099         INIT_LIST_HEAD(&blk_list);
1100         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1101         if (rc) {
1102                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1103                 GOTO(out_free, rc);
1104         }
1105         if (list_empty(&blk_list))
1106                 RETURN(0);
1107
1108         buf = getdqbuf();
1109         if (!buf)
1110                 RETURN(-ENOMEM);
1111         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1112
1113         list_for_each_entry(blk_item, &blk_list, link) {
1114                 loff_t ret = 0;
1115                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1116
1117                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1118                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1119                         CDEBUG(D_ERROR,
1120                                "VFS: Can't read quota tree block %u.\n",
1121                                blk_item->blk);
1122                         GOTO(out_free, rc = ret);
1123                 }
1124
1125                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1126                         struct dquot_id *dqid;
1127                         /* skip empty entry */
1128                         if (!memcmp((char *)&emptydquot[version],
1129                                     (char *)&ddquot[i], dqblk_sz))
1130                                 continue;
1131
1132                         dqid = kmalloc(sizeof(*dqid), GFP_NOFS);
1133                         if (!dqid) 
1134                                 GOTO(out_free, rc = -ENOMEM);
1135
1136                         dqid->di_id = le32_to_cpu(ddquot[i].dqb_id);
1137                         INIT_LIST_HEAD(&dqid->di_link);
1138                         list_add(&dqid->di_link, list);
1139                 }
1140         }
1141
1142 out_free:
1143         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1144                 list_del_init(&blk_item->link);
1145                 kfree(blk_item);
1146         }
1147         if (buf)
1148                 freedqbuf(buf);
1149
1150         RETURN(rc);
1151 }
1152
1153
1154 EXPORT_SYMBOL(lustre_read_quota_info);
1155 EXPORT_SYMBOL(lustre_write_quota_info);
1156 EXPORT_SYMBOL(lustre_check_quota_file);
1157 EXPORT_SYMBOL(lustre_read_dquot);
1158 EXPORT_SYMBOL(lustre_commit_dquot);
1159 EXPORT_SYMBOL(lustre_init_quota_info);
1160 EXPORT_SYMBOL(lustre_get_qids);
1161 #endif