Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #include <linux/quotaio_v1.h>
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
/* Per-format-version lookup tables; every table is indexed by
 * lustre_quota_version_t. */

/* Expected on-disk header version for each quota type; check_quota_file()
 * compares the header's dqh_version against the entry for its type. */
64 static const uint lustre_initqversions[][MAXQUOTAS] = {
65         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
66 };
67
/* Number of dquot entry slots scanned in one data block. */
68 static const int lustre_dqstrinblk[] = {
69         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
70 };
71
/* Size in bytes of one on-disk dquot entry. */
72 static const int lustre_disk_dqblk_sz[] = {
73         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
74 };
75
/* Entry whose only non-zero field is dqb_itime == 1 (little-endian).
 * lustre_write_dquot() stores this pattern in place of an all-zero entry,
 * because an all-zero slot is treated as free by the rest of the code. */
76 static const union
77 {
78         struct lustre_disk_dqblk_v2 r1;
79 } fakedquot[] = {
80         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
81 };
82
/* All-zero entry; slots are memcmp()'d against it to detect free space. */
83 static const union
84 {
85         struct lustre_disk_dqblk_v2 r1;
86 } emptydquot[] = {
87         [LUSTRE_QUOTA_V2] = {.r1 = { 0 } }
88 };
89
90 int check_quota_file(struct file *f, struct inode *inode, int type, 
91                      lustre_quota_version_t version)
92 {
93         struct lustre_disk_dqheader dqhead;
94         mm_segment_t fs;
95         ssize_t size;
96         loff_t offset = 0;
97         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
98         const uint *quota_versions = lustre_initqversions[version];
99
100         if (f) {
101                 fs = get_fs();
102                 set_fs(KERNEL_DS);
103                 size = f->f_op->read(f, (char *)&dqhead,
104                                      sizeof(struct lustre_disk_dqheader), 
105                                      &offset);
106                 set_fs(fs);
107         } else { 
108 #ifndef KERNEL_SUPPORTS_QUOTA_READ
109                 size = 0;
110 #else
111                 struct super_block *sb = inode->i_sb;
112                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
113                                             sizeof(struct lustre_disk_dqheader),
114                                             0);
115 #endif
116         }
117         if (size != sizeof(struct lustre_disk_dqheader))
118                 return -EINVAL;
119         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
120             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
121                 return -EINVAL;
122         return 0;
123 }
124
125 /**
126  * Check whether given file is really lustre admin quotafile
127  */
128 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
129 {
130         struct file *f = lqi->qi_files[type];
131         return check_quota_file(f, NULL, type, lqi->qi_version);
132 }
133
134 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
135 {
136         mm_segment_t fs;
137         struct lustre_disk_dqinfo dinfo;
138         ssize_t size;
139         loff_t offset = LUSTRE_DQINFOOFF;
140
141         fs = get_fs();
142         set_fs(KERNEL_DS);
143         size = f->f_op->read(f, (char *)&dinfo, 
144                              sizeof(struct lustre_disk_dqinfo), &offset);
145         set_fs(fs);
146         if (size != sizeof(struct lustre_disk_dqinfo)) {
147                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
148                        f->f_vfsmnt->mnt_sb->s_id);
149                 return -EINVAL;
150         }
151         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
152         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
153         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
154         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
155         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
156         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
157         return 0;
158 }
159
160 /**
161  * Read information header from quota file
162  */
163 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
164 {
165         return lustre_read_quota_file_info(lqi->qi_files[type],
166                                            &lqi->qi_info[type]);
167 }
168
169 /**
170  * Write information header to quota file
171  */
172 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
173 {
174         mm_segment_t fs;
175         struct lustre_disk_dqinfo dinfo;
176         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
177         struct file *f = lqi->qi_files[type];
178         ssize_t size;
179         loff_t offset = LUSTRE_DQINFOOFF;
180
181         info->dqi_flags &= ~DQF_INFO_DIRTY;
182         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
183         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
184         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
185         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
186         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
187         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
188         fs = get_fs();
189         set_fs(KERNEL_DS);
190         size = f->f_op->write(f, (char *)&dinfo, 
191                               sizeof(struct lustre_disk_dqinfo), &offset);
192         set_fs(fs);
193         if (size != sizeof(struct lustre_disk_dqinfo)) {
194                 CDEBUG(D_WARNING, 
195                        "Can't write info structure on device %s.\n",
196                        f->f_vfsmnt->mnt_sb->s_id);
197                 return -1;
198         }
199         return 0;
200 }
201
202 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
203                  lustre_quota_version_t version)
204 {
205         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
206
207         LASSERT(version == LUSTRE_QUOTA_V2);
208
209         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
210         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
211         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
212         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
213         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
214         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
215         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
216         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
217 }
218
219 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
220                        qid_t id, lustre_quota_version_t version)
221 {
222         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
223
224         LASSERT(version == LUSTRE_QUOTA_V2);
225
226         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
227         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
228         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
229         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
230         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
231         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
232         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
233         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
234         dqblk->dqb_id = cpu_to_le32(id);
235
236         return 0;
237 }
238
239 dqbuf_t getdqbuf(void)
240 {
241         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
242         if (!buf)
243                 CDEBUG(D_WARNING, 
244                        "VFS: Not enough memory for quota buffers.\n");
245         return buf;
246 }
247
248 void freedqbuf(dqbuf_t buf)
249 {
250         kfree(buf);
251 }
252
253 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
254 {
255         mm_segment_t fs;
256         ssize_t ret;
257         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
258
259         memset(buf, 0, LUSTRE_DQBLKSIZE);
260         fs = get_fs();
261         set_fs(KERNEL_DS);
262         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
263         set_fs(fs);
264         return ret;
265 }
266
267 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
268 {
269         mm_segment_t fs;
270         ssize_t ret;
271         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
272
273         fs = get_fs();
274         set_fs(KERNEL_DS);
275         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
276         set_fs(fs);
277         return ret;
278 }
279
280 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
281 {
282         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
283 }
284
285 /**
286  * Remove empty block from list and return it
287  */
288 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
289 {
290         dqbuf_t buf = getdqbuf();
291         struct lustre_disk_dqdbheader *dh =
292             (struct lustre_disk_dqdbheader *)buf;
293         int ret, blk;
294
295         if (!buf)
296                 return -ENOMEM;
297         if (info->dqi_free_blk) {
298                 blk = info->dqi_free_blk;
299                 if ((ret = read_blk(filp, blk, buf)) < 0)
300                         goto out_buf;
301                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
302         } else {
303                 memset(buf, 0, LUSTRE_DQBLKSIZE);
304                 /* Assure block allocation... */
305                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
306                         goto out_buf;
307                 blk = info->dqi_blocks++;
308         }
309         lustre_mark_info_dirty(info);
310         ret = blk;
311 out_buf:
312         freedqbuf(buf);
313         return ret;
314 }
315
316 /**
317  * Insert empty block to the list
318  */
319 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
320                    dqbuf_t buf, uint blk)
321 {
322         struct lustre_disk_dqdbheader *dh =
323             (struct lustre_disk_dqdbheader *)buf;
324         int err;
325
326         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
327         dh->dqdh_prev_free = cpu_to_le32(0);
328         dh->dqdh_entries = cpu_to_le16(0);
329         info->dqi_free_blk = blk;
330         lustre_mark_info_dirty(info);
331         if ((err = write_blk(filp, blk, buf)) < 0)
332                 /* Some strange block. We had better leave it... */
333                 return err;
334         return 0;
335 }
336
337 /**
338  * Remove given block from the list of blocks with free entries
339  */
340 int remove_free_dqentry(struct file *filp,
341                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
342                         uint blk)
343 {
344         dqbuf_t tmpbuf = getdqbuf();
345         struct lustre_disk_dqdbheader *dh =
346             (struct lustre_disk_dqdbheader *)buf;
347         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
348             le32_to_cpu(dh->dqdh_prev_free);
349         int err;
350
351         if (!tmpbuf)
352                 return -ENOMEM;
353         if (nextblk) {
354                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
355                         goto out_buf;
356                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
357                     dh->dqdh_prev_free;
358                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
359                         goto out_buf;
360         }
361         if (prevblk) {
362                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
363                         goto out_buf;
364                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
365                     dh->dqdh_next_free;
366                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
367                         goto out_buf;
368         } else {
369                 info->dqi_free_entry = nextblk;
370                 lustre_mark_info_dirty(info);
371         }
372         freedqbuf(tmpbuf);
373         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
374         if (write_blk(filp, blk, buf) < 0)
375                 /* No matter whether write succeeds block is out of list */
376                 CDEBUG(D_ERROR, 
377                        "VFS: Can't write block (%u) with free entries.\n", blk);
378         return 0;
379 out_buf:
380         freedqbuf(tmpbuf);
381         return err;
382 }
383
384 /**
385  * Insert given block to the beginning of list with free entries
386  */
387 int insert_free_dqentry(struct file *filp,
388                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
389                         uint blk)
390 {
391         dqbuf_t tmpbuf = getdqbuf();
392         struct lustre_disk_dqdbheader *dh =
393             (struct lustre_disk_dqdbheader *)buf;
394         int err;
395
396         if (!tmpbuf)
397                 return -ENOMEM;
398         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
399         dh->dqdh_prev_free = cpu_to_le32(0);
400         if ((err = write_blk(filp, blk, buf)) < 0)
401                 goto out_buf;
402         if (info->dqi_free_entry) {
403                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
404                         goto out_buf;
405                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
406                     cpu_to_le32(blk);
407                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
408                         goto out_buf;
409         }
410         freedqbuf(tmpbuf);
411         info->dqi_free_entry = blk;
412         lustre_mark_info_dirty(info);
413         return 0;
414 out_buf:
415         freedqbuf(tmpbuf);
416         return err;
417 }
418
419
420
421 /**
422  * Find space for dquot
423  */
424 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
425                               lustre_quota_version_t version)
426 {
427         struct lustre_quota_info *lqi = dquot->dq_info;
428         struct file *filp = lqi->qi_files[dquot->dq_type];
429         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
430         uint blk, i;
431         struct lustre_disk_dqdbheader *dh;
432         void *ddquot;
433         int dqblk_sz = lustre_disk_dqblk_sz[version];
434         int dqstrinblk = lustre_dqstrinblk[version];
435         dqbuf_t buf;
436
437         *err = 0;
438         if (!(buf = getdqbuf())) {
439                 *err = -ENOMEM;
440                 return 0;
441         }
442         dh = (struct lustre_disk_dqdbheader *)buf;
443         ddquot = GETENTRIES(buf, version);
444         if (info->dqi_free_entry) {
445                 blk = info->dqi_free_entry;
446                 if ((*err = read_blk(filp, blk, buf)) < 0)
447                         goto out_buf;
448         } else {
449                 blk = get_free_dqblk(filp, info);
450                 if ((int)blk < 0) {
451                         *err = blk;
452                         freedqbuf(buf);
453                         return 0;
454                 }
455                 memset(buf, 0, LUSTRE_DQBLKSIZE);
456                 info->dqi_free_entry = blk; /* This is enough as block is 
457                                                already zeroed and entry list
458                                                is empty... */
459                 lustre_mark_info_dirty(info);
460         }
461
462         /* Will block be full */
463         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
464                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
465                         CDEBUG(D_ERROR, 
466                                "VFS: find_free_dqentry(): Can't remove block "
467                                "(%u) from entry free list.\n", blk);
468                         goto out_buf;
469                 }
470         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
471         /* Find free structure in block */
472         for (i = 0; i < dqstrinblk &&
473              memcmp((char *)&emptydquot[version],
474                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
475              i++);
476
477         if (i == dqstrinblk) {
478                 CDEBUG(D_ERROR, 
479                        "VFS: find_free_dqentry(): Data block full but it "
480                        "shouldn't.\n");
481                 *err = -EIO;
482                 goto out_buf;
483         }
484
485         if ((*err = write_blk(filp, blk, buf)) < 0) {
486                 CDEBUG(D_ERROR,
487                        "VFS: find_free_dqentry(): Can't write quota data "
488                        "block %u.\n", blk);
489                 goto out_buf;
490         }
491         dquot->dq_off =
492             (blk << LUSTRE_DQBLKSIZE_BITS) +
493             sizeof(struct lustre_disk_dqdbheader) +
494             i * dqblk_sz;
495         freedqbuf(buf);
496         return blk;
497 out_buf:
498         freedqbuf(buf);
499         return 0;
500 }
501
502 /**
503  * Insert reference to structure into the trie
504  */
505 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
506                           lustre_quota_version_t version)
507 {
508         struct lustre_quota_info *lqi = dquot->dq_info;
509         struct file *filp = lqi->qi_files[dquot->dq_type];
510         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
511         dqbuf_t buf;
512         int ret = 0, newson = 0, newact = 0;
513         u32 *ref;
514         uint newblk;
515
516         if (!(buf = getdqbuf()))
517                 return -ENOMEM;
518         if (!*treeblk) {
519                 ret = get_free_dqblk(filp, info);
520                 if (ret < 0)
521                         goto out_buf;
522                 *treeblk = ret;
523                 memset(buf, 0, LUSTRE_DQBLKSIZE);
524                 newact = 1;
525         } else {
526                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
527                         CDEBUG(D_ERROR,
528                                "VFS: Can't read tree quota block %u.\n",
529                                *treeblk);
530                         goto out_buf;
531                 }
532         }
533         ref = (u32 *) buf;
534         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
535         if (!newblk)
536                 newson = 1;
537         if (depth == LUSTRE_DQTREEDEPTH - 1) {
538
539                 if (newblk) {
540                         CDEBUG(D_ERROR, 
541                                "VFS: Inserting already present quota entry "
542                                "(block %u).\n",
543                                ref[GETIDINDEX(dquot->dq_id, depth)]);
544                         ret = -EIO;
545                         goto out_buf;
546                 }
547
548                 newblk = find_free_dqentry(dquot, &ret, version);
549         } else
550                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
551         if (newson && ret >= 0) {
552                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
553                 ret = write_blk(filp, *treeblk, buf);
554         } else if (newact && ret < 0)
555                 put_free_dqblk(filp, info, buf, *treeblk);
556 out_buf:
557         freedqbuf(buf);
558         return ret;
559 }
560
561 /**
562  * Wrapper for inserting quota structure into tree
563  */
564 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
565                                  lustre_quota_version_t version)
566 {
567         int tmp = LUSTRE_DQTREEOFF;
568         return do_insert_tree(dquot, &tmp, 0, version);
569 }
570
571 /**
572  * We don't have to be afraid of deadlocks as we never have quotas on
573  * quota files...
574  */
575 static int lustre_write_dquot(struct lustre_dquot *dquot, 
576                               lustre_quota_version_t version)
577 {
578         int type = dquot->dq_type;
579         struct file *filp;
580         mm_segment_t fs;
581         loff_t offset;
582         ssize_t ret;
583         int dqblk_sz = lustre_disk_dqblk_sz[version];
584         struct lustre_disk_dqblk_v2 ddquot;
585
586         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
587         if (ret < 0)
588                 return ret;
589
590         if (!dquot->dq_off)
591                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
592                         CDEBUG(D_ERROR,
593                                "VFS: Error %Zd occurred while creating "
594                                "quota.\n", ret);
595                         return ret;
596                 }
597         filp = dquot->dq_info->qi_files[type];
598         offset = dquot->dq_off;
599         /* Argh... We may need to write structure full of zeroes but that would
600          * be treated as an empty place by the rest of the code. Format change
601          * would be definitely cleaner but the problems probably are not worth
602          * it */
603         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
604                 ddquot.dqb_itime = cpu_to_le64(1);
605         fs = get_fs();
606         set_fs(KERNEL_DS);
607         ret = filp->f_op->write(filp, (char *)&ddquot,
608                                 dqblk_sz, &offset);
609         set_fs(fs);
610         if (ret != dqblk_sz) {
611                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
612                        filp->f_dentry->d_sb->s_id);
613                 if (ret >= 0)
614                         ret = -ENOSPC;
615         } else
616                 ret = 0;
617
618         return ret;
619 }
620
621 /**
622  * Free dquot entry in data block
623  */
624 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
625                         lustre_quota_version_t version)
626 {
627         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
628         struct lustre_mem_dqinfo *info =
629             &dquot->dq_info->qi_info[dquot->dq_type];
630         struct lustre_disk_dqdbheader *dh;
631         dqbuf_t buf = getdqbuf();
632         int dqstrinblk = lustre_dqstrinblk[version];
633         int ret = 0;
634
635         if (!buf)
636                 return -ENOMEM;
637         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
638                 CDEBUG(D_ERROR,
639                        "VFS: Quota structure has offset to other block (%u) "
640                        "than it should (%u).\n",
641                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
642                 goto out_buf;
643         }
644         if ((ret = read_blk(filp, blk, buf)) < 0) {
645                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
646                 goto out_buf;
647         }
648         dh = (struct lustre_disk_dqdbheader *)buf;
649         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
650         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
651                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
652                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
653                         CDEBUG(D_ERROR,
654                                "VFS: Can't move quota data block (%u) to free "
655                                "list.\n", blk);
656                         goto out_buf;
657                 }
658         } else {
659                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
660                        0, lustre_disk_dqblk_sz[version]);
661                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
662                         /* Insert will write block itself */
663                         if ((ret =
664                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
665                                 CDEBUG(D_ERROR,
666                                        "VFS: Can't insert quota data block "
667                                        "(%u) to free entry list.\n", blk);
668                                 goto out_buf;
669                         }
670                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
671                         CDEBUG(D_ERROR,
672                                "VFS: Can't write quota data block %u\n", blk);
673                         goto out_buf;
674                 }
675         }
676         dquot->dq_off = 0;      /* Quota is now unattached */
677 out_buf:
678         freedqbuf(buf);
679         return ret;
680 }
681
682 /**
683  * Remove reference to dquot from tree
684  */
685 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
686                        lustre_quota_version_t version)
687 {
688         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
689         struct lustre_mem_dqinfo *info =
690             &dquot->dq_info->qi_info[dquot->dq_type];
691         dqbuf_t buf = getdqbuf();
692         int ret = 0;
693         uint newblk;
694         u32 *ref = (u32 *) buf;
695
696         if (!buf)
697                 return -ENOMEM;
698         if ((ret = read_blk(filp, *blk, buf)) < 0) {
699                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", *blk);
700                 goto out_buf;
701         }
702         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
703         if (depth == LUSTRE_DQTREEDEPTH - 1) {
704                 ret = free_dqentry(dquot, newblk, version);
705                 newblk = 0;
706         } else
707                 ret = remove_tree(dquot, &newblk, depth + 1, version);
708         if (ret >= 0 && !newblk) {
709                 int i;
710                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
711                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
712                         /* Block got empty? */ ;
713                 /* don't put the root block into free blk list! */
714                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
715                         put_free_dqblk(filp, info, buf, *blk);
716                         *blk = 0;
717                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
718                         CDEBUG(D_ERROR,
719                                "VFS: Can't write quota tree block %u.\n", *blk);
720         }
721 out_buf:
722         freedqbuf(buf);
723         return ret;
724 }
725
726 /**
727  * Delete dquot from tree
728  */
729 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
730                                 lustre_quota_version_t version)
731 {
732         uint tmp = LUSTRE_DQTREEOFF;
733
734         if (!dquot->dq_off)     /* Even not allocated? */
735                 return 0;
736         return remove_tree(dquot, &tmp, 0, version);
737 }
738
739 /**
740  * Find entry in block
741  */
742 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
743                                  lustre_quota_version_t version)
744 {
745         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
746         dqbuf_t buf = getdqbuf();
747         loff_t ret = 0;
748         int i;
749         struct lustre_disk_dqblk_v2 *ddquot =
750                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
751         int dqblk_sz = lustre_disk_dqblk_sz[version];
752         int dqstrinblk = lustre_dqstrinblk[version];
753
754         LASSERT(version == LUSTRE_QUOTA_V2);
755
756         if (!buf)
757                 return -ENOMEM;
758         if ((ret = read_blk(filp, blk, buf)) < 0) {
759                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
760                 goto out_buf;
761         }
762         if (dquot->dq_id)
763                 for (i = 0; i < dqstrinblk && 
764                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
765                      i++) ;
766         else {                  /* ID 0 as a bit more complicated searching... */
767                 for (i = 0; i < dqstrinblk; i++)
768                         if (!le32_to_cpu(ddquot[i].dqb_id)
769                             && memcmp((char *)&emptydquot[version],
770                                       (char *)&ddquot[i], dqblk_sz))
771                                 break;
772         }
773         if (i == dqstrinblk) {
774                 CDEBUG(D_ERROR,
775                        "VFS: Quota for id %u referenced but not present.\n",
776                        dquot->dq_id);
777                 ret = -EIO;
778                 goto out_buf;
779         } else
780                 ret =
781                     (blk << LUSTRE_DQBLKSIZE_BITS) +
782                     sizeof(struct lustre_disk_dqdbheader) +
783                     i * dqblk_sz;
784 out_buf:
785         freedqbuf(buf);
786         return ret;
787 }
788
789 /**
790  * Find entry for given id in the tree
791  */
792 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
793                                 lustre_quota_version_t version)
794 {
795         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
796         dqbuf_t buf = getdqbuf();
797         loff_t ret = 0;
798         u32 *ref = (u32 *) buf;
799
800         if (!buf)
801                 return -ENOMEM;
802         if ((ret = read_blk(filp, blk, buf)) < 0) {
803                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
804                 goto out_buf;
805         }
806         ret = 0;
807         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
808         if (!blk)               /* No reference? */
809                 goto out_buf;
810         if (depth < LUSTRE_DQTREEDEPTH - 1)
811                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
812         else
813                 ret = find_block_dqentry(dquot, blk, version);
814 out_buf:
815         freedqbuf(buf);
816         return ret;
817 }
818
819 /**
820  * Find entry for given id in the tree - wrapper function
821  */
822 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
823                                   lustre_quota_version_t version)
824 {
825         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
826 }
827
828 int lustre_read_dquot(struct lustre_dquot *dquot)
829 {
830         int type = dquot->dq_type;
831         struct file *filp;
832         mm_segment_t fs;
833         loff_t offset;
834         int ret = 0, dqblk_sz;
835         lustre_quota_version_t version;
836
837         /* Invalidated quota? */
838         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
839                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
840                 return -EIO;
841         }
842
843         version = dquot->dq_info->qi_version;
844         LASSERT(version == LUSTRE_QUOTA_V2);
845         dqblk_sz = lustre_disk_dqblk_sz[version];
846
847         offset = find_dqentry(dquot, version);
848         if (offset <= 0) {      /* Entry not present? */
849                 if (offset < 0)
850                         CDEBUG(D_ERROR,
851                                "VFS: Can't read quota structure for id %u.\n",
852                                dquot->dq_id);
853                 dquot->dq_off = 0;
854                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
855                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
856                 ret = offset;
857         } else {
858                 struct lustre_disk_dqblk_v2 ddquot;
859
860                 dquot->dq_off = offset;
861                 fs = get_fs();
862                 set_fs(KERNEL_DS);
863                 if ((ret = filp->f_op->read(filp, (char *)&ddquot,
864                                             dqblk_sz, &offset)) != dqblk_sz) {
865                         if (ret >= 0)
866                                 ret = -EIO;
867                         CDEBUG(D_ERROR,
868                                "VFS: Error while reading quota structure for id "
869                                "%u.\n", dquot->dq_id);
870                         memset((char *)&ddquot, 0, dqblk_sz);
871                 } else {
872                         ret = 0;
873                         /* We need to escape back all-zero structure */
874                         if (!memcmp((char *)&fakedquot[version],
875                                     (char *)&ddquot, dqblk_sz))
876                                 ddquot.dqb_itime = cpu_to_le64(0);
877                 }
878                 set_fs(fs);
879                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
880         }
881
882         return ret;
883 }
884
885 /**
886  * Commit changes of dquot to disk - it might also mean deleting
887  * it when quota became fake.
888  */
889 int lustre_commit_dquot(struct lustre_dquot *dquot)
890 {
891         int rc = 0;
892         lustre_quota_version_t version = dquot->dq_info->qi_version;
893
894         /* always clear the flag so we don't loop on an IO error... */
895         clear_bit(DQ_MOD_B, &dquot->dq_flags);
896
897         /* The block/inode usage in admin quotafile isn't the real usage
898          * over all cluster, so keep the fake dquot entry on disk is
899          * meaningless, just remove it */
900         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
901                 rc = lustre_delete_dquot(dquot, version);
902         else
903                 rc = lustre_write_dquot(dquot, version);
904
905         if (rc < 0)
906                 return rc;
907
908         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
909                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
910
911         return rc;
912 }
913
914 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
915                              int fakemagics)
916 {
917         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
918         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
919         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
920         struct lustre_disk_dqheader dqhead;
921         ssize_t size;
922         loff_t offset = 0;
923         struct file *fp = lqi->qi_files[type];
924         int rc = 0;
925
926         /* write quotafile header */
927         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
928                                        fake_magics[type] : quota_magics[type]);
929         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
930         size = fp->f_op->write(fp, (char *)&dqhead,
931                                sizeof(struct lustre_disk_dqheader), &offset);
932
933         if (size != sizeof(struct lustre_disk_dqheader)) {
934                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
935                 rc = size;
936         }
937
938         return rc;
939 }
940
941 /**
942  * We need to export this function to initialize quotafile, because we haven't
943  * user level check utility
944  */
945 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
946                                    int fakemagics)
947 {
948         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
949         int rc;
950
951         rc = lustre_init_quota_header(lqi, type, fakemagics);
952         if (rc)
953                 return rc;
954
955         /* write init quota info */
956         memset(dqinfo, 0, sizeof(*dqinfo));
957         dqinfo->dqi_bgrace = MAX_DQ_TIME;
958         dqinfo->dqi_igrace = MAX_IQ_TIME;
959         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
960
961         return lustre_write_quota_info(lqi, type);
962 }
963
/**
 * Initialize the quota file of \a type; thin wrapper that calls
 * lustre_init_quota_info_generic() with fakemagics set to 0.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
968
969 ssize_t quota_read(struct file *file, struct inode *inode, int type,
970                    uint blk, dqbuf_t buf)
971 {
972         if (file) {
973                 return read_blk(file, blk, buf);
974         } else {
975 #ifndef KERNEL_SUPPORTS_QUOTA_READ
976                 return -ENOTSUPP;
977 #else
978                 struct super_block *sb = inode->i_sb;
979                 memset(buf, 0, LUSTRE_DQBLKSIZE);
980                 return sb->s_op->quota_read(sb, type, (char *)buf,
981                                             LUSTRE_DQBLKSIZE, 
982                                             blk << LUSTRE_DQBLKSIZE_BITS);
983 #endif
984         }
985 }
986
987 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
988                               uint blk, struct list_head *list)
989 {
990         dqbuf_t buf = getdqbuf();
991         loff_t ret = 0;
992         struct lustre_disk_dqdbheader *dqhead =
993             (struct lustre_disk_dqdbheader *)buf;
994         struct dqblk *blk_item;
995         struct dqblk *pos;
996         struct list_head *tmp;
997
998         if (!buf)
999                 return -ENOMEM;
1000         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1001                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1002                 goto out_buf;
1003         }
1004         ret = 0;
1005
1006         if (!le32_to_cpu(dqhead->dqdh_entries))
1007                 goto out_buf;
1008
1009         if (list_empty(list)) {
1010                 tmp = list;
1011                 goto done;
1012         }
1013
1014         list_for_each_entry(pos, list, link) {
1015                 if (blk == pos->blk)    /* we got this blk already */
1016                         goto out_buf;
1017                 if (blk > pos->blk)
1018                         continue;
1019                 break;
1020         }
1021         tmp = &pos->link;
1022 done:
1023         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
1024         if (!blk_item) {
1025                 ret = -ENOMEM;
1026                 goto out_buf;
1027         }
1028         blk_item->blk = blk;
1029         INIT_LIST_HEAD(&blk_item->link);
1030
1031         list_add_tail(&blk_item->link, tmp);
1032
1033 out_buf:
1034         freedqbuf(buf);
1035         return ret;
1036 }
1037
1038 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1039                       uint blk, int depth, struct list_head *list)
1040 {
1041         dqbuf_t buf = getdqbuf();
1042         loff_t ret = 0;
1043         int index;
1044         u32 *ref = (u32 *) buf;
1045
1046         if (!buf)
1047                 return -ENOMEM;
1048         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1049                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1050                 goto out_buf;
1051         }
1052         ret = 0;
1053
1054         for (index = 0; index <= 0xff && !ret; index++) {
1055                 blk = le32_to_cpu(ref[index]);
1056                 if (!blk)       /* No reference */
1057                         continue;
1058
1059                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1060                         ret = walk_tree_dqentry(filp, inode, type, blk,
1061                                                 depth + 1, list);
1062                 else
1063                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1064         }
1065 out_buf:
1066         freedqbuf(buf);
1067         return ret;
1068 }
1069
1070 /**
1071  * Walk through the quota file (v2 format) to get all ids with quota limit
1072  */
1073 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1074                     struct list_head *list)
1075 {
1076         struct list_head blk_list;
1077         struct dqblk *blk_item, *tmp;
1078         dqbuf_t buf = NULL;
1079         struct lustre_disk_dqblk_v2 *ddquot;
1080         int rc;
1081         lustre_quota_version_t version;
1082
1083         ENTRY;
1084
1085         LASSERT(ergo(fp == NULL, inode != NULL));
1086
1087         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1088                 version = LUSTRE_QUOTA_V2;
1089         else {
1090                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1091                 RETURN(-EINVAL);
1092         }
1093
1094         if (!list_empty(list)) {
1095                 CDEBUG(D_ERROR, "not empty list\n");
1096                 RETURN(-EINVAL);
1097         }
1098
1099         INIT_LIST_HEAD(&blk_list);
1100         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1101         if (rc) {
1102                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1103                 GOTO(out_free, rc);
1104         }
1105         if (list_empty(&blk_list))
1106                 RETURN(0);
1107
1108         buf = getdqbuf();
1109         if (!buf)
1110                 RETURN(-ENOMEM);
1111         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1112
1113         list_for_each_entry(blk_item, &blk_list, link) {
1114                 loff_t ret = 0;
1115                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1116
1117                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1118                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1119                         CDEBUG(D_ERROR,
1120                                "VFS: Can't read quota tree block %u.\n",
1121                                blk_item->blk);
1122                         GOTO(out_free, rc = ret);
1123                 }
1124
1125                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1126                         struct dquot_id *dqid;
1127                         /* skip empty entry */
1128                         if (!memcmp((char *)&emptydquot[version],
1129                                     (char *)&ddquot[i], dqblk_sz))
1130                                 continue;
1131
1132                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1133                         if (!dqid)
1134                                 GOTO(out_free, rc = -ENOMEM);
1135
1136                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1137                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1138                                          QI_SET : 0;
1139                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1140                                          QB_SET : 0;
1141
1142                         INIT_LIST_HEAD(&dqid->di_link);
1143                         list_add(&dqid->di_link, list);
1144                 }
1145         }
1146
1147 out_free:
1148         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1149                 list_del_init(&blk_item->link);
1150                 kfree(blk_item);
1151         }
1152         if (buf)
1153                 freedqbuf(buf);
1154
1155         RETURN(rc);
1156 }
1157
1158
1159 EXPORT_SYMBOL(lustre_read_quota_info);
1160 EXPORT_SYMBOL(lustre_write_quota_info);
1161 EXPORT_SYMBOL(lustre_check_quota_file);
1162 EXPORT_SYMBOL(lustre_read_dquot);
1163 EXPORT_SYMBOL(lustre_commit_dquot);
1164 EXPORT_SYMBOL(lustre_init_quota_info);
1165 EXPORT_SYMBOL(lustre_get_qids);
1166 #endif