Whamcloud - gitweb
Branch b1_8
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #ifdef HAVE_QUOTAIO_V1_H
54 # include <linux/quotaio_v1.h>
55 #endif
56
57 #include <asm/byteorder.h>
58 #include <asm/uaccess.h>
59
60 #include <lustre_quota.h>
61 #include <obd_support.h>
62 #include "lustre_quota_fmt.h"
63
64 #ifdef HAVE_QUOTA_SUPPORT
65
66 static const uint lustre_initqversions[][MAXQUOTAS] = {
67         [LUSTRE_QUOTA_V1] = LUSTRE_INITQVERSIONS_V1,
68         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
69 };
70
71 static const int lustre_dqstrinblk[] = {
72         [LUSTRE_QUOTA_V1] = LUSTRE_DQSTRINBLK,
73         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
74 };
75
76 static const int lustre_disk_dqblk_sz[] = {
77         [LUSTRE_QUOTA_V1] = sizeof(struct lustre_disk_dqblk),
78         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
79 };
80
81 static const union
82 {
83         struct lustre_disk_dqblk    r0;
84         struct lustre_disk_dqblk_v2 r1;
85 } fakedquot[] = {
86         [LUSTRE_QUOTA_V1] = {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
87         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
88 };
89
90 static const union
91 {
92         struct lustre_disk_dqblk    r0;
93         struct lustre_disk_dqblk_v2 r1;
94 } emptydquot[] = {
95         [LUSTRE_QUOTA_V1] = {.r0 = { 0 } },
96         [LUSTRE_QUOTA_V2] = {.r1 = { 0 } }
97 };
98
99 int check_quota_file(struct file *f, struct inode *inode, int type, 
100                      lustre_quota_version_t version)
101 {
102         struct lustre_disk_dqheader dqhead;
103         mm_segment_t fs;
104         ssize_t size;
105         loff_t offset = 0;
106         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
107         const uint *quota_versions = lustre_initqversions[version];
108
109         if (!inode && !f) {
110                 CERROR("check_quota_file failed!\n");
111                 libcfs_debug_dumpstack(NULL);
112                 return -EINVAL;
113         }
114
115         if (f) {
116                 fs = get_fs();
117                 set_fs(KERNEL_DS);
118                 size = f->f_op->read(f, (char *)&dqhead,
119                                      sizeof(struct lustre_disk_dqheader), 
120                                      &offset);
121                 set_fs(fs);
122         } else { 
123 #ifndef KERNEL_SUPPORTS_QUOTA_READ
124                 size = 0;
125 #else
126                 struct super_block *sb = inode->i_sb;
127                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
128                                             sizeof(struct lustre_disk_dqheader), 0);
129 #endif
130         }
131         if (size != sizeof(struct lustre_disk_dqheader))
132                 return -EINVAL;
133         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
134             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
135                 return -EINVAL;
136         return 0;
137 }
138
139 /* Check whether given file is really lustre admin quotafile */
140 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
141 {
142         struct file *f = lqi->qi_files[type];
143         return check_quota_file(f, NULL, type, lqi->qi_version);
144 }
145
146 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
147 {
148         mm_segment_t fs;
149         struct lustre_disk_dqinfo dinfo;
150         ssize_t size;
151         loff_t offset = LUSTRE_DQINFOOFF;
152
153         fs = get_fs();
154         set_fs(KERNEL_DS);
155         size = f->f_op->read(f, (char *)&dinfo, 
156                              sizeof(struct lustre_disk_dqinfo), &offset);
157         set_fs(fs);
158         if (size != sizeof(struct lustre_disk_dqinfo)) {
159                 CERROR("Can't read info structure on device %s.\n",
160                        f->f_vfsmnt->mnt_sb->s_id);
161                 return -EINVAL;
162         }
163         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
164         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
165         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
166         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
167         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
168         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
169         return 0;
170 }
171
172 /* Read information header from quota file */
173 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
174 {
175         return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]);
176 }
177
178 /* Write information header to quota file */
179 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
180 {
181         mm_segment_t fs;
182         struct lustre_disk_dqinfo dinfo;
183         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
184         struct file *f = lqi->qi_files[type];
185         ssize_t size;
186         loff_t offset = LUSTRE_DQINFOOFF;
187
188         info->dqi_flags &= ~DQF_INFO_DIRTY;
189         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
190         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
191         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
192         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
193         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
194         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
195         fs = get_fs();
196         set_fs(KERNEL_DS);
197         size = f->f_op->write(f, (char *)&dinfo, 
198                               sizeof(struct lustre_disk_dqinfo), &offset);
199         set_fs(fs);
200         if (size != sizeof(struct lustre_disk_dqinfo)) {
201                 CWARN("Can't write info structure on device %s.\n",
202                       f->f_vfsmnt->mnt_sb->s_id);
203                 return -1;
204         }
205         return 0;
206 }
207
/*
 * Field accessors for on-disk dquot entries.
 *
 * DQ2MQ(v)        - convert on-disk little-endian field \a v to CPU order;
 *                   the 32/64-bit conversion is chosen from sizeof(v).
 * MQ2DQ(v, newv)  - store \a newv into on-disk field \a v in little-endian
 *                   order, again choosing the width from sizeof(v).
 * DQF_GET(var, ver, field)
 *                 - read \a field from the entry at \a var, interpreting it
 *                   as the V1 or V2 entry layout according to \a ver.
 * DQF_PUT(var, ver, field, val)
 *                 - write \a val to \a field of the entry at \a var.
 *
 * Every macro argument is parenthesized in the expansion so that
 * expression arguments (e.g. "a + b" for newv, or a conditional for ver)
 * keep their intended precedence.  \a v and \a var are still evaluated
 * more than once; do not pass expressions with side effects.
 */
#define DQ2MQ(v) ((sizeof(v) == sizeof(__u64)) ? \
                le64_to_cpu(v) : le32_to_cpu(v))

#define MQ2DQ(v,newv) ((sizeof(v) == sizeof(__u64)) ? \
                ((v) = cpu_to_le64((__u64)(newv))) : \
                ((v) = cpu_to_le32((__u32)(newv))))

#define DQF_GET(var,ver,field) (((ver) == LUSTRE_QUOTA_V1)?\
                DQ2MQ(((struct lustre_disk_dqblk*)(var))->field):\
                DQ2MQ(((struct lustre_disk_dqblk_v2*)(var))->field))

#define DQF_PUT(var,ver,field,val) (((ver) == LUSTRE_QUOTA_V1)?\
                MQ2DQ(((struct lustre_disk_dqblk*)(var))->field, (val)):\
                MQ2DQ(((struct lustre_disk_dqblk_v2*)(var))->field, (val)))
221
222 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
223                  lustre_quota_version_t version)
224 {
225         m->dqb_ihardlimit = DQF_GET(d, version, dqb_ihardlimit);
226         m->dqb_isoftlimit = DQF_GET(d, version, dqb_isoftlimit);
227         m->dqb_curinodes = DQF_GET(d, version, dqb_curinodes);
228         m->dqb_itime = DQF_GET(d, version, dqb_itime);
229         m->dqb_bhardlimit = DQF_GET(d, version, dqb_bhardlimit);
230         m->dqb_bsoftlimit = DQF_GET(d, version, dqb_bsoftlimit);
231         m->dqb_curspace = DQF_GET(d, version, dqb_curspace);
232         m->dqb_btime = DQF_GET(d, version, dqb_btime);
233 }
234
235 static int check_quota_bounds(struct lustre_mem_dqblk *m, 
236                               lustre_quota_version_t version)
237 {
238         return (version == LUSTRE_QUOTA_V1  &&
239                 m->dqb_ihardlimit <= MAX_UL &&
240                 m->dqb_isoftlimit <= MAX_UL &&
241                 m->dqb_curinodes <= MAX_UL  &&
242                 m->dqb_bhardlimit <= MAX_UL &&
243                 m->dqb_bsoftlimit <= MAX_UL) ||
244                 version != LUSTRE_QUOTA_V1;
245 }
246
247 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
248                        qid_t id, lustre_quota_version_t version)
249 {
250         if (!check_quota_bounds(m, version))
251                 return -EINVAL;
252
253         DQF_PUT(d, version, dqb_ihardlimit, m->dqb_ihardlimit);
254         DQF_PUT(d, version, dqb_isoftlimit, m->dqb_isoftlimit);
255         DQF_PUT(d, version, dqb_curinodes, m->dqb_curinodes);
256         DQF_PUT(d, version, dqb_itime, m->dqb_itime);
257         DQF_PUT(d, version, dqb_bhardlimit, m->dqb_bhardlimit);
258         DQF_PUT(d, version, dqb_bsoftlimit, m->dqb_bsoftlimit);
259         DQF_PUT(d, version, dqb_curspace, m->dqb_curspace);
260         DQF_PUT(d, version, dqb_btime, m->dqb_btime);
261         DQF_PUT(d, version, dqb_id, id);
262
263         return 0;
264 }
265
266 dqbuf_t getdqbuf(void)
267 {
268         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
269         if (!buf)
270                 CWARN("VFS: Not enough memory for quota buffers.\n");
271         return buf;
272 }
273
274 void freedqbuf(dqbuf_t buf)
275 {
276         kfree(buf);
277 }
278
279 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
280 {
281         mm_segment_t fs;
282         ssize_t ret;
283         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
284
285         memset(buf, 0, LUSTRE_DQBLKSIZE);
286         fs = get_fs();
287         set_fs(KERNEL_DS);
288         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
289         set_fs(fs);
290         return ret;
291 }
292
293 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
294 {
295         mm_segment_t fs;
296         ssize_t ret;
297         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
298
299         fs = get_fs();
300         set_fs(KERNEL_DS);
301         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
302         set_fs(fs);
303         return ret;
304 }
305
306 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
307 {
308         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
309 }
310
311 /* Remove empty block from list and return it */
312 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
313 {
314         dqbuf_t buf = getdqbuf();
315         struct lustre_disk_dqdbheader *dh =
316             (struct lustre_disk_dqdbheader *)buf;
317         int ret, blk;
318
319         if (!buf)
320                 return -ENOMEM;
321         if (info->dqi_free_blk) {
322                 blk = info->dqi_free_blk;
323                 if ((ret = read_blk(filp, blk, buf)) < 0)
324                         goto out_buf;
325                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
326         } else {
327                 memset(buf, 0, LUSTRE_DQBLKSIZE);
328                 /* Assure block allocation... */
329                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
330                         goto out_buf;
331                 blk = info->dqi_blocks++;
332         }
333         lustre_mark_info_dirty(info);
334         ret = blk;
335 out_buf:
336         freedqbuf(buf);
337         return ret;
338 }
339
340 /* Insert empty block to the list */
341 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
342                    dqbuf_t buf, uint blk)
343 {
344         struct lustre_disk_dqdbheader *dh =
345             (struct lustre_disk_dqdbheader *)buf;
346         int err;
347
348         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
349         dh->dqdh_prev_free = cpu_to_le32(0);
350         dh->dqdh_entries = cpu_to_le16(0);
351         info->dqi_free_blk = blk;
352         lustre_mark_info_dirty(info);
353         if ((err = write_blk(filp, blk, buf)) < 0)
354                 /* Some strange block. We had better leave it... */
355                 return err;
356         return 0;
357 }
358
359 /* Remove given block from the list of blocks with free entries */
360 int remove_free_dqentry(struct file *filp,
361                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
362                         uint blk)
363 {
364         dqbuf_t tmpbuf = getdqbuf();
365         struct lustre_disk_dqdbheader *dh =
366             (struct lustre_disk_dqdbheader *)buf;
367         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
368             le32_to_cpu(dh->dqdh_prev_free);
369         int err;
370
371         if (!tmpbuf)
372                 return -ENOMEM;
373         if (nextblk) {
374                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
375                         goto out_buf;
376                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
377                     dh->dqdh_prev_free;
378                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
379                         goto out_buf;
380         }
381         if (prevblk) {
382                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
383                         goto out_buf;
384                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
385                     dh->dqdh_next_free;
386                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
387                         goto out_buf;
388         } else {
389                 info->dqi_free_entry = nextblk;
390                 lustre_mark_info_dirty(info);
391         }
392         freedqbuf(tmpbuf);
393         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
394         err = write_blk(filp, blk, buf);
395         if (err < 0)      /* No matter whether write succeeds block is out of list */
396                 CERROR("VFS: Can't write block (%u) with "
397                        "free entries (rc=%d).\n", blk, err);
398         return 0;
399 out_buf:
400         freedqbuf(tmpbuf);
401         return err;
402 }
403
404 /* Insert given block to the beginning of list with free entries */
405 int insert_free_dqentry(struct file *filp,
406                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
407                         uint blk)
408 {
409         dqbuf_t tmpbuf = getdqbuf();
410         struct lustre_disk_dqdbheader *dh =
411             (struct lustre_disk_dqdbheader *)buf;
412         int err;
413
414         if (!tmpbuf)
415                 return -ENOMEM;
416         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
417         dh->dqdh_prev_free = cpu_to_le32(0);
418         if ((err = write_blk(filp, blk, buf)) < 0)
419                 goto out_buf;
420         if (info->dqi_free_entry) {
421                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
422                         goto out_buf;
423                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
424                     cpu_to_le32(blk);
425                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
426                         goto out_buf;
427         }
428         freedqbuf(tmpbuf);
429         info->dqi_free_entry = blk;
430         lustre_mark_info_dirty(info);
431         return 0;
432 out_buf:
433         freedqbuf(tmpbuf);
434         return err;
435 }
436
437
438
439 /* Find space for dquot */
440 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
441                               lustre_quota_version_t version)
442 {
443         struct lustre_quota_info *lqi = dquot->dq_info;
444         struct file *filp = lqi->qi_files[dquot->dq_type];
445         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
446         uint blk, i;
447         struct lustre_disk_dqdbheader *dh;
448         void *ddquot;
449         int dqblk_sz = lustre_disk_dqblk_sz[version];
450         int dqstrinblk = lustre_dqstrinblk[version];
451         dqbuf_t buf;
452
453         *err = 0;
454         if (!(buf = getdqbuf())) {
455                 *err = -ENOMEM;
456                 return 0;
457         }
458         dh = (struct lustre_disk_dqdbheader *)buf;
459         ddquot = GETENTRIES(buf, version);
460         if (info->dqi_free_entry) {
461                 blk = info->dqi_free_entry;
462                 if ((*err = read_blk(filp, blk, buf)) < 0)
463                         goto out_buf;
464         } else {
465                 blk = get_free_dqblk(filp, info);
466                 if ((int)blk < 0) {
467                         *err = blk;
468                         freedqbuf(buf);
469                         return 0;
470                 }
471                 memset(buf, 0, LUSTRE_DQBLKSIZE);
472                 info->dqi_free_entry = blk; /* This is enough as block is 
473                                                already zeroed and entry list
474                                                is empty... */
475                 lustre_mark_info_dirty(info);
476         }
477
478         /* Will block be full */
479         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
480                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
481                         CERROR("VFS: Can't remove block %u"
482                                " from entry free list.\n", blk);
483                         goto out_buf;
484                 }
485         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
486         /* Find free structure in block */
487         for (i = 0; i < dqstrinblk &&
488              memcmp((char *)&emptydquot[version],
489                     (char*)ddquot + i * dqblk_sz,
490                     dqblk_sz); i++);
491
492         if (i == dqstrinblk) {
493                 CERROR("VFS: Data block full but it shouldn't.\n");
494                 *err = -EIO;
495                 goto out_buf;
496         }
497
498         if ((*err = write_blk(filp, blk, buf)) < 0) {
499                 CERROR("VFS: Can't write quota data block %u.\n", blk);
500                 goto out_buf;
501         }
502         dquot->dq_off =
503             (blk << LUSTRE_DQBLKSIZE_BITS) +
504             sizeof(struct lustre_disk_dqdbheader) +
505             i * dqblk_sz;
506         freedqbuf(buf);
507         return blk;
508 out_buf:
509         freedqbuf(buf);
510         return 0;
511 }
512
513 /* Insert reference to structure into the trie */
514 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
515                           lustre_quota_version_t version)
516 {
517         struct lustre_quota_info *lqi = dquot->dq_info;
518         struct file *filp = lqi->qi_files[dquot->dq_type];
519         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
520         dqbuf_t buf;
521         int ret = 0, newson = 0, newact = 0;
522         u32 *ref;
523         uint newblk;
524
525         if (!(buf = getdqbuf()))
526                 return -ENOMEM;
527         if (!*treeblk) {
528                 ret = get_free_dqblk(filp, info);
529                 if (ret < 0)
530                         goto out_buf;
531                 *treeblk = ret;
532                 memset(buf, 0, LUSTRE_DQBLKSIZE);
533                 newact = 1;
534         } else {
535                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
536                         CERROR("VFS: Can't read tree quota block %u.\n",
537                                *treeblk);
538                         goto out_buf;
539                 }
540         }
541         ref = (u32 *) buf;
542         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
543         if (!newblk)
544                 newson = 1;
545         if (depth == LUSTRE_DQTREEDEPTH - 1) {
546
547                 if (newblk) {
548                         CERROR("VFS: Inserting already present quota entry "
549                                "(block %u).\n", 
550                                ref[GETIDINDEX(dquot->dq_id, depth)]);
551                         ret = -EIO;
552                         goto out_buf;
553                 }
554
555                 newblk = find_free_dqentry(dquot, &ret, version);
556         } else
557                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
558         if (newson && ret >= 0) {
559                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
560                 ret = write_blk(filp, *treeblk, buf);
561         } else if (newact && ret < 0)
562                 put_free_dqblk(filp, info, buf, *treeblk);
563 out_buf:
564         freedqbuf(buf);
565         return ret;
566 }
567
568 /* Wrapper for inserting quota structure into tree */
569 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
570                                  lustre_quota_version_t version)
571 {
572         int tmp = LUSTRE_DQTREEOFF;
573         return do_insert_tree(dquot, &tmp, 0, version);
574 }
575
576 /*
577  *  We don't have to be afraid of deadlocks as we never have quotas on quota files...
578  */
579 static int lustre_write_dquot(struct lustre_dquot *dquot, 
580                               lustre_quota_version_t version)
581 {
582         int type = dquot->dq_type;
583         struct file *filp;
584         mm_segment_t fs;
585         loff_t offset;
586         ssize_t ret;
587         int dqblk_sz = lustre_disk_dqblk_sz[version];
588         char ddquot[sizeof(union lustre_disk_dqblk_un)];
589
590         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
591         if (ret < 0)
592                 return ret;
593
594         if (!dquot->dq_off)
595                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
596                         CERROR("VFS: Error %Zd occurred while creating quota.\n",
597                                ret);
598                         return ret;
599                 }
600         filp = dquot->dq_info->qi_files[type];
601         offset = dquot->dq_off;
602         /* Argh... We may need to write structure full of zeroes but that would be
603          * treated as an empty place by the rest of the code. Format change would
604          * be definitely cleaner but the problems probably are not worth it */
605         if (!memcmp((char *)&emptydquot[version], ddquot, dqblk_sz))
606                 DQF_PUT(ddquot, version, dqb_itime, 1);
607         fs = get_fs();
608         set_fs(KERNEL_DS);
609         ret = filp->f_op->write(filp, ddquot,
610                                 dqblk_sz, &offset);
611         set_fs(fs);
612         if (ret != dqblk_sz) {
613                 CWARN("VFS: dquota write failed on dev %s\n",
614                       filp->f_dentry->d_sb->s_id);
615                 if (ret >= 0)
616                         ret = -ENOSPC;
617         } else
618                 ret = 0;
619
620         return ret;
621 }
622
623 /* Free dquot entry in data block */
624 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
625                         lustre_quota_version_t version)
626 {
627         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
628         struct lustre_mem_dqinfo *info =
629             &dquot->dq_info->qi_info[dquot->dq_type];
630         struct lustre_disk_dqdbheader *dh;
631         dqbuf_t buf = getdqbuf();
632         int dqstrinblk = lustre_dqstrinblk[version];
633         int ret = 0;
634
635         if (!buf)
636                 return -ENOMEM;
637         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
638                 CERROR("VFS: Quota structure has offset to other block (%u) "
639                        "than it should (%u).\n", blk, 
640                        (uint)(dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
641                 goto out_buf;
642         }
643         if ((ret = read_blk(filp, blk, buf)) < 0) {
644                 CERROR("VFS: Can't read quota data block %u\n", blk);
645                 goto out_buf;
646         }
647         dh = (struct lustre_disk_dqdbheader *)buf;
648         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
649         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
650                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
651                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
652                         CERROR("VFS: Can't move quota data block (%u) "
653                                "to free list.\n", blk);
654                         goto out_buf;
655                 }
656         } else {
657                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
658                        0, lustre_disk_dqblk_sz[version]);
659                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
660                         /* Insert will write block itself */
661                         if ((ret =
662                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
663                                 CERROR("VFS: Can't insert quota data block (%u) "
664                                        "to free entry list.\n", blk);
665                                 goto out_buf;
666                         }
667                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
668                         CERROR("VFS: Can't write quota data block %u\n", blk);
669                         goto out_buf;
670                 }
671         }
672         dquot->dq_off = 0;      /* Quota is now unattached */
673 out_buf:
674         freedqbuf(buf);
675         return ret;
676 }
677
678 /* Remove reference to dquot from tree */
679 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
680                        lustre_quota_version_t version)
681 {
682         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
683         struct lustre_mem_dqinfo *info =
684             &dquot->dq_info->qi_info[dquot->dq_type];
685         dqbuf_t buf = getdqbuf();
686         int ret = 0;
687         uint newblk;
688         u32 *ref = (u32 *) buf;
689
690         if (!buf)
691                 return -ENOMEM;
692         if ((ret = read_blk(filp, *blk, buf)) < 0) {
693                 CERROR("VFS: Can't read quota data block %u\n", *blk);
694                 goto out_buf;
695         }
696         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
697         if (depth == LUSTRE_DQTREEDEPTH - 1) {
698                 ret = free_dqentry(dquot, newblk, version);
699                 newblk = 0;
700         } else
701                 ret = remove_tree(dquot, &newblk, depth + 1, version);
702         if (ret >= 0 && !newblk) {
703                 int i;
704                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
705                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
706                         /* Block got empty? */ ;
707                 /* don't put the root block into free blk list! */
708                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
709                         put_free_dqblk(filp, info, buf, *blk);
710                         *blk = 0;
711                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
712                         CERROR("VFS: Can't write quota tree block %u.\n", *blk);
713         }
714 out_buf:
715         freedqbuf(buf);
716         return ret;
717 }
718
719 /* Delete dquot from tree */
720 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
721                                 lustre_quota_version_t version)
722 {
723         uint tmp = LUSTRE_DQTREEOFF;
724
725         if (!dquot->dq_off)     /* Even not allocated? */
726                 return 0;
727         return remove_tree(dquot, &tmp, 0, version);
728 }
729
730 /* Find entry in block */
731 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
732                                  lustre_quota_version_t version)
733 {
734         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
735         dqbuf_t buf = getdqbuf();
736         loff_t ret = 0;
737         int i;
738         char *ddquot = GETENTRIES(buf, version);
739         int dqblk_sz = lustre_disk_dqblk_sz[version];
740         int dqstrinblk = lustre_dqstrinblk[version];
741
742         if (!buf)
743                 return -ENOMEM;
744         if ((ret = read_blk(filp, blk, buf)) < 0) {
745                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
746                 goto out_buf;
747         }
748         if (dquot->dq_id)
749                 for (i = 0; i < dqstrinblk && 
750                      DQF_GET(ddquot+i*dqblk_sz, version, dqb_id) != dquot->dq_id;
751                      i++) ;
752         else {                  /* ID 0 as a bit more complicated searching... */
753                 for (i = 0; i < dqstrinblk; i++)
754                         if (!DQF_GET(ddquot + i*dqblk_sz, version, dqb_id)
755                             && memcmp((char *)&emptydquot[version],
756                                       ddquot + i*dqblk_sz,
757                                       dqblk_sz))
758                                 break;
759         }
760         if (i == dqstrinblk) {
761                 CERROR("VFS: Quota for id %u referenced but not present.\n",
762                        dquot->dq_id);
763                 ret = -EIO;
764                 goto out_buf;
765         } else
766                 ret =
767                     (blk << LUSTRE_DQBLKSIZE_BITS) +
768                     sizeof(struct lustre_disk_dqdbheader) +
769                     i * dqblk_sz;
770 out_buf:
771         freedqbuf(buf);
772         return ret;
773 }
774
775 /* Find entry for given id in the tree */
776 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
777                                 lustre_quota_version_t version)
778 {
779         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
780         dqbuf_t buf = getdqbuf();
781         loff_t ret = 0;
782         u32 *ref = (u32 *) buf;
783
784         if (!buf)
785                 return -ENOMEM;
786         if ((ret = read_blk(filp, blk, buf)) < 0) {
787                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
788                 goto out_buf;
789         }
790         ret = 0;
791         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
792         if (!blk)               /* No reference? */
793                 goto out_buf;
794         if (depth < LUSTRE_DQTREEDEPTH - 1)
795                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
796         else
797                 ret = find_block_dqentry(dquot, blk, version);
798 out_buf:
799         freedqbuf(buf);
800         return ret;
801 }
802
803 /* Find entry for given id in the tree - wrapper function */
804 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
805                                   lustre_quota_version_t version)
806 {
807         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
808 }
809
810
811 int lustre_read_dquot(struct lustre_dquot *dquot)
812 {
813         int type = dquot->dq_type;
814         struct file *filp;
815         mm_segment_t fs;
816         loff_t offset;
817         int ret = 0, dqblk_sz;
818         lustre_quota_version_t version;
819
820         /* Invalidated quota? */
821         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
822                 CERROR("VFS: Quota invalidated while reading!\n");
823                 return -EIO;
824         }
825
826         version = dquot->dq_info->qi_version;
827         dqblk_sz = lustre_disk_dqblk_sz[version];
828
829         offset = find_dqentry(dquot, version);
830         if (offset <= 0) {      /* Entry not present? */
831                 if (offset < 0)
832                         CERROR("VFS: Can't read quota structure for id %u.\n",
833                                dquot->dq_id);
834                 dquot->dq_off = 0;
835                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
836                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
837                 ret = offset;
838         } else {
839                 char ddquot[sizeof(union lustre_disk_dqblk_un)];
840
841                 dquot->dq_off = offset;
842                 fs = get_fs();
843                 set_fs(KERNEL_DS);
844                 if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
845                     dqblk_sz) {
846                         if (ret >= 0)
847                                 ret = -EIO;
848                         CERROR("VFS: Error while reading quota structure "
849                                "for id %u.\n", dquot->dq_id);
850                         memset(ddquot, 0, dqblk_sz);
851                 } else {
852                         ret = 0;
853                         /* We need to escape back all-zero structure */
854                         if (!memcmp((char *)&fakedquot[version],
855                                     ddquot, dqblk_sz))
856                                 DQF_PUT(ddquot, version, dqb_itime, 0);
857                 }
858                 set_fs(fs);
859                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
860         }
861
862         return ret;
863 }
864
865 /* Commit changes of dquot to disk - it might also mean deleting it when quota became fake */
866 int lustre_commit_dquot(struct lustre_dquot *dquot)
867 {
868         int rc = 0;
869         lustre_quota_version_t version = dquot->dq_info->qi_version;
870
871         /* always clear the flag so we don't loop on an IO error... */
872         clear_bit(DQ_MOD_B, &dquot->dq_flags);
873
874         /* The block/inode usage in admin quotafile isn't the real usage
875          * over all cluster, so keep the fake dquot entry on disk is
876          * meaningless, just remove it */
877         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
878                 rc = lustre_delete_dquot(dquot, version);
879         else
880                 rc = lustre_write_dquot(dquot, version);
881
882         if (rc < 0)
883                 return rc;
884
885         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
886                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
887
888         return rc;
889 }
890
891 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
892 {
893         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
894         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
895         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
896         struct lustre_disk_dqheader dqhead;
897         ssize_t size;
898         loff_t offset = 0;
899         struct file *fp = lqi->qi_files[type];
900         int rc = 0;
901
902         /* write quotafile header */
903         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
904                                        fake_magics[type] : quota_magics[type]);
905         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
906         size = fp->f_op->write(fp, (char *)&dqhead,
907                                sizeof(struct lustre_disk_dqheader), &offset);
908
909         if (size != sizeof(struct lustre_disk_dqheader)) {
910                 CERROR("error writing quotafile header (rc:%d)\n", rc);
911                 rc = size;
912         }
913
914         return rc;
915 }
916
917 /* We need to export this function to initialize quotafile, because we haven't
918  * user level check utility */
919 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
920                                    int fakemagics)
921 {
922         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
923         int rc;
924
925         rc = lustre_init_quota_header(lqi, type, fakemagics);
926         if (rc)
927                 return rc;
928
929         /* write init quota info */
930         memset(dqinfo, 0, sizeof(*dqinfo));
931         dqinfo->dqi_bgrace = MAX_DQ_TIME;
932         dqinfo->dqi_igrace = MAX_IQ_TIME;
933         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
934
935         return lustre_write_quota_info(lqi, type);
936 }
937
/* Initialize the quota file of the given type without fake magics;
 * shorthand for lustre_init_quota_info_generic(lqi, type, 0). */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int no_fake_magics = 0;

        return lustre_init_quota_info_generic(lqi, type, no_fake_magics);
}
942
943 ssize_t quota_read(struct file *file, struct inode *inode, int type,
944                    uint blk, dqbuf_t buf)
945 {
946         if (file) {
947                 return read_blk(file, blk, buf);
948         } else {
949 #ifndef KERNEL_SUPPORTS_QUOTA_READ
950                 return -ENOTSUPP;
951 #else
952                 struct super_block *sb = inode->i_sb;
953                 memset(buf, 0, LUSTRE_DQBLKSIZE);
954                 return sb->s_op->quota_read(sb, type, (char *)buf,
955                                             LUSTRE_DQBLKSIZE, 
956                                             blk << LUSTRE_DQBLKSIZE_BITS);
957 #endif
958         }
959 }
960
961 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
962                               uint blk, struct list_head *list)
963 {
964         dqbuf_t buf = getdqbuf();
965         loff_t ret = 0;
966         struct lustre_disk_dqdbheader *dqhead =
967             (struct lustre_disk_dqdbheader *)buf;
968         struct dqblk *blk_item;
969         struct dqblk *pos;
970         struct list_head *tmp;
971
972         if (!buf)
973                 return -ENOMEM;
974         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
975                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
976                 goto out_buf;
977         }
978         ret = 0;
979
980         if (!le32_to_cpu(dqhead->dqdh_entries))
981                 goto out_buf;
982
983         if (list_empty(list)) {
984                 tmp = list;
985                 goto done;
986         }
987
988         list_for_each_entry(pos, list, link) {
989                 if (blk == pos->blk)    /* we got this blk already */
990                         goto out_buf;
991                 if (blk > pos->blk)
992                         continue;
993                 break;
994         }
995         tmp = &pos->link;
996 done:
997         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
998         if (!blk_item) {
999                 ret = -ENOMEM;
1000                 goto out_buf;
1001         }
1002         blk_item->blk = blk;
1003         INIT_LIST_HEAD(&blk_item->link);
1004
1005         list_add_tail(&blk_item->link, tmp);
1006
1007 out_buf:
1008         freedqbuf(buf);
1009         return ret;
1010 }
1011
1012 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1013                       uint blk, int depth, struct list_head *list)
1014 {
1015         dqbuf_t buf = getdqbuf();
1016         loff_t ret = 0;
1017         int index;
1018         u32 *ref = (u32 *) buf;
1019
1020         if (!buf)
1021                 return -ENOMEM;
1022         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1023                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
1024                 goto out_buf;
1025         }
1026         ret = 0;
1027
1028         for (index = 0; index <= 0xff && !ret; index++) {
1029                 blk = le32_to_cpu(ref[index]);
1030                 if (!blk)       /* No reference */
1031                         continue;
1032
1033                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1034                         ret = walk_tree_dqentry(filp, inode, type, blk,
1035                                                 depth + 1, list);
1036                 else
1037                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1038         }
1039 out_buf:
1040         freedqbuf(buf);
1041         return ret;
1042 }
1043
1044 /* Walk through the quota file (v2 format) to get all ids with quota limit */
1045 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1046                     struct list_head *list)
1047 {
1048         struct list_head blk_list;
1049         struct dqblk *blk_item, *tmp;
1050         dqbuf_t buf = NULL;
1051         char *ddquot;
1052         int rc;
1053         lustre_quota_version_t version;
1054
1055         ENTRY;
1056
1057         LASSERT(ergo(fp == NULL, inode != NULL));
1058
1059         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V1) == 0)
1060                 version = LUSTRE_QUOTA_V1;
1061         else if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1062                 version = LUSTRE_QUOTA_V2;
1063         else {
1064                 CERROR("unknown quota file format!\n");
1065                 RETURN(-EINVAL);
1066         }
1067
1068         if (!list_empty(list)) {
1069                 CERROR("not empty list\n");
1070                 RETURN(-EINVAL);
1071         }
1072
1073         INIT_LIST_HEAD(&blk_list);
1074         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1075         if (rc) {
1076                 CERROR("walk through quota file failed!(%d)\n", rc);
1077                 GOTO(out_free, rc);
1078         }
1079         if (list_empty(&blk_list))
1080                 RETURN(0);
1081
1082         buf = getdqbuf();
1083         if (!buf)
1084                 RETURN(-ENOMEM);
1085         ddquot = GETENTRIES(buf, version);
1086
1087         list_for_each_entry(blk_item, &blk_list, link) {
1088                 loff_t ret = 0;
1089                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1090
1091                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1092                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1093                         CERROR("VFS: Can't read quota tree block %u.\n",
1094                                blk_item->blk);
1095                         GOTO(out_free, rc = ret);
1096                 }
1097
1098                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1099                         struct dquot_id *dqid;
1100                         /* skip empty entry */
1101                         if (!memcmp((char *)&emptydquot[version],
1102                                     ddquot + i*dqblk_sz, dqblk_sz))
1103                                 continue;
1104
1105                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), GFP_NOFS);
1106                         if (!dqid)
1107                                 GOTO(out_free, rc = -ENOMEM);
1108
1109                         dqid->di_id = DQF_GET(ddquot + i * dqblk_sz,
1110                                               version, dqb_id);
1111                         dqid->di_flag = DQF_GET(ddquot + i * dqblk_sz, version,
1112                                                 dqb_ihardlimit) ? QI_SET : 0;
1113                         dqid->di_flag |= DQF_GET(ddquot + i * dqblk_sz, version,
1114                                                  dqb_bhardlimit) ? QB_SET : 0;
1115                         INIT_LIST_HEAD(&dqid->di_link);
1116                         list_add(&dqid->di_link, list);
1117                 }
1118         }
1119
1120 out_free:
1121         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1122                 list_del_init(&blk_item->link);
1123                 kfree(blk_item);
1124         }
1125         if (buf)
1126                 freedqbuf(buf);
1127
1128         RETURN(rc);
1129 }
1130
1131
1132 EXPORT_SYMBOL(lustre_read_quota_info);
1133 EXPORT_SYMBOL(lustre_write_quota_info);
1134 EXPORT_SYMBOL(lustre_check_quota_file);
1135 EXPORT_SYMBOL(lustre_read_dquot);
1136 EXPORT_SYMBOL(lustre_commit_dquot);
1137 EXPORT_SYMBOL(lustre_init_quota_info);
1138 EXPORT_SYMBOL(lustre_get_qids);
1139 #endif