Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/lvfs/lustre_quota_fmt.c
37  *
38  * Lustre administrative quota format.
39  * from linux/fs/quota_v2.c
40  */
41
42 #ifndef EXPORT_SYMTAB
43 # define EXPORT_SYMTAB
44 #endif
45
46 #include <linux/errno.h>
47 #include <linux/fs.h>
48 #include <linux/mount.h>
49 #include <linux/kernel.h>
50 #include <linux/init.h>
51 #include <linux/module.h>
52 #include <linux/slab.h>
53 #include <linux/quotaio_v1.h>
54
55 #include <asm/byteorder.h>
56 #include <asm/uaccess.h>
57
58 #include <lustre_quota.h>
59 #include <obd_support.h>
60 #include "lustre_quota_fmt.h"
61
62 #ifdef HAVE_QUOTA_SUPPORT
63
/* Expected header version for each quota type, indexed by quota format
 * version; check_quota_file() compares dqh_version against these. */
static const uint lustre_initqversions[][MAXQUOTAS] = {
        [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
};

/* Number of dquot entries held by one data block, indexed by quota format
 * version. */
static const int lustre_dqstrinblk[] = {
        [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
};

/* Size in bytes of one on-disk dquot entry, indexed by quota format
 * version. */
static const int lustre_disk_dqblk_sz[] = {
        [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
};
75
76 int check_quota_file(struct file *f, struct inode *inode, int type, 
77                      lustre_quota_version_t version)
78 {
79         struct lustre_disk_dqheader dqhead;
80         mm_segment_t fs;
81         ssize_t size;
82         loff_t offset = 0;
83         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
84         const uint *quota_versions = lustre_initqversions[version];
85
86         if (f) {
87                 fs = get_fs();
88                 set_fs(KERNEL_DS);
89                 size = f->f_op->read(f, (char *)&dqhead,
90                                      sizeof(struct lustre_disk_dqheader), 
91                                      &offset);
92                 set_fs(fs);
93         } else { 
94 #ifndef KERNEL_SUPPORTS_QUOTA_READ
95                 size = 0;
96 #else
97                 struct super_block *sb = inode->i_sb;
98                 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 
99                                             sizeof(struct lustre_disk_dqheader), 0);
100 #endif
101         }
102         if (size != sizeof(struct lustre_disk_dqheader))
103                 return -EINVAL;
104         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
105             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
106                 return -EINVAL;
107         return 0;
108 }
109
110 /**
111  * Check whether given file is really lustre admin quotafile
112  */
113 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
114 {
115         struct file *f = lqi->qi_files[type];
116         return check_quota_file(f, NULL, type, lqi->qi_version);
117 }
118
119 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
120 {
121         mm_segment_t fs;
122         struct lustre_disk_dqinfo dinfo;
123         ssize_t size;
124         loff_t offset = LUSTRE_DQINFOOFF;
125
126         fs = get_fs();
127         set_fs(KERNEL_DS);
128         size = f->f_op->read(f, (char *)&dinfo, 
129                              sizeof(struct lustre_disk_dqinfo), &offset);
130         set_fs(fs);
131         if (size != sizeof(struct lustre_disk_dqinfo)) {
132                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
133                        f->f_vfsmnt->mnt_sb->s_id);
134                 return -EINVAL;
135         }
136         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
137         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
138         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
139         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
140         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
141         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
142         return 0;
143 }
144
145 /**
146  * Read information header from quota file
147  */
148 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
149 {
150         return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]);
151 }
152
153 /**
154  * Write information header to quota file
155  */
156 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
157 {
158         mm_segment_t fs;
159         struct lustre_disk_dqinfo dinfo;
160         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
161         struct file *f = lqi->qi_files[type];
162         ssize_t size;
163         loff_t offset = LUSTRE_DQINFOOFF;
164
165         info->dqi_flags &= ~DQF_INFO_DIRTY;
166         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
167         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
168         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
169         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
170         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
171         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
172         fs = get_fs();
173         set_fs(KERNEL_DS);
174         size = f->f_op->write(f, (char *)&dinfo, 
175                               sizeof(struct lustre_disk_dqinfo), &offset);
176         set_fs(fs);
177         if (size != sizeof(struct lustre_disk_dqinfo)) {
178                 CDEBUG(D_WARNING, 
179                        "Can't write info structure on device %s.\n",
180                        f->f_vfsmnt->mnt_sb->s_id);
181                 return -1;
182         }
183         return 0;
184 }
185
186 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
187                  lustre_quota_version_t version)
188 {
189         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
190
191         LASSERT(version == LUSTRE_QUOTA_V2);
192
193         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
194         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
195         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
196         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
197         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
198         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
199         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
200         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
201 }
202
203 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
204                        qid_t id, lustre_quota_version_t version)
205 {
206         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
207
208         LASSERT(version == LUSTRE_QUOTA_V2);
209
210         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
211         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
212         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
213         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
214         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
215         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
216         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
217         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
218         dqblk->dqb_id = cpu_to_le32(id);
219
220         return 0;
221 }
222
223 dqbuf_t getdqbuf(void)
224 {
225         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
226         if (!buf)
227                 CDEBUG(D_WARNING, 
228                        "VFS: Not enough memory for quota buffers.\n");
229         return buf;
230 }
231
/**
 * Release a quota block buffer obtained from getdqbuf().
 */
void freedqbuf(dqbuf_t buf)
{
        kfree(buf);
}
236
237 ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
238 {
239         mm_segment_t fs;
240         ssize_t ret;
241         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
242
243         memset(buf, 0, LUSTRE_DQBLKSIZE);
244         fs = get_fs();
245         set_fs(KERNEL_DS);
246         ret = filp->f_op->read(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
247         set_fs(fs);
248         return ret;
249 }
250
251 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
252 {
253         mm_segment_t fs;
254         ssize_t ret;
255         loff_t offset = blk << LUSTRE_DQBLKSIZE_BITS;
256
257         fs = get_fs();
258         set_fs(KERNEL_DS);
259         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
260         set_fs(fs);
261         return ret;
262 }
263
/**
 * Flag the in-memory quota info as modified by setting the DQF_INFO_DIRTY_B
 * bit in its flags; lustre_write_quota_info() clears DQF_INFO_DIRTY again
 * before writing the info out.
 */
void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
{
        set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
}
268
269 /**
270  * Remove empty block from list and return it
271  */
272 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
273 {
274         dqbuf_t buf = getdqbuf();
275         struct lustre_disk_dqdbheader *dh =
276             (struct lustre_disk_dqdbheader *)buf;
277         int ret, blk;
278
279         if (!buf)
280                 return -ENOMEM;
281         if (info->dqi_free_blk) {
282                 blk = info->dqi_free_blk;
283                 if ((ret = read_blk(filp, blk, buf)) < 0)
284                         goto out_buf;
285                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
286         } else {
287                 memset(buf, 0, LUSTRE_DQBLKSIZE);
288                 /* Assure block allocation... */
289                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
290                         goto out_buf;
291                 blk = info->dqi_blocks++;
292         }
293         lustre_mark_info_dirty(info);
294         ret = blk;
295 out_buf:
296         freedqbuf(buf);
297         return ret;
298 }
299
300 /**
301  * Insert empty block to the list
302  */
303 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
304                    dqbuf_t buf, uint blk)
305 {
306         struct lustre_disk_dqdbheader *dh =
307             (struct lustre_disk_dqdbheader *)buf;
308         int err;
309
310         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
311         dh->dqdh_prev_free = cpu_to_le32(0);
312         dh->dqdh_entries = cpu_to_le16(0);
313         info->dqi_free_blk = blk;
314         lustre_mark_info_dirty(info);
315         if ((err = write_blk(filp, blk, buf)) < 0)
316                 /* Some strange block. We had better leave it... */
317                 return err;
318         return 0;
319 }
320
321 /**
322  * Remove given block from the list of blocks with free entries
323  */
324 int remove_free_dqentry(struct file *filp,
325                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
326                         uint blk)
327 {
328         dqbuf_t tmpbuf = getdqbuf();
329         struct lustre_disk_dqdbheader *dh =
330             (struct lustre_disk_dqdbheader *)buf;
331         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
332             le32_to_cpu(dh->dqdh_prev_free);
333         int err;
334
335         if (!tmpbuf)
336                 return -ENOMEM;
337         if (nextblk) {
338                 if ((err = read_blk(filp, nextblk, tmpbuf)) < 0)
339                         goto out_buf;
340                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
341                     dh->dqdh_prev_free;
342                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
343                         goto out_buf;
344         }
345         if (prevblk) {
346                 if ((err = read_blk(filp, prevblk, tmpbuf)) < 0)
347                         goto out_buf;
348                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
349                     dh->dqdh_next_free;
350                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
351                         goto out_buf;
352         } else {
353                 info->dqi_free_entry = nextblk;
354                 lustre_mark_info_dirty(info);
355         }
356         freedqbuf(tmpbuf);
357         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
358         if (write_blk(filp, blk, buf) < 0)
359                 /* No matter whether write succeeds block is out of list */
360                 CDEBUG(D_ERROR, 
361                        "VFS: Can't write block (%u) with free entries.\n", blk);
362         return 0;
363 out_buf:
364         freedqbuf(tmpbuf);
365         return err;
366 }
367
368 /**
369  * Insert given block to the beginning of list with free entries
370  */
371 int insert_free_dqentry(struct file *filp,
372                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
373                         uint blk)
374 {
375         dqbuf_t tmpbuf = getdqbuf();
376         struct lustre_disk_dqdbheader *dh =
377             (struct lustre_disk_dqdbheader *)buf;
378         int err;
379
380         if (!tmpbuf)
381                 return -ENOMEM;
382         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
383         dh->dqdh_prev_free = cpu_to_le32(0);
384         if ((err = write_blk(filp, blk, buf)) < 0)
385                 goto out_buf;
386         if (info->dqi_free_entry) {
387                 if ((err = read_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
388                         goto out_buf;
389                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
390                     cpu_to_le32(blk);
391                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
392                         goto out_buf;
393         }
394         freedqbuf(tmpbuf);
395         info->dqi_free_entry = blk;
396         lustre_mark_info_dirty(info);
397         return 0;
398 out_buf:
399         freedqbuf(tmpbuf);
400         return err;
401 }
402
403
404
405 /**
406  * Find space for dquot
407  */
408 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
409                               lustre_quota_version_t version)
410 {
411         struct lustre_quota_info *lqi = dquot->dq_info;
412         struct file *filp = lqi->qi_files[dquot->dq_type];
413         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
414         uint blk, i;
415         struct lustre_disk_dqdbheader *dh;
416         void *ddquot;
417         int dqblk_sz = lustre_disk_dqblk_sz[version];
418         int dqstrinblk = lustre_dqstrinblk[version];
419         char fakedquot[dqblk_sz];
420         dqbuf_t buf;
421
422         *err = 0;
423         if (!(buf = getdqbuf())) {
424                 *err = -ENOMEM;
425                 return 0;
426         }
427         dh = (struct lustre_disk_dqdbheader *)buf;
428         ddquot = GETENTRIES(buf, version);
429         if (info->dqi_free_entry) {
430                 blk = info->dqi_free_entry;
431                 if ((*err = read_blk(filp, blk, buf)) < 0)
432                         goto out_buf;
433         } else {
434                 blk = get_free_dqblk(filp, info);
435                 if ((int)blk < 0) {
436                         *err = blk;
437                         freedqbuf(buf);
438                         return 0;
439                 }
440                 memset(buf, 0, LUSTRE_DQBLKSIZE);
441                 info->dqi_free_entry = blk; /* This is enough as block is 
442                                                already zeroed and entry list
443                                                is empty... */
444                 lustre_mark_info_dirty(info);
445         }
446
447         /* Will block be full */
448         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
449                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
450                         CDEBUG(D_ERROR, 
451                                "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n",
452                                blk);
453                         goto out_buf;
454                 }
455         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
456         memset(fakedquot, 0, dqblk_sz);
457         /* Find free structure in block */
458         for (i = 0; i < dqstrinblk &&
459              memcmp(fakedquot, (char*)ddquot + i * dqblk_sz, 
460                     sizeof(fakedquot)); i++);
461
462         if (i == dqstrinblk) {
463                 CDEBUG(D_ERROR, 
464                        "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
465                 *err = -EIO;
466                 goto out_buf;
467         }
468
469         if ((*err = write_blk(filp, blk, buf)) < 0) {
470                 CDEBUG(D_ERROR,
471                        "VFS: find_free_dqentry(): Can't write quota data block %u.\n",
472                        blk);
473                 goto out_buf;
474         }
475         dquot->dq_off =
476             (blk << LUSTRE_DQBLKSIZE_BITS) +
477             sizeof(struct lustre_disk_dqdbheader) +
478             i * dqblk_sz;
479         freedqbuf(buf);
480         return blk;
481 out_buf:
482         freedqbuf(buf);
483         return 0;
484 }
485
486 /**
487  * Insert reference to structure into the trie
488  */
489 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
490                           lustre_quota_version_t version)
491 {
492         struct lustre_quota_info *lqi = dquot->dq_info;
493         struct file *filp = lqi->qi_files[dquot->dq_type];
494         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
495         dqbuf_t buf;
496         int ret = 0, newson = 0, newact = 0;
497         u32 *ref;
498         uint newblk;
499
500         if (!(buf = getdqbuf()))
501                 return -ENOMEM;
502         if (!*treeblk) {
503                 ret = get_free_dqblk(filp, info);
504                 if (ret < 0)
505                         goto out_buf;
506                 *treeblk = ret;
507                 memset(buf, 0, LUSTRE_DQBLKSIZE);
508                 newact = 1;
509         } else {
510                 if ((ret = read_blk(filp, *treeblk, buf)) < 0) {
511                         CDEBUG(D_ERROR,
512                                "VFS: Can't read tree quota block %u.\n",
513                                *treeblk);
514                         goto out_buf;
515                 }
516         }
517         ref = (u32 *) buf;
518         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
519         if (!newblk)
520                 newson = 1;
521         if (depth == LUSTRE_DQTREEDEPTH - 1) {
522
523                 if (newblk) {
524                         CDEBUG(D_ERROR, 
525                                "VFS: Inserting already present quota entry (block %u).\n",
526                                ref[GETIDINDEX(dquot->dq_id, depth)]);
527                         ret = -EIO;
528                         goto out_buf;
529                 }
530
531                 newblk = find_free_dqentry(dquot, &ret, version);
532         } else
533                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
534         if (newson && ret >= 0) {
535                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
536                 ret = write_blk(filp, *treeblk, buf);
537         } else if (newact && ret < 0)
538                 put_free_dqblk(filp, info, buf, *treeblk);
539 out_buf:
540         freedqbuf(buf);
541         return ret;
542 }
543
544 /**
545  * Wrapper for inserting quota structure into tree
546  */
547 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
548                                  lustre_quota_version_t version)
549 {
550         int tmp = LUSTRE_DQTREEOFF;
551         return do_insert_tree(dquot, &tmp, 0, version);
552 }
553
554 /**
555  * We don't have to be afraid of deadlocks as we never have quotas on
556  * quota files...
557  */
558 static int lustre_write_dquot(struct lustre_dquot *dquot, 
559                               lustre_quota_version_t version)
560 {
561         int type = dquot->dq_type;
562         struct file *filp;
563         mm_segment_t fs;
564         loff_t offset;
565         ssize_t ret;
566         int dqblk_sz = lustre_disk_dqblk_sz[version];
567         char ddquot[dqblk_sz], empty[dqblk_sz];
568
569         ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
570         if (ret < 0)
571                 return ret;
572
573         if (!dquot->dq_off)
574                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
575                         CDEBUG(D_ERROR,
576                                "VFS: Error %Zd occurred while creating quota.\n",
577                                ret);
578                         return ret;
579                 }
580         filp = dquot->dq_info->qi_files[type];
581         offset = dquot->dq_off;
582         /* Argh... We may need to write structure full of zeroes but that would be
583          * treated as an empty place by the rest of the code. Format change would
584          * be definitely cleaner but the problems probably are not worth it */
585         memset(empty, 0, dqblk_sz);
586         if (!memcmp(empty, ddquot, dqblk_sz))
587                 ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime = cpu_to_le64(1);
588         fs = get_fs();
589         set_fs(KERNEL_DS);
590         ret = filp->f_op->write(filp, ddquot,
591                                 dqblk_sz, &offset);
592         set_fs(fs);
593         if (ret != dqblk_sz) {
594                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
595                        filp->f_dentry->d_sb->s_id);
596                 if (ret >= 0)
597                         ret = -ENOSPC;
598         } else
599                 ret = 0;
600
601         return ret;
602 }
603
604 /**
605  * Free dquot entry in data block
606  */
607 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
608                         lustre_quota_version_t version)
609 {
610         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
611         struct lustre_mem_dqinfo *info =
612             &dquot->dq_info->qi_info[dquot->dq_type];
613         struct lustre_disk_dqdbheader *dh;
614         dqbuf_t buf = getdqbuf();
615         int dqstrinblk = lustre_dqstrinblk[version];
616         int ret = 0;
617
618         if (!buf)
619                 return -ENOMEM;
620         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
621                 CDEBUG(D_ERROR,
622                        "VFS: Quota structure has offset to other block (%u) than it should (%u).\n",
623                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
624                 goto out_buf;
625         }
626         if ((ret = read_blk(filp, blk, buf)) < 0) {
627                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
628                 goto out_buf;
629         }
630         dh = (struct lustre_disk_dqdbheader *)buf;
631         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
632         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
633                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
634                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
635                         CDEBUG(D_ERROR,
636                                "VFS: Can't move quota data block (%u) to free list.\n",
637                                blk);
638                         goto out_buf;
639                 }
640         } else {
641                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
642                        0, lustre_disk_dqblk_sz[version]);
643                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
644                         /* Insert will write block itself */
645                         if ((ret =
646                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
647                                 CDEBUG(D_ERROR,
648                                        "VFS: Can't insert quota data block (%u) to free entry list.\n",
649                                        blk);
650                                 goto out_buf;
651                         }
652                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
653                         CDEBUG(D_ERROR,
654                                "VFS: Can't write quota data block %u\n", blk);
655                         goto out_buf;
656                 }
657         }
658         dquot->dq_off = 0;      /* Quota is now unattached */
659 out_buf:
660         freedqbuf(buf);
661         return ret;
662 }
663
664 /**
665  * Remove reference to dquot from tree
666  */
667 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
668                        lustre_quota_version_t version)
669 {
670         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
671         struct lustre_mem_dqinfo *info =
672             &dquot->dq_info->qi_info[dquot->dq_type];
673         dqbuf_t buf = getdqbuf();
674         int ret = 0;
675         uint newblk;
676         u32 *ref = (u32 *) buf;
677
678         if (!buf)
679                 return -ENOMEM;
680         if ((ret = read_blk(filp, *blk, buf)) < 0) {
681                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", *blk);
682                 goto out_buf;
683         }
684         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
685         if (depth == LUSTRE_DQTREEDEPTH - 1) {
686                 ret = free_dqentry(dquot, newblk, version);
687                 newblk = 0;
688         } else
689                 ret = remove_tree(dquot, &newblk, depth + 1, version);
690         if (ret >= 0 && !newblk) {
691                 int i;
692                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
693                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
694                         /* Block got empty? */ ;
695                 /* don't put the root block into free blk list! */
696                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
697                         put_free_dqblk(filp, info, buf, *blk);
698                         *blk = 0;
699                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
700                         CDEBUG(D_ERROR,
701                                "VFS: Can't write quota tree block %u.\n", *blk);
702         }
703 out_buf:
704         freedqbuf(buf);
705         return ret;
706 }
707
708 /**
709  * Delete dquot from tree
710  */
711 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
712                                 lustre_quota_version_t version)
713 {
714         uint tmp = LUSTRE_DQTREEOFF;
715
716         if (!dquot->dq_off)     /* Even not allocated? */
717                 return 0;
718         return remove_tree(dquot, &tmp, 0, version);
719 }
720
721 /**
722  * Find entry in block
723  */
724 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
725                                  lustre_quota_version_t version)
726 {
727         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
728         dqbuf_t buf = getdqbuf();
729         loff_t ret = 0;
730         int i;
731         struct lustre_disk_dqblk_v2 *ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
732         int dqblk_sz = lustre_disk_dqblk_sz[version];
733         int dqstrinblk = lustre_dqstrinblk[version];
734
735         LASSERT(version == LUSTRE_QUOTA_V2);
736
737         if (!buf)
738                 return -ENOMEM;
739         if ((ret = read_blk(filp, blk, buf)) < 0) {
740                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
741                 goto out_buf;
742         }
743         if (dquot->dq_id)
744                 for (i = 0; i < dqstrinblk && 
745                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
746                      i++) ;
747         else {                  /* ID 0 as a bit more complicated searching... */
748                 char fakedquot[dqblk_sz];
749
750                 memset(fakedquot, 0, sizeof(fakedquot));
751                 for (i = 0; i < dqstrinblk; i++)
752                         if (!le32_to_cpu(ddquot[i].dqb_id)
753                             && memcmp(fakedquot, ddquot + i,
754                                       dqblk_sz))
755                                 break;
756         }
757         if (i == dqstrinblk) {
758                 CDEBUG(D_ERROR,
759                        "VFS: Quota for id %u referenced but not present.\n",
760                        dquot->dq_id);
761                 ret = -EIO;
762                 goto out_buf;
763         } else
764                 ret =
765                     (blk << LUSTRE_DQBLKSIZE_BITS) +
766                     sizeof(struct lustre_disk_dqdbheader) +
767                     i * dqblk_sz;
768 out_buf:
769         freedqbuf(buf);
770         return ret;
771 }
772
773 /**
774  * Find entry for given id in the tree
775  */
776 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
777                                 lustre_quota_version_t version)
778 {
779         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
780         dqbuf_t buf = getdqbuf();
781         loff_t ret = 0;
782         u32 *ref = (u32 *) buf;
783
784         if (!buf)
785                 return -ENOMEM;
786         if ((ret = read_blk(filp, blk, buf)) < 0) {
787                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
788                 goto out_buf;
789         }
790         ret = 0;
791         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
792         if (!blk)               /* No reference? */
793                 goto out_buf;
794         if (depth < LUSTRE_DQTREEDEPTH - 1)
795                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
796         else
797                 ret = find_block_dqentry(dquot, blk, version);
798 out_buf:
799         freedqbuf(buf);
800         return ret;
801 }
802
803 /**
804  * Find entry for given id in the tree - wrapper function
805  */
806 static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
807                                   lustre_quota_version_t version)
808 {
809         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
810 }
811
812 int lustre_read_dquot(struct lustre_dquot *dquot)
813 {
814         int type = dquot->dq_type;
815         struct file *filp;
816         mm_segment_t fs;
817         loff_t offset;
818         int ret = 0, dqblk_sz;
819         lustre_quota_version_t version;
820
821         /* Invalidated quota? */
822         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
823                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
824                 return -EIO;
825         }
826
827         version = dquot->dq_info->qi_version;
828         LASSERT(version == LUSTRE_QUOTA_V2);
829         dqblk_sz = lustre_disk_dqblk_sz[version];
830
831         offset = find_dqentry(dquot, version);
832         if (offset <= 0) {      /* Entry not present? */
833                 if (offset < 0)
834                         CDEBUG(D_ERROR,
835                                "VFS: Can't read quota structure for id %u.\n",
836                                dquot->dq_id);
837                 dquot->dq_off = 0;
838                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
839                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
840                 ret = offset;
841         } else {
842                 char ddquot[dqblk_sz], empty[dqblk_sz];
843
844                 dquot->dq_off = offset;
845                 fs = get_fs();
846                 set_fs(KERNEL_DS);
847                 if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
848                     dqblk_sz) {
849                         if (ret >= 0)
850                                 ret = -EIO;
851                         CDEBUG(D_ERROR,
852                                "VFS: Error while reading quota structure for id %u.\n",
853                                dquot->dq_id);
854                         memset(ddquot, 0, dqblk_sz);
855                 } else {
856                         ret = 0;
857                         /* We need to escape back all-zero structure */
858                         memset(empty, 0, dqblk_sz);
859                         ((struct lustre_disk_dqblk_v2 *)empty)->dqb_itime = cpu_to_le64(1);
860                         if (!memcmp(empty, ddquot, dqblk_sz))
861                                 ((struct lustre_disk_dqblk_v2 *)empty)->dqb_itime = cpu_to_le64(0);
862                 }
863                 set_fs(fs);
864                 disk2memdqb(&dquot->dq_dqb, ddquot, version);
865         }
866
867         return ret;
868 }
869
870 /**
871  * Commit changes of dquot to disk - it might also mean deleting
872  * it when quota became fake.
873  */
874 int lustre_commit_dquot(struct lustre_dquot *dquot)
875 {
876         int rc = 0;
877         lustre_quota_version_t version = dquot->dq_info->qi_version;
878
879         /* always clear the flag so we don't loop on an IO error... */
880         clear_bit(DQ_MOD_B, &dquot->dq_flags);
881
882         /* The block/inode usage in admin quotafile isn't the real usage
883          * over all cluster, so keep the fake dquot entry on disk is
884          * meaningless, just remove it */
885         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
886                 rc = lustre_delete_dquot(dquot, version);
887         else
888                 rc = lustre_write_dquot(dquot, version);
889
890         if (rc < 0)
891                 return rc;
892
893         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
894                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
895
896         return rc;
897 }
898
899 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
900 {
901         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
902         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
903         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
904         struct lustre_disk_dqheader dqhead;
905         ssize_t size;
906         loff_t offset = 0;
907         struct file *fp = lqi->qi_files[type];
908         int rc = 0;
909
910         /* write quotafile header */
911         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
912                                        fake_magics[type] : quota_magics[type]);
913         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
914         size = fp->f_op->write(fp, (char *)&dqhead,
915                                sizeof(struct lustre_disk_dqheader), &offset);
916
917         if (size != sizeof(struct lustre_disk_dqheader)) {
918                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
919                 rc = size;
920         }
921
922         return rc;
923 }
924
925 /**
926  * We need to export this function to initialize quotafile, because we haven't
927  * user level check utility
928  */
929 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
930                                    int fakemagics)
931 {
932         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
933         int rc;
934
935         rc = lustre_init_quota_header(lqi, type, fakemagics);
936         if (rc)
937                 return rc;
938
939         /* write init quota info */
940         memset(dqinfo, 0, sizeof(*dqinfo));
941         dqinfo->dqi_bgrace = MAX_DQ_TIME;
942         dqinfo->dqi_igrace = MAX_IQ_TIME;
943         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
944
945         return lustre_write_quota_info(lqi, type);
946 }
947
/**
 * Initialize the quota file for \a type with the regular magic, i.e.
 * lustre_init_quota_info_generic() with fakemagics == 0.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
952
953 ssize_t quota_read(struct file *file, struct inode *inode, int type,
954                    uint blk, dqbuf_t buf)
955 {
956         if (file) {
957                 return read_blk(file, blk, buf);
958         } else {
959 #ifndef KERNEL_SUPPORTS_QUOTA_READ
960                 return -ENOTSUPP;
961 #else
962                 struct super_block *sb = inode->i_sb;
963                 memset(buf, 0, LUSTRE_DQBLKSIZE);
964                 return sb->s_op->quota_read(sb, type, (char *)buf,
965                                             LUSTRE_DQBLKSIZE, 
966                                             blk << LUSTRE_DQBLKSIZE_BITS);
967 #endif
968         }
969 }
970
971 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
972                               uint blk, struct list_head *list)
973 {
974         dqbuf_t buf = getdqbuf();
975         loff_t ret = 0;
976         struct lustre_disk_dqdbheader *dqhead =
977             (struct lustre_disk_dqdbheader *)buf;
978         struct dqblk *blk_item;
979         struct dqblk *pos;
980         struct list_head *tmp;
981
982         if (!buf)
983                 return -ENOMEM;
984         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
985                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
986                 goto out_buf;
987         }
988         ret = 0;
989
990         if (!le32_to_cpu(dqhead->dqdh_entries))
991                 goto out_buf;
992
993         if (list_empty(list)) {
994                 tmp = list;
995                 goto done;
996         }
997
998         list_for_each_entry(pos, list, link) {
999                 if (blk == pos->blk)    /* we got this blk already */
1000                         goto out_buf;
1001                 if (blk > pos->blk)
1002                         continue;
1003                 break;
1004         }
1005         tmp = &pos->link;
1006 done:
1007         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
1008         if (!blk_item) {
1009                 ret = -ENOMEM;
1010                 goto out_buf;
1011         }
1012         blk_item->blk = blk;
1013         INIT_LIST_HEAD(&blk_item->link);
1014
1015         list_add_tail(&blk_item->link, tmp);
1016
1017 out_buf:
1018         freedqbuf(buf);
1019         return ret;
1020 }
1021
1022 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1023                       uint blk, int depth, struct list_head *list)
1024 {
1025         dqbuf_t buf = getdqbuf();
1026         loff_t ret = 0;
1027         int index;
1028         u32 *ref = (u32 *) buf;
1029
1030         if (!buf)
1031                 return -ENOMEM;
1032         if ((ret = quota_read(filp, inode, type, blk, buf)) < 0) {
1033                 CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk);
1034                 goto out_buf;
1035         }
1036         ret = 0;
1037
1038         for (index = 0; index <= 0xff && !ret; index++) {
1039                 blk = le32_to_cpu(ref[index]);
1040                 if (!blk)       /* No reference */
1041                         continue;
1042
1043                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1044                         ret = walk_tree_dqentry(filp, inode, type, blk,
1045                                                 depth + 1, list);
1046                 else
1047                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1048         }
1049 out_buf:
1050         freedqbuf(buf);
1051         return ret;
1052 }
1053
1054 /**
1055  * Walk through the quota file (v2 format) to get all ids with quota limit
1056  */
1057 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1058                     struct list_head *list)
1059 {
1060         struct list_head blk_list;
1061         struct dqblk *blk_item, *tmp;
1062         dqbuf_t buf = NULL;
1063         struct lustre_disk_dqblk_v2 *ddquot;
1064         int rc;
1065         lustre_quota_version_t version;
1066
1067         ENTRY;
1068
1069         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1070                 version = LUSTRE_QUOTA_V2;
1071         else {
1072                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1073                 RETURN(-EINVAL);
1074         }
1075
1076         if (!list_empty(list)) {
1077                 CDEBUG(D_ERROR, "not empty list\n");
1078                 RETURN(-EINVAL);
1079         }
1080
1081         INIT_LIST_HEAD(&blk_list);
1082         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1083         if (rc) {
1084                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1085                 GOTO(out_free, rc);
1086         }
1087         if (list_empty(&blk_list))
1088                 RETURN(0);
1089
1090         buf = getdqbuf();
1091         if (!buf)
1092                 RETURN(-ENOMEM);
1093         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1094
1095         list_for_each_entry(blk_item, &blk_list, link) {
1096                 loff_t ret = 0;
1097                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1098                 char fakedquot[dqblk_sz];
1099
1100                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1101                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
1102                         CDEBUG(D_ERROR,
1103                                "VFS: Can't read quota tree block %u.\n",
1104                                blk_item->blk);
1105                         GOTO(out_free, rc = ret);
1106                 }
1107
1108                 memset(fakedquot, 0, dqblk_sz);
1109                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1110                         struct dquot_id *dqid;
1111                         /* skip empty entry */
1112                         if (!memcmp(fakedquot, ddquot + i, dqblk_sz))
1113                                 continue;
1114
1115                         dqid = kmalloc(sizeof(*dqid), GFP_NOFS);
1116                         if (!dqid) 
1117                                 GOTO(out_free, rc = -ENOMEM);
1118
1119                         dqid->di_id = le32_to_cpu(ddquot[i].dqb_id);
1120                         INIT_LIST_HEAD(&dqid->di_link);
1121                         list_add(&dqid->di_link, list);
1122                 }
1123         }
1124
1125 out_free:
1126         list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1127                 list_del_init(&blk_item->link);
1128                 kfree(blk_item);
1129         }
1130         if (buf)
1131                 freedqbuf(buf);
1132
1133         RETURN(rc);
1134 }
1135
1136
1137 EXPORT_SYMBOL(lustre_read_quota_info);
1138 EXPORT_SYMBOL(lustre_write_quota_info);
1139 EXPORT_SYMBOL(lustre_check_quota_file);
1140 EXPORT_SYMBOL(lustre_read_dquot);
1141 EXPORT_SYMBOL(lustre_commit_dquot);
1142 EXPORT_SYMBOL(lustre_init_quota_info);
1143 EXPORT_SYMBOL(lustre_get_qids);
1144 #endif