Whamcloud - gitweb
LU-1302 mgs: mgs uses llog over OSD
[fs/lustre-release.git] / lustre / lvfs / lustre_quota_fmt.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * lustre/lvfs/lustre_quota_fmt.c
35  *
36  * Lustre administrative quota format.
37  * from linux/fs/quota_v2.c
38  */
39
40 #include <linux/errno.h>
41 #include <linux/fs.h>
42 #include <linux/mount.h>
43 #include <linux/kernel.h>
44 #include <linux/init.h>
45 #include <linux/module.h>
46 #include <linux/slab.h>
47 #ifdef HAVE_QUOTAIO_V1_H
48 # include <linux/quotaio_v1.h>
49 #endif
50
51 #include <asm/byteorder.h>
52 #include <asm/uaccess.h>
53
54 #include <lustre_quota.h>
55 #include <obd_support.h>
56 #include "lustre_quota_fmt.h"
57
58 static const uint lustre_initqversions[][MAXQUOTAS] = {
59         [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
60 };
61
62 static const int lustre_dqstrinblk[] = {
63         [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
64 };
65
66 static const int lustre_disk_dqblk_sz[] = {
67         [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
68 };
69
70 static const union
71 {
72         struct lustre_disk_dqblk_v2 r1;
73 } fakedquot[] = {
74         [LUSTRE_QUOTA_V2] = {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)}}
75 };
76
77 static const union
78 {
79         struct lustre_disk_dqblk_v2 r1;
80 } emptydquot[] = {
81         [LUSTRE_QUOTA_V2] = {.r1 = { 0 }}
82 };
83
84 extern void *lustre_quota_journal_start(struct inode *inode, int delete);
85 extern void lustre_quota_journal_stop(void *handle);
86 extern ssize_t lustre_read_quota(struct file *f, struct inode *inode, int type,
87                                  char *buf, int count, loff_t pos);
88 extern ssize_t lustre_write_quota(struct file *f, char *buf, int count, loff_t pos);
89
90 int check_quota_file(struct file *f, struct inode *inode, int type,
91                      lustre_quota_version_t version)
92 {
93         struct lustre_disk_dqheader dqhead;
94         ssize_t size;
95         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
96         const uint *quota_versions = lustre_initqversions[version];
97
98         size = lustre_read_quota(f, inode, type, (char *)&dqhead,
99                                  sizeof(struct lustre_disk_dqheader), 0);
100         if (size != sizeof(struct lustre_disk_dqheader))
101                 return -EINVAL;
102         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
103             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
104                 return -EINVAL;
105         return 0;
106 }
107
108 /**
109  * Check whether given file is really lustre admin quotafile
110  */
111 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
112 {
113         struct file *f = lqi->qi_files[type];
114         return check_quota_file(f, NULL, type, lqi->qi_version);
115 }
116 EXPORT_SYMBOL(lustre_check_quota_file);
117
118 int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
119 {
120         struct lustre_disk_dqinfo dinfo;
121         ssize_t size;
122
123         size = lustre_read_quota(f, NULL, 0, (char *)&dinfo,
124                                  sizeof(struct lustre_disk_dqinfo),
125                                  LUSTRE_DQINFOOFF);
126
127         if (size != sizeof(struct lustre_disk_dqinfo)) {
128                 CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
129                        f->f_vfsmnt->mnt_sb->s_id);
130                 return -EINVAL;
131         }
132         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
133         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
134         info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
135         info->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
136         info->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
137         info->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
138         return 0;
139 }
140
141 /**
142  * Read information header from quota file
143  */
144 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
145 {
146         return lustre_read_quota_file_info(lqi->qi_files[type],
147                                            &lqi->qi_info[type]);
148 }
149 EXPORT_SYMBOL(lustre_read_quota_info);
150
151 /**
152  * Write information header to quota file
153  */
154 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
155 {
156         struct lustre_disk_dqinfo dinfo;
157         struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
158         struct file *f = lqi->qi_files[type];
159         ssize_t size;
160
161         info->dqi_flags &= ~DQF_INFO_DIRTY;
162         dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
163         dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
164         dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
165         dinfo.dqi_blocks = cpu_to_le32(info->dqi_blocks);
166         dinfo.dqi_free_blk = cpu_to_le32(info->dqi_free_blk);
167         dinfo.dqi_free_entry = cpu_to_le32(info->dqi_free_entry);
168
169         size = lustre_write_quota(f, (char *)&dinfo,
170                                   sizeof(struct lustre_disk_dqinfo),
171                                   LUSTRE_DQINFOOFF);
172
173         if (size != sizeof(struct lustre_disk_dqinfo)) {
174                 CDEBUG(D_WARNING,
175                        "Can't write info structure on device %s.\n",
176                        f->f_vfsmnt->mnt_sb->s_id);
177                 return -1;
178         }
179         return 0;
180 }
181 EXPORT_SYMBOL(lustre_write_quota_info);
182
183 void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
184                  lustre_quota_version_t version)
185 {
186         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
187
188         LASSERT(version == LUSTRE_QUOTA_V2);
189
190         m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit);
191         m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit);
192         m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes);
193         m->dqb_itime = le64_to_cpu(dqblk->dqb_itime);
194         m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit);
195         m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit);
196         m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace);
197         m->dqb_btime = le64_to_cpu(dqblk->dqb_btime);
198 }
199
200 static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
201                        qid_t id, lustre_quota_version_t version)
202 {
203         struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d;
204
205         LASSERT(version == LUSTRE_QUOTA_V2);
206
207         dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
208         dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
209         dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
210         dqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
211         dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
212         dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
213         dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
214         dqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
215         dqblk->dqb_id = cpu_to_le32(id);
216
217         return 0;
218 }
219
220 dqbuf_t getdqbuf(void)
221 {
222         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
223         if (!buf)
224                 CDEBUG(D_WARNING, 
225                        "VFS: Not enough memory for quota buffers.\n");
226         return buf;
227 }
228
229 void freedqbuf(dqbuf_t buf)
230 {
231         kfree(buf);
232 }
233
234 ssize_t read_blk(struct file *filp, struct inode *inode, int type,
235                  uint blk, dqbuf_t buf)
236 {
237         ssize_t ret;
238
239         memset(buf, 0, LUSTRE_DQBLKSIZE);
240         ret = lustre_read_quota(filp, inode, type, (char *)buf, LUSTRE_DQBLKSIZE,
241                                 blk << LUSTRE_DQBLKSIZE_BITS);
242
243         /* Reading past EOF just returns a block of zeros */
244         if (ret == -EBADR)
245                 ret = 0;
246
247         return ret;
248 }
249
250 ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
251 {
252         ssize_t ret;
253
254         ret = lustre_write_quota(filp, (char *)buf, LUSTRE_DQBLKSIZE,
255                                  blk << LUSTRE_DQBLKSIZE_BITS);
256
257         return ret;
258 }
259
260 void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
261 {
262         cfs_set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
263 }
264
265 /**
266  * Remove empty block from list and return it
267  */
268 int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
269 {
270         dqbuf_t buf = getdqbuf();
271         struct lustre_disk_dqdbheader *dh =
272             (struct lustre_disk_dqdbheader *)buf;
273         int ret, blk;
274
275         if (!buf)
276                 return -ENOMEM;
277         if (info->dqi_free_blk) {
278                 blk = info->dqi_free_blk;
279                 if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0)
280                         goto out_buf;
281                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
282         } else {
283                 memset(buf, 0, LUSTRE_DQBLKSIZE);
284                 /* Assure block allocation... */
285                 if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
286                         goto out_buf;
287                 blk = info->dqi_blocks++;
288         }
289         lustre_mark_info_dirty(info);
290         ret = blk;
291 out_buf:
292         freedqbuf(buf);
293         return ret;
294 }
295
296 /**
297  * Insert empty block to the list
298  */
299 int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
300                    dqbuf_t buf, uint blk)
301 {
302         struct lustre_disk_dqdbheader *dh =
303             (struct lustre_disk_dqdbheader *)buf;
304         int err;
305
306         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
307         dh->dqdh_prev_free = cpu_to_le32(0);
308         dh->dqdh_entries = cpu_to_le16(0);
309         info->dqi_free_blk = blk;
310         lustre_mark_info_dirty(info);
311         if ((err = write_blk(filp, blk, buf)) < 0)
312                 /* Some strange block. We had better leave it... */
313                 return err;
314         return 0;
315 }
316
317 /**
318  * Remove given block from the list of blocks with free entries
319  */
320 int remove_free_dqentry(struct file *filp,
321                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
322                         uint blk)
323 {
324         dqbuf_t tmpbuf = getdqbuf();
325         struct lustre_disk_dqdbheader *dh =
326             (struct lustre_disk_dqdbheader *)buf;
327         uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk =
328             le32_to_cpu(dh->dqdh_prev_free);
329         int err;
330
331         if (!tmpbuf)
332                 return -ENOMEM;
333         if (nextblk) {
334                 if ((err = read_blk(filp, NULL, 0, nextblk, tmpbuf)) < 0)
335                         goto out_buf;
336                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
337                     dh->dqdh_prev_free;
338                 if ((err = write_blk(filp, nextblk, tmpbuf)) < 0)
339                         goto out_buf;
340         }
341         if (prevblk) {
342                 if ((err = read_blk(filp, NULL, 0, prevblk, tmpbuf)) < 0)
343                         goto out_buf;
344                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
345                     dh->dqdh_next_free;
346                 if ((err = write_blk(filp, prevblk, tmpbuf)) < 0)
347                         goto out_buf;
348         } else {
349                 info->dqi_free_entry = nextblk;
350                 lustre_mark_info_dirty(info);
351         }
352         freedqbuf(tmpbuf);
353         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
354         if (write_blk(filp, blk, buf) < 0)
355                 /* No matter whether write succeeds block is out of list */
356                 CDEBUG(D_ERROR, 
357                        "VFS: Can't write block (%u) with free entries.\n", blk);
358         return 0;
359 out_buf:
360         freedqbuf(tmpbuf);
361         return err;
362 }
363
364 /**
365  * Insert given block to the beginning of list with free entries
366  */
367 int insert_free_dqentry(struct file *filp,
368                         struct lustre_mem_dqinfo *info, dqbuf_t buf,
369                         uint blk)
370 {
371         dqbuf_t tmpbuf = getdqbuf();
372         struct lustre_disk_dqdbheader *dh =
373             (struct lustre_disk_dqdbheader *)buf;
374         int err;
375
376         if (!tmpbuf)
377                 return -ENOMEM;
378         dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
379         dh->dqdh_prev_free = cpu_to_le32(0);
380         if ((err = write_blk(filp, blk, buf)) < 0)
381                 goto out_buf;
382         if (info->dqi_free_entry) {
383                 if ((err = read_blk(filp, NULL, 0, info->dqi_free_entry, tmpbuf)) < 0)
384                         goto out_buf;
385                 ((struct lustre_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
386                     cpu_to_le32(blk);
387                 if ((err = write_blk(filp, info->dqi_free_entry, tmpbuf)) < 0)
388                         goto out_buf;
389         }
390         freedqbuf(tmpbuf);
391         info->dqi_free_entry = blk;
392         lustre_mark_info_dirty(info);
393         return 0;
394 out_buf:
395         freedqbuf(tmpbuf);
396         return err;
397 }
398
399
400
401 /**
402  * Find space for dquot
403  */
404 static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
405                               lustre_quota_version_t version)
406 {
407         struct lustre_quota_info *lqi = dquot->dq_info;
408         struct file *filp = lqi->qi_files[dquot->dq_type];
409         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
410         uint blk, i;
411         struct lustre_disk_dqdbheader *dh;
412         void *ddquot;
413         int dqblk_sz = lustre_disk_dqblk_sz[version];
414         int dqstrinblk = lustre_dqstrinblk[version];
415         dqbuf_t buf;
416
417         *err = 0;
418         if (!(buf = getdqbuf())) {
419                 *err = -ENOMEM;
420                 return 0;
421         }
422         dh = (struct lustre_disk_dqdbheader *)buf;
423         ddquot = GETENTRIES(buf, version);
424         if (info->dqi_free_entry) {
425                 blk = info->dqi_free_entry;
426                 if ((*err = read_blk(filp, NULL, 0, blk, buf)) < 0)
427                         goto out_buf;
428         } else {
429                 blk = get_free_dqblk(filp, info);
430                 if ((int)blk < 0) {
431                         *err = blk;
432                         freedqbuf(buf);
433                         return 0;
434                 }
435                 memset(buf, 0, LUSTRE_DQBLKSIZE);
436                 info->dqi_free_entry = blk; /* This is enough as block is 
437                                                already zeroed and entry list
438                                                is empty... */
439                 lustre_mark_info_dirty(info);
440         }
441
442         /* Will block be full */
443         if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
444                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
445                         CDEBUG(D_ERROR, 
446                                "VFS: find_free_dqentry(): Can't remove block "
447                                "(%u) from entry free list.\n", blk);
448                         goto out_buf;
449                 }
450         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
451         /* Find free structure in block */
452         for (i = 0; i < dqstrinblk &&
453              memcmp((char *)&emptydquot[version],
454                     (char *)ddquot + i * dqblk_sz, dqblk_sz);
455              i++);
456
457         if (i == dqstrinblk) {
458                 CDEBUG(D_ERROR, 
459                        "VFS: find_free_dqentry(): Data block full but it "
460                        "shouldn't.\n");
461                 *err = -EIO;
462                 goto out_buf;
463         }
464
465         if ((*err = write_blk(filp, blk, buf)) < 0) {
466                 CDEBUG(D_ERROR,
467                        "VFS: find_free_dqentry(): Can't write quota data "
468                        "block %u.\n", blk);
469                 goto out_buf;
470         }
471         dquot->dq_off =
472             (blk << LUSTRE_DQBLKSIZE_BITS) +
473             sizeof(struct lustre_disk_dqdbheader) +
474             i * dqblk_sz;
475         freedqbuf(buf);
476         return blk;
477 out_buf:
478         freedqbuf(buf);
479         return 0;
480 }
481
482 /**
483  * Insert reference to structure into the trie
484  */
485 static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth,
486                           lustre_quota_version_t version)
487 {
488         struct lustre_quota_info *lqi = dquot->dq_info;
489         struct file *filp = lqi->qi_files[dquot->dq_type];
490         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
491         dqbuf_t buf;
492         int ret = 0, newson = 0, newact = 0;
493         u32 *ref;
494         uint newblk;
495
496         if (!(buf = getdqbuf()))
497                 return -ENOMEM;
498         if (!*treeblk) {
499                 ret = get_free_dqblk(filp, info);
500                 if (ret < 0)
501                         goto out_buf;
502                 *treeblk = ret;
503                 memset(buf, 0, LUSTRE_DQBLKSIZE);
504                 newact = 1;
505         } else {
506                 if ((ret = read_blk(filp, NULL, 0, *treeblk, buf)) < 0) {
507                         CERROR("VFS: Can't read tree quota block %u.\n",
508                                *treeblk);
509                         goto out_buf;
510                 }
511         }
512         ref = (u32 *) buf;
513         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
514         if (!newblk)
515                 newson = 1;
516         if (depth == LUSTRE_DQTREEDEPTH - 1) {
517
518                 if (newblk) {
519                         CDEBUG(D_ERROR, 
520                                "VFS: Inserting already present quota entry "
521                                "(block %u).\n",
522                                ref[GETIDINDEX(dquot->dq_id, depth)]);
523                         ret = -EIO;
524                         goto out_buf;
525                 }
526
527                 newblk = find_free_dqentry(dquot, &ret, version);
528         } else
529                 ret = do_insert_tree(dquot, &newblk, depth + 1, version);
530         if (newson && ret >= 0) {
531                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
532                 ret = write_blk(filp, *treeblk, buf);
533         } else if (newact && ret < 0)
534                 put_free_dqblk(filp, info, buf, *treeblk);
535 out_buf:
536         freedqbuf(buf);
537         return ret;
538 }
539
540 /**
541  * Wrapper for inserting quota structure into tree
542  */
543 static inline int dq_insert_tree(struct lustre_dquot *dquot, 
544                                  lustre_quota_version_t version)
545 {
546         int tmp = LUSTRE_DQTREEOFF;
547         return do_insert_tree(dquot, &tmp, 0, version);
548 }
549
550 /**
551  * We don't have to be afraid of deadlocks as we never have quotas on
552  * quota files...
553  */
554 static int lustre_write_dquot(struct lustre_dquot *dquot, 
555                               lustre_quota_version_t version)
556 {
557         int type = dquot->dq_type;
558         struct file *filp;
559         loff_t offset;
560         ssize_t ret;
561         int dqblk_sz = lustre_disk_dqblk_sz[version];
562         struct lustre_disk_dqblk_v2 ddquot;
563
564         ret = mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, version);
565         if (ret < 0)
566                 return ret;
567
568         if (!dquot->dq_off)
569                 if ((ret = dq_insert_tree(dquot, version)) < 0) {
570                         CDEBUG(D_ERROR,
571                                "VFS: Error %Zd occurred while creating "
572                                "quota.\n", ret);
573                         return ret;
574                 }
575         filp = dquot->dq_info->qi_files[type];
576         offset = dquot->dq_off;
577         /* Argh... We may need to write structure full of zeroes but that would
578          * be treated as an empty place by the rest of the code. Format change
579          * would be definitely cleaner but the problems probably are not worth
580          * it */
581         if (!memcmp((char *)&emptydquot[version], (char *)&ddquot, dqblk_sz))
582                 ddquot.dqb_itime = cpu_to_le64(1);
583
584         ret = lustre_write_quota(filp, (char *)&ddquot, dqblk_sz, offset);
585         if (ret != dqblk_sz) {
586                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
587                        filp->f_dentry->d_sb->s_id);
588                 if (ret >= 0)
589                         ret = -ENOSPC;
590         } else
591                 ret = 0;
592
593         return ret;
594 }
595
596 /**
597  * Free dquot entry in data block
598  */
599 static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
600                         lustre_quota_version_t version)
601 {
602         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
603         struct lustre_mem_dqinfo *info =
604             &dquot->dq_info->qi_info[dquot->dq_type];
605         struct lustre_disk_dqdbheader *dh;
606         dqbuf_t buf = getdqbuf();
607         int dqstrinblk = lustre_dqstrinblk[version];
608         int ret = 0;
609
610         if (!buf)
611                 return -ENOMEM;
612         if (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS != blk) {
613                 CDEBUG(D_ERROR,
614                        "VFS: Quota structure has offset to other block (%u) "
615                        "than it should (%u).\n",
616                        blk, (uint) (dquot->dq_off >> LUSTRE_DQBLKSIZE_BITS));
617                 goto out_buf;
618         }
619         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
620                 CDEBUG(D_ERROR, "VFS: Can't read quota data block %u\n", blk);
621                 goto out_buf;
622         }
623         dh = (struct lustre_disk_dqdbheader *)buf;
624         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) - 1);
625         if (!le16_to_cpu(dh->dqdh_entries)) {   /* Block got free? */
626                 if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 ||
627                     (ret = put_free_dqblk(filp, info, buf, blk)) < 0) {
628                         CDEBUG(D_ERROR,
629                                "VFS: Can't move quota data block (%u) to free "
630                                "list.\n", blk);
631                         goto out_buf;
632                 }
633         } else {
634                 memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
635                        0, lustre_disk_dqblk_sz[version]);
636                 if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
637                         /* Insert will write block itself */
638                         if ((ret =
639                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
640                                 CDEBUG(D_ERROR,
641                                        "VFS: Can't insert quota data block "
642                                        "(%u) to free entry list.\n", blk);
643                                 goto out_buf;
644                         }
645                 } else if ((ret = write_blk(filp, blk, buf)) < 0) {
646                         CDEBUG(D_ERROR,
647                                "VFS: Can't write quota data block %u\n", blk);
648                         goto out_buf;
649                 }
650         }
651         dquot->dq_off = 0;      /* Quota is now unattached */
652 out_buf:
653         freedqbuf(buf);
654         return ret;
655 }
656
657 /**
658  * Remove reference to dquot from tree
659  */
660 static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
661                        lustre_quota_version_t version)
662 {
663         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
664         struct lustre_mem_dqinfo *info =
665             &dquot->dq_info->qi_info[dquot->dq_type];
666         dqbuf_t buf = getdqbuf();
667         int ret = 0;
668         uint newblk;
669         u32 *ref = (u32 *) buf;
670
671         if (!buf)
672                 return -ENOMEM;
673         if ((ret = read_blk(filp, NULL, 0, *blk, buf)) < 0) {
674                 CERROR("VFS: Can't read quota data block %u\n", *blk);
675                 goto out_buf;
676         }
677         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
678         if (depth == LUSTRE_DQTREEDEPTH - 1) {
679                 ret = free_dqentry(dquot, newblk, version);
680                 newblk = 0;
681         } else
682                 ret = remove_tree(dquot, &newblk, depth + 1, version);
683         if (ret >= 0 && !newblk) {
684                 int i;
685                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
686                 for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
687                         /* Block got empty? */ ;
688                 /* don't put the root block into free blk list! */
689                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
690                         put_free_dqblk(filp, info, buf, *blk);
691                         *blk = 0;
692                 } else if ((ret = write_blk(filp, *blk, buf)) < 0)
693                         CDEBUG(D_ERROR,
694                                "VFS: Can't write quota tree block %u.\n", *blk);
695         }
696 out_buf:
697         freedqbuf(buf);
698         return ret;
699 }
700
701 /**
702  * Delete dquot from tree
703  */
704 static int lustre_delete_dquot(struct lustre_dquot *dquot, 
705                                 lustre_quota_version_t version)
706 {
707         uint tmp = LUSTRE_DQTREEOFF;
708
709         if (!dquot->dq_off)     /* Even not allocated? */
710                 return 0;
711         return remove_tree(dquot, &tmp, 0, version);
712 }
713
714 /**
715  * Find entry in block
716  */
717 static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
718                                  lustre_quota_version_t version)
719 {
720         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
721         dqbuf_t buf = getdqbuf();
722         loff_t ret = 0;
723         int i;
724         struct lustre_disk_dqblk_v2 *ddquot =
725                 (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
726         int dqblk_sz = lustre_disk_dqblk_sz[version];
727         int dqstrinblk = lustre_dqstrinblk[version];
728
729         LASSERT(version == LUSTRE_QUOTA_V2);
730
731         if (!buf)
732                 return -ENOMEM;
733         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
734                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
735                 goto out_buf;
736         }
737         if (dquot->dq_id)
738                 for (i = 0; i < dqstrinblk && 
739                      le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
740                      i++) ;
741         else {                  /* ID 0 as a bit more complicated searching... */
742                 for (i = 0; i < dqstrinblk; i++)
743                         if (!le32_to_cpu(ddquot[i].dqb_id)
744                             && memcmp((char *)&emptydquot[version],
745                                       (char *)&ddquot[i], dqblk_sz))
746                                 break;
747         }
748         if (i == dqstrinblk) {
749                 CDEBUG(D_ERROR,
750                        "VFS: Quota for id %u referenced but not present.\n",
751                        dquot->dq_id);
752                 ret = -EIO;
753                 goto out_buf;
754         } else
755                 ret =
756                     (blk << LUSTRE_DQBLKSIZE_BITS) +
757                     sizeof(struct lustre_disk_dqdbheader) +
758                     i * dqblk_sz;
759 out_buf:
760         freedqbuf(buf);
761         return ret;
762 }
763
764 /**
765  * Find entry for given id in the tree
766  */
767 static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth,
768                                 lustre_quota_version_t version)
769 {
770         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
771         dqbuf_t buf = getdqbuf();
772         loff_t ret = 0;
773         u32 *ref = (u32 *) buf;
774
775         if (!buf)
776                 return -ENOMEM;
777         if ((ret = read_blk(filp, NULL, 0, blk, buf)) < 0) {
778                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
779                 goto out_buf;
780         }
781         ret = 0;
782         blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
783         if (!blk)               /* No reference? */
784                 goto out_buf;
785         if (depth < LUSTRE_DQTREEDEPTH - 1)
786                 ret = find_tree_dqentry(dquot, blk, depth + 1, version);
787         else
788                 ret = find_block_dqentry(dquot, blk, version);
789 out_buf:
790         freedqbuf(buf);
791         return ret;
792 }
793
794 /**
795  * Find entry for given id in the tree - wrapper function
796  */
797 static inline loff_t find_dqentry(struct lustre_dquot *dquot,
798                                   lustre_quota_version_t version)
799 {
800         return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
801 }
802
803 int lustre_read_dquot(struct lustre_dquot *dquot)
804 {
805         int type = dquot->dq_type;
806         struct file *filp;
807         loff_t offset;
808         int ret = 0, dqblk_sz;
809         lustre_quota_version_t version;
810
811         /* Invalidated quota? */
812         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
813                 CDEBUG(D_ERROR, "VFS: Quota invalidated while reading!\n");
814                 return -ESRCH;
815         }
816
817         version = dquot->dq_info->qi_version;
818         LASSERT(version == LUSTRE_QUOTA_V2);
819         dqblk_sz = lustre_disk_dqblk_sz[version];
820
821         offset = find_dqentry(dquot, version);
822         if (offset <= 0) {      /* Entry not present? */
823                 if (offset < 0)
824                         CDEBUG(D_ERROR,
825                                "VFS: Can't read quota structure for id %u.\n",
826                                dquot->dq_id);
827                 dquot->dq_off = 0;
828                 cfs_set_bit(DQ_FAKE_B, &dquot->dq_flags);
829                 memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
830                 ret = offset;
831         } else {
832                 struct lustre_disk_dqblk_v2 ddquot;
833
834                 dquot->dq_off = offset;
835                 if ((ret = lustre_read_quota(filp, NULL, type, (char *)&ddquot,
836                                              dqblk_sz, offset)) != dqblk_sz) {
837                         if (ret >= 0)
838                                 ret = -EIO;
839                         CDEBUG(D_ERROR,
840                                "VFS: Error while reading quota structure for id "
841                                "%u.\n", dquot->dq_id);
842                         memset((char *)&ddquot, 0, dqblk_sz);
843                 } else {
844                         ret = 0;
845                         /* We need to escape back all-zero structure */
846                         if (!memcmp((char *)&fakedquot[version],
847                                     (char *)&ddquot, dqblk_sz))
848                                 ddquot.dqb_itime = cpu_to_le64(0);
849                 }
850                 disk2memdqb(&dquot->dq_dqb, &ddquot, version);
851         }
852
853         return ret;
854 }
855 EXPORT_SYMBOL(lustre_read_dquot);
856
857 /**
858  * Commit changes of dquot to disk - it might also mean deleting
859  * it when quota became fake.
860  */
861 int lustre_commit_dquot(struct lustre_dquot *dquot)
862 {
863         int rc = 0;
864         lustre_quota_version_t version = dquot->dq_info->qi_version;
865         void *handle;
866         struct inode *inode = dquot->dq_info->qi_files[dquot->dq_type]->f_dentry->d_inode;
867         int delete = 0;
868
869         /* always clear the flag so we don't loop on an IO error... */
870         cfs_clear_bit(DQ_MOD_B, &dquot->dq_flags);
871
872         /* The block/inode usage in admin quotafile isn't the real usage
873          * over all cluster, so keep the fake dquot entry on disk is
874          * meaningless, just remove it */
875         if (cfs_test_bit(DQ_FAKE_B, &dquot->dq_flags))
876                 delete = 1;
877         handle = lustre_quota_journal_start(inode, delete);
878         if (unlikely(IS_ERR(handle))) {
879                 rc = PTR_ERR(handle);
880                 CERROR("fail to lustre_quota_journal_start: rc = %d\n", rc);
881                 return rc;
882         }
883
884         if (delete)
885                 rc = lustre_delete_dquot(dquot, version);
886         else
887                 rc = lustre_write_dquot(dquot, version);
888         lustre_quota_journal_stop(handle);
889
890         if (rc < 0)
891                 return rc;
892
893         if (lustre_info_dirty(&dquot->dq_info->qi_info[dquot->dq_type]))
894                 rc = lustre_write_quota_info(dquot->dq_info, dquot->dq_type);
895
896         return rc;
897 }
898 EXPORT_SYMBOL(lustre_commit_dquot);
899
900 int lustre_init_quota_header(struct lustre_quota_info *lqi, int type,
901                              int fakemagics)
902 {
903         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
904         static const uint fake_magics[] = LUSTRE_BADQMAGICS;
905         const uint* quota_versions = lustre_initqversions[lqi->qi_version];
906         struct lustre_disk_dqheader dqhead;
907         ssize_t size;
908         struct file *fp = lqi->qi_files[type];
909         int rc = 0;
910
911         /* write quotafile header */
912         dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
913                                        fake_magics[type] : quota_magics[type]);
914         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
915         size = lustre_write_quota(fp, (char *)&dqhead,
916                                   sizeof(struct lustre_disk_dqheader), 0);
917
918         if (size != sizeof(struct lustre_disk_dqheader)) {
919                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
920                 rc = size;
921         }
922
923         return rc;
924 }
925
926 /**
927  * We need to export this function to initialize quotafile, because we haven't
928  * user level check utility
929  */
930 int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
931                                    int fakemagics)
932 {
933         struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
934         int rc;
935
936         rc = lustre_init_quota_header(lqi, type, fakemagics);
937         if (rc)
938                 return rc;
939
940         /* write init quota info */
941         memset(dqinfo, 0, sizeof(*dqinfo));
942         dqinfo->dqi_bgrace = MAX_DQ_TIME;
943         dqinfo->dqi_igrace = MAX_IQ_TIME;
944         dqinfo->dqi_blocks = LUSTRE_DQTREEOFF + 1;
945
946         return lustre_write_quota_info(lqi, type);
947 }
948
/**
 * Initialize the quota file for \a type with the regular (non-fake) magic.
 */
int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
{
        const int fakemagics = 0;

        return lustre_init_quota_info_generic(lqi, type, fakemagics);
}
EXPORT_SYMBOL(lustre_init_quota_info);
954
955 static int walk_block_dqentry(struct file *filp, struct inode *inode, int type,
956                               uint blk, cfs_list_t *list)
957 {
958         dqbuf_t buf = getdqbuf();
959         loff_t ret = 0;
960         struct lustre_disk_dqdbheader *dqhead =
961             (struct lustre_disk_dqdbheader *)buf;
962         struct dqblk *blk_item;
963         struct dqblk *pos;
964         cfs_list_t *tmp;
965
966         if (!buf)
967                 return -ENOMEM;
968         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
969                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
970                 goto out_buf;
971         }
972         ret = 0;
973
974         if (!le32_to_cpu(dqhead->dqdh_entries))
975                 goto out_buf;
976
977         if (cfs_list_empty(list)) {
978                 tmp = list;
979                 goto done;
980         }
981
982         cfs_list_for_each_entry(pos, list, link) {
983                 if (blk == pos->blk)    /* we got this blk already */
984                         goto out_buf;
985                 if (blk > pos->blk)
986                         continue;
987                 break;
988         }
989         tmp = &pos->link;
990 done:
991         blk_item = kmalloc(sizeof(*blk_item), GFP_NOFS);
992         if (!blk_item) {
993                 ret = -ENOMEM;
994                 goto out_buf;
995         }
996         blk_item->blk = blk;
997         CFS_INIT_LIST_HEAD(&blk_item->link);
998
999         cfs_list_add_tail(&blk_item->link, tmp);
1000
1001 out_buf:
1002         freedqbuf(buf);
1003         return ret;
1004 }
1005
1006 int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
1007                       uint blk, int depth, cfs_list_t *list)
1008 {
1009         dqbuf_t buf = getdqbuf();
1010         loff_t ret = 0;
1011         int index;
1012         u32 *ref = (u32 *) buf;
1013
1014         if (!buf)
1015                 return -ENOMEM;
1016         if ((ret = read_blk(filp, inode, type, blk, buf)) < 0) {
1017                 CERROR("VFS: Can't read quota tree block %u.\n", blk);
1018                 goto out_buf;
1019         }
1020         ret = 0;
1021
1022         for (index = 0; index <= 0xff && !ret; index++) {
1023                 blk = le32_to_cpu(ref[index]);
1024                 if (!blk)       /* No reference */
1025                         continue;
1026
1027                 if (depth < LUSTRE_DQTREEDEPTH - 1)
1028                         ret = walk_tree_dqentry(filp, inode, type, blk,
1029                                                 depth + 1, list);
1030                 else
1031                         ret = walk_block_dqentry(filp, inode, type, blk, list);
1032         }
1033 out_buf:
1034         freedqbuf(buf);
1035         return ret;
1036 }
1037
1038 /**
1039  * Walk through the quota file (v2 format) to get all ids with quota limit
1040  */
1041 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
1042                     cfs_list_t *list)
1043 {
1044         cfs_list_t blk_list;
1045         struct dqblk *blk_item, *tmp;
1046         dqbuf_t buf = NULL;
1047         struct lustre_disk_dqblk_v2 *ddquot;
1048         int rc;
1049         lustre_quota_version_t version;
1050
1051         ENTRY;
1052
1053         LASSERT(ergo(fp == NULL, inode != NULL));
1054
1055         if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
1056                 version = LUSTRE_QUOTA_V2;
1057         else {
1058                 CDEBUG(D_ERROR, "unknown quota file format!\n");
1059                 RETURN(-EINVAL);
1060         }
1061
1062         if (!cfs_list_empty(list)) {
1063                 CDEBUG(D_ERROR, "not empty list\n");
1064                 RETURN(-EINVAL);
1065         }
1066
1067         CFS_INIT_LIST_HEAD(&blk_list);
1068         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
1069         if (rc) {
1070                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
1071                 GOTO(out_free, rc);
1072         }
1073         if (cfs_list_empty(&blk_list))
1074                 RETURN(0);
1075
1076         buf = getdqbuf();
1077         if (!buf)
1078                 RETURN(-ENOMEM);
1079         ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
1080
1081         cfs_list_for_each_entry(blk_item, &blk_list, link) {
1082                 loff_t ret = 0;
1083                 int i, dqblk_sz = lustre_disk_dqblk_sz[version];
1084
1085                 memset(buf, 0, LUSTRE_DQBLKSIZE);
1086                 if ((ret = read_blk(fp, inode, type, blk_item->blk, buf)) < 0) {
1087                         CERROR("VFS: Can't read quota tree block %u.\n",
1088                                blk_item->blk);
1089                         GOTO(out_free, rc = ret);
1090                 }
1091
1092                 for (i = 0; i < lustre_dqstrinblk[version]; i++) {
1093                         struct dquot_id *dqid;
1094                         /* skip empty entry */
1095                         if (!memcmp((char *)&emptydquot[version],
1096                                     (char *)&ddquot[i], dqblk_sz))
1097                                 continue;
1098
1099                         OBD_ALLOC_GFP(dqid, sizeof(*dqid), CFS_ALLOC_NOFS);
1100                         if (!dqid)
1101                                 GOTO(out_free, rc = -ENOMEM);
1102
1103                         dqid->di_id    = le32_to_cpu(ddquot[i].dqb_id);
1104                         dqid->di_flag  = le64_to_cpu(ddquot[i].dqb_ihardlimit) ?
1105                                          QI_SET : 0;
1106                         dqid->di_flag |= le64_to_cpu(ddquot[i].dqb_bhardlimit) ?
1107                                          QB_SET : 0;
1108
1109                         CFS_INIT_LIST_HEAD(&dqid->di_link);
1110                         cfs_list_add(&dqid->di_link, list);
1111                 }
1112         }
1113
1114 out_free:
1115         cfs_list_for_each_entry_safe(blk_item, tmp, &blk_list, link) {
1116                 cfs_list_del_init(&blk_item->link);
1117                 kfree(blk_item);
1118         }
1119         if (buf)
1120                 freedqbuf(buf);
1121
1122         RETURN(rc);
1123 }
1124 EXPORT_SYMBOL(lustre_get_qids);