Whamcloud - gitweb
2b7635f58fb68bd69f71618690fe10bc43fcedca
[fs/lustre-release.git] / lustre / quota / quota_context.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/quota/quota_context.c
37  *
38  * Lustre Quota Context
39  *
40  * Author: Niu YaWei <niu@clusterfs.com>
41  */
42
43 #ifndef EXPORT_SYMTAB
44 # define EXPORT_SYMTAB
45 #endif
46
47 #define DEBUG_SUBSYSTEM S_MDS
48
49 #include <linux/version.h>
50 #include <linux/fs.h>
51 #include <asm/unistd.h>
52 #include <linux/slab.h>
53 #include <linux/quotaops.h>
54 #include <linux/module.h>
55 #include <linux/init.h>
56
57 #include <obd_class.h>
58 #include <lustre_quota.h>
59 #include <lustre_fsfilt.h>
60 #include "quota_internal.h"
61
/* Default tunables; qctxt_init() derives the per-context unit and tune
 * sizes from these values. */
unsigned long default_bunit_sz = 100 * 1024 * 1024;       /* 100M bytes */
unsigned long default_btune_ratio = 50;                   /* 50 percentage */
unsigned long default_iunit_sz = 5000;       /* 5000 inodes */
unsigned long default_itune_ratio = 50;      /* 50 percentage */

/* Slab cache that struct lustre_qunit objects are allocated from. */
cfs_mem_cache_t *qunit_cachep = NULL;
/* Hash table of qunits; buckets are selected with qunit_hashfn(). */
struct list_head qunit_hash[NR_DQHASH];
/* Taken around every access to qunit_hash and to the per-qunit waiter
 * lists in this file. */
spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED;
70
/* One acquire/release operation tracked in qunit_hash.  Entries are looked
 * up by (lq_ctxt, lq_data.qd_id, lq_data.qd_flags); see find_qunit(). */
struct lustre_qunit {
        struct list_head lq_hash;               /* Hash list in memory */
        atomic_t lq_refcnt;                     /* Use count */
        struct lustre_quota_ctxt *lq_ctxt;      /* Quota context this applies to */
        struct qunit_data lq_data;              /* See qunit_data */
        unsigned int lq_opc;                    /* QUOTA_DQACQ, QUOTA_DQREL */
        struct list_head lq_waiters;            /* All write threads waiting for this qunit */
};
79
80 int should_translate_quota (struct obd_import *imp)
81 {
82         ENTRY;
83
84         LASSERT(imp);
85         if ((imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) && 
86             !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT))
87                 RETURN(0);
88         else
89                 RETURN(1);
90 }
91
92 void qunit_cache_cleanup(void)
93 {
94         int i;
95         ENTRY;
96
97         spin_lock(&qunit_hash_lock);
98         for (i = 0; i < NR_DQHASH; i++)
99                 LASSERT(list_empty(qunit_hash + i));
100         spin_unlock(&qunit_hash_lock);
101
102         if (qunit_cachep) {
103                 int rc;
104                 rc = cfs_mem_cache_destroy(qunit_cachep);
105                 LASSERTF(rc == 0, "couldn't destroy qunit_cache slab\n");
106                 qunit_cachep = NULL;
107         }
108         EXIT;
109 }
110
111 int qunit_cache_init(void)
112 {
113         int i;
114         ENTRY;
115
116         LASSERT(qunit_cachep == NULL);
117         qunit_cachep = cfs_mem_cache_create("ll_qunit_cache",
118                                             sizeof(struct lustre_qunit),
119                                             0, 0);
120         if (!qunit_cachep)
121                 RETURN(-ENOMEM);
122
123         spin_lock(&qunit_hash_lock);
124         for (i = 0; i < NR_DQHASH; i++)
125                 CFS_INIT_LIST_HEAD(qunit_hash + i);
126         spin_unlock(&qunit_hash_lock);
127         RETURN(0);
128 }
129
130 static inline int
131 qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
132              __attribute__((__const__));
133
134 static inline int
135 qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
136 {
137         unsigned int id = qdata->qd_id;
138         unsigned int type = qdata->qd_flags & QUOTA_IS_GRP;
139
140         unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id;
141         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
142         return tmp;
143 }
144
145 /* compute the remaining quota for certain gid or uid b=11693 */
146 int compute_remquota(struct obd_device *obd,
147                      struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
148 {
149         struct super_block *sb = qctxt->lqc_sb;
150         __u64 usage, limit;
151         struct obd_quotactl *qctl;
152         int ret = QUOTA_RET_OK;
153         __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
154         ENTRY;
155
156         if (!sb_any_quota_enabled(sb))
157                 RETURN(QUOTA_RET_NOQUOTA);
158
159         /* ignore root user */
160         if (qdata->qd_id == 0 && qdata_type == USRQUOTA)
161                 RETURN(QUOTA_RET_NOLIMIT);
162
163         OBD_ALLOC_PTR(qctl);
164         if (qctl == NULL) 
165                 RETURN(-ENOMEM);
166
167         /* get fs quota usage & limit */
168         qctl->qc_cmd = Q_GETQUOTA;
169         qctl->qc_id = qdata->qd_id;
170         qctl->qc_type = qdata_type;
171         ret = fsfilt_quotactl(obd, sb, qctl);
172         if (ret) {
173                 if (ret == -ESRCH)      /* no limit */
174                         ret = QUOTA_RET_NOLIMIT;
175                 else
176                         CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", 
177                                ret);
178                 GOTO(out, ret);
179         }
180
181         usage = qctl->qc_dqblk.dqb_curspace;
182         limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
183         if (!limit){            /* no limit */
184                 ret = QUOTA_RET_NOLIMIT;
185                 GOTO(out, ret);
186         }
187
188         if (limit >= usage)
189                 qdata->qd_count = limit - usage;
190         else
191                 qdata->qd_count = 0;
192         EXIT;
193 out:
194         OBD_FREE_PTR(qctl);
195         return ret;
196 }
197
198 /* caller must hold qunit_hash_lock */
199 static inline struct lustre_qunit *find_qunit(unsigned int hashent,
200                                               struct lustre_quota_ctxt *qctxt,
201                                               struct qunit_data *qdata)
202 {
203         struct lustre_qunit *qunit = NULL;
204         struct qunit_data *tmp;
205
206         LASSERT_SPIN_LOCKED(&qunit_hash_lock);
207         list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) {
208                 tmp = &qunit->lq_data;
209                 if (qunit->lq_ctxt == qctxt &&
210                     qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags)
211                         return qunit;
212         }
213         return NULL;
214 }
215
/* check_cur_qunit - check the current usage of qunit.
 * @obd:   obd device passed through to fsfilt_quotactl()
 * @qctxt: quota context
 * @qdata: the type of quota unit to be checked; qd_id and qd_flags select
 *         the id, user/group type and block/inode kind.  qd_count must be
 *         0 on entry (asserted) and, when 1 or 2 is returned, holds the
 *         amount to acquire or release as a multiple of the qunit size.
 *
 * return: 1 - need acquire qunit;
 *         2 - need release qunit;
 *         0 - need do nothing (also: quota disabled, no quota entry,
 *             or no limit set).
 *       < 0 - error.
 */
static int
check_cur_qunit(struct obd_device *obd,
                struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
{
        struct super_block *sb = qctxt->lqc_sb;
        unsigned long qunit_sz, tune_sz;
        __u64 usage, limit;
        struct obd_quotactl *qctl;
        int ret = 0;
        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
        ENTRY;

        if (!sb_any_quota_enabled(sb))
                RETURN(0);

        OBD_ALLOC_PTR(qctl);
        if (qctl == NULL)
                RETURN(-ENOMEM);

        /* get fs quota usage & limit */
        qctl->qc_cmd = Q_GETQUOTA;
        qctl->qc_id = qdata->qd_id;
        qctl->qc_type = qdata_type;
        ret = fsfilt_quotactl(obd, sb, qctl);
        if (ret) {
                if (ret == -ESRCH)      /* no limit */
                        ret = 0;
                else
                        CERROR("can't get fs quota usage! (rc:%d)\n", ret);
                GOTO(out, ret);
        }

        /* pick the block or inode flavour of usage, limit and unit sizes */
        if (is_blk) {
                usage = qctl->qc_dqblk.dqb_curspace;
                limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
                qunit_sz = qctxt->lqc_bunit_sz;
                tune_sz = qctxt->lqc_btune_sz;

                LASSERT(!(qunit_sz % QUOTABLOCK_SIZE));
        } else {
                usage = qctl->qc_dqblk.dqb_curinodes;
                limit = qctl->qc_dqblk.dqb_ihardlimit;
                qunit_sz = qctxt->lqc_iunit_sz;
                tune_sz = qctxt->lqc_itune_sz;
        }

        /* ignore the no quota limit case */
        if (!limit)
                GOTO(out, ret = 0);

        /* we don't count the MIN_QLIMIT */
        if ((limit == MIN_QLIMIT && !is_blk) ||
            (toqb(limit) == MIN_QLIMIT && is_blk))
                limit = 0;

        LASSERT(qdata->qd_count == 0);
        if (limit <= usage + tune_sz) {
                /* acquire: add whole qunits until limit + qd_count is
                 * above usage + tune_sz */
                while (qdata->qd_count + limit <= usage + tune_sz)
                        qdata->qd_count += qunit_sz;
                ret = 1;
        } else if (limit > usage + qunit_sz + tune_sz) {
                /* release: add whole qunits until limit - qd_count is no
                 * longer above usage + qunit_sz + tune_sz */
                while (limit - qdata->qd_count > usage + qunit_sz + tune_sz)
                        qdata->qd_count += qunit_sz;
                ret = 2;
        }
        /* an acquire/release verdict always carries a non-zero count */
        LASSERT(ret == 0 || qdata->qd_count);
        EXIT;
out:
        OBD_FREE_PTR(qctl);
        return ret;
}
297
298 /* caller must hold qunit_hash_lock */
299 static struct lustre_qunit *dqacq_in_flight(struct lustre_quota_ctxt *qctxt,
300                                             struct qunit_data *qdata)
301 {
302         unsigned int hashent = qunit_hashfn(qctxt, qdata);
303         struct lustre_qunit *qunit;
304         ENTRY;
305
306         LASSERT_SPIN_LOCKED(&qunit_hash_lock);
307         qunit = find_qunit(hashent, qctxt, qdata);
308         RETURN(qunit);
309 }
310
311 static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt,
312                                         struct qunit_data *qdata, int opc)
313 {
314         struct lustre_qunit *qunit = NULL;
315         ENTRY;
316
317         OBD_SLAB_ALLOC(qunit, qunit_cachep, CFS_ALLOC_IO, sizeof(*qunit));
318         if (qunit == NULL)
319                 RETURN(NULL);
320
321         CFS_INIT_LIST_HEAD(&qunit->lq_hash);
322         CFS_INIT_LIST_HEAD(&qunit->lq_waiters);
323         atomic_set(&qunit->lq_refcnt, 1);
324         qunit->lq_ctxt = qctxt;
325         memcpy(&qunit->lq_data, qdata, sizeof(*qdata));
326         qunit->lq_opc = opc;
327
328         RETURN(qunit);
329 }
330
/* Hand @qunit back to the qunit_cachep slab cache.  Called directly for a
 * qunit that was never hashed and from qunit_put() on the last reference. */
static inline void free_qunit(struct lustre_qunit *qunit)
{
        OBD_SLAB_FREE(qunit, qunit_cachep, sizeof(*qunit));
}
335
/* Take an additional reference on @qunit; paired with qunit_put(). */
static inline void qunit_get(struct lustre_qunit *qunit)
{
        atomic_inc(&qunit->lq_refcnt);
}
340
341 static void qunit_put(struct lustre_qunit *qunit)
342 {
343         LASSERT(atomic_read(&qunit->lq_refcnt));
344         if (atomic_dec_and_test(&qunit->lq_refcnt))
345                 free_qunit(qunit);
346 }
347
348 static void
349 insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit)
350 {
351         struct list_head *head;
352
353         LASSERT(list_empty(&qunit->lq_hash));
354         head = qunit_hash + qunit_hashfn(qctxt, &qunit->lq_data);
355         list_add(&qunit->lq_hash, head);
356 }
357
/* Unlink @qunit from qunit_hash and re-initialise its hash linkage so that
 * list_empty(&qunit->lq_hash) is true afterwards.  Takes no lock itself;
 * in this file it is called with qunit_hash_lock held.  The qunit must be
 * hashed on entry (asserted). */
static void remove_qunit_nolock(struct lustre_qunit *qunit)
{
        LASSERT(!list_empty(&qunit->lq_hash));
        list_del_init(&qunit->lq_hash);
}
363
/* A thread waiting for a qunit's acquire/release to finish.  Instances in
 * this file live on the waiter's stack. */
struct qunit_waiter {
        struct list_head qw_entry;      /* link in lustre_qunit::lq_waiters;
                                         * empty again once woken */
        cfs_waitq_t      qw_waitq;      /* wait queue the waiter sleeps on */
        int qw_rc;                      /* result stored by whoever wakes
                                         * the waiter */
};
369
/*
 * Raise quota limit @limit by @count.  A limit that is still at the
 * MIN_QLIMIT placeholder is replaced by @count rather than added to.
 *
 * Every argument and the whole expansion are parenthesized so the macro
 * behaves as a single expression wherever it is used.  @limit must be an
 * lvalue and is evaluated more than once.
 */
#define INC_QLIMIT(limit, count)                                        \
        (((limit) == MIN_QLIMIT) ? ((limit) = (count)) : ((limit) += (count)))
372
373
374 /* FIXME check if this mds is the master of specified id */
375 static int
376 is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
377           unsigned int id, int type)
378 {
379         return qctxt->lqc_handler ? 1 : 0;
380 }
381
382 static int
383 schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
384                struct qunit_data *qdata, int opc, int wait);
385
386 static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
387                                        struct qunit_data *qdata, int opc, int wait)
388 {
389         int rc = 0;
390         unsigned long factor;
391         struct qunit_data tmp_qdata;
392         ENTRY;
393
394         LASSERT(qdata && qdata->qd_count);
395         QDATA_DEBUG(qdata, "%s quota split.\n",
396                     (qdata->qd_flags & QUOTA_IS_BLOCK) ? "block" : "inode");
397         if (qdata->qd_flags & QUOTA_IS_BLOCK)
398                 factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * 
399                         qctxt->lqc_bunit_sz;
400         else
401                 factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * 
402                         qctxt->lqc_iunit_sz;
403
404         if (qctxt->lqc_import && should_translate_quota(qctxt->lqc_import) &&
405             qdata->qd_count > factor) {
406                         tmp_qdata = *qdata;
407                 tmp_qdata.qd_count = factor;
408                         qdata->qd_count -= tmp_qdata.qd_count;
409                 QDATA_DEBUG((&tmp_qdata), "be split.\n");
410                 rc = schedule_dqacq(obd, qctxt, &tmp_qdata, opc, wait);
411         } else{
412                 QDATA_DEBUG(qdata, "don't be split.\n");
413                 rc = schedule_dqacq(obd, qctxt, qdata, opc, wait);
414         }
415
416         RETURN(rc);
417 }
418
419 static int
420 dqacq_completion(struct obd_device *obd,
421                  struct lustre_quota_ctxt *qctxt,
422                  struct qunit_data *qdata, int rc, int opc)
423 {
424         struct lustre_qunit *qunit = NULL;
425         struct super_block *sb = qctxt->lqc_sb;
426         unsigned long qunit_sz;
427         struct qunit_waiter *qw, *tmp;
428         int err = 0;
429         __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
430         __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
431         __u64 qd_tmp = qdata->qd_count;
432         unsigned long div_r;
433         ENTRY;
434
435         LASSERT(qdata);
436         qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
437         div_r = do_div(qd_tmp, qunit_sz);
438         LASSERTF(!div_r, "qunit_sz: %lu, return qunit_sz: "LPU64"\n",
439                  qunit_sz, qd_tmp);
440
441         /* update local operational quota file */
442         if (rc == 0) {
443                 __u32 count = QUSG(qdata->qd_count, is_blk);
444                 struct obd_quotactl *qctl;
445                 __u64 *hardlimit;
446
447                 OBD_ALLOC_PTR(qctl);
448                 if (qctl == NULL)
449                         GOTO(out, err = -ENOMEM);
450
451                 /* acq/rel qunit for specified uid/gid is serialized,
452                  * so there is no race between get fs quota limit and
453                  * set fs quota limit */
454                 qctl->qc_cmd = Q_GETQUOTA;
455                 qctl->qc_id = qdata->qd_id;
456                 qctl->qc_type = qdata_type;
457                 err = fsfilt_quotactl(obd, sb, qctl);
458                 if (err) {
459                         CERROR("error get quota fs limit! (rc:%d)\n", err);
460                         GOTO(out_mem, err);
461                 }
462
463                 if (is_blk) {
464                         qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
465                         hardlimit = &qctl->qc_dqblk.dqb_bhardlimit;
466                 } else {
467                         qctl->qc_dqblk.dqb_valid = QIF_ILIMITS;
468                         hardlimit = &qctl->qc_dqblk.dqb_ihardlimit;
469                 }
470
471                 switch (opc) {
472                 case QUOTA_DQACQ:
473                         CDEBUG(D_QUOTA, "%s(acq):count: %d, hardlimt: "LPU64 
474                                ",type: %s.\n", obd->obd_name, count, *hardlimit, 
475                                qdata_type ? "grp": "usr");
476                         INC_QLIMIT(*hardlimit, count);
477                         break;
478                 case QUOTA_DQREL:
479                         CDEBUG(D_QUOTA, "%s(rel):count: %d, hardlimt: "LPU64 
480                                ",type: %s.\n", obd->obd_name, count, *hardlimit, 
481                                qdata_type ? "grp": "usr");
482                         LASSERTF(count < *hardlimit, 
483                                  "count: %d, hardlimit: "LPU64".\n", 
484                                  count, *hardlimit);
485                         *hardlimit -= count;
486                         break;
487                 default:
488                         LBUG();
489                 }
490
491                 /* clear quota limit */
492                 if (count == 0)
493                         *hardlimit = 0;
494
495                 qctl->qc_cmd = Q_SETQUOTA;
496                 err = fsfilt_quotactl(obd, sb, qctl);
497                 if (err)
498                         CERROR("error set quota fs limit! (rc:%d)\n", err);
499
500                 QDATA_DEBUG(qdata, "%s completion\n",
501                             opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
502 out_mem:
503                 OBD_FREE_PTR(qctl);
504         } else if (rc == -EDQUOT) {
505                 QDATA_DEBUG(qdata, "acquire qunit got EDQUOT.\n");
506         } else if (rc == -EBUSY) {
507                 QDATA_DEBUG(qdata, "it's is recovering, got EBUSY.\n");
508         } else {
509                 CERROR("acquire qunit got error! (rc:%d)\n", rc);
510         }
511 out:
512         /* remove the qunit from hash */
513         spin_lock(&qunit_hash_lock);
514
515         qunit = dqacq_in_flight(qctxt, qdata);
516         /* this qunit has been removed by qctxt_cleanup() */
517         if (!qunit) {
518                 spin_unlock(&qunit_hash_lock);
519                 RETURN(err);
520         }
521
522         LASSERT(opc == qunit->lq_opc);
523         remove_qunit_nolock(qunit);
524
525         /* wake up all waiters */
526         list_for_each_entry_safe(qw, tmp, &qunit->lq_waiters, qw_entry) {
527                 list_del_init(&qw->qw_entry);
528                 qw->qw_rc = rc;
529                 wake_up(&qw->qw_waitq);
530         }
531
532         spin_unlock(&qunit_hash_lock);
533
534         qunit_put(qunit);
535
536         /* don't reschedule in such cases:
537          *   - acq/rel failure, but not for quota recovery.
538          *   - local dqacq/dqrel.
539          *   - local disk io failure.
540          */
541         if (err || (rc && rc != -EBUSY) || 
542             is_master(obd, qctxt, qdata->qd_id, qdata_type))
543                 RETURN(err);
544
545         /* reschedule another dqacq/dqrel if needed */
546         qdata->qd_count = 0;
547         rc = check_cur_qunit(obd, qctxt, qdata);
548         if (rc > 0) {
549                 int opc;
550                 opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
551                 rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0);
552                 QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc);
553         }
554         RETURN(err);
555 }
556
/* Arguments stored in ptlrpc_request::rq_async_args by schedule_dqacq()
 * and read back by dqacq_interpret(). */
struct dqacq_async_args {
        struct lustre_quota_ctxt *aa_ctxt;      /* context of the request */
        struct lustre_qunit *aa_qunit;          /* qunit the request is for */
};
561
562 static int dqacq_interpret(const struct lu_env *env,
563                            struct ptlrpc_request *req, void *data, int rc)
564 {
565         struct dqacq_async_args *aa = (struct dqacq_async_args *)data;
566         struct lustre_quota_ctxt *qctxt = aa->aa_ctxt;
567         struct lustre_qunit *qunit = aa->aa_qunit;
568         struct obd_device *obd = req->rq_import->imp_obd;
569         struct qunit_data *qdata = NULL;
570         struct qunit_data_old *qdata_old = NULL;
571         ENTRY;
572
573         LASSERT(req);
574         LASSERT(req->rq_import);
575
576         if ((req->rq_import->imp_connect_data.ocd_connect_flags &
577              OBD_CONNECT_QUOTA64) &&
578             !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
579                 CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
580
581                 qdata = req_capsule_server_swab_get(&req->rq_pill,
582                                                     &RMF_QUNIT_DATA,
583                                           (void*)lustre_swab_qdata);
584         } else {
585                 CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
586
587                 qdata = req_capsule_server_swab_get(&req->rq_pill,
588                                                     &RMF_QUNIT_DATA,
589                                        (void*)lustre_swab_qdata_old);
590                 qdata = lustre_quota_old_to_new(qdata_old);
591         }
592         if (qdata == NULL) {
593                 DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data");
594                 RETURN(-EPROTO);
595         }
596
597         LASSERT(qdata->qd_id == qunit->lq_data.qd_id &&
598                 (qdata->qd_flags & QUOTA_IS_GRP) ==
599                  (qunit->lq_data.qd_flags & QUOTA_IS_GRP) &&
600                 (qdata->qd_count == qunit->lq_data.qd_count ||
601                  qdata->qd_count == 0));
602
603         QDATA_DEBUG(qdata, "%s interpret rc(%d).\n",
604                     lustre_msg_get_opc(req->rq_reqmsg) == QUOTA_DQACQ ?
605                     "DQACQ" : "DQREL", rc);
606
607         rc = dqacq_completion(obd, qctxt, qdata, rc,
608                               lustre_msg_get_opc(req->rq_reqmsg));
609
610         RETURN(rc);
611 }
612
613 static int got_qunit(struct qunit_waiter *waiter)
614 {
615         int rc = 0;
616         ENTRY;
617         spin_lock(&qunit_hash_lock);
618         rc = list_empty(&waiter->qw_entry);
619         spin_unlock(&qunit_hash_lock);
620         RETURN(rc);
621 }
622
/*
 * schedule_dqacq - start an acquire/release for @qdata, or join one that
 * is already hashed for the same context, id and flags.
 * @obd:   obd device
 * @qctxt: quota context
 * @qdata: id, flags and count to acquire or release
 * @opc:   QUOTA_DQACQ or QUOTA_DQREL
 * @wait:  non-zero to sleep until the operation completes
 *
 * On the master (see is_master()) the local handler is called and the
 * operation is completed synchronously.  Otherwise a request is built and
 * handed to ptlrpcd, with dqacq_interpret() as the reply callback.
 *
 * return: -ENOMEM on allocation failure; on the master, the handler's
 *         result unless that is 0 or -EDQUOT, in which case the result of
 *         dqacq_completion(); after waiting, -EAGAIN when the waiter was
 *         woken with qw_rc == 0; otherwise 0.
 */
static int
schedule_dqacq(struct obd_device *obd,
               struct lustre_quota_ctxt *qctxt,
               struct qunit_data *qdata, int opc, int wait)
{
        struct lustre_qunit *qunit, *empty;
        struct qunit_waiter qw;
        struct l_wait_info lwi = { 0 };
        struct ptlrpc_request *req;
        struct qunit_data *reqdata;
        struct dqacq_async_args *aa;
        unsigned long factor;
        int rc = 0;
        ENTRY;

        CFS_INIT_LIST_HEAD(&qw.qw_entry);
        init_waitqueue_head(&qw.qw_waitq);
        qw.qw_rc = 0;

        /* allocate before taking the spinlock; freed again below if an
         * operation for the same key is already hashed */
        if ((empty = alloc_qunit(qctxt, qdata, opc)) == NULL)
                RETURN(-ENOMEM);

        spin_lock(&qunit_hash_lock);

        qunit = dqacq_in_flight(qctxt, qdata);
        if (qunit) {
                /* an operation is already hashed: optionally queue as a
                 * waiter on it and do not start a second one */
                if (wait)
                        list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
                spin_unlock(&qunit_hash_lock);

                free_qunit(empty);
                goto wait_completion;
        }
        qunit = empty;
        insert_qunit_nolock(qctxt, qunit);
        if (wait)
                list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
        spin_unlock(&qunit_hash_lock);

        LASSERT(qunit);

        /* master is going to dqacq/dqrel from itself */
        if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP))
        {
                int rc2;
                QDATA_DEBUG(qdata, "local %s.\n",
                            opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
                rc = qctxt->lqc_handler(obd, qdata, opc);
                /* dqacq_completion() unhashes the qunit and wakes the
                 * waiters, including qw if it was queued above */
                rc2 = dqacq_completion(obd, qctxt, qdata, rc, opc);
                RETURN((rc && rc != -EDQUOT) ? rc : rc2);
        }

        /* build dqacq/dqrel request */
        LASSERT(qctxt->lqc_import);

        req = ptlrpc_request_alloc_pack(qctxt->lqc_import, &RQF_MDS_QUOTA_DQACQ,
                                        LUSTRE_MDS_VERSION, opc);
        if (req == NULL) {
                /* complete with -ENOMEM so the qunit is unhashed and any
                 * waiters are woken */
                dqacq_completion(obd, qctxt, qdata, -ENOMEM, opc);
                RETURN(-ENOMEM);
        }

        /* largest multiple of the unit size not above MAX_QUOTA_COUNT32 */
        if (qdata->qd_flags & QUOTA_IS_BLOCK)
                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz *
                         qctxt->lqc_bunit_sz;
        else
                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz *
                         qctxt->lqc_iunit_sz;

        /* callers are expected to have capped the count already, see
         * split_before_schedule_dqacq() */
        LASSERT(!should_translate_quota(qctxt->lqc_import) ||
                qdata->qd_count <= factor);
        if (should_translate_quota(qctxt->lqc_import))
        {
                struct qunit_data_old *reqdata_old, *tmp;

                reqdata_old = req_capsule_client_get(&req->rq_pill,
                                                     &RMF_QUNIT_DATA);

                /* NOTE(review): neither reqdata_old nor tmp is checked
                 * for NULL before being dereferenced - confirm both
                 * calls cannot fail here */
                tmp = lustre_quota_new_to_old(qdata);
                *reqdata_old = *tmp;
                req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER,
                                     sizeof(*reqdata_old));
                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
        } else {
                reqdata = req_capsule_client_get(&req->rq_pill,
                                                 &RMF_QUNIT_DATA);

                *reqdata = *qdata;
                req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER,
                                     sizeof(*reqdata));
                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
        }
        ptlrpc_request_set_replen(req);

        /* stash context and qunit in the request for dqacq_interpret() */
        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
        aa = (struct dqacq_async_args *)&req->rq_async_args;
        aa->aa_ctxt = qctxt;
        aa->aa_qunit = qunit;

        req->rq_interpret_reply = dqacq_interpret;
        ptlrpcd_add_req(req);

        QDATA_DEBUG(qdata, "%s scheduled.\n",
                    opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
wait_completion:
        if (wait && qunit) {
                /* NOTE(review): qunit is read here without
                 * qunit_hash_lock and without a reference taken by this
                 * function - confirm the completion path cannot free it
                 * before this point */
                struct qunit_data *p = &qunit->lq_data;
                QDATA_DEBUG(p, "wait for dqacq.\n");

                l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
                /* qw_rc is 0 after a successful completion or after
                 * qctxt_cleanup(); a non-zero qw_rc is only logged */
                if (qw.qw_rc == 0)
                        rc = -EAGAIN;

                CDEBUG(D_QUOTA, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
        }
        RETURN(rc);
}
740
741 int
742 qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
743                    uid_t uid, gid_t gid, __u32 isblk, int wait)
744 {
745         int ret, rc = 0, i = USRQUOTA;
746         __u32 id[MAXQUOTAS] = { uid, gid };
747         struct qunit_data qdata[MAXQUOTAS];
748         ENTRY;
749
750         CLASSERT(MAXQUOTAS < 4);
751         if (!sb_any_quota_enabled(qctxt->lqc_sb))
752                 RETURN(0);
753
754         for (i = 0; i < MAXQUOTAS; i++) {
755                 qdata[i].qd_id = id[i];
756                 qdata[i].qd_flags = 0;
757                 qdata[i].qd_flags |= i;
758                 qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;        
759                 qdata[i].qd_count = 0;
760
761                 ret = check_cur_qunit(obd, qctxt, &qdata[i]);
762                 if (ret > 0) {
763                         int opc;
764                         /* need acquire or release */
765                         opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
766                         ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i], 
767                                                           opc, wait);
768                         if (!rc)
769                                 rc = ret;
770                 }
771         }
772
773         RETURN(rc);
774 }
775
776 int
777 qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id,
778                          unsigned short type, int isblk)
779 {
780         struct lustre_qunit *qunit = NULL;
781         struct qunit_waiter qw;
782         struct qunit_data qdata;
783         struct l_wait_info lwi = { 0 };
784         ENTRY;
785
786         CFS_INIT_LIST_HEAD(&qw.qw_entry);
787         init_waitqueue_head(&qw.qw_waitq);
788         qw.qw_rc = 0;
789
790         qdata.qd_id = id;
791         qdata.qd_flags = 0;
792         qdata.qd_flags |= type;
793         qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
794         qdata.qd_count = 0;
795
796         spin_lock(&qunit_hash_lock);
797
798         qunit = dqacq_in_flight(qctxt, &qdata);
799         if (qunit)
800                 list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
801
802         spin_unlock(&qunit_hash_lock);
803
804         if (qunit) {
805                 struct qunit_data *p = &qdata;
806                 QDATA_DEBUG(p, "wait for dqacq completion.\n");
807                 l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
808                 QDATA_DEBUG(p, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
809         }
810         RETURN(0);
811 }
812
813 int
814 qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb,
815            dqacq_handler_t handler)
816 {
817         int rc = 0;
818         ENTRY;
819
820         rc = ptlrpcd_addref();
821         if (rc)
822                 RETURN(rc);
823
824         qctxt->lqc_handler = handler;
825         qctxt->lqc_sb = sb;
826         qctxt->lqc_import = NULL;
827         qctxt->lqc_recovery = 0;
828         qctxt->lqc_atype = 0;
829         qctxt->lqc_status= 0;
830         qctxt->lqc_bunit_sz = default_bunit_sz;
831         qctxt->lqc_btune_sz = default_bunit_sz / 100 * default_btune_ratio;
832         qctxt->lqc_iunit_sz = default_iunit_sz;
833         qctxt->lqc_itune_sz = default_iunit_sz * default_itune_ratio / 100;
834
835         RETURN(0);
836 }
837
838 void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force)
839 {
840         struct lustre_qunit *qunit, *tmp;
841         struct qunit_waiter *qw, *tmp2;
842         int i;
843         ENTRY;
844
845         spin_lock(&qunit_hash_lock);
846
847         for (i = 0; i < NR_DQHASH; i++) {
848                 list_for_each_entry_safe(qunit, tmp, &qunit_hash[i], lq_hash) {
849                         if (qunit->lq_ctxt != qctxt)
850                                 continue;
851
852                         remove_qunit_nolock(qunit);
853                         /* wake up all waiters */
854                         list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters,
855                                                  qw_entry) {
856                                 list_del_init(&qw->qw_entry);
857                                 qw->qw_rc = 0;
858                                 wake_up(&qw->qw_waitq);
859                         }
860                         qunit_put(qunit);
861                 }
862         }
863
864         spin_unlock(&qunit_hash_lock);
865
866         ptlrpcd_decref();
867
868         EXIT;
869 }
870
871 struct qslave_recov_thread_data {
872         struct obd_device *obd;
873         struct lustre_quota_ctxt *qctxt;
874         struct completion comp;
875 };
876
877 /* FIXME only recovery block quota by now */
878 static int qslave_recovery_main(void *arg)
879 {
880         struct qslave_recov_thread_data *data = arg;
881         struct obd_device *obd = data->obd;
882         struct lustre_quota_ctxt *qctxt = data->qctxt;
883         unsigned int type;
884         int rc = 0;
885         ENTRY;
886
887         ptlrpc_daemonize("qslave_recovd");
888
889         complete(&data->comp);
890
891         if (qctxt->lqc_recovery)
892                 RETURN(0);
893         qctxt->lqc_recovery = 1;
894
895         for (type = USRQUOTA; type < MAXQUOTAS; type++) {
896                 struct qunit_data qdata;
897                 struct quota_info *dqopt = sb_dqopt(qctxt->lqc_sb);
898                 struct list_head id_list;
899                 struct dquot_id *dqid, *tmp;
900                 int ret;
901
902                 LOCK_DQONOFF_MUTEX(dqopt);
903                 if (!sb_has_quota_enabled(qctxt->lqc_sb, type)) {
904                         UNLOCK_DQONOFF_MUTEX(dqopt);
905                         break;
906                 }
907
908                 LASSERT(dqopt->files[type] != NULL);
909                 CFS_INIT_LIST_HEAD(&id_list);
910 #ifndef KERNEL_SUPPORTS_QUOTA_READ 
911                 rc = fsfilt_qids(obd, dqopt->files[type], NULL, type, &id_list);
912 #else
913                 rc = fsfilt_qids(obd, NULL, dqopt->files[type], type, &id_list);
914 #endif
915                 UNLOCK_DQONOFF_MUTEX(dqopt);
916                 if (rc)
917                         CERROR("Get ids from quota file failed. (rc:%d)\n", rc);
918
919                 list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
920                         list_del_init(&dqid->di_link);
921                         /* skip slave recovery on itself */
922                         if (is_master(obd, qctxt, dqid->di_id, type))
923                                 goto free;
924                         if (rc && rc != -EBUSY)
925                                 goto free;
926
927                         qdata.qd_id = dqid->di_id;
928                         qdata.qd_flags = 0;
929                         qdata.qd_flags |= type;
930                         qdata.qd_flags |= QUOTA_IS_BLOCK;
931                         qdata.qd_count = 0;
932
933                         ret = check_cur_qunit(obd, qctxt, &qdata);
934                         if (ret > 0) {
935                                 int opc;
936                                 opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
937                                 rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0);
938                         } else
939                                 rc = 0;
940
941                         if (rc)
942                                 CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR,
943                                        "qslave recovery failed! (id:%d type:%d "
944                                        " rc:%d)\n", dqid->di_id, type, rc);
945 free:
946                         kfree(dqid);
947                 }
948         }
949
950         qctxt->lqc_recovery = 0;
951         RETURN(rc);
952 }
953
954 void
955 qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt)
956 {
957         struct qslave_recov_thread_data data;
958         int rc;
959         ENTRY;
960
961         if (!sb_any_quota_enabled(qctxt->lqc_sb))
962                 goto exit;
963
964         data.obd = obd;
965         data.qctxt = qctxt;
966         init_completion(&data.comp);
967
968         rc = kernel_thread(qslave_recovery_main, &data, CLONE_VM|CLONE_FILES);
969         if (rc < 0) {
970                 CERROR("Cannot start quota recovery thread: rc %d\n", rc);
971                 goto exit;
972         }
973         wait_for_completion(&data.comp);
974 exit:
975         EXIT;
976 }