Whamcloud - gitweb
b=16098
[fs/lustre-release.git] / lustre / quota / quota_context.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see [sun.com URL with a
20  * copy of GPLv2].
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/quota/quota_context.c
37  *
38  * Lustre Quota Context
39  *
40  * Author: Niu YaWei <niu@clusterfs.com>
41  */
42
43 #ifndef EXPORT_SYMTAB
44 # define EXPORT_SYMTAB
45 #endif
46
47 #define DEBUG_SUBSYSTEM S_MDS
48
49 #include <linux/version.h>
50 #include <linux/fs.h>
51 #include <asm/unistd.h>
52 #include <linux/slab.h>
53 #include <linux/quotaops.h>
54 #include <linux/module.h>
55 #include <linux/init.h>
56
57 #include <obd_class.h>
58 #include <lustre_quota.h>
59 #include <lustre_fsfilt.h>
60 #include "quota_internal.h"
61
/* Default unit sizes and tune ratios; qctxt_init() derives the per-context
 * lqc_bunit_sz/lqc_btune_sz/lqc_iunit_sz/lqc_itune_sz values from these. */
unsigned long default_bunit_sz = 100 * 1024 * 1024;       /* 100M bytes */
unsigned long default_btune_ratio = 50;                   /* 50 percentage */
unsigned long default_iunit_sz = 5000;       /* 5000 inodes */
unsigned long default_itune_ratio = 50;      /* 50 percentage */

/* Slab cache that struct lustre_qunit objects are allocated from */
cfs_mem_cache_t *qunit_cachep = NULL;
/* Hash table of qunits, indexed by qunit_hashfn() */
struct list_head qunit_hash[NR_DQHASH];
/* Taken around every qunit_hash lookup/insert/removal and around every
 * change to a qunit's lq_waiters list in this file */
spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED;
70
/* One quota acquire/release operation tracked in qunit_hash.
 * Objects come from qunit_cachep (see alloc_qunit()) and are freed when
 * lq_refcnt drops to zero (see qunit_put()). */
struct lustre_qunit {
        struct list_head lq_hash;               /* Hash list in memory */
        atomic_t lq_refcnt;                     /* Use count */
        struct lustre_quota_ctxt *lq_ctxt;      /* Quota context this applies to */
        struct qunit_data lq_data;              /* See qunit_data */
        unsigned int lq_opc;                    /* QUOTA_DQACQ, QUOTA_DQREL */
        struct list_head lq_waiters;            /* All write threads waiting for this qunit */
};
79
80 int should_translate_quota (struct obd_import *imp)
81 {
82         ENTRY;
83
84         LASSERT(imp);
85         if ((imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) && 
86             !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT))
87                 RETURN(0);
88         else
89                 RETURN(1);
90 }
91
92 void qunit_cache_cleanup(void)
93 {
94         int i;
95         ENTRY;
96
97         spin_lock(&qunit_hash_lock);
98         for (i = 0; i < NR_DQHASH; i++)
99                 LASSERT(list_empty(qunit_hash + i));
100         spin_unlock(&qunit_hash_lock);
101
102         if (qunit_cachep) {
103                 int rc;
104                 rc = cfs_mem_cache_destroy(qunit_cachep);
105                 LASSERTF(rc == 0, "couldn't destory qunit_cache slab\n");
106                 qunit_cachep = NULL;
107         }
108         EXIT;
109 }
110
111 int qunit_cache_init(void)
112 {
113         int i;
114         ENTRY;
115
116         LASSERT(qunit_cachep == NULL);
117         qunit_cachep = cfs_mem_cache_create("ll_qunit_cache",
118                                             sizeof(struct lustre_qunit),
119                                             0, 0);
120         if (!qunit_cachep)
121                 RETURN(-ENOMEM);
122
123         spin_lock(&qunit_hash_lock);
124         for (i = 0; i < NR_DQHASH; i++)
125                 CFS_INIT_LIST_HEAD(qunit_hash + i);
126         spin_unlock(&qunit_hash_lock);
127         RETURN(0);
128 }
129
130 static inline int
131 qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
132              __attribute__((__const__));
133
134 static inline int
135 qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
136 {
137         unsigned int id = qdata->qd_id;
138         unsigned int type = qdata->qd_flags & QUOTA_IS_GRP;
139
140         unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id;
141         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
142         return tmp;
143 }
144
145 /* compute the remaining quota for certain gid or uid b=11693 */
146 int compute_remquota(struct obd_device *obd,
147                      struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
148 {
149         struct super_block *sb = qctxt->lqc_sb;
150         __u64 usage, limit;
151         struct obd_quotactl *qctl;
152         int ret = QUOTA_RET_OK;
153         __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
154         ENTRY;
155
156         if (!sb_any_quota_enabled(sb))
157                 RETURN(QUOTA_RET_NOQUOTA);
158
159         /* ignore root user */
160         if (qdata->qd_id == 0 && qdata_type == USRQUOTA)
161                 RETURN(QUOTA_RET_NOLIMIT);
162
163         OBD_ALLOC_PTR(qctl);
164         if (qctl == NULL) 
165                 RETURN(-ENOMEM);
166
167         /* get fs quota usage & limit */
168         qctl->qc_cmd = Q_GETQUOTA;
169         qctl->qc_id = qdata->qd_id;
170         qctl->qc_type = qdata_type;
171         ret = fsfilt_quotactl(obd, sb, qctl);
172         if (ret) {
173                 if (ret == -ESRCH)      /* no limit */
174                         ret = QUOTA_RET_NOLIMIT;
175                 else
176                         CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", 
177                                ret);
178                 GOTO(out, ret);
179         }
180
181         usage = qctl->qc_dqblk.dqb_curspace;
182         limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
183         if (!limit){            /* no limit */
184                 ret = QUOTA_RET_NOLIMIT;
185                 GOTO(out, ret);
186         }
187
188         if (limit >= usage)
189                 qdata->qd_count = limit - usage;
190         else
191                 qdata->qd_count = 0;
192         EXIT;
193 out:
194         OBD_FREE_PTR(qctl);
195         return ret;
196 }
197
198 /* caller must hold qunit_hash_lock */
199 static inline struct lustre_qunit *find_qunit(unsigned int hashent,
200                                               struct lustre_quota_ctxt *qctxt,
201                                               struct qunit_data *qdata)
202 {
203         struct lustre_qunit *qunit = NULL;
204         struct qunit_data *tmp;
205
206         LASSERT_SPIN_LOCKED(&qunit_hash_lock);
207         list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) {
208                 tmp = &qunit->lq_data;
209                 if (qunit->lq_ctxt == qctxt &&
210                     qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags)
211                         return qunit;
212         }
213         return NULL;
214 }
215
/* check_cur_qunit - check the current usage of qunit.
 * @obd:   obd device passed through to fsfilt_quotactl()
 * @qctxt: quota context
 * @qdata: the type of quota unit to be checked; qd_count must be 0 on
 *         entry and, when 1 or 2 is returned, holds the amount to acquire
 *         or release (always a multiple of the unit size)
 *
 * return: 1 - need acquire qunit;
 *         2 - need release qunit;
 *         0 - need do nothing.
 *       < 0 - error.
 */
static int
check_cur_qunit(struct obd_device *obd,
                struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
{
        struct super_block *sb = qctxt->lqc_sb;
        unsigned long qunit_sz, tune_sz;
        __u64 usage, limit;
        struct obd_quotactl *qctl;
        int ret = 0;
        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
        ENTRY;

        /* nothing to adjust when no quota is enabled on this super block */
        if (!sb_any_quota_enabled(sb))
                RETURN(0);

        OBD_ALLOC_PTR(qctl);
        if (qctl == NULL)
                RETURN(-ENOMEM);

        /* get fs quota usage & limit */
        qctl->qc_cmd = Q_GETQUOTA;
        qctl->qc_id = qdata->qd_id;
        qctl->qc_type = qdata_type;
        ret = fsfilt_quotactl(obd, sb, qctl);
        if (ret) {
                if (ret == -ESRCH)      /* no limit */
                        ret = 0;
                else
                        CERROR("can't get fs quota usage! (rc:%d)\n", ret);
                GOTO(out, ret);
        }

        /* pick block or inode counters and the matching unit/tune sizes */
        if (is_blk) {
                usage = qctl->qc_dqblk.dqb_curspace;
                limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
                qunit_sz = qctxt->lqc_bunit_sz;
                tune_sz = qctxt->lqc_btune_sz;

                LASSERT(!(qunit_sz % QUOTABLOCK_SIZE));
        } else {
                usage = qctl->qc_dqblk.dqb_curinodes;
                limit = qctl->qc_dqblk.dqb_ihardlimit;
                qunit_sz = qctxt->lqc_iunit_sz;
                tune_sz = qctxt->lqc_itune_sz;
        }

        /* ignore the no quota limit case */
        if (!limit)
                GOTO(out, ret = 0);

        /* we don't count the MIN_QLIMIT */
        if ((limit == MIN_QLIMIT && !is_blk) ||
            (toqb(limit) == MIN_QLIMIT && is_blk))
                limit = 0;

        LASSERT(qdata->qd_count == 0);
        if (limit <= usage + tune_sz) {
                /* limit is within tune_sz of usage: add whole units until
                 * limit + qd_count exceeds usage + tune_sz */
                while (qdata->qd_count + limit <= usage + tune_sz)
                        qdata->qd_count += qunit_sz;
                ret = 1;
        } else if (limit > usage + qunit_sz + tune_sz) {
                /* more than one spare unit beyond the tune margin: count
                 * whole units until the remainder no longer exceeds
                 * usage + qunit_sz + tune_sz */
                while (limit - qdata->qd_count > usage + qunit_sz + tune_sz)
                        qdata->qd_count += qunit_sz;
                ret = 2;
        }
        LASSERT(ret == 0 || qdata->qd_count);
        EXIT;
out:
        OBD_FREE_PTR(qctl);
        return ret;
}
297
298 /* caller must hold qunit_hash_lock */
299 static struct lustre_qunit *dqacq_in_flight(struct lustre_quota_ctxt *qctxt,
300                                             struct qunit_data *qdata)
301 {
302         unsigned int hashent = qunit_hashfn(qctxt, qdata);
303         struct lustre_qunit *qunit;
304         ENTRY;
305
306         LASSERT_SPIN_LOCKED(&qunit_hash_lock);
307         qunit = find_qunit(hashent, qctxt, qdata);
308         RETURN(qunit);
309 }
310
311 static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt,
312                                         struct qunit_data *qdata, int opc)
313 {
314         struct lustre_qunit *qunit = NULL;
315         ENTRY;
316
317         OBD_SLAB_ALLOC(qunit, qunit_cachep, CFS_ALLOC_IO, sizeof(*qunit));
318         if (qunit == NULL)
319                 RETURN(NULL);
320
321         CFS_INIT_LIST_HEAD(&qunit->lq_hash);
322         CFS_INIT_LIST_HEAD(&qunit->lq_waiters);
323         atomic_set(&qunit->lq_refcnt, 1);
324         qunit->lq_ctxt = qctxt;
325         memcpy(&qunit->lq_data, qdata, sizeof(*qdata));
326         qunit->lq_opc = opc;
327
328         RETURN(qunit);
329 }
330
331 static inline void free_qunit(struct lustre_qunit *qunit)
332 {
333         OBD_SLAB_FREE(qunit, qunit_cachep, sizeof(*qunit));
334 }
335
336 static inline void qunit_get(struct lustre_qunit *qunit)
337 {
338         atomic_inc(&qunit->lq_refcnt);
339 }
340
341 static void qunit_put(struct lustre_qunit *qunit)
342 {
343         LASSERT(atomic_read(&qunit->lq_refcnt));
344         if (atomic_dec_and_test(&qunit->lq_refcnt))
345                 free_qunit(qunit);
346 }
347
348 static void
349 insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit)
350 {
351         struct list_head *head;
352
353         LASSERT(list_empty(&qunit->lq_hash));
354         head = qunit_hash + qunit_hashfn(qctxt, &qunit->lq_data);
355         list_add(&qunit->lq_hash, head);
356 }
357
358 static void remove_qunit_nolock(struct lustre_qunit *qunit)
359 {
360         LASSERT(!list_empty(&qunit->lq_hash));
361         list_del_init(&qunit->lq_hash);
362 }
363
/* A thread waiting for a qunit operation to finish.  Instances in this
 * file live on the waiter's stack and are linked into lq_waiters. */
struct qunit_waiter {
        struct list_head qw_entry;      /* link in lustre_qunit::lq_waiters;
                                         * emptied when the waiter is woken */
        cfs_waitq_t      qw_waitq;      /* wait queue the waiter sleeps on */
        int qw_rc;                      /* result stored by whoever wakes us */
};
369
/* Raise @limit by @count; a limit equal to MIN_QLIMIT is replaced by
 * @count instead of being added to.  @limit must be an lvalue and is
 * evaluated more than once.  Arguments and the whole expansion are
 * parenthesized so the macro is safe inside a larger expression. */
#define INC_QLIMIT(limit, count) (((limit) == MIN_QLIMIT) ? \
                                  ((limit) = (count)) : ((limit) += (count)))
372
373
374 /* FIXME check if this mds is the master of specified id */
375 static int
376 is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
377           unsigned int id, int type)
378 {
379         return qctxt->lqc_handler ? 1 : 0;
380 }
381
382 static int
383 schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
384                struct qunit_data *qdata, int opc, int wait);
385
386 static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
387                                        struct qunit_data *qdata, int opc, int wait)
388 {
389         int rc = 0;
390         unsigned long factor;
391         struct qunit_data tmp_qdata;
392         ENTRY;
393
394         LASSERT(qdata && qdata->qd_count);
395         QDATA_DEBUG(qdata, "%s quota split.\n",
396                     (qdata->qd_flags & QUOTA_IS_BLOCK) ? "block" : "inode");
397         if (qdata->qd_flags & QUOTA_IS_BLOCK)
398                 factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * 
399                         qctxt->lqc_bunit_sz;
400         else
401                 factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * 
402                         qctxt->lqc_iunit_sz;
403
404         if (qctxt->lqc_import && should_translate_quota(qctxt->lqc_import) &&
405             qdata->qd_count > factor) {
406                         tmp_qdata = *qdata;
407                 tmp_qdata.qd_count = factor;
408                         qdata->qd_count -= tmp_qdata.qd_count;
409                 QDATA_DEBUG((&tmp_qdata), "be split.\n");
410                 rc = schedule_dqacq(obd, qctxt, &tmp_qdata, opc, wait);
411         } else{
412                 QDATA_DEBUG(qdata, "don't be split.\n");
413                 rc = schedule_dqacq(obd, qctxt, qdata, opc, wait);
414         }
415
416         RETURN(rc);
417 }
418
419 static int
420 dqacq_completion(struct obd_device *obd,
421                  struct lustre_quota_ctxt *qctxt,
422                  struct qunit_data *qdata, int rc, int opc)
423 {
424         struct lustre_qunit *qunit = NULL;
425         struct super_block *sb = qctxt->lqc_sb;
426         unsigned long qunit_sz;
427         struct qunit_waiter *qw, *tmp;
428         int err = 0;
429         __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
430         __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
431         __u64 qd_tmp = qdata->qd_count;
432         unsigned long div_r;
433         ENTRY;
434
435         LASSERT(qdata);
436         qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
437         div_r = do_div(qd_tmp, qunit_sz);
438         LASSERTF(!div_r, "qunit_sz: %lu, return qunit_sz: "LPU64"\n",
439                  qunit_sz, qd_tmp);
440
441         /* update local operational quota file */
442         if (rc == 0) {
443                 __u32 count = QUSG(qdata->qd_count, is_blk);
444                 struct obd_quotactl *qctl;
445                 __u64 *hardlimit;
446
447                 OBD_ALLOC_PTR(qctl);
448                 if (qctl == NULL)
449                         GOTO(out, err = -ENOMEM);
450
451                 /* acq/rel qunit for specified uid/gid is serialized,
452                  * so there is no race between get fs quota limit and
453                  * set fs quota limit */
454                 qctl->qc_cmd = Q_GETQUOTA;
455                 qctl->qc_id = qdata->qd_id;
456                 qctl->qc_type = qdata_type;
457                 err = fsfilt_quotactl(obd, sb, qctl);
458                 if (err) {
459                         CERROR("error get quota fs limit! (rc:%d)\n", err);
460                         GOTO(out_mem, err);
461                 }
462
463                 if (is_blk) {
464                         qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
465                         hardlimit = &qctl->qc_dqblk.dqb_bhardlimit;
466                 } else {
467                         qctl->qc_dqblk.dqb_valid = QIF_ILIMITS;
468                         hardlimit = &qctl->qc_dqblk.dqb_ihardlimit;
469                 }
470
471                 switch (opc) {
472                 case QUOTA_DQACQ:
473                         CDEBUG(D_QUOTA, "%s(acq):count: %d, hardlimt: "LPU64 
474                                ",type: %s.\n", obd->obd_name, count, *hardlimit, 
475                                qdata_type ? "grp": "usr");
476                         INC_QLIMIT(*hardlimit, count);
477                         break;
478                 case QUOTA_DQREL:
479                         CDEBUG(D_QUOTA, "%s(rel):count: %d, hardlimt: "LPU64 
480                                ",type: %s.\n", obd->obd_name, count, *hardlimit, 
481                                qdata_type ? "grp": "usr");
482                         LASSERTF(count < *hardlimit, 
483                                  "count: %d, hardlimit: "LPU64".\n", 
484                                  count, *hardlimit);
485                         *hardlimit -= count;
486                         break;
487                 default:
488                         LBUG();
489                 }
490
491                 /* clear quota limit */
492                 if (count == 0)
493                         *hardlimit = 0;
494
495                 qctl->qc_cmd = Q_SETQUOTA;
496                 err = fsfilt_quotactl(obd, sb, qctl);
497                 if (err)
498                         CERROR("error set quota fs limit! (rc:%d)\n", err);
499
500                 QDATA_DEBUG(qdata, "%s completion\n",
501                             opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
502 out_mem:
503                 OBD_FREE_PTR(qctl);
504         } else if (rc == -EDQUOT) {
505                 QDATA_DEBUG(qdata, "acquire qunit got EDQUOT.\n");
506         } else if (rc == -EBUSY) {
507                 QDATA_DEBUG(qdata, "it's is recovering, got EBUSY.\n");
508         } else {
509                 CERROR("acquire qunit got error! (rc:%d)\n", rc);
510         }
511 out:
512         /* remove the qunit from hash */
513         spin_lock(&qunit_hash_lock);
514
515         qunit = dqacq_in_flight(qctxt, qdata);
516         /* this qunit has been removed by qctxt_cleanup() */
517         if (!qunit) {
518                 spin_unlock(&qunit_hash_lock);
519                 RETURN(err);
520         }
521
522         LASSERT(opc == qunit->lq_opc);
523         remove_qunit_nolock(qunit);
524
525         /* wake up all waiters */
526         list_for_each_entry_safe(qw, tmp, &qunit->lq_waiters, qw_entry) {
527                 list_del_init(&qw->qw_entry);
528                 qw->qw_rc = rc;
529                 wake_up(&qw->qw_waitq);
530         }
531
532         spin_unlock(&qunit_hash_lock);
533
534         qunit_put(qunit);
535
536         /* don't reschedule in such cases:
537          *   - acq/rel failure, but not for quota recovery.
538          *   - local dqacq/dqrel.
539          *   - local disk io failure.
540          */
541         if (err || (rc && rc != -EBUSY) || 
542             is_master(obd, qctxt, qdata->qd_id, qdata_type))
543                 RETURN(err);
544
545         /* reschedule another dqacq/dqrel if needed */
546         qdata->qd_count = 0;
547         rc = check_cur_qunit(obd, qctxt, qdata);
548         if (rc > 0) {
549                 int opc;
550                 opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
551                 rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0);
552                 QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc);
553         }
554         RETURN(err);
555 }
556
/* Arguments stored in a request's rq_async_args by schedule_dqacq() and
 * read back by dqacq_interpret(). */
struct dqacq_async_args {
        struct lustre_quota_ctxt *aa_ctxt;      /* context the request was issued for */
        struct lustre_qunit *aa_qunit;          /* qunit hashed for this request */
};
561
562 static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc)
563 {
564         struct dqacq_async_args *aa = (struct dqacq_async_args *)data;
565         struct lustre_quota_ctxt *qctxt = aa->aa_ctxt;
566         struct lustre_qunit *qunit = aa->aa_qunit;
567         struct obd_device *obd = req->rq_import->imp_obd;
568         struct qunit_data *qdata = NULL;
569         struct qunit_data_old *qdata_old = NULL;
570         ENTRY;
571
572         LASSERT(req);
573         LASSERT(req->rq_import);
574
575         if ((req->rq_import->imp_connect_data.ocd_connect_flags &
576              OBD_CONNECT_QUOTA64) &&
577             !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
578                 CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
579
580                 qdata = req_capsule_server_swab_get(&req->rq_pill,
581                                                     &RMF_QUNIT_DATA,
582                                           (void*)lustre_swab_qdata);
583         } else {
584                 CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
585
586                 qdata = req_capsule_server_swab_get(&req->rq_pill,
587                                                     &RMF_QUNIT_DATA,
588                                        (void*)lustre_swab_qdata_old);
589                 qdata = lustre_quota_old_to_new(qdata_old);
590         }
591         if (qdata == NULL) {
592                 DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data");
593                 RETURN(-EPROTO);
594         }
595
596         LASSERT(qdata->qd_id == qunit->lq_data.qd_id &&
597                 (qdata->qd_flags & QUOTA_IS_GRP) ==
598                  (qunit->lq_data.qd_flags & QUOTA_IS_GRP) &&
599                 (qdata->qd_count == qunit->lq_data.qd_count ||
600                  qdata->qd_count == 0));
601
602         QDATA_DEBUG(qdata, "%s interpret rc(%d).\n",
603                     lustre_msg_get_opc(req->rq_reqmsg) == QUOTA_DQACQ ?
604                     "DQACQ" : "DQREL", rc);
605
606         rc = dqacq_completion(obd, qctxt, qdata, rc,
607                               lustre_msg_get_opc(req->rq_reqmsg));
608
609         RETURN(rc);
610 }
611
612 static int got_qunit(struct qunit_waiter *waiter)
613 {
614         int rc = 0;
615         ENTRY;
616         spin_lock(&qunit_hash_lock);
617         rc = list_empty(&waiter->qw_entry);
618         spin_unlock(&qunit_hash_lock);
619         RETURN(rc);
620 }
621
/* schedule_dqacq - start (or join) an acquire/release for @qdata.
 * @obd:   obd device
 * @qctxt: quota context
 * @qdata: id, flags and count to acquire or release
 * @opc:   QUOTA_DQACQ or QUOTA_DQREL
 * @wait:  non-zero to sleep until the operation has completed
 *
 * If a qunit for the same context/id/flags is already hashed, no new
 * operation is started; the caller is only queued as a waiter when @wait
 * is set.  Otherwise a new qunit is hashed and the operation is either
 * run locally through lqc_handler (master) or sent as a request whose
 * reply is handled by dqacq_interpret().
 *
 * return: -ENOMEM on allocation failure; on the master path the handler
 *         result, or the dqacq_completion() result when the handler
 *         returned 0 or -EDQUOT; -EAGAIN when the caller waited and was
 *         woken with qw_rc == 0; 0 otherwise.
 */
static int
schedule_dqacq(struct obd_device *obd,
               struct lustre_quota_ctxt *qctxt,
               struct qunit_data *qdata, int opc, int wait)
{
        struct lustre_qunit *qunit, *empty;
        struct qunit_waiter qw;
        struct l_wait_info lwi = { 0 };
        struct ptlrpc_request *req;
        struct qunit_data *reqdata;
        struct dqacq_async_args *aa;
        unsigned long factor;   
        int rc = 0;
        ENTRY;

        /* on-stack waiter; only linked into a qunit when @wait is set */
        CFS_INIT_LIST_HEAD(&qw.qw_entry);
        init_waitqueue_head(&qw.qw_waitq);
        qw.qw_rc = 0;

        /* allocate up front so nothing is allocated under the spinlock */
        if ((empty = alloc_qunit(qctxt, qdata, opc)) == NULL)
                RETURN(-ENOMEM);

        spin_lock(&qunit_hash_lock);

        qunit = dqacq_in_flight(qctxt, qdata);
        if (qunit) {
                /* same id/flags already hashed: just wait for it */
                if (wait)
                        list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
                spin_unlock(&qunit_hash_lock);

                free_qunit(empty);
                goto wait_completion;
        }
        qunit = empty;
        insert_qunit_nolock(qctxt, qunit);
        if (wait)
                list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
        spin_unlock(&qunit_hash_lock);

        LASSERT(qunit);

        /* master is going to dqacq/dqrel from itself */
        if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP))
        {
                int rc2;
                QDATA_DEBUG(qdata, "local %s.\n",
                            opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
                rc = qctxt->lqc_handler(obd, qdata, opc);
                /* completion unhashes the qunit and unlinks qw again */
                rc2 = dqacq_completion(obd, qctxt, qdata, rc, opc);
                RETURN((rc && rc != -EDQUOT) ? rc : rc2);
        }

        /* build dqacq/dqrel request */
        LASSERT(qctxt->lqc_import);

        req = ptlrpc_request_alloc_pack(qctxt->lqc_import, &RQF_MDS_QUOTA_DQACQ,
                                        LUSTRE_MDS_VERSION, opc);
        if (req == NULL) {
                /* undo the hash insertion and wake any waiters */
                dqacq_completion(obd, qctxt, qdata, -ENOMEM, opc);
                RETURN(-ENOMEM);
        }

        /* largest multiple of the unit size not above MAX_QUOTA_COUNT32 */
        if (qdata->qd_flags & QUOTA_IS_BLOCK)
                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * 
                         qctxt->lqc_bunit_sz;
        else
                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * 
                         qctxt->lqc_iunit_sz;

        /* a translated request must not carry more than factor */
        LASSERT(!should_translate_quota(qctxt->lqc_import) || 
                qdata->qd_count <= factor);
        if (should_translate_quota(qctxt->lqc_import))
        {
                struct qunit_data_old *reqdata_old, *tmp;
                        
                reqdata_old = req_capsule_client_get(&req->rq_pill,
                                                     &RMF_QUNIT_DATA);

                tmp = lustre_quota_new_to_old(qdata);
                *reqdata_old = *tmp;
                req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER,
                                     sizeof(*reqdata_old));
                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
        } else {
                reqdata = req_capsule_client_get(&req->rq_pill,
                                                 &RMF_QUNIT_DATA);

                *reqdata = *qdata;
                req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER,
                                     sizeof(*reqdata));
                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
        }
        ptlrpc_request_set_replen(req);

        /* stash context and qunit for dqacq_interpret() */
        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
        aa = (struct dqacq_async_args *)&req->rq_async_args;
        aa->aa_ctxt = qctxt;
        aa->aa_qunit = qunit;

        req->rq_interpret_reply = dqacq_interpret;
        ptlrpcd_add_req(req);

        QDATA_DEBUG(qdata, "%s scheduled.\n",
                    opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
wait_completion:
        if (wait && qunit) {
                /* NOTE(review): no reference on qunit is held by this
                 * function here; if completion has already run, lq_data
                 * may belong to a freed qunit - confirm */
                struct qunit_data *p = &qunit->lq_data;
                QDATA_DEBUG(p, "wait for dqacq.\n");

                l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
                /* qw_rc is the rc handed over by whoever woke us */
                if (qw.qw_rc == 0)
                        rc = -EAGAIN;

                CDEBUG(D_QUOTA, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
        }
        RETURN(rc);
}
739
740 int
741 qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
742                    uid_t uid, gid_t gid, __u32 isblk, int wait)
743 {
744         int ret, rc = 0, i = USRQUOTA;
745         __u32 id[MAXQUOTAS] = { uid, gid };
746         struct qunit_data qdata[MAXQUOTAS];
747         ENTRY;
748
749         CLASSERT(MAXQUOTAS < 4);
750         if (!sb_any_quota_enabled(qctxt->lqc_sb))
751                 RETURN(0);
752
753         for (i = 0; i < MAXQUOTAS; i++) {
754                 qdata[i].qd_id = id[i];
755                 qdata[i].qd_flags = 0;
756                 qdata[i].qd_flags |= i;
757                 qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;        
758                 qdata[i].qd_count = 0;
759
760                 ret = check_cur_qunit(obd, qctxt, &qdata[i]);
761                 if (ret > 0) {
762                         int opc;
763                         /* need acquire or release */
764                         opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
765                         ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i], 
766                                                           opc, wait);
767                         if (!rc)
768                                 rc = ret;
769                 }
770         }
771
772         RETURN(rc);
773 }
774
775 int
776 qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id,
777                          unsigned short type, int isblk)
778 {
779         struct lustre_qunit *qunit = NULL;
780         struct qunit_waiter qw;
781         struct qunit_data qdata;
782         struct l_wait_info lwi = { 0 };
783         ENTRY;
784
785         CFS_INIT_LIST_HEAD(&qw.qw_entry);
786         init_waitqueue_head(&qw.qw_waitq);
787         qw.qw_rc = 0;
788
789         qdata.qd_id = id;
790         qdata.qd_flags = 0;
791         qdata.qd_flags |= type;
792         qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
793         qdata.qd_count = 0;
794
795         spin_lock(&qunit_hash_lock);
796
797         qunit = dqacq_in_flight(qctxt, &qdata);
798         if (qunit)
799                 list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
800
801         spin_unlock(&qunit_hash_lock);
802
803         if (qunit) {
804                 struct qunit_data *p = &qdata;
805                 QDATA_DEBUG(p, "wait for dqacq completion.\n");
806                 l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
807                 QDATA_DEBUG(p, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
808         }
809         RETURN(0);
810 }
811
812 int
813 qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb,
814            dqacq_handler_t handler)
815 {
816         int rc = 0;
817         ENTRY;
818
819         rc = ptlrpcd_addref();
820         if (rc)
821                 RETURN(rc);
822
823         qctxt->lqc_handler = handler;
824         qctxt->lqc_sb = sb;
825         qctxt->lqc_import = NULL;
826         qctxt->lqc_recovery = 0;
827         qctxt->lqc_atype = 0;
828         qctxt->lqc_status= 0;
829         qctxt->lqc_bunit_sz = default_bunit_sz;
830         qctxt->lqc_btune_sz = default_bunit_sz / 100 * default_btune_ratio;
831         qctxt->lqc_iunit_sz = default_iunit_sz;
832         qctxt->lqc_itune_sz = default_iunit_sz * default_itune_ratio / 100;
833
834         RETURN(0);
835 }
836
837 void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force)
838 {
839         struct lustre_qunit *qunit, *tmp;
840         struct qunit_waiter *qw, *tmp2;
841         int i;
842         ENTRY;
843
844         spin_lock(&qunit_hash_lock);
845
846         for (i = 0; i < NR_DQHASH; i++) {
847                 list_for_each_entry_safe(qunit, tmp, &qunit_hash[i], lq_hash) {
848                         if (qunit->lq_ctxt != qctxt)
849                                 continue;
850
851                         remove_qunit_nolock(qunit);
852                         /* wake up all waiters */
853                         list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters,
854                                                  qw_entry) {
855                                 list_del_init(&qw->qw_entry);
856                                 qw->qw_rc = 0;
857                                 wake_up(&qw->qw_waitq);
858                         }
859                         qunit_put(qunit);
860                 }
861         }
862
863         spin_unlock(&qunit_hash_lock);
864
865         ptlrpcd_decref();
866
867         EXIT;
868 }
869
870 struct qslave_recov_thread_data {
871         struct obd_device *obd;
872         struct lustre_quota_ctxt *qctxt;
873         struct completion comp;
874 };
875
876 /* FIXME only recovery block quota by now */
877 static int qslave_recovery_main(void *arg)
878 {
879         struct qslave_recov_thread_data *data = arg;
880         struct obd_device *obd = data->obd;
881         struct lustre_quota_ctxt *qctxt = data->qctxt;
882         unsigned int type;
883         int rc = 0;
884         ENTRY;
885
886         ptlrpc_daemonize("qslave_recovd");
887
888         complete(&data->comp);
889
890         if (qctxt->lqc_recovery)
891                 RETURN(0);
892         qctxt->lqc_recovery = 1;
893
894         for (type = USRQUOTA; type < MAXQUOTAS; type++) {
895                 struct qunit_data qdata;
896                 struct quota_info *dqopt = sb_dqopt(qctxt->lqc_sb);
897                 struct list_head id_list;
898                 struct dquot_id *dqid, *tmp;
899                 int ret;
900
901                 LOCK_DQONOFF_MUTEX(dqopt);
902                 if (!sb_has_quota_enabled(qctxt->lqc_sb, type)) {
903                         UNLOCK_DQONOFF_MUTEX(dqopt);
904                         break;
905                 }
906
907                 LASSERT(dqopt->files[type] != NULL);
908                 CFS_INIT_LIST_HEAD(&id_list);
909 #ifndef KERNEL_SUPPORTS_QUOTA_READ 
910                 rc = fsfilt_qids(obd, dqopt->files[type], NULL, type, &id_list);
911 #else
912                 rc = fsfilt_qids(obd, NULL, dqopt->files[type], type, &id_list);
913 #endif
914                 UNLOCK_DQONOFF_MUTEX(dqopt);
915                 if (rc)
916                         CERROR("Get ids from quota file failed. (rc:%d)\n", rc);
917
918                 list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
919                         list_del_init(&dqid->di_link);
920                         /* skip slave recovery on itself */
921                         if (is_master(obd, qctxt, dqid->di_id, type))
922                                 goto free;
923                         if (rc && rc != -EBUSY)
924                                 goto free;
925
926                         qdata.qd_id = dqid->di_id;
927                         qdata.qd_flags = 0;
928                         qdata.qd_flags |= type;
929                         qdata.qd_flags |= QUOTA_IS_BLOCK;
930                         qdata.qd_count = 0;
931
932                         ret = check_cur_qunit(obd, qctxt, &qdata);
933                         if (ret > 0) {
934                                 int opc;
935                                 opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
936                                 rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0);
937                         } else
938                                 rc = 0;
939
940                         if (rc)
941                                 CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR,
942                                        "qslave recovery failed! (id:%d type:%d "
943                                        " rc:%d)\n", dqid->di_id, type, rc);
944 free:
945                         kfree(dqid);
946                 }
947         }
948
949         qctxt->lqc_recovery = 0;
950         RETURN(rc);
951 }
952
953 void
954 qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt)
955 {
956         struct qslave_recov_thread_data data;
957         int rc;
958         ENTRY;
959
960         if (!sb_any_quota_enabled(qctxt->lqc_sb))
961                 goto exit;
962
963         data.obd = obd;
964         data.qctxt = qctxt;
965         init_completion(&data.comp);
966
967         rc = kernel_thread(qslave_recovery_main, &data, CLONE_VM|CLONE_FILES);
968         if (rc < 0) {
969                 CERROR("Cannot start quota recovery thread: rc %d\n", rc);
970                 goto exit;
971         }
972         wait_for_completion(&data.comp);
973 exit:
974         EXIT;
975 }