b=21846 fix the test_32 in sanity-quota.sh and enable it
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LQUOTA
41
42 #ifdef __KERNEL__
43 # include <linux/version.h>
44 # include <linux/module.h>
45 # include <linux/init.h>
46 # include <linux/fs.h>
47 # include <linux/smp_lock.h>
48 # include <linux/buffer_head.h>
49 # include <linux/workqueue.h>
50 # include <linux/mount.h>
51 #else /* __KERNEL__ */
52 # include <liblustre.h>
53 #endif
54
55 #include <obd_class.h>
56 #include <lustre_mds.h>
57 #include <lustre_dlm.h>
58 #include <lustre_cfg.h>
59 #include <obd_ost.h>
60 #include <lustre_fsfilt.h>
61 #include <lustre_quota.h>
62 #include <lprocfs_status.h>
63 #include "quota_internal.h"
64
65 #ifdef __KERNEL__
66
67 #ifdef HAVE_QUOTA_SUPPORT
68
69 static cfs_time_t last_print = 0;
70 static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
71
72 static int filter_quota_setup(struct obd_device *obd)
73 {
74         int rc = 0;
75         struct obd_device_target *obt = &obd->u.obt;
76         ENTRY;
77
78 #ifdef HAVE_QUOTA64
79         obt->obt_qfmt = LUSTRE_QUOTA_V2;
80 #else
81         obt->obt_qfmt = LUSTRE_QUOTA_V1;
82 #endif
83         atomic_set(&obt->obt_quotachecking, 1);
84         rc = qctxt_init(obd, NULL);
85         if (rc)
86                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
87
88         RETURN(rc);
89 }
90
91 static int filter_quota_cleanup(struct obd_device *obd)
92 {
93         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
94         return 0;
95 }
96
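/* Cache the reverse import to the quota master in the quota context, mirror
 * the quota-related connect flags onto it, wake up any thread waiting for the
 * master and start the quota slave recovery thread. */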
97 static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd)
98 {
99         struct obd_import *imp;
100         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
101         ENTRY;
102
103         /* setup the quota context import */
104         spin_lock(&obd->u.obt.obt_qctxt.lqc_lock);
105         obd->u.obt.obt_qctxt.lqc_import = exp->exp_imp_reverse;
106         spin_unlock(&obd->u.obt.obt_qctxt.lqc_lock);
107         CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated now\n",
108                obd->obd_name, exp->exp_imp_reverse, obd);
109
110         /* make the import's connect flags match the corresponding export's
111          * connect flags; doing it here avoids scanning the export list
112          */
113         imp = exp->exp_imp_reverse;
114         if (imp)
115                 imp->imp_connect_data.ocd_connect_flags |=
116                         (exp->exp_connect_flags &
117                          (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
118
119         cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
120         /* start quota slave recovery thread. (release high limits) */
121         qslave_start_recovery(obd, &obd->u.obt.obt_qctxt);
122         RETURN(0);
123 }
124
125 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
126 {
127         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
128         ENTRY;
129
130         /* lquota may not be set up before destroying the export, b=14896 */
131         if (!obd->obd_set_up)
132                 RETURN(0);
133
134         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
135          * should be invalid b=12374 */
136         if (qctxt->lqc_import && qctxt->lqc_import == exp->exp_imp_reverse) {
137                 spin_lock(&qctxt->lqc_lock);
138                 qctxt->lqc_import = NULL;
139                 spin_unlock(&qctxt->lqc_lock);
140                 ptlrpc_cleanup_imp(exp->exp_imp_reverse);
141                 dqacq_interrupt(qctxt);
142                 CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
143                        obd->obd_name, obd);
144         }
145         RETURN(0);
146 }
147
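/* Toggle CFS_CAP_SYS_RESOURCE for the current thread so that subsequent
 * writes either bypass or honour quota enforcement; -EALREADY means the
 * requested state is already in effect. */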
148 static int target_quota_enforce(struct obd_device *obd, unsigned int ignore)
149 {
150         ENTRY;
151
152         if (!ll_sb_any_quota_active(obd->u.obt.obt_sb))
153                 RETURN(-EINVAL);
154
155         if (!!cfs_cap_raised(CFS_CAP_SYS_RESOURCE) == !!ignore)
156                 RETURN(-EALREADY);
157
158         if (ignore) {
159                 CDEBUG(D_QUOTA, "blocks will be written ignoring quota\n");
160                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
161         } else {
162                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
163         }
164
165         RETURN(0);
166 }
167
168 #define GET_OA_ID(flag, oa) (flag == USRQUOTA ? oa->o_uid : oa->o_gid)
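/* Set the per-id over-quota flags in the obdo returned to the client: an id
 * is flagged when its block qunit size has shrunk to the sync threshold or
 * when its current usage has already reached the block hard limit. */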
169 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
170 {
171         struct obd_device_target *obt = &obd->u.obt;
172         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
173         int err, cnt, rc = 0;
174         struct obd_quotactl *oqctl;
175         ENTRY;
176
177         if (!ll_sb_any_quota_active(obt->obt_sb))
178                 RETURN(0);
179
180         OBD_ALLOC_PTR(oqctl);
181         if (!oqctl)
182                 RETURN(-ENOMEM);
183
184         /* set over quota flags for a uid/gid */
185         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
186         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
187
188         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
189                 struct lustre_qunit_size *lqs = NULL;
190
191                 lqs = quota_search_lqs(LQS_KEY(cnt, GET_OA_ID(cnt, oa)),
192                                        qctxt, 0);
193                 if (lqs == NULL || IS_ERR(lqs)) {
194                         rc = PTR_ERR(lqs);
195                         break;
196                 } else {
197                         spin_lock(&lqs->lqs_lock);
198                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
199                                 oa->o_flags |= (cnt == USRQUOTA) ?
200                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
201                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
202                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
203                                        qctxt->lqc_sync_blk);
204                                 spin_unlock(&lqs->lqs_lock);
205                                 /* this is for quota_search_lqs */
206                                 lqs_putref(lqs);
207                                 continue;
208                         }
209                         spin_unlock(&lqs->lqs_lock);
210                         /* this is for quota_search_lqs */
211                         lqs_putref(lqs);
212                 }
213
214                 memset(oqctl, 0, sizeof(*oqctl));
215
216                 oqctl->qc_cmd = Q_GETQUOTA;
217                 oqctl->qc_type = cnt;
218                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
219                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
220                 if (err) {
221                         if (!rc)
222                                 rc = err;
223                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
224                                                              OBD_MD_FLGRPQUOTA);
225                         continue;
226                 }
227
228                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
229                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
230                     oqctl->qc_dqblk.dqb_bhardlimit))
231                         oa->o_flags |= (cnt == USRQUOTA) ?
232                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
233         }
234         OBD_FREE_PTR(oqctl);
235         RETURN(rc);
236 }
237
238 static int filter_quota_acquire(struct obd_device *obd, unsigned int uid,
239                                 unsigned int gid, struct obd_trans_info *oti)
240 {
241         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
242         int rc;
243         ENTRY;
244
245         rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, LQUOTA_FLAGS_BLK, 1, oti);
246         RETURN(rc);
247 }
248
249 /* check whether the remaining quota of the given uid and gid can satisfy a
250  * block_write or inode_create rpc; return QUOTA_RET_ACQUOTA if more is needed */
251 static int quota_check_common(struct obd_device *obd, unsigned int uid,
252                               unsigned int gid, int count, int cycle, int isblk,
253                               struct inode *inode, int frags, int pending[2])
254 {
255         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
256         int i;
257         __u32 id[MAXQUOTAS] = { uid, gid };
258         struct qunit_data qdata[MAXQUOTAS];
259         int mb = 0;
260         int rc = 0, rc2[2] = { 0, 0 };
261         ENTRY;
262
263         spin_lock(&qctxt->lqc_lock);
264         if (!qctxt->lqc_valid){
265                 spin_unlock(&qctxt->lqc_lock);
266                 RETURN(rc);
267         }
268         spin_unlock(&qctxt->lqc_lock);
269
270         for (i = 0; i < MAXQUOTAS; i++) {
271                 struct lustre_qunit_size *lqs = NULL;
272
273                 qdata[i].qd_id = id[i];
274                 qdata[i].qd_flags = i;
275                 if (isblk)
276                         QDATA_SET_BLK(&qdata[i]);
277                 qdata[i].qd_count = 0;
278
279                 /* ignore root user */
280                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
281                         continue;
282
283                 lqs = quota_search_lqs(LQS_KEY(i, id[i]), qctxt, 0);
284                 if (lqs == NULL || IS_ERR(lqs))
285                         continue;
286
287                 rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
288                 spin_lock(&lqs->lqs_lock);
289                 if (!cycle) {
290                         if (isblk) {
291                                 pending[i] = count * CFS_PAGE_SIZE;
292                                 /* completing this write also consumes extra
293                                  * meta blocks; estimate them from the data
294                                  * about to be written, b=16542 */
295                                 mb = pending[i];
296                                 LASSERT(inode && frags > 0);
297                                 if (fsfilt_get_mblk(obd, qctxt->lqc_sb, &mb,
298                                                     inode, frags) < 0)
299                                         CERROR("%s: can't get extra meta "
300                                                "blocks\n", obd->obd_name);
301                                 else
302                                         pending[i] += mb;
303                                 lqs->lqs_bwrite_pending += pending[i];
304                         } else {
305                                 pending[i] = count;
306                                 lqs->lqs_iwrite_pending += pending[i];
307                         }
308                 }
309
310                 /* if lqs_*_rec < 0, quota is being released and the release
311                  * may reach the master before we consume the quota here; in
312                  * that case assume the release has already completed,
313                  * b=18491 */
314                 if (isblk && lqs->lqs_blk_rec < 0) {
315                         if (qdata[i].qd_count < -lqs->lqs_blk_rec)
316                                 qdata[i].qd_count = 0;
317                         else
318                                 qdata[i].qd_count += lqs->lqs_blk_rec;
319                 }
320                 if (!isblk && lqs->lqs_ino_rec < 0) {
321                         if (qdata[i].qd_count < -lqs->lqs_ino_rec)
322                                 qdata[i].qd_count = 0;
323                         else
324                                 qdata[i].qd_count += lqs->lqs_ino_rec;
325                 }
326
327                 CDEBUG(D_QUOTA, "count=%d lqs_pending=%lu qd_count="LPU64
328                        " isblk=%d mb=%d pending[%d]=%d\n", count,
329                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
330                        qdata[i].qd_count, isblk, mb, i, pending[i]);
331                 if (rc2[i] == QUOTA_RET_OK) {
332                         if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
333                                 rc2[i] = QUOTA_RET_ACQUOTA;
334                         if (!isblk && qdata[i].qd_count <
335                             lqs->lqs_iwrite_pending)
336                                 rc2[i] = QUOTA_RET_ACQUOTA;
337                 }
338
339                 spin_unlock(&lqs->lqs_lock);
340
341                 if (lqs->lqs_blk_rec  < 0 &&
342                     qdata[i].qd_count <
343                     lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
344                         OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);
345
346                 /* when cycle is zero, lqs_*_pending has been changed above;
347                  * take a reference on the lqs here and drop it again in
348                  * quota_pending_commit, b=14784 */
349                 if (!cycle)
350                         lqs_getref(lqs);
351
352                 /* this is for quota_search_lqs */
353                 lqs_putref(lqs);
354         }
355
356         if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
357                 RETURN(QUOTA_RET_ACQUOTA);
358         else
359                 RETURN(rc);
360 }
361
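/* Check/acquire loop: as long as quota_check_common asks for more quota, call
 * acquire() to get qunits from the master, sleeping while the master is
 * unreachable or busy; give up on -EDQUOT or when the client is evicted. */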
362 static int quota_chk_acq_common(struct obd_export *exp, unsigned int uid,
363                                 unsigned int gid, int count, int pending[2],
364                                 int isblk, quota_acquire acquire,
365                                 struct obd_trans_info *oti, struct inode *inode,
366                                 int frags)
367 {
368         struct obd_device *obd = exp->exp_obd;
369         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
370         struct timeval work_start;
371         struct timeval work_end;
372         long timediff;
373         struct l_wait_info lwi = { 0 };
374         int rc = 0, cycle = 0, count_err = 1;
375         ENTRY;
376
377         CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
378         if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
379                 /* If the client has been evicted or if it
380                  * timed out and tried to reconnect already,
381                  * abort the request immediately */
382                 RETURN(-ENOTCONN);
383
384         /* Unfortunately, if the quota master is too busy to handle the
385          * pre-dqacq in time and the quota hash on the OST is used up, we
386          * have to wait for in-flight dqacq/dqrel requests to complete
387          * in order to get enough quota for the write, b=12588 */
388         do_gettimeofday(&work_start);
389         while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk,
390                                         inode, frags, pending)) & QUOTA_RET_ACQUOTA) {
391
392                 spin_lock(&qctxt->lqc_lock);
393                 if (!qctxt->lqc_import && oti) {
394                         spin_unlock(&qctxt->lqc_lock);
395
396                         LASSERT(oti && oti->oti_thread &&
397                                 oti->oti_thread->t_watchdog);
398
399                         lc_watchdog_disable(oti->oti_thread->t_watchdog);
400                         CDEBUG(D_QUOTA, "sleep for quota master\n");
401                         l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
402                                      &lwi);
403                         CDEBUG(D_QUOTA, "wake up when quota master is back\n");
404                         lc_watchdog_touch(oti->oti_thread->t_watchdog,
405                                  GET_TIMEOUT(oti->oti_thread->t_svc));
406                 } else {
407                         spin_unlock(&qctxt->lqc_lock);
408                 }
409
410                 cycle++;
411                 if (isblk)
412                         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
413                 /* after acquire(), run quota_check_common again to confirm
414                  * there is enough quota to finish the write */
415                 rc = acquire(obd, uid, gid, oti);
416
417                 /* see dqacq_completion for the return values handled below */
418                 /* a new request is finished, try again */
419                 if (rc == QUOTA_REQ_RETURNED) {
420                         CDEBUG(D_QUOTA, "finish a quota req, try again\n");
421                         continue;
422                 }
423
424                 /* it is out of quota already */
425                 if (rc == -EDQUOT) {
426                         CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
427                         break;
428                 }
429
430                 if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
431                         /* The client has been evicted or has tried to
432                          * reconnect already, abort the request */
433                         RETURN(-ENOTCONN);
434
435                 /* -EBUSY and others, wait a second and try again */
436                 if (rc < 0) {
437                         cfs_waitq_t        waitq;
438                         struct l_wait_info lwi;
439
440                         if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
441                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
442                                          GET_TIMEOUT(oti->oti_thread->t_svc));
443                         CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
444                                count_err++);
445
446                         init_waitqueue_head(&waitq);
447                         lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
448                                           NULL);
449                         l_wait_event(waitq, 0, &lwi);
450                 }
451
452                 if (rc < 0 || cycle % 10 == 0) {
453                         spin_lock(&last_print_lock);
454                         if (last_print == 0 ||
455                             cfs_time_before((last_print + cfs_time_seconds(30)),
456                                             cfs_time_current())) {
457                                 CWARN("still haven't managed to acquire quota "
458                                       "space from the quota master after %d "
459                                       "retries (err=%d, rc=%d)\n",
460                                       cycle, count_err - 1, rc);
461                                 last_print = cfs_time_current();
462                         }
463                         spin_unlock(&last_print_lock);
464                 }
465
466                 CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
467                        cycle);
468         }
469
470         do_gettimeofday(&work_end);
471         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
472         lprocfs_counter_add(qctxt->lqc_stats,
473                             isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
474                                     LQUOTA_WAIT_FOR_CHK_INO,
475                             timediff);
476
477         if (rc > 0)
478                 rc = 0;
479         RETURN(rc);
480 }
481
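/* Return 1 if any lqs for the uid or gid has the given limit flag (QB_SET or
 * QI_SET) raised, 0 otherwise; ids without limits can then skip the expensive
 * check/acquire path. */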
482 int quota_is_set(struct obd_device *obd, unsigned int uid,
483                  unsigned int gid, int flag)
484 {
485         struct lustre_qunit_size *lqs;
486         __u32 id[MAXQUOTAS] = { uid, gid };
487         int i, q_set = 0;
488
489         if (!ll_sb_any_quota_active(obd->u.obt.obt_qctxt.lqc_sb))
490                 RETURN(0);
491
492         for (i = 0; i < MAXQUOTAS; i++) {
493                 lqs = quota_search_lqs(LQS_KEY(i, id[i]),
494                                        &obd->u.obt.obt_qctxt, 0);
495                 if (lqs && !IS_ERR(lqs)) {
496                         if (lqs->lqs_flags & flag)
497                                 q_set = 1;
498                         lqs_putref(lqs);
499                 }
500         }
501
502         return q_set;
503 }
504
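/* OST entry point for block quota: only run the full check/acquire cycle when
 * a block limit is actually set for the uid or gid. */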
505 static int filter_quota_check(struct obd_export *exp, unsigned int uid,
506                               unsigned int gid, int npage, int pending[2],
507                               quota_acquire acquire, struct obd_trans_info *oti,
508                               struct inode *inode, int frags)
509 {
510         return quota_is_set(exp->exp_obd, uid, gid, QB_SET) ?
511                 quota_chk_acq_common(exp, uid, gid, npage, pending,
512                                      LQUOTA_FLAGS_BLK, acquire, oti, inode,
513                                      frags) : 0;
514 }
515
516 /* when a block_write or inode_create rpc is finished, adjust the record for
517  * pending blocks and inodes */
518 static int quota_pending_commit(struct obd_device *obd, unsigned int uid,
519                                 unsigned int gid, int pending[2], int isblk)
520 {
521         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
522         struct timeval work_start;
523         struct timeval work_end;
524         long timediff;
525         int i;
526         __u32 id[MAXQUOTAS] = { uid, gid };
527         struct qunit_data qdata[MAXQUOTAS];
528         ENTRY;
529
530         CDEBUG(D_QUOTA, "%s: commit pending quota\n", obd->obd_name);
531         CLASSERT(MAXQUOTAS < 4);
532
533         do_gettimeofday(&work_start);
534         for (i = 0; i < MAXQUOTAS; i++) {
535                 struct lustre_qunit_size *lqs = NULL;
536                 int flag = 0;
537
538                 qdata[i].qd_id = id[i];
539                 qdata[i].qd_flags = i;
540                 if (isblk)
541                         QDATA_SET_BLK(&qdata[i]);
542                 qdata[i].qd_count = 0;
543
544                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
545                         continue;
546
547                 lqs = quota_search_lqs(LQS_KEY(i, qdata[i].qd_id), qctxt, 0);
548                 if (lqs == NULL || IS_ERR(lqs))
549                         continue;
550
551                 spin_lock(&lqs->lqs_lock);
552                 if (isblk) {
553                         if (lqs->lqs_bwrite_pending >= pending[i]) {
554                                 lqs->lqs_bwrite_pending -= pending[i];
555                                 flag = 1;
556                         } else {
557                                 CERROR("%s: there are too many blocks!\n",
558                                        obd->obd_name);
559                         }
560                 } else {
561                         if (lqs->lqs_iwrite_pending >= pending[i]) {
562                                 lqs->lqs_iwrite_pending -= pending[i];
563                                 flag = 1;
564                         } else {
565                                 CERROR("%s: there are too many files!\n",
566                                        obd->obd_name);
567                         }
568                 }
569                 CDEBUG(D_QUOTA, "%s: lqs_pending=%lu pending[%d]=%d isblk=%d\n",
570                        obd->obd_name,
571                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
572                        i, pending[i], isblk);
573
574                 spin_unlock(&lqs->lqs_lock);
575                 lqs_putref(lqs);
576                 /* when lqs_*_pending has been changed back, drop the extra
577                  * reference taken in quota_check_common, b=14784 */
578                 if (flag)
579                         lqs_putref(lqs);
580         }
581         do_gettimeofday(&work_end);
582         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
583         lprocfs_counter_add(qctxt->lqc_stats,
584                             isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
585                                     LQUOTA_WAIT_FOR_COMMIT_INO,
586                             timediff);
587
588         RETURN(0);
589 }
590
591 static int filter_quota_pending_commit(struct obd_device *obd, unsigned int uid,
592                                        unsigned int gid, int pending[2])
593 {
594         return quota_pending_commit(obd, uid, gid, pending, LQUOTA_FLAGS_BLK);
595 }
596
597 static int mds_quota_init(void)
598 {
599         return lustre_dquot_init();
600 }
601
602 static int mds_quota_exit(void)
603 {
604         lustre_dquot_exit();
605         return 0;
606 }
607
608 static int mds_quota_setup(struct obd_device *obd)
609 {
610         struct obd_device_target *obt = &obd->u.obt;
611         struct mds_obd *mds = &obd->u.mds;
612         int rc;
613         ENTRY;
614
615 #ifdef HAVE_QUOTA64
616         obt->obt_qfmt = LUSTRE_QUOTA_V2;
617 #else
618         obt->obt_qfmt = LUSTRE_QUOTA_V1;
619 #endif
620         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
621         atomic_set(&obt->obt_quotachecking, 1);
622         /* initialize quota master and quota context */
623         sema_init(&mds->mds_qonoff_sem, 1);
624         rc = qctxt_init(obd, dqacq_handler);
625         if (rc) {
626                 CERROR("%s: initialize quota context failed! (rc:%d)\n",
627                        obd->obd_name, rc);
628                 RETURN(rc);
629         }
630         RETURN(rc);
631 }
632
633 static int mds_quota_cleanup(struct obd_device *obd)
634 {
635         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
636         RETURN(0);
637 }
638
639 static int mds_quota_fs_cleanup(struct obd_device *obd)
640 {
641         struct mds_obd *mds = &obd->u.mds;
642         struct obd_quotactl oqctl;
643         ENTRY;
644
645         memset(&oqctl, 0, sizeof(oqctl));
646         oqctl.qc_type = UGQUOTA;
647
648         down(&mds->mds_qonoff_sem);
649         mds_admin_quota_off(obd, &oqctl);
650         up(&mds->mds_qonoff_sem);
651         RETURN(0);
652 }
653
654 static int mds_quota_check(struct obd_export *exp, unsigned int uid,
655                            unsigned int gid, int inodes, int pending[2],
656                            quota_acquire acquire, struct obd_trans_info *oti,
657                            struct inode *inode, int frags)
658 {
659         return quota_is_set(exp->exp_obd, uid, gid, QI_SET) ?
660                 quota_chk_acq_common(exp, uid, gid, inodes, pending, 0,
661                                      acquire, oti, inode, frags) : 0;
662 }
663
664 static int mds_quota_acquire(struct obd_device *obd, unsigned int uid,
665                              unsigned int gid, struct obd_trans_info *oti)
666 {
667         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
668         int rc;
669         ENTRY;
670
671         rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 0, 1, oti);
672         RETURN(rc);
673 }
674
675 static int mds_quota_pending_commit(struct obd_device *obd, unsigned int uid,
676                                     unsigned int gid, int pending[2])
677 {
678         return quota_pending_commit(obd, uid, gid, pending, 0);
679 }
680 #endif /* HAVE_QUOTA_SUPPORT */
681 #endif /* __KERNEL__ */
682
683 struct osc_quota_info {
684         struct list_head        oqi_hash;       /* hash list */
685         struct client_obd      *oqi_cli;        /* osc obd */
686         unsigned int            oqi_id;         /* uid/gid of a file */
687         short                   oqi_type;       /* quota type */
688 };
689
690 spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;
691
692 static struct list_head qinfo_hash[NR_DQHASH];
693 /* SLAB cache for client quota context */
694 cfs_mem_cache_t *qinfo_cachep = NULL;
695
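/* Hash the (client, id, type) triple into qinfo_hash: the client pointer is
 * shifted to drop allocator alignment bits before being xor-ed with the id,
 * and the quota type perturbs the product so user and group entries for the
 * same id spread differently. */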
696 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
697                          __attribute__((__const__));
698
699 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
700 {
701         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
702         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
703         return tmp;
704 }
705
706 /* caller must hold qinfo_list_lock */
707 static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
708 {
709         struct list_head *head = qinfo_hash +
710                 hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
711
712         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
713         list_add(&oqi->oqi_hash, head);
714 }
715
716 /* caller must hold qinfo_list_lock */
717 static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
718 {
719         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
720         list_del_init(&oqi->oqi_hash);
721 }
722
723 /* caller must hold qinfo_list_lock */
724 static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
725                                                 unsigned int id, int type)
726 {
727         unsigned int hashent = hashfn(cli, id, type);
728         struct osc_quota_info *oqi;
729
730         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
731         list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
732                 if (oqi->oqi_cli == cli &&
733                     oqi->oqi_id == id && oqi->oqi_type == type)
734                         return oqi;
735         }
736         return NULL;
737 }
738
739 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
740                                           unsigned int id, int type)
741 {
742         struct osc_quota_info *oqi;
743         ENTRY;
744
745         OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi));
746         if (!oqi)
747                 RETURN(NULL);
748
749         CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
750         oqi->oqi_cli = cli;
751         oqi->oqi_id = id;
752         oqi->oqi_type = type;
753
754         RETURN(oqi);
755 }
756
757 static void free_qinfo(struct osc_quota_info *oqi)
758 {
759         OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
760 }
761
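/* Return NO_QUOTA if an over-quota entry is cached for either the uid or the
 * gid, QUOTA_OK otherwise. */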
762 int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid)
763 {
764         unsigned int id;
765         int cnt, rc = QUOTA_OK;
766         ENTRY;
767
768         spin_lock(&qinfo_list_lock);
769         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
770                 struct osc_quota_info *oqi = NULL;
771
772                 id = (cnt == USRQUOTA) ? uid : gid;
773                 oqi = find_qinfo(cli, id, cnt);
774                 if (oqi) {
775                         rc = NO_QUOTA;
776                         break;
777                 }
778         }
779         spin_unlock(&qinfo_list_lock);
780
781         RETURN(rc);
782 }
783
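/* Update the cached over-quota state from the OBD_FL_NO_*QUOTA flags returned
 * by the server: insert an entry when an id newly runs out of quota, remove
 * it when quota becomes available again. */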
784 int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid,
785                     obd_flag valid, obd_flag flags)
786 {
787         unsigned int id;
788         obd_flag noquota;
789         int cnt, rc = 0;
790         ENTRY;
791
792
793         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
794                 struct osc_quota_info *oqi, *old;
795
796                 if (!(valid & ((cnt == USRQUOTA) ?
797                     OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
798                         continue;
799
800                 id = (cnt == USRQUOTA) ? uid : gid;
801                 noquota = (cnt == USRQUOTA) ?
802                     (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);
803
804                 oqi = alloc_qinfo(cli, id, cnt);
805                 if (!oqi) {
806                         rc = -ENOMEM;
807                         break;
808                 }
809
810                 spin_lock(&qinfo_list_lock);
811                 old = find_qinfo(cli, id, cnt);
812                 if (old && !noquota)
813                         remove_qinfo_hash(old);
814                 else if (!old && noquota)
815                         insert_qinfo_hash(oqi);
816                 spin_unlock(&qinfo_list_lock);
817
818                 if (old || !noquota)
819                         free_qinfo(oqi);
820                 if (old && !noquota)
821                         free_qinfo(old);
822         }
823
824         RETURN(rc);
825 }
826
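/* Drop every cached over-quota entry belonging to this client. */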
827 int osc_quota_cleanup(struct obd_device *obd)
828 {
829         struct client_obd *cli = &obd->u.cli;
830         struct osc_quota_info *oqi, *n;
831         int i;
832         ENTRY;
833
834         spin_lock(&qinfo_list_lock);
835         for (i = 0; i < NR_DQHASH; i++) {
836                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
837                         if (oqi->oqi_cli != cli)
838                                 continue;
839                         remove_qinfo_hash(oqi);
840                         free_qinfo(oqi);
841                 }
842         }
843         spin_unlock(&qinfo_list_lock);
844
845         RETURN(0);
846 }
847
848 int osc_quota_init(void)
849 {
850         int i;
851         ENTRY;
852
853         LASSERT(qinfo_cachep == NULL);
854         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
855                                             sizeof(struct osc_quota_info),
856                                             0, 0);
857         if (!qinfo_cachep)
858                 RETURN(-ENOMEM);
859
860         for (i = 0; i < NR_DQHASH; i++)
861                 CFS_INIT_LIST_HEAD(qinfo_hash + i);
862
863         RETURN(0);
864 }
865
866 int osc_quota_exit(void)
867 {
868         struct osc_quota_info *oqi, *n;
869         int i, rc;
870         ENTRY;
871
872         spin_lock(&qinfo_list_lock);
873         for (i = 0; i < NR_DQHASH; i++) {
874                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
875                         remove_qinfo_hash(oqi);
876                         free_qinfo(oqi);
877                 }
878         }
879         spin_unlock(&qinfo_list_lock);
880
881         rc = cfs_mem_cache_destroy(qinfo_cachep);
882         LASSERTF(rc == 0, "couldn't destroy qinfo_cachep slab\n");
883         qinfo_cachep = NULL;
884
885         RETURN(0);
886 }
887
888 #ifdef __KERNEL__
889 #ifdef HAVE_QUOTA_SUPPORT
890 quota_interface_t mds_quota_interface = {
891         .quota_init     = mds_quota_init,
892         .quota_exit     = mds_quota_exit,
893         .quota_setup    = mds_quota_setup,
894         .quota_cleanup  = mds_quota_cleanup,
895         .quota_check    = target_quota_check,
896         .quota_ctl      = mds_quota_ctl,
897         .quota_fs_cleanup = mds_quota_fs_cleanup,
898         .quota_recovery = mds_quota_recovery,
899         .quota_adjust   = mds_quota_adjust,
900         .quota_chkquota = mds_quota_check,
901         .quota_enforce  = target_quota_enforce,
902         .quota_acquire  = mds_quota_acquire,
903         .quota_pending_commit = mds_quota_pending_commit,
904 };
905
906 quota_interface_t filter_quota_interface = {
907         .quota_setup    = filter_quota_setup,
908         .quota_cleanup  = filter_quota_cleanup,
909         .quota_check    = target_quota_check,
910         .quota_ctl      = filter_quota_ctl,
911         .quota_setinfo  = filter_quota_setinfo,
912         .quota_clearinfo = filter_quota_clearinfo,
913         .quota_enforce  = target_quota_enforce,
914         .quota_getflag  = filter_quota_getflag,
915         .quota_acquire  = filter_quota_acquire,
916         .quota_adjust   = filter_quota_adjust,
917         .quota_chkquota = filter_quota_check,
918         .quota_adjust_qunit   = filter_quota_adjust_qunit,
919         .quota_pending_commit = filter_quota_pending_commit,
920 };
921 #endif
922 #endif /* __KERNEL__ */
923
924 quota_interface_t mdc_quota_interface = {
925         .quota_ctl      = client_quota_ctl,
926         .quota_check    = client_quota_check,
927         .quota_poll_check = client_quota_poll_check,
928 };
929
930 quota_interface_t osc_quota_interface = {
931         .quota_ctl      = client_quota_ctl,
932         .quota_check    = client_quota_check,
933         .quota_poll_check = client_quota_poll_check,
934         .quota_init     = osc_quota_init,
935         .quota_exit     = osc_quota_exit,
936         .quota_chkdq    = osc_quota_chkdq,
937         .quota_setdq    = osc_quota_setdq,
938         .quota_cleanup  = osc_quota_cleanup,
939         .quota_adjust_qunit = client_quota_adjust_qunit,
940 };
941
942 quota_interface_t lov_quota_interface = {
943         .quota_check    = lov_quota_check,
944         .quota_ctl      = lov_quota_ctl,
945         .quota_adjust_qunit = lov_quota_adjust_qunit,
946 };
947
948 #ifdef __KERNEL__
949
950 cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
951
952 static int __init init_lustre_quota(void)
953 {
954 #ifdef HAVE_QUOTA_SUPPORT
955         int rc = 0;
956
957         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
958                                                 proc_lustre_root,
959                                                 NULL, NULL);
960         if (IS_ERR(lquota_type_proc_dir)) {
961                 CERROR("LProcFS failed in lquota-init\n");
962                 rc = PTR_ERR(lquota_type_proc_dir);
963                 return rc;
964         }
965
966         rc = qunit_cache_init();
967         if (rc)
968                 return rc;
969
970         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
971         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
972 #endif
973         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
974         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
975         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
976         return 0;
977 }
978
979 static void /*__exit*/ exit_lustre_quota(void)
980 {
981         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
982         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
983         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
984 #ifdef HAVE_QUOTA_SUPPORT
985         PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
986         PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
987
988         qunit_cache_cleanup();
989
990         if (lquota_type_proc_dir)
991                 lprocfs_remove(&lquota_type_proc_dir);
992 #endif
993 }
994
995 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
996 MODULE_DESCRIPTION("Lustre Quota");
997 MODULE_LICENSE("GPL");
998
999 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
1000
1001 #ifdef HAVE_QUOTA_SUPPORT
1002 EXPORT_SYMBOL(mds_quota_interface);
1003 EXPORT_SYMBOL(filter_quota_interface);
1004 #endif
1005 EXPORT_SYMBOL(mdc_quota_interface);
1006 EXPORT_SYMBOL(osc_quota_interface);
1007 EXPORT_SYMBOL(lov_quota_interface);
1008 #endif /* __KERNEL__ */