LU-432 QUOTA_OK / NO_QUOTA definitions
lustre/quota/quota_interface.c
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 */

#ifndef EXPORT_SYMTAB
# define EXPORT_SYMTAB
#endif
#define DEBUG_SUBSYSTEM S_LQUOTA

#ifdef __KERNEL__
# include <linux/version.h>
# include <linux/module.h>
# include <linux/init.h>
# include <linux/fs.h>
# include <linux/smp_lock.h>
# include <linux/buffer_head.h>
# include <linux/workqueue.h>
# include <linux/mount.h>
#else /* __KERNEL__ */
# include <liblustre.h>
#endif

/* Kernels 2.6.34 and later no longer define QUOTA_OK / NO_QUOTA in
 * <linux/quotaops.h>, so provide the historical values here. */
#ifndef QUOTA_OK
#define QUOTA_OK 0
#endif
#ifndef NO_QUOTA
#define NO_QUOTA 1
#endif

#include <obd_class.h>
#include <lustre_mds.h>
#include <lustre_dlm.h>
#include <lustre_cfg.h>
#include <obd_ost.h>
#include <lustre_fsfilt.h>
#include <lustre_quota.h>
#include <lprocfs_status.h>
#include "quota_internal.h"

#ifdef __KERNEL__

#ifdef HAVE_QUOTA_SUPPORT

static cfs_time_t last_print = 0;
static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;

static int filter_quota_setup(struct obd_device *obd)
{
        int rc = 0;
        struct obd_device_target *obt = &obd->u.obt;
        ENTRY;

#ifdef HAVE_QUOTA64
        obt->obt_qfmt = LUSTRE_QUOTA_V2;
#else
        obt->obt_qfmt = LUSTRE_QUOTA_V1;
#endif
        atomic_set(&obt->obt_quotachecking, 1);
        rc = qctxt_init(obd, NULL);
        if (rc)
                CERROR("initialize quota context failed! (rc:%d)\n", rc);

        RETURN(rc);
}

static int filter_quota_cleanup(struct obd_device *obd)
{
        qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
        return 0;
}

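/* Called when the reverse import to the quota master becomes usable: cache it
 * in the quota context, copy the quota-related connect flags onto it, wake any
 * threads waiting for the master in the dqacq path, and start slave recovery
 * so that over-acquired limits can be released. */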
static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd)
{
        struct obd_import *imp;
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        ENTRY;

        /* setup the quota context import */
        spin_lock(&obd->u.obt.obt_qctxt.lqc_lock);
        obd->u.obt.obt_qctxt.lqc_import = exp->exp_imp_reverse;
        spin_unlock(&obd->u.obt.obt_qctxt.lqc_lock);
        CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated now\n",
               obd->obd_name, exp->exp_imp_reverse, obd);

        /* copy the quota-related connect flags from the export to the
         * reverse import so that we do not have to scan the export list
         * later */
        imp = exp->exp_imp_reverse;
        if (imp)
                imp->imp_connect_data.ocd_connect_flags |=
                        (exp->exp_connect_flags &
                         (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));

        cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
        /* start quota slave recovery thread (release over-acquired limits) */
        qslave_start_recovery(obd, &obd->u.obt.obt_qctxt);
        RETURN(0);
}

static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        ENTRY;

        /* lquota may not be set up yet when the export is destroyed, b=14896 */
        if (!obd->obd_set_up)
                RETURN(0);

        /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
         * must be invalidated as well, b=12374 */
        if (qctxt->lqc_import && qctxt->lqc_import == exp->exp_imp_reverse) {
                spin_lock(&qctxt->lqc_lock);
                qctxt->lqc_import = NULL;
                spin_unlock(&qctxt->lqc_lock);
                ptlrpc_cleanup_imp(exp->exp_imp_reverse);
                dqacq_interrupt(qctxt);
                CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
                       obd->obd_name, obd);
        }
        RETURN(0);
}

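/* Toggle quota enforcement for the current thread by raising or lowering
 * CFS_CAP_SYS_RESOURCE; while the capability is raised, block allocations
 * are performed while ignoring quota limits. */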
static int target_quota_enforce(struct obd_device *obd, unsigned int ignore)
{
        ENTRY;

        if (!ll_sb_any_quota_active(obd->u.obt.obt_sb))
                RETURN(-EINVAL);

        if (!!cfs_cap_raised(CFS_CAP_SYS_RESOURCE) == !!ignore)
                RETURN(-EALREADY);

        if (ignore) {
                CDEBUG(D_QUOTA, "blocks will be written while ignoring quota.\n");
                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
        } else {
                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
        }

        RETURN(0);
}

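/* Decide which over-quota flags the OST should report back to the client in
 * the reply obdo.  For each quota type, OBD_FL_NO_USRQUOTA/OBD_FL_NO_GRPQUOTA
 * is set either when the per-ID qunit size has already shrunk to the sync
 * threshold or when usage has reached the hard block limit; the client caches
 * these flags via osc_quota_setdq() so that further cached writes for that ID
 * can be avoided. */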
#define GET_OA_ID(flag, oa) (flag == USRQUOTA ? oa->o_uid : oa->o_gid)
static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
{
        struct obd_device_target *obt = &obd->u.obt;
        struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
        int err, cnt, rc = 0;
        struct obd_quotactl *oqctl;
        ENTRY;

        if (!ll_sb_any_quota_active(obt->obt_sb))
                RETURN(0);

        OBD_ALLOC_PTR(oqctl);
        if (!oqctl)
                RETURN(-ENOMEM);

        /* set over quota flags for a uid/gid */
        oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
        oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);

        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                struct lustre_qunit_size *lqs = NULL;

                lqs = quota_search_lqs(LQS_KEY(cnt, GET_OA_ID(cnt, oa)),
                                       qctxt, 0);
                if (lqs == NULL || IS_ERR(lqs)) {
                        rc = PTR_ERR(lqs);
                        break;
                } else {
                        spin_lock(&lqs->lqs_lock);
                        if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
                                oa->o_flags |= (cnt == USRQUOTA) ?
                                        OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
                                CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
                                       "sync_blk(%d)\n", lqs->lqs_bunit_sz,
                                       qctxt->lqc_sync_blk);
                                spin_unlock(&lqs->lqs_lock);
                                /* this is for quota_search_lqs */
                                lqs_putref(lqs);
                                continue;
                        }
                        spin_unlock(&lqs->lqs_lock);
                        /* this is for quota_search_lqs */
                        lqs_putref(lqs);
                }

                memset(oqctl, 0, sizeof(*oqctl));

                oqctl->qc_cmd = Q_GETQUOTA;
                oqctl->qc_type = cnt;
                oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
                err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
                if (err) {
                        if (!rc)
                                rc = err;
                        oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
                                                             OBD_MD_FLGRPQUOTA);
                        continue;
                }

                if (oqctl->qc_dqblk.dqb_bhardlimit &&
                   (toqb(oqctl->qc_dqblk.dqb_curspace) >=
                    oqctl->qc_dqblk.dqb_bhardlimit))
                        oa->o_flags |= (cnt == USRQUOTA) ?
                                OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
        }
        OBD_FREE_PTR(oqctl);
        RETURN(rc);
}

static int filter_quota_acquire(struct obd_device *obd, unsigned int uid,
                                unsigned int gid, struct obd_trans_info *oti)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        int rc;
        ENTRY;

        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, LQUOTA_FLAGS_BLK, 1, oti);
        RETURN(rc);
}

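/* Overall flow of the common check/acquire/commit path:
 *
 *   quota_chk_acq_common()
 *     -> quota_check_common()   record the pending block/inode counts in the
 *                               per-ID lqs and decide whether more quota must
 *                               be acquired from the master
 *     -> acquire()              filter_quota_acquire()/mds_quota_acquire(),
 *                               i.e. a dqacq request via qctxt_adjust_qunit()
 *   ... the write or create is performed ...
 *   quota_pending_commit()      subtract the pending counts again and drop
 *                               the lqs reference taken during the check
 *
 * pending[] is expressed in bytes for block writes and in inode counts for
 * creates. */
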
/* Check whether the remaining quota of a given uid and gid can satisfy a
 * block_write or inode_create rpc; return QUOTA_RET_ACQUOTA when more quota
 * needs to be acquired from the master. */
static int quota_check_common(struct obd_device *obd, unsigned int uid,
                              unsigned int gid, int count, int cycle, int isblk,
                              struct inode *inode, int frags, int pending[2])
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        int i;
        __u32 id[MAXQUOTAS] = { uid, gid };
        struct qunit_data qdata[MAXQUOTAS];
        int mb = 0;
        int rc = 0, rc2[2] = { 0, 0 };
        ENTRY;

        spin_lock(&qctxt->lqc_lock);
        if (!qctxt->lqc_valid) {
                spin_unlock(&qctxt->lqc_lock);
                RETURN(rc);
        }
        spin_unlock(&qctxt->lqc_lock);

        for (i = 0; i < MAXQUOTAS; i++) {
                struct lustre_qunit_size *lqs = NULL;

                qdata[i].qd_id = id[i];
                qdata[i].qd_flags = i;
                if (isblk)
                        QDATA_SET_BLK(&qdata[i]);
                qdata[i].qd_count = 0;

                /* ignore root user */
                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
                        continue;

                lqs = quota_search_lqs(LQS_KEY(i, id[i]), qctxt, 0);
                if (lqs == NULL || IS_ERR(lqs))
                        continue;

                rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
                spin_lock(&lqs->lqs_lock);
                if (!cycle) {
                        if (isblk) {
                                pending[i] = count * CFS_PAGE_SIZE;
                                /* Completing this write also consumes extra
                                 * metadata blocks; estimate them from the
                                 * data that is about to be written, b=16542 */
                                mb = pending[i];
                                LASSERT(inode && frags > 0);
                                if (fsfilt_get_mblk(obd, qctxt->lqc_sb, &mb,
                                                    inode, frags) < 0)
                                        CERROR("%s: can't get extra meta "
                                               "blocks\n", obd->obd_name);
                                else
                                        pending[i] += mb;
                                lqs->lqs_bwrite_pending += pending[i];
                        } else {
                                pending[i] = count;
                                lqs->lqs_iwrite_pending += pending[i];
                        }
                }

                /* If lqs_*_rec < 0, a quota release is in flight and may
                 * complete before we actually consume the quota.  In that
                 * case assume the space has already been returned, b=18491 */
                if (isblk && lqs->lqs_blk_rec < 0) {
                        if (qdata[i].qd_count < -lqs->lqs_blk_rec)
                                qdata[i].qd_count = 0;
                        else
                                qdata[i].qd_count += lqs->lqs_blk_rec;
                }
                if (!isblk && lqs->lqs_ino_rec < 0) {
                        if (qdata[i].qd_count < -lqs->lqs_ino_rec)
                                qdata[i].qd_count = 0;
                        else
                                qdata[i].qd_count += lqs->lqs_ino_rec;
                }

                CDEBUG(D_QUOTA, "count=%d lqs_pending=%lu qd_count="LPU64
                       " isblk=%d mb=%d pending[%d]=%d\n", count,
                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
                       qdata[i].qd_count, isblk, mb, i, pending[i]);
                if (rc2[i] == QUOTA_RET_OK) {
                        if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
                                rc2[i] = QUOTA_RET_ACQUOTA;
                        if (!isblk && qdata[i].qd_count <
                            lqs->lqs_iwrite_pending)
                                rc2[i] = QUOTA_RET_ACQUOTA;
                }

                spin_unlock(&lqs->lqs_lock);

                if (lqs->lqs_blk_rec < 0 &&
                    qdata[i].qd_count <
                    lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);

                /* When cycle is zero, lqs_*_pending has just been increased.
                 * Take an extra reference on the lqs here; it is dropped in
                 * quota_pending_commit(), b=14784 */
                if (!cycle)
                        lqs_getref(lqs);

                /* this is for quota_search_lqs */
                lqs_putref(lqs);
        }

        if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
                RETURN(QUOTA_RET_ACQUOTA);
        else
                RETURN(rc);
}

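/* Check the quota for uid/gid and, if necessary, keep acquiring more from the
 * quota master until the pending write/create can be covered.  The loop
 * sleeps while the master is unreachable, re-runs quota_check_common() after
 * every acquisition, bails out on -EDQUOT or client eviction, and rate-limits
 * the "still haven't managed to acquire quota space" warning to one per 30s. */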
static int quota_chk_acq_common(struct obd_export *exp, unsigned int uid,
                                unsigned int gid, int count, int pending[2],
                                int isblk, quota_acquire acquire,
                                struct obd_trans_info *oti, struct inode *inode,
                                int frags)
{
        struct obd_device *obd = exp->exp_obd;
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        struct l_wait_info lwi = { 0 };
        int rc = 0, cycle = 0, count_err = 1;
        ENTRY;

        CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
        if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
                /* If the client has been evicted or if it
                 * timed out and tried to reconnect already,
                 * abort the request immediately */
                RETURN(-ENOTCONN);

        /* Unfortunately, if the quota master is too busy to handle the
         * pre-dqacq in time and the quota hash on the OST is used up, we
         * have to wait for the completion of in-flight dqacq/dqrel requests
         * in order to get enough quota for the write, b=12588 */
        do_gettimeofday(&work_start);
        while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk,
                                        inode, frags, pending)) & QUOTA_RET_ACQUOTA) {

                spin_lock(&qctxt->lqc_lock);
                if (!qctxt->lqc_import && oti) {
                        spin_unlock(&qctxt->lqc_lock);

                        LASSERT(oti && oti->oti_thread &&
                                oti->oti_thread->t_watchdog);

                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
                        CDEBUG(D_QUOTA, "sleep for quota master\n");
                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
                                     &lwi);
                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
                        lc_watchdog_touch(oti->oti_thread->t_watchdog,
                                 GET_TIMEOUT(oti->oti_thread->t_svc));
                } else {
                        spin_unlock(&qctxt->lqc_lock);
                }

                cycle++;
                if (isblk)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
                /* after acquire(), we should run quota_check_common again
                 * to confirm there is enough quota to finish the write */
                rc = acquire(obd, uid, gid, oti);

                /* see dqacq_completion for the handling of the cases below */
                /* a new request is finished, try again */
                if (rc == QUOTA_REQ_RETURNED) {
                        CDEBUG(D_QUOTA, "finish a quota req, try again\n");
                        continue;
                }

                /* it is out of quota already */
                if (rc == -EDQUOT) {
                        CDEBUG(D_QUOTA, "out of quota, return -EDQUOT\n");
                        break;
                }

                if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
                        /* The client has been evicted or tried to
                         * reconnect already, abort the request */
                        RETURN(-ENOTCONN);

                /* -EBUSY and others, wait a second and try again */
                if (rc < 0) {
                        cfs_waitq_t        waitq;
                        struct l_wait_info lwi;

                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
                                lc_watchdog_touch(oti->oti_thread->t_watchdog,
                                         GET_TIMEOUT(oti->oti_thread->t_svc));
                        CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
                               count_err++);

                        init_waitqueue_head(&waitq);
                        lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
                                          NULL);
                        l_wait_event(waitq, 0, &lwi);
                }

                if (rc < 0 || cycle % 10 == 0) {
                        spin_lock(&last_print_lock);
                        if (last_print == 0 ||
                            cfs_time_before((last_print + cfs_time_seconds(30)),
                                            cfs_time_current())) {
                                CWARN("still haven't managed to acquire quota "
                                      "space from the quota master after %d "
                                      "retries (err=%d, rc=%d)\n",
                                      cycle, count_err - 1, rc);
                                last_print = cfs_time_current();
                        }
                        spin_unlock(&last_print_lock);
                }

                CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
                       cycle);
        }

        do_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
                                    LQUOTA_WAIT_FOR_CHK_INO,
                            timediff);

        if (rc > 0)
                rc = 0;
        RETURN(rc);
}

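/* Return 1 if a block (QB_SET) or inode (QI_SET) limit is configured for
 * either the uid or the gid, 0 otherwise.  Used to skip the whole
 * check/acquire path when no quota limit applies to the operation. */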
int quota_is_set(struct obd_device *obd, unsigned int uid,
                 unsigned int gid, int flag)
{
        struct lustre_qunit_size *lqs;
        __u32 id[MAXQUOTAS] = { uid, gid };
        int i, q_set = 0;

        if (!ll_sb_any_quota_active(obd->u.obt.obt_qctxt.lqc_sb))
                return 0;

        for (i = 0; i < MAXQUOTAS; i++) {
                lqs = quota_search_lqs(LQS_KEY(i, id[i]),
                                       &obd->u.obt.obt_qctxt, 0);
                if (lqs && !IS_ERR(lqs)) {
                        if (lqs->lqs_flags & flag)
                                q_set = 1;
                        lqs_putref(lqs);
                }
        }

        return q_set;
}

static int filter_quota_check(struct obd_export *exp, unsigned int uid,
                              unsigned int gid, int npage, int pending[2],
                              quota_acquire acquire, struct obd_trans_info *oti,
                              struct inode *inode, int frags)
{
        return quota_is_set(exp->exp_obd, uid, gid, QB_SET) ?
                quota_chk_acq_common(exp, uid, gid, npage, pending,
                                     LQUOTA_FLAGS_BLK, acquire, oti, inode,
                                     frags) : 0;
}

/* When a block_write or inode_create rpc has finished, adjust the record of
 * pending blocks and inodes accordingly. */
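/* pending[] must be the same array that quota_check_common() filled in: it is
 * in bytes (data plus estimated metadata blocks) for block writes and in
 * inode counts for creates.  The extra lqs reference taken during the check
 * is also dropped here, b=14784. */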
static int quota_pending_commit(struct obd_device *obd, unsigned int uid,
                                unsigned int gid, int pending[2], int isblk)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        int i;
        __u32 id[MAXQUOTAS] = { uid, gid };
        struct qunit_data qdata[MAXQUOTAS];
        ENTRY;

        CDEBUG(D_QUOTA, "%s: commit pending quota\n", obd->obd_name);
        CLASSERT(MAXQUOTAS < 4);

        do_gettimeofday(&work_start);
        for (i = 0; i < MAXQUOTAS; i++) {
                struct lustre_qunit_size *lqs = NULL;
                int flag = 0;

                qdata[i].qd_id = id[i];
                qdata[i].qd_flags = i;
                if (isblk)
                        QDATA_SET_BLK(&qdata[i]);
                qdata[i].qd_count = 0;

                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
                        continue;

                lqs = quota_search_lqs(LQS_KEY(i, qdata[i].qd_id), qctxt, 0);
                if (lqs == NULL || IS_ERR(lqs))
                        continue;

                spin_lock(&lqs->lqs_lock);
                if (isblk) {
                        if (lqs->lqs_bwrite_pending >= pending[i]) {
                                lqs->lqs_bwrite_pending -= pending[i];
                                flag = 1;
                        } else {
                                CERROR("%s: there are too many blocks!\n",
                                       obd->obd_name);
                        }
                } else {
                        if (lqs->lqs_iwrite_pending >= pending[i]) {
                                lqs->lqs_iwrite_pending -= pending[i];
                                flag = 1;
                        } else {
                                CERROR("%s: there are too many files!\n",
                                       obd->obd_name);
                        }
                }
                CDEBUG(D_QUOTA, "%s: lqs_pending=%lu pending[%d]=%d isblk=%d\n",
                       obd->obd_name,
                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
                       i, pending[i], isblk);

                spin_unlock(&lqs->lqs_lock);
                lqs_putref(lqs);
                /* Now that lqs_*_pending has been decremented, drop the extra
                 * reference taken in quota_check_common(), b=14784 */
                if (flag)
                        lqs_putref(lqs);
        }
        do_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
                                    LQUOTA_WAIT_FOR_COMMIT_INO,
                            timediff);

        RETURN(0);
}

static int filter_quota_pending_commit(struct obd_device *obd, unsigned int uid,
                                       unsigned int gid, int pending[2])
{
        return quota_pending_commit(obd, uid, gid, pending, LQUOTA_FLAGS_BLK);
}

static int mds_quota_init(void)
{
        return lustre_dquot_init();
}

static int mds_quota_exit(void)
{
        lustre_dquot_exit();
        return 0;
}

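/* The MDS acts as the quota master: qctxt_init() is passed dqacq_handler so
 * that dqacq/dqrel requests from the quota slaves (the OSTs, whose
 * filter_quota_setup() above passes NULL instead) are handled here. */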
static int mds_quota_setup(struct obd_device *obd)
{
        struct obd_device_target *obt = &obd->u.obt;
        struct mds_obd *mds = &obd->u.mds;
        int rc;
        ENTRY;

#ifdef HAVE_QUOTA64
        obt->obt_qfmt = LUSTRE_QUOTA_V2;
#else
        obt->obt_qfmt = LUSTRE_QUOTA_V1;
#endif
        mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
        atomic_set(&obt->obt_quotachecking, 1);
        /* initialize quota master and quota context */
        sema_init(&mds->mds_qonoff_sem, 1);
        rc = qctxt_init(obd, dqacq_handler);
        if (rc) {
                CERROR("%s: initialize quota context failed! (rc:%d)\n",
                       obd->obd_name, rc);
                RETURN(rc);
        }
        RETURN(rc);
}

static int mds_quota_cleanup(struct obd_device *obd)
{
        qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
        RETURN(0);
}

static int mds_quota_fs_cleanup(struct obd_device *obd)
{
        struct mds_obd *mds = &obd->u.mds;
        struct obd_quotactl oqctl;
        ENTRY;

        memset(&oqctl, 0, sizeof(oqctl));
        oqctl.qc_type = UGQUOTA;

        down(&mds->mds_qonoff_sem);
        mds_admin_quota_off(obd, &oqctl);
        up(&mds->mds_qonoff_sem);
        RETURN(0);
}

static int mds_quota_check(struct obd_export *exp, unsigned int uid,
                           unsigned int gid, int inodes, int pending[2],
                           quota_acquire acquire, struct obd_trans_info *oti,
                           struct inode *inode, int frags)
{
        return quota_is_set(exp->exp_obd, uid, gid, QI_SET) ?
                quota_chk_acq_common(exp, uid, gid, inodes, pending, 0,
                                     acquire, oti, inode, frags) : 0;
}

static int mds_quota_acquire(struct obd_device *obd, unsigned int uid,
                             unsigned int gid, struct obd_trans_info *oti)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        int rc;
        ENTRY;

        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 0, 1, oti);
        RETURN(rc);
}

static int mds_quota_pending_commit(struct obd_device *obd, unsigned int uid,
                                    unsigned int gid, int pending[2])
{
        return quota_pending_commit(obd, uid, gid, pending, 0);
}
#endif /* HAVE_QUOTA_SUPPORT */
#endif /* __KERNEL__ */

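/* Client-side (OSC) over-quota cache.
 *
 * An osc_quota_info entry hashed in qinfo_hash means "this uid/gid is over
 * quota on this OSC": osc_quota_setdq() inserts or removes entries based on
 * the OBD_FL_NO_USRQUOTA/OBD_FL_NO_GRPQUOTA flags carried in OST replies
 * (see filter_quota_getflag() above), and osc_quota_chkdq() consults the
 * cache, returning NO_QUOTA when an entry exists and QUOTA_OK otherwise. */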
struct osc_quota_info {
        struct list_head        oqi_hash;       /* hash list */
        struct client_obd      *oqi_cli;        /* osc obd */
        unsigned int            oqi_id;         /* uid/gid of a file */
        short                   oqi_type;       /* quota type */
};

spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;

static struct list_head qinfo_hash[NR_DQHASH];
/* SLAB cache for client quota context */
cfs_mem_cache_t *qinfo_cachep = NULL;
static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
                         __attribute__((__const__));

static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
{
        unsigned long tmp = ((unsigned long)cli >> 6) ^ id;
        tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
        return tmp;
}

/* caller must hold qinfo_list_lock */
static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
{
        struct list_head *head = qinfo_hash +
                hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);

        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
        list_add(&oqi->oqi_hash, head);
}

/* caller must hold qinfo_list_lock */
static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
{
        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
        list_del_init(&oqi->oqi_hash);
}

/* caller must hold qinfo_list_lock */
static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
                                                unsigned int id, int type)
{
        unsigned int hashent = hashfn(cli, id, type);
        struct osc_quota_info *oqi;

        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
        list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
                if (oqi->oqi_cli == cli &&
                    oqi->oqi_id == id && oqi->oqi_type == type)
                        return oqi;
        }
        return NULL;
}

static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
                                          unsigned int id, int type)
{
        struct osc_quota_info *oqi;
        ENTRY;

        OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_IO, sizeof(*oqi));
        if (!oqi)
                RETURN(NULL);

        CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
        oqi->oqi_cli = cli;
        oqi->oqi_id = id;
        oqi->oqi_type = type;

        RETURN(oqi);
}

static void free_qinfo(struct osc_quota_info *oqi)
{
        OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
}

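/* Return NO_QUOTA if either the uid or the gid has a cached over-quota entry,
 * QUOTA_OK otherwise; callers use this to decide whether cached writes are
 * still allowed for this uid/gid. */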
int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid)
{
        unsigned int id;
        int cnt, rc = QUOTA_OK;
        ENTRY;

        spin_lock(&qinfo_list_lock);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                struct osc_quota_info *oqi = NULL;

                id = (cnt == USRQUOTA) ? uid : gid;
                oqi = find_qinfo(cli, id, cnt);
                if (oqi) {
                        rc = NO_QUOTA;
                        break;
                }
        }
        spin_unlock(&qinfo_list_lock);

        RETURN(rc);
}

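/* Update the over-quota cache from an OST reply: for each quota type whose
 * OBD_MD_FL*QUOTA bit is set in 'valid', insert an entry when the reply
 * carries the corresponding OBD_FL_NO_*QUOTA flag and none is cached yet,
 * or remove the cached entry when the flag has been cleared.  The
 * preallocated oqi is freed whenever it does not end up in the hash. */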
int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid,
                    obd_flag valid, obd_flag flags)
{
        unsigned int id;
        obd_flag noquota;
        int cnt, rc = 0;
        ENTRY;

        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                struct osc_quota_info *oqi, *old;

                if (!(valid & ((cnt == USRQUOTA) ?
                    OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
                        continue;

                id = (cnt == USRQUOTA) ? uid : gid;
                noquota = (cnt == USRQUOTA) ?
                    (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);

                oqi = alloc_qinfo(cli, id, cnt);
                if (!oqi) {
                        rc = -ENOMEM;
                        break;
                }

                spin_lock(&qinfo_list_lock);
                old = find_qinfo(cli, id, cnt);
                if (old && !noquota)
                        remove_qinfo_hash(old);
                else if (!old && noquota)
                        insert_qinfo_hash(oqi);
                spin_unlock(&qinfo_list_lock);

                if (old || !noquota)
                        free_qinfo(oqi);
                if (old && !noquota)
                        free_qinfo(old);
        }

        RETURN(rc);
}
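
/*
 * Illustrative sketch (not part of this file's code paths): how the pieces
 * above fit together for cached writes on a client.  The variable names
 * below (reply_oa, cli, uid, gid) are placeholders for illustration only.
 *
 *     // after a bulk write reply from the OST:
 *     if (reply_oa->o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA))
 *             osc_quota_setdq(cli, reply_oa->o_uid, reply_oa->o_gid,
 *                             reply_oa->o_valid, reply_oa->o_flags);
 *
 *     // before queueing further cached pages for that uid/gid:
 *     if (osc_quota_chkdq(cli, uid, gid) == NO_QUOTA)
 *             // fall back to synchronous I/O so the OST can enforce quota
 */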

int osc_quota_cleanup(struct obd_device *obd)
{
        struct client_obd *cli = &obd->u.cli;
        struct osc_quota_info *oqi, *n;
        int i;
        ENTRY;

        spin_lock(&qinfo_list_lock);
        for (i = 0; i < NR_DQHASH; i++) {
                list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
                        if (oqi->oqi_cli != cli)
                                continue;
                        remove_qinfo_hash(oqi);
                        free_qinfo(oqi);
                }
        }
        spin_unlock(&qinfo_list_lock);

        RETURN(0);
}

int osc_quota_init(void)
{
        int i;
        ENTRY;

        LASSERT(qinfo_cachep == NULL);
        qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
                                            sizeof(struct osc_quota_info),
                                            0, 0);
        if (!qinfo_cachep)
                RETURN(-ENOMEM);

        for (i = 0; i < NR_DQHASH; i++)
                CFS_INIT_LIST_HEAD(qinfo_hash + i);

        RETURN(0);
}

int osc_quota_exit(void)
{
        struct osc_quota_info *oqi, *n;
        int i, rc;
        ENTRY;

        spin_lock(&qinfo_list_lock);
        for (i = 0; i < NR_DQHASH; i++) {
                list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
                        remove_qinfo_hash(oqi);
                        free_qinfo(oqi);
                }
        }
        spin_unlock(&qinfo_list_lock);

        rc = cfs_mem_cache_destroy(qinfo_cachep);
        LASSERTF(rc == 0, "couldn't destroy qinfo_cachep slab\n");
        qinfo_cachep = NULL;

        RETURN(0);
}

#ifdef __KERNEL__
#ifdef HAVE_QUOTA_SUPPORT
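/* Method tables exported to the rest of the stack: the MDS and OST (filter)
 * targets, and the MDC/OSC/LOV client layers below, each pick up the
 * quota_interface_t that matches their role. */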
quota_interface_t mds_quota_interface = {
        .quota_init     = mds_quota_init,
        .quota_exit     = mds_quota_exit,
        .quota_setup    = mds_quota_setup,
        .quota_cleanup  = mds_quota_cleanup,
        .quota_check    = target_quota_check,
        .quota_ctl      = mds_quota_ctl,
        .quota_fs_cleanup       = mds_quota_fs_cleanup,
        .quota_recovery = mds_quota_recovery,
        .quota_adjust   = mds_quota_adjust,
        .quota_chkquota = mds_quota_check,
        .quota_enforce  = target_quota_enforce,
        .quota_acquire  = mds_quota_acquire,
        .quota_pending_commit = mds_quota_pending_commit,
};

quota_interface_t filter_quota_interface = {
        .quota_setup    = filter_quota_setup,
        .quota_cleanup  = filter_quota_cleanup,
        .quota_check    = target_quota_check,
        .quota_ctl      = filter_quota_ctl,
        .quota_setinfo  = filter_quota_setinfo,
        .quota_clearinfo = filter_quota_clearinfo,
        .quota_enforce  = target_quota_enforce,
        .quota_getflag  = filter_quota_getflag,
        .quota_acquire  = filter_quota_acquire,
        .quota_adjust   = filter_quota_adjust,
        .quota_chkquota = filter_quota_check,
        .quota_adjust_qunit   = filter_quota_adjust_qunit,
        .quota_pending_commit = filter_quota_pending_commit,
};
#endif
#endif /* __KERNEL__ */

quota_interface_t mdc_quota_interface = {
        .quota_ctl      = client_quota_ctl,
        .quota_check    = client_quota_check,
        .quota_poll_check = client_quota_poll_check,
};

quota_interface_t osc_quota_interface = {
        .quota_ctl      = client_quota_ctl,
        .quota_check    = client_quota_check,
        .quota_poll_check = client_quota_poll_check,
        .quota_init     = osc_quota_init,
        .quota_exit     = osc_quota_exit,
        .quota_chkdq    = osc_quota_chkdq,
        .quota_setdq    = osc_quota_setdq,
        .quota_cleanup  = osc_quota_cleanup,
        .quota_adjust_qunit = client_quota_adjust_qunit,
};

quota_interface_t lov_quota_interface = {
        .quota_check    = lov_quota_check,
        .quota_ctl      = lov_quota_ctl,
        .quota_adjust_qunit = lov_quota_adjust_qunit,
};

#ifdef __KERNEL__

cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;

static int __init init_lustre_quota(void)
{
#ifdef HAVE_QUOTA_SUPPORT
        int rc = 0;

        lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
                                                proc_lustre_root,
                                                NULL, NULL);
        if (IS_ERR(lquota_type_proc_dir)) {
                CERROR("LProcFS failed in lquota-init\n");
                rc = PTR_ERR(lquota_type_proc_dir);
                return rc;
        }

        rc = qunit_cache_init();
        if (rc)
                return rc;

        PORTAL_SYMBOL_REGISTER(filter_quota_interface);
        PORTAL_SYMBOL_REGISTER(mds_quota_interface);
#endif
        PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
        PORTAL_SYMBOL_REGISTER(osc_quota_interface);
        PORTAL_SYMBOL_REGISTER(lov_quota_interface);
        return 0;
}

static void /*__exit*/ exit_lustre_quota(void)
{
        PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
#ifdef HAVE_QUOTA_SUPPORT
        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);

        qunit_cache_cleanup();

        if (lquota_type_proc_dir)
                lprocfs_remove(&lquota_type_proc_dir);
#endif
}

MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Quota");
MODULE_LICENSE("GPL");

cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);

#ifdef HAVE_QUOTA_SUPPORT
EXPORT_SYMBOL(mds_quota_interface);
EXPORT_SYMBOL(filter_quota_interface);
#endif
EXPORT_SYMBOL(mdc_quota_interface);
EXPORT_SYMBOL(osc_quota_interface);
EXPORT_SYMBOL(lov_quota_interface);
#endif /* __KERNEL__ */