Whamcloud - gitweb
add 2.6.27 kernel support
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LQUOTA
41
42 #ifdef __KERNEL__
43 # include <linux/version.h>
44 # include <linux/module.h>
45 # include <linux/init.h>
46 # include <linux/fs.h>
47 # include <linux/jbd.h>
48 # include <linux/smp_lock.h>
49 # include <linux/buffer_head.h>
50 # include <linux/workqueue.h>
51 # include <linux/mount.h>
52 #else /* __KERNEL__ */
53 # include <liblustre.h>
54 #endif
55
56 #include <obd_class.h>
57 #include <lustre_mds.h>
58 #include <lustre_dlm.h>
59 #include <lustre_cfg.h>
60 #include <obd_ost.h>
61 #include <lustre_fsfilt.h>
62 #include <lustre_quota.h>
63 #include <lprocfs_status.h>
64 #include "quota_internal.h"
65
66 #ifdef __KERNEL__
67
68 #ifdef HAVE_QUOTA_SUPPORT
69
/* Time at which quota_chk_acq_common() last printed its "still haven't
 * managed to acquire quota" warning; 0 means it has not been printed yet. */
static cfs_time_t last_print = 0;
/* Serialises reads and updates of last_print. */
static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
72
73 static int filter_quota_setup(struct obd_device *obd)
74 {
75         int rc = 0;
76         struct obd_device_target *obt = &obd->u.obt;
77         ENTRY;
78
79         init_rwsem(&obt->obt_rwsem);
80         obt->obt_qfmt = LUSTRE_QUOTA_V2;
81         sema_init(&obt->obt_quotachecking, 1);
82         rc = qctxt_init(obd, NULL);
83         if (rc)
84                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
85
86         RETURN(rc);
87 }
88
89 static int filter_quota_cleanup(struct obd_device *obd)
90 {
91         ENTRY;
92         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
93         RETURN(0);
94 }
95
96 static int filter_quota_setinfo(struct obd_device *obd, void *data)
97 {
98         struct obd_export *exp = data;
99         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
100         struct obd_import *imp = exp->exp_imp_reverse;
101         ENTRY;
102
103         LASSERT(imp != NULL);
104
105         /* setup the quota context import */
106         spin_lock(&qctxt->lqc_lock);
107         if (qctxt->lqc_import != NULL) {
108                 spin_unlock(&qctxt->lqc_lock);
109                 if (qctxt->lqc_import == imp)
110                         CDEBUG(D_WARNING, "%s: lqc_import(%p) of obd(%p) was "
111                                "activated already.\n", obd->obd_name, imp, obd);
112                 else
113                         CDEBUG(D_ERROR, "%s: lqc_import(%p:%p) of obd(%p) was "
114                                "activated by others.\n", obd->obd_name,
115                                qctxt->lqc_import, imp, obd);
116         } else {
117                 qctxt->lqc_import = imp;
118                 /* make imp's connect flags equal relative exp's connect flags
119                  * adding it to avoid the scan export list */
120                 imp->imp_connect_data.ocd_connect_flags |=
121                                 (exp->exp_connect_flags &
122                                  (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
123                 spin_unlock(&qctxt->lqc_lock);
124                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated "
125                        "now.\n", obd->obd_name, imp, obd);
126
127                 cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
128                 /* start quota slave recovery thread. (release high limits) */
129                 qslave_start_recovery(obd, qctxt);
130         }
131         RETURN(0);
132 }
133
134 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
135 {
136         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
137         struct obd_import *imp = exp->exp_imp_reverse;
138         ENTRY;
139
140         /* lquota may be not set up before destroying export, b=14896 */
141         if (!obd->obd_set_up)
142                 RETURN(0);
143
144         if (unlikely(imp == NULL))
145                 RETURN(0);
146
147         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
148          * should be invalid b=12374 */
149         spin_lock(&qctxt->lqc_lock);
150         if (qctxt->lqc_import == imp) {
151                 qctxt->lqc_import = NULL;
152                 spin_unlock(&qctxt->lqc_lock);
153                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is invalid now.\n",
154                        obd->obd_name, imp, obd);
155                 ptlrpc_cleanup_imp(imp);
156                 dqacq_interrupt(qctxt);
157         } else {
158                 spin_unlock(&qctxt->lqc_lock);
159         }
160         RETURN(0);
161 }
162
163 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
164 {
165         ENTRY;
166
167         if (!ll_sb_any_quota_active(obd->u.obt.obt_sb))
168                 RETURN(0);
169
170         if (ignore) {
171                 CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
172                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
173         } else {
174                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
175         }
176
177         RETURN(0);
178 }
179
180 #define GET_OA_ID(flag, oa) (flag == USRQUOTA ? oa->o_uid : oa->o_gid)
181 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
182 {
183         struct obd_device_target *obt = &obd->u.obt;
184         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
185         int err, cnt, rc = 0;
186         struct obd_quotactl *oqctl;
187         ENTRY;
188
189         if (!ll_sb_any_quota_active(obt->obt_sb))
190                 RETURN(0);
191
192         OBD_ALLOC_PTR(oqctl);
193         if (!oqctl) {
194                 CERROR("Not enough memory!");
195                 RETURN(-ENOMEM);
196         }
197
198         /* set over quota flags for a uid/gid */
199         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
200         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
201
202         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
203                 struct lustre_qunit_size *lqs = NULL;
204
205                 lqs = quota_search_lqs(LQS_KEY(cnt, GET_OA_ID(cnt, oa)),
206                                        qctxt, 0);
207                 if (lqs == NULL || IS_ERR(lqs)) {
208                         rc = PTR_ERR(lqs);
209                         break;
210                 } else {
211                         spin_lock(&lqs->lqs_lock);
212                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
213                                 oa->o_flags |= (cnt == USRQUOTA) ?
214                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
215                                 spin_unlock(&lqs->lqs_lock);
216                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
217                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
218                                        qctxt->lqc_sync_blk);
219                                 /* this is for quota_search_lqs */
220                                 lqs_putref(lqs);
221                                 continue;
222                         }
223                         spin_unlock(&lqs->lqs_lock);
224                         /* this is for quota_search_lqs */
225                         lqs_putref(lqs);
226                 }
227
228                 memset(oqctl, 0, sizeof(*oqctl));
229
230                 oqctl->qc_cmd = Q_GETQUOTA;
231                 oqctl->qc_type = cnt;
232                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
233                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
234                 if (err) {
235                         if (!rc)
236                                 rc = err;
237                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
238                                                              OBD_MD_FLGRPQUOTA);
239                         continue;
240                 }
241
242                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
243                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
244                     oqctl->qc_dqblk.dqb_bhardlimit))
245                         oa->o_flags |= (cnt == USRQUOTA) ?
246                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
247         }
248         OBD_FREE_PTR(oqctl);
249         RETURN(rc);
250 }
251
252 /**
253  * check whether the left quota of certain uid and gid can satisfy a block_write
254  * or inode_create rpc. When need to acquire quota, return QUOTA_RET_ACQUOTA
255  */
256 static int quota_check_common(struct obd_device *obd, const unsigned int id[],
257                               int pending[], int count, int cycle, int isblk,
258                               struct inode *inode, int frags)
259 {
260         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
261         int i;
262         struct qunit_data qdata[MAXQUOTAS];
263         int mb = 0;
264         int rc = 0, rc2[2] = { 0, 0 };
265         ENTRY;
266
267         spin_lock(&qctxt->lqc_lock);
268         if (!qctxt->lqc_valid){
269                 spin_unlock(&qctxt->lqc_lock);
270                 RETURN(rc);
271         }
272         spin_unlock(&qctxt->lqc_lock);
273
274         for (i = 0; i < MAXQUOTAS; i++) {
275                 struct lustre_qunit_size *lqs = NULL;
276
277                 qdata[i].qd_id = id[i];
278                 qdata[i].qd_flags = i;
279                 if (isblk)
280                         QDATA_SET_BLK(&qdata[i]);
281                 qdata[i].qd_count = 0;
282
283                 /* ignore root user */
284                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
285                         continue;
286
287                 lqs = quota_search_lqs(LQS_KEY(i, id[i]), qctxt, 0);
288                 if (lqs == NULL || IS_ERR(lqs))
289                         continue;
290
291                 if (IS_ERR(lqs)) {
292                         CERROR("can not find lqs for check_common: "
293                                "[id %u] [%c] [isblk %d] [count %d] [rc %ld]\n",
294                                id[i], i % 2 ? 'g': 'u', isblk, count,
295                                PTR_ERR(lqs));
296                         RETURN(PTR_ERR(lqs));
297                 }
298
299                 rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
300                 spin_lock(&lqs->lqs_lock);
301                 if (!cycle) {
302                         if (isblk) {
303                                 pending[i] = count * CFS_PAGE_SIZE;
304                                 /* in order to complete this write, we need extra
305                                  * meta blocks. This function can get it through
306                                  * data needed to be written b=16542 */
307                                 if (inode) {
308                                         mb = pending[i];
309                                         rc = fsfilt_get_mblk(obd, qctxt->lqc_sb,
310                                                              &mb, inode,frags);
311                                         if (rc)
312                                                 CDEBUG(D_ERROR,
313                                                        "can't get extra "
314                                                        "meta blocks.\n");
315                                         else
316                                                 pending[i] += mb;
317                                 }
318                                 lqs->lqs_bwrite_pending += pending[i];
319                         } else {
320                                 pending[i] = count;
321                                 lqs->lqs_iwrite_pending += pending[i];
322                         }
323                 }
324
325                 /* if xx_rec < 0, that means quota are releasing,
326                  * and it may return before we use quota. So if
327                  * we find this situation, we assuming it has
328                  * returned b=18491 */
329                 if (isblk && lqs->lqs_blk_rec < 0) {
330                         if (qdata[i].qd_count < -lqs->lqs_blk_rec)
331                                 qdata[i].qd_count = 0;
332                         else
333                                 qdata[i].qd_count += lqs->lqs_blk_rec;
334                 }
335                 if (!isblk && lqs->lqs_ino_rec < 0) {
336                         if (qdata[i].qd_count < -lqs->lqs_ino_rec)
337                                 qdata[i].qd_count = 0;
338                         else
339                                 qdata[i].qd_count += lqs->lqs_ino_rec;
340                 }
341
342                 CDEBUG(D_QUOTA, "[id %u] [%c] [isblk %d] [count %d]"
343                        " [lqs pending: %lu] [qd_count: "LPU64"] [metablocks: %d]"
344                        " [pending: %d]\n", id[i], i % 2 ? 'g': 'u', isblk, count,
345                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
346                        qdata[i].qd_count, mb, pending[i]);
347                 if (rc2[i] == QUOTA_RET_OK) {
348                         if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
349                                 rc2[i] = QUOTA_RET_ACQUOTA;
350                         if (!isblk && qdata[i].qd_count <
351                             lqs->lqs_iwrite_pending)
352                                 rc2[i] = QUOTA_RET_ACQUOTA;
353                 }
354
355                 spin_unlock(&lqs->lqs_lock);
356
357                 if (lqs->lqs_blk_rec  < 0 &&
358                     qdata[i].qd_count <
359                     lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
360                         OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);
361
362                 /* When cycle is zero, lqs_*_pending will be changed. We will
363                  * get reference of the lqs here and put reference of lqs in
364                  * quota_pending_commit b=14784 */
365                 if (!cycle)
366                         lqs_getref(lqs);
367
368                 /* this is for quota_search_lqs */
369                 lqs_putref(lqs);
370         }
371
372         if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
373                 RETURN(QUOTA_RET_ACQUOTA);
374         else
375                 RETURN(rc);
376 }
377
378 int quota_is_set(struct obd_device *obd, const unsigned int id[], int flag)
379 {
380         struct lustre_qunit_size *lqs;
381         int i, q_set = 0;
382
383         if (!ll_sb_any_quota_active(obd->u.obt.obt_qctxt.lqc_sb))
384                 RETURN(0);
385
386         for (i = 0; i < MAXQUOTAS; i++) {
387                 lqs = quota_search_lqs(LQS_KEY(i, id[i]),
388                                        &obd->u.obt.obt_qctxt, 0);
389                 if (lqs && !IS_ERR(lqs)) {
390                         if (lqs->lqs_flags & flag)
391                                 q_set = 1;
392                         lqs_putref(lqs);
393                 }
394         }
395
396         return q_set;
397 }
398
399 static int quota_chk_acq_common(struct obd_device *obd, const unsigned int id[],
400                                 int pending[], int count, quota_acquire acquire,
401                                 struct obd_trans_info *oti, int isblk,
402                                 struct inode *inode, int frags)
403 {
404         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
405         struct timeval work_start;
406         struct timeval work_end;
407         long timediff;
408         struct l_wait_info lwi = { 0 };
409         int rc = 0, cycle = 0, count_err = 1;
410         ENTRY;
411
412         if (!quota_is_set(obd, id, isblk ? QB_SET : QI_SET))
413                 RETURN(0);
414
415         CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
416         pending[USRQUOTA] = pending[GRPQUOTA] = 0;
417         /* Unfortunately, if quota master is too busy to handle the
418          * pre-dqacq in time and quota hash on ost is used up, we
419          * have to wait for the completion of in flight dqacq/dqrel,
420          * in order to get enough quota for write b=12588 */
421         do_gettimeofday(&work_start);
422         while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
423                                         inode, frags)) &
424                QUOTA_RET_ACQUOTA) {
425
426                 spin_lock(&qctxt->lqc_lock);
427                 if (!qctxt->lqc_import && oti) {
428                         spin_unlock(&qctxt->lqc_lock);
429
430                         LASSERT(oti && oti->oti_thread &&
431                                 oti->oti_thread->t_watchdog);
432
433                         lc_watchdog_disable(oti->oti_thread->t_watchdog);
434                         CDEBUG(D_QUOTA, "sleep for quota master\n");
435                         l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
436                                      &lwi);
437                         CDEBUG(D_QUOTA, "wake up when quota master is back\n");
438                         lc_watchdog_touch(oti->oti_thread->t_watchdog,
439                                  GET_TIMEOUT(oti->oti_thread->t_svc));
440                 } else {
441                         spin_unlock(&qctxt->lqc_lock);
442                 }
443
444                 cycle++;
445                 if (isblk)
446                         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
447                 /* after acquire(), we should run quota_check_common again
448                  * so that we confirm there are enough quota to finish write */
449                 rc = acquire(obd, id, oti, isblk);
450
451                 /* please reference to dqacq_completion for the below */
452                 /* a new request is finished, try again */
453                 if (rc == QUOTA_REQ_RETURNED) {
454                         CDEBUG(D_QUOTA, "finish a quota req, try again\n");
455                         continue;
456                 }
457
458                 /* it is out of quota already */
459                 if (rc == -EDQUOT) {
460                         CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
461                         break;
462                 }
463
464                 /* Related quota has been disabled by master, but enabled by
465                  * slave, do not try again. */
466                 if (unlikely(rc == -ESRCH)) {
467                         CERROR("mismatched quota configuration, stop try.\n");
468                         break;
469                 }
470
471                 /* -EBUSY and others, wait a second and try again */
472                 if (rc < 0) {
473                         cfs_waitq_t        waitq;
474                         struct l_wait_info lwi;
475
476                         if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
477                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
478                                          GET_TIMEOUT(oti->oti_thread->t_svc));
479                         CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
480                                count_err++);
481
482                         init_waitqueue_head(&waitq);
483                         lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
484                                           NULL);
485                         l_wait_event(waitq, 0, &lwi);
486                 }
487
488                 if (rc < 0 || cycle % 10 == 0) {
489                         spin_lock(&last_print_lock);
490                         if (last_print == 0 ||
491                             cfs_time_before((last_print + cfs_time_seconds(30)),
492                                             cfs_time_current())) {
493                                 last_print = cfs_time_current();
494                                 spin_unlock(&last_print_lock);
495                                 CWARN("still haven't managed to acquire quota "
496                                       "space from the quota master after %d "
497                                       "retries (err=%d, rc=%d)\n",
498                                       cycle, count_err - 1, rc);
499                         } else {
500                                 spin_unlock(&last_print_lock);
501                         }
502                 }
503
504                 CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
505                        cycle);
506         }
507         do_gettimeofday(&work_end);
508         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
509         lprocfs_counter_add(qctxt->lqc_stats,
510                             isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
511                                     LQUOTA_WAIT_FOR_CHK_INO,
512                             timediff);
513
514         RETURN(rc);
515 }
516
517 /**
518  * when a block_write or inode_create rpc is finished, adjust the record for
519  * pending blocks and inodes
520  */
521 static int quota_pending_commit(struct obd_device *obd, const unsigned int id[],
522                                 int pending[], int isblk)
523 {
524         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
525         struct timeval work_start;
526         struct timeval work_end;
527         long timediff;
528         int i;
529         struct qunit_data qdata[MAXQUOTAS];
530         ENTRY;
531
532         CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
533         CLASSERT(MAXQUOTAS < 4);
534         if (!ll_sb_any_quota_active(qctxt->lqc_sb))
535                 RETURN(0);
536
537         do_gettimeofday(&work_start);
538         for (i = 0; i < MAXQUOTAS; i++) {
539                 struct lustre_qunit_size *lqs = NULL;
540
541                 LASSERT(pending[i] >= 0);
542                 if (pending[i] == 0)
543                         continue;
544
545                 qdata[i].qd_id = id[i];
546                 qdata[i].qd_flags = i;
547                 if (isblk)
548                         QDATA_SET_BLK(&qdata[i]);
549                 qdata[i].qd_count = 0;
550
551                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
552                         continue;
553
554                 lqs = quota_search_lqs(LQS_KEY(i, qdata[i].qd_id), qctxt, 0);
555                 if (lqs == NULL || IS_ERR(lqs)) {
556                         CERROR("can not find lqs for pending_commit: "
557                                "[id %u] [%c] [pending %u] [isblk %d] (rc %ld), "
558                                "maybe cause unexpected lqs refcount error!\n",
559                                id[i], i ? 'g': 'u', pending[i], isblk,
560                                lqs ? PTR_ERR(lqs) : -1);
561                         continue;
562                 }
563
564                 spin_lock(&lqs->lqs_lock);
565                 if (isblk) {
566                         LASSERTF(lqs->lqs_bwrite_pending >= pending[i],
567                                  "there are too many blocks! [id %u] [%c] "
568                                  "[bwrite_pending %lu] [pending %u]\n",
569                                  id[i], i % 2 ? 'g' : 'u',
570                                  lqs->lqs_bwrite_pending, pending[i]);
571
572                         lqs->lqs_bwrite_pending -= pending[i];
573                 } else {
574                         LASSERTF(lqs->lqs_iwrite_pending >= pending[i],
575                                 "there are too many files! [id %u] [%c] "
576                                 "[iwrite_pending %lu] [pending %u]\n",
577                                 id[i], i % 2 ? 'g' : 'u',
578                                 lqs->lqs_iwrite_pending, pending[i]);
579
580                         lqs->lqs_iwrite_pending -= pending[i];
581                 }
582                 CDEBUG(D_QUOTA, "%s: lqs_pending=%lu pending[%d]=%d isblk=%d\n",
583                        obd->obd_name,
584                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
585                        i, pending[i], isblk);
586                 spin_unlock(&lqs->lqs_lock);
587
588                 /* for quota_search_lqs in pending_commit */
589                 lqs_putref(lqs);
590                 /* for quota_search_lqs in quota_check */
591                 lqs_putref(lqs);
592         }
593         do_gettimeofday(&work_end);
594         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
595         lprocfs_counter_add(qctxt->lqc_stats,
596                             isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
597                                     LQUOTA_WAIT_FOR_COMMIT_INO,
598                             timediff);
599
600         RETURN(0);
601 }
602
/* MDS quota init hook: forwards to lustre_dquot_init() and returns its
 * result unchanged. */
static int mds_quota_init(void)
{
        int rc = lustre_dquot_init();

        return rc;
}
607
/* MDS quota exit hook: calls lustre_dquot_exit() and always reports 0. */
static int mds_quota_exit(void)
{
        lustre_dquot_exit();

        return 0;
}
613
614 static int mds_quota_setup(struct obd_device *obd)
615 {
616         struct obd_device_target *obt = &obd->u.obt;
617         struct mds_obd *mds = &obd->u.mds;
618         int rc;
619         ENTRY;
620
621         if (unlikely(mds->mds_quota)) {
622                 CWARN("try to reinitialize quota context!\n");
623                 RETURN(0);
624         }
625
626         init_rwsem(&obt->obt_rwsem);
627         obt->obt_qfmt = LUSTRE_QUOTA_V2;
628         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
629         sema_init(&obt->obt_quotachecking, 1);
630         /* initialize quota master and quota context */
631         sema_init(&mds->mds_qonoff_sem, 1);
632         rc = qctxt_init(obd, dqacq_handler);
633         if (rc) {
634                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
635                 RETURN(rc);
636         }
637         mds->mds_quota = 1;
638         RETURN(rc);
639 }
640
641 static int mds_quota_cleanup(struct obd_device *obd)
642 {
643         ENTRY;
644         if (unlikely(!obd->u.mds.mds_quota))
645                 RETURN(0);
646
647         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
648         RETURN(0);
649 }
650
651 static int mds_quota_setinfo(struct obd_device *obd, void *data)
652 {
653         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
654         ENTRY;
655
656         if (unlikely(!obd->u.mds.mds_quota))
657                 RETURN(0);
658
659         if (data != NULL)
660                 QUOTA_MASTER_READY(qctxt);
661         else
662                 QUOTA_MASTER_UNREADY(qctxt);
663         RETURN(0);
664 }
665
666 static int mds_quota_fs_cleanup(struct obd_device *obd)
667 {
668         struct mds_obd *mds = &obd->u.mds;
669         struct obd_quotactl oqctl;
670         ENTRY;
671
672         if (unlikely(!mds->mds_quota))
673                 RETURN(0);
674
675         mds->mds_quota = 0;
676         memset(&oqctl, 0, sizeof(oqctl));
677         oqctl.qc_type = UGQUOTA;
678
679         down(&mds->mds_qonoff_sem);
680         mds_admin_quota_off(obd, &oqctl);
681         up(&mds->mds_qonoff_sem);
682         RETURN(0);
683 }
684
685 static int quota_acquire_common(struct obd_device *obd, const unsigned int id[],
686                                 struct obd_trans_info *oti, int isblk)
687 {
688         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
689         int rc;
690         ENTRY;
691
692         rc = qctxt_adjust_qunit(obd, qctxt, id, isblk, 1, oti);
693         RETURN(rc);
694 }
695
696 #endif /* HAVE_QUOTA_SUPPORT */
697 #endif /* __KERNEL__ */
698
/* One entry of the client-side qinfo_hash table, keyed by (client obd, id,
 * quota type).  osc_quota_chkdq() reports NO_QUOTA when such an entry is
 * found for either id of a request. */
struct osc_quota_info {
        struct list_head        oqi_hash;       /* hash list */
        struct client_obd      *oqi_cli;        /* osc obd */
        unsigned int            oqi_id;         /* uid/gid of a file */
        short                   oqi_type;       /* quota type */
};
705
/* Must be held while searching or modifying qinfo_hash. */
spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;

/* Hash table of osc_quota_info entries; buckets are selected by hashfn(). */
static struct list_head qinfo_hash[NR_DQHASH];
/* SLAB cache for client quota context */
cfs_mem_cache_t *qinfo_cachep = NULL;
711
712 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
713                          __attribute__((__const__));
714
715 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
716 {
717         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
718         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
719         return tmp;
720 }
721
722 /* caller must hold qinfo_list_lock */
723 static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
724 {
725         struct list_head *head = qinfo_hash +
726                 hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
727
728         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
729         list_add(&oqi->oqi_hash, head);
730 }
731
732 /* caller must hold qinfo_list_lock */
733 static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
734 {
735         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
736         list_del_init(&oqi->oqi_hash);
737 }
738
739 /* caller must hold qinfo_list_lock */
740 static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
741                                                 unsigned int id, int type)
742 {
743         unsigned int hashent = hashfn(cli, id, type);
744         struct osc_quota_info *oqi;
745         ENTRY;
746
747         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
748         list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
749                 if (oqi->oqi_cli == cli &&
750                     oqi->oqi_id == id && oqi->oqi_type == type)
751                         return oqi;
752         }
753         RETURN(NULL);
754 }
755
756 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
757                                           unsigned int id, int type)
758 {
759         struct osc_quota_info *oqi;
760         ENTRY;
761
762         OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi));
763         if(!oqi)
764                 RETURN(NULL);
765
766         CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
767         oqi->oqi_cli = cli;
768         oqi->oqi_id = id;
769         oqi->oqi_type = type;
770
771         RETURN(oqi);
772 }
773
774 static void free_qinfo(struct osc_quota_info *oqi)
775 {
776         OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
777 }
778
779 int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
780 {
781         unsigned int id;
782         int cnt, rc = QUOTA_OK;
783         ENTRY;
784
785         spin_lock(&qinfo_list_lock);
786         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
787                 struct osc_quota_info *oqi = NULL;
788
789                 id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
790                 oqi = find_qinfo(cli, id, cnt);
791                 if (oqi) {
792                         rc = NO_QUOTA;
793                         break;
794                 }
795         }
796         spin_unlock(&qinfo_list_lock);
797
798         RETURN(rc);
799 }
800
801 int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
802                     obd_flag valid, obd_flag flags)
803 {
804         unsigned int id;
805         obd_flag noquota;
806         int cnt, rc = 0;
807         ENTRY;
808
809
810         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
811                 struct osc_quota_info *oqi, *old;
812
813                 if (!(valid & ((cnt == USRQUOTA) ?
814                     OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
815                         continue;
816
817                 id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
818                 noquota = (cnt == USRQUOTA) ?
819                     (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);
820
821                 oqi = alloc_qinfo(cli, id, cnt);
822                 if (oqi) {
823                         spin_lock(&qinfo_list_lock);
824
825                         old = find_qinfo(cli, id, cnt);
826                         if (old && !noquota)
827                                 remove_qinfo_hash(old);
828                         else if (!old && noquota)
829                                 insert_qinfo_hash(oqi);
830
831                         spin_unlock(&qinfo_list_lock);
832
833                         if (old || !noquota)
834                                 free_qinfo(oqi);
835                         if (old && !noquota)
836                                 free_qinfo(old);
837                 } else {
838                         CERROR("not enough mem!\n");
839                         rc = -ENOMEM;
840                         break;
841                 }
842         }
843
844         RETURN(rc);
845 }
846
847 int osc_quota_cleanup(struct obd_device *obd)
848 {
849         struct client_obd *cli = &obd->u.cli;
850         struct osc_quota_info *oqi, *n;
851         int i;
852         ENTRY;
853
854         spin_lock(&qinfo_list_lock);
855         for (i = 0; i < NR_DQHASH; i++) {
856                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
857                         if (oqi->oqi_cli != cli)
858                                 continue;
859                         remove_qinfo_hash(oqi);
860                         free_qinfo(oqi);
861                 }
862         }
863         spin_unlock(&qinfo_list_lock);
864
865         RETURN(0);
866 }
867
868 int osc_quota_init(void)
869 {
870         int i;
871         ENTRY;
872
873         LASSERT(qinfo_cachep == NULL);
874         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
875                                             sizeof(struct osc_quota_info),
876                                             0, 0);
877         if (!qinfo_cachep)
878                 RETURN(-ENOMEM);
879
880         for (i = 0; i < NR_DQHASH; i++)
881                 CFS_INIT_LIST_HEAD(qinfo_hash + i);
882
883         RETURN(0);
884 }
885
886 int osc_quota_exit(void)
887 {
888         struct osc_quota_info *oqi, *n;
889         int i, rc;
890         ENTRY;
891
892         spin_lock(&qinfo_list_lock);
893         for (i = 0; i < NR_DQHASH; i++) {
894                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
895                         remove_qinfo_hash(oqi);
896                         free_qinfo(oqi);
897                 }
898         }
899         spin_unlock(&qinfo_list_lock);
900
901         rc = cfs_mem_cache_destroy(qinfo_cachep);
902         LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
903         qinfo_cachep = NULL;
904
905         RETURN(0);
906 }
907
908 #ifdef __KERNEL__
909 #ifdef HAVE_QUOTA_SUPPORT
910 quota_interface_t mds_quota_interface = {
911         .quota_init     = mds_quota_init,
912         .quota_exit     = mds_quota_exit,
913         .quota_setup    = mds_quota_setup,
914         .quota_cleanup  = mds_quota_cleanup,
915         .quota_check    = target_quota_check,
916         .quota_ctl      = mds_quota_ctl,
917         .quota_setinfo  = mds_quota_setinfo,
918         .quota_fs_cleanup = mds_quota_fs_cleanup,
919         .quota_recovery = mds_quota_recovery,
920         .quota_adjust   = mds_quota_adjust,
921         .quota_chkquota = quota_chk_acq_common,
922         .quota_acquire  = quota_acquire_common,
923         .quota_pending_commit = quota_pending_commit,
924 };
925
926 quota_interface_t filter_quota_interface = {
927         .quota_setup    = filter_quota_setup,
928         .quota_cleanup  = filter_quota_cleanup,
929         .quota_check    = target_quota_check,
930         .quota_ctl      = filter_quota_ctl,
931         .quota_setinfo  = filter_quota_setinfo,
932         .quota_clearinfo = filter_quota_clearinfo,
933         .quota_enforce  = filter_quota_enforce,
934         .quota_getflag  = filter_quota_getflag,
935         .quota_acquire  = quota_acquire_common,
936         .quota_adjust   = filter_quota_adjust,
937         .quota_chkquota = quota_chk_acq_common,
938         .quota_adjust_qunit   = filter_quota_adjust_qunit,
939         .quota_pending_commit = quota_pending_commit,
940 };
941 #endif
942 #endif /* __KERNEL__ */
943
944 quota_interface_t mdc_quota_interface = {
945         .quota_ctl      = client_quota_ctl,
946         .quota_check    = client_quota_check,
947         .quota_poll_check = client_quota_poll_check,
948 };
949
950 quota_interface_t lmv_quota_interface = {
951         .quota_ctl      = lmv_quota_ctl,
952         .quota_check    = lmv_quota_check,
953 };
954
955 quota_interface_t osc_quota_interface = {
956         .quota_ctl      = client_quota_ctl,
957         .quota_check    = client_quota_check,
958         .quota_poll_check = client_quota_poll_check,
959         .quota_init     = osc_quota_init,
960         .quota_exit     = osc_quota_exit,
961         .quota_chkdq    = osc_quota_chkdq,
962         .quota_setdq    = osc_quota_setdq,
963         .quota_cleanup  = osc_quota_cleanup,
964         .quota_adjust_qunit = client_quota_adjust_qunit,
965 };
966
967 quota_interface_t lov_quota_interface = {
968         .quota_ctl      = lov_quota_ctl,
969         .quota_check    = lov_quota_check,
970         .quota_adjust_qunit = lov_quota_adjust_qunit,
971 };
972
973 #ifdef __KERNEL__
974
975 cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
976
977 static int __init init_lustre_quota(void)
978 {
979 #ifdef HAVE_QUOTA_SUPPORT
980         int rc = 0;
981
982         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
983                                                 proc_lustre_root,
984                                                 NULL, NULL);
985         if (IS_ERR(lquota_type_proc_dir)) {
986                 CERROR("LProcFS failed in lquota-init\n");
987                 rc = PTR_ERR(lquota_type_proc_dir);
988                 return rc;
989         }
990
991         rc = qunit_cache_init();
992         if (rc)
993                 return rc;
994
995         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
996         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
997 #endif
998         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
999         PORTAL_SYMBOL_REGISTER(lmv_quota_interface);
1000         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
1001         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
1002         return 0;
1003 }
1004
1005 static void /*__exit*/ exit_lustre_quota(void)
1006 {
1007         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
1008         PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface);
1009         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
1010         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
1011 #ifdef HAVE_QUOTA_SUPPORT
1012         PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
1013         PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
1014
1015         qunit_cache_cleanup();
1016
1017         if (lquota_type_proc_dir)
1018                 lprocfs_remove(&lquota_type_proc_dir);
1019 #endif
1020 }
1021
1022 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
1023 MODULE_DESCRIPTION("Lustre Quota");
1024 MODULE_LICENSE("GPL");
1025
1026 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
1027
1028 #ifdef HAVE_QUOTA_SUPPORT
1029 EXPORT_SYMBOL(mds_quota_interface);
1030 EXPORT_SYMBOL(filter_quota_interface);
1031 #endif
1032 EXPORT_SYMBOL(mdc_quota_interface);
1033 EXPORT_SYMBOL(lmv_quota_interface);
1034 EXPORT_SYMBOL(osc_quota_interface);
1035 EXPORT_SYMBOL(lov_quota_interface);
#endif /* __KERNEL__ */