Whamcloud - gitweb
LU-369 assert(t_watchdog) failed in quota_chk_acq_common()
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LQUOTA
41
42 #ifdef __KERNEL__
43 # include <linux/version.h>
44 # include <linux/module.h>
45 # include <linux/init.h>
46 # include <linux/fs.h>
47 # include <linux/jbd.h>
48 # include <linux/smp_lock.h>
49 # include <linux/buffer_head.h>
50 # include <linux/workqueue.h>
51 # include <linux/mount.h>
52 #else /* __KERNEL__ */
53 # include <liblustre.h>
54 #endif
55
56 #include <obd_class.h>
57 #include <lustre_mds.h>
58 #include <lustre_dlm.h>
59 #include <lustre_cfg.h>
60 #include <obd_ost.h>
61 #include <lustre_fsfilt.h>
62 #include <lustre_quota.h>
63 #include <lprocfs_status.h>
64 #include "quota_internal.h"
65
66 #ifdef __KERNEL__
67
68 #ifdef HAVE_QUOTA_SUPPORT
69
70 static cfs_time_t last_print = 0;
71 static cfs_spinlock_t last_print_lock = CFS_SPIN_LOCK_UNLOCKED;
72
73 static int filter_quota_setup(struct obd_device *obd)
74 {
75         int rc = 0;
76         struct obd_device_target *obt = &obd->u.obt;
77         ENTRY;
78
79         cfs_init_rwsem(&obt->obt_rwsem);
80         obt->obt_qfmt = LUSTRE_QUOTA_V2;
81         cfs_sema_init(&obt->obt_quotachecking, 1);
82         rc = qctxt_init(obd, NULL);
83         if (rc)
84                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
85
86         RETURN(rc);
87 }
88
89 static int filter_quota_cleanup(struct obd_device *obd)
90 {
91         ENTRY;
92         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
93         RETURN(0);
94 }
95
96 static int filter_quota_setinfo(struct obd_device *obd, void *data)
97 {
98         struct obd_export *exp = data;
99         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
100         struct obd_import *imp = exp->exp_imp_reverse;
101         ENTRY;
102
103         LASSERT(imp != NULL);
104
105         /* setup the quota context import */
106         cfs_spin_lock(&qctxt->lqc_lock);
107         if (qctxt->lqc_import != NULL) {
108                 cfs_spin_unlock(&qctxt->lqc_lock);
109                 if (qctxt->lqc_import == imp)
110                         CDEBUG(D_WARNING, "%s: lqc_import(%p) of obd(%p) was "
111                                "activated already.\n", obd->obd_name, imp, obd);
112                 else
113                         CERROR("%s: lqc_import(%p:%p) of obd(%p) was "
114                                "activated by others.\n", obd->obd_name,
115                                qctxt->lqc_import, imp, obd);
116         } else {
117                 qctxt->lqc_import = imp;
118                 /* make imp's connect flags equal relative exp's connect flags
119                  * adding it to avoid the scan export list */
120                 imp->imp_connect_data.ocd_connect_flags |=
121                                 (exp->exp_connect_flags &
122                                  (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
123                 cfs_spin_unlock(&qctxt->lqc_lock);
124                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated "
125                        "now.\n", obd->obd_name, imp, obd);
126
127                 cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
128                 /* start quota slave recovery thread. (release high limits) */
129                 qslave_start_recovery(obd, qctxt);
130         }
131         RETURN(0);
132 }
133
134 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
135 {
136         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
137         struct obd_import *imp = exp->exp_imp_reverse;
138         ENTRY;
139
140         /* lquota may be not set up before destroying export, b=14896 */
141         if (!obd->obd_set_up)
142                 RETURN(0);
143
144         if (unlikely(imp == NULL))
145                 RETURN(0);
146
147         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
148          * should be invalid b=12374 */
149         cfs_spin_lock(&qctxt->lqc_lock);
150         if (qctxt->lqc_import == imp) {
151                 qctxt->lqc_import = NULL;
152                 cfs_spin_unlock(&qctxt->lqc_lock);
153                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is invalid now.\n",
154                        obd->obd_name, imp, obd);
155                 ptlrpc_cleanup_imp(imp);
156                 dqacq_interrupt(qctxt);
157         } else {
158                 cfs_spin_unlock(&qctxt->lqc_lock);
159         }
160         RETURN(0);
161 }
162
163 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
164 {
165         ENTRY;
166
167         if (!ll_sb_any_quota_active(obd->u.obt.obt_sb))
168                 RETURN(0);
169
170         if (ignore) {
171                 CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
172                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
173         } else {
174                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
175         }
176
177         RETURN(0);
178 }
179
180 #define GET_OA_ID(flag, oa) (flag == USRQUOTA ? oa->o_uid : oa->o_gid)
181 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
182 {
183         struct obd_device_target *obt = &obd->u.obt;
184         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
185         int err, cnt, rc = 0;
186         struct obd_quotactl *oqctl;
187         ENTRY;
188
189         if (!ll_sb_any_quota_active(obt->obt_sb))
190                 RETURN(0);
191
192         OBD_ALLOC_PTR(oqctl);
193         if (!oqctl)
194                 RETURN(-ENOMEM);
195
196         /* set over quota flags for a uid/gid */
197         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
198         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
199
200         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
201                 struct lustre_qunit_size *lqs = NULL;
202
203                 lqs = quota_search_lqs(LQS_KEY(cnt, GET_OA_ID(cnt, oa)),
204                                        qctxt, 0);
205                 if (lqs == NULL || IS_ERR(lqs)) {
206                         rc = PTR_ERR(lqs);
207                         if (rc)
208                                 CDEBUG(D_QUOTA, "search lqs for %s %d failed, "
209                                        "(rc = %d)\n",
210                                        cnt == USRQUOTA ? "user" : "group",
211                                        cnt == USRQUOTA ? oa->o_uid : oa->o_gid,
212                                        rc);
213                         break;
214                 } else {
215                         cfs_spin_lock(&lqs->lqs_lock);
216                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
217                                 oa->o_flags |= (cnt == USRQUOTA) ?
218                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
219                                 cfs_spin_unlock(&lqs->lqs_lock);
220                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
221                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
222                                        qctxt->lqc_sync_blk);
223                                 /* this is for quota_search_lqs */
224                                 lqs_putref(lqs);
225                                 continue;
226                         }
227                         cfs_spin_unlock(&lqs->lqs_lock);
228                         /* this is for quota_search_lqs */
229                         lqs_putref(lqs);
230                 }
231
232                 memset(oqctl, 0, sizeof(*oqctl));
233
234                 oqctl->qc_cmd = Q_GETQUOTA;
235                 oqctl->qc_type = cnt;
236                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
237                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
238                 if (err) {
239                         if (!rc)
240                                 rc = err;
241                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
242                                                              OBD_MD_FLGRPQUOTA);
243                         CDEBUG(D_QUOTA, "fsfilt getquota for %s %d failed, "
244                                "(rc = %d)\n",
245                                cnt == USRQUOTA ? "user" : "group",
246                                cnt == USRQUOTA ? oa->o_uid : oa->o_gid, err);
247                         continue;
248                 }
249
250                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
251                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
252                     oqctl->qc_dqblk.dqb_bhardlimit)) {
253                         oa->o_flags |= (cnt == USRQUOTA) ?
254                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
255                         CDEBUG(D_QUOTA, "out of quota for %s %d\n",
256                                cnt == USRQUOTA ? "user" : "group",
257                                cnt == USRQUOTA ? oa->o_uid : oa->o_gid);
258                 }
259         }
260         OBD_FREE_PTR(oqctl);
261         RETURN(rc);
262 }
263
264 /**
265  * check whether the left quota of certain uid and gid can satisfy a block_write
266  * or inode_create rpc. When need to acquire quota, return QUOTA_RET_ACQUOTA
267  */
268 static int quota_check_common(struct obd_device *obd, const unsigned int id[],
269                               int pending[], int count, int cycle, int isblk,
270                               struct inode *inode, int frags)
271 {
272         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
273         int i;
274         struct qunit_data qdata[MAXQUOTAS];
275         int mb = 0;
276         int rc = 0, rc2[2] = { 0, 0 };
277         ENTRY;
278
279         cfs_spin_lock(&qctxt->lqc_lock);
280         if (!qctxt->lqc_valid){
281                 cfs_spin_unlock(&qctxt->lqc_lock);
282                 RETURN(rc);
283         }
284         cfs_spin_unlock(&qctxt->lqc_lock);
285
286         for (i = 0; i < MAXQUOTAS; i++) {
287                 struct lustre_qunit_size *lqs = NULL;
288
289                 qdata[i].qd_id = id[i];
290                 qdata[i].qd_flags = i;
291                 if (isblk)
292                         QDATA_SET_BLK(&qdata[i]);
293                 qdata[i].qd_count = 0;
294
295                 /* ignore root user */
296                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
297                         continue;
298
299                 lqs = quota_search_lqs(LQS_KEY(i, id[i]), qctxt, 0);
300                 if (lqs == NULL || IS_ERR(lqs))
301                         continue;
302
303                 if (IS_ERR(lqs)) {
304                         CERROR("can not find lqs for check_common: "
305                                "[id %u] [%c] [isblk %d] [count %d] [rc %ld]\n",
306                                id[i], i % 2 ? 'g': 'u', isblk, count,
307                                PTR_ERR(lqs));
308                         RETURN(PTR_ERR(lqs));
309                 }
310
311                 rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
312                 cfs_spin_lock(&lqs->lqs_lock);
313                 if (!cycle) {
314                         if (isblk) {
315                                 pending[i] = count * CFS_PAGE_SIZE;
316                                 /* in order to complete this write, we need extra
317                                  * meta blocks. This function can get it through
318                                  * data needed to be written b=16542 */
319                                 if (inode) {
320                                         mb = pending[i];
321                                         rc = fsfilt_get_mblk(obd, qctxt->lqc_sb,
322                                                              &mb, inode,
323                                                              frags);
324                                         if (rc)
325                                                 CERROR("%s: can't get extra "
326                                                        "meta blocks\n",
327                                                        obd->obd_name);
328                                         else
329                                                 pending[i] += mb;
330                                 }
331                                 lqs->lqs_bwrite_pending += pending[i];
332                         } else {
333                                 pending[i] = count;
334                                 lqs->lqs_iwrite_pending += pending[i];
335                         }
336                 }
337
338                 /* if xx_rec < 0, that means quota are releasing,
339                  * and it may return before we use quota. So if
340                  * we find this situation, we assuming it has
341                  * returned b=18491 */
342                 if (isblk && lqs->lqs_blk_rec < 0) {
343                         if (qdata[i].qd_count < -lqs->lqs_blk_rec)
344                                 qdata[i].qd_count = 0;
345                         else
346                                 qdata[i].qd_count += lqs->lqs_blk_rec;
347                 }
348                 if (!isblk && lqs->lqs_ino_rec < 0) {
349                         if (qdata[i].qd_count < -lqs->lqs_ino_rec)
350                                 qdata[i].qd_count = 0;
351                         else
352                                 qdata[i].qd_count += lqs->lqs_ino_rec;
353                 }
354
355                 CDEBUG(D_QUOTA, "[id %u] [%c] [isblk %d] [count %d]"
356                        " [lqs pending: %lu] [qd_count: "LPU64"] [metablocks: %d]"
357                        " [pending: %d]\n", id[i], i % 2 ? 'g': 'u', isblk, count,
358                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
359                        qdata[i].qd_count, mb, pending[i]);
360                 if (rc2[i] == QUOTA_RET_OK) {
361                         if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
362                                 rc2[i] = QUOTA_RET_ACQUOTA;
363                         if (!isblk && qdata[i].qd_count <
364                             lqs->lqs_iwrite_pending)
365                                 rc2[i] = QUOTA_RET_ACQUOTA;
366                 }
367
368                 cfs_spin_unlock(&lqs->lqs_lock);
369
370                 if (lqs->lqs_blk_rec  < 0 &&
371                     qdata[i].qd_count <
372                     lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
373                         OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);
374
375                 /* When cycle is zero, lqs_*_pending will be changed. We will
376                  * get reference of the lqs here and put reference of lqs in
377                  * quota_pending_commit b=14784 */
378                 if (!cycle)
379                         lqs_getref(lqs);
380
381                 /* this is for quota_search_lqs */
382                 lqs_putref(lqs);
383         }
384
385         if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
386                 RETURN(QUOTA_RET_ACQUOTA);
387         else
388                 RETURN(rc);
389 }
390
391 int quota_is_set(struct obd_device *obd, const unsigned int id[], int flag)
392 {
393         struct lustre_qunit_size *lqs;
394         int i, q_set = 0;
395
396         if (!ll_sb_any_quota_active(obd->u.obt.obt_qctxt.lqc_sb))
397                 RETURN(0);
398
399         for (i = 0; i < MAXQUOTAS; i++) {
400                 lqs = quota_search_lqs(LQS_KEY(i, id[i]),
401                                        &obd->u.obt.obt_qctxt, 0);
402                 if (lqs && !IS_ERR(lqs)) {
403                         if (lqs->lqs_flags & flag)
404                                 q_set = 1;
405                         lqs_putref(lqs);
406                 }
407         }
408
409         return q_set;
410 }
411
412 static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp,
413                                 const unsigned int id[], int pending[],
414                                 int count, quota_acquire acquire,
415                                 struct obd_trans_info *oti, int isblk,
416                                 struct inode *inode, int frags)
417 {
418         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
419         struct timeval work_start;
420         struct timeval work_end;
421         long timediff;
422         struct l_wait_info lwi = { 0 };
423         int rc = 0, cycle = 0, count_err = 1;
424         ENTRY;
425
426         if (!quota_is_set(obd, id, isblk ? QB_SET : QI_SET))
427                 RETURN(0);
428
429         if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
430                 /* If the client has been evicted or if it
431                  * timed out and tried to reconnect already,
432                  * abort the request immediately */
433                 RETURN(-ENOTCONN);
434
435         CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
436         pending[USRQUOTA] = pending[GRPQUOTA] = 0;
437         /* Unfortunately, if quota master is too busy to handle the
438          * pre-dqacq in time and quota hash on ost is used up, we
439          * have to wait for the completion of in flight dqacq/dqrel,
440          * in order to get enough quota for write b=12588 */
441         cfs_gettimeofday(&work_start);
442         while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
443                                         inode, frags)) &
444                QUOTA_RET_ACQUOTA) {
445
446                 cfs_spin_lock(&qctxt->lqc_lock);
447                 if (!qctxt->lqc_import && oti) {
448                         cfs_spin_unlock(&qctxt->lqc_lock);
449                         LASSERT(oti->oti_thread);
450                         /* The recovery thread doesn't have watchdog
451                          * attached. LU-369 */
452                         if (oti->oti_thread->t_watchdog)
453                                 lc_watchdog_disable(oti->oti_thread->\
454                                                 t_watchdog);
455                         CDEBUG(D_QUOTA, "sleep for quota master\n");
456                         l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
457                                      &lwi);
458                         CDEBUG(D_QUOTA, "wake up when quota master is back\n");
459                         if (oti->oti_thread->t_watchdog)
460                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
461                                        CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
462                 } else {
463                         cfs_spin_unlock(&qctxt->lqc_lock);
464                 }
465
466                 cycle++;
467                 if (isblk)
468                         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
469                 /* after acquire(), we should run quota_check_common again
470                  * so that we confirm there are enough quota to finish write */
471                 rc = acquire(obd, id, oti, isblk);
472
473                 /* please reference to dqacq_completion for the below */
474                 /* a new request is finished, try again */
475                 if (rc == QUOTA_REQ_RETURNED) {
476                         CDEBUG(D_QUOTA, "finish a quota req, try again\n");
477                         continue;
478                 }
479
480                 /* it is out of quota already */
481                 if (rc == -EDQUOT) {
482                         CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
483                         break;
484                 }
485
486                 /* Related quota has been disabled by master, but enabled by
487                  * slave, do not try again. */
488                 if (unlikely(rc == -ESRCH)) {
489                         CERROR("mismatched quota configuration, stop try.\n");
490                         break;
491                 }
492
493                 if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
494                         /* The client has been evicted or tried to
495                          * to reconnect already, abort the request */
496                         RETURN(-ENOTCONN);
497
498                 /* -EBUSY and others, wait a second and try again */
499                 if (rc < 0) {
500                         cfs_waitq_t        waitq;
501                         struct l_wait_info lwi;
502
503                         if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
504                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
505                                        CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
506                         CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
507                                count_err++);
508
509                         cfs_waitq_init(&waitq);
510                         lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
511                                           NULL);
512                         l_wait_event(waitq, 0, &lwi);
513                 }
514
515                 if (rc < 0 || cycle % 10 == 0) {
516                         cfs_spin_lock(&last_print_lock);
517                         if (last_print == 0 ||
518                             cfs_time_before((last_print + cfs_time_seconds(30)),
519                                             cfs_time_current())) {
520                                 last_print = cfs_time_current();
521                                 cfs_spin_unlock(&last_print_lock);
522                                 CWARN("still haven't managed to acquire quota "
523                                       "space from the quota master after %d "
524                                       "retries (err=%d, rc=%d)\n",
525                                       cycle, count_err - 1, rc);
526                         } else {
527                                 cfs_spin_unlock(&last_print_lock);
528                         }
529                 }
530
531                 CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
532                        cycle);
533         }
534         cfs_gettimeofday(&work_end);
535         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
536         lprocfs_counter_add(qctxt->lqc_stats,
537                             isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
538                                     LQUOTA_WAIT_FOR_CHK_INO,
539                             timediff);
540
541         if (rc > 0)
542                 rc = 0;
543         RETURN(rc);
544 }
545
546 /**
547  * when a block_write or inode_create rpc is finished, adjust the record for
548  * pending blocks and inodes
549  */
550 static int quota_pending_commit(struct obd_device *obd, const unsigned int id[],
551                                 int pending[], int isblk)
552 {
553         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
554         struct timeval work_start;
555         struct timeval work_end;
556         long timediff;
557         int i;
558         struct qunit_data qdata[MAXQUOTAS];
559         ENTRY;
560
561         CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
562         CLASSERT(MAXQUOTAS < 4);
563         if (!ll_sb_any_quota_active(qctxt->lqc_sb))
564                 RETURN(0);
565
566         cfs_gettimeofday(&work_start);
567         for (i = 0; i < MAXQUOTAS; i++) {
568                 struct lustre_qunit_size *lqs = NULL;
569
570                 LASSERT(pending[i] >= 0);
571                 if (pending[i] == 0)
572                         continue;
573
574                 qdata[i].qd_id = id[i];
575                 qdata[i].qd_flags = i;
576                 if (isblk)
577                         QDATA_SET_BLK(&qdata[i]);
578                 qdata[i].qd_count = 0;
579
580                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
581                         continue;
582
583                 lqs = quota_search_lqs(LQS_KEY(i, qdata[i].qd_id), qctxt, 0);
584                 if (lqs == NULL || IS_ERR(lqs)) {
585                         CERROR("can not find lqs for pending_commit: "
586                                "[id %u] [%c] [pending %u] [isblk %d] (rc %ld), "
587                                "maybe cause unexpected lqs refcount error!\n",
588                                id[i], i ? 'g': 'u', pending[i], isblk,
589                                lqs ? PTR_ERR(lqs) : -1);
590                         continue;
591                 }
592
593                 cfs_spin_lock(&lqs->lqs_lock);
594                 if (isblk) {
595                         LASSERTF(lqs->lqs_bwrite_pending >= pending[i],
596                                  "there are too many blocks! [id %u] [%c] "
597                                  "[bwrite_pending %lu] [pending %u]\n",
598                                  id[i], i % 2 ? 'g' : 'u',
599                                  lqs->lqs_bwrite_pending, pending[i]);
600
601                         lqs->lqs_bwrite_pending -= pending[i];
602                 } else {
603                         LASSERTF(lqs->lqs_iwrite_pending >= pending[i],
604                                 "there are too many files! [id %u] [%c] "
605                                 "[iwrite_pending %lu] [pending %u]\n",
606                                 id[i], i % 2 ? 'g' : 'u',
607                                 lqs->lqs_iwrite_pending, pending[i]);
608
609                         lqs->lqs_iwrite_pending -= pending[i];
610                 }
611                 CDEBUG(D_QUOTA, "%s: lqs_pending=%lu pending[%d]=%d isblk=%d\n",
612                        obd->obd_name,
613                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
614                        i, pending[i], isblk);
615                 cfs_spin_unlock(&lqs->lqs_lock);
616
617                 /* for quota_search_lqs in pending_commit */
618                 lqs_putref(lqs);
619                 /* for quota_search_lqs in quota_check */
620                 lqs_putref(lqs);
621         }
622         cfs_gettimeofday(&work_end);
623         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
624         lprocfs_counter_add(qctxt->lqc_stats,
625                             isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
626                                     LQUOTA_WAIT_FOR_COMMIT_INO,
627                             timediff);
628
629         RETURN(0);
630 }
631
/* Module-level quota initialization for the MDS: returns whatever
 * lustre_dquot_init() returns. */
static int mds_quota_init(void)
{
        int rc = lustre_dquot_init();

        return rc;
}
636
/* Counterpart of mds_quota_init(): calls lustre_dquot_exit() and always
 * reports success. */
static int mds_quota_exit(void)
{
        lustre_dquot_exit();

        return 0;
}
642
643 static int mds_quota_setup(struct obd_device *obd)
644 {
645         struct obd_device_target *obt = &obd->u.obt;
646         struct mds_obd *mds = &obd->u.mds;
647         int rc;
648         ENTRY;
649
650         if (unlikely(mds->mds_quota)) {
651                 CWARN("try to reinitialize quota context!\n");
652                 RETURN(0);
653         }
654
655         cfs_init_rwsem(&obt->obt_rwsem);
656         obt->obt_qfmt = LUSTRE_QUOTA_V2;
657         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
658         cfs_sema_init(&obt->obt_quotachecking, 1);
659         /* initialize quota master and quota context */
660         cfs_init_rwsem(&mds->mds_qonoff_sem);
661         rc = qctxt_init(obd, dqacq_handler);
662         if (rc) {
663                 CERROR("%s: initialize quota context failed! (rc:%d)\n",
664                        obd->obd_name, rc);
665                 RETURN(rc);
666         }
667         mds->mds_quota = 1;
668         RETURN(rc);
669 }
670
671 static int mds_quota_cleanup(struct obd_device *obd)
672 {
673         ENTRY;
674         if (unlikely(!obd->u.mds.mds_quota))
675                 RETURN(0);
676
677         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
678         RETURN(0);
679 }
680
681 static int mds_quota_setinfo(struct obd_device *obd, void *data)
682 {
683         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
684         ENTRY;
685
686         if (unlikely(!obd->u.mds.mds_quota))
687                 RETURN(0);
688
689         if (data != NULL)
690                 QUOTA_MASTER_READY(qctxt);
691         else
692                 QUOTA_MASTER_UNREADY(qctxt);
693         RETURN(0);
694 }
695
696 static int mds_quota_fs_cleanup(struct obd_device *obd)
697 {
698         struct mds_obd *mds = &obd->u.mds;
699         struct obd_quotactl oqctl;
700         ENTRY;
701
702         if (unlikely(!mds->mds_quota))
703                 RETURN(0);
704
705         mds->mds_quota = 0;
706         memset(&oqctl, 0, sizeof(oqctl));
707         oqctl.qc_type = UGQUOTA;
708
709         cfs_down_write(&mds->mds_qonoff_sem);
710         mds_admin_quota_off(obd, &oqctl);
711         cfs_up_write(&mds->mds_qonoff_sem);
712         RETURN(0);
713 }
714
715 static int quota_acquire_common(struct obd_device *obd, const unsigned int id[],
716                                 struct obd_trans_info *oti, int isblk)
717 {
718         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
719         int rc;
720         ENTRY;
721
722         rc = qctxt_adjust_qunit(obd, qctxt, id, isblk, 1, oti);
723         RETURN(rc);
724 }
725
726 #endif /* HAVE_QUOTA_SUPPORT */
727 #endif /* __KERNEL__ */
728
729 struct osc_quota_info {
730         cfs_list_t              oqi_hash;       /* hash list */
731         struct client_obd      *oqi_cli;        /* osc obd */
732         unsigned int            oqi_id;         /* uid/gid of a file */
733         short                   oqi_type;       /* quota type */
734 };
735
736 cfs_spinlock_t qinfo_list_lock = CFS_SPIN_LOCK_UNLOCKED;
737
738 static cfs_list_t qinfo_hash[NR_DQHASH];
739 /* SLAB cache for client quota context */
740 cfs_mem_cache_t *qinfo_cachep = NULL;
741
742 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
743                          __attribute__((__const__));
744
745 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
746 {
747         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
748         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
749         return tmp;
750 }
751
752 /* caller must hold qinfo_list_lock */
753 static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
754 {
755         cfs_list_t *head = qinfo_hash +
756                 hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
757
758         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
759         cfs_list_add(&oqi->oqi_hash, head);
760 }
761
762 /* caller must hold qinfo_list_lock */
763 static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
764 {
765         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
766         cfs_list_del_init(&oqi->oqi_hash);
767 }
768
769 /* caller must hold qinfo_list_lock */
770 static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
771                                                 unsigned int id, int type)
772 {
773         unsigned int hashent = hashfn(cli, id, type);
774         struct osc_quota_info *oqi;
775         ENTRY;
776
777         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
778         cfs_list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
779                 if (oqi->oqi_cli == cli &&
780                     oqi->oqi_id == id && oqi->oqi_type == type)
781                         return oqi;
782         }
783         RETURN(NULL);
784 }
785
786 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
787                                           unsigned int id, int type)
788 {
789         struct osc_quota_info *oqi;
790         ENTRY;
791
792         OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_IO, sizeof(*oqi));
793         if(!oqi)
794                 RETURN(NULL);
795
796         CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
797         oqi->oqi_cli = cli;
798         oqi->oqi_id = id;
799         oqi->oqi_type = type;
800
801         RETURN(oqi);
802 }
803
804 static void free_qinfo(struct osc_quota_info *oqi)
805 {
806         OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
807 }
808
809 int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
810 {
811         unsigned int id;
812         int cnt, rc = QUOTA_OK;
813         ENTRY;
814
815         cfs_spin_lock(&qinfo_list_lock);
816         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
817                 struct osc_quota_info *oqi = NULL;
818
819                 id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
820                 oqi = find_qinfo(cli, id, cnt);
821                 if (oqi) {
822                         rc = NO_QUOTA;
823                         break;
824                 }
825         }
826         cfs_spin_unlock(&qinfo_list_lock);
827
828         if (rc == NO_QUOTA)
829                 CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n",
830                        cnt == USRQUOTA ? "user" : "group", id);
831         RETURN(rc);
832 }
833
834 int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
835                     obd_flag valid, obd_flag flags)
836 {
837         unsigned int id;
838         obd_flag noquota;
839         int cnt, rc = 0;
840         ENTRY;
841
842
843         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
844                 struct osc_quota_info *oqi = NULL, *old;
845
846                 if (!(valid & ((cnt == USRQUOTA) ?
847                     OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
848                         continue;
849
850                 id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
851                 noquota = (cnt == USRQUOTA) ?
852                     (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);
853
854                 if (noquota) {
855                         oqi = alloc_qinfo(cli, id, cnt);
856                         if (!oqi) {
857                                 rc = -ENOMEM;
858                                 CDEBUG(D_QUOTA, "setdq for %s %d failed, "
859                                        "(rc = %d)\n",
860                                        cnt == USRQUOTA ? "user" : "group",
861                                        id, rc);
862                                 break;
863                         }
864                 }
865
866                 cfs_spin_lock(&qinfo_list_lock);
867                 old = find_qinfo(cli, id, cnt);
868                 if (old && !noquota)
869                         remove_qinfo_hash(old);
870                 else if (!old && noquota)
871                         insert_qinfo_hash(oqi);
872                 cfs_spin_unlock(&qinfo_list_lock);
873
874                 if (old && !noquota)
875                         CDEBUG(D_QUOTA, "setdq to remove for %s %d\n",
876                                cnt == USRQUOTA ? "user" : "group", id);
877                 else if (!old && noquota)
878                         CDEBUG(D_QUOTA, "setdq to insert for %s %d\n",
879                                cnt == USRQUOTA ? "user" : "group", id);
880
881                 if (old) {
882                         if (noquota)
883                                 free_qinfo(oqi);
884                         else
885                                 free_qinfo(old);
886                 }
887         }
888
889         RETURN(rc);
890 }
891
892 int osc_quota_cleanup(struct obd_device *obd)
893 {
894         struct client_obd *cli = &obd->u.cli;
895         struct osc_quota_info *oqi, *n;
896         int i;
897         ENTRY;
898
899         cfs_spin_lock(&qinfo_list_lock);
900         for (i = 0; i < NR_DQHASH; i++) {
901                 cfs_list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
902                         if (oqi->oqi_cli != cli)
903                                 continue;
904                         remove_qinfo_hash(oqi);
905                         free_qinfo(oqi);
906                 }
907         }
908         cfs_spin_unlock(&qinfo_list_lock);
909
910         RETURN(0);
911 }
912
913 int osc_quota_init(void)
914 {
915         int i;
916         ENTRY;
917
918         LASSERT(qinfo_cachep == NULL);
919         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
920                                             sizeof(struct osc_quota_info),
921                                             0, 0);
922         if (!qinfo_cachep)
923                 RETURN(-ENOMEM);
924
925         for (i = 0; i < NR_DQHASH; i++)
926                 CFS_INIT_LIST_HEAD(qinfo_hash + i);
927
928         RETURN(0);
929 }
930
931 int osc_quota_exit(void)
932 {
933         struct osc_quota_info *oqi, *n;
934         int i, rc;
935         ENTRY;
936
937         cfs_spin_lock(&qinfo_list_lock);
938         for (i = 0; i < NR_DQHASH; i++) {
939                 cfs_list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
940                         remove_qinfo_hash(oqi);
941                         free_qinfo(oqi);
942                 }
943         }
944         cfs_spin_unlock(&qinfo_list_lock);
945
946         rc = cfs_mem_cache_destroy(qinfo_cachep);
947         LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
948         qinfo_cachep = NULL;
949
950         RETURN(0);
951 }
952
953 #ifdef __KERNEL__
954 #ifdef HAVE_QUOTA_SUPPORT
955 quota_interface_t mds_quota_interface = {
956         .quota_init     = mds_quota_init,
957         .quota_exit     = mds_quota_exit,
958         .quota_setup    = mds_quota_setup,
959         .quota_cleanup  = mds_quota_cleanup,
960         .quota_check    = target_quota_check,
961         .quota_ctl      = mds_quota_ctl,
962         .quota_setinfo  = mds_quota_setinfo,
963         .quota_fs_cleanup = mds_quota_fs_cleanup,
964         .quota_recovery = mds_quota_recovery,
965         .quota_adjust   = mds_quota_adjust,
966         .quota_chkquota = quota_chk_acq_common,
967         .quota_acquire  = quota_acquire_common,
968         .quota_pending_commit = quota_pending_commit,
969 };
970
971 quota_interface_t filter_quota_interface = {
972         .quota_setup    = filter_quota_setup,
973         .quota_cleanup  = filter_quota_cleanup,
974         .quota_check    = target_quota_check,
975         .quota_ctl      = filter_quota_ctl,
976         .quota_setinfo  = filter_quota_setinfo,
977         .quota_clearinfo = filter_quota_clearinfo,
978         .quota_enforce  = filter_quota_enforce,
979         .quota_getflag  = filter_quota_getflag,
980         .quota_acquire  = quota_acquire_common,
981         .quota_adjust   = filter_quota_adjust,
982         .quota_chkquota = quota_chk_acq_common,
983         .quota_adjust_qunit   = filter_quota_adjust_qunit,
984         .quota_pending_commit = quota_pending_commit,
985 };
986 #endif
987 #endif /* __KERNEL__ */
988
989 quota_interface_t mdc_quota_interface = {
990         .quota_ctl      = client_quota_ctl,
991         .quota_check    = client_quota_check,
992         .quota_poll_check = client_quota_poll_check,
993 };
994
995 quota_interface_t lmv_quota_interface = {
996         .quota_ctl      = lmv_quota_ctl,
997         .quota_check    = lmv_quota_check,
998 };
999
1000 quota_interface_t osc_quota_interface = {
1001         .quota_ctl      = client_quota_ctl,
1002         .quota_check    = client_quota_check,
1003         .quota_poll_check = client_quota_poll_check,
1004         .quota_init     = osc_quota_init,
1005         .quota_exit     = osc_quota_exit,
1006         .quota_chkdq    = osc_quota_chkdq,
1007         .quota_setdq    = osc_quota_setdq,
1008         .quota_cleanup  = osc_quota_cleanup,
1009         .quota_adjust_qunit = client_quota_adjust_qunit,
1010 };
1011
1012 quota_interface_t lov_quota_interface = {
1013         .quota_ctl      = lov_quota_ctl,
1014         .quota_check    = lov_quota_check,
1015         .quota_adjust_qunit = lov_quota_adjust_qunit,
1016 };
1017
1018 #ifdef __KERNEL__
1019
1020 cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
1021
1022 static int __init init_lustre_quota(void)
1023 {
1024 #ifdef HAVE_QUOTA_SUPPORT
1025         int rc = 0;
1026
1027         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
1028                                                 proc_lustre_root,
1029                                                 NULL, NULL);
1030         if (IS_ERR(lquota_type_proc_dir)) {
1031                 CERROR("LProcFS failed in lquota-init\n");
1032                 rc = PTR_ERR(lquota_type_proc_dir);
1033                 return rc;
1034         }
1035
1036         rc = qunit_cache_init();
1037         if (rc)
1038                 return rc;
1039
1040         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
1041         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
1042 #endif
1043         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
1044         PORTAL_SYMBOL_REGISTER(lmv_quota_interface);
1045         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
1046         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
1047         return 0;
1048 }
1049
1050 static void /*__exit*/ exit_lustre_quota(void)
1051 {
1052         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
1053         PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface);
1054         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
1055         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
1056 #ifdef HAVE_QUOTA_SUPPORT
1057         PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
1058         PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
1059
1060         qunit_cache_cleanup();
1061
1062         if (lquota_type_proc_dir)
1063                 lprocfs_remove(&lquota_type_proc_dir);
1064 #endif
1065 }
1066
1067 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
1068 MODULE_DESCRIPTION("Lustre Quota");
1069 MODULE_LICENSE("GPL");
1070
1071 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
1072
/* Interface tables exported in every kernel build. */
EXPORT_SYMBOL(mdc_quota_interface);
EXPORT_SYMBOL(lmv_quota_interface);
EXPORT_SYMBOL(osc_quota_interface);
EXPORT_SYMBOL(lov_quota_interface);
/* Interface tables that are only defined with quota support. */
#ifdef HAVE_QUOTA_SUPPORT
EXPORT_SYMBOL(mds_quota_interface);
EXPORT_SYMBOL(filter_quota_interface);
#endif
#endif /* __KERNEL__ */