Whamcloud - gitweb
LU-1057 quota: speed up lookup in osc_quota_chkdq
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  *
32  * Copyright (c) 2011 Whamcloud, Inc.
33  *
34  */
35 /*
36  * This file is part of Lustre, http://www.lustre.org/
37  * Lustre is a trademark of Sun Microsystems, Inc.
38  */
39
40 #ifndef EXPORT_SYMTAB
41 # define EXPORT_SYMTAB
42 #endif
43 #define DEBUG_SUBSYSTEM S_LQUOTA
44
45 #ifdef __KERNEL__
46 # include <linux/version.h>
47 # include <linux/module.h>
48 # include <linux/init.h>
49 # include <linux/fs.h>
50 # include <linux/jbd.h>
51 # include <linux/smp_lock.h>
52 # include <linux/buffer_head.h>
53 # include <linux/workqueue.h>
54 # include <linux/mount.h>
55 #else /* __KERNEL__ */
56 # include <liblustre.h>
57 #endif
58
59 #include <obd_class.h>
60 #include <lustre_mds.h>
61 #include <lustre_dlm.h>
62 #include <lustre_cfg.h>
63 #include <obd_ost.h>
64 #include <lustre_fsfilt.h>
65 #include <lustre_quota.h>
66 #include <lprocfs_status.h>
67 #include "quota_internal.h"
68
69 #ifdef __KERNEL__
70
71 #ifdef HAVE_QUOTA_SUPPORT
72
73 static cfs_time_t last_print = 0;
74 static cfs_spinlock_t last_print_lock = CFS_SPIN_LOCK_UNLOCKED;
75
76 static int filter_quota_setup(struct obd_device *obd)
77 {
78         int rc = 0;
79         struct obd_device_target *obt = &obd->u.obt;
80         ENTRY;
81
82         cfs_init_rwsem(&obt->obt_rwsem);
83         obt->obt_qfmt = LUSTRE_QUOTA_V2;
84         cfs_sema_init(&obt->obt_quotachecking, 1);
85         rc = qctxt_init(obd, NULL);
86         if (rc)
87                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
88
89         RETURN(rc);
90 }
91
92 static int filter_quota_cleanup(struct obd_device *obd)
93 {
94         ENTRY;
95         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
96         RETURN(0);
97 }
98
99 static int filter_quota_setinfo(struct obd_device *obd, void *data)
100 {
101         struct obd_export *exp = data;
102         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
103         struct obd_import *imp = exp->exp_imp_reverse;
104         ENTRY;
105
106         LASSERT(imp != NULL);
107
108         /* setup the quota context import */
109         cfs_spin_lock(&qctxt->lqc_lock);
110         if (qctxt->lqc_import != NULL) {
111                 cfs_spin_unlock(&qctxt->lqc_lock);
112                 if (qctxt->lqc_import == imp)
113                         CDEBUG(D_WARNING, "%s: lqc_import(%p) of obd(%p) was "
114                                "activated already.\n", obd->obd_name, imp, obd);
115                 else
116                         CERROR("%s: lqc_import(%p:%p) of obd(%p) was "
117                                "activated by others.\n", obd->obd_name,
118                                qctxt->lqc_import, imp, obd);
119         } else {
120                 qctxt->lqc_import = imp;
121                 /* make imp's connect flags equal relative exp's connect flags
122                  * adding it to avoid the scan export list */
123                 imp->imp_connect_data.ocd_connect_flags |=
124                                 (exp->exp_connect_flags &
125                                  (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
126                 cfs_spin_unlock(&qctxt->lqc_lock);
127                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated "
128                        "now.\n", obd->obd_name, imp, obd);
129
130                 cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
131                 /* start quota slave recovery thread. (release high limits) */
132                 qslave_start_recovery(obd, qctxt);
133         }
134         RETURN(0);
135 }
136
137 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
138 {
139         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
140         struct obd_import *imp = exp->exp_imp_reverse;
141         ENTRY;
142
143         /* lquota may be not set up before destroying export, b=14896 */
144         if (!obd->obd_set_up)
145                 RETURN(0);
146
147         if (unlikely(imp == NULL))
148                 RETURN(0);
149
150         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
151          * should be invalid b=12374 */
152         cfs_spin_lock(&qctxt->lqc_lock);
153         if (qctxt->lqc_import == imp) {
154                 qctxt->lqc_import = NULL;
155                 cfs_spin_unlock(&qctxt->lqc_lock);
156                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is invalid now.\n",
157                        obd->obd_name, imp, obd);
158                 ptlrpc_cleanup_imp(imp);
159                 dqacq_interrupt(qctxt);
160         } else {
161                 cfs_spin_unlock(&qctxt->lqc_lock);
162         }
163         RETURN(0);
164 }
165
166 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
167 {
168         ENTRY;
169
170         if (!ll_sb_any_quota_active(obd->u.obt.obt_sb))
171                 RETURN(0);
172
173         if (ignore) {
174                 CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
175                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
176         } else {
177                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
178         }
179
180         RETURN(0);
181 }
182
183 #define GET_OA_ID(flag, oa) (flag == USRQUOTA ? oa->o_uid : oa->o_gid)
184 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
185 {
186         struct obd_device_target *obt = &obd->u.obt;
187         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
188         int err, cnt, rc = 0;
189         struct obd_quotactl *oqctl;
190         ENTRY;
191
192         if (!ll_sb_any_quota_active(obt->obt_sb))
193                 RETURN(0);
194
195         OBD_ALLOC_PTR(oqctl);
196         if (!oqctl)
197                 RETURN(-ENOMEM);
198
199         /* set over quota flags for a uid/gid */
200         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
201         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
202
203         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
204                 struct lustre_qunit_size *lqs = NULL;
205
206                 lqs = quota_search_lqs(LQS_KEY(cnt, GET_OA_ID(cnt, oa)),
207                                        qctxt, 0);
208                 if (IS_ERR(lqs)) {
209                         rc = PTR_ERR(lqs);
210                         CDEBUG(D_QUOTA, "search lqs for %s %d failed, "
211                                "(rc = %d)\n",
212                                cnt == USRQUOTA ? "user" : "group",
213                                GET_OA_ID(cnt, oa), rc);
214                         break;
215                 } else if (lqs == NULL) {
216                         /* continue to check group quota if the file's owner
217                          * doesn't have quota limit. LU-530 */
218                         continue;
219                 } else {
220                         cfs_spin_lock(&lqs->lqs_lock);
221                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
222                                 oa->o_flags |= (cnt == USRQUOTA) ?
223                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
224                                 cfs_spin_unlock(&lqs->lqs_lock);
225                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
226                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
227                                        qctxt->lqc_sync_blk);
228                                 /* this is for quota_search_lqs */
229                                 lqs_putref(lqs);
230                                 continue;
231                         }
232                         cfs_spin_unlock(&lqs->lqs_lock);
233                         /* this is for quota_search_lqs */
234                         lqs_putref(lqs);
235                 }
236
237                 memset(oqctl, 0, sizeof(*oqctl));
238
239                 oqctl->qc_cmd = Q_GETQUOTA;
240                 oqctl->qc_type = cnt;
241                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
242                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
243                 if (err) {
244                         if (!rc)
245                                 rc = err;
246                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
247                                                              OBD_MD_FLGRPQUOTA);
248                         CDEBUG(D_QUOTA, "fsfilt getquota for %s %d failed, "
249                                "(rc = %d)\n",
250                                cnt == USRQUOTA ? "user" : "group",
251                                cnt == USRQUOTA ? oa->o_uid : oa->o_gid, err);
252                         continue;
253                 }
254
255                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
256                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
257                     oqctl->qc_dqblk.dqb_bhardlimit)) {
258                         oa->o_flags |= (cnt == USRQUOTA) ?
259                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
260                         CDEBUG(D_QUOTA, "out of quota for %s %d\n",
261                                cnt == USRQUOTA ? "user" : "group",
262                                cnt == USRQUOTA ? oa->o_uid : oa->o_gid);
263                 }
264         }
265         OBD_FREE_PTR(oqctl);
266         RETURN(rc);
267 }
268
269 /**
270  * check whether the left quota of certain uid and gid can satisfy a block_write
271  * or inode_create rpc. When need to acquire quota, return QUOTA_RET_ACQUOTA
272  */
273 static int quota_check_common(struct obd_device *obd, const unsigned int id[],
274                               int pending[], int count, int cycle, int isblk,
275                               struct inode *inode, int frags)
276 {
277         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
278         int i;
279         struct qunit_data qdata[MAXQUOTAS];
280         int mb = 0;
281         int rc = 0, rc2[2] = { 0, 0 };
282         ENTRY;
283
284         cfs_spin_lock(&qctxt->lqc_lock);
285         if (!qctxt->lqc_valid){
286                 cfs_spin_unlock(&qctxt->lqc_lock);
287                 RETURN(rc);
288         }
289         cfs_spin_unlock(&qctxt->lqc_lock);
290
291         for (i = 0; i < MAXQUOTAS; i++) {
292                 struct lustre_qunit_size *lqs = NULL;
293
294                 qdata[i].qd_id = id[i];
295                 qdata[i].qd_flags = i;
296                 if (isblk)
297                         QDATA_SET_BLK(&qdata[i]);
298                 qdata[i].qd_count = 0;
299
300                 /* ignore root user */
301                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
302                         continue;
303
304                 lqs = quota_search_lqs(LQS_KEY(i, id[i]), qctxt, 0);
305                 if (lqs == NULL || IS_ERR(lqs))
306                         continue;
307
308                 if (IS_ERR(lqs)) {
309                         CERROR("can not find lqs for check_common: "
310                                "[id %u] [%c] [isblk %d] [count %d] [rc %ld]\n",
311                                id[i], i % 2 ? 'g': 'u', isblk, count,
312                                PTR_ERR(lqs));
313                         RETURN(PTR_ERR(lqs));
314                 }
315
316                 rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
317                 cfs_spin_lock(&lqs->lqs_lock);
318                 if (!cycle) {
319                         if (isblk) {
320                                 pending[i] = count * CFS_PAGE_SIZE;
321                                 /* in order to complete this write, we need extra
322                                  * meta blocks. This function can get it through
323                                  * data needed to be written b=16542 */
324                                 if (inode) {
325                                         mb = pending[i];
326                                         rc = fsfilt_get_mblk(obd, qctxt->lqc_sb,
327                                                              &mb, inode,
328                                                              frags);
329                                         if (rc)
330                                                 CERROR("%s: can't get extra "
331                                                        "meta blocks\n",
332                                                        obd->obd_name);
333                                         else
334                                                 pending[i] += mb;
335                                 }
336                                 LASSERTF(pending[i] >= 0, "pending is not valid"
337                                          ", count=%d, mb=%d\n", count, mb);
338                                 lqs->lqs_bwrite_pending += pending[i];
339                         } else {
340                                 pending[i] = count;
341                                 lqs->lqs_iwrite_pending += pending[i];
342                         }
343                 }
344
345                 /* if xx_rec < 0, that means quota are releasing,
346                  * and it may return before we use quota. So if
347                  * we find this situation, we assuming it has
348                  * returned b=18491 */
349                 if (isblk && lqs->lqs_blk_rec < 0) {
350                         if (qdata[i].qd_count < -lqs->lqs_blk_rec)
351                                 qdata[i].qd_count = 0;
352                         else
353                                 qdata[i].qd_count += lqs->lqs_blk_rec;
354                 }
355                 if (!isblk && lqs->lqs_ino_rec < 0) {
356                         if (qdata[i].qd_count < -lqs->lqs_ino_rec)
357                                 qdata[i].qd_count = 0;
358                         else
359                                 qdata[i].qd_count += lqs->lqs_ino_rec;
360                 }
361
362                 CDEBUG(D_QUOTA, "[id %u] [%c] [isblk %d] [count %d]"
363                        " [lqs pending: %lu] [qd_count: "LPU64"] [metablocks: %d]"
364                        " [pending: %d]\n", id[i], i % 2 ? 'g': 'u', isblk, count,
365                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
366                        qdata[i].qd_count, mb, pending[i]);
367                 if (rc2[i] == QUOTA_RET_OK) {
368                         if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
369                                 rc2[i] = QUOTA_RET_ACQUOTA;
370                         if (!isblk && qdata[i].qd_count <
371                             lqs->lqs_iwrite_pending)
372                                 rc2[i] = QUOTA_RET_ACQUOTA;
373                 }
374
375                 cfs_spin_unlock(&lqs->lqs_lock);
376
377                 if (lqs->lqs_blk_rec  < 0 &&
378                     qdata[i].qd_count <
379                     lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
380                         OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);
381
382                 /* When cycle is zero, lqs_*_pending will be changed. We will
383                  * get reference of the lqs here and put reference of lqs in
384                  * quota_pending_commit b=14784 */
385                 if (!cycle)
386                         lqs_getref(lqs);
387
388                 /* this is for quota_search_lqs */
389                 lqs_putref(lqs);
390         }
391
392         if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
393                 RETURN(QUOTA_RET_ACQUOTA);
394         else
395                 RETURN(rc);
396 }
397
398 int quota_is_set(struct obd_device *obd, const unsigned int id[], int flag)
399 {
400         struct lustre_qunit_size *lqs;
401         int i, q_set = 0;
402
403         if (!ll_sb_any_quota_active(obd->u.obt.obt_qctxt.lqc_sb))
404                 RETURN(0);
405
406         for (i = 0; i < MAXQUOTAS; i++) {
407                 lqs = quota_search_lqs(LQS_KEY(i, id[i]),
408                                        &obd->u.obt.obt_qctxt, 0);
409                 if (lqs && !IS_ERR(lqs)) {
410                         if (lqs->lqs_flags & flag)
411                                 q_set = 1;
412                         lqs_putref(lqs);
413                 }
414         }
415
416         return q_set;
417 }
418
419 static int quota_chk_acq_common(struct obd_device *obd, struct obd_export *exp,
420                                 const unsigned int id[], int pending[],
421                                 int count, quota_acquire acquire,
422                                 struct obd_trans_info *oti, int isblk,
423                                 struct inode *inode, int frags)
424 {
425         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
426         struct timeval work_start;
427         struct timeval work_end;
428         long timediff;
429         struct l_wait_info lwi = { 0 };
430         int rc = 0, cycle = 0, count_err = 1;
431         ENTRY;
432
433         if (!quota_is_set(obd, id, isblk ? QB_SET : QI_SET))
434                 RETURN(0);
435
436         if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
437                 /* If the client has been evicted or if it
438                  * timed out and tried to reconnect already,
439                  * abort the request immediately */
440                 RETURN(-ENOTCONN);
441
442         CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
443         pending[USRQUOTA] = pending[GRPQUOTA] = 0;
444         /* Unfortunately, if quota master is too busy to handle the
445          * pre-dqacq in time and quota hash on ost is used up, we
446          * have to wait for the completion of in flight dqacq/dqrel,
447          * in order to get enough quota for write b=12588 */
448         cfs_gettimeofday(&work_start);
449         while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
450                                         inode, frags)) &
451                QUOTA_RET_ACQUOTA) {
452
453                 cfs_spin_lock(&qctxt->lqc_lock);
454                 if (!qctxt->lqc_import && oti) {
455                         cfs_spin_unlock(&qctxt->lqc_lock);
456                         LASSERT(oti->oti_thread);
457                         /* The recovery thread doesn't have watchdog
458                          * attached. LU-369 */
459                         if (oti->oti_thread->t_watchdog)
460                                 lc_watchdog_disable(oti->oti_thread->\
461                                                 t_watchdog);
462                         CDEBUG(D_QUOTA, "sleep for quota master\n");
463                         l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
464                                      &lwi);
465                         CDEBUG(D_QUOTA, "wake up when quota master is back\n");
466                         if (oti->oti_thread->t_watchdog)
467                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
468                                        CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
469                 } else {
470                         cfs_spin_unlock(&qctxt->lqc_lock);
471                 }
472
473                 cycle++;
474                 if (isblk)
475                         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
476                 /* after acquire(), we should run quota_check_common again
477                  * so that we confirm there are enough quota to finish write */
478                 rc = acquire(obd, id, oti, isblk);
479
480                 /* please reference to dqacq_completion for the below */
481                 /* a new request is finished, try again */
482                 if (rc == QUOTA_REQ_RETURNED) {
483                         CDEBUG(D_QUOTA, "finish a quota req, try again\n");
484                         continue;
485                 }
486
487                 /* it is out of quota already */
488                 if (rc == -EDQUOT) {
489                         CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
490                         break;
491                 }
492
493                 /* Related quota has been disabled by master, but enabled by
494                  * slave, do not try again. */
495                 if (unlikely(rc == -ESRCH)) {
496                         CERROR("mismatched quota configuration, stop try.\n");
497                         break;
498                 }
499
500                 if (isblk && (exp->exp_failed || exp->exp_abort_active_req))
501                         /* The client has been evicted or tried to
502                          * to reconnect already, abort the request */
503                         RETURN(-ENOTCONN);
504
505                 /* -EBUSY and others, wait a second and try again */
506                 if (rc < 0) {
507                         cfs_waitq_t        waitq;
508                         struct l_wait_info lwi;
509
510                         if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
511                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
512                                        CFS_GET_TIMEOUT(oti->oti_thread->t_svc));
513                         CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
514                                count_err++);
515
516                         cfs_waitq_init(&waitq);
517                         lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
518                                           NULL);
519                         l_wait_event(waitq, 0, &lwi);
520                 }
521
522                 if (rc < 0 || cycle % 10 == 0) {
523                         cfs_spin_lock(&last_print_lock);
524                         if (last_print == 0 ||
525                             cfs_time_before((last_print + cfs_time_seconds(30)),
526                                             cfs_time_current())) {
527                                 last_print = cfs_time_current();
528                                 cfs_spin_unlock(&last_print_lock);
529                                 CWARN("still haven't managed to acquire quota "
530                                       "space from the quota master after %d "
531                                       "retries (err=%d, rc=%d)\n",
532                                       cycle, count_err - 1, rc);
533                         } else {
534                                 cfs_spin_unlock(&last_print_lock);
535                         }
536                 }
537
538                 CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
539                        cycle);
540         }
541         cfs_gettimeofday(&work_end);
542         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
543         lprocfs_counter_add(qctxt->lqc_stats,
544                             isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
545                                     LQUOTA_WAIT_FOR_CHK_INO,
546                             timediff);
547
548         if (rc > 0)
549                 rc = 0;
550         RETURN(rc);
551 }
552
553 /**
554  * when a block_write or inode_create rpc is finished, adjust the record for
555  * pending blocks and inodes
556  */
557 static int quota_pending_commit(struct obd_device *obd, const unsigned int id[],
558                                 int pending[], int isblk)
559 {
560         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
561         struct timeval work_start;
562         struct timeval work_end;
563         long timediff;
564         int i;
565         struct qunit_data qdata[MAXQUOTAS];
566         ENTRY;
567
568         CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
569         CLASSERT(MAXQUOTAS < 4);
570         if (!ll_sb_any_quota_active(qctxt->lqc_sb))
571                 RETURN(0);
572
573         cfs_gettimeofday(&work_start);
574         for (i = 0; i < MAXQUOTAS; i++) {
575                 struct lustre_qunit_size *lqs = NULL;
576
577                 LASSERT(pending[i] >= 0);
578                 if (pending[i] == 0)
579                         continue;
580
581                 qdata[i].qd_id = id[i];
582                 qdata[i].qd_flags = i;
583                 if (isblk)
584                         QDATA_SET_BLK(&qdata[i]);
585                 qdata[i].qd_count = 0;
586
587                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
588                         continue;
589
590                 lqs = quota_search_lqs(LQS_KEY(i, qdata[i].qd_id), qctxt, 0);
591                 if (lqs == NULL || IS_ERR(lqs)) {
592                         CERROR("can not find lqs for pending_commit: "
593                                "[id %u] [%c] [pending %u] [isblk %d] (rc %ld), "
594                                "maybe cause unexpected lqs refcount error!\n",
595                                id[i], i ? 'g': 'u', pending[i], isblk,
596                                lqs ? PTR_ERR(lqs) : -1);
597                         continue;
598                 }
599
600                 cfs_spin_lock(&lqs->lqs_lock);
601                 if (isblk) {
602                         LASSERTF(lqs->lqs_bwrite_pending >= pending[i],
603                                  "there are too many blocks! [id %u] [%c] "
604                                  "[bwrite_pending %lu] [pending %u]\n",
605                                  id[i], i % 2 ? 'g' : 'u',
606                                  lqs->lqs_bwrite_pending, pending[i]);
607
608                         lqs->lqs_bwrite_pending -= pending[i];
609                 } else {
610                         LASSERTF(lqs->lqs_iwrite_pending >= pending[i],
611                                 "there are too many files! [id %u] [%c] "
612                                 "[iwrite_pending %lu] [pending %u]\n",
613                                 id[i], i % 2 ? 'g' : 'u',
614                                 lqs->lqs_iwrite_pending, pending[i]);
615
616                         lqs->lqs_iwrite_pending -= pending[i];
617                 }
618                 CDEBUG(D_QUOTA, "%s: lqs_pending=%lu pending[%d]=%d isblk=%d\n",
619                        obd->obd_name,
620                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
621                        i, pending[i], isblk);
622                 cfs_spin_unlock(&lqs->lqs_lock);
623
624                 /* for quota_search_lqs in pending_commit */
625                 lqs_putref(lqs);
626                 /* for quota_search_lqs in quota_check */
627                 lqs_putref(lqs);
628         }
629         cfs_gettimeofday(&work_end);
630         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
631         lprocfs_counter_add(qctxt->lqc_stats,
632                             isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
633                                     LQUOTA_WAIT_FOR_COMMIT_INO,
634                             timediff);
635
636         RETURN(0);
637 }
638
/** Module-level quota init for the MDS: forwards to lustre_dquot_init(). */
static int mds_quota_init(void)
{
        int rc = lustre_dquot_init();

        return rc;
}
643
/** Module-level quota exit for the MDS: calls lustre_dquot_exit(); returns 0. */
static int mds_quota_exit(void)
{
        const int rc = 0;

        lustre_dquot_exit();

        return rc;
}
649
650 static int mds_quota_setup(struct obd_device *obd)
651 {
652         struct obd_device_target *obt = &obd->u.obt;
653         struct mds_obd *mds = &obd->u.mds;
654         int rc;
655         ENTRY;
656
657         if (unlikely(mds->mds_quota)) {
658                 CWARN("try to reinitialize quota context!\n");
659                 RETURN(0);
660         }
661
662         cfs_init_rwsem(&obt->obt_rwsem);
663         obt->obt_qfmt = LUSTRE_QUOTA_V2;
664         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
665         cfs_sema_init(&obt->obt_quotachecking, 1);
666         /* initialize quota master and quota context */
667         cfs_init_rwsem(&mds->mds_qonoff_sem);
668         rc = qctxt_init(obd, dqacq_handler);
669         if (rc) {
670                 CERROR("%s: initialize quota context failed! (rc:%d)\n",
671                        obd->obd_name, rc);
672                 RETURN(rc);
673         }
674         mds->mds_quota = 1;
675         RETURN(rc);
676 }
677
678 static int mds_quota_cleanup(struct obd_device *obd)
679 {
680         ENTRY;
681         if (unlikely(!obd->u.mds.mds_quota))
682                 RETURN(0);
683
684         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
685         RETURN(0);
686 }
687
688 static int mds_quota_setinfo(struct obd_device *obd, void *data)
689 {
690         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
691         ENTRY;
692
693         if (unlikely(!obd->u.mds.mds_quota))
694                 RETURN(0);
695
696         if (data != NULL)
697                 QUOTA_MASTER_READY(qctxt);
698         else
699                 QUOTA_MASTER_UNREADY(qctxt);
700         RETURN(0);
701 }
702
703 static int mds_quota_fs_cleanup(struct obd_device *obd)
704 {
705         struct mds_obd *mds = &obd->u.mds;
706         struct obd_quotactl oqctl;
707         ENTRY;
708
709         if (unlikely(!mds->mds_quota))
710                 RETURN(0);
711
712         mds->mds_quota = 0;
713         memset(&oqctl, 0, sizeof(oqctl));
714         oqctl.qc_type = UGQUOTA;
715
716         cfs_down_write(&mds->mds_qonoff_sem);
717         mds_admin_quota_off(obd, &oqctl);
718         cfs_up_write(&mds->mds_qonoff_sem);
719         RETURN(0);
720 }
721
722 static int quota_acquire_common(struct obd_device *obd, const unsigned int id[],
723                                 struct obd_trans_info *oti, int isblk)
724 {
725         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
726         int rc;
727         ENTRY;
728
729         rc = qctxt_adjust_qunit(obd, qctxt, id, isblk, 1, oti);
730         RETURN(rc);
731 }
732
733 #endif /* HAVE_QUOTA_SUPPORT */
734 #endif /* __KERNEL__ */
735
736 struct osc_quota_info {
737         /** linkage for quota hash table */
738         cfs_hlist_node_t oqi_hash;
739         obd_uid          oqi_id;
740 };
741
742 /* SLAB cache for client quota context */
743 cfs_mem_cache_t *qinfo_cachep = NULL;
744
745 static inline struct osc_quota_info *osc_oqi_alloc(obd_uid id)
746 {
747         struct osc_quota_info *oqi;
748
749         OBD_SLAB_ALLOC_PTR(oqi, qinfo_cachep);
750         if (oqi != NULL)
751                 oqi->oqi_id = id;
752
753         return oqi;
754 }
755
756 int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
757 {
758         int type;
759         ENTRY;
760
761         for (type = 0; type < MAXQUOTAS; type++) {
762                 struct osc_quota_info *oqi;
763
764                 oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]);
765                 if (oqi) {
766                         obd_uid id = oqi->oqi_id;
767
768                         LASSERTF(id == qid[type],
769                                  "The ids don't match %u != %u\n",
770                                  id, qid[type]);
771
772                         /* the slot is busy, the user is about to run out of
773                          * quota space on this OST */
774                         CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n",
775                                type == USRQUOTA ? "user" : "grout", qid[type]);
776                         RETURN(NO_QUOTA);
777                 }
778         }
779
780         RETURN(QUOTA_OK);
781 }
782
/*
 * Map a quota type to the matching "valid" bit (MD_QUOTA_FLAG) or
 * "no quota" flag bit (FL_QUOTA_FLAG).  Anything that is not USRQUOTA is
 * treated as group quota.  The argument is parenthesized so that an
 * expression argument is compared as a whole.
 */
#define MD_QUOTA_FLAG(type) (((type) == USRQUOTA) ? OBD_MD_FLUSRQUOTA \
                                                  : OBD_MD_FLGRPQUOTA)
#define FL_QUOTA_FLAG(type) (((type) == USRQUOTA) ? OBD_FL_NO_USRQUOTA \
                                                  : OBD_FL_NO_GRPQUOTA)
787
788 int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
789                     obd_flag valid, obd_flag flags)
790 {
791         int type;
792         int rc = 0;
793         ENTRY;
794
795         if ((valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) == 0)
796                 RETURN(0);
797
798         for (type = 0; type < MAXQUOTAS; type++) {
799                 struct osc_quota_info *oqi;
800
801                 if ((valid & MD_QUOTA_FLAG(type)) == 0)
802                         continue;
803
804                 /* lookup the ID in the per-type hash table */
805                 oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]);
806                 if ((flags & FL_QUOTA_FLAG(type)) != 0) {
807                         /* This ID is getting close to its quota limit, let's
808                          * switch to sync I/O */
809                         if (oqi != NULL)
810                                 continue;
811
812                         oqi = osc_oqi_alloc(qid[type]);
813                         if (oqi == NULL) {
814                                 rc = -ENOMEM;
815                                 break;
816                         }
817
818                         rc = cfs_hash_add_unique(cli->cl_quota_hash[type],
819                                                  &qid[type], &oqi->oqi_hash);
820                         /* race with others? */
821                         if (rc == -EALREADY) {
822                                 rc = 0;
823                                 OBD_SLAB_FREE_PTR(oqi, qinfo_cachep);
824                         }
825
826                         CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n",
827                                cli->cl_import->imp_obd->obd_name,
828                                type == USRQUOTA ? "user" : "group",
829                                qid[type], rc);
830                 } else {
831                         /* This ID is now off the hook, let's remove it from
832                          * the hash table */
833                         if (oqi == NULL)
834                                 continue;
835
836                         oqi = cfs_hash_del_key(cli->cl_quota_hash[type],
837                                                &qid[type]);
838                         if (oqi)
839                                 OBD_SLAB_FREE_PTR(oqi, qinfo_cachep);
840
841                         CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n",
842                                cli->cl_import->imp_obd->obd_name,
843                                type == USRQUOTA ? "user" : "group",
844                                qid[type], oqi);
845                 }
846         }
847
848         RETURN(rc);
849 }
850
851 /*
852  * Hash operations for uid/gid <-> osc_quota_info
853  */
854 static unsigned
855 oqi_hashfn(cfs_hash_t *hs, const void *key, unsigned mask)
856 {
857         return cfs_hash_u32_hash(*((__u32*)key), mask);
858 }
859
860 static int
861 oqi_keycmp(const void *key, cfs_hlist_node_t *hnode)
862 {
863         struct osc_quota_info *oqi;
864         obd_uid uid;
865
866         LASSERT(key != NULL);
867         uid = *((obd_uid*)key);
868         oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
869
870         return uid == oqi->oqi_id;
871 }
872
873 static void *
874 oqi_key(cfs_hlist_node_t *hnode)
875 {
876         struct osc_quota_info *oqi;
877         oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
878         return &oqi->oqi_id;
879 }
880
881 static void *
882 oqi_object(cfs_hlist_node_t *hnode)
883 {
884         return cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
885 }
886
887 static void
888 oqi_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
889 {
890 }
891
892 static void
893 oqi_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
894 {
895 }
896
897 static void
898 oqi_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
899 {
900         struct osc_quota_info *oqi;
901
902         oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
903
904         OBD_SLAB_FREE_PTR(oqi, qinfo_cachep);
905 }
906
/*
 * Sizing of the client quota hash tables.  osc_quota_setup() passes these
 * to cfs_hash_create() in the order CUR, MAX, BKT.
 * NOTE(review): presumably log2 of the initial size, maximum size and
 * bucket size respectively - confirm against cfs_hash_create().
 */
#define HASH_QUOTA_CUR_BITS 5
#define HASH_QUOTA_MAX_BITS 15
#define HASH_QUOTA_BKT_BITS 5
910
911 static cfs_hash_ops_t quota_hash_ops = {
912         .hs_hash       = oqi_hashfn,
913         .hs_keycmp     = oqi_keycmp,
914         .hs_key        = oqi_key,
915         .hs_object     = oqi_object,
916         .hs_get        = oqi_get,
917         .hs_put_locked = oqi_put_locked,
918         .hs_exit       = oqi_exit,
919 };
920
921 int osc_quota_setup(struct obd_device *obd)
922 {
923         struct client_obd *cli = &obd->u.cli;
924         int i, type;
925         ENTRY;
926
927         for (type = 0; type < MAXQUOTAS; type++) {
928                 cli->cl_quota_hash[type] = cfs_hash_create("QUOTA_HASH",
929                                                            HASH_QUOTA_CUR_BITS,
930                                                            HASH_QUOTA_MAX_BITS,
931                                                            HASH_QUOTA_BKT_BITS,
932                                                            0,
933                                                            CFS_HASH_MIN_THETA,
934                                                            CFS_HASH_MAX_THETA,
935                                                            &quota_hash_ops,
936                                                            CFS_HASH_DEFAULT);
937                 if (cli->cl_quota_hash[type] == NULL)
938                         break;
939         }
940
941         if (type == MAXQUOTAS)
942                 RETURN(0);
943
944         for (i = 0; i < type; i++)
945                 cfs_hash_putref(cli->cl_quota_hash[i]);
946
947         RETURN(-ENOMEM);
948 }
949
950 int osc_quota_cleanup(struct obd_device *obd)
951 {
952         struct client_obd *cli = &obd->u.cli;
953         int type;
954         ENTRY;
955
956         for (type = 0; type < MAXQUOTAS; type++)
957                 cfs_hash_putref(cli->cl_quota_hash[type]);
958
959         RETURN(0);
960 }
961
962 int osc_quota_init(void)
963 {
964         ENTRY;
965
966         LASSERT(qinfo_cachep == NULL);
967         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
968                                             sizeof(struct osc_quota_info),
969                                             0, 0);
970         if (!qinfo_cachep)
971                 RETURN(-ENOMEM);
972
973         RETURN(0);
974 }
975
976 int osc_quota_exit(void)
977 {
978         int rc;
979         ENTRY;
980
981         rc = cfs_mem_cache_destroy(qinfo_cachep);
982         LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
983         qinfo_cachep = NULL;
984
985         RETURN(0);
986 }
987
988
989 #ifdef __KERNEL__
990 #ifdef HAVE_QUOTA_SUPPORT
991 quota_interface_t mds_quota_interface = {
992         .quota_init     = mds_quota_init,
993         .quota_exit     = mds_quota_exit,
994         .quota_setup    = mds_quota_setup,
995         .quota_cleanup  = mds_quota_cleanup,
996         .quota_check    = target_quota_check,
997         .quota_ctl      = mds_quota_ctl,
998         .quota_setinfo  = mds_quota_setinfo,
999         .quota_fs_cleanup = mds_quota_fs_cleanup,
1000         .quota_recovery = mds_quota_recovery,
1001         .quota_adjust   = mds_quota_adjust,
1002         .quota_chkquota = quota_chk_acq_common,
1003         .quota_acquire  = quota_acquire_common,
1004         .quota_pending_commit = quota_pending_commit,
1005 };
1006
1007 quota_interface_t filter_quota_interface = {
1008         .quota_setup    = filter_quota_setup,
1009         .quota_cleanup  = filter_quota_cleanup,
1010         .quota_check    = target_quota_check,
1011         .quota_ctl      = filter_quota_ctl,
1012         .quota_setinfo  = filter_quota_setinfo,
1013         .quota_clearinfo = filter_quota_clearinfo,
1014         .quota_enforce  = filter_quota_enforce,
1015         .quota_getflag  = filter_quota_getflag,
1016         .quota_acquire  = quota_acquire_common,
1017         .quota_adjust   = filter_quota_adjust,
1018         .quota_chkquota = quota_chk_acq_common,
1019         .quota_adjust_qunit   = filter_quota_adjust_qunit,
1020         .quota_pending_commit = quota_pending_commit,
1021 };
1022 #endif
1023 #endif /* __KERNEL__ */
1024
1025 quota_interface_t mdc_quota_interface = {
1026         .quota_ctl      = client_quota_ctl,
1027         .quota_check    = client_quota_check,
1028         .quota_poll_check = client_quota_poll_check,
1029 };
1030
1031 quota_interface_t lmv_quota_interface = {
1032         .quota_ctl      = lmv_quota_ctl,
1033         .quota_check    = lmv_quota_check,
1034 };
1035
1036 quota_interface_t osc_quota_interface = {
1037         .quota_ctl      = client_quota_ctl,
1038         .quota_check    = client_quota_check,
1039         .quota_poll_check = client_quota_poll_check,
1040         .quota_init     = osc_quota_init,
1041         .quota_exit     = osc_quota_exit,
1042         .quota_setup    = osc_quota_setup,
1043         .quota_chkdq    = osc_quota_chkdq,
1044         .quota_setdq    = osc_quota_setdq,
1045         .quota_cleanup  = osc_quota_cleanup,
1046         .quota_adjust_qunit = client_quota_adjust_qunit,
1047 };
1048
1049 quota_interface_t lov_quota_interface = {
1050         .quota_ctl      = lov_quota_ctl,
1051         .quota_check    = lov_quota_check,
1052         .quota_adjust_qunit = lov_quota_adjust_qunit,
1053 };
1054
1055 #ifdef __KERNEL__
1056
1057 cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
1058
1059 static int __init init_lustre_quota(void)
1060 {
1061 #ifdef HAVE_QUOTA_SUPPORT
1062         int rc = 0;
1063
1064         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
1065                                                 proc_lustre_root,
1066                                                 NULL, NULL);
1067         if (IS_ERR(lquota_type_proc_dir)) {
1068                 CERROR("LProcFS failed in lquota-init\n");
1069                 rc = PTR_ERR(lquota_type_proc_dir);
1070                 return rc;
1071         }
1072
1073         rc = qunit_cache_init();
1074         if (rc)
1075                 return rc;
1076
1077         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
1078         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
1079 #endif
1080         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
1081         PORTAL_SYMBOL_REGISTER(lmv_quota_interface);
1082         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
1083         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
1084         return 0;
1085 }
1086
1087 static void /*__exit*/ exit_lustre_quota(void)
1088 {
1089         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
1090         PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface);
1091         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
1092         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
1093 #ifdef HAVE_QUOTA_SUPPORT
1094         PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
1095         PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
1096
1097         qunit_cache_cleanup();
1098
1099         if (lquota_type_proc_dir)
1100                 lprocfs_remove(&lquota_type_proc_dir);
1101 #endif
1102 }
1103
1104 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
1105 MODULE_DESCRIPTION("Lustre Quota");
1106 MODULE_LICENSE("GPL");
1107
1108 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
1109
1110 #ifdef HAVE_QUOTA_SUPPORT
1111 EXPORT_SYMBOL(mds_quota_interface);
1112 EXPORT_SYMBOL(filter_quota_interface);
1113 #endif
1114 EXPORT_SYMBOL(mdc_quota_interface);
1115 EXPORT_SYMBOL(lmv_quota_interface);
1116 EXPORT_SYMBOL(osc_quota_interface);
1117 EXPORT_SYMBOL(lov_quota_interface);
#endif /* __KERNEL__ */