Whamcloud - gitweb
dff4ce419b0937f5c27757662d151f4fb9189135
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LQUOTA
41
42 #ifdef __KERNEL__
43 # include <linux/version.h>
44 # include <linux/module.h>
45 # include <linux/init.h>
46 # include <linux/fs.h>
47 # include <linux/jbd.h>
48 # include <linux/smp_lock.h>
49 # include <linux/buffer_head.h>
50 # include <linux/workqueue.h>
51 # include <linux/mount.h>
52 #else /* __KERNEL__ */
53 # include <liblustre.h>
54 #endif
55
56 #include <obd_class.h>
57 #include <lustre_mds.h>
58 #include <lustre_dlm.h>
59 #include <lustre_cfg.h>
60 #include <obd_ost.h>
61 #include <lustre_fsfilt.h>
62 #include <lustre_quota.h>
63 #include <lprocfs_status.h>
64 #include "quota_internal.h"
65
66 #ifdef __KERNEL__
67
68 #ifdef HAVE_QUOTA_SUPPORT
69
/* time (cfs_time_current()) at which quota_chk_acq_common() last printed its
 * "still haven't managed to acquire quota" warning; 0 means it has not been
 * printed yet */
static cfs_time_t last_print = 0;
/* protects last_print */
static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
72
73 static int filter_quota_setup(struct obd_device *obd)
74 {
75         int rc = 0;
76         struct obd_device_target *obt = &obd->u.obt;
77         ENTRY;
78
79         init_rwsem(&obt->obt_rwsem);
80         obt->obt_qfmt = LUSTRE_QUOTA_V2;
81         atomic_set(&obt->obt_quotachecking, 1);
82         rc = qctxt_init(obd, NULL);
83         if (rc)
84                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
85
86         RETURN(rc);
87 }
88
89 static int filter_quota_cleanup(struct obd_device *obd)
90 {
91         ENTRY;
92         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
93         RETURN(0);
94 }
95
96 static int filter_quota_setinfo(struct obd_device *obd, void *data)
97 {
98         struct obd_export *exp = data;
99         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
100         struct obd_import *imp = exp->exp_imp_reverse;
101         ENTRY;
102
103         LASSERT(imp != NULL);
104
105         /* setup the quota context import */
106         spin_lock(&qctxt->lqc_lock);
107         if (qctxt->lqc_import != NULL) {
108                 spin_unlock(&qctxt->lqc_lock);
109                 if (qctxt->lqc_import == imp)
110                         CDEBUG(D_WARNING, "%s: lqc_import(%p) of obd(%p) was "
111                                "activated already.\n", obd->obd_name, imp, obd);
112                 else
113                         CDEBUG(D_ERROR, "%s: lqc_import(%p:%p) of obd(%p) was "
114                                "activated by others.\n", obd->obd_name,
115                                qctxt->lqc_import, imp, obd);
116         } else {
117                 qctxt->lqc_import = imp;
118                 /* make imp's connect flags equal relative exp's connect flags
119                  * adding it to avoid the scan export list */
120                 imp->imp_connect_data.ocd_connect_flags |=
121                                 (exp->exp_connect_flags &
122                                  (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
123                 spin_unlock(&qctxt->lqc_lock);
124                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated "
125                        "now.\n", obd->obd_name, imp, obd);
126
127                 cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
128                 /* start quota slave recovery thread. (release high limits) */
129                 qslave_start_recovery(obd, qctxt);
130         }
131         RETURN(0);
132 }
133
134 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
135 {
136         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
137         struct obd_import *imp = exp->exp_imp_reverse;
138         ENTRY;
139
140         /* lquota may be not set up before destroying export, b=14896 */
141         if (!obd->obd_set_up)
142                 RETURN(0);
143
144         if (unlikely(imp == NULL))
145                 RETURN(0);
146
147         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
148          * should be invalid b=12374 */
149         spin_lock(&qctxt->lqc_lock);
150         if (qctxt->lqc_import == imp) {
151                 qctxt->lqc_import = NULL;
152                 spin_unlock(&qctxt->lqc_lock);
153                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is invalid now.\n",
154                        obd->obd_name, imp, obd);
155                 ptlrpc_cleanup_imp(imp);
156                 dqacq_interrupt(qctxt);
157         } else {
158                 spin_unlock(&qctxt->lqc_lock);
159         }
160         RETURN(0);
161 }
162
163 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
164 {
165         ENTRY;
166
167         if (!sb_any_quota_enabled(obd->u.obt.obt_sb))
168                 RETURN(0);
169
170         if (ignore) {
171                 CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
172                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
173         } else {
174                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
175         }
176
177         RETURN(0);
178 }
179
180 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
181 {
182         struct obd_device_target *obt = &obd->u.obt;
183         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
184         int err, cnt, rc = 0;
185         struct obd_quotactl *oqctl;
186         ENTRY;
187
188         if (!sb_any_quota_enabled(obt->obt_sb))
189                 RETURN(0);
190
191         OBD_ALLOC_PTR(oqctl);
192         if (!oqctl) {
193                 CERROR("Not enough memory!");
194                 RETURN(-ENOMEM);
195         }
196
197         /* set over quota flags for a uid/gid */
198         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
199         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
200
201         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
202                 struct quota_adjust_qunit oqaq_tmp;
203                 struct lustre_qunit_size *lqs = NULL;
204
205                 oqaq_tmp.qaq_flags = cnt;
206                 oqaq_tmp.qaq_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
207
208                 quota_search_lqs(NULL, &oqaq_tmp, qctxt, &lqs);
209                 if (lqs) {
210                         spin_lock(&lqs->lqs_lock);
211                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
212                                 oa->o_flags |= (cnt == USRQUOTA) ?
213                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
214                                 spin_unlock(&lqs->lqs_lock);
215                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
216                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
217                                        qctxt->lqc_sync_blk);
218                                 /* this is for quota_search_lqs */
219                                 lqs_putref(lqs);
220                                 continue;
221                         }
222                         spin_unlock(&lqs->lqs_lock);
223                         /* this is for quota_search_lqs */
224                         lqs_putref(lqs);
225                 }
226
227                 memset(oqctl, 0, sizeof(*oqctl));
228
229                 oqctl->qc_cmd = Q_GETQUOTA;
230                 oqctl->qc_type = cnt;
231                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
232                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
233                 if (err) {
234                         if (!rc)
235                                 rc = err;
236                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
237                                                              OBD_MD_FLGRPQUOTA);
238                         continue;
239                 }
240
241                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
242                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
243                     oqctl->qc_dqblk.dqb_bhardlimit))
244                         oa->o_flags |= (cnt == USRQUOTA) ?
245                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
246         }
247         OBD_FREE_PTR(oqctl);
248         RETURN(rc);
249 }
250
251 /**
252  * check whether the left quota of certain uid and gid can satisfy a block_write
253  * or inode_create rpc. When need to acquire quota, return QUOTA_RET_ACQUOTA
254  */
255 static int quota_check_common(struct obd_device *obd, const unsigned int id[],
256                               int pending[], int count, int cycle, int isblk,
257                               struct inode *inode, int frags)
258 {
259         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
260         int i;
261         struct qunit_data qdata[MAXQUOTAS];
262         int mb = 0;
263         int rc = 0, rc2[2] = { 0, 0 };
264         ENTRY;
265
266         CLASSERT(MAXQUOTAS < 4);
267         if (!sb_any_quota_enabled(qctxt->lqc_sb))
268                 RETURN(rc);
269
270         spin_lock(&qctxt->lqc_lock);
271         if (!qctxt->lqc_valid){
272                 spin_unlock(&qctxt->lqc_lock);
273                 RETURN(rc);
274         }
275         spin_unlock(&qctxt->lqc_lock);
276
277         for (i = 0; i < MAXQUOTAS; i++) {
278                 struct lustre_qunit_size *lqs = NULL;
279
280                 qdata[i].qd_id = id[i];
281                 qdata[i].qd_flags = i;
282                 if (isblk)
283                         QDATA_SET_BLK(&qdata[i]);
284                 qdata[i].qd_count = 0;
285
286                 /* ignore root user */
287                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
288                         continue;
289
290                 quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
291                 if (!lqs)
292                         continue;
293
294                 if (IS_ERR(lqs)) {
295                         CERROR("can not find lqs for check_common: "
296                                "[id %u] [%c] [isblk %d] [count %d] [rc %ld]\n",
297                                id[i], i % 2 ? 'g': 'u', isblk, count,
298                                PTR_ERR(lqs));
299                         RETURN(PTR_ERR(lqs));
300                 }
301
302                 rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
303                 spin_lock(&lqs->lqs_lock);
304                 if (!cycle) {
305                         if (isblk) {
306                                 pending[i] = count * CFS_PAGE_SIZE;
307                                 /* in order to complete this write, we need extra
308                                  * meta blocks. This function can get it through
309                                  * data needed to be written b=16542 */
310                                 if (inode) {
311                                         mb = pending[i];
312                                         rc = fsfilt_get_mblk(obd, qctxt->lqc_sb,
313                                                              &mb, inode,frags);
314                                         if (rc)
315                                                 CDEBUG(D_ERROR,
316                                                        "can't get extra "
317                                                        "meta blocks.\n");
318                                         else
319                                                 pending[i] += mb;
320                                 }
321                                 lqs->lqs_bwrite_pending += pending[i];
322                         } else {
323                                 pending[i] = count;
324                                 lqs->lqs_iwrite_pending += pending[i];
325                         }
326                 }
327
328                 /* if xx_rec < 0, that means quota are releasing,
329                  * and it may return before we use quota. So if
330                  * we find this situation, we assuming it has
331                  * returned b=18491 */
332                 if (isblk && lqs->lqs_blk_rec < 0) {
333                         if (qdata[i].qd_count < -lqs->lqs_blk_rec)
334                                 qdata[i].qd_count = 0;
335                         else
336                                 qdata[i].qd_count += lqs->lqs_blk_rec;
337                 }
338                 if (!isblk && lqs->lqs_ino_rec < 0) {
339                         if (qdata[i].qd_count < -lqs->lqs_ino_rec)
340                                 qdata[i].qd_count = 0;
341                         else
342                                 qdata[i].qd_count += lqs->lqs_ino_rec;
343                 }
344
345                 CDEBUG(D_QUOTA, "[id %u] [%c] [isblk %d] [count %d]"
346                        " [lqs pending: %lu] [qd_count: "LPU64"] [metablocks: %d]"
347                        " [pending: %d]\n", id[i], i % 2 ? 'g': 'u', isblk, count,
348                        isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
349                        qdata[i].qd_count, mb, pending[i]);
350                 if (rc2[i] == QUOTA_RET_OK) {
351                         if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
352                                 rc2[i] = QUOTA_RET_ACQUOTA;
353                         if (!isblk && qdata[i].qd_count <
354                             lqs->lqs_iwrite_pending)
355                                 rc2[i] = QUOTA_RET_ACQUOTA;
356                 }
357
358                 spin_unlock(&lqs->lqs_lock);
359
360                 if (lqs->lqs_blk_rec  < 0 &&
361                     qdata[i].qd_count <
362                     lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
363                         OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);
364
365                 /* When cycle is zero, lqs_*_pending will be changed. We will
366                  * get reference of the lqs here and put reference of lqs in
367                  * quota_pending_commit b=14784 */
368                 if (!cycle)
369                         lqs_getref(lqs);
370
371                 /* this is for quota_search_lqs */
372                 lqs_putref(lqs);
373         }
374
375         if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
376                 RETURN(QUOTA_RET_ACQUOTA);
377         else
378                 RETURN(rc);
379 }
380
381 static int quota_chk_acq_common(struct obd_device *obd, const unsigned int id[],
382                                 int pending[], int count, quota_acquire acquire,
383                                 struct obd_trans_info *oti, int isblk,
384                                 struct inode *inode, int frags)
385 {
386         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
387         struct timeval work_start;
388         struct timeval work_end;
389         long timediff;
390         struct l_wait_info lwi = { 0 };
391         int rc = 0, cycle = 0, count_err = 1;
392         ENTRY;
393
394         CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
395         pending[USRQUOTA] = pending[GRPQUOTA] = 0;
396         /* Unfortunately, if quota master is too busy to handle the
397          * pre-dqacq in time and quota hash on ost is used up, we
398          * have to wait for the completion of in flight dqacq/dqrel,
399          * in order to get enough quota for write b=12588 */
400         do_gettimeofday(&work_start);
401         while ((rc = quota_check_common(obd, id, pending, count, cycle, isblk,
402                                         inode, frags)) &
403                QUOTA_RET_ACQUOTA) {
404
405                 spin_lock(&qctxt->lqc_lock);
406                 if (!qctxt->lqc_import && oti) {
407                         spin_unlock(&qctxt->lqc_lock);
408
409                         LASSERT(oti && oti->oti_thread &&
410                                 oti->oti_thread->t_watchdog);
411
412                         lc_watchdog_disable(oti->oti_thread->t_watchdog);
413                         CDEBUG(D_QUOTA, "sleep for quota master\n");
414                         l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
415                                      &lwi);
416                         CDEBUG(D_QUOTA, "wake up when quota master is back\n");
417                         lc_watchdog_touch(oti->oti_thread->t_watchdog,
418                                  GET_TIMEOUT(oti->oti_thread->t_svc));
419                 } else {
420                         spin_unlock(&qctxt->lqc_lock);
421                 }
422
423                 cycle++;
424                 if (isblk)
425                         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
426                 /* after acquire(), we should run quota_check_common again
427                  * so that we confirm there are enough quota to finish write */
428                 rc = acquire(obd, id, oti, isblk);
429
430                 /* please reference to dqacq_completion for the below */
431                 /* a new request is finished, try again */
432                 if (rc == QUOTA_REQ_RETURNED) {
433                         CDEBUG(D_QUOTA, "finish a quota req, try again\n");
434                         continue;
435                 }
436
437                 /* it is out of quota already */
438                 if (rc == -EDQUOT) {
439                         CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
440                         break;
441                 }
442
443                 /* -EBUSY and others, wait a second and try again */
444                 if (rc < 0) {
445                         cfs_waitq_t        waitq;
446                         struct l_wait_info lwi;
447
448                         if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
449                                 lc_watchdog_touch(oti->oti_thread->t_watchdog,
450                                          GET_TIMEOUT(oti->oti_thread->t_svc));
451                         CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
452                                count_err++);
453
454                         init_waitqueue_head(&waitq);
455                         lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
456                                           NULL);
457                         l_wait_event(waitq, 0, &lwi);
458                 }
459
460                 if (rc < 0 || cycle % 10 == 2) {
461                         spin_lock(&last_print_lock);
462                         if (last_print == 0 ||
463                             cfs_time_before((last_print + cfs_time_seconds(30)),
464                                             cfs_time_current())) {
465                                 last_print = cfs_time_current();
466                                 spin_unlock(&last_print_lock);
467                                 CWARN("still haven't managed to acquire quota "
468                                       "space from the quota master after %d "
469                                       "retries (err=%d, rc=%d)\n",
470                                       cycle, count_err - 1, rc);
471                         } else {
472                                 spin_unlock(&last_print_lock);
473                         }
474                 }
475
476                 CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
477                        cycle);
478         }
479         do_gettimeofday(&work_end);
480         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
481         lprocfs_counter_add(qctxt->lqc_stats,
482                             isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
483                                     LQUOTA_WAIT_FOR_CHK_INO,
484                             timediff);
485
486         RETURN(rc);
487 }
488
489 /**
490  * when a block_write or inode_create rpc is finished, adjust the record for
491  * pending blocks and inodes
492  */
493 static int quota_pending_commit(struct obd_device *obd, const unsigned int id[],
494                                 int pending[], int isblk)
495 {
496         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
497         struct timeval work_start;
498         struct timeval work_end;
499         long timediff;
500         int i;
501         struct qunit_data qdata[MAXQUOTAS];
502         ENTRY;
503
504         CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
505         CLASSERT(MAXQUOTAS < 4);
506         if (!sb_any_quota_enabled(qctxt->lqc_sb))
507                 RETURN(0);
508
509         do_gettimeofday(&work_start);
510         for (i = 0; i < MAXQUOTAS; i++) {
511                 struct lustre_qunit_size *lqs = NULL;
512
513                 LASSERT(pending[i] >= 0);
514                 if (pending[i] == 0)
515                         continue;
516
517                 qdata[i].qd_id = id[i];
518                 qdata[i].qd_flags = i;
519                 if (isblk)
520                         QDATA_SET_BLK(&qdata[i]);
521                 qdata[i].qd_count = 0;
522
523                 if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
524                         continue;
525
526                 quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
527                 if (lqs == NULL || IS_ERR(lqs)) {
528                         CERROR("can not find lqs for pending_commit: "
529                                "[id %u] [%c] [pending %u] [isblk %d] (rc %ld), "
530                                "maybe cause unexpected lqs refcount error!\n",
531                                id[i], i % 2 ? 'g': 'u', pending[i], isblk,
532                                lqs ? PTR_ERR(lqs) : -1);
533                         continue;
534                 }
535
536                 spin_lock(&lqs->lqs_lock);
537                 if (isblk) {
538                         LASSERTF(lqs->lqs_bwrite_pending >= pending[i],
539                                  "there are too many blocks! [id %u] [%c] "
540                                  "[bwrite_pending %lu] [pending %u]\n",
541                                  id[i], i % 2 ? 'g' : 'u',
542                                  lqs->lqs_bwrite_pending, pending[i]);
543
544                         lqs->lqs_bwrite_pending -= pending[i];
545                 } else {
546                         LASSERTF(lqs->lqs_iwrite_pending >= pending[i],
547                                 "there are too many files! [id %u] [%c] "
548                                 "[iwrite_pending %lu] [pending %u]\n",
549                                 id[i], i % 2 ? 'g' : 'u',
550                                 lqs->lqs_iwrite_pending, pending[i]);
551
552                         lqs->lqs_iwrite_pending -= pending[i];
553                 }
554                 CDEBUG(D_QUOTA, "id: %u, %c, lqs pending: %lu, pending: %d, "
555                        "isblk: %d.\n", id[i], i % 2 ? 'g' : 'u',
556                        isblk ? lqs->lqs_bwrite_pending: lqs->lqs_iwrite_pending,
557                        pending[i], isblk);
558                 spin_unlock(&lqs->lqs_lock);
559
560                 /* for quota_search_lqs in pending_commit */
561                 lqs_putref(lqs);
562                 /* for quota_search_lqs in quota_check */
563                 lqs_putref(lqs);
564         }
565         do_gettimeofday(&work_end);
566         timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
567         lprocfs_counter_add(qctxt->lqc_stats,
568                             isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
569                                     LQUOTA_WAIT_FOR_COMMIT_INO,
570                             timediff);
571
572         RETURN(0);
573 }
574
/**
 * Module-level quota initialisation for the MDS.
 *
 * \retval the value returned by lustre_dquot_init()
 */
static int mds_quota_init(void)
{
        int rc = lustre_dquot_init();

        return rc;
}
579
/**
 * Counterpart of mds_quota_init(): calls lustre_dquot_exit().
 *
 * \retval 0 always
 */
static int mds_quota_exit(void)
{
        int rc = 0;

        lustre_dquot_exit();

        return rc;
}
585
586 static int mds_quota_setup(struct obd_device *obd)
587 {
588         struct obd_device_target *obt = &obd->u.obt;
589         struct mds_obd *mds = &obd->u.mds;
590         int rc;
591         ENTRY;
592
593         if (unlikely(mds->mds_quota)) {
594                 CWARN("try to reinitialize quota context!\n");
595                 RETURN(0);
596         }
597
598         init_rwsem(&obt->obt_rwsem);
599         obt->obt_qfmt = LUSTRE_QUOTA_V2;
600         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
601         atomic_set(&obt->obt_quotachecking, 1);
602         /* initialize quota master and quota context */
603         sema_init(&mds->mds_qonoff_sem, 1);
604         rc = qctxt_init(obd, dqacq_handler);
605         if (rc) {
606                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
607                 RETURN(rc);
608         }
609         mds->mds_quota = 1;
610         RETURN(rc);
611 }
612
613 static int mds_quota_cleanup(struct obd_device *obd)
614 {
615         ENTRY;
616         if (unlikely(!obd->u.mds.mds_quota))
617                 RETURN(0);
618
619         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
620         RETURN(0);
621 }
622
623 static int mds_quota_setinfo(struct obd_device *obd, void *data)
624 {
625         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
626         ENTRY;
627
628         if (unlikely(!obd->u.mds.mds_quota))
629                 RETURN(0);
630
631         if (data != NULL)
632                 QUOTA_MASTER_READY(qctxt);
633         else
634                 QUOTA_MASTER_UNREADY(qctxt);
635         RETURN(0);
636 }
637
638 static int mds_quota_fs_cleanup(struct obd_device *obd)
639 {
640         struct mds_obd *mds = &obd->u.mds;
641         struct obd_quotactl oqctl;
642         ENTRY;
643
644         if (unlikely(!mds->mds_quota))
645                 RETURN(0);
646
647         mds->mds_quota = 0;
648         memset(&oqctl, 0, sizeof(oqctl));
649         oqctl.qc_type = UGQUOTA;
650
651         down(&mds->mds_qonoff_sem);
652         mds_admin_quota_off(obd, &oqctl);
653         up(&mds->mds_qonoff_sem);
654         RETURN(0);
655 }
656
657 static int quota_acquire_common(struct obd_device *obd, const unsigned int id[],
658                                 struct obd_trans_info *oti, int isblk)
659 {
660         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
661         int rc;
662         ENTRY;
663
664         rc = qctxt_adjust_qunit(obd, qctxt, id, isblk, 1, oti);
665         RETURN(rc);
666 }
667
668 #endif /* HAVE_QUOTA_SUPPORT */
669 #endif /* __KERNEL__ */
670
/* One entry of the qinfo_hash table.  osc_quota_setdq() inserts an entry for
 * a (client, id, type) when the matching OBD_FL_NO_*QUOTA flag is set and
 * removes it when the flag is clear; osc_quota_chkdq() reports NO_QUOTA
 * while an entry exists. */
struct osc_quota_info {
        struct list_head        oqi_hash;       /* hash list */
        struct client_obd      *oqi_cli;        /* osc obd */
        unsigned int            oqi_id;         /* uid/gid of a file */
        short                   oqi_type;       /* quota type */
};
677
/* protects qinfo_hash; the hash helpers below assert that it is held */
spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;

/* hash table of struct osc_quota_info, NR_DQHASH buckets, indexed by
 * hashfn() */
static struct list_head qinfo_hash[NR_DQHASH];
/* SLAB cache for client quota context */
cfs_mem_cache_t *qinfo_cachep = NULL;
683
684 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
685                          __attribute__((__const__));
686
687 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
688 {
689         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
690         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
691         return tmp;
692 }
693
694 /* caller must hold qinfo_list_lock */
695 static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
696 {
697         struct list_head *head = qinfo_hash +
698                 hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
699
700         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
701         list_add(&oqi->oqi_hash, head);
702 }
703
/* Unlink \a oqi from its hash list and re-initialise its list head; the
 * entry itself is not freed here.
 * caller must hold qinfo_list_lock */
static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
{
        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
        list_del_init(&oqi->oqi_hash);
}
710
711 /* caller must hold qinfo_list_lock */
712 static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
713                                                 unsigned int id, int type)
714 {
715         unsigned int hashent = hashfn(cli, id, type);
716         struct osc_quota_info *oqi;
717         ENTRY;
718
719         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
720         list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
721                 if (oqi->oqi_cli == cli &&
722                     oqi->oqi_id == id && oqi->oqi_type == type)
723                         return oqi;
724         }
725         RETURN(NULL);
726 }
727
728 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
729                                           unsigned int id, int type)
730 {
731         struct osc_quota_info *oqi;
732         ENTRY;
733
734         OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi));
735         if(!oqi)
736                 RETURN(NULL);
737
738         CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
739         oqi->oqi_cli = cli;
740         oqi->oqi_id = id;
741         oqi->oqi_type = type;
742
743         RETURN(oqi);
744 }
745
/* Give \a oqi back to the qinfo_cachep cache.  The entry is not unlinked
 * from qinfo_hash here; callers in this file do that first. */
static void free_qinfo(struct osc_quota_info *oqi)
{
        OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
}
750
751 int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
752 {
753         unsigned int id;
754         int cnt, rc = QUOTA_OK;
755         ENTRY;
756
757         spin_lock(&qinfo_list_lock);
758         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
759                 struct osc_quota_info *oqi = NULL;
760
761                 id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
762                 oqi = find_qinfo(cli, id, cnt);
763                 if (oqi) {
764                         rc = NO_QUOTA;
765                         break;
766                 }
767         }
768         spin_unlock(&qinfo_list_lock);
769
770         RETURN(rc);
771 }
772
773 int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
774                     obd_flag valid, obd_flag flags)
775 {
776         unsigned int id;
777         obd_flag noquota;
778         int cnt, rc = 0;
779         ENTRY;
780
781
782         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
783                 struct osc_quota_info *oqi, *old;
784
785                 if (!(valid & ((cnt == USRQUOTA) ?
786                     OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
787                         continue;
788
789                 id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
790                 noquota = (cnt == USRQUOTA) ?
791                     (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);
792
793                 oqi = alloc_qinfo(cli, id, cnt);
794                 if (oqi) {
795                         spin_lock(&qinfo_list_lock);
796
797                         old = find_qinfo(cli, id, cnt);
798                         if (old && !noquota)
799                                 remove_qinfo_hash(old);
800                         else if (!old && noquota)
801                                 insert_qinfo_hash(oqi);
802
803                         spin_unlock(&qinfo_list_lock);
804
805                         if (old || !noquota)
806                                 free_qinfo(oqi);
807                         if (old && !noquota)
808                                 free_qinfo(old);
809                 } else {
810                         CERROR("not enough mem!\n");
811                         rc = -ENOMEM;
812                         break;
813                 }
814         }
815
816         RETURN(rc);
817 }
818
819 int osc_quota_cleanup(struct obd_device *obd)
820 {
821         struct client_obd *cli = &obd->u.cli;
822         struct osc_quota_info *oqi, *n;
823         int i;
824         ENTRY;
825
826         spin_lock(&qinfo_list_lock);
827         for (i = 0; i < NR_DQHASH; i++) {
828                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
829                         if (oqi->oqi_cli != cli)
830                                 continue;
831                         remove_qinfo_hash(oqi);
832                         free_qinfo(oqi);
833                 }
834         }
835         spin_unlock(&qinfo_list_lock);
836
837         RETURN(0);
838 }
839
840 int osc_quota_init(void)
841 {
842         int i;
843         ENTRY;
844
845         LASSERT(qinfo_cachep == NULL);
846         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
847                                             sizeof(struct osc_quota_info),
848                                             0, 0);
849         if (!qinfo_cachep)
850                 RETURN(-ENOMEM);
851
852         for (i = 0; i < NR_DQHASH; i++)
853                 CFS_INIT_LIST_HEAD(qinfo_hash + i);
854
855         RETURN(0);
856 }
857
858 int osc_quota_exit(void)
859 {
860         struct osc_quota_info *oqi, *n;
861         int i, rc;
862         ENTRY;
863
864         spin_lock(&qinfo_list_lock);
865         for (i = 0; i < NR_DQHASH; i++) {
866                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
867                         remove_qinfo_hash(oqi);
868                         free_qinfo(oqi);
869                 }
870         }
871         spin_unlock(&qinfo_list_lock);
872
873         rc = cfs_mem_cache_destroy(qinfo_cachep);
874         LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
875         qinfo_cachep = NULL;
876
877         RETURN(0);
878 }
879
880 #ifdef __KERNEL__
881 #ifdef HAVE_QUOTA_SUPPORT
882 quota_interface_t mds_quota_interface = {
883         .quota_init     = mds_quota_init,
884         .quota_exit     = mds_quota_exit,
885         .quota_setup    = mds_quota_setup,
886         .quota_cleanup  = mds_quota_cleanup,
887         .quota_check    = target_quota_check,
888         .quota_ctl      = mds_quota_ctl,
889         .quota_setinfo  = mds_quota_setinfo,
890         .quota_fs_cleanup = mds_quota_fs_cleanup,
891         .quota_recovery = mds_quota_recovery,
892         .quota_adjust   = mds_quota_adjust,
893         .quota_chkquota = quota_chk_acq_common,
894         .quota_acquire  = quota_acquire_common,
895         .quota_pending_commit = quota_pending_commit,
896 };
897
898 quota_interface_t filter_quota_interface = {
899         .quota_setup    = filter_quota_setup,
900         .quota_cleanup  = filter_quota_cleanup,
901         .quota_check    = target_quota_check,
902         .quota_ctl      = filter_quota_ctl,
903         .quota_setinfo  = filter_quota_setinfo,
904         .quota_clearinfo = filter_quota_clearinfo,
905         .quota_enforce  = filter_quota_enforce,
906         .quota_getflag  = filter_quota_getflag,
907         .quota_acquire  = quota_acquire_common,
908         .quota_adjust   = filter_quota_adjust,
909         .quota_chkquota = quota_chk_acq_common,
910         .quota_adjust_qunit   = filter_quota_adjust_qunit,
911         .quota_pending_commit = quota_pending_commit,
912 };
913 #endif
914 #endif /* __KERNEL__ */
915
916 quota_interface_t mdc_quota_interface = {
917         .quota_ctl      = client_quota_ctl,
918         .quota_check    = client_quota_check,
919         .quota_poll_check = client_quota_poll_check,
920 };
921
922 quota_interface_t lmv_quota_interface = {
923         .quota_ctl      = lmv_quota_ctl,
924         .quota_check    = lmv_quota_check,
925 };
926
927 quota_interface_t osc_quota_interface = {
928         .quota_ctl      = client_quota_ctl,
929         .quota_check    = client_quota_check,
930         .quota_poll_check = client_quota_poll_check,
931         .quota_init     = osc_quota_init,
932         .quota_exit     = osc_quota_exit,
933         .quota_chkdq    = osc_quota_chkdq,
934         .quota_setdq    = osc_quota_setdq,
935         .quota_cleanup  = osc_quota_cleanup,
936         .quota_adjust_qunit = client_quota_adjust_qunit,
937 };
938
939 quota_interface_t lov_quota_interface = {
940         .quota_ctl      = lov_quota_ctl,
941         .quota_check    = lov_quota_check,
942         .quota_adjust_qunit = lov_quota_adjust_qunit,
943 };
944
945 #ifdef __KERNEL__
946
947 cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
948
949 static int __init init_lustre_quota(void)
950 {
951 #ifdef HAVE_QUOTA_SUPPORT
952         int rc = 0;
953
954         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
955                                                 proc_lustre_root,
956                                                 NULL, NULL);
957         if (IS_ERR(lquota_type_proc_dir)) {
958                 CERROR("LProcFS failed in lquota-init\n");
959                 rc = PTR_ERR(lquota_type_proc_dir);
960                 return rc;
961         }
962
963         rc = qunit_cache_init();
964         if (rc)
965                 return rc;
966
967         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
968         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
969 #endif
970         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
971         PORTAL_SYMBOL_REGISTER(lmv_quota_interface);
972         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
973         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
974         return 0;
975 }
976
977 static void /*__exit*/ exit_lustre_quota(void)
978 {
979         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
980         PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface);
981         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
982         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
983 #ifdef HAVE_QUOTA_SUPPORT
984         PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
985         PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
986
987         qunit_cache_cleanup();
988
989         if (lquota_type_proc_dir)
990                 lprocfs_remove(&lquota_type_proc_dir);
991 #endif
992 }
993
994 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
995 MODULE_DESCRIPTION("Lustre Quota");
996 MODULE_LICENSE("GPL");
997
998 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
999
1000 #ifdef HAVE_QUOTA_SUPPORT
1001 EXPORT_SYMBOL(mds_quota_interface);
1002 EXPORT_SYMBOL(filter_quota_interface);
1003 #endif
1004 EXPORT_SYMBOL(mdc_quota_interface);
1005 EXPORT_SYMBOL(lmv_quota_interface);
1006 EXPORT_SYMBOL(osc_quota_interface);
1007 EXPORT_SYMBOL(lov_quota_interface);
#endif /* __KERNEL__ */