Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LQUOTA
41
42 #ifdef __KERNEL__
43 # include <linux/version.h>
44 # include <linux/module.h>
45 # include <linux/init.h>
46 # include <linux/fs.h>
47 # include <linux/jbd.h>
48 # include <linux/smp_lock.h>
49 # include <linux/buffer_head.h>
50 # include <linux/workqueue.h>
51 # include <linux/mount.h>
52 #else /* __KERNEL__ */
53 # include <liblustre.h>
54 #endif
55
56 #include <obd_class.h>
57 #include <lustre_mds.h>
58 #include <lustre_dlm.h>
59 #include <lustre_cfg.h>
60 #include <obd_ost.h>
61 #include <lustre_fsfilt.h>
62 #include <lustre_quota.h>
63 #include <lprocfs_status.h>
64 #include "quota_internal.h"
65
66 #ifdef __KERNEL__
67
68 #ifdef HAVE_QUOTA_SUPPORT
69
/* Time of the last "haven't managed to acquire quota" warning; used by
 * quota_chk_acq_common() to rate-limit that CWARN to roughly one message
 * per 30 seconds.  Protected by last_print_lock. */
static cfs_time_t last_print = 0;
static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
72
73 static int filter_quota_setup(struct obd_device *obd)
74 {
75         int rc = 0;
76         struct obd_device_target *obt = &obd->u.obt;
77         ENTRY;
78
79         init_rwsem(&obt->obt_rwsem);
80         obt->obt_qfmt = LUSTRE_QUOTA_V2;
81         atomic_set(&obt->obt_quotachecking, 1);
82         rc = qctxt_init(obd, NULL);
83         if (rc)
84                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
85
86         RETURN(rc);
87 }
88
/* Tear down the slave quota context created by filter_quota_setup(). */
static int filter_quota_cleanup(struct obd_device *obd)
{
        ENTRY;
        qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
        RETURN(0);
}
95
96 static int filter_quota_setinfo(struct obd_device *obd, void *data)
97 {
98         struct obd_export *exp = data;
99         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
100         struct obd_import *imp = exp->exp_imp_reverse;
101         ENTRY;
102
103         LASSERT(imp != NULL);
104
105         /* setup the quota context import */
106         spin_lock(&qctxt->lqc_lock);
107         if (qctxt->lqc_import != NULL) {
108                 spin_unlock(&qctxt->lqc_lock);
109                 if (qctxt->lqc_import == imp)
110                         CDEBUG(D_WARNING, "%s: lqc_import(%p) of obd(%p) was "
111                                "activated already.\n", obd->obd_name, imp, obd);
112                 else
113                         CDEBUG(D_ERROR, "%s: lqc_import(%p:%p) of obd(%p) was "
114                                "activated by others.\n", obd->obd_name,
115                                qctxt->lqc_import, imp, obd);
116         } else {
117                 qctxt->lqc_import = imp;
118                 /* make imp's connect flags equal relative exp's connect flags
119                  * adding it to avoid the scan export list */
120                 imp->imp_connect_data.ocd_connect_flags |=
121                                 (exp->exp_connect_flags &
122                                  (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
123                 spin_unlock(&qctxt->lqc_lock);
124                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated "
125                        "now.\n", obd->obd_name, imp, obd);
126
127                 cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
128                 /* start quota slave recovery thread. (release high limits) */
129                 qslave_start_recovery(obd, qctxt);
130         }
131         RETURN(0);
132 }
133
134 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
135 {
136         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
137         struct obd_import *imp = exp->exp_imp_reverse;
138         ENTRY;
139
140         /* lquota may be not set up before destroying export, b=14896 */
141         if (!obd->obd_set_up)
142                 RETURN(0);
143
144         if (unlikely(imp == NULL))
145                 RETURN(0);
146
147         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
148          * should be invalid b=12374 */
149         spin_lock(&qctxt->lqc_lock);
150         if (qctxt->lqc_import == imp) {
151                 qctxt->lqc_import = NULL;
152                 spin_unlock(&qctxt->lqc_lock);
153                 CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is invalid now.\n",
154                        obd->obd_name, imp, obd);
155                 ptlrpc_cleanup_imp(imp);
156                 dqacq_interrupt(qctxt);
157         } else {
158                 spin_unlock(&qctxt->lqc_lock);
159         }
160         RETURN(0);
161 }
162
163 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
164 {
165         ENTRY;
166
167         if (!sb_any_quota_enabled(obd->u.obt.obt_sb))
168                 RETURN(0);
169
170         if (ignore) {
171                 CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
172                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
173         } else {
174                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
175         }
176
177         RETURN(0);
178 }
179
180 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
181 {
182         struct obd_device_target *obt = &obd->u.obt;
183         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
184         int err, cnt, rc = 0;
185         struct obd_quotactl *oqctl;
186         ENTRY;
187
188         if (!sb_any_quota_enabled(obt->obt_sb))
189                 RETURN(0);
190
191         OBD_ALLOC_PTR(oqctl);
192         if (!oqctl) {
193                 CERROR("Not enough memory!");
194                 RETURN(-ENOMEM);
195         }
196
197         /* set over quota flags for a uid/gid */
198         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
199         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
200
201         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
202                 struct quota_adjust_qunit oqaq_tmp;
203                 struct lustre_qunit_size *lqs = NULL;
204
205                 oqaq_tmp.qaq_flags = cnt;
206                 oqaq_tmp.qaq_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
207
208                 quota_search_lqs(NULL, &oqaq_tmp, qctxt, &lqs);
209                 if (lqs) {
210                         spin_lock(&lqs->lqs_lock);
211                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
212                                 oa->o_flags |= (cnt == USRQUOTA) ?
213                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
214                                 spin_unlock(&lqs->lqs_lock);
215                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
216                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
217                                        qctxt->lqc_sync_blk);
218                                 /* this is for quota_search_lqs */
219                                 lqs_putref(lqs);
220                                 continue;
221                         }
222                         spin_unlock(&lqs->lqs_lock);
223                         /* this is for quota_search_lqs */
224                         lqs_putref(lqs);
225                 }
226
227                 memset(oqctl, 0, sizeof(*oqctl));
228
229                 oqctl->qc_cmd = Q_GETQUOTA;
230                 oqctl->qc_type = cnt;
231                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
232                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
233                 if (err) {
234                         if (!rc)
235                                 rc = err;
236                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
237                                                              OBD_MD_FLGRPQUOTA);
238                         continue;
239                 }
240
241                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
242                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
243                     oqctl->qc_dqblk.dqb_bhardlimit))
244                         oa->o_flags |= (cnt == USRQUOTA) ?
245                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
246         }
247         OBD_FREE_PTR(oqctl);
248         RETURN(rc);
249 }
250
/**
 * check whether the left quota of certain uid and gid can satisfy a block_write
 * or inode_create rpc. When need to acquire quota, return QUOTA_RET_ACQUOTA
 *
 * \param obd     filter/OST obd device doing the operation
 * \param uid     user id the operation is charged to
 * \param gid     group id the operation is charged to
 * \param count   pages to write (isblk) or inodes to create
 * \param cycle   retry counter from quota_chk_acq_common(); the lqs pending
 *                counters are only charged on the first pass (cycle == 0)
 * \param isblk   non-zero: check block quota; zero: check inode quota
 * \param inode   inode being written, used to estimate extra meta blocks
 *                (may be NULL)
 * \param frags   fragment count forwarded to fsfilt_get_mblk()
 * \param pending out: bytes (isblk) or inodes charged as write-pending
 * \retval QUOTA_RET_ACQUOTA when more quota must be acquired from the
 *         master before the operation can proceed, 0 otherwise
 */
static int quota_check_common(struct obd_device *obd, unsigned int uid,
                              unsigned int gid, int count, int cycle, int isblk,
                              struct inode *inode, int frags, int *pending)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        int i;
        __u32 id[MAXQUOTAS] = { uid, gid };
        struct qunit_data qdata[MAXQUOTAS];
        int mb = 0;
        int rc = 0, rc2[2] = { 0, 0 };
        ENTRY;

        CLASSERT(MAXQUOTAS < 4);
        if (!sb_any_quota_enabled(qctxt->lqc_sb))
                RETURN(rc);

        spin_lock(&qctxt->lqc_lock);
        if (!qctxt->lqc_valid){
                spin_unlock(&qctxt->lqc_lock);
                RETURN(rc);
        }
        spin_unlock(&qctxt->lqc_lock);

        for (i = 0; i < MAXQUOTAS; i++) {
                struct lustre_qunit_size *lqs = NULL;

                qdata[i].qd_id = id[i];
                qdata[i].qd_flags = i;
                if (isblk)
                        QDATA_SET_BLK(&qdata[i]);
                qdata[i].qd_count = 0;

                /* ignore root user */
                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
                        continue;

                quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
                if (!lqs)
                        continue;

                rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
                spin_lock(&lqs->lqs_lock);
                /* only the first pass charges the pending counters; retries
                 * from quota_chk_acq_common() must not charge twice */
                if (!cycle) {
                        if (isblk) {
                                *pending = count * CFS_PAGE_SIZE;
                                /* in order to complete this write, we need extra
                                 * meta blocks. This function can get it through
                                 * data needed to be written b=16542 */
                                if (inode) {
                                        mb = *pending;
                                        rc = fsfilt_get_mblk(obd, qctxt->lqc_sb,
                                                             &mb, inode,frags);
                                        if (rc)
                                                CDEBUG(D_ERROR,
                                                       "can't get extra "
                                                       "meta blocks.\n");
                                        else
                                                *pending += mb;
                                }
                                lqs->lqs_bwrite_pending += *pending;
                        } else {
                                *pending = count;
                                lqs->lqs_iwrite_pending += *pending;
                        }
                }

                /* if xx_rec < 0, that means quota are releasing,
                 * and it may return before we use quota. So if
                 * we find this situation, we assume it has
                 * returned b=18491 */
                if (isblk && lqs->lqs_blk_rec < 0) {
                        if (qdata[i].qd_count < -lqs->lqs_blk_rec)
                                qdata[i].qd_count = 0;
                        else
                                qdata[i].qd_count += lqs->lqs_blk_rec;
                }
                if (!isblk && lqs->lqs_ino_rec < 0) {
                        if (qdata[i].qd_count < -lqs->lqs_ino_rec)
                                qdata[i].qd_count = 0;
                        else
                                qdata[i].qd_count += lqs->lqs_ino_rec;
                }


                CDEBUG(D_QUOTA, "count: %d, lqs pending: %lu, qd_count: "LPU64
                       ", metablocks: %d, isblk: %d, pending: %d.\n", count,
                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
                       qdata[i].qd_count, mb, isblk, *pending);
                if (rc2[i] == QUOTA_RET_OK) {
                        if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
                                rc2[i] = QUOTA_RET_ACQUOTA;
                        if (!isblk && qdata[i].qd_count <
                            lqs->lqs_iwrite_pending)
                                rc2[i] = QUOTA_RET_ACQUOTA;
                }

                spin_unlock(&lqs->lqs_lock);

                /* NOTE(review): the lqs fields below are read after lqs_lock
                 * was dropped, so they may be stale; this only arms a test
                 * failpoint — confirm that is intentional */
                if (lqs->lqs_blk_rec  < 0 &&
                    qdata[i].qd_count <
                    lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);

                /* When cycle is zero, lqs_*_pending will be changed. We will
                 * get reference of the lqs here and put reference of lqs in
                 * quota_pending_commit b=14784 */
                if (!cycle)
                        lqs_getref(lqs);

                /* this is for quota_search_lqs */
                lqs_putref(lqs);
        }

        if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
                RETURN(QUOTA_RET_ACQUOTA);
        else
                RETURN(rc);
}
373
/**
 * Check quota for a block write or inode create and, while
 * quota_check_common() reports QUOTA_RET_ACQUOTA, keep calling \a acquire
 * to obtain more quota from the quota master.
 *
 * \param obd     filter/OST obd device
 * \param uid     user id charged for the operation
 * \param gid     group id charged for the operation
 * \param count   pages to write (isblk) or inodes to create
 * \param pending out: amount charged as pending by quota_check_common()
 * \param acquire callback that performs the actual dqacq RPC
 * \param oti     transaction info; its service-thread watchdog is disabled
 *                while waiting for an unavailable quota master (may be NULL)
 * \param isblk   non-zero for block quota, zero for inode quota
 * \param inode   inode being written, for meta-block estimation (may be NULL)
 * \param frags   fragment count forwarded to quota_check_common()
 * \retval 0 when enough quota is available, -EDQUOT when the id is out of
 *         quota, or another negative error from \a acquire
 */
static int quota_chk_acq_common(struct obd_device *obd, unsigned int uid,
                                unsigned int gid, int count, int *pending,
                                quota_acquire acquire,
                                struct obd_trans_info *oti, int isblk,
                                struct inode *inode, int frags)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        struct l_wait_info lwi = { 0 };
        int rc = 0, cycle = 0, count_err = 1;
        ENTRY;

        CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
        *pending = 0;
        /* Unfortunately, if quota master is too busy to handle the
         * pre-dqacq in time and quota hash on ost is used up, we
         * have to wait for the completion of in flight dqacq/dqrel,
         * in order to get enough quota for write b=12588 */
        do_gettimeofday(&work_start);
        while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk,
                                        inode, frags, pending)) &
               QUOTA_RET_ACQUOTA) {

                /* no master import: block until it reconnects —
                 * filter_quota_setinfo() signals lqc_wait_for_qmaster */
                spin_lock(&qctxt->lqc_lock);
                if (!qctxt->lqc_import && oti) {
                        spin_unlock(&qctxt->lqc_lock);

                        LASSERT(oti && oti->oti_thread &&
                                oti->oti_thread->t_watchdog);

                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
                        CDEBUG(D_QUOTA, "sleep for quota master\n");
                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
                                     &lwi);
                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
                        lc_watchdog_touch(oti->oti_thread->t_watchdog,
                                 GET_TIMEOUT(oti->oti_thread->t_svc));
                } else {
                        spin_unlock(&qctxt->lqc_lock);
                }

                cycle++;
                if (isblk)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
                /* after acquire(), we should run quota_check_common again
                 * so that we confirm there are enough quota to finish write */
                rc = acquire(obd, uid, gid, oti, isblk);

                /* please reference to dqacq_completion for the below */
                /* a new request is finished, try again */
                if (rc == QUOTA_REQ_RETURNED) {
                        CDEBUG(D_QUOTA, "finish a quota req, try again\n");
                        continue;
                }

                /* it is out of quota already */
                if (rc == -EDQUOT) {
                        CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
                        break;
                }

                /* -EBUSY and others, wait a second and try again */
                if (rc < 0) {
                        cfs_waitq_t        waitq;
                        struct l_wait_info lwi;

                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
                                lc_watchdog_touch(oti->oti_thread->t_watchdog,
                                         GET_TIMEOUT(oti->oti_thread->t_svc));
                        CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
                               count_err++);

                        /* back off: sleep min(cycle, 10) seconds on a private
                         * waitqueue nobody signals (pure timeout) */
                        init_waitqueue_head(&waitq);
                        lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
                                          NULL);
                        l_wait_event(waitq, 0, &lwi);
                }

                /* rate-limited complaint: at most one CWARN per 30 seconds
                 * across all threads, via last_print/last_print_lock */
                if (rc < 0 || cycle % 10 == 2) {
                        spin_lock(&last_print_lock);
                        if (last_print == 0 ||
                            cfs_time_before((last_print + cfs_time_seconds(30)),
                                            cfs_time_current())) {
                                last_print = cfs_time_current();
                                spin_unlock(&last_print_lock);
                                CWARN("still haven't managed to acquire quota "
                                      "space from the quota master after %d "
                                      "retries (err=%d, rc=%d)\n",
                                      cycle, count_err - 1, rc);
                        } else {
                                spin_unlock(&last_print_lock);
                        }
                }

                CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
                       cycle);
        }
        do_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
                                    LQUOTA_WAIT_FOR_CHK_INO,
                            timediff);

        RETURN(rc);
}
482
/**
 * when a block_write or inode_create rpc is finished, adjust the record for
 * pending blocks and inodes
 *
 * \param obd     filter/OST obd device
 * \param uid     user id the finished operation was charged to
 * \param gid     group id the finished operation was charged to
 * \param pending bytes (isblk) or inodes previously charged by
 *                quota_check_common()
 * \param isblk   non-zero for block quota, zero for inode quota
 * \retval 0 always
 */
static int quota_pending_commit(struct obd_device *obd, unsigned int uid,
                                unsigned int gid, int pending, int isblk)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        int i;
        __u32 id[MAXQUOTAS] = { uid, gid };
        struct qunit_data qdata[MAXQUOTAS];
        ENTRY;

        CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
        CLASSERT(MAXQUOTAS < 4);
        if (!sb_any_quota_enabled(qctxt->lqc_sb))
                RETURN(0);

        do_gettimeofday(&work_start);
        for (i = 0; i < MAXQUOTAS; i++) {
                struct lustre_qunit_size *lqs = NULL;

                qdata[i].qd_id = id[i];
                qdata[i].qd_flags = i;
                if (isblk)
                        QDATA_SET_BLK(&qdata[i]);
                qdata[i].qd_count = 0;

                /* root user is never charged — see quota_check_common() */
                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
                        continue;

                quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
                if (lqs) {
                        int flag = 0;
                        spin_lock(&lqs->lqs_lock);
                        if (isblk) {
                                if (lqs->lqs_bwrite_pending >= pending) {
                                        lqs->lqs_bwrite_pending -= pending;
                                        spin_unlock(&lqs->lqs_lock);
                                        flag = 1;
                                } else {
                                        spin_unlock(&lqs->lqs_lock);
                                        CDEBUG(D_ERROR,
                                               "there are too many blocks!\n");
                                }
                        } else {
                                if (lqs->lqs_iwrite_pending >= pending) {
                                        lqs->lqs_iwrite_pending -= pending;
                                        spin_unlock(&lqs->lqs_lock);
                                        flag = 1;
                                } else {
                                        spin_unlock(&lqs->lqs_lock);
                                        CDEBUG(D_ERROR,
                                               "there are too many files!\n");
                                }
                        }
                        /* NOTE(review): this CDEBUG reads lqs_*_pending after
                         * lqs_lock was dropped; value may be stale (debug
                         * output only) */
                        CDEBUG(D_QUOTA, "lqs pending: %lu, pending: %d, "
                               "isblk: %d.\n",
                               isblk ? lqs->lqs_bwrite_pending :
                               lqs->lqs_iwrite_pending, pending, isblk);

                        /* this putref balances quota_search_lqs() above */
                        lqs_putref(lqs);
                        /* When lqs_*_pending is changed back, we'll putref lqs
                         * here b=14784 — drops the reference taken by
                         * quota_check_common() when it charged the counters */
                        if (flag)
                                lqs_putref(lqs);
                }
        }
        do_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
                                    LQUOTA_WAIT_FOR_COMMIT_INO,
                            timediff);

        RETURN(0);
}
563
/* Module-level init for the MDS quota master: set up the dquot hash. */
static int mds_quota_init(void)
{
        return lustre_dquot_init();
}
568
/* Module-level cleanup counterpart of mds_quota_init(). */
static int mds_quota_exit(void)
{
        lustre_dquot_exit();
        return 0;
}
574
575 static int mds_quota_setup(struct obd_device *obd)
576 {
577         struct obd_device_target *obt = &obd->u.obt;
578         struct mds_obd *mds = &obd->u.mds;
579         int rc;
580         ENTRY;
581
582         if (unlikely(mds->mds_quota)) {
583                 CWARN("try to reinitialize quota context!\n");
584                 RETURN(0);
585         }
586
587         init_rwsem(&obt->obt_rwsem);
588         obt->obt_qfmt = LUSTRE_QUOTA_V2;
589         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
590         atomic_set(&obt->obt_quotachecking, 1);
591         /* initialize quota master and quota context */
592         sema_init(&mds->mds_qonoff_sem, 1);
593         rc = qctxt_init(obd, dqacq_handler);
594         if (rc) {
595                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
596                 RETURN(rc);
597         }
598         mds->mds_quota = 1;
599         RETURN(rc);
600 }
601
602 static int mds_quota_cleanup(struct obd_device *obd)
603 {
604         ENTRY;
605         if (unlikely(!obd->u.mds.mds_quota))
606                 RETURN(0);
607
608         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
609         RETURN(0);
610 }
611
612 static int mds_quota_setinfo(struct obd_device *obd, void *data)
613 {
614         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
615         ENTRY;
616
617         if (unlikely(!obd->u.mds.mds_quota))
618                 RETURN(0);
619
620         if (data != NULL)
621                 QUOTA_MASTER_READY(qctxt);
622         else
623                 QUOTA_MASTER_UNREADY(qctxt);
624         RETURN(0);
625 }
626
/* Disable quota when the MDS filesystem is torn down: clear the enabled
 * flag first, then issue an admin quota-off (UGQUOTA = both user and
 * group) under mds_qonoff_sem. */
static int mds_quota_fs_cleanup(struct obd_device *obd)
{
        struct mds_obd *mds = &obd->u.mds;
        struct obd_quotactl oqctl;
        ENTRY;

        if (unlikely(!mds->mds_quota))
                RETURN(0);

        mds->mds_quota = 0;
        memset(&oqctl, 0, sizeof(oqctl));
        oqctl.qc_type = UGQUOTA;

        down(&mds->mds_qonoff_sem);
        mds_admin_quota_off(obd, &oqctl);
        up(&mds->mds_qonoff_sem);
        RETURN(0);
}
645
646 static int quota_acquire_common(struct obd_device *obd, unsigned int uid,
647                                 unsigned int gid, struct obd_trans_info *oti,
648                                 int isblk)
649 {
650         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
651         int rc;
652         ENTRY;
653
654         rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, isblk, 1, oti);
655         RETURN(rc);
656 }
657
658 #endif /* HAVE_QUOTA_SUPPORT */
659 #endif /* __KERNEL__ */
660
/* Client-side marker that a given uid/gid is over quota on an OSC.
 * Entries live in qinfo_hash; the presence of an entry makes
 * osc_quota_chkdq() return NO_QUOTA for that (client, id, type). */
struct osc_quota_info {
        struct list_head        oqi_hash;       /* hash list */
        struct client_obd      *oqi_cli;        /* osc obd */
        unsigned int            oqi_id;         /* uid/gid of a file */
        short                   oqi_type;       /* quota type */
};
667
/* Protects qinfo_hash and every osc_quota_info linked into it. */
spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;

/* Hash table of osc_quota_info, indexed by hashfn(cli, id, type). */
static struct list_head qinfo_hash[NR_DQHASH];
/* SLAB cache for client quota context */
cfs_mem_cache_t *qinfo_cachep = NULL;
673
674 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
675                          __attribute__((__const__));
676
677 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
678 {
679         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
680         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
681         return tmp;
682 }
683
684 /* caller must hold qinfo_list_lock */
685 static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
686 {
687         struct list_head *head = qinfo_hash +
688                 hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
689
690         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
691         list_add(&oqi->oqi_hash, head);
692 }
693
/* caller must hold qinfo_list_lock */
/* Unlink an entry from its hash bucket; list_del_init() leaves the node
 * self-linked, so a later list_del_init on it is harmless. */
static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
{
        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
        list_del_init(&oqi->oqi_hash);
}
700
701 /* caller must hold qinfo_list_lock */
702 static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
703                                                 unsigned int id, int type)
704 {
705         unsigned int hashent = hashfn(cli, id, type);
706         struct osc_quota_info *oqi;
707         ENTRY;
708
709         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
710         list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
711                 if (oqi->oqi_cli == cli &&
712                     oqi->oqi_id == id && oqi->oqi_type == type)
713                         return oqi;
714         }
715         RETURN(NULL);
716 }
717
718 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
719                                           unsigned int id, int type)
720 {
721         struct osc_quota_info *oqi;
722         ENTRY;
723
724         OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi));
725         if(!oqi)
726                 RETURN(NULL);
727
728         CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
729         oqi->oqi_cli = cli;
730         oqi->oqi_id = id;
731         oqi->oqi_type = type;
732
733         RETURN(oqi);
734 }
735
/* Return an osc_quota_info to the qinfo_cachep slab. */
static void free_qinfo(struct osc_quota_info *oqi)
{
        OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
}
740
741 int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid)
742 {
743         unsigned int id;
744         int cnt, rc = QUOTA_OK;
745         ENTRY;
746
747         spin_lock(&qinfo_list_lock);
748         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
749                 struct osc_quota_info *oqi = NULL;
750
751                 id = (cnt == USRQUOTA) ? uid : gid;
752                 oqi = find_qinfo(cli, id, cnt);
753                 if (oqi) {
754                         rc = NO_QUOTA;
755                         break;
756                 }
757         }
758         spin_unlock(&qinfo_list_lock);
759
760         RETURN(rc);
761 }
762
/**
 * Update the client-side over-quota cache from the quota flags an OST
 * returned in a reply: insert a marker when \a flags says the id has no
 * quota left, remove a stale marker once quota is available again.
 *
 * \param cli   client obd the reply belongs to
 * \param uid   user id carried by the reply
 * \param gid   group id carried by the reply
 * \param valid which of OBD_MD_FLUSRQUOTA/OBD_MD_FLGRPQUOTA are meaningful
 * \param flags OBD_FL_NO_USRQUOTA/OBD_FL_NO_GRPQUOTA bits from the reply
 * \retval 0 on success, -ENOMEM if a marker could not be allocated
 */
int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid,
                    obd_flag valid, obd_flag flags)
{
        unsigned int id;
        obd_flag noquota;
        int cnt, rc = 0;
        ENTRY;


        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                struct osc_quota_info *oqi, *old;

                /* skip quota types the reply carries no information about */
                if (!(valid & ((cnt == USRQUOTA) ?
                    OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
                        continue;

                id = (cnt == USRQUOTA) ? uid : gid;
                noquota = (cnt == USRQUOTA) ?
                    (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);

                /* a fresh entry is allocated before taking the spinlock —
                 * presumably because OBD_SLAB_ALLOC(CFS_ALLOC_STD) may
                 * sleep (TODO confirm); the unneeded copy is freed below */
                oqi = alloc_qinfo(cli, id, cnt);
                if (oqi) {
                        spin_lock(&qinfo_list_lock);

                        old = find_qinfo(cli, id, cnt);
                        if (old && !noquota)
                                remove_qinfo_hash(old);
                        else if (!old && noquota)
                                insert_qinfo_hash(oqi);

                        spin_unlock(&qinfo_list_lock);

                        /* free the new entry unless it was inserted; free
                         * the old entry if it was unhashed above */
                        if (old || !noquota)
                                free_qinfo(oqi);
                        if (old && !noquota)
                                free_qinfo(old);
                } else {
                        CERROR("not enough mem!\n");
                        rc = -ENOMEM;
                        break;
                }
        }

        RETURN(rc);
}
808
809 int osc_quota_cleanup(struct obd_device *obd)
810 {
811         struct client_obd *cli = &obd->u.cli;
812         struct osc_quota_info *oqi, *n;
813         int i;
814         ENTRY;
815
816         spin_lock(&qinfo_list_lock);
817         for (i = 0; i < NR_DQHASH; i++) {
818                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
819                         if (oqi->oqi_cli != cli)
820                                 continue;
821                         remove_qinfo_hash(oqi);
822                         free_qinfo(oqi);
823                 }
824         }
825         spin_unlock(&qinfo_list_lock);
826
827         RETURN(0);
828 }
829
830 int osc_quota_init(void)
831 {
832         int i;
833         ENTRY;
834
835         LASSERT(qinfo_cachep == NULL);
836         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
837                                             sizeof(struct osc_quota_info),
838                                             0, 0);
839         if (!qinfo_cachep)
840                 RETURN(-ENOMEM);
841
842         for (i = 0; i < NR_DQHASH; i++)
843                 CFS_INIT_LIST_HEAD(qinfo_hash + i);
844
845         RETURN(0);
846 }
847
848 int osc_quota_exit(void)
849 {
850         struct osc_quota_info *oqi, *n;
851         int i, rc;
852         ENTRY;
853
854         spin_lock(&qinfo_list_lock);
855         for (i = 0; i < NR_DQHASH; i++) {
856                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
857                         remove_qinfo_hash(oqi);
858                         free_qinfo(oqi);
859                 }
860         }
861         spin_unlock(&qinfo_list_lock);
862
863         rc = cfs_mem_cache_destroy(qinfo_cachep);
864         LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
865         qinfo_cachep = NULL;
866
867         RETURN(0);
868 }
869
#ifdef __KERNEL__
#ifdef HAVE_QUOTA_SUPPORT
/* Server-side (MDS) quota operation table, consumed by the obd stack. */
quota_interface_t mds_quota_interface = {
        .quota_init     = mds_quota_init,
        .quota_exit     = mds_quota_exit,
        .quota_setup    = mds_quota_setup,
        .quota_cleanup  = mds_quota_cleanup,
        .quota_check    = target_quota_check,
        .quota_ctl      = mds_quota_ctl,
        .quota_setinfo  = mds_quota_setinfo,
        .quota_fs_cleanup = mds_quota_fs_cleanup,
        .quota_recovery = mds_quota_recovery,
        .quota_adjust   = mds_quota_adjust,
        .quota_chkquota = quota_chk_acq_common,
        .quota_acquire  = quota_acquire_common,
        .quota_pending_commit = quota_pending_commit,
};

/* Server-side (OST filter) quota operation table. */
quota_interface_t filter_quota_interface = {
        .quota_setup    = filter_quota_setup,
        .quota_cleanup  = filter_quota_cleanup,
        .quota_check    = target_quota_check,
        .quota_ctl      = filter_quota_ctl,
        .quota_setinfo  = filter_quota_setinfo,
        .quota_clearinfo = filter_quota_clearinfo,
        .quota_enforce  = filter_quota_enforce,
        .quota_getflag  = filter_quota_getflag,
        .quota_acquire  = quota_acquire_common,
        .quota_adjust   = filter_quota_adjust,
        .quota_chkquota = quota_chk_acq_common,
        .quota_adjust_qunit   = filter_quota_adjust_qunit,
        .quota_pending_commit = quota_pending_commit,
};
#endif
#endif /* __KERNEL__ */
905
/* Client-side (MDC) quota operation table. */
quota_interface_t mdc_quota_interface = {
        .quota_ctl      = client_quota_ctl,
        .quota_check    = client_quota_check,
        .quota_poll_check = client_quota_poll_check,
};
911
/* LMV (MD layout) quota operation table. */
quota_interface_t lmv_quota_interface = {
        .quota_ctl      = lmv_quota_ctl,
        .quota_check    = lmv_quota_check,
};
916
/* Client-side (OSC) quota operation table; also wires in the local
 * per-id quota cache implemented above (chkdq/setdq/init/exit/cleanup). */
quota_interface_t osc_quota_interface = {
        .quota_ctl      = client_quota_ctl,
        .quota_check    = client_quota_check,
        .quota_poll_check = client_quota_poll_check,
        .quota_init     = osc_quota_init,
        .quota_exit     = osc_quota_exit,
        .quota_chkdq    = osc_quota_chkdq,
        .quota_setdq    = osc_quota_setdq,
        .quota_cleanup  = osc_quota_cleanup,
        .quota_adjust_qunit = client_quota_adjust_qunit,
};
928
/* LOV (OST striping layout) quota operation table. */
quota_interface_t lov_quota_interface = {
        .quota_ctl      = lov_quota_ctl,
        .quota_check    = lov_quota_check,
        .quota_adjust_qunit = lov_quota_adjust_qunit,
};
934
#ifdef __KERNEL__

/* /proc directory registered for lquota; removed again on module exit */
cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
938
939 static int __init init_lustre_quota(void)
940 {
941 #ifdef HAVE_QUOTA_SUPPORT
942         int rc = 0;
943
944         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
945                                                 proc_lustre_root,
946                                                 NULL, NULL);
947         if (IS_ERR(lquota_type_proc_dir)) {
948                 CERROR("LProcFS failed in lquota-init\n");
949                 rc = PTR_ERR(lquota_type_proc_dir);
950                 return rc;
951         }
952
953         rc = qunit_cache_init();
954         if (rc)
955                 return rc;
956
957         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
958         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
959 #endif
960         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
961         PORTAL_SYMBOL_REGISTER(lmv_quota_interface);
962         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
963         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
964         return 0;
965 }
966
/* Module exit: unregister the exported interface tables, then (server
 * builds) release the qunit cache and the lquota /proc directory.
 * Unwinds in roughly the reverse order of init_lustre_quota(). */
static void /*__exit*/ exit_lustre_quota(void)
{
        PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
#ifdef HAVE_QUOTA_SUPPORT
        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);

        qunit_cache_cleanup();

        if (lquota_type_proc_dir)
                lprocfs_remove(&lquota_type_proc_dir);
#endif
}
983
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Quota");
MODULE_LICENSE("GPL");

/* declare the module entry/exit points to the CFS module framework */
cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);

/* export the quota interface tables for use by the rest of the obd stack */
#ifdef HAVE_QUOTA_SUPPORT
EXPORT_SYMBOL(mds_quota_interface);
EXPORT_SYMBOL(filter_quota_interface);
#endif
EXPORT_SYMBOL(mdc_quota_interface);
EXPORT_SYMBOL(lmv_quota_interface);
EXPORT_SYMBOL(osc_quota_interface);
EXPORT_SYMBOL(lov_quota_interface);
#endif /* __KERNEL */