Whamcloud - gitweb
Branch b1_8
[fs/lustre-release.git] / lustre / quota / quota_interface.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #ifndef EXPORT_SYMTAB
38 # define EXPORT_SYMTAB
39 #endif
40 #define DEBUG_SUBSYSTEM S_LQUOTA
41
42 #ifdef __KERNEL__
43 # include <linux/version.h>
44 # include <linux/module.h>
45 # include <linux/init.h>
46 # include <linux/fs.h>
47 # include <linux/jbd.h>
48 #  include <linux/smp_lock.h>
49 #  include <linux/buffer_head.h>
50 #  include <linux/workqueue.h>
51 #  include <linux/mount.h>
52 #else /* __KERNEL__ */
53 # include <liblustre.h>
54 #endif
55
56 #include <obd_class.h>
57 #include <lustre_mds.h>
58 #include <lustre_dlm.h>
59 #include <lustre_cfg.h>
60 #include <obd_ost.h>
61 #include <lustre_fsfilt.h>
62 #include <lustre_quota.h>
63 #include <lprocfs_status.h>
64 #include "quota_internal.h"
65
66 #ifdef __KERNEL__
67
68 #ifdef HAVE_QUOTA_SUPPORT
69
/* timestamp of the most recent "can't acquire quota" warning emitted by
 * quota_chk_acq_common(); used to rate-limit that warning to one per 30s.
 * Protected by last_print_lock. */
static cfs_time_t last_print = 0;
static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
72
73 static int filter_quota_setup(struct obd_device *obd)
74 {
75         int rc = 0;
76         struct obd_device_target *obt = &obd->u.obt;
77         ENTRY;
78
79 #ifdef HAVE_QUOTA64
80         obt->obt_qfmt = LUSTRE_QUOTA_V2;
81 #else
82         obt->obt_qfmt = LUSTRE_QUOTA_V1;
83 #endif
84         atomic_set(&obt->obt_quotachecking, 1);
85         rc = qctxt_init(obd, NULL);
86         if (rc)
87                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
88
89         RETURN(rc);
90 }
91
92 static int filter_quota_cleanup(struct obd_device *obd)
93 {
94         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
95         return 0;
96 }
97
98 static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd)
99 {
100         struct obd_import *imp;
101         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
102         ENTRY;
103
104         /* setup the quota context import */
105         spin_lock(&obd->u.obt.obt_qctxt.lqc_lock);
106         obd->u.obt.obt_qctxt.lqc_import = exp->exp_imp_reverse;
107         spin_unlock(&obd->u.obt.obt_qctxt.lqc_lock);
108         CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated now, \n",
109                obd->obd_name,exp->exp_imp_reverse, obd);
110
111         /* make imp's connect flags equal relative exp's connect flags
112          * adding it to avoid the scan export list
113          */
114         imp = exp->exp_imp_reverse;
115         if (imp)
116                 imp->imp_connect_data.ocd_connect_flags |=
117                         (exp->exp_connect_flags &
118                          (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
119
120         cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
121         /* start quota slave recovery thread. (release high limits) */
122         qslave_start_recovery(obd, &obd->u.obt.obt_qctxt);
123         RETURN(0);
124 }
125
126 static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
127 {
128         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
129         ENTRY;
130
131         /* lquota may be not set up before destroying export, b=14896 */
132         if (!obd->obd_set_up)
133                 RETURN(0);
134
135         /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import
136          * should be invalid b=12374 */
137         if (qctxt->lqc_import && qctxt->lqc_import == exp->exp_imp_reverse) {
138                 spin_lock(&qctxt->lqc_lock);
139                 qctxt->lqc_import = NULL;
140                 spin_unlock(&qctxt->lqc_lock);
141                 ptlrpc_cleanup_imp(exp->exp_imp_reverse);
142                 dqacq_interrupt(qctxt);
143                 CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
144                        obd->obd_name, obd);
145         }
146         RETURN(0);
147 }
148
149 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
150 {
151         ENTRY;
152
153         if (!sb_any_quota_enabled(obd->u.obt.obt_sb))
154                 RETURN(0);
155
156         if (ignore) {
157                 CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
158                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
159         } else {
160                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
161         }
162
163         RETURN(0);
164 }
165
166 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
167 {
168         struct obd_device_target *obt = &obd->u.obt;
169         struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
170         int err, cnt, rc = 0;
171         struct obd_quotactl *oqctl;
172         ENTRY;
173
174         if (!sb_any_quota_enabled(obt->obt_sb))
175                 RETURN(0);
176
177         OBD_ALLOC_PTR(oqctl);
178         if (!oqctl) {
179                 CERROR("Not enough memory!");
180                 RETURN(-ENOMEM);
181         }
182
183         /* set over quota flags for a uid/gid */
184         oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
185         oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
186
187         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
188                 struct quota_adjust_qunit oqaq_tmp;
189                 struct lustre_qunit_size *lqs = NULL;
190
191                 oqaq_tmp.qaq_flags = cnt;
192                 oqaq_tmp.qaq_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
193
194                 quota_search_lqs(NULL, &oqaq_tmp, qctxt, &lqs);
195                 if (lqs) {
196                         spin_lock(&lqs->lqs_lock);
197                         if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
198                                 oa->o_flags |= (cnt == USRQUOTA) ?
199                                         OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
200                                 CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
201                                        "sync_blk(%d)\n", lqs->lqs_bunit_sz,
202                                        qctxt->lqc_sync_blk);
203                                 spin_unlock(&lqs->lqs_lock);
204                                 /* this is for quota_search_lqs */
205                                 lqs_putref(lqs);
206                                 continue;
207                         }
208                         spin_unlock(&lqs->lqs_lock);
209                         /* this is for quota_search_lqs */
210                         lqs_putref(lqs);
211                 }
212
213                 memset(oqctl, 0, sizeof(*oqctl));
214
215                 oqctl->qc_cmd = Q_GETQUOTA;
216                 oqctl->qc_type = cnt;
217                 oqctl->qc_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
218                 err = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
219                 if (err) {
220                         if (!rc)
221                                 rc = err;
222                         oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
223                                                              OBD_MD_FLGRPQUOTA);
224                         continue;
225                 }
226
227                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
228                    (toqb(oqctl->qc_dqblk.dqb_curspace) >=
229                     oqctl->qc_dqblk.dqb_bhardlimit))
230                         oa->o_flags |= (cnt == USRQUOTA) ?
231                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
232         }
233         OBD_FREE_PTR(oqctl);
234         RETURN(rc);
235 }
236
237 static int filter_quota_acquire(struct obd_device *obd, unsigned int uid,
238                                 unsigned int gid, struct obd_trans_info *oti)
239 {
240         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
241         int rc;
242         ENTRY;
243
244         rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, LQUOTA_FLAGS_BLK, 1, oti);
245         RETURN(rc);
246 }
247
/* check whether the left quota of certain uid and gid can satisfy a block_write
 * or inode_create rpc. When need to acquire quota, return QUOTA_RET_ACQUOTA.
 *
 * @count is pages for block writes, inodes for creates. On the first pass
 * (cycle == 0) the request size is recorded in *pending and added to the
 * lqs pending counters; later cycles only re-check. @inode/@frags are used
 * to estimate extra metadata blocks for block writes. */
static int quota_check_common(struct obd_device *obd, unsigned int uid,
                              unsigned int gid, int count, int cycle, int isblk,
                              struct inode *inode, int frags, int *pending)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        int i;
        __u32 id[MAXQUOTAS] = { uid, gid };
        struct qunit_data qdata[MAXQUOTAS];
        int mb = 0;
        int rc = 0, rc2[2] = { 0, 0 };
        ENTRY;

        CLASSERT(MAXQUOTAS < 4);
        /* nothing to do when quota is disabled on this superblock */
        if (!sb_any_quota_enabled(qctxt->lqc_sb))
                RETURN(rc);

        /* bail out while the quota context is being torn down */
        spin_lock(&qctxt->lqc_lock);
        if (!qctxt->lqc_valid){
                spin_unlock(&qctxt->lqc_lock);
                RETURN(rc);
        }
        spin_unlock(&qctxt->lqc_lock);

        for (i = 0; i < MAXQUOTAS; i++) {
                struct lustre_qunit_size *lqs = NULL;

                qdata[i].qd_id = id[i];
                qdata[i].qd_flags = i;
                if (isblk)
                        QDATA_SET_BLK(&qdata[i]);
                qdata[i].qd_count = 0;

                /* ignore root user */
                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
                        continue;

                /* no lqs means no limit is set for this id: nothing to check */
                quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
                if (!lqs)
                        continue;

                /* qd_count is filled with the remaining local quota */
                rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
                spin_lock(&lqs->lqs_lock);
                /* only the first cycle records the request into the pending
                 * counters; retries must not double-count it */
                if (!cycle) {
                        if (isblk) {
                                *pending = count * CFS_PAGE_SIZE;
                                /* in order to complete this write, we need extra
                                 * meta blocks. This function can get it through
                                 * data needed to be written b=16542 */
                                mb = *pending;
                                LASSERT(inode && frags > 0);
                                if (fsfilt_get_mblk(obd, qctxt->lqc_sb, &mb,
                                                    inode, frags) < 0)
                                        CDEBUG(D_ERROR,
                                               "can't get extra meta blocks.\n");
                                else
                                        *pending += mb;
                                lqs->lqs_bwrite_pending += *pending;
                        } else {
                                *pending = count;
                                lqs->lqs_iwrite_pending += *pending;
                        }
                }

                /* if xx_rec < 0, that means quota are releasing,
                 * and it may return before we use quota. So if
                 * we find this situation, we assuming it has
                 * returned b=18491 */
                if (isblk && lqs->lqs_blk_rec < 0) {
                        if (qdata[i].qd_count < -lqs->lqs_blk_rec)
                                qdata[i].qd_count = 0;
                        else
                                qdata[i].qd_count += lqs->lqs_blk_rec;
                }
                if (!isblk && lqs->lqs_ino_rec < 0) {
                        if (qdata[i].qd_count < -lqs->lqs_ino_rec)
                                qdata[i].qd_count = 0;
                        else
                                qdata[i].qd_count += lqs->lqs_ino_rec;
                }

                CDEBUG(D_QUOTA, "count: %d, lqs pending: %lu, qd_count: "LPU64
                       ", metablocks: %d, isblk: %d, pending: %d.\n", count,
                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
                       qdata[i].qd_count, mb, isblk, *pending);
                /* the remaining quota must cover everything already pending,
                 * otherwise more must be acquired from the master */
                if (rc2[i] == QUOTA_RET_OK) {
                        if (isblk && qdata[i].qd_count < lqs->lqs_bwrite_pending)
                                rc2[i] = QUOTA_RET_ACQUOTA;
                        if (!isblk && qdata[i].qd_count <
                            lqs->lqs_iwrite_pending)
                                rc2[i] = QUOTA_RET_ACQUOTA;
                }

                spin_unlock(&lqs->lqs_lock);

                /* NOTE(review): lqs counters are read here without lqs_lock;
                 * this only gates a failure-injection delay, but the reads
                 * are racy — confirm this is intentional */
                if (lqs->lqs_blk_rec  < 0 &&
                    qdata[i].qd_count <
                    lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5);

                /* When cycle is zero, lqs_*_pending will be changed. We will
                 * get reference of the lqs here and put reference of lqs in
                 * quota_pending_commit b=14784 */
                if (!cycle)
                        lqs_getref(lqs);

                /* this is for quota_search_lqs */
                lqs_putref(lqs);
        }

        /* acquire if either the user or the group id needs more quota */
        if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
                RETURN(QUOTA_RET_ACQUOTA);
        else
                RETURN(rc);
}
364
/* Check quota for @uid/@gid and, while the check reports a shortage, keep
 * acquiring from the quota master via @acquire until there is enough, the
 * id is definitively over quota (-EDQUOT), or an error persists. The time
 * spent is accounted into the lproc quota stats. */
static int quota_chk_acq_common(struct obd_device *obd, unsigned int uid,
                                unsigned int gid, int count, int *pending,
                                int isblk, quota_acquire acquire,
                                struct obd_trans_info *oti, struct inode *inode,
                                int frags)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        struct l_wait_info lwi = { 0 };
        int rc = 0, cycle = 0, count_err = 1;
        ENTRY;

        CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
        /* Unfortunately, if quota master is too busy to handle the
         * pre-dqacq in time and quota hash on ost is used up, we
         * have to wait for the completion of in flight dqacq/dqrel,
         * in order to get enough quota for write b=12588 */
        do_gettimeofday(&work_start);
        while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk,
                                        inode, frags, pending)) & QUOTA_RET_ACQUOTA) {

                spin_lock(&qctxt->lqc_lock);
                if (!qctxt->lqc_import && oti) {
                        /* the master is unreachable: park this service thread
                         * (watchdog disabled) until the connection returns */
                        spin_unlock(&qctxt->lqc_lock);

                        LASSERT(oti && oti->oti_thread &&
                                oti->oti_thread->t_watchdog);

                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
                        CDEBUG(D_QUOTA, "sleep for quota master\n");
                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
                                     &lwi);
                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
                        lc_watchdog_touch(oti->oti_thread->t_watchdog);
                } else {
                        spin_unlock(&qctxt->lqc_lock);
                }

                cycle++;
                if (isblk)
                        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
                /* after acquire(), we should run quota_check_common again
                 * so that we confirm there are enough quota to finish write */
                rc = acquire(obd, uid, gid, oti);

                /* please reference to dqacq_completion for the below */
                /* a new request is finished, try again */
                if (rc == QUOTA_REQ_RETURNED) {
                        CDEBUG(D_QUOTA, "finish a quota req, try again\n");
                        continue;
                }

                /* it is out of quota already */
                if (rc == -EDQUOT) {
                        CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
                        break;
                }

                /* -EBUSY and others, wait a second and try again */
                if (rc < 0) {
                        cfs_waitq_t        waitq;
                        struct l_wait_info lwi;

                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
                                lc_watchdog_touch(oti->oti_thread->t_watchdog);
                        CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
                               count_err++);

                        /* back off min(cycle, 10) seconds before retrying */
                        init_waitqueue_head(&waitq);
                        lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
                                          NULL);
                        l_wait_event(waitq, 0, &lwi);
                }

                /* warn about a stuck acquire, rate-limited via last_print to
                 * at most one message per 30 seconds */
                if (rc < 0 || cycle % 10 == 2) {
                        spin_lock(&last_print_lock);
                        if (last_print == 0 ||
                            cfs_time_before((last_print + cfs_time_seconds(30)),
                                            cfs_time_current())) {
                                CWARN("still haven't managed to acquire quota "
                                      "space from the quota master after %d "
                                      "retries (err=%d, rc=%d)\n",
                                      cycle, count_err - 1, rc);
                                last_print = cfs_time_current();
                        }
                        spin_unlock(&last_print_lock);
                }

                CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
                       cycle);
        }

        /* account the total wall time spent checking/acquiring quota */
        do_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
                                    LQUOTA_WAIT_FOR_CHK_INO,
                            timediff);

        RETURN(rc);
}
468
469 static int filter_quota_check(struct obd_device *obd, unsigned int uid,
470                               unsigned int gid, int npage, int *pending,
471                               quota_acquire acquire, struct obd_trans_info *oti,
472                               struct inode *inode, int frags)
473 {
474         return quota_chk_acq_common(obd, uid, gid, npage, pending, LQUOTA_FLAGS_BLK,
475                                     acquire, oti, inode, frags);
476 }
477
/* when a block_write or inode_create rpc is finished, adjust the record for
 * pending blocks and inodes: subtract @pending (recorded earlier by
 * quota_check_common) from the per-id lqs counters and drop the matching
 * lqs reference taken there. */
static int quota_pending_commit(struct obd_device *obd, unsigned int uid,
                                unsigned int gid, int pending, int isblk)
{
        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
        struct timeval work_start;
        struct timeval work_end;
        long timediff;
        int i;
        __u32 id[MAXQUOTAS] = { uid, gid };
        struct qunit_data qdata[MAXQUOTAS];
        ENTRY;

        CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
        CLASSERT(MAXQUOTAS < 4);
        if (!sb_any_quota_enabled(qctxt->lqc_sb))
                RETURN(0);

        do_gettimeofday(&work_start);
        for (i = 0; i < MAXQUOTAS; i++) {
                struct lustre_qunit_size *lqs = NULL;

                qdata[i].qd_id = id[i];
                qdata[i].qd_flags = i;
                if (isblk)
                        QDATA_SET_BLK(&qdata[i]);
                qdata[i].qd_count = 0;

                /* root user is never tracked */
                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
                        continue;

                quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
                if (lqs) {
                        int flag = 0;
                        spin_lock(&lqs->lqs_lock);
                        /* undo the pending bump made by quota_check_common;
                         * flag remembers whether the paired lqs reference
                         * must also be dropped below */
                        if (isblk) {
                                if (lqs->lqs_bwrite_pending >= pending) {
                                        lqs->lqs_bwrite_pending -= pending;
                                        flag = 1;
                                } else {
                                        CDEBUG(D_ERROR,
                                               "there are too many blocks!\n");
                                }
                        } else {
                                if (lqs->lqs_iwrite_pending >= pending) {
                                        lqs->lqs_iwrite_pending -= pending;
                                        flag = 1;
                                } else {
                                        CDEBUG(D_ERROR,
                                               "there are too many files!\n");
                                }
                        }
                        CDEBUG(D_QUOTA, "lqs pending: %lu, pending: %d, "
                               "isblk: %d.\n",
                               isblk ? lqs->lqs_bwrite_pending :
                               lqs->lqs_iwrite_pending, pending, isblk);

                        spin_unlock(&lqs->lqs_lock);
                        /* this putref pairs with quota_search_lqs above */
                        lqs_putref(lqs);
                        /* When lqs_*_pending is changed back, we'll putref lqs
                         * here b=14784 (pairs with the lqs_getref taken in
                         * quota_check_common on the first cycle) */
                        if (flag)
                                lqs_putref(lqs);
                }
        }
        /* account the commit time into the lproc quota stats */
        do_gettimeofday(&work_end);
        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
        lprocfs_counter_add(qctxt->lqc_stats,
                            isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
                                    LQUOTA_WAIT_FOR_COMMIT_INO,
                            timediff);

        RETURN(0);
}
553
554 static int filter_quota_pending_commit(struct obd_device *obd, unsigned int uid,
555                                        unsigned int gid, int blocks)
556 {
557         return quota_pending_commit(obd, uid, gid, blocks, LQUOTA_FLAGS_BLK);
558 }
559
/* Set up the global dquot machinery used by the quota master. */
static int mds_quota_init(void)
{
        int rc = lustre_dquot_init();

        return rc;
}
564
/* Tear down the global dquot machinery; always succeeds. */
static int mds_quota_exit(void)
{
        lustre_dquot_exit();

        return 0;
}
570
571 static int mds_quota_setup(struct obd_device *obd)
572 {
573         struct obd_device_target *obt = &obd->u.obt;
574         struct mds_obd *mds = &obd->u.mds;
575         int rc;
576         ENTRY;
577
578 #ifdef HAVE_QUOTA64
579         obt->obt_qfmt = LUSTRE_QUOTA_V2;
580 #else
581         obt->obt_qfmt = LUSTRE_QUOTA_V1;
582 #endif
583         mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
584         atomic_set(&obt->obt_quotachecking, 1);
585         /* initialize quota master and quota context */
586         sema_init(&mds->mds_qonoff_sem, 1);
587         rc = qctxt_init(obd, dqacq_handler);
588         if (rc) {
589                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
590                 RETURN(rc);
591         }
592         RETURN(rc);
593 }
594
595 static int mds_quota_cleanup(struct obd_device *obd)
596 {
597         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
598         RETURN(0);
599 }
600
/* Switch off administrative quota for both user and group (UGQUOTA) as part
 * of MDS filesystem cleanup. Always returns 0. */
static int mds_quota_fs_cleanup(struct obd_device *obd)
{
        struct mds_obd *mds = &obd->u.mds;
        struct obd_quotactl oqctl;
        ENTRY;

        memset(&oqctl, 0, sizeof(oqctl));
        oqctl.qc_type = UGQUOTA;

        /* serialize against concurrent quota on/off operations */
        down(&mds->mds_qonoff_sem);
        mds_admin_quota_off(obd, &oqctl);
        up(&mds->mds_qonoff_sem);
        RETURN(0);
}
615
616 static int mds_quota_check(struct obd_device *obd, unsigned int uid,
617                            unsigned int gid, int inodes, int *pending,
618                            quota_acquire acquire, struct obd_trans_info *oti,
619                            struct inode *inode, int frags)
620 {
621         return quota_chk_acq_common(obd, uid, gid, inodes, pending, 0,
622                                     acquire, oti, inode, frags);
623 }
624
625 static int mds_quota_acquire(struct obd_device *obd, unsigned int uid,
626                              unsigned int gid, struct obd_trans_info *oti)
627 {
628         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
629         int rc;
630         ENTRY;
631
632         rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 0, 1, oti);
633         RETURN(rc);
634 }
635
/* Inode-quota flavour (isblk == 0) of quota_pending_commit, called when an
 * inode-create RPC completes. */
static int mds_quota_pending_commit(struct obd_device *obd, unsigned int uid,
                                    unsigned int gid, int inodes)
{
        return quota_pending_commit(obd, uid, gid, inodes,
                                    0);
}
641 #endif /* HAVE_QUOTA_SUPPORT */
642 #endif /* __KERNEL__ */
643
/* Per-(osc, id, type) record noting that the id is currently out of quota;
 * presence of an entry in qinfo_hash makes osc_quota_chkdq() report
 * NO_QUOTA for that id. */
struct osc_quota_info {
        struct list_head        oqi_hash;       /* hash list */
        struct client_obd      *oqi_cli;        /* osc obd */
        unsigned int            oqi_id;         /* uid/gid of a file */
        short                   oqi_type;       /* quota type */
};

/* protects qinfo_hash and every osc_quota_info linked into it */
spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;

/* hash of osc_quota_info entries, keyed by hashfn(cli, id, type) */
static struct list_head qinfo_hash[NR_DQHASH];
/* SLAB cache for client quota context */
cfs_mem_cache_t *qinfo_cachep = NULL;
656
657 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
658                          __attribute__((__const__));
659
660 static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
661 {
662         unsigned long tmp = ((unsigned long)cli>>6) ^ id;
663         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
664         return tmp;
665 }
666
667 /* caller must hold qinfo_list_lock */
668 static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
669 {
670         struct list_head *head = qinfo_hash +
671                 hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
672
673         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
674         list_add(&oqi->oqi_hash, head);
675 }
676
/* caller must hold qinfo_list_lock */
static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
{
        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
        /* list_del_init keeps the entry self-linked, so a later removal or
         * free of an unlinked entry stays harmless */
        list_del_init(&oqi->oqi_hash);
}
683
684 /* caller must hold qinfo_list_lock */
685 static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
686                                                 unsigned int id, int type)
687 {
688         unsigned int hashent = hashfn(cli, id, type);
689         struct osc_quota_info *oqi;
690
691         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
692         list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
693                 if (oqi->oqi_cli == cli &&
694                     oqi->oqi_id == id && oqi->oqi_type == type)
695                         return oqi;
696         }
697         return NULL;
698 }
699
700 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
701                                           unsigned int id, int type)
702 {
703         struct osc_quota_info *oqi;
704         ENTRY;
705
706         OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi));
707         if(!oqi)
708                 RETURN(NULL);
709
710         CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
711         oqi->oqi_cli = cli;
712         oqi->oqi_id = id;
713         oqi->oqi_type = type;
714
715         RETURN(oqi);
716 }
717
/* Return an osc_quota_info to the qinfo slab cache; the caller must have
 * unlinked it from qinfo_hash first. */
static void free_qinfo(struct osc_quota_info *oqi)
{
        OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
}
722
723 int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid)
724 {
725         unsigned int id;
726         int cnt, rc = QUOTA_OK;
727         ENTRY;
728
729         spin_lock(&qinfo_list_lock);
730         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
731                 struct osc_quota_info *oqi = NULL;
732
733                 id = (cnt == USRQUOTA) ? uid : gid;
734                 oqi = find_qinfo(cli, id, cnt);
735                 if (oqi) {
736                         rc = NO_QUOTA;
737                         break;
738                 }
739         }
740         spin_unlock(&qinfo_list_lock);
741
742         RETURN(rc);
743 }
744
/* Update the per-osc out-of-quota records from server-reported flags:
 * insert an entry when an id ran out of quota, remove it when the id
 * regained quota. @valid says which quota types the server reported on;
 * @flags carries the OBD_FL_NO_*QUOTA bits. Returns 0 or -ENOMEM. */
int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid,
                    obd_flag valid, obd_flag flags)
{
        unsigned int id;
        obd_flag noquota;
        int cnt, rc = 0;
        ENTRY;


        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                struct osc_quota_info *oqi, *old;

                /* skip quota types the server did not report on */
                if (!(valid & ((cnt == USRQUOTA) ?
                    OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
                        continue;

                id = (cnt == USRQUOTA) ? uid : gid;
                noquota = (cnt == USRQUOTA) ?
                    (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);

                /* pre-allocate outside the spinlock; the entry is only kept
                 * when a state transition actually inserts it */
                oqi = alloc_qinfo(cli, id, cnt);
                if (oqi) {
                        spin_lock(&qinfo_list_lock);

                        old = find_qinfo(cli, id, cnt);
                        if (old && !noquota)
                                /* id regained quota: drop the stale record */
                                remove_qinfo_hash(old);
                        else if (!old && noquota)
                                /* id newly out of quota: remember it */
                                insert_qinfo_hash(oqi);

                        spin_unlock(&qinfo_list_lock);

                        /* free the pre-allocated entry unless it was inserted
                         * above, and free the old entry if it was removed */
                        if (old || !noquota)
                                free_qinfo(oqi);
                        if (old && !noquota)
                                free_qinfo(old);
                } else {
                        CERROR("not enough mem!\n");
                        rc = -ENOMEM;
                        break;
                }
        }

        RETURN(rc);
}
790
791 int osc_quota_cleanup(struct obd_device *obd)
792 {
793         struct client_obd *cli = &obd->u.cli;
794         struct osc_quota_info *oqi, *n;
795         int i;
796         ENTRY;
797
798         spin_lock(&qinfo_list_lock);
799         for (i = 0; i < NR_DQHASH; i++) {
800                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
801                         if (oqi->oqi_cli != cli)
802                                 continue;
803                         remove_qinfo_hash(oqi);
804                         free_qinfo(oqi);
805                 }
806         }
807         spin_unlock(&qinfo_list_lock);
808
809         RETURN(0);
810 }
811
812 int osc_quota_init(void)
813 {
814         int i;
815         ENTRY;
816
817         LASSERT(qinfo_cachep == NULL);
818         qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
819                                             sizeof(struct osc_quota_info),
820                                             0, 0);
821         if (!qinfo_cachep)
822                 RETURN(-ENOMEM);
823
824         for (i = 0; i < NR_DQHASH; i++)
825                 CFS_INIT_LIST_HEAD(qinfo_hash + i);
826
827         RETURN(0);
828 }
829
830 int osc_quota_exit(void)
831 {
832         struct osc_quota_info *oqi, *n;
833         int i, rc;
834         ENTRY;
835
836         spin_lock(&qinfo_list_lock);
837         for (i = 0; i < NR_DQHASH; i++) {
838                 list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
839                         remove_qinfo_hash(oqi);
840                         free_qinfo(oqi);
841                 }
842         }
843         spin_unlock(&qinfo_list_lock);
844
845         rc = cfs_mem_cache_destroy(qinfo_cachep);
846         LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
847         qinfo_cachep = NULL;
848
849         RETURN(0);
850 }
851
852 #ifdef __KERNEL__
853 #ifdef HAVE_QUOTA_SUPPORT
854 quota_interface_t mds_quota_interface = {
855         .quota_init     = mds_quota_init,
856         .quota_exit     = mds_quota_exit,
857         .quota_setup    = mds_quota_setup,
858         .quota_cleanup  = mds_quota_cleanup,
859         .quota_check    = target_quota_check,
860         .quota_ctl      = mds_quota_ctl,
861         .quota_fs_cleanup       =mds_quota_fs_cleanup,
862         .quota_recovery = mds_quota_recovery,
863         .quota_adjust   = mds_quota_adjust,
864         .quota_chkquota = mds_quota_check,
865         .quota_acquire  = mds_quota_acquire,
866         .quota_pending_commit = mds_quota_pending_commit,
867 };
868
/* Quota operations exported by the OST filter; registered via
 * PORTAL_SYMBOL_REGISTER in init_lustre_quota(). */
quota_interface_t filter_quota_interface = {
        .quota_setup    = filter_quota_setup,
        .quota_cleanup  = filter_quota_cleanup,
        .quota_check    = target_quota_check,
        .quota_ctl      = filter_quota_ctl,
        .quota_setinfo  = filter_quota_setinfo,
        .quota_clearinfo = filter_quota_clearinfo,
        .quota_enforce  = filter_quota_enforce,
        .quota_getflag  = filter_quota_getflag,
        .quota_acquire  = filter_quota_acquire,
        .quota_adjust   = filter_quota_adjust,
        .quota_chkquota = filter_quota_check,
        .quota_adjust_qunit   = filter_quota_adjust_qunit,
        .quota_pending_commit = filter_quota_pending_commit,
};
884 #endif
885 #endif /* __KERNEL__ */
886
/* Client-side (MDC) quota operations; thin wrappers around the generic
 * client quota control/check handlers. */
quota_interface_t mdc_quota_interface = {
        .quota_ctl      = client_quota_ctl,
        .quota_check    = client_quota_check,
        .quota_poll_check = client_quota_poll_check,
};
892
/* Client-side (OSC) quota operations: generic client handlers plus the
 * osc_quota_* routines defined above for the per-client qinfo cache. */
quota_interface_t osc_quota_interface = {
        .quota_ctl      = client_quota_ctl,
        .quota_check    = client_quota_check,
        .quota_poll_check = client_quota_poll_check,
        .quota_init     = osc_quota_init,
        .quota_exit     = osc_quota_exit,
        .quota_chkdq    = osc_quota_chkdq,
        .quota_setdq    = osc_quota_setdq,
        .quota_cleanup  = osc_quota_cleanup,
        .quota_adjust_qunit = client_quota_adjust_qunit,
};
904
/* LOV quota operations: fan quota requests out across the striped OSCs. */
quota_interface_t lov_quota_interface = {
        .quota_check    = lov_quota_check,
        .quota_ctl      = lov_quota_ctl,
        .quota_adjust_qunit = lov_quota_adjust_qunit,
};
910
911 #ifdef __KERNEL__
912
/* /proc directory for the lquota obd type; created in init_lustre_quota()
 * and removed in exit_lustre_quota() (HAVE_QUOTA_SUPPORT builds only). */
cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
914
915 static int __init init_lustre_quota(void)
916 {
917 #ifdef HAVE_QUOTA_SUPPORT
918         int rc = 0;
919
920         lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
921                                                 proc_lustre_root,
922                                                 NULL, NULL);
923         if (IS_ERR(lquota_type_proc_dir)) {
924                 CERROR("LProcFS failed in lquota-init\n");
925                 rc = PTR_ERR(lquota_type_proc_dir);
926                 return rc;
927         }
928
929         rc = qunit_cache_init();
930         if (rc)
931                 return rc;
932
933         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
934         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
935 #endif
936         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
937         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
938         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
939         return 0;
940 }
941
/* Module exit: unregister the quota interfaces, then (on quota-enabled
 * builds) tear down the qunit cache and remove the lquota procfs entry.
 * Mirrors init_lustre_quota() in reverse order. */
static void /*__exit*/ exit_lustre_quota(void)
{
        PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
#ifdef HAVE_QUOTA_SUPPORT
        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);

        qunit_cache_cleanup();

        if (lquota_type_proc_dir)
                lprocfs_remove(&lquota_type_proc_dir);
#endif
}
957
/* Module metadata and registration of init/exit hooks. */
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Quota");
MODULE_LICENSE("GPL");

cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);

/* Export the interface tables so the obd layer can bind to them. */
#ifdef HAVE_QUOTA_SUPPORT
EXPORT_SYMBOL(mds_quota_interface);
EXPORT_SYMBOL(filter_quota_interface);
#endif
EXPORT_SYMBOL(mdc_quota_interface);
EXPORT_SYMBOL(osc_quota_interface);
EXPORT_SYMBOL(lov_quota_interface);
#endif /* __KERNEL__ */