Whamcloud - gitweb
Land b_head_quota onto HEAD (20081116_0105)
author fanyong <fanyong>
Sat, 15 Nov 2008 18:39:35 +0000 (18:39 +0000)
committer fanyong <fanyong>
Sat, 15 Nov 2008 18:39:35 +0000 (18:39 +0000)
b=13058
i=johann
i=yury.umanets

143 files changed:
lustre/ChangeLog
lustre/Makefile.in
lustre/autoMakefile.am
lustre/autoconf/lustre-core.m4
lustre/cmm/cmm_device.c
lustre/cmm/cmm_object.c
lustre/cmm/mdc_device.c
lustre/doc/lfs.1
lustre/fid/fid_store.c
lustre/fld/fld_index.c
lustre/include/class_hash.h
lustre/include/dt_object.h
lustre/include/linux/lustre_acl.h
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_user.h
lustre/include/lprocfs_status.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/lustre_capa.h
lustre/include/lustre_export.h
lustre/include/lustre_lib.h
lustre/include/lustre_net.h
lustre/include/lustre_quota.h
lustre/include/lustre_req_layout.h
lustre/include/lustre_sec.h
lustre/include/md_object.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_ost.h
lustre/include/obd_support.h
lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch [new file with mode: 0644]
lustre/kernel_patches/patches/quota-large-limits-rhel5.patch [new file with mode: 0644]
lustre/kernel_patches/patches/quota-large-limits-sles10.patch [new file with mode: 0644]
lustre/kernel_patches/series/2.6-rhel5.series
lustre/kernel_patches/series/2.6-sles10.series
lustre/kernel_patches/series/2.6.22-vanilla.series
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/liblustre/Makefile.am
lustre/liblustre/file.c
lustre/liblustre/lutil.c
lustre/llite/dir.c
lustre/llite/llite_capa.c
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lmv/lmv_obd.c
lustre/lov/lov_obd.c
lustre/lov/lov_request.c
lustre/lvfs/autoMakefile.am
lustre/lvfs/fsfilt_ext3.c
lustre/lvfs/fsfilt_reiserfs.c
lustre/lvfs/lustre_quota_fmt.c
lustre/lvfs/lustre_quota_fmt.h
lustre/lvfs/quotafmt_test.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_request.c
lustre/mdd/Makefile.in
lustre/mdd/mdd_device.c
lustre/mdd/mdd_dir.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_lov.c
lustre/mdd/mdd_lproc.c
lustre/mdd/mdd_object.c
lustre/mdd/mdd_orphans.c
lustre/mdd/mdd_permission.c
lustre/mdd/mdd_quota.c [new file with mode: 0644]
lustre/mdd/mdd_trans.c
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_lov.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_identity.c
lustre/mdt/mdt_idmap.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lib.c
lustre/mdt/mdt_lproc.c
lustre/mdt/mdt_open.c
lustre/mdt/mdt_recovery.c
lustre/mdt/mdt_reint.c
lustre/mdt/mdt_xattr.c
lustre/obdclass/capa.c
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdclass/llog_lvfs.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lu_object.c
lustre/obdclass/obd_config.c
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_capa.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/filter_log.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/osc_cl_internal.h
lustre/osc/osc_io.c
lustre/osc/osc_page.c
lustre/osc/osc_request.c
lustre/osd/osd_handler.c
lustre/osd/osd_internal.h
lustre/osd/osd_oi.c
lustre/osd/osd_oi.h
lustre/ost/ost_handler.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/sec.c
lustre/ptlrpc/service.c
lustre/ptlrpc/wiretest.c
lustre/quota/Makefile.in
lustre/quota/autoMakefile.am
lustre/quota/lproc_quota.c [new file with mode: 0644]
lustre/quota/quota_adjust_qunit.c [new file with mode: 0644]
lustre/quota/quota_check.c
lustre/quota/quota_context.c
lustre/quota/quota_ctl.c
lustre/quota/quota_interface.c
lustre/quota/quota_internal.h
lustre/quota/quota_master.c
lustre/tests/acceptance-small.sh
lustre/tests/cfg/insanity-lmv.sh
lustre/tests/cfg/lmv.sh
lustre/tests/cfg/local.sh
lustre/tests/sanity-quota.sh
lustre/tests/sanity-sec.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh
lustre/utils/l_getidentity.c
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/lmc
lustre/utils/req-layout.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 6a4513f..0026ea9 100644 (file)
@@ -66,6 +66,15 @@ Description: Hitting mdc_commit_close() ASSERTION
 Details    : Properly handle request reference release in
             ll_release_openhandle().
 
+Severity   : major
+Bugzilla   : 14840
+Description: quota recovery deadlock during mds failover
+Details    : This patch includes att18982, att18236, att18237 in bz14840.
+             Solve the problems:
+             1. fix osts hang when mds does failover with quotaon
+             2. prevent watchdog storm when osts threads wait for the
+               recovery of mds
+
 Severity   : normal
 Bugzilla   : 15975
 Frequency  : only patchless client
@@ -150,6 +159,23 @@ Details    : Apply the MGS_CONNECT_SUPPORTED mask at reconnect time so
             the connect flags are properly negotiated.
 
 Severity   : normal
+Frequency  : often
+Bugzilla   : 16125
+Description: quotas are not honored with O_DIRECT
+Details    : all writes with the flag O_DIRECT will use grants which leads to
+            this problem. Now using OBD_BRW_SYNC to guard this.
+
+Severity   : normal
+Bugzilla   : 15058
+Description: add quota statistics
+Details    : 1. sort out quota proc entries and proc code.
+            2. add quota statistics
+
+Severity   : enhancement
+Bugzilla   : 13058
+Description: enable quota support for HEAD.
+
+Severity   : normal
 Bugzilla   : 16006
 Description: Properly propagate oinfo flags from lov to osc for statfs
 Details    : restore missing copy oi_flags to lov requests.
index 82c5433..f1c44fa 100644 (file)
@@ -6,9 +6,9 @@ subdir-m += ptlrpc
 subdir-m += osc
 subdir-m += obdecho
 subdir-m += mgc
+subdir-m += quota
 
 @SERVER_TRUE@subdir-m += mds obdfilter ost mgs mdt cmm mdd osd
 @CLIENT_TRUE@subdir-m += mdc lmv llite fld
-@QUOTA_TRUE@subdir-m += quota
 
 @INCLUDE_RULES@
index 3ad4024..51658ae 100644 (file)
@@ -58,9 +58,7 @@ if CLIENT
 SUBDIRS += $(CLIENT_SUBDIRS)
 endif
 
-if QUOTA
 SUBDIRS += $(QUOTA_SUBDIRS)
-endif
 
 # this needs to be after the client subdirs
 if LIBLUSTRE
index 2a87e8f..317d3c2 100644 (file)
@@ -703,6 +703,18 @@ LB_LINUX_CONFIG_IM([CRYPTO_SHA1],[],[
 ])
 ])
 
+#
+# LC_CONFIG_RMTCLIENT
+#
+dnl FIXME
+dnl the AES symbol usually tied with arch, e.g. CRYPTO_AES_586
+dnl FIXME
+AC_DEFUN([LC_CONFIG_RMTCLIENT],
+[LB_LINUX_CONFIG_IM([CRYPTO_AES],[],[
+       AC_MSG_ERROR([Lustre remote client require that CONFIG_CRYPTO_AES is enabled in your kernel.])
+])
+])
+
 AC_DEFUN([LC_SUNRPC_CACHE],
 [AC_MSG_CHECKING([if sunrpc struct cache_head uses kref])
 LB_LINUX_TRY_COMPILE([
@@ -784,11 +796,6 @@ AC_DEFUN([LC_CONFIG_GSS],
                            [AC_MSG_WARN([kernel TWOFISH support is recommended by using GSS.])])
         LB_LINUX_CONFIG_IM([CRYPTO_CAST6],[],
                            [AC_MSG_WARN([kernel CAST6 support is recommended by using GSS.])])
-       dnl FIXME
-       dnl the AES symbol usually tied with arch, e.g. CRYPTO_AES_586
-       dnl FIXME
-       LB_LINUX_CONFIG_IM([CRYPTO_AES],[],
-                           [AC_MSG_WARN([kernel AES support is recommended by using GSS.])])
 
        AC_CHECK_LIB([gssapi], [gss_init_sec_context],
                      [GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssapi"],
@@ -1551,9 +1558,9 @@ AC_DEFUN([LC_PROG_LINUX],
          LC_CONFIG_PINGER
          LC_CONFIG_CHECKSUM
          LC_CONFIG_LIBLUSTRE_RECOVERY
-         LC_CONFIG_QUOTA
          LC_CONFIG_HEALTH_CHECK_WRITE
          LC_CONFIG_LRU_RESIZE
+         LC_QUOTA_MODULE
 
          LC_TASK_PPTR
          # RHEL4 patches
@@ -1591,6 +1598,7 @@ AC_DEFUN([LC_PROG_LINUX],
 
          LC_FUNC_SET_FS_PWD
          LC_CAPA_CRYPTO
+         LC_CONFIG_RMTCLIENT
          LC_CONFIG_GSS
          LC_FUNC_MS_FLOCK_LOCK
          LC_FUNC_HAVE_CAN_SLEEP_ARG
@@ -1599,6 +1607,7 @@ AC_DEFUN([LC_PROG_LINUX],
          LC_COOKIE_FOLLOW_LINK
          LC_FUNC_RCU
          LC_PERCPU_COUNTER
+         LC_QUOTA64
 
          # does the kernel have VFS intent patches?
          LC_VFS_INTENT_PATCHES
@@ -1645,7 +1654,7 @@ AC_DEFUN([LC_PROG_LINUX],
 
          # raid5-zerocopy patch
          LC_PAGE_CONSTANT
-               
+
         # 2.6.22
          LC_INVALIDATE_BDEV_2ARG
          LC_ASYNC_BLOCK_CIPHER
@@ -1778,50 +1787,35 @@ fi
 #
 # LC_CONFIG_QUOTA
 #
-# whether to enable quota support
+# whether to enable quota support global control
 #
 AC_DEFUN([LC_CONFIG_QUOTA],
 [AC_ARG_ENABLE([quota],
        AC_HELP_STRING([--enable-quota],
                        [enable quota support]),
-       [],[enable_quota='default'])
-if test x$linux25 != xyes; then
-       enable_quota='no'
-fi
-LB_LINUX_CONFIG([QUOTA],[
-       if test x$enable_quota = xdefault; then
-               enable_quota='yes'
-       fi
-],[
-       if test x$enable_quota = xdefault; then
-               enable_quota='no'
-               AC_MSG_WARN([quota is not enabled because the kernel lacks quota support])
-       else
-               if test x$enable_quota = xyes; then
-                       AC_MSG_ERROR([cannot enable quota because the kernel lacks quota support])
-               fi
-       fi
+       [],[enable_quota='yes'])
 ])
-if test x$enable_quota != xno; then
+
+# whether to enable quota support(kernel modules)
+AC_DEFUN([LC_QUOTA_MODULE],
+[if test x$enable_quota != xno; then
+    LB_LINUX_CONFIG([QUOTA],[
+       enable_quota_module='yes'
        AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
+    ],[
+       enable_quota_module='no'
+       AC_MSG_WARN([quota is not enabled because the kernel - lacks quota support])
+    ])
 fi
 ])
 
-#
-# LC_CONFIG_SPLIT
-#
-# whether to enable split support
-#
-AC_DEFUN([LC_CONFIG_SPLIT],
-[AC_MSG_CHECKING([whether to enable split support])
-AC_ARG_ENABLE([split],
-       AC_HELP_STRING([--enable-split],
-                       [enable split support]),
-       [],[enable_split='no'])
-AC_MSG_RESULT([$enable_split])
-if test x$enable_split != xno; then
-   AC_DEFINE(HAVE_SPLIT_SUPPORT, 1, [enable split support])
-fi
+AC_DEFUN([LC_QUOTA],
+[#check global
+LC_CONFIG_QUOTA
+#check for utils
+AC_CHECK_HEADER(sys/quota.h,
+                [AC_DEFINE(HAVE_SYS_QUOTA_H, 1, [Define to 1 if you have <sys/quota.h>.])],
+                [AC_MSG_ERROR([don't find <sys/quota.h> in your system])])
 ])
 
 AC_DEFUN([LC_QUOTA_READ],
@@ -1840,6 +1834,23 @@ LB_LINUX_TRY_COMPILE([
 ])
 
 #
+# LC_CONFIG_SPLIT
+#
+# whether to enable split support
+#
+AC_DEFUN([LC_CONFIG_SPLIT],
+[AC_MSG_CHECKING([whether to enable split support])
+AC_ARG_ENABLE([split],
+       AC_HELP_STRING([--enable-split],
+                       [enable split support]),
+       [],[enable_split='no'])
+AC_MSG_RESULT([$enable_split])
+if test x$enable_split != xno; then
+   AC_DEFINE(HAVE_SPLIT_SUPPORT, 1, [enable split support])
+fi
+])
+
+#
 # LC_COOKIE_FOLLOW_LINK
 #
 # kernel 2.6.13+ ->follow_link returns a cookie
@@ -1942,6 +1953,30 @@ LB_LINUX_TRY_COMPILE([
 ])
 
 #
+# LC_QUOTA64
+# linux kernel may have 64-bit limits support
+#
+AC_DEFUN([LC_QUOTA64],
+[AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/kernel.h>
+        #include <linux/fs.h>
+        #include <linux/quotaio_v2.h>
+        int versions[] = V2_INITQVERSIONS_R1;
+        struct v2_disk_dqblk_r1 dqblk_r1;
+],[],[
+        AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
+        AC_MSG_RESULT([yes])
+
+],[
+        AC_MSG_WARN([You have got no 64-bit kernel quota support.])
+        AC_MSG_WARN([Continuing with limited quota support.])
+        AC_MSG_WARN([quotacheck is needed for filesystems with recent quota versions.])
+        AC_MSG_RESULT([no])
+])
+])
+
+#
 # LC_CONFIGURE
 #
 # other configure checks
@@ -2046,7 +2081,7 @@ AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
 AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
 AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
 AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
-AM_CONDITIONAL(QUOTA, test x$enable_quota = xyes)
+AM_CONDITIONAL(QUOTA, test x$enable_quota_module = xyes)
 AM_CONDITIONAL(SPLIT, test x$enable_split = xyes)
 AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
 AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes)
index d2c435d..839322a 100644 (file)
@@ -53,6 +53,9 @@
 #include <lustre_ver.h>
 #include "cmm_internal.h"
 #include "mdc_internal.h"
+#ifdef HAVE_QUOTA_SUPPORT
+# include <lustre_quota.h>
+#endif
 
 static struct obd_ops cmm_obd_device_ops = {
         .o_owner           = THIS_MODULE
@@ -127,12 +130,286 @@ static int cmm_update_capa_key(const struct lu_env *env,
         RETURN(rc);
 }
 
+#ifdef HAVE_QUOTA_SUPPORT
+static int cmm_quota_notify(const struct lu_env *env, struct md_device *m)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_notify(env,
+                                                          cmm_dev->cmm_child);
+        RETURN(rc);
+}
+
+static int cmm_quota_setup(const struct lu_env *env, struct md_device *m,
+                           void *data)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_setup(env,
+                                                         cmm_dev->cmm_child,
+                                                         data);
+        RETURN(rc);
+}
+
+static int cmm_quota_cleanup(const struct lu_env *env, struct md_device *m)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_cleanup(env,
+                                                           cmm_dev->cmm_child);
+        RETURN(rc);
+}
+
+static int cmm_quota_recovery(const struct lu_env *env, struct md_device *m)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_recovery(env,
+                                                            cmm_dev->cmm_child);
+        RETURN(rc);
+}
+
+static int cmm_quota_check(const struct lu_env *env, struct md_device *m,
+                           struct obd_export *exp, __u32 type)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_check(env,
+                                                         cmm_dev->cmm_child,
+                                                         exp, type);
+        RETURN(rc);
+}
+
+static int cmm_quota_on(const struct lu_env *env, struct md_device *m,
+                        __u32 type, __u32 id)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_on(env,
+                                                      cmm_dev->cmm_child,
+                                                      type, id);
+        RETURN(rc);
+}
+
+static int cmm_quota_off(const struct lu_env *env, struct md_device *m,
+                         __u32 type, __u32 id)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_off(env,
+                                                       cmm_dev->cmm_child,
+                                                       type, id);
+        RETURN(rc);
+}
+
+static int cmm_quota_setinfo(const struct lu_env *env, struct md_device *m,
+                             __u32 type, __u32 id, struct obd_dqinfo *dqinfo)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_setinfo(env,
+                                                           cmm_dev->cmm_child,
+                                                           type, id, dqinfo);
+        RETURN(rc);
+}
+
+static int cmm_quota_getinfo(const struct lu_env *env,
+                             const struct md_device *m,
+                             __u32 type, __u32 id, struct obd_dqinfo *dqinfo)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getinfo(env,
+                                                           cmm_dev->cmm_child,
+                                                           type, id, dqinfo);
+        RETURN(rc);
+}
+
+static int cmm_quota_setquota(const struct lu_env *env, struct md_device *m,
+                              __u32 type, __u32 id, struct obd_dqblk *dqblk)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_setquota(env,
+                                                            cmm_dev->cmm_child,
+                                                            type, id, dqblk);
+        RETURN(rc);
+}
+
+static int cmm_quota_getquota(const struct lu_env *env,
+                              const struct md_device *m,
+                              __u32 type, __u32 id, struct obd_dqblk *dqblk)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getquota(env,
+                                                            cmm_dev->cmm_child,
+                                                            type, id, dqblk);
+        RETURN(rc);
+}
+
+static int cmm_quota_getoinfo(const struct lu_env *env,
+                              const struct md_device *m,
+                              __u32 type, __u32 id, struct obd_dqinfo *dqinfo)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getoinfo(env,
+                                                            cmm_dev->cmm_child,
+                                                            type, id, dqinfo);
+        RETURN(rc);
+}
+
+static int cmm_quota_getoquota(const struct lu_env *env,
+                               const struct md_device *m,
+                               __u32 type, __u32 id, struct obd_dqblk *dqblk)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getoquota(env,
+                                                             cmm_dev->cmm_child,
+                                                             type, id, dqblk);
+        RETURN(rc);
+}
+
+static int cmm_quota_invalidate(const struct lu_env *env, struct md_device *m,
+                                __u32 type)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_invalidate(env,
+                                                              cmm_dev->cmm_child,
+                                                              type);
+        RETURN(rc);
+}
+
+static int cmm_quota_finvalidate(const struct lu_env *env, struct md_device *m,
+                                 __u32 type)
+{
+        struct cmm_device *cmm_dev = md2cmm_dev(m);
+        int rc;
+        ENTRY;
+
+        /* disable quota for CMD case temporary. */
+        if (cmm_dev->cmm_tgt_count)
+                RETURN(-EOPNOTSUPP);
+
+        rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_finvalidate(env,
+                                                               cmm_dev->cmm_child,
+                                                               type);
+        RETURN(rc);
+}
+#endif
+
 static const struct md_device_operations cmm_md_ops = {
         .mdo_statfs          = cmm_statfs,
         .mdo_root_get        = cmm_root_get,
         .mdo_maxsize_get     = cmm_maxsize_get,
         .mdo_init_capa_ctxt  = cmm_init_capa_ctxt,
         .mdo_update_capa_key = cmm_update_capa_key,
+#ifdef HAVE_QUOTA_SUPPORT
+        .mdo_quota           = {
+                .mqo_notify      = cmm_quota_notify,
+                .mqo_setup       = cmm_quota_setup,
+                .mqo_cleanup     = cmm_quota_cleanup,
+                .mqo_recovery    = cmm_quota_recovery,
+                .mqo_check       = cmm_quota_check,
+                .mqo_on          = cmm_quota_on,
+                .mqo_off         = cmm_quota_off,
+                .mqo_setinfo     = cmm_quota_setinfo,
+                .mqo_getinfo     = cmm_quota_getinfo,
+                .mqo_setquota    = cmm_quota_setquota,
+                .mqo_getquota    = cmm_quota_getquota,
+                .mqo_getoinfo    = cmm_quota_getoinfo,
+                .mqo_getoquota   = cmm_quota_getoquota,
+                .mqo_invalidate  = cmm_quota_invalidate,
+                .mqo_finvalidate = cmm_quota_finvalidate
+        }
+#endif
 };
 
 extern struct lu_device_type mdc_device_type;
index 3309687..73c9dad 100644 (file)
@@ -1274,5 +1274,5 @@ static const struct md_dir_operations cmr_dir_ops = {
         .mdo_link        = cmr_link,
         .mdo_unlink      = cmr_unlink,
         .mdo_rename      = cmr_rename,
-        .mdo_rename_tgt  = cmr_rename_tgt,
+        .mdo_rename_tgt  = cmr_rename_tgt
 };
index 5507962..db2d0b1 100644 (file)
@@ -146,7 +146,7 @@ static int mdc_obd_add(const struct lu_env *env,
                 ocd->ocd_ibits_known = MDS_INODELOCK_UPDATE;
                 ocd->ocd_connect_flags = OBD_CONNECT_VERSION |
                                          OBD_CONNECT_ACL |
-                                         OBD_CONNECT_LCL_CLIENT |
+                                         OBD_CONNECT_RMT_CLIENT |
                                          OBD_CONNECT_MDS_CAPA |
                                          OBD_CONNECT_OSS_CAPA |
                                          OBD_CONNECT_IBITS |
index 3b3b7fb..532a60b 100644 (file)
@@ -44,13 +44,33 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the
 .br
 .B lfs quotaoff [-ug] <filesystem>
 .br
-.B lfs setquota [-u|-g] <username|groupname> <block-softlimit>
-             \fB<block-hardlimit> <inode-softlimit> <inode-hardlimit>
+.B lfs quotainv [-ug] [-f] <filesystem>
+.br
+.B lfs setquota [-u|--user|-g|--group] <username|groupname>
+             \fB[--block-softlimit <block-softlimit>]
+             \fB[--block-hardlimit <block-hardlimit>]
+             \fB[--inode-softlimit <inode-softlimit>]
+             \fB[--inode-hardlimit <inode-hardlimit>]
              \fB<filesystem>\fR
 .br
-.B lfs setquota -t [-u|-g] <block-grace> <inode-grace> <filesystem>
+.B lfs setquota [-u|--user|-g|--group] <username|groupname>
+             \fB[-b <block-softlimit>] [-B <block-hardlimit>]
+             \fB[-i <inode-softlimit>] [-I <inode-hardlimit>]
+             \fB<filesystem>\fR
 .br
-.B lfs quota [-o obd_uuid] [-u|-g] <username|groupname> <filesystem>
+.B lfs setquota -t [-u|-g]
+             \fB[--block-grace <block-grace>]
+             \fB[--inode-grace <inode-grace>]
+             \fB<filesystem>\fR
+.br
+.B lfs setquota -t [-u|-g]
+             \fB[-b <block-grace>] [-i <inode-grace>]
+             \fB<filesystem>\fR
+.br
+
+.B lfs quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] <username|groupname> <filesystem>
+.br
+.B lfs quota <filesystem>
 .br
 .B lfs quota -t [-u|-g] <filesystem>
 .br
@@ -121,14 +141,17 @@ To turn filesystem quotas on. Options specify quota for users (-u) groups (-g) a
 .B quotaoff [-ugf] <filesystem>
 To turn filesystem quotas off.  Options specify quota for users (-u) groups (-g) and force (-f)
 .TP
-.B setquota  [-u|-g] <name> <block-softlimit> <block-hardlimit> <inode-softlimit> <inode-hardlimit> <filesystem>
-To set filesystem quotas for users or groups. Limits are specific as blocks and inodes, see EXAMPLES
+.B quotainv [-ug] [-f] <filesystem>
+Clear quota files (administrative quota files if used without -f, operational quota files otherwise), all of their quota entries, for (-u) users or (-g) groups; after quotainv one must use quotacheck before using quotas. DO NOT USE THIS COMMAND UNLESS YOU REALLY KNOW WHAT IT DOES. IT IS MAINLY FOR INTERNAL PURPOSES.
+.TP
+.B setquota  [-u|-g] <name> [--block-softlimit <block-softlimit>] [--block-hardlimit <block-hardlimit>] [--inode-softlimit <inode-softlimit>] [--inode-hardlimit <inode-hardlimit>] <filesystem>
+To set filesystem quotas for users or groups. Limits can be specified with -b, -k, -m, -g, -t, -p suffixes which specify units of 1, 2^10, 2^20, 2^30, 2^40 and 2^50 respectively. The block limit unit is the kilobyte (1024 bytes) by default, and block limits are always kilobyte-grained (even if specified in bytes), see EXAMPLES
 .TP
-.B setquota -t [-u|-g] <block-grace> <inode-grace> <filesystem>
+.B setquota -t [-u|-g] [--block-grace <block-grace>] [--inode-grace <inode-grace>] <filesystem>
 To set filesystem quota grace times for users or groups. Grace time is specified in "XXwXXdXXhXXmXXs" format or as an integer seconds value, see EXAMPLES
 .TP
-.B quota [-o obd_uuid] [-u|-g] <username|groupname> <filesystem>
-To display disk usage and limits, either for the full filesystem, or for objects on a specific obd. A user or group name must be specified.
+.B quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] <username|groupname> <filesystem>
+To display disk usage and limits, either for the full filesystem, or for objects on a specific obd. A user or group name can be specified. If both user and group are omitted quotas for current uid/gid are shown. -v provides more verbose (with per-obd statistics) output.
 .TP
 .B quota -t [-u|-g] <filesystem>
 To display block and inode grace times for user (-u) or group (-g) quotas
@@ -141,7 +164,7 @@ Quit the interactive lfs session
 .SH EXAMPLES
 .TP
 .B $ lfs setstripe -s 128k -c 2 /mnt/lustre/file1
-This creats a file striped on two OSTs with 128kB on each stripe.
+This creates a file striped on two OSTs with 128kB on each stripe.
 .TP
 .B $ lfs setstripe -d /mnt/lustre/dir
 This deletes a default stripe pattern on dir. New files will use the default striping pattern created therein.
@@ -182,10 +205,10 @@ Turn quotas of user and group on
 .B $ lfs quotaoff -ug /mnt/lustre
 Turn quotas of user and group off
 .TP
-.B $ lfs setquota -u bob 0 1000000 0 10000 /mnt/lustre
-Set quotas of user `bob': 1GB block quota and 10,000 file quota
+.B $ lfs setquota -u bob --block-softlimit 1000000 --block-hardlimit 2000000 /mnt/lustre
+Set quotas of user `bob': 1GB block quota softlimit and 2GB block quota hardlimit
 .TP
-.B $ lfs setquota -t -u 1000 1w4d /mnt/lustre
+.B $ lfs setquota -t -u --block-grace 1000 --inode-grace 1w4d /mnt/lustre
 Set grace times for user quotas: 1000 seconds for block quotas, 1 week and 4 days for inode quotas
 .TP
 .B $ lfs quota -u bob /mnt/lustre
index 42fda49..7a827da 100644 (file)
@@ -102,7 +102,7 @@ int seq_store_write(struct lu_server_seq *seq,
 
                 rc = dt_obj->do_body_ops->dbo_write(env, dt_obj,
                                                     seq_store_buf(info),
-                                                    &pos, th, BYPASS_CAPA);
+                                                    &pos, th, BYPASS_CAPA, 1);
                 if (rc == sizeof(info->sti_space)) {
                         CDEBUG(D_INFO, "%s: Space - "DRANGE"\n",
                                seq->lss_name, PRANGE(&seq->lss_space));
index a1e88d4..aba0bb0 100644 (file)
@@ -131,7 +131,7 @@ int fld_index_create(struct lu_server_fld *fld,
                 rc = dt_obj->do_index_ops->dio_insert(env, dt_obj,
                                                       fld_rec(env, mds),
                                                       fld_key(env, seq),
-                                                      th, BYPASS_CAPA);
+                                                      th, BYPASS_CAPA, 1);
                 dt_dev->dd_ops->dt_trans_stop(env, th);
         } else
                 rc = PTR_ERR(th);
index e2b2b11..6210c7f 100644 (file)
@@ -170,7 +170,7 @@ __lustre_hash_key_validate(lustre_hash_t *lh, void *key,
                            struct hlist_node *hnode)
 {
         if (unlikely(lh->lh_flags & LH_DEBUG))
-                LASSERT(lh_compare(lh, key, hnode));
+                LASSERT(lh_compare(lh, key, hnode) > 0);
 }
 
 /* Validate hnode is in the correct bucket */
@@ -193,7 +193,7 @@ __lustre_hash_bucket_lookup(lustre_hash_t *lh,
         struct hlist_node *hnode;
 
         hlist_for_each(hnode, &lhb->lhb_head)
-                if (lh_compare(lh, key, hnode))
+                if (lh_compare(lh, key, hnode) > 0)
                         return hnode;
 
         return NULL;
index fbbb9ad..0cd80c4 100644 (file)
@@ -66,6 +66,7 @@ struct txn_param;
 struct dt_device;
 struct dt_object;
 struct dt_index_features;
+struct dt_quota_ctxt;
 
 struct dt_device_param {
         unsigned           ddp_max_name_len;
@@ -82,11 +83,12 @@ enum dt_txn_op {
         DTO_IDNEX_UPDATE,
         DTO_OBJECT_CREATE,
         DTO_OBJECT_DELETE,
-        DTO_ATTR_SET,
+        DTO_ATTR_SET_BASE,
         DTO_XATTR_SET,
         DTO_LOG_REC, /**< XXX temporary: dt layer knows nothing about llog. */
         DTO_WRITE_BASE,
         DTO_WRITE_BLOCK,
+        DTO_ATTR_SET_CHOWN,
 
         DTO_NR
 };
@@ -144,6 +146,12 @@ struct dt_device_operations {
                                    struct dt_device *dev,
                                    int mode, unsigned long timeout,
                                    __u32 alg, struct lustre_capa_key *keys);
+        /**
+         * Initialize quota context.
+         */
+        void (*dt_init_quota_ctxt)(const struct lu_env *env,
+                                   struct dt_device *dev,
+                                   struct dt_quota_ctxt *ctxt, void *data);
 
         /**
          *  get transaction credits for given \a op.
@@ -337,7 +345,8 @@ struct dt_body_operations {
          */
         ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt,
                              const struct lu_buf *buf, loff_t *pos,
-                             struct thandle *handle, struct lustre_capa *capa);
+                             struct thandle *handle, struct lustre_capa *capa,
+                             int ignore_quota);
 };
 
 /**
@@ -370,7 +379,8 @@ struct dt_index_operations {
          */
         int (*dio_insert)(const struct lu_env *env, struct dt_object *dt,
                           const struct dt_rec *rec, const struct dt_key *key,
-                          struct thandle *handle, struct lustre_capa *capa);
+                          struct thandle *handle, struct lustre_capa *capa,
+                          int ignore_quota);
         /**
          * precondition: dt_object_exists(dt);
          */
index 713341e..cfdc247 100644 (file)
@@ -43,7 +43,7 @@
 #define _LUSTRE_LINUX_ACL_H
 
 #ifndef        _LUSTRE_ACL_H
-#error Shoud not include direectly. use #include <lustre/lustre_acl.h> instead
+#error Shoud not include direectly. use #include <lustre_acl.h> instead
 #endif
 
 #ifdef __KERNEL__
index 7f3f0da..13c0385 100644 (file)
@@ -501,9 +501,32 @@ struct blkcipher_desc {
 #define ll_crypto_blkcipher_encrypt_iv(desc, dst, src, bytes) \
         crypto_cipher_encrypt_iv((desc)->tfm, dst, src, bytes, (desc)->info)
 
-extern struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(
-                            const char * algname, u32 type, u32 mask);
 static inline
+struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname,
+                                                   u32 type, u32 mask)
+{
+        char        buf[CRYPTO_MAX_ALG_NAME + 1];
+        const char *pan = algname;
+        u32         flag = 0; 
+
+        if (strncmp("cbc(", algname, 4) == 0)
+                flag |= CRYPTO_TFM_MODE_CBC;
+        else if (strncmp("ecb(", algname, 4) == 0)
+                flag |= CRYPTO_TFM_MODE_ECB;
+        if (flag) {
+                char *vp = strnchr(algname, CRYPTO_MAX_ALG_NAME, ')');
+                if (vp) {
+                        memcpy(buf, algname + 4, vp - algname - 4);
+                        buf[vp - algname - 4] = '\0';
+                        pan = buf;
+                } else {
+                        flag = 0;
+                }
+        }
+        return crypto_alloc_tfm(pan, flag);
+}
+
+static inline 
 struct ll_crypto_hash *ll_crypto_alloc_hash(const char *alg, u32 type, u32 mask)
 {
         char        buf[CRYPTO_MAX_ALG_NAME + 1];
index 2996e36..b544341 100644 (file)
@@ -113,12 +113,12 @@ struct fsfilt_operations {
         int     (* fs_read_record)(struct file *, void *, int size, loff_t *);
         int     (* fs_setup)(struct super_block *sb);
         int     (* fs_get_op_len)(int, struct fsfilt_objinfo *, int);
-        int     (* fs_quotactl)(struct super_block *sb,
-                                struct obd_quotactl *oqctl);
         int     (* fs_quotacheck)(struct super_block *sb,
                                   struct obd_quotactl *oqctl);
         __u64   (* fs_get_version) (struct inode *inode);
         __u64   (* fs_set_version) (struct inode *inode, __u64 new_version);
+        int     (* fs_quotactl)(struct super_block *sb,
+                                struct obd_quotactl *oqctl);
         int     (* fs_quotainfo)(struct lustre_quota_info *lqi, int type,
                                  int cmd);
         int     (* fs_qids)(struct file *file, struct inode *inode, int type,
@@ -167,18 +167,21 @@ static inline lvfs_sbdev_type fsfilt_journal_sbdev(struct obd_device *obd,
         return (lvfs_sbdev_type)0;
 }
 
-#define FSFILT_OP_UNLINK         1
-#define FSFILT_OP_RMDIR          2
-#define FSFILT_OP_RENAME         3
-#define FSFILT_OP_CREATE         4
-#define FSFILT_OP_MKDIR          5
-#define FSFILT_OP_SYMLINK        6
-#define FSFILT_OP_MKNOD          7
-#define FSFILT_OP_SETATTR        8
-#define FSFILT_OP_LINK           9
-#define FSFILT_OP_CANCEL_UNLINK 10
-#define FSFILT_OP_JOIN          11
-#define FSFILT_OP_NOOP          15
+#define FSFILT_OP_UNLINK                1
+#define FSFILT_OP_RMDIR                 2
+#define FSFILT_OP_RENAME                3
+#define FSFILT_OP_CREATE                4
+#define FSFILT_OP_MKDIR                 5
+#define FSFILT_OP_SYMLINK               6
+#define FSFILT_OP_MKNOD                 7
+#define FSFILT_OP_SETATTR               8
+#define FSFILT_OP_LINK                  9
+#define FSFILT_OP_CANCEL_UNLINK         10
+#define FSFILT_OP_JOIN                  11
+#define FSFILT_OP_NOOP                  15
+#define FSFILT_OP_UNLINK_PARTIAL_CHILD  21
+#define FSFILT_OP_UNLINK_PARTIAL_PARENT 22
+#define FSFILT_OP_CREATE_PARTIAL_CHILD  23
 
 #define __fsfilt_check_slow(obd, start, msg)                            \
 do {                                                                    \
index b44679e..da302bc 100644 (file)
@@ -48,6 +48,9 @@
 # endif
 #else
 # include <linux/version.h>
+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21)
+#  define NEED_QUOTA_DEFS
+# endif
 # ifdef HAVE_QUOTA_SUPPORT
 #  include <linux/quota.h>
 # endif
index 8ef613e..7763498 100644 (file)
@@ -222,7 +222,7 @@ static inline int opcode_offset(__u32 opc) {
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
                         (OST_LAST_OPC - OST_FIRST_OPC));
-} else if (opc < FLD_LAST_OPC) {
+        } else if (opc < FLD_LAST_OPC) {
                 /* FLD opcode */
                 return (opc - FLD_FIRST_OPC +
                         (LLOG_LAST_OPC - LLOG_FIRST_OPC) +
@@ -252,6 +252,18 @@ static inline int opcode_offset(__u32 opc) {
                         (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
                         (MDS_LAST_OPC - MDS_FIRST_OPC) +
                         (OST_LAST_OPC - OST_FIRST_OPC));
+        } else if (opc < QUOTA_LAST_OPC) {
+                /* LQUOTA Opcode */
+                return (opc - QUOTA_FIRST_OPC +
+                        (SEC_LAST_OPC - SEC_FIRST_OPC) +
+                        (SEQ_LAST_OPC - SEQ_FIRST_OPC) +
+                        (FLD_LAST_OPC - FLD_FIRST_OPC) +
+                        (LLOG_LAST_OPC - LLOG_FIRST_OPC) +
+                        (OBD_LAST_OPC - OBD_FIRST_OPC) +
+                        (MGS_LAST_OPC - MGS_FIRST_OPC) +
+                        (LDLM_LAST_OPC - LDLM_FIRST_OPC) +
+                        (MDS_LAST_OPC - MDS_FIRST_OPC) +
+                        (OST_LAST_OPC - OST_FIRST_OPC));
         } else {
                 /* Unknown Opcode */
                 return -1;
@@ -266,7 +278,8 @@ static inline int opcode_offset(__u32 opc) {
                             (SEQ_LAST_OPC - SEQ_FIRST_OPC)     + \
                             (MGS_LAST_OPC - MGS_FIRST_OPC)     + \
                             (LLOG_LAST_OPC - LLOG_FIRST_OPC)   + \
-                            (SEC_LAST_OPC - SEC_FIRST_OPC))
+                            (SEC_LAST_OPC - SEC_FIRST_OPC)     + \
+                            (QUOTA_LAST_OPC - QUOTA_FIRST_OPC))
 
 #define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR)  + \
                            (EXTRA_LAST_OPC - EXTRA_FIRST_OPC))
@@ -288,12 +301,13 @@ enum {
         LDLM_EXTENT_ENQUEUE,
         LDLM_FLOCK_ENQUEUE,
         LDLM_IBITS_ENQUEUE,
+        MDS_REINT_SETATTR,
         MDS_REINT_CREATE,
         MDS_REINT_LINK,
-        MDS_REINT_OPEN,
-        MDS_REINT_SETATTR,
-        MDS_REINT_RENAME,
         MDS_REINT_UNLINK,
+        MDS_REINT_RENAME,
+        MDS_REINT_OPEN,
+        MDS_REINT_SETXATTR,
         BRW_READ_BYTES,
         BRW_WRITE_BYTES,
         EXTRA_LAST_OPC
@@ -617,6 +631,56 @@ int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off,
 /* lprocfs_status.c: write recovery max time bz13079 */
 int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer,
                                     unsigned long count, void *data);
+
+/* all quota proc functions */
+extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
+                                  int *eof, void *data);
+extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
+                                  unsigned long count, void *data);
+extern int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count,
+                                  int *eof, void *data);
+extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
+                                  unsigned long count, void *data);
+extern int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count,
+                                  int *eof, void *data);
+extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
+                                  unsigned long count, void *data);
+extern int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count,
+                                  int *eof, void *data);
+extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
+                                  unsigned long count, void *data);
+extern int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count,
+                                 int *eof, void *data);
+extern int lprocfs_quota_wr_type(struct file *file, const char *buffer,
+                                 unsigned long count, void *data);
+extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off,
+                                           int count, int *eof, void *data);
+extern int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer,
+                                           unsigned long count, void *data);
+extern int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off,
+                                     int count, int *eof, void *data);
+extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
+                                     unsigned long count, void *data);
+extern int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data);
+extern int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer,
+                                      unsigned long count, void *data);
+extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off,
+                                            int count, int *eof, void *data);
+extern int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer,
+                                            unsigned long count, void *data);
+extern int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off,
+                                        int count, int *eof, void *data);
+extern int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer,
+                                        unsigned long count, void *data);
+extern int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off,
+                                        int count, int *eof, void *data);
+extern int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer,
+                                        unsigned long count, void *data);
+extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data);
+extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer,
+                                      unsigned long count, void *data);
 #else
 /* LPROCFS is not defined */
 static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
@@ -651,7 +715,7 @@ static inline void lprocfs_init_ops_stats(int num_private_stats,
 static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats)
 { return; }
 static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-                                             unsigned int num_private_stats)
+                                          unsigned int num_private_stats)
 { return 0; }
 static inline int lprocfs_alloc_md_stats(struct obd_device *obddev,
                                          unsigned int num_private_stats)
@@ -663,7 +727,7 @@ struct obd_export;
 static inline int lprocfs_add_clear_entry(struct obd_export *exp)
 { return 0; }
 static inline int lprocfs_exp_setup(struct obd_export *exp,
-                                   lnet_nid_t *peer_nid, int *newnid)
+                                    lnet_nid_t *peer_nid, int *newnid)
 { return 0; }
 static inline int lprocfs_exp_cleanup(struct obd_export *exp)
 { return 0; }
index 8717532..3f4fd1e 100644 (file)
@@ -84,7 +84,7 @@ extern int llapi_poollist(char *name);
 extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum);
 #define HAVE_LLAPI_FILE_LOOKUP
 extern int llapi_file_lookup(int dirfd, const char *name);
+
 struct find_param {
         unsigned int maxdepth;
         time_t  atime;
@@ -151,8 +151,9 @@ extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid);
 extern int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_uuid);
 extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
 extern int llapi_is_lustre_mnttype(const char *type);
+extern int llapi_get_obd_count(char *mnt, int *count, int is_mdt);
 extern int parse_size(char *optarg, unsigned long long *size,
-                      unsigned long long *size_units);
+                      unsigned long long *size_units, int bytes_spec);
 extern int llapi_path2fid(const char *path, unsigned long long *seq,
                           unsigned long *oid, unsigned long *ver);
 
index fbb1af2..81fd3b7 100644 (file)
@@ -653,8 +653,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_JOIN       0x00002000ULL /* files can be concatenated */
 #define OBD_CONNECT_ATTRFID    0x00004000ULL /* Server supports GetAttr By Fid */
 #define OBD_CONNECT_NODEVOH    0x00008000ULL /* No open handle for special nodes */
-#define OBD_CONNECT_LCL_CLIENT 0x00010000ULL /* local 1.8 client */
-#define OBD_CONNECT_RMT_CLIENT 0x00020000ULL /* Remote 1.8 client */
+#define OBD_CONNECT_RMT_CLIENT 0x00010000ULL /* Remote client */
+#define OBD_CONNECT_RMT_CLIENT_FORCE 0x00020000ULL /* Remote client by force */
 #define OBD_CONNECT_BRW_SIZE   0x00040000ULL /* Max bytes per rpc */
 #define OBD_CONNECT_QUOTA64    0x00080000ULL /* 64bit qunit_data.qd_count b=10707*/
 #define OBD_CONNECT_MDS_CAPA   0x00100000ULL /* MDS capability */
@@ -683,8 +683,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
                                 OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
                                 OBD_CONNECT_NODEVOH |/* OBD_CONNECT_ATTRFID |*/\
-                                OBD_CONNECT_LCL_CLIENT | \
                                 OBD_CONNECT_RMT_CLIENT | \
+                                OBD_CONNECT_RMT_CLIENT_FORCE | \
                                 OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \
                                 OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \
                                 OBD_CONNECT_FID | \
@@ -696,7 +696,9 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \
                                 OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET | \
                                 OBD_CONNECT_CKSUM | LRU_RESIZE_CONNECT_FLAG | \
-                                OBD_CONNECT_AT)
+                                OBD_CONNECT_AT | OBD_CONNECT_CHANGE_QS | \
+                                OBD_CONNECT_RMT_CLIENT | \
+                                OBD_CONNECT_RMT_CLIENT_FORCE)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT)
 
@@ -766,6 +768,7 @@ typedef enum {
         OST_SET_INFO   = 17,
         OST_QUOTACHECK = 18,
         OST_QUOTACTL   = 19,
+        OST_QUOTA_ADJUST_QUNIT = 20,
         OST_LAST_OPC
 } ost_cmd_t;
 #define OST_FIRST_OPC  OST_REPLY
@@ -908,6 +911,8 @@ struct lov_mds_md_v3 {            /* LOV EA mds/wire data (little-endian) */
 #define OBD_MD_FLCKSPLIT   (0x0000080000000000ULL) /* Check split on server */
 #define OBD_MD_FLCROSSREF  (0x0000100000000000ULL) /* Cross-ref case */
 
+#define OBD_FL_TRUNC       (0x0000200000000000ULL) /* for filter_truncate */
+
 #define OBD_MD_FLRMTLSETFACL    (0x0001000000000000ULL) /* lfs lsetfacl case */
 #define OBD_MD_FLRMTLGETFACL    (0x0002000000000000ULL) /* lfs lgetfacl case */
 #define OBD_MD_FLRMTRSETFACL    (0x0004000000000000ULL) /* lfs rsetfacl case */
@@ -1244,13 +1249,26 @@ extern void lustre_swab_mdt_epoch (struct mdt_epoch *b);
 #define Q_INITQUOTA     0x800101        /* init slave limits */
 #define Q_GETOINFO      0x800102        /* get obd quota info */
 #define Q_GETOQUOTA     0x800103        /* get obd quotas */
+#define Q_FINVALIDATE   0x800104        /* invalidate operational quotas */
+
+#define Q_TYPEMATCH(id, type) \
+        ((id) == (type) || (id) == UGQUOTA)
 
-#define Q_TYPESET(oqc, type) \
-        ((oqc)->qc_type == type || (oqc)->qc_type == UGQUOTA)
+#define Q_TYPESET(oqc, type) Q_TYPEMATCH((oqc)->qc_type, type)
 
 #define Q_GETOCMD(oqc) \
         ((oqc)->qc_cmd == Q_GETOINFO || (oqc)->qc_cmd == Q_GETOQUOTA)
 
+#define QCTL_COPY(out, in)              \
+do {                                    \
+        Q_COPY(out, in, qc_cmd);        \
+        Q_COPY(out, in, qc_type);       \
+        Q_COPY(out, in, qc_id);         \
+        Q_COPY(out, in, qc_stat);       \
+        Q_COPY(out, in, qc_dqinfo);     \
+        Q_COPY(out, in, qc_dqblk);      \
+} while (0)
+
 struct obd_quotactl {
         __u32                   qc_cmd;
         __u32                   qc_type;
@@ -1262,6 +1280,34 @@ struct obd_quotactl {
 
 extern void lustre_swab_obd_quotactl(struct obd_quotactl *q);
 
+struct quota_adjust_qunit {
+        __u32 qaq_flags;
+        __u32 qaq_id;
+        __u64 qaq_bunit_sz;
+        __u64 qaq_iunit_sz;
+        __u64 padding1;
+};
+extern void lustre_swab_quota_adjust_qunit(struct quota_adjust_qunit *q);
+
+/* flags in qunit_data and quota_adjust_qunit will use macros below */
+#define LQUOTA_FLAGS_GRP       1UL   /* 0 is user, 1 is group */
+#define LQUOTA_FLAGS_BLK       2UL   /* 0 is inode, 1 is block */
+#define LQUOTA_FLAGS_ADJBLK    4UL   /* adjust the block qunit size */
+#define LQUOTA_FLAGS_ADJINO    8UL   /* adjust the inode qunit size */
+#define LQUOTA_FLAGS_CHG_QS   16UL   /* indicate whether it has capability of
+                                      * OBD_CONNECT_CHANGE_QS */
+
+/* the status of lqsk_flags in struct lustre_qunit_size_key */
+#define LQUOTA_QUNIT_FLAGS (LQUOTA_FLAGS_GRP | LQUOTA_FLAGS_BLK)
+
+#define QAQ_IS_GRP(qaq)    ((qaq)->qaq_flags & LQUOTA_FLAGS_GRP)
+#define QAQ_IS_ADJBLK(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_ADJBLK)
+#define QAQ_IS_ADJINO(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_ADJINO)
+
+#define QAQ_SET_GRP(qaq)    ((qaq)->qaq_flags |= LQUOTA_FLAGS_GRP)
+#define QAQ_SET_ADJBLK(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_ADJBLK)
+#define QAQ_SET_ADJINO(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_ADJINO)
+
 /* inode access permission for remote user, the inode info are omitted,
  * for client knows them. */
 struct mds_remote_perm {
@@ -1277,7 +1323,8 @@ enum {
         CFS_SETUID_PERM = 0x01,
         CFS_SETGID_PERM = 0x02,
         CFS_SETGRP_PERM = 0x04,
-        CFS_RMTACL_PERM = 0x08
+        CFS_RMTACL_PERM = 0x08,
+        CFS_RMTOWN_PERM = 0x10
 };
 
 extern void lustre_swab_mds_remote_perm(struct mds_remote_perm *p);
@@ -1421,7 +1468,8 @@ enum {
         MDS_CROSS_REF    = 1 << 1,
         MDS_VTX_BYPASS   = 1 << 2,
         MDS_PERM_BYPASS  = 1 << 3,
-        MDS_SOM          = 1 << 4
+        MDS_SOM          = 1 << 4,
+        MDS_QUOTA_IGNORE = 1 << 5
 };
 
 struct mds_rec_join {
@@ -2261,7 +2309,6 @@ struct obdo {
 extern void lustre_swab_obdo (struct obdo *o);
 
 /* request structure for OST's */
-
 struct ost_body {
         struct  obdo oa;
 };
@@ -2293,37 +2340,71 @@ extern void lustre_swab_llog_rec(struct llog_rec_hdr  *rec,
 struct lustre_cfg;
 extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg);
 
-/* quota. fixed by tianzy for bug10707 */
-#define QUOTA_IS_GRP   0X1UL  /* 0 is user, 1 is group. Used by qd_flags*/
-#define QUOTA_IS_BLOCK 0x2UL  /* 0 is inode, 1 is block. Used by qd_flags*/
-
+/* this will be used when OBD_CONNECT_CHANGE_QS is set */
 struct qunit_data {
-        __u32 qd_id; /* ID appiles to (uid, gid) */
-        __u32 qd_flags; /* Quota type (USRQUOTA, GRPQUOTA) occupy one bit;
-                         * Block quota or file quota occupy one bit */
-        __u64 qd_count; /* acquire/release count (bytes for block quota) */
+        /**
+         * ID applies to (uid, gid)
+         */
+        __u32 qd_id;
+        /**
+         * LQUOTA_FLAGS_* affect the corresponding bits
+         */
+        __u32 qd_flags;
+        /**
+         * acquire/release count (bytes for block quota)
+         */
+        __u64 qd_count;
+        /**
+         * when a master returns the reply to a slave, it will
+         * contain the current corresponding qunit size
+         */
+        __u64 qd_qunit;
+        __u64 padding;
 };
 
-struct qunit_data_old {
-        __u32 qd_id;    /* ID appiles to (uid, gid) */
-        __u32 qd_type;  /* Quota type (USRQUOTA, GRPQUOTA) */
-        __u32 qd_count; /* acquire/release count (bytes for block quota) */
-        __u32 qd_isblk; /* Block quota or file quota */
-};
+#define QDATA_IS_GRP(qdata)    ((qdata)->qd_flags & LQUOTA_FLAGS_GRP)
+#define QDATA_IS_BLK(qdata)    ((qdata)->qd_flags & LQUOTA_FLAGS_BLK)
+#define QDATA_IS_ADJBLK(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_ADJBLK)
+#define QDATA_IS_ADJINO(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_ADJINO)
+#define QDATA_IS_CHANGE_QS(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_CHG_QS)
+
+#define QDATA_SET_GRP(qdata)    ((qdata)->qd_flags |= LQUOTA_FLAGS_GRP)
+#define QDATA_SET_BLK(qdata)    ((qdata)->qd_flags |= LQUOTA_FLAGS_BLK)
+#define QDATA_SET_ADJBLK(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_ADJBLK)
+#define QDATA_SET_ADJINO(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_ADJINO)
+#define QDATA_SET_CHANGE_QS(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_CHG_QS)
+
+#define QDATA_CLR_GRP(qdata)        ((qdata)->qd_flags &= ~LQUOTA_FLAGS_GRP)
+#define QDATA_CLR_CHANGE_QS(qdata)  ((qdata)->qd_flags &= ~LQUOTA_FLAGS_CHG_QS)
 
 extern void lustre_swab_qdata(struct qunit_data *d);
-extern void lustre_swab_qdata_old(struct qunit_data_old *d);
-extern struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d);
-extern struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d);
+extern int quota_get_qdata(void*req, struct qunit_data *qdata,
+                           int is_req, int is_exp);
+extern int quota_copy_qdata(void *request, struct qunit_data *qdata,
+                            int is_req, int is_exp);
 
 typedef enum {
-        QUOTA_DQACQ     = 601,
-        QUOTA_DQREL     = 602,
+        QUOTA_DQACQ     = 901,
+        QUOTA_DQREL     = 902,
+        QUOTA_LAST_OPC
 } quota_cmd_t;
+#define QUOTA_FIRST_OPC QUOTA_DQACQ
 
 #define JOIN_FILE_ALIGN 4096
 
-/** security opcodes */
+#define QUOTA_REQUEST   1
+#define QUOTA_REPLY     0
+#define QUOTA_EXPORT    1
+#define QUOTA_IMPORT    0
+
+/* quota check function */
+#define QUOTA_RET_OK           0 /**< return successfully */
+#define QUOTA_RET_NOQUOTA      1 /**< not support quota */
+#define QUOTA_RET_NOLIMIT      2 /**< quota limit isn't set */
+#define QUOTA_RET_ACQUOTA      4 /**< need to acquire extra quota */
+#define QUOTA_RET_INC_PENDING  8 /**< pending value is increased */
+
+/* security opcodes */
 typedef enum {
         SEC_CTX_INIT            = 801,
         SEC_CTX_INIT_CONT       = 802,
@@ -2341,15 +2422,15 @@ typedef enum {
 /* NB take care when changing the sequence of elements this struct,
  * because the offset info is used in find_capa() */
 struct lustre_capa {
-        struct lu_fid   lc_fid;     /* fid */
-        __u64           lc_opc;     /* operations allowed */
-        __u32           lc_uid;     /* uid, it is obsolete, but maybe used in
-                                     * future, reserve it for 64-bits aligned.*/
-        __u32           lc_flags;   /* HMAC algorithm & flags */
-        __u32           lc_keyid;   /* key used for the capability */
-        __u32           lc_timeout; /* capa timeout value (sec) */
-        __u64           lc_expiry;  /* expiry time (sec) */
-        __u8            lc_hmac[CAPA_HMAC_MAX_LEN];   /* HMAC */
+        struct lu_fid   lc_fid;         /** fid */
+        __u64           lc_opc;         /** operations allowed */
+        __u64           lc_uid;         /** file owner */
+        __u64           lc_gid;         /** file group */
+        __u32           lc_flags;       /** HMAC algorithm & flags */
+        __u32           lc_keyid;       /** key# used for the capability */
+        __u32           lc_timeout;     /** capa timeout value (sec) */
+        __u32           lc_expiry;      /** expiry time (sec) */
+        __u8            lc_hmac[CAPA_HMAC_MAX_LEN];   /** HMAC */
 } __attribute__((packed));
 
 extern void lustre_swab_lustre_capa(struct lustre_capa *c);
@@ -2364,9 +2445,9 @@ enum {
         CAPA_OPC_OSS_WRITE    = 1<<5,  /**< write oss object data */
         CAPA_OPC_OSS_READ     = 1<<6,  /**< read oss object data */
         CAPA_OPC_OSS_TRUNC    = 1<<7,  /**< truncate oss object */
-        CAPA_OPC_META_WRITE   = 1<<8,  /**< write object meta data */
-        CAPA_OPC_META_READ    = 1<<9,  /**< read object meta data */
-
+        CAPA_OPC_OSS_DESTROY  = 1<<8,  /**< destroy oss object */
+        CAPA_OPC_META_WRITE   = 1<<9,  /**< write object meta data */
+        CAPA_OPC_META_READ    = 1<<10, /**< read object meta data */
 };
 
 #define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
@@ -2374,7 +2455,8 @@ enum {
         (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
          CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
 #define CAPA_OPC_OSS_ONLY                                                   \
-        (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC)
+        (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC |      \
+         CAPA_OPC_OSS_DESTROY)
 #define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
 #define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
 
@@ -2411,11 +2493,6 @@ struct lustre_capa_key {
 
 extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
 
-/* quota check function */
-#define QUOTA_RET_OK           0 /**< return successfully */
-#define QUOTA_RET_NOQUOTA      1 /**< not support quota */
-#define QUOTA_RET_NOLIMIT      2 /**< quota limit isn't set */
-#define QUOTA_RET_ACQUOTA      3 /**< need to acquire extra quota */
 #endif
 
 /** @} lustreidl */
index bd76396..12a0f0e 100644 (file)
@@ -98,6 +98,8 @@ struct obd_statfs;
 #define LL_IOC_FLUSHCTX                 _IOW ('f', 166, long)
 #define LL_IOC_RMTACL                   _IOW ('f', 167, long)
 
+#define LL_IOC_GETOBDCOUNT              _IOR ('f', 168, long)
+
 #define LL_IOC_LLOOP_ATTACH             _IOWR('f', 169, long)
 #define LL_IOC_LLOOP_DETACH             _IOWR('f', 170, long)
 #define LL_IOC_LLOOP_INFO               _IOWR('f', 171, long)
@@ -228,17 +230,19 @@ static inline char *obd_uuid2str(struct obd_uuid *uuid)
         return (char *)(uuid->uuid);
 }
 
-#define LUSTRE_Q_QUOTAON  0x800002     /* turn quotas on */
-#define LUSTRE_Q_QUOTAOFF 0x800003     /* turn quotas off */
-#define LUSTRE_Q_GETINFO  0x800005     /* get information about quota files */
-#define LUSTRE_Q_SETINFO  0x800006     /* set information about quota files */
-#define LUSTRE_Q_GETQUOTA 0x800007     /* get user quota structure */
-#define LUSTRE_Q_SETQUOTA 0x800008     /* set user quota structure */
+/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
+#define LUSTRE_Q_QUOTAON    0x800002     /* turn quotas on */
+#define LUSTRE_Q_QUOTAOFF   0x800003     /* turn quotas off */
+#define LUSTRE_Q_GETINFO    0x800005     /* get information about quota files */
+#define LUSTRE_Q_SETINFO    0x800006     /* set information about quota files */
+#define LUSTRE_Q_GETQUOTA   0x800007     /* get user quota structure */
+#define LUSTRE_Q_SETQUOTA   0x800008     /* set user quota structure */
+/* lustre-specific control commands */
+#define LUSTRE_Q_INVALIDATE  0x80000b     /* invalidate quota data */
+#define LUSTRE_Q_FINVALIDATE 0x80000c     /* invalidate filter quota data */
 
 #define UGQUOTA 2       /* set both USRQUOTA and GRPQUOTA */
 
-#define QFMT_LDISKFS 2  /* QFMT_VFS_V0(2), quota format for ldiskfs */
-
 struct if_quotacheck {
         char                    obd_type[16];
         struct obd_uuid         obd_uuid;
@@ -306,6 +310,10 @@ enum {
 
 #endif /* !__KERNEL__ */
 
+typedef enum lustre_quota_version {
+        LUSTRE_QUOTA_V2 = 1
+} lustre_quota_version_t;
+
 /* XXX: same as if_dqinfo struct in kernel */
 struct obd_dqinfo {
         __u64 dqi_bgrace;
@@ -328,11 +336,20 @@ struct obd_dqblk {
         __u32 padding;
 };
 
+enum {
+        QC_GENERAL      = 0,
+        QC_MDTIDX       = 1,
+        QC_OSTIDX       = 2,
+        QC_UUID         = 3
+};
+
 struct if_quotactl {
         __u32                   qc_cmd;
         __u32                   qc_type;
         __u32                   qc_id;
         __u32                   qc_stat;
+        __u32                   qc_valid;
+        __u32                   qc_idx;
         struct obd_dqinfo       qc_dqinfo;
         struct obd_dqblk        qc_dqblk;
         char                    obd_type[16];
index 1fb6a7d..7f65a44 100644 (file)
@@ -95,29 +95,24 @@ enum {
         CAPA_SITE_MAX
 };
 
-static inline __u64 capa_opc(struct lustre_capa *capa)
-{
-        return capa->lc_opc;
-}
-
-static inline __u32 capa_uid(struct lustre_capa *capa)
+static inline struct lu_fid *capa_fid(struct lustre_capa *capa)
 {
-        return capa->lc_uid;
+        return &capa->lc_fid;
 }
 
-static inline struct lu_fid *capa_fid(struct lustre_capa *capa)
+static inline __u64 capa_opc(struct lustre_capa *capa)
 {
-        return &capa->lc_fid;
+        return capa->lc_opc;
 }
 
-static inline __u32 capa_keyid(struct lustre_capa *capa)
+static inline __u64 capa_uid(struct lustre_capa *capa)
 {
-        return capa->lc_keyid;
+        return capa->lc_uid;
 }
 
-static inline __u64 capa_expiry(struct lustre_capa *capa)
+static inline __u64 capa_gid(struct lustre_capa *capa)
 {
-        return capa->lc_expiry;
+        return capa->lc_gid;
 }
 
 static inline __u32 capa_flags(struct lustre_capa *capa)
@@ -127,9 +122,12 @@ static inline __u32 capa_flags(struct lustre_capa *capa)
 
 static inline __u32 capa_alg(struct lustre_capa *capa)
 {
-        __u32 alg = capa->lc_flags;
+        return (capa->lc_flags >> 24);
+}
 
-        return alg >> 24;
+static inline __u32 capa_keyid(struct lustre_capa *capa)
+{
+        return capa->lc_keyid;
 }
 
 static inline __u64 capa_key_mdsid(struct lustre_capa_key *key)
@@ -142,12 +140,23 @@ static inline __u32 capa_key_keyid(struct lustre_capa_key *key)
         return key->lk_keyid;
 }
 
+static inline __u32 capa_timeout(struct lustre_capa *capa)
+{
+        return capa->lc_timeout;
+}
+
+static inline __u32 capa_expiry(struct lustre_capa *capa)
+{
+        return capa->lc_expiry;
+}
+
 #define DEBUG_CAPA(level, c, fmt, args...)                                     \
 do {                                                                           \
-CDEBUG(level, fmt " capability@%p uid %u opc "LPX64" fid "DFID" keyid %u "     \
-       "expiry "LPU64" flags %u alg %d\n",                                     \
-       ##args, c, capa_uid(c), capa_opc(c), PFID(capa_fid(c)), capa_keyid(c),  \
-       capa_expiry(c), capa_flags(c), capa_alg(c));                            \
+CDEBUG(level, fmt " capability@%p fid "DFID" opc "LPX64" uid "LPU64" gid "     \
+       LPU64" flags %u alg %d keyid %u timeout %u expiry %u\n",                \
+       ##args, c, PFID(capa_fid(c)), capa_opc(c), capa_uid(c), capa_gid(c),    \
+       capa_flags(c), capa_alg(c), capa_keyid(c), capa_timeout(c),             \
+       capa_expiry(c));                                                        \
 } while (0)
 
 #define DEBUG_CAPA_KEY(level, k, fmt, args...)                                 \
@@ -172,38 +181,33 @@ struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa,
                              int alive);
 
 int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key);
+int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen);
+int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen);
 void capa_cpy(void *dst, struct obd_capa *ocapa);
-
-char *dump_capa_content(char *buf, char *key, int len);
-
 static inline struct obd_capa *alloc_capa(int site)
 {
 #ifdef __KERNEL__
         struct obd_capa *ocapa;
 
+        if (unlikely(site != CAPA_SITE_CLIENT && site != CAPA_SITE_SERVER))
+                return ERR_PTR(-EINVAL);
+
         OBD_SLAB_ALLOC(ocapa, capa_cachep, GFP_KERNEL, sizeof(*ocapa));
-        if (ocapa) {
-                atomic_set(&ocapa->c_refc, 0);
-                spin_lock_init(&ocapa->c_lock);
-                CFS_INIT_LIST_HEAD(&ocapa->c_list);
-                ocapa->c_site = site;
-        }
-        return ocapa;
-#else
-        return NULL;
-#endif
-}
+        if (unlikely(!ocapa))
+                return ERR_PTR(-ENOMEM);
+
+        CFS_INIT_LIST_HEAD(&ocapa->c_list);
+        atomic_set(&ocapa->c_refc, 1);
+        spin_lock_init(&ocapa->c_lock);
+        ocapa->c_site = site;
+        if (ocapa->c_site == CAPA_SITE_CLIENT)
+                CFS_INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
+        else
+                CFS_INIT_HLIST_NODE(&ocapa->u.tgt.c_hash);
 
-static inline void free_capa(struct obd_capa *ocapa)
-{
-#ifdef __KERNEL__
-        if (atomic_read(&ocapa->c_refc)) {
-                DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc %d for",
-                           atomic_read(&ocapa->c_refc));
-                LBUG();
-        }
-        OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa));
+        return ocapa;
 #else
+        return ERR_PTR(-EOPNOTSUPP);
 #endif
 }
 
@@ -225,7 +229,19 @@ static inline void capa_put(struct obd_capa *ocapa)
                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc is 0 for");
                 LBUG();
         }
-        atomic_dec(&ocapa->c_refc);
+
+        if (atomic_dec_and_test(&ocapa->c_refc)) {
+                LASSERT(list_empty(&ocapa->c_list));
+                if (ocapa->c_site == CAPA_SITE_CLIENT) {
+                        LASSERT(list_empty(&ocapa->u.cli.lli_list));
+                } else {
+                        struct hlist_node *hnode;
+
+                        hnode = &ocapa->u.tgt.c_hash;
+                        LASSERT(!hnode->next && !hnode->pprev);
+                }
+                OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa));
+        }
 }
 
 static inline int open_flags_to_accmode(int flags)
@@ -253,6 +269,11 @@ static inline void set_capa_expiry(struct obd_capa *ocapa)
                                        cfs_time_seconds(expiry));
 }
 
+static inline int capa_is_expired_sec(struct lustre_capa *capa)
+{
+        return (capa->lc_expiry - cfs_time_current_sec() <= 0);
+}
+
 static inline int capa_is_expired(struct obd_capa *ocapa)
 {
         return cfs_time_beforeq(ocapa->c_expiry, cfs_time_current());
@@ -284,5 +305,11 @@ struct filter_capa_key {
         struct lustre_capa_key  k_key;
 };
 
+enum {
+        LC_ID_NONE      = 0,
+        LC_ID_PLAIN     = 1,
+        LC_ID_CONVERT   = 2
+};
+
 #define BYPASS_CAPA (struct lustre_capa *)ERR_PTR(-ENOENT)
 #endif /* __LINUX_CAPA_H_ */
index 4851fbc..b2a5c86 100644 (file)
@@ -56,7 +56,6 @@ struct mdt_export_data {
         __u64                   med_ibits_known;
         loff_t                  med_lr_off;
         int                     med_lr_idx;
-        unsigned int            med_rmtclient:1; /* remote client? */
         struct semaphore           med_idmap_sem;
         struct lustre_idmap_table *med_idmap;
 };
@@ -178,6 +177,20 @@ static inline int exp_connect_lru_resize(struct obd_export *exp)
         return !!(exp->exp_connect_flags & OBD_CONNECT_LRU_RESIZE);
 }
 
+static inline int exp_connect_rmtclient(struct obd_export *exp)
+{
+        LASSERT(exp != NULL);
+        return !!(exp->exp_connect_flags & OBD_CONNECT_RMT_CLIENT);
+}
+
+static inline int client_is_remote(struct obd_export *exp)
+{
+        struct obd_import *imp = class_exp2cliimp(exp);
+
+        return !!(imp->imp_connect_data.ocd_connect_flags &
+                  OBD_CONNECT_RMT_CLIENT);
+}
+
 static inline int imp_connect_lru_resize(struct obd_import *imp)
 {
         struct obd_connect_data *ocd;
index 83697fe..ba36693 100644 (file)
@@ -80,13 +80,12 @@ int target_pack_pool_reply(struct ptlrpc_request *req);
 int target_handle_ping(struct ptlrpc_request *req);
 void target_committed_to_req(struct ptlrpc_request *req);
 
-#ifdef HAVE_QUOTA_SUPPORT
 /* quotacheck callback, dqacq/dqrel callback handler */
 int target_handle_qc_callback(struct ptlrpc_request *req);
+#ifdef HAVE_QUOTA_SUPPORT
 int target_handle_dqacq_callback(struct ptlrpc_request *req);
 #else
 #define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP)
-#define target_handle_qc_callback(req) (0)
 #endif
 
 #define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
index 558930d..1bb9ac2 100644 (file)
@@ -212,8 +212,8 @@ union ptlrpc_async_args {
          * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
          * a pointer to it here.  The pointer_arg ensures this struct is at
          * least big enough for that. */
-        void      *pointer_arg[9];
-        __u64      space[5];
+        void      *pointer_arg[11];
+        __u64      space[6];
 };
 
 struct ptlrpc_request_set;
@@ -625,13 +625,23 @@ struct ptlrpc_bulk_desc {
 };
 
 struct ptlrpc_thread {
-
-        struct list_head t_link; /* active threads in svc->srv_threads */
-
-        void *t_data;            /* thread-private data (preallocated memory) */
+        /**
+         * active threads in svc->srv_threads
+         */
+        struct list_head t_link;
+        /**
+         * thread-private data (preallocated memory)
+         */
+        void *t_data;
         __u32 t_flags;
-
-        unsigned int t_id; /* service thread index, from ptlrpc_start_threads */
+        /**
+         * service thread index, from ptlrpc_start_threads
+         */
+        unsigned int t_id;
+        /**
+         * put watchdog in the structure per thread b=14840
+         */
+        struct lc_watchdog *t_watchdog;
         cfs_waitq_t t_ctl_waitq;
         struct lu_env *t_env;
 };
index 2b26e24..b0dc442 100644 (file)
@@ -50,6 +50,8 @@
 #include <lustre/lustre_idl.h>
 #include <lustre_net.h>
 #include <lvfs.h>
+#include <obd_support.h>
+#include <class_hash.h>
 
 struct obd_device;
 struct client_obd;
@@ -62,6 +64,64 @@ struct client_obd;
 
 #ifdef __KERNEL__
 
+#ifdef LPROCFS
+enum {
+        LQUOTA_FIRST_STAT = 0,
+        /** @{ */
+        /**
+         * these four are for measuring quota requests, for both of
+         * quota master and quota slaves
+         */
+        LQUOTA_SYNC_ACQ = LQUOTA_FIRST_STAT,
+        LQUOTA_SYNC_REL,
+        LQUOTA_ASYNC_ACQ,
+        LQUOTA_ASYNC_REL,
+        /** @} */
+        /** @{ */
+        /**
+         * these four measure how much time I/O threads spend on dealing
+         * with quota before and after writing data or creating files,
+         * only for quota slaves(lquota_chkquota and lquota_pending_commit)
+         */
+        LQUOTA_WAIT_FOR_CHK_BLK,
+        LQUOTA_WAIT_FOR_CHK_INO,
+        LQUOTA_WAIT_FOR_COMMIT_BLK,
+        LQUOTA_WAIT_FOR_COMMIT_INO,
+        /** @} */
+        /** @{ */
+        /**
+         * these two are for measuring time waiting return of quota reqs
+         * (qctxt_wait_pending_dqacq), only for quota slaves
+         */
+        LQUOTA_WAIT_PENDING_BLK_QUOTA,
+        LQUOTA_WAIT_PENDING_INO_QUOTA,
+        /** @} */
+        /** @{ */
+        /**
+         * these two are for the cases where the quota req has already
+         * returned by the time qctxt_wait_pending_dqacq is called,
+         * only for quota slaves
+         */
+        LQUOTA_NOWAIT_PENDING_BLK_QUOTA,
+        LQUOTA_NOWAIT_PENDING_INO_QUOTA,
+        /** @} */
+        /** @{ */
+        /**
+         * these are for quota ctl
+         */
+        LQUOTA_QUOTA_CTL,
+        /** @} */
+        /** @{ */
+        /**
+         * these are for adjust quota qunit, for both of
+         * quota master and quota slaves
+         */
+        LQUOTA_ADJUST_QUNIT,
+        LQUOTA_LAST_STAT
+        /** @} */
+};
+#endif  /* LPROCFS */
+
 /* structures to access admin quotafile */
 struct lustre_mem_dqinfo {
         unsigned int dqi_bgrace;
@@ -75,28 +135,45 @@ struct lustre_mem_dqinfo {
 struct lustre_quota_info {
         struct file *qi_files[MAXQUOTAS];
         struct lustre_mem_dqinfo qi_info[MAXQUOTAS];
+        lustre_quota_version_t qi_version;
 };
 
 #define DQ_STATUS_AVAIL         0x0     /* Available dquot */
 #define DQ_STATUS_SET           0x01    /* Sombody is setting dquot */
 #define DQ_STATUS_RECOVERY      0x02    /* dquot is in recovery */
 
+struct lustre_mem_dqblk {
+        __u64 dqb_bhardlimit;  /**< absolute limit on disk blks alloc */
+        __u64 dqb_bsoftlimit;  /**< preferred limit on disk blks */
+        __u64 dqb_curspace;    /**< current used space */
+        __u64 dqb_ihardlimit;  /**< absolute limit on allocated inodes */
+        __u64 dqb_isoftlimit;  /**< preferred inode limit */
+        __u64 dqb_curinodes;   /**< current # allocated inodes */
+        time_t dqb_btime;      /**< time limit for excessive disk use */
+        time_t dqb_itime;      /**< time limit for excessive inode use */
+};
+
 struct lustre_dquot {
-        /* Hash list in memory, protect by dquot_hash_lock */
+        /** Hash list in memory, protected by dquot_hash_lock */
         struct list_head dq_hash;
-        /* Protect the data in lustre_dquot */
+        /** Protect the data in lustre_dquot */
         struct semaphore dq_sem;
-        /* Use count */
+        /** Use count */
         int dq_refcnt;
-        /* Pointer of quota info it belongs to */
+        /** Pointer of quota info it belongs to */
         struct lustre_quota_info *dq_info;
-        
-        loff_t dq_off;                  /* Offset of dquot on disk */
-        unsigned int dq_id;             /* ID this applies to (uid, gid) */
-        int dq_type;                    /* Type fo quota (USRQUOTA, GRPQUOUTA) */
-        unsigned short dq_status;       /* See DQ_STATUS_ */
-        unsigned long dq_flags;         /* See DQ_ in quota.h */
-        struct mem_dqblk dq_dqb;        /* Diskquota usage */
+        /** Offset of dquot on disk */
+        loff_t dq_off;
+        /** ID this applies to (uid, gid) */
+        unsigned int dq_id;
+        /** Type of quota (USRQUOTA, GRPQUOTA) */
+        int dq_type;
+        /** See DQ_STATUS_ */
+        unsigned short dq_status;
+        /** See DQ_ in quota.h */
+        unsigned long dq_flags;
+        /** Diskquota usage */
+        struct lustre_mem_dqblk dq_dqb;
 };
 
 struct dquot_id {
@@ -110,37 +187,234 @@ struct dquot_id {
 #define QFILE_INIT_INFO         4
 #define QFILE_RD_DQUOT          5
 #define QFILE_WR_DQUOT          6
+#define QFILE_CONVERT           7
 
 /* admin quotafile operations */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type);
 int lustre_read_quota_info(struct lustre_quota_info *lqi, int type);
 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type);
 int lustre_read_dquot(struct lustre_dquot *dquot);
 int lustre_commit_dquot(struct lustre_dquot *dquot);
 int lustre_init_quota_info(struct lustre_quota_info *lqi, int type);
-int lustre_get_qids(struct file *file, struct inode *inode, int type, 
+int lustre_get_qids(struct file *file, struct inode *inode, int type,
                     struct list_head *list);
+int lustre_quota_convert(struct lustre_quota_info *lqi, int type);
+#else
+
+#ifndef DQ_FAKE_B
+#define DQ_FAKE_B       6
+#endif
+
+static inline int lustre_check_quota_file(struct lustre_quota_info *lqi,
+                                          int type)
+{
+        return 0;
+}
+static inline int lustre_read_quota_info(struct lustre_quota_info *lqi,
+                                         int type)
+{
+        return 0;
+}
+static inline int lustre_write_quota_info(struct lustre_quota_info *lqi,
+                                          int type)
+{
+        return 0;
+}
+static inline int lustre_read_dquot(struct lustre_dquot *dquot)
+{
+        return 0;
+}
+static inline int lustre_commit_dquot(struct lustre_dquot *dquot)
+{
+        return 0;
+}
+static inline int lustre_init_quota_info(struct lustre_quota_info *lqi,
+                                         int type)
+{
+        return 0;
+}
+static inline int lustre_quota_convert(struct lustre_quota_info *lqi,
+                                       int type)
+{
+        return 0;
+}
+#endif  /* KERNEL_VERSION(2,5,0) */
 
 #define LL_DQUOT_OFF(sb)    DQUOT_OFF(sb)
 
 typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd,
                                 int opc);
+
+/* user quota is turned on on filter */
+#define LQC_USRQUOTA_FLAG (1 << 0)
+/* group quota is turned on on filter */
+#define LQC_GRPQUOTA_FLAG (1 << 1)
+
+#define UGQUOTA2LQC(id) ((Q_TYPEMATCH(id, USRQUOTA) ? LQC_USRQUOTA_FLAG : 0) | \
+                         (Q_TYPEMATCH(id, GRPQUOTA) ? LQC_GRPQUOTA_FLAG : 0))
+
 struct lustre_quota_ctxt {
-        struct super_block *lqc_sb;     /* superblock this applies to */
-        struct obd_import *lqc_import;  /* import used to send dqacq/dqrel RPC */
-        dqacq_handler_t lqc_handler;    /* dqacq/dqrel RPC handler, only for quota master */ 
-        unsigned long lqc_recovery:1,   /* Doing recovery */ 
-                      lqc_atype:2,      /* Turn on user/group quota at setup automatically, 
-                                         * 0: none, 1: user quota, 2: group quota, 3: both */
-                      lqc_status:1;     /* Quota status. 0:Off, 1:On */
-        unsigned long lqc_iunit_sz;     /* Unit size of file quota */
-        unsigned long lqc_itune_sz;     /* Trigger dqacq when available file quota less than
-                                         * this value, trigger dqrel when available file quota
-                                         * more than this value + 1 iunit */
-        unsigned long lqc_bunit_sz;     /* Unit size of block quota */
-        unsigned long lqc_btune_sz;     /* See comment of lqc_itune_sz */
+        /** superblock this applies to */
+        struct super_block *lqc_sb;
+        /** obd_device_target for obt_rwsem */
+        struct obd_device_target *lqc_obt;
+        /** import used to send dqacq/dqrel RPC */
+        struct obd_import *lqc_import;
+        /** dqacq/dqrel RPC handler, only for quota master */
+        dqacq_handler_t lqc_handler;
+        /** quota flags */
+        unsigned long lqc_flags;
+        /** @{ */
+        unsigned long lqc_recovery:1,   /** Doing recovery */
+                      lqc_switch_qs:1,  /**
+                                         * the function of change qunit size
+                                         * 0:Off, 1:On
+                                         */
+                      lqc_valid:1,      /** this qctxt is valid or not */
+                      lqc_setup:1;      /**
+                                         * tell whether or not quota_type has
+                                         * been processed, so that the master
+                                         * knows when it can start processing
+                                         * incoming acq/rel quota requests
+                                         */
+        /** @} */
+        /**
+         * original unit size of file quota and
+         * upper limitation for adjust file qunit
+         */
+        unsigned long lqc_iunit_sz;
+        /**
+         * Trigger dqacq when available file
+         * quota less than this value, trigger
+         * dqrel when available file quota
+         * more than this value + 1 iunit
+         */
+        unsigned long lqc_itune_sz;
+        /**
+         * original unit size of block quota and
+         * upper limitation for adjust block qunit
+         */
+        unsigned long lqc_bunit_sz;
+        /** See comment of lqc_itune_sz */
+        unsigned long lqc_btune_sz;
+        /** all lustre_qunit_size structures */
+        struct lustre_hash *lqc_lqs_hash;
+
+        /** @{ */
+        /**
+         * the values below are relative to how master change its qunit sizes
+         */
+        /**
+         * this affects the boundary of
+         * shrinking and enlarging qunit size. default=4
+         */
+        unsigned long lqc_cqs_boundary_factor;
+        /** the least value of block qunit */
+        unsigned long lqc_cqs_least_bunit;
+        /** the least value of inode qunit */
+        unsigned long lqc_cqs_least_iunit;
+        /**
+         * when enlarging, qunit size will
+         * be multiplied by it; when shrinking,
+         * qunit size will be divided by it
+         */
+        unsigned long lqc_cqs_qs_factor;
+        /**
+         * avoid ping-pong effect of
+         * adjusting qunit size. How many
+         * seconds must be waited between
+         * enlarging and shrinking qunit
+         */
+        /** @} */
+        int           lqc_switch_seconds;
+        /**
+         * when blk qunit reaches this value,
+         * later write reqs from client should be sync b=16642
+         */
+        int           lqc_sync_blk;
+        /** guard lqc_imp_valid now */
+        spinlock_t    lqc_lock;
+        /**
+         * when mds isn't connected, threads
+         * on osts who send the quota reqs
+         * with wait==1 will be put here b=14840
+         */
+        cfs_waitq_t   lqc_wait_for_qmaster;
+        struct proc_dir_entry *lqc_proc_dir;
+        /** lquota statistics */
+        struct lprocfs_stats  *lqc_stats;
 };
 
+#define QUOTA_MASTER_READY(qctxt)   (qctxt)->lqc_setup = 1
+#define QUOTA_MASTER_UNREADY(qctxt) (qctxt)->lqc_setup = 0
+
+struct lustre_qunit_size {
+        struct hlist_node lqs_hash; /** the hash entry */
+        unsigned int lqs_id;        /** id of user/group */
+        unsigned long lqs_flags;    /** is user/group; FULLBUF or LESSBUF */
+        unsigned long lqs_iunit_sz; /** Unit size of file quota currently */
+        /**
+         * Trigger dqacq when available file quota
+         * less than this value, trigger dqrel
+         * when more than this value + 1 iunit
+         */
+        unsigned long lqs_itune_sz;
+        unsigned long lqs_bunit_sz; /** Unit size of block quota currently */
+        unsigned long lqs_btune_sz; /** See comment of lqs itune sz */
+        /** the blocks reached ost and don't finish */
+        unsigned long lqs_bwrite_pending;
+        /** the inodes reached mds and don't finish */
+        unsigned long lqs_iwrite_pending;
+        /** when inodes are allocated/released, this value will record it */
+        long long lqs_ino_rec;
+        /** when blocks are allocated/released, this value will record it */
+        long long lqs_blk_rec;
+        atomic_t lqs_refcount;
+        cfs_time_t lqs_last_bshrink;   /** time of last block shrink */
+        cfs_time_t lqs_last_ishrink;   /** time of last inode shrink */
+        spinlock_t lqs_lock;
+        struct quota_adjust_qunit lqs_key; /** hash key */
+        struct lustre_quota_ctxt *lqs_ctxt; /** quota ctxt */
+};
+
+#define LQS_IS_GRP(lqs)    ((lqs)->lqs_flags & LQUOTA_FLAGS_GRP)
+#define LQS_IS_ADJBLK(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_ADJBLK)
+#define LQS_IS_ADJINO(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_ADJINO)
+
+#define LQS_SET_GRP(lqs)    ((lqs)->lqs_flags |= LQUOTA_FLAGS_GRP)
+#define LQS_SET_ADJBLK(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_ADJBLK)
+#define LQS_SET_ADJINO(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_ADJINO)
+
+static inline void lqs_getref(struct lustre_qunit_size *lqs)
+{
+        atomic_inc(&lqs->lqs_refcount);
+        CDEBUG(D_QUOTA, "lqs=%p refcount %d\n",
+               lqs, atomic_read(&lqs->lqs_refcount));
+}
+
+static inline void lqs_putref(struct lustre_qunit_size *lqs)
+{
+        LASSERT(atomic_read(&lqs->lqs_refcount) > 0);
+
+        /* killing last ref, let's let hash table kill it */
+        if (atomic_read(&lqs->lqs_refcount) == 1) {
+                lustre_hash_del(lqs->lqs_ctxt->lqc_lqs_hash,
+                                &lqs->lqs_key, &lqs->lqs_hash);
+                OBD_FREE_PTR(lqs);
+        } else {
+                atomic_dec(&lqs->lqs_refcount);
+                CDEBUG(D_QUOTA, "lqs=%p refcount %d\n",
+                       lqs, atomic_read(&lqs->lqs_refcount));
+
+        }
+}
+
+static inline void lqs_initref(struct lustre_qunit_size *lqs)
+{
+        atomic_set(&lqs->lqs_refcount, 0);
+}
+
 #else
 
 struct lustre_quota_info {
@@ -149,6 +423,9 @@ struct lustre_quota_info {
 struct lustre_quota_ctxt {
 };
 
+#define QUOTA_MASTER_READY(qctxt)
+#define QUOTA_MASTER_UNREADY(qctxt)
+
 #endif  /* !__KERNEL__ */
 
 #else
@@ -168,58 +445,109 @@ struct lustre_quota_ctxt {
 #define MIN_QLIMIT      1
 
 struct quotacheck_thread_args {
-        struct obd_export   *qta_exp;   /* obd export */
-        struct obd_quotactl  qta_oqctl; /* obd_quotactl args */
-        struct super_block  *qta_sb;    /* obd super block */
-        atomic_t            *qta_sem;   /* obt_quotachecking */
+        struct obd_export   *qta_exp;   /** obd export */
+        struct obd_device   *qta_obd;   /** obd device */
+        struct obd_quotactl  qta_oqctl; /** obd_quotactl args */
+        struct super_block  *qta_sb;    /** obd super block */
+        atomic_t            *qta_sem;   /** obt_quotachecking */
 };
 
+struct obd_trans_info;
+typedef int (*quota_acquire)(struct obd_device *obd, unsigned int uid,
+                             unsigned int gid, struct obd_trans_info *oti,
+                             int isblk);
+
 typedef struct {
         int (*quota_init) (void);
         int (*quota_exit) (void);
         int (*quota_setup) (struct obd_device *);
         int (*quota_cleanup) (struct obd_device *);
-        /* For quota master, close admin quota files */
+        /**
+         * For quota master, close admin quota files
+         */
         int (*quota_fs_cleanup) (struct obd_device *);
-        int (*quota_ctl) (struct obd_export *, struct obd_quotactl *);
-        int (*quota_check) (struct obd_export *, struct obd_quotactl *);
+        int (*quota_ctl) (struct obd_device *, struct obd_export *,
+                          struct obd_quotactl *);
+        int (*quota_check) (struct obd_device *, struct obd_export *,
+                            struct obd_quotactl *);
         int (*quota_recovery) (struct obd_device *);
-        
-        /* For quota master/slave, adjust quota limit after fs operation */
-        int (*quota_adjust) (struct obd_device *, unsigned int[], 
-                             unsigned int[], int, int); 
-        
-        /* For quota slave, set import, trigger quota recovery */
-        int (*quota_setinfo) (struct obd_export *, struct obd_device *);
-        
-        /* For quota slave, set proper thread resoure capability */
+
+        /**
+         * For quota master/slave, adjust quota limit after fs operation
+         */
+        int (*quota_adjust) (struct obd_device *, unsigned int[],
+                             unsigned int[], int, int);
+
+        /**
+         * For quota slave, set import, trigger quota recovery,
+         * For quota master, set lqc_setup
+         */
+        int (*quota_setinfo) (struct obd_device *, void *);
+
+        /**
+         * For quota slave, clear import when relative import is invalid
+         */
+        int (*quota_clearinfo) (struct obd_export *, struct obd_device *);
+
+        /**
+         * For quota slave, set proper thread resource capability
+         */
         int (*quota_enforce) (struct obd_device *, unsigned int);
-        
-        /* For quota slave, check whether specified uid/gid is over quota */
+
+        /**
+         * For quota slave, check whether specified uid/gid is over quota
+         */
         int (*quota_getflag) (struct obd_device *, struct obdo *);
-        
-        /* For quota slave, acquire/release quota from master if needed */
-        int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int);
-        
-        /* For quota slave, check whether specified uid/gid's remaining quota
-         * can finish a write rpc */
+
+        /**
+         * For quota slave, acquire/release quota from master if needed
+         */
+        int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int,
+                              struct obd_trans_info *, int);
+
+        /**
+         * For quota slave, check whether specified uid/gid's remaining quota
+         * can finish a block_write or inode_create rpc. It updates the pending
+         * record of block and inode, acquires quota if necessary
+         */
         int (*quota_chkquota) (struct obd_device *, unsigned int, unsigned int,
-                               int);
+                               int, int *, quota_acquire,
+                               struct obd_trans_info *, int);
 
-        /* For quota client, poll if the quota check done */
+        /**
+         * For quota client, poll if the quota check done
+         */
         int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *);
-        
-        /* For quota client, check whether specified uid/gid is over quota */
+
+        /**
+         * For quota client, check whether specified uid/gid is over quota
+         */
         int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int);
-        
-        /* For quota client, set over quota flag for specifed uid/gid */
+
+        /**
+         * For quota client, the actions after the pending write is committed
+         */
+        int (*quota_pending_commit) (struct obd_device *, unsigned int,
+                                     unsigned int, int, int);
+
+        /**
+         * For quota client, set over quota flag for specified uid/gid
+         */
         int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int,
                             obd_flag, obd_flag);
+
+        /**
+         * For adjusting qunit size b=10600
+         */
+        int (*quota_adjust_qunit) (struct obd_export *exp,
+                                   struct quota_adjust_qunit *oqaq,
+                                   struct lustre_quota_ctxt *qctxt);
+
 } quota_interface_t;
 
 #define Q_COPY(out, in, member) (out)->member = (in)->member
 
-#define QUOTA_OP(interface, op) interface->quota_ ## op         
+#define QUOTA_OP(interface, op) interface->quota_ ## op
 
 #define QUOTA_CHECK_OP(interface, op)                           \
 do {                                                            \
@@ -235,39 +563,39 @@ static inline int lquota_init(quota_interface_t *interface)
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, init);
         rc = QUOTA_OP(interface, init)();
         RETURN(rc);
 }
 
-static inline int lquota_exit(quota_interface_t *interface) 
+static inline int lquota_exit(quota_interface_t *interface)
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, exit);
         rc = QUOTA_OP(interface, exit)();
         RETURN(rc);
 }
 
 static inline int lquota_setup(quota_interface_t *interface,
-                               struct obd_device *obd) 
+                               struct obd_device *obd)
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, setup);
         rc = QUOTA_OP(interface, setup)(obd);
         RETURN(rc);
 }
 
 static inline int lquota_cleanup(quota_interface_t *interface,
-                                 struct obd_device *obd) 
+                                 struct obd_device *obd)
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, cleanup);
         rc = QUOTA_OP(interface, cleanup)(obd);
         RETURN(rc);
@@ -278,32 +606,57 @@ static inline int lquota_fs_cleanup(quota_interface_t *interface,
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, fs_cleanup);
         rc = QUOTA_OP(interface, fs_cleanup)(obd);
         RETURN(rc);
 }
 
 static inline int lquota_recovery(quota_interface_t *interface,
-                                  struct obd_device *obd) 
-{        
+                                  struct obd_device *obd)
+{
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, recovery);
         rc = QUOTA_OP(interface, recovery)(obd);
         RETURN(rc);
 }
 
+static inline int lquota_check(quota_interface_t *interface,
+                               struct obd_device *obd,
+                               struct obd_export *exp,
+                               struct obd_quotactl *oqctl)
+{
+        int rc;
+        ENTRY;
+
+        QUOTA_CHECK_OP(interface, check);
+        rc = QUOTA_OP(interface, check)(obd, exp, oqctl);
+        RETURN(rc);
+}
+
+static inline int lquota_ctl(quota_interface_t *interface,
+                             struct obd_device *obd,
+                             struct obd_quotactl *oqctl)
+{
+        int rc;
+        ENTRY;
+
+        QUOTA_CHECK_OP(interface, ctl);
+        rc = QUOTA_OP(interface, ctl)(obd, NULL, oqctl);
+        RETURN(rc);
+}
+
 static inline int lquota_adjust(quota_interface_t *interface,
-                                struct obd_device *obd, 
-                                unsigned int qcids[], 
-                                unsigned int qpids[], 
-                                int rc, int opc) 
+                                struct obd_device *obd,
+                                unsigned int qcids[],
+                                unsigned int qpids[],
+                                int rc, int opc)
 {
         int ret;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, adjust);
         ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc);
         RETURN(ret);
@@ -315,7 +668,7 @@ static inline int lquota_chkdq(quota_interface_t *interface,
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, chkdq);
         rc = QUOTA_OP(interface, chkdq)(cli, uid, gid);
         RETURN(rc);
@@ -328,7 +681,7 @@ static inline int lquota_setdq(quota_interface_t *interface,
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, setdq);
         rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags);
         RETURN(rc);
@@ -340,26 +693,37 @@ static inline int lquota_poll_check(quota_interface_t *interface,
 {
         int rc;
         ENTRY;
-        
+
         QUOTA_CHECK_OP(interface, poll_check);
         rc = QUOTA_OP(interface, poll_check)(exp, qchk);
         RETURN(rc);
 }
 
-       
 static inline int lquota_setinfo(quota_interface_t *interface,
-                                 struct obd_export *exp, 
-                                 struct obd_device *obd) 
+                                 struct obd_device *obd,
+                                 void *data)
 {
         int rc;
         ENTRY;
 
         QUOTA_CHECK_OP(interface, setinfo);
-        rc = QUOTA_OP(interface, setinfo)(exp, obd);
+        rc = QUOTA_OP(interface, setinfo)(obd, data);
+        RETURN(rc);
+}
+
+static inline int lquota_clearinfo(quota_interface_t *interface,
+                                   struct obd_export *exp,
+                                   struct obd_device *obd)
+{
+        int rc;
+        ENTRY;
+
+        QUOTA_CHECK_OP(interface, clearinfo);
+        rc = QUOTA_OP(interface, clearinfo)(exp, obd);
         RETURN(rc);
 }
 
-static inline int lquota_enforce(quota_interface_t *interface, 
+static inline int lquota_enforce(quota_interface_t *interface,
                                  struct obd_device *obd,
                                  unsigned int ignore)
 {
@@ -381,57 +745,60 @@ static inline int lquota_getflag(quota_interface_t *interface,
         rc = QUOTA_OP(interface, getflag)(obd, oa);
         RETURN(rc);
 }
-        
-static inline int lquota_acquire(quota_interface_t *interface,
-                                 struct obd_device *obd, 
-                                 unsigned int uid, unsigned int gid)
+
+static inline int lquota_chkquota(quota_interface_t *interface,
+                                  struct obd_device *obd,
+                                  unsigned int uid, unsigned int gid, int count,
+                                  int *flag, struct obd_trans_info *oti,
+                                  int isblk)
 {
         int rc;
         ENTRY;
 
+        QUOTA_CHECK_OP(interface, chkquota);
         QUOTA_CHECK_OP(interface, acquire);
-        rc = QUOTA_OP(interface, acquire)(obd, uid, gid);
+        rc = QUOTA_OP(interface, chkquota)(obd, uid, gid, count, flag,
+                                           QUOTA_OP(interface, acquire), oti,
+                                           isblk);
         RETURN(rc);
 }
 
-static inline int lquota_chkquota(quota_interface_t *interface,
-                                  struct obd_device *obd,
-                                  unsigned int uid, unsigned int gid,
-                                  int npage)
+static inline int lquota_pending_commit(quota_interface_t *interface,
+                                        struct obd_device *obd,
+                                        unsigned int uid, unsigned int gid,
+                                        int npage, int isblk)
 {
         int rc;
         ENTRY;
-        
-        QUOTA_CHECK_OP(interface, chkquota);
-        rc = QUOTA_OP(interface, chkquota)(obd, uid, gid, npage);
+
+        QUOTA_CHECK_OP(interface, pending_commit);
+        rc = QUOTA_OP(interface, pending_commit)(obd, uid, gid, npage, isblk);
         RETURN(rc);
 }
 
-int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data);
-int lprocfs_rd_iunit(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data);
-int lprocfs_wr_bunit(struct file *file, const char *buffer,
-                     unsigned long count, void *data);
-int lprocfs_wr_iunit(struct file *file, const char *buffer,
-                     unsigned long count, void *data);
-int lprocfs_rd_btune(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data);
-int lprocfs_rd_itune(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data);
-int lprocfs_wr_btune(struct file *file, const char *buffer,
-                     unsigned long count, void *data);
-int lprocfs_wr_itune(struct file *file, const char *buffer,
-                     unsigned long count, void *data);
-int lprocfs_rd_type(char *page, char **start, off_t off, int count, 
-                    int *eof, void *data);
-int lprocfs_wr_type(struct file *file, const char *buffer,
-                    unsigned long count, void *data);
-
 #ifndef __KERNEL__
 extern quota_interface_t osc_quota_interface;
-extern quota_interface_t mdc_quota_interface;
 extern quota_interface_t lov_quota_interface;
+extern quota_interface_t mdc_quota_interface;
+extern quota_interface_t lmv_quota_interface;
+
+#ifndef MAXQUOTAS
+#define MAXQUOTAS 2
+#endif
+
+#ifndef USRQUOTA
+#define USRQUOTA 0
 #endif
 
+#ifndef GRPQUOTA
+#define GRPQUOTA 1
+#endif
+
+#endif
+
+#define LUSTRE_ADMIN_QUOTAFILES_V2 {\
+        "admin_quotafile_v2.usr",       /** user admin quotafile */\
+        "admin_quotafile_v2.grp"        /** group admin quotafile */\
+}
+
 #endif /* _LUSTRE_QUOTA_H */
index 1853571..4f0c777 100644 (file)
@@ -177,6 +177,7 @@ extern const struct req_format RQF_OST_CONNECT;
 extern const struct req_format RQF_OST_DISCONNECT;
 extern const struct req_format RQF_OST_QUOTACHECK;
 extern const struct req_format RQF_OST_QUOTACTL;
+extern const struct req_format RQF_OST_QUOTA_ADJUST_QUNIT;
 extern const struct req_format RQF_OST_GETATTR;
 extern const struct req_format RQF_OST_SETATTR;
 extern const struct req_format RQF_OST_CREATE;
@@ -244,6 +245,7 @@ extern const struct req_msg_field RMF_CAPA1;
 extern const struct req_msg_field RMF_CAPA2;
 extern const struct req_msg_field RMF_OBD_QUOTACHECK;
 extern const struct req_msg_field RMF_OBD_QUOTACTL;
+extern const struct req_msg_field RMF_QUOTA_ADJUST_QUNIT;
 extern const struct req_msg_field RMF_QUNIT_DATA;
 extern const struct req_msg_field RMF_STRING;
 
index 00c20d2..24d2a41 100644 (file)
@@ -779,5 +779,14 @@ int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read,
                   struct ptlrpc_bulk_sec_desc *bsdv, int vsize,
                   struct ptlrpc_bulk_sec_desc *bsdr, int rsize);
 
+#define CFS_CAP_CHOWN_MASK (1 << CFS_CAP_CHOWN)
+#define CFS_CAP_SYS_RESOURCE_MASK (1 << CFS_CAP_SYS_RESOURCE)
+
+enum {
+        LUSTRE_SEC_NONE         = 0,
+        LUSTRE_SEC_REMOTE       = 1,
+        LUSTRE_SEC_SPECIFY      = 2,
+        LUSTRE_SEC_ALL          = 3
+};
 
 #endif /* _LUSTRE_SEC_H_ */
index c2fc95c..ecc92dc 100644 (file)
@@ -62,7 +62,7 @@
 struct md_device;
 struct md_device_operations;
 struct md_object;
-
+struct obd_export;
 
 enum {
         UCRED_INVALID   = -1,
@@ -73,18 +73,18 @@ enum {
 
 struct md_ucred {
         __u32               mu_valid;
-        __u32                   mu_o_uid;
-        __u32                   mu_o_gid;
-        __u32                   mu_o_fsuid;
-        __u32                   mu_o_fsgid;
-        __u32                   mu_uid;
-        __u32                   mu_gid;
-        __u32                   mu_fsuid;
-        __u32                   mu_fsgid;
-        __u32                   mu_suppgids[2];
-        cfs_cap_t               mu_cap;
-        __u32                   mu_umask;
-       struct group_info      *mu_ginfo;
+        __u32               mu_o_uid;
+        __u32               mu_o_gid;
+        __u32               mu_o_fsuid;
+        __u32               mu_o_fsgid;
+        __u32               mu_uid;
+        __u32               mu_gid;
+        __u32               mu_fsuid;
+        __u32               mu_fsgid;
+        __u32               mu_suppgids[2];
+        cfs_cap_t           mu_cap;
+        __u32               mu_umask;
+       struct group_info  *mu_ginfo;
        struct md_identity *mu_identity;
 };
 
@@ -95,6 +95,8 @@ enum {
 /** there are at most 5 fids in one operation, see rename, NOTE the last one
  * is a temporary one used for is_subdir() */
 struct md_capainfo {
+        __u32                   mc_auth;
+        __u32                   mc_padding;
         const struct lu_fid    *mc_fid[MD_CAPAINFO_MAX];
         struct lustre_capa     *mc_capa[MD_CAPAINFO_MAX];
 };
@@ -315,6 +317,82 @@ struct md_device_operations {
         int (*mdo_update_capa_key)(const struct lu_env *env,
                                    struct md_device *m,
                                    struct lustre_capa_key *key);
+
+#ifdef HAVE_QUOTA_SUPPORT
+        struct md_quota_operations {
+                int (*mqo_notify)(const struct lu_env *env,
+                                  struct md_device *m);
+
+                int (*mqo_setup)(const struct lu_env *env,
+                                 struct md_device *m,
+                                 void *data);
+
+                int (*mqo_cleanup)(const struct lu_env *env,
+                                   struct md_device *m);
+
+                int (*mqo_recovery)(const struct lu_env *env,
+                                    struct md_device *m);
+
+                int (*mqo_check)(const struct lu_env *env,
+                                 struct md_device *m,
+                                 struct obd_export *exp,
+                                 __u32 type);
+
+                int (*mqo_on)(const struct lu_env *env,
+                              struct md_device *m,
+                              __u32 type,
+                              __u32 id);
+
+                int (*mqo_off)(const struct lu_env *env,
+                               struct md_device *m,
+                               __u32 type,
+                               __u32 id);
+
+                int (*mqo_setinfo)(const struct lu_env *env,
+                                   struct md_device *m,
+                                   __u32 type,
+                                   __u32 id,
+                                   struct obd_dqinfo *dqinfo);
+
+                int (*mqo_getinfo)(const struct lu_env *env,
+                                   const struct md_device *m,
+                                   __u32 type,
+                                   __u32 id,
+                                   struct obd_dqinfo *dqinfo);
+
+                int (*mqo_setquota)(const struct lu_env *env,
+                                    struct md_device *m,
+                                    __u32 type,
+                                    __u32 id,
+                                    struct obd_dqblk *dqblk);
+
+                int (*mqo_getquota)(const struct lu_env *env,
+                                    const struct md_device *m,
+                                    __u32 type,
+                                    __u32 id,
+                                    struct obd_dqblk *dqblk);
+
+                int (*mqo_getoinfo)(const struct lu_env *env,
+                                    const struct md_device *m,
+                                    __u32 type,
+                                    __u32 id,
+                                    struct obd_dqinfo *dqinfo);
+
+                int (*mqo_getoquota)(const struct lu_env *env,
+                                     const struct md_device *m,
+                                     __u32 type,
+                                     __u32 id,
+                                     struct obd_dqblk *dqblk);
+
+                int (*mqo_invalidate)(const struct lu_env *env,
+                                      struct md_device *m,
+                                      __u32 type);
+
+                int (*mqo_finvalidate)(const struct lu_env *env,
+                                       struct md_device *m,
+                                       __u32 type);
+        } mdo_quota;
+#endif
 };
 
 enum md_upcall_event {
index 15d4273..2a5b2d8 100644 (file)
@@ -248,6 +248,8 @@ struct obd_device_target {
         struct super_block       *obt_sb;
         atomic_t                  obt_quotachecking;
         struct lustre_quota_ctxt  obt_qctxt;
+        lustre_quota_version_t    obt_qfmt;
+        struct rw_semaphore       obt_rwsem;
 };
 
 /* llog contexts */
@@ -362,6 +364,7 @@ struct filter_obd {
         struct list_head         fo_capa_keys;
         struct hlist_head       *fo_capa_hash;
         struct llog_commit_master *fo_lcm;
+        int                      fo_sec_level;
 };
 
 #define OSC_MAX_RIF_DEFAULT       8
@@ -517,9 +520,9 @@ struct mds_obd {
         __u32                            mds_id;
 
         /* mark pages dirty for write. */
-        bitmap_t                         *mds_lov_page_dirty;
+        bitmap_t                        *mds_lov_page_dirty;
         /* array for store pages with obd_id */
-        void                            **mds_lov_page_array;
+        void                           **mds_lov_page_array;
         /* file for store objid */
         struct file                     *mds_lov_objid_filp;
         __u32                            mds_lov_objid_count;
@@ -537,13 +540,14 @@ struct mds_obd {
                                          mds_fl_acl:1,
                                          mds_evict_ost_nids:1,
                                          mds_fl_cfglog:1,
-                                         mds_fl_synced:1;
+                                         mds_fl_synced:1,
+                                         mds_quota:1;
 
         struct upcall_cache             *mds_identity_cache;
 
         /* for capability keys update */
         struct lustre_capa_key          *mds_capa_keys;
-        struct rw_semaphore             mds_notify_lock;
+        struct rw_semaphore              mds_notify_lock;
 };
 
 /* lov objid */
@@ -788,7 +792,7 @@ struct obd_trans_info {
         int                      oti_numcookies;
 
         /* initial thread handling transaction */
-        int                      oti_thread_id;
+        struct ptlrpc_thread *   oti_thread;
         __u32                    oti_conn_cnt;
 
         struct obd_uuid         *oti_ost_uuid;
@@ -808,7 +812,7 @@ static inline void oti_init(struct obd_trans_info *oti,
 
         if (req->rq_repmsg != NULL)
                 oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
-        oti->oti_thread_id = req->rq_svc_thread ? req->rq_svc_thread->t_id : -1;
+        oti->oti_thread = req->rq_svc_thread;
         if (req->rq_reqmsg != NULL)
                 oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
 }
@@ -1214,7 +1218,7 @@ struct obd_ops {
                         struct lov_stripe_md **ea, struct obd_trans_info *oti);
         int (*o_destroy)(struct obd_export *exp, struct obdo *oa,
                          struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                         struct obd_export *md_exp);
+                         struct obd_export *md_exp, void *capa);
         int (*o_setattr)(struct obd_export *exp, struct obd_info *oinfo,
                          struct obd_trans_info *oti);
         int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
@@ -1292,8 +1296,14 @@ struct obd_ops {
         struct obd_uuid *(*o_get_uuid) (struct obd_export *exp);
 
         /* quota methods */
-        int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
-        int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
+        int (*o_quotacheck)(struct obd_device *, struct obd_export *,
+                            struct obd_quotactl *);
+        int (*o_quotactl)(struct obd_device *, struct obd_export *,
+                          struct obd_quotactl *);
+        int (*o_quota_adjust_qunit)(struct obd_export *exp,
+                                    struct quota_adjust_qunit *oqaq,
+                                    struct lustre_quota_ctxt *qctxt);
+
 
         int (*o_ping)(struct obd_export *exp);
 
@@ -1436,6 +1446,8 @@ struct md_ops {
                                void *opaque);
         int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc,
                             renew_capa_cb_t cb);
+        int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *,
+                             const struct req_msg_field *, struct obd_capa **);
 
         int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *,
                                  struct obd_capa *, __u32,
@@ -1526,6 +1538,7 @@ static inline void init_obd_quota_ops(quota_interface_t *interface,
         LASSERT(obd_ops);
         obd_ops->o_quotacheck = QUOTA_OP(interface, check);
         obd_ops->o_quotactl = QUOTA_OP(interface, ctl);
+        obd_ops->o_quota_adjust_qunit = QUOTA_OP(interface, adjust_qunit);
 }
 
 static inline __u64 oinfo_mdsno(struct obd_info *oinfo)
index 3e885b2..8fe3e6c 100644 (file)
@@ -86,8 +86,6 @@ void class_obd_list(void);
 struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
                                           const char * typ_name,
                                           struct obd_uuid *grp_uuid);
-struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid,
-                                             struct obd_uuid *grp_uuid);
 struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
                                            int *next);
 struct obd_device * class_num2obd(int num);
@@ -669,7 +667,7 @@ static inline int obd_create(struct obd_export *exp, struct obdo *obdo,
 static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
                               struct lov_stripe_md *ea,
                               struct obd_trans_info *oti,
-                              struct obd_export *md_exp)
+                              struct obd_export *md_exp, void *capa)
 {
         int rc;
         ENTRY;
@@ -677,7 +675,7 @@ static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
         EXP_CHECK_DT_OP(exp, destroy);
         EXP_COUNTER_INCREMENT(exp, destroy);
 
-        rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp);
+        rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp, capa);
         RETURN(rc);
 }
 
@@ -1469,7 +1467,7 @@ static inline int obd_quotacheck(struct obd_export *exp,
         EXP_CHECK_DT_OP(exp, quotacheck);
         EXP_COUNTER_INCREMENT(exp, quotacheck);
 
-        rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl);
+        rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl);
         RETURN(rc);
 }
 
@@ -1482,7 +1480,39 @@ static inline int obd_quotactl(struct obd_export *exp,
         EXP_CHECK_DT_OP(exp, quotactl);
         EXP_COUNTER_INCREMENT(exp, quotactl);
 
-        rc = OBP(exp->exp_obd, quotactl)(exp, oqctl);
+        rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl);
+        RETURN(rc);
+}
+
+static inline int obd_quota_adjust_qunit(struct obd_export *exp,
+                                         struct quota_adjust_qunit *oqaq,
+                                         struct lustre_quota_ctxt *qctxt)
+{
+#if defined(LPROCFS) && defined(HAVE_QUOTA_SUPPORT)
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
+#endif
+        int rc;
+        ENTRY;
+
+#if defined(LPROCFS) && defined(HAVE_QUOTA_SUPPORT)
+        if (qctxt)
+                do_gettimeofday(&work_start);
+#endif
+        EXP_CHECK_DT_OP(exp, quota_adjust_qunit);
+        EXP_COUNTER_INCREMENT(exp, quota_adjust_qunit);
+
+        rc = OBP(exp->exp_obd, quota_adjust_qunit)(exp, oqaq, qctxt);
+
+#if defined(LPROCFS) && defined(HAVE_QUOTA_SUPPORT)
+        if (qctxt) {
+                do_gettimeofday(&work_end);
+                timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+                lprocfs_counter_add(qctxt->lqc_stats, LQUOTA_ADJUST_QUNIT,
+                                    timediff);
+        }
+#endif
         RETURN(rc);
 }
 
@@ -1932,6 +1962,19 @@ static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa,
         RETURN(rc);
 }
 
+static inline int md_unpack_capa(struct obd_export *exp,
+                                 struct ptlrpc_request *req,
+                                 const struct req_msg_field *field,
+                                 struct obd_capa **oc)
+{
+        int rc;
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, unpack_capa);
+        EXP_MD_COUNTER_INCREMENT(exp, unpack_capa);
+        rc = MDP(exp->exp_obd, unpack_capa)(exp, req, field, oc);
+        RETURN(rc);
+}
+
 static inline int md_intent_getattr_async(struct obd_export *exp,
                                           struct md_enqueue_info *minfo,
                                           struct ldlm_enqueue_info *einfo)
index b849ff0..8ddb969 100644 (file)
@@ -54,6 +54,7 @@ struct osc_brw_async_args {
         struct brw_page  **aa_ppga;
         struct client_obd *aa_cli;
         struct list_head   aa_oaps;
+        struct obd_capa   *aa_ocapa;
         struct cl_req     *aa_clerq;
 };
 
index 25ab060..50379c5 100644 (file)
@@ -192,6 +192,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MDS_CLOSE_NET_REP       0x13f
 #define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT   0x140
 #define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x141
+#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ     0x142
+#define OBD_FAIL_MDS_DROP_QUOTA_REQ      0x143
 
 #define OBD_FAIL_OST                     0x200
 #define OBD_FAIL_OST_CONNECT_NET         0x201
@@ -320,7 +322,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MGS_PAUSE_REQ           0x904
 #define OBD_FAIL_MGS_PAUSE_TARGET_REG    0x905
 
-#define OBD_FAIL_QUOTA_QD_COUNT_32BIT    0xA00
+#define OBD_FAIL_QUOTA_RET_QDATA         0xA02
 
 #define OBD_FAIL_LPROC_REMOVE            0xB00
 
diff --git a/lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch b/lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch
new file mode 100644 (file)
index 0000000..b8c6b0d
--- /dev/null
@@ -0,0 +1,127 @@
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 23 Mar 2006 11:00:17 +0000 (-0800)
+Subject: [PATCH] Fix oops in invalidate_dquots()
+X-Git-Tag: v2.6.17-rc1~1059
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=6362e4d4eda61efb04ac1cdae32e48ac6d90b701
+
+[PATCH] Fix oops in invalidate_dquots()
+
+When quota is being turned off we assumed that all the references to dquots
+were already dropped.  That need not be true as inodes being deleted are
+not on superblock's inodes list and hence we need not reach it when
+removing quota references from inodes.  So invalidate_dquots() has to wait
+for all the users of dquots (as quota is already marked as turned off, no
+new references can be acquired and so this is bound to happen rather
+early).  When we do this, we can also remove the iprune_sem locking as it
+was protecting us against exactly the same problem when freeing inodes
+icache memory.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+Signed-off-by: Linus Torvalds <torvalds@osdl.org>
+---
+
+diff --git a/fs/dquot.c b/fs/dquot.c
+index 1966c89..9376a43 100644
+--- a/fs/dquot.c
++++ b/fs/dquot.c
+@@ -118,8 +118,7 @@
+  * spinlock to internal buffers before writing.
+  *
+  * Lock ordering (including related VFS locks) is the following:
+- *   i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem >
+- *   > dquot->dq_lock > dqio_sem
++ *  i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem
+  * i_mutex on quota files is special (it's below dqio_sem)
+  */
+@@ -407,23 +406,49 @@ out_dqlock:
+ /* Invalidate all dquots on the list. Note that this function is called after
+  * quota is disabled and pointers from inodes removed so there cannot be new
+- * quota users. Also because we hold dqonoff_sem there can be no quota users
+- * for this sb+type at all. */
++ * quota users. There can still be some users of quotas due to inodes being
++ * just deleted or pruned by prune_icache() (those are not attached to any
++ * list). We have to wait for such users.
++ */
+ static void invalidate_dquots(struct super_block *sb, int type)
+ {
+       struct dquot *dquot, *tmp;
++restart:
+       spin_lock(&dq_list_lock);
+       list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
+               if (dquot->dq_sb != sb)
+                       continue;
+               if (dquot->dq_type != type)
+                       continue;
+-#ifdef __DQUOT_PARANOIA
+-              if (atomic_read(&dquot->dq_count))
+-                      BUG();
+-#endif
+-              /* Quota now has no users and it has been written on last dqput() */
++              /* Wait for dquot users */
++              if (atomic_read(&dquot->dq_count)) {
++                      DEFINE_WAIT(wait);
++
++                      atomic_inc(&dquot->dq_count);
++                      prepare_to_wait(&dquot->dq_wait_unused, &wait,
++                                      TASK_UNINTERRUPTIBLE);
++                      spin_unlock(&dq_list_lock);
++                      /* Once dqput() wakes us up, we know it's time to free
++                       * the dquot.
++                       * IMPORTANT: we rely on the fact that there is always
++                       * at most one process waiting for dquot to free.
++                       * Otherwise dq_count would be > 1 and we would never
++                       * wake up.
++                       */
++                      if (atomic_read(&dquot->dq_count) > 1)
++                              schedule();
++                      finish_wait(&dquot->dq_wait_unused, &wait);
++                      dqput(dquot);
++                      /* At this moment dquot() need not exist (it could be
++                       * reclaimed by prune_dqcache()). Hence we must
++                       * restart. */
++                      goto restart;
++              }
++              /*
++               * Quota now has no users and it has been written on last
++               * dqput()
++               */
+               remove_dquot_hash(dquot);
+               remove_free_dquot(dquot);
+               remove_inuse(dquot);
+@@ -540,6 +565,10 @@ we_slept:
+       if (atomic_read(&dquot->dq_count) > 1) {
+               /* We have more than one user... nothing to do */
+               atomic_dec(&dquot->dq_count);
++              /* Releasing dquot during quotaoff phase? */
++              if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
++                  atomic_read(&dquot->dq_count) == 1)
++                      wake_up(&dquot->dq_wait_unused);
+               spin_unlock(&dq_list_lock);
+               return;
+       }
+@@ -581,6 +610,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
+       INIT_LIST_HEAD(&dquot->dq_inuse);
+       INIT_HLIST_NODE(&dquot->dq_hash);
+       INIT_LIST_HEAD(&dquot->dq_dirty);
++      init_waitqueue_head(&dquot->dq_wait_unused);
+       dquot->dq_sb = sb;
+       dquot->dq_type = type;
+       atomic_set(&dquot->dq_count, 1);
+@@ -732,13 +762,9 @@ static void drop_dquot_ref(struct super_block *sb, int type)
+ {
+       LIST_HEAD(tofree_head);
+-      /* We need to be guarded against prune_icache to reach all the
+-       * inodes - otherwise some can be on the local list of prune_icache */
+-      down(&iprune_sem);
+       down_write(&sb_dqopt(sb)->dqptr_sem);
+       remove_dquot_ref(sb, type, &tofree_head);
+       up_write(&sb_dqopt(sb)->dqptr_sem);
+-      up(&iprune_sem);
+       put_dquot_list(&tofree_head);
+ }
diff --git a/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch b/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch
new file mode 100644 (file)
index 0000000..4f3a3bc
--- /dev/null
@@ -0,0 +1,616 @@
+diff -rNpu linux-2.6.16.54-0.2.5/fs/dquot.c linux-2.6.16.54-0.2.5-quota/fs/dquot.c
+--- linux-2.6.16.54-0.2.5/fs/dquot.c   2008-03-18 15:48:26.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/fs/dquot.c     2008-03-17 22:43:11.000000000 +0300
+@@ -1588,10 +1588,19 @@ int vfs_get_dqblk(struct super_block *sb
+ }
+ /* Generic routine for setting common part of quota structure */
+-static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
++static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
+ {
+       struct mem_dqblk *dm = &dquot->dq_dqb;
+       int check_blim = 0, check_ilim = 0;
++      struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
++
++      if ((di->dqb_valid & QIF_BLIMITS &&
++           (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
++            di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
++          (di->dqb_valid & QIF_ILIMITS &&
++           (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
++            di->dqb_isoftlimit > dqi->dqi_maxilimit)))
++              return -ERANGE;
+       spin_lock(&dq_data_lock);
+       if (di->dqb_valid & QIF_SPACE) {
+@@ -1623,7 +1632,7 @@ static void do_set_dqblk(struct dquot *d
+                       clear_bit(DQ_BLKS_B, &dquot->dq_flags);
+               }
+               else if (!(di->dqb_valid & QIF_BTIME))  /* Set grace only if user hasn't provided his own... */
+-                      dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
++                      dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
+       }
+       if (check_ilim) {
+               if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
+@@ -1631,7 +1640,7 @@ static void do_set_dqblk(struct dquot *d
+                       clear_bit(DQ_INODES_B, &dquot->dq_flags);
+               }
+               else if (!(di->dqb_valid & QIF_ITIME))  /* Set grace only if user hasn't provided his own... */
+-                      dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
++                      dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
+       }
+       if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
+               clear_bit(DQ_FAKE_B, &dquot->dq_flags);
+@@ -1639,21 +1648,24 @@ static void do_set_dqblk(struct dquot *d
+               set_bit(DQ_FAKE_B, &dquot->dq_flags);
+       spin_unlock(&dq_data_lock);
+       mark_dquot_dirty(dquot);
++
++      return 0;
+ }
+ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
+ {
+       struct dquot *dquot;
++      int rc;
+       mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+       if (!(dquot = dqget(sb, id, type))) {
+               mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+               return -ESRCH;
+       }
+-      do_set_dqblk(dquot, di);
++      rc = do_set_dqblk(dquot, di);
+       dqput(dquot);
+       mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+-      return 0;
++      return rc;
+ }
+ /* Generic routine for getting common part of quota file information */
+diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v1.c linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c
+--- linux-2.6.16.54-0.2.5/fs/quota_v1.c        2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c  2008-03-17 22:42:47.000000000 +0300
+@@ -139,6 +139,9 @@ static int v1_read_file_info(struct supe
+               goto out;
+       }
+       ret = 0;
++      /* limits are stored as unsigned 32-bit data */
++      dqopt->info[type].dqi_maxblimit = 0xffffffff;
++      dqopt->info[type].dqi_maxilimit = 0xffffffff;
+       dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
+       dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
+ out:
+diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v2.c linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c
+--- linux-2.6.16.54-0.2.5/fs/quota_v2.c        2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c  2008-03-18 11:58:02.000000000 +0300
+@@ -23,26 +23,64 @@ MODULE_LICENSE("GPL");
+ typedef char *dqbuf_t;
+ #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
+-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
++#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
++                       sizeof(struct v2_disk_dqdbheader)))
++#define REV_ASSERT(r) BUG_ON((r) != 0 && (r) != 1) /* only r0/r1 exist */
++
++static const union v2_disk_dqblk emptydquot;
++static const union v2_disk_dqblk fakedquot[2] = {
++      {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
++      {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
++};
+-/* Check whether given file is really vfsv0 quotafile */
+-static int v2_check_quota_file(struct super_block *sb, int type)
++static inline uint v2_dqblksz(uint rev)
++{
++      /*
++       * Size in bytes of one on-disk quota entry: revision 0 files use
++       * struct v2_disk_dqblk_r0, revision 1 files struct v2_disk_dqblk_r1.
++       * Any other revision trips REV_ASSERT().
++       */
++      REV_ASSERT(rev);
++
++      if (rev != 0)
++              return sizeof(struct v2_disk_dqblk_r1);
++      return sizeof(struct v2_disk_dqblk_r0);
++}
++
++/* Number of revision-@rev quota entries that fit in one block after its header */
++static inline int v2_dqstrinblk(uint rev)
++{
++      return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
++}
++
++/* Get revision of a quota file, -1 if it does not look like a quota file */
++static int v2_quota_file_revision(struct super_block *sb, int type)
+ {
+       struct v2_disk_dqheader dqhead;
+       ssize_t size;
+       static const uint quota_magics[] = V2_INITQMAGICS;
+-      static const uint quota_versions[] = V2_INITQVERSIONS;
++      static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
++      static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
+  
+       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
+       if (size != sizeof(struct v2_disk_dqheader)) {
+               printk("quota_v2: failed read expected=%zd got=%zd\n",
+                       sizeof(struct v2_disk_dqheader), size);
+-              return 0;
++              return -1;
+       }
+-      if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
+-          le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+-              return 0;
+-      return 1;
++      if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
++              if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
++                      return 0;
++              if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
++                      return 1;
++      }
++      return -1;
++}
++
++/* Check whether given file is really vfsv0 quotafile */
++static inline int v2_check_quota_file(struct super_block *sb, int type)
++{
++      return v2_quota_file_revision(sb, type) != -1;
+ }
+ /* Read information header from quota file */
+@@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe
+       struct v2_disk_dqinfo dinfo;
+       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+       ssize_t size;
++      int rev;
++
++      rev = v2_quota_file_revision(sb, type);
++      if (rev < 0) {
++              printk(KERN_WARNING "Second quota file check failed.\n");
++              return -1;
++      }
+       size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+@@ -65,6 +110,16 @@ static int v2_read_file_info(struct supe
+       info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+       info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+       info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
++
++      info->u.v2_i.dqi_revision = rev;
++      if (rev == 0) {
++              info->dqi_maxblimit = 0xffffffffULL;
++              info->dqi_maxilimit = 0xffffffffULL;
++      } else {
++              info->dqi_maxblimit = 0xffffffffffffffffULL;
++              info->dqi_maxilimit = 0xffffffffffffffffULL;
++      }
++
+       return 0;
+ }
+@@ -94,29 +149,61 @@ static int v2_write_file_info(struct sup
+       return 0;
+ }
+-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
++static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
+ {
+-      m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
+-      m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
+-      m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
+-      m->dqb_itime = le64_to_cpu(d->dqb_itime);
+-      m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
+-      m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
+-      m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+-      m->dqb_btime = le64_to_cpu(d->dqb_btime);
+-}
+-
+-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+-{
+-      d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
+-      d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
+-      d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
+-      d->dqb_itime = cpu_to_le64(m->dqb_itime);
+-      d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
+-      d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+-      d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+-      d->dqb_btime = cpu_to_le64(m->dqb_btime);
+-      d->dqb_id = cpu_to_le32(id);
++      REV_ASSERT(rev);
++
++      if (rev == 0) {
++              struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
++              m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
++              m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
++              m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
++              m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
++              m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
++              m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
++              m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
++              m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
++      } else {
++              struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
++              m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
++              m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
++              m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
++              m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
++              m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
++              m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
++              m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
++              m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
++      }
++}
++
++static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
++                      qid_t id, uint rev)
++{
++      REV_ASSERT(rev);
++      /* Serialise @m into @d (layout chosen by @rev); read only from @m */
++      if (rev == 0) {
++              struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
++              ddqblk->dqb_id = cpu_to_le32(id);
++              ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
++              ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
++              ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
++              ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
++              ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
++              ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
++              ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
++              ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
++      } else {
++              struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
++              ddqblk->dqb_id = cpu_to_le32(id);
++              ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
++              ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
++              ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
++              ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
++              ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
++              ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
++              ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
++              ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime);
++      }
+ }
+ static dqbuf_t getdqbuf(void)
+@@ -268,10 +355,10 @@ static uint find_free_dqentry(struct dqu
+ {
+       struct super_block *sb = dquot->dq_sb;
+       struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
+-      uint blk, i;
++      uint blk, i, rev = info->u.v2_i.dqi_revision;
++      uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
+       struct v2_disk_dqdbheader *dh;
+-      struct v2_disk_dqblk *ddquot;
+-      struct v2_disk_dqblk fakedquot;
++      union v2_disk_dqblk *ddquot;
+       dqbuf_t buf;
+       *err = 0;
+@@ -298,17 +385,18 @@ static uint find_free_dqentry(struct dqu
+               info->u.v2_i.dqi_free_entry = blk;
+               mark_info_dirty(sb, dquot->dq_type);
+       }
+-      if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
++      /* Block will be full? */
++      if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
+               if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
+                       printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
+                       goto out_buf;
+               }
+       dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
+-      memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+       /* Find free structure in block */
+-      for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
++      for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
++           i++, ddquot = (char *)ddquot + dqblksz);
+ #ifdef __QUOTA_V2_PARANOIA
+-      if (i == V2_DQSTRINBLK) {
++      if (i == dqstrinblk) {
+               printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
+               *err = -EIO;
+               goto out_buf;
+@@ -318,7 +406,8 @@ static uint find_free_dqentry(struct dqu
+               printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
+               goto out_buf;
+       }
+-      dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
++      dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
++                      ((char *)ddquot - (char *)buf);
+       freedqbuf(buf);
+       return blk;
+ out_buf:
+@@ -392,7 +481,9 @@ static int v2_write_dquot(struct dquot *
+ {
+       int type = dquot->dq_type;
+       ssize_t ret;
+-      struct v2_disk_dqblk ddquot, empty;
++      union v2_disk_dqblk ddquot;
++      uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
++      uint dqblksz = v2_dqblksz(rev);
+       /* dq_off is guarded by dqio_sem */
+       if (!dquot->dq_off)
+@@ -401,18 +492,22 @@ static int v2_write_dquot(struct dquot *
+                       return ret;
+               }
+       spin_lock(&dq_data_lock);
+-      mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
++      mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
+       /* Argh... We may need to write structure full of zeroes but that would be
+        * treated as an empty place by the rest of the code. Format change would
+        * be definitely cleaner but the problems probably are not worth it */
+-      memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+-      if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
+-              ddquot.dqb_itime = cpu_to_le64(1);
++      if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
++              if (rev == 0)
++                      ddquot.r0.dqb_itime = cpu_to_le64(1);
++              else
++                      ddquot.r1.dqb_itime = cpu_to_le64(1);
++      }
+       spin_unlock(&dq_data_lock);
+       ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
+-            (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
+-      if (ret != sizeof(struct v2_disk_dqblk)) {
+-              printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
++            (char *)&ddquot, dqblksz, dquot->dq_off);
++      if (ret != dqblksz) {
++              printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
++                      dquot->dq_sb->s_id);
+               if (ret >= 0)
+                       ret = -ENOSPC;
+       }
+@@ -431,6 +526,7 @@ static int free_dqentry(struct dquot *dq
+       struct v2_disk_dqdbheader *dh;
+       dqbuf_t buf = getdqbuf();
+       int ret = 0;
++      uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
+       if (!buf)
+               return -ENOMEM;
+@@ -456,8 +552,8 @@ static int free_dqentry(struct dquot *dq
+       }
+       else {
+               memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
+-                sizeof(struct v2_disk_dqblk));
+-              if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
++                v2_dqblksz(rev));
++              if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
+                       /* Insert will write block itself */
+                       if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
+                               printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
+@@ -529,41 +625,56 @@ static int v2_delete_dquot(struct dquot 
+       return remove_tree(dquot, &tmp, 0);
+ }
++static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
++{
++      /*
++       * Return the id stored in an on-disk entry in CPU byte order,
++       * reading it through the union member that matches @rev.
++       */
++      REV_ASSERT(rev);
++
++      if (rev != 0)
++              return le32_to_cpu(ddquot->r1.dqb_id);
++
++      return le32_to_cpu(ddquot->r0.dqb_id);
++}
++
+ /* Find entry in block */
+ static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
+ {
+       dqbuf_t buf = getdqbuf();
+       loff_t ret = 0;
+       int i;
+-      struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
++      union v2_disk_dqblk *ddquot = GETENTRIES(buf);
++      int type = dquot->dq_type;
++      uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
++      uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
+       if (!buf)
+               return -ENOMEM;
+-      if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
++
++      ret = read_blk(dquot->dq_sb, type, blk, buf);
++      if (ret < 0) {
+               printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+               goto out_buf;
+       }
+       if (dquot->dq_id)
+-              for (i = 0; i < V2_DQSTRINBLK &&
+-                   le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
++              for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
++                   i++, ddquot = (char *)ddquot + dqblksz);
+       else {  /* ID 0 as a bit more complicated searching... */
+-              struct v2_disk_dqblk fakedquot;
+-
+-              memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+-              for (i = 0; i < V2_DQSTRINBLK; i++)
+-                      if (!le32_to_cpu(ddquot[i].dqb_id) &&
+-                          memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
++              for (i = 0; i < dqstrinblk; i++, ddquot = (char *)ddquot+dqblksz)
++                      if (!dqid(ddquot, rev) &&
++                          memcmp(&emptydquot, ddquot, dqblksz))
+                               break;
+       }
+-      if (i == V2_DQSTRINBLK) {
++      if (i == dqstrinblk) {
+               printk(KERN_ERR "VFS: Quota for id %u referenced "
+                 "but not present.\n", dquot->dq_id);
+               ret = -EIO;
+               goto out_buf;
+       }
+       else
+-              ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
+-                v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
++              ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
+ out_buf:
+       freedqbuf(buf);
+       return ret;
+@@ -605,7 +716,7 @@ static int v2_read_dquot(struct dquot *d
+ {
+       int type = dquot->dq_type;
+       loff_t offset;
+-      struct v2_disk_dqblk ddquot, empty;
++      union v2_disk_dqblk ddquot;
+       int ret = 0;
+ #ifdef __QUOTA_V2_PARANOIA
+@@ -626,25 +737,30 @@ static int v2_read_dquot(struct dquot *d
+               ret = offset;
+       }
+       else {
++              uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
++                         dqi_revision;
++              uint  dqblksz = v2_dqblksz(rev);
+               dquot->dq_off = offset;
+-              if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+-                  (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
+-                  != sizeof(struct v2_disk_dqblk)) {
++              ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
++                                         (char *)&ddquot, dqblksz, offset);
++              if (ret != dqblksz) {
+                       if (ret >= 0)
+                               ret = -EIO;
+                       printk(KERN_ERR "VFS: Error while reading quota "
+                         "structure for id %u.\n", dquot->dq_id);
+-                      memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
++                      memset(&ddquot, 0, dqblksz);
+               }
+               else {
+                       ret = 0;
+                       /* We need to escape back all-zero structure */
+-                      memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+-                      empty.dqb_itime = cpu_to_le64(1);
+-                      if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
+-                              ddquot.dqb_itime = 0;
++                      if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
++                              if (rev == 0)
++                                      ddquot.r0.dqb_itime = cpu_to_le64(0);
++                              else
++                                      ddquot.r1.dqb_itime = cpu_to_le64(0);
++                      }
+               }
+-              disk2memdqb(&dquot->dq_dqb, &ddquot);
++              disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
+               if (!dquot->dq_dqb.dqb_bhardlimit &&
+                       !dquot->dq_dqb.dqb_bsoftlimit &&
+                       !dquot->dq_dqb.dqb_ihardlimit &&
+diff -rNpu linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h
+--- linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h     2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h       2008-03-17 23:39:54.000000000 +0300
+@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
+       unsigned int dqi_blocks;
+       unsigned int dqi_free_blk;
+       unsigned int dqi_free_entry;
++      unsigned int dqi_revision;
+ };
+ #endif /* _LINUX_DQBLK_V2_H */
+diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quota.h linux-2.6.16.54-0.2.5-quota/include/linux/quota.h
+--- linux-2.6.16.54-0.2.5/include/linux/quota.h        2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/include/linux/quota.h  2008-03-17 23:39:54.000000000 +0300
+@@ -148,12 +148,12 @@ struct if_dqinfo {
+  * Data for one user/group kept in memory
+  */
+ struct mem_dqblk {
+-      __u32 dqb_bhardlimit;   /* absolute limit on disk blks alloc */
+-      __u32 dqb_bsoftlimit;   /* preferred limit on disk blks */
++      qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
++      qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
+       qsize_t dqb_curspace;   /* current used space */
+-      __u32 dqb_ihardlimit;   /* absolute limit on allocated inodes */
+-      __u32 dqb_isoftlimit;   /* preferred inode limit */
+-      __u32 dqb_curinodes;    /* current # allocated inodes */
++      qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
++      qsize_t dqb_isoftlimit; /* preferred inode limit */
++      qsize_t dqb_curinodes;  /* current # allocated inodes */
+       time_t dqb_btime;       /* time limit for excessive disk use */
+       time_t dqb_itime;       /* time limit for excessive inode use */
+ };
+@@ -169,6 +169,8 @@ struct mem_dqinfo {
+       unsigned long dqi_flags;
+       unsigned int dqi_bgrace;
+       unsigned int dqi_igrace;
++      qsize_t dqi_maxblimit;
++      qsize_t dqi_maxilimit;
+       union {
+               struct v1_mem_dqinfo v1_i;
+               struct v2_mem_dqinfo v2_i;
+diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h
+--- linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h   2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h     2008-03-17 23:39:54.000000000 +0300
+@@ -16,28 +16,51 @@
+       0xd9c01927      /* GRPQUOTA */\
+ }
+-#define V2_INITQVERSIONS {\
++#define V2_INITQVERSIONS_R0 {\
+       0,              /* USRQUOTA */\
+       0               /* GRPQUOTA */\
+ }
++#define V2_INITQVERSIONS_R1 {\
++      1,              /* USRQUOTA */\
++      1               /* GRPQUOTA */\
++}
++
+ /*
+  * The following structure defines the format of the disk quota file
+  * (as it appears on disk) - the file is a radix tree whose leaves point
+  * to blocks of these structures.
+  */
+-struct v2_disk_dqblk {
++struct v2_disk_dqblk_r0 {
+       __le32 dqb_id;          /* id this quota applies to */
+       __le32 dqb_ihardlimit;  /* absolute limit on allocated inodes */
+       __le32 dqb_isoftlimit;  /* preferred inode limit */
+       __le32 dqb_curinodes;   /* current # allocated inodes */
+-      __le32 dqb_bhardlimit;  /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+-      __le32 dqb_bsoftlimit;  /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
++      __le32 dqb_bhardlimit;  /* absolute limit on disk space */
++      __le32 dqb_bsoftlimit;  /* preferred limit on disk space */
++      __le64 dqb_curspace;    /* current space occupied (in bytes) */
++      __le64 dqb_btime;       /* time limit for excessive disk use */
++      __le64 dqb_itime;       /* time limit for excessive inode use */
++};
++
++struct v2_disk_dqblk_r1 {
++      __le32 dqb_id;          /* id this quota applies to */
++      __le32 dqb_padding;     /* padding field */
++      __le64 dqb_ihardlimit;  /* absolute limit on allocated inodes */
++      __le64 dqb_isoftlimit;  /* preferred inode limit */
++      __le64 dqb_curinodes;   /* current # allocated inodes */
++      __le64 dqb_bhardlimit;  /* absolute limit on disk space */
++      __le64 dqb_bsoftlimit;  /* preferred limit on disk space */
+       __le64 dqb_curspace;    /* current space occupied (in bytes) */
+       __le64 dqb_btime;       /* time limit for excessive disk use */
+       __le64 dqb_itime;       /* time limit for excessive inode use */
+ };
++union v2_disk_dqblk {
++      struct v2_disk_dqblk_r0 r0;
++      struct v2_disk_dqblk_r1 r1;
++};
++
+ /*
+  * Here are header structures as written on disk and their in-memory copies
+  */
+@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
+ /*
+  *  Structure of header of block with quota structures. It is padded to 16 bytes so
+- *  there will be space for exactly 21 quota-entries in a block
++ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
+  */
+ struct v2_disk_dqdbheader {
+       __le32 dqdh_next_free;  /* Number of next block with free entry */
+@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
+ #define V2_DQBLKSIZE  (1 << V2_DQBLKSIZE_BITS)        /* Size of block with quota structures */
+ #define V2_DQTREEOFF  1               /* Offset of tree in file in blocks */
+ #define V2_DQTREEDEPTH        4               /* Depth of quota tree */
+-#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))     /* Number of entries in one blocks */
+ #endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/lustre/kernel_patches/patches/quota-large-limits-sles10.patch b/lustre/kernel_patches/patches/quota-large-limits-sles10.patch
new file mode 100644 (file)
index 0000000..fcef1c2
--- /dev/null
@@ -0,0 +1,616 @@
+diff -rNpu linux-2.6.16.54-0.2.5/fs/dquot.c linux-2.6.16.54-0.2.5-quota/fs/dquot.c
+--- linux-2.6.16.54-0.2.5/fs/dquot.c   2008-03-18 15:48:26.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/fs/dquot.c     2008-03-17 22:43:11.000000000 +0300
+@@ -1588,10 +1588,19 @@ int vfs_get_dqblk(struct super_block *sb
+ }
+ /* Generic routine for setting common part of quota structure */
+-static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
++static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
+ {
+       struct mem_dqblk *dm = &dquot->dq_dqb;
+       int check_blim = 0, check_ilim = 0;
++      struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
++
++      if ((di->dqb_valid & QIF_BLIMITS &&
++           (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
++            di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
++          (di->dqb_valid & QIF_ILIMITS &&
++           (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
++            di->dqb_isoftlimit > dqi->dqi_maxilimit)))
++              return -ERANGE;
+       spin_lock(&dq_data_lock);
+       if (di->dqb_valid & QIF_SPACE) {
+@@ -1623,7 +1632,7 @@ static void do_set_dqblk(struct dquot *d
+                       clear_bit(DQ_BLKS_B, &dquot->dq_flags);
+               }
+               else if (!(di->dqb_valid & QIF_BTIME))  /* Set grace only if user hasn't provided his own... */
+-                      dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
++                      dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
+       }
+       if (check_ilim) {
+               if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
+@@ -1631,7 +1640,7 @@ static void do_set_dqblk(struct dquot *d
+                       clear_bit(DQ_INODES_B, &dquot->dq_flags);
+               }
+               else if (!(di->dqb_valid & QIF_ITIME))  /* Set grace only if user hasn't provided his own... */
+-                      dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
++                      dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
+       }
+       if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
+               clear_bit(DQ_FAKE_B, &dquot->dq_flags);
+@@ -1639,21 +1648,24 @@ static void do_set_dqblk(struct dquot *d
+               set_bit(DQ_FAKE_B, &dquot->dq_flags);
+       spin_unlock(&dq_data_lock);
+       mark_dquot_dirty(dquot);
++
++      return 0;
+ }
+ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
+ {
+       struct dquot *dquot;
++      int rc;
+       down(&sb_dqopt(sb)->dqonoff_sem);
+       if (!(dquot = dqget(sb, id, type))) {
+               up(&sb_dqopt(sb)->dqonoff_sem);
+               return -ESRCH;
+       }
+-      do_set_dqblk(dquot, di);
++      rc = do_set_dqblk(dquot, di);
+       dqput(dquot);
+       up(&sb_dqopt(sb)->dqonoff_sem);
+-      return 0;
++      return rc;
+ }
+ /* Generic routine for getting common part of quota file information */
+diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v1.c linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c
+--- linux-2.6.16.54-0.2.5/fs/quota_v1.c        2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c  2008-03-17 22:42:47.000000000 +0300
+@@ -139,6 +139,9 @@ static int v1_read_file_info(struct supe
+               goto out;
+       }
+       ret = 0;
++      /* limits are stored as unsigned 32-bit data */
++      dqopt->info[type].dqi_maxblimit = 0xffffffff;
++      dqopt->info[type].dqi_maxilimit = 0xffffffff;
+       dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
+       dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
+ out:
+diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v2.c linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c
+--- linux-2.6.16.54-0.2.5/fs/quota_v2.c        2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c  2008-03-18 11:58:02.000000000 +0300
+@@ -23,26 +23,64 @@ MODULE_LICENSE("GPL");
+ typedef char *dqbuf_t;
+ #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
+-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
++#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
++                       sizeof(struct v2_disk_dqdbheader)))
++#define REV_ASSERT(r) BUG_ON((r) != 0 && (r) != 1) /* only revisions 0 and 1 are handled */
++
++static const union v2_disk_dqblk emptydquot;
++static const union v2_disk_dqblk fakedquot[2] = {
++      {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
++      {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
++};
+-/* Check whether given file is really vfsv0 quotafile */
+-static int v2_check_quota_file(struct super_block *sb, int type)
++static inline uint v2_dqblksz(uint rev)
++{
++      uint sz;
++
++      REV_ASSERT(rev);
++
++      if (rev == 0)
++              sz = sizeof(struct v2_disk_dqblk_r0);
++      else
++              sz = sizeof(struct v2_disk_dqblk_r1);
++
++      return sz;
++}
++
++/* Number of quota entries in a block */
++static inline int v2_dqstrinblk(uint rev)
++{
++      return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
++}
++
++/* Get revision of a quota file, -1 if it does not look like a quota file */
++static int v2_quota_file_revision(struct super_block *sb, int type)
+ {
+       struct v2_disk_dqheader dqhead;
+       ssize_t size;
+       static const uint quota_magics[] = V2_INITQMAGICS;
+-      static const uint quota_versions[] = V2_INITQVERSIONS;
++      static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
++      static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
+  
+       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
+       if (size != sizeof(struct v2_disk_dqheader)) {
+               printk("quota_v2: failed read expected=%zd got=%zd\n",
+                       sizeof(struct v2_disk_dqheader), size);
+-              return 0;
++              return -1;
+       }
+-      if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
+-          le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
+-              return 0;
+-      return 1;
++      if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
++              if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
++                      return 0;
++              if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
++                      return 1;
++      }
++      return -1;
++}
++
++/* Check whether given file is really vfsv0 quotafile */
++static inline int v2_check_quota_file(struct super_block *sb, int type)
++{
++      return v2_quota_file_revision(sb, type) != -1;
+ }
+ /* Read information header from quota file */
+@@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe
+       struct v2_disk_dqinfo dinfo;
+       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+       ssize_t size;
++      int rev;
++
++      rev = v2_quota_file_revision(sb, type);
++      if (rev < 0) {
++              printk(KERN_WARNING "Second quota file check failed.\n");
++              return -1;
++      }
+       size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
+@@ -65,6 +110,16 @@ static int v2_read_file_info(struct supe
+       info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+       info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+       info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
++
++      info->u.v2_i.dqi_revision = rev;
++      if (rev == 0) {
++              info->dqi_maxblimit = 0xffffffffULL;
++              info->dqi_maxilimit = 0xffffffffULL;
++      } else {
++              info->dqi_maxblimit = 0xffffffffffffffffULL;
++              info->dqi_maxilimit = 0xffffffffffffffffULL;
++      }
++
+       return 0;
+ }
+@@ -94,29 +149,61 @@ static int v2_write_file_info(struct sup
+       return 0;
+ }
+-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
++static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
+ {
+-      m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
+-      m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
+-      m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
+-      m->dqb_itime = le64_to_cpu(d->dqb_itime);
+-      m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
+-      m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
+-      m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+-      m->dqb_btime = le64_to_cpu(d->dqb_btime);
+-}
+-
+-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+-{
+-      d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
+-      d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
+-      d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
+-      d->dqb_itime = cpu_to_le64(m->dqb_itime);
+-      d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
+-      d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+-      d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+-      d->dqb_btime = cpu_to_le64(m->dqb_btime);
+-      d->dqb_id = cpu_to_le32(id);
++      REV_ASSERT(rev);
++
++      if (rev == 0) {
++              struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
++              m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
++              m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
++              m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
++              m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
++              m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
++              m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
++              m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
++              m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
++      } else {
++              struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
++              m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
++              m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
++              m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
++              m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
++              m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
++              m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
++              m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
++              m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
++      }
++}
++
++static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
++                      qid_t id, uint rev)
++{
++      REV_ASSERT(rev); /* on-disk layout is selected by the quota file revision */
++
++      if (rev == 0) {
++              struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
++              ddqblk->dqb_id = cpu_to_le32(id);
++              ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
++              ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
++              ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
++              ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
++              ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
++              ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
++              ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
++              ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime); /* source is m, not the disk struct being filled */
++      } else {
++              struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
++              ddqblk->dqb_id = cpu_to_le32(id); /* NOTE(review): dqb_padding is never assigned in this branch */
++              ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
++              ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
++              ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
++              ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
++              ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
++              ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
++              ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
++              ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime); /* source is m, not the disk struct being filled */
++      }
+ }
+ static dqbuf_t getdqbuf(void)
+@@ -268,10 +355,10 @@ static uint find_free_dqentry(struct dqu
+ {
+       struct super_block *sb = dquot->dq_sb;
+       struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
+-      uint blk, i;
++      uint blk, i, rev = info->u.v2_i.dqi_revision;
++      uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
+       struct v2_disk_dqdbheader *dh;
+-      struct v2_disk_dqblk *ddquot;
+-      struct v2_disk_dqblk fakedquot;
++      union v2_disk_dqblk *ddquot;
+       dqbuf_t buf;
+       *err = 0;
+@@ -298,17 +385,18 @@ static uint find_free_dqentry(struct dqu
+               info->u.v2_i.dqi_free_entry = blk;
+               mark_info_dirty(sb, dquot->dq_type);
+       }
+-      if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
++      /* Block will be full? */
++      if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
+               if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
+                       printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
+                       goto out_buf;
+               }
+       dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
+-      memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+       /* Find free structure in block */
+-      for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
++      for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
++           i++, ddquot = (char *)ddquot + dqblksz);
+ #ifdef __QUOTA_V2_PARANOIA
+-      if (i == V2_DQSTRINBLK) {
++      if (i == dqstrinblk) {
+               printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
+               *err = -EIO;
+               goto out_buf;
+@@ -318,7 +406,8 @@ static uint find_free_dqentry(struct dqu
+               printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
+               goto out_buf;
+       }
+-      dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
++      dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
++                      ((char *)ddquot - (char *)buf);
+       freedqbuf(buf);
+       return blk;
+ out_buf:
+@@ -392,7 +481,9 @@ static int v2_write_dquot(struct dquot *
+ {
+       int type = dquot->dq_type;
+       ssize_t ret;
+-      struct v2_disk_dqblk ddquot, empty;
++      union v2_disk_dqblk ddquot;
++      uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
++      uint dqblksz = v2_dqblksz(rev);
+       /* dq_off is guarded by dqio_sem */
+       if (!dquot->dq_off)
+@@ -401,18 +492,22 @@ static int v2_write_dquot(struct dquot *
+                       return ret;
+               }
+       spin_lock(&dq_data_lock);
+-      mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
++      mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
+       /* Argh... We may need to write structure full of zeroes but that would be
+        * treated as an empty place by the rest of the code. Format change would
+        * be definitely cleaner but the problems probably are not worth it */
+-      memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+-      if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
+-              ddquot.dqb_itime = cpu_to_le64(1);
++      if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
++              if (rev == 0)
++                      ddquot.r0.dqb_itime = cpu_to_le64(1);
++              else
++                      ddquot.r1.dqb_itime = cpu_to_le64(1);
++      }
+       spin_unlock(&dq_data_lock);
+       ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
+-            (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
+-      if (ret != sizeof(struct v2_disk_dqblk)) {
+-              printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
++            (char *)&ddquot, dqblksz, dquot->dq_off);
++      if (ret != dqblksz) {
++              printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
++                      dquot->dq_sb->s_id);
+               if (ret >= 0)
+                       ret = -ENOSPC;
+       }
+@@ -431,6 +526,7 @@ static int free_dqentry(struct dquot *dq
+       struct v2_disk_dqdbheader *dh;
+       dqbuf_t buf = getdqbuf();
+       int ret = 0;
++      uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
+       if (!buf)
+               return -ENOMEM;
+@@ -456,8 +552,8 @@ static int free_dqentry(struct dquot *dq
+       }
+       else {
+               memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
+-                sizeof(struct v2_disk_dqblk));
+-              if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
++                v2_dqblksz(rev));
++              if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
+                       /* Insert will write block itself */
+                       if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
+                               printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
+@@ -529,41 +625,56 @@ static int v2_delete_dquot(struct dquot 
+       return remove_tree(dquot, &tmp, 0);
+ }
++static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
++{
++      __u32 dq_id;
++
++      REV_ASSERT(rev);
++
++      if (rev == 0)
++              dq_id = le32_to_cpu(ddquot->r0.dqb_id);
++      else
++              dq_id = le32_to_cpu(ddquot->r1.dqb_id);
++
++      return dq_id;
++}
++
+ /* Find entry in block */
+ static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
+ {
+       dqbuf_t buf = getdqbuf();
+       loff_t ret = 0;
+       int i;
+-      struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
++      union v2_disk_dqblk *ddquot = GETENTRIES(buf);
++      int type = dquot->dq_type;
++      uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
++      uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
+       if (!buf)
+               return -ENOMEM;
+-      if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
++
++      ret = read_blk(dquot->dq_sb, type, blk, buf);
++      if (ret < 0) {
+               printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+               goto out_buf;
+       }
+       if (dquot->dq_id)
+-              for (i = 0; i < V2_DQSTRINBLK &&
+-                   le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
++              for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
++                   i++, ddquot = (char *)ddquot + dqblksz);
+       else {  /* ID 0 as a bit more complicated searching... */
+-              struct v2_disk_dqblk fakedquot;
+-
+-              memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
+-              for (i = 0; i < V2_DQSTRINBLK; i++)
+-                      if (!le32_to_cpu(ddquot[i].dqb_id) &&
+-                          memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
++              for (i = 0; i < dqstrinblk; i++, ddquot = (char *)ddquot+dqblksz)
++                      if (!dqid(ddquot, rev) &&
++                          memcmp(&emptydquot, ddquot, dqblksz))
+                               break;
+       }
+-      if (i == V2_DQSTRINBLK) {
++      if (i == dqstrinblk) {
+               printk(KERN_ERR "VFS: Quota for id %u referenced "
+                 "but not present.\n", dquot->dq_id);
+               ret = -EIO;
+               goto out_buf;
+       }
+       else
+-              ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
+-                v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
++              ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
+ out_buf:
+       freedqbuf(buf);
+       return ret;
+@@ -605,7 +716,7 @@ static int v2_read_dquot(struct dquot *d
+ {
+       int type = dquot->dq_type;
+       loff_t offset;
+-      struct v2_disk_dqblk ddquot, empty;
++      union v2_disk_dqblk ddquot;
+       int ret = 0;
+ #ifdef __QUOTA_V2_PARANOIA
+@@ -626,25 +737,30 @@ static int v2_read_dquot(struct dquot *d
+               ret = offset;
+       }
+       else {
++              uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
++                         dqi_revision;
++              uint  dqblksz = v2_dqblksz(rev);
+               dquot->dq_off = offset;
+-              if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
+-                  (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
+-                  != sizeof(struct v2_disk_dqblk)) {
++              ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
++                                         (char *)&ddquot, dqblksz, offset);
++              if (ret != dqblksz) {
+                       if (ret >= 0)
+                               ret = -EIO;
+                       printk(KERN_ERR "VFS: Error while reading quota "
+                         "structure for id %u.\n", dquot->dq_id);
+-                      memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
++                      memset(&ddquot, 0, dqblksz);
+               }
+               else {
+                       ret = 0;
+                       /* We need to escape back all-zero structure */
+-                      memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+-                      empty.dqb_itime = cpu_to_le64(1);
+-                      if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
+-                              ddquot.dqb_itime = 0;
++                      if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
++                              if (rev == 0)
++                                      ddquot.r0.dqb_itime = cpu_to_le64(0);
++                              else
++                                      ddquot.r1.dqb_itime = cpu_to_le64(0);
++                      }
+               }
+-              disk2memdqb(&dquot->dq_dqb, &ddquot);
++              disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
+               if (!dquot->dq_dqb.dqb_bhardlimit &&
+                       !dquot->dq_dqb.dqb_bsoftlimit &&
+                       !dquot->dq_dqb.dqb_ihardlimit &&
+diff -rNpu linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h
+--- linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h     2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h       2008-03-17 23:39:54.000000000 +0300
+@@ -21,6 +21,7 @@ struct v2_mem_dqinfo {
+       unsigned int dqi_blocks;
+       unsigned int dqi_free_blk;
+       unsigned int dqi_free_entry;
++      unsigned int dqi_revision;
+ };
+ #endif /* _LINUX_DQBLK_V2_H */
+diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quota.h linux-2.6.16.54-0.2.5-quota/include/linux/quota.h
+--- linux-2.6.16.54-0.2.5/include/linux/quota.h        2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/include/linux/quota.h  2008-03-17 23:39:54.000000000 +0300
+@@ -148,12 +148,12 @@ struct if_dqinfo {
+  * Data for one user/group kept in memory
+  */
+ struct mem_dqblk {
+-      __u32 dqb_bhardlimit;   /* absolute limit on disk blks alloc */
+-      __u32 dqb_bsoftlimit;   /* preferred limit on disk blks */
++      qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
++      qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
+       qsize_t dqb_curspace;   /* current used space */
+-      __u32 dqb_ihardlimit;   /* absolute limit on allocated inodes */
+-      __u32 dqb_isoftlimit;   /* preferred inode limit */
+-      __u32 dqb_curinodes;    /* current # allocated inodes */
++      qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
++      qsize_t dqb_isoftlimit; /* preferred inode limit */
++      qsize_t dqb_curinodes;  /* current # allocated inodes */
+       time_t dqb_btime;       /* time limit for excessive disk use */
+       time_t dqb_itime;       /* time limit for excessive inode use */
+ };
+@@ -169,6 +169,8 @@ struct mem_dqinfo {
+       unsigned long dqi_flags;
+       unsigned int dqi_bgrace;
+       unsigned int dqi_igrace;
++      qsize_t dqi_maxblimit;
++      qsize_t dqi_maxilimit;
+       union {
+               struct v1_mem_dqinfo v1_i;
+               struct v2_mem_dqinfo v2_i;
+diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h
+--- linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h   2006-03-20 08:53:29.000000000 +0300
++++ linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h     2008-03-17 23:39:54.000000000 +0300
+@@ -16,28 +16,51 @@
+       0xd9c01927      /* GRPQUOTA */\
+ }
+-#define V2_INITQVERSIONS {\
++#define V2_INITQVERSIONS_R0 {\
+       0,              /* USRQUOTA */\
+       0               /* GRPQUOTA */\
+ }
++#define V2_INITQVERSIONS_R1 {\
++      1,              /* USRQUOTA */\
++      1               /* GRPQUOTA */\
++}
++
+ /*
+  * The following structure defines the format of the disk quota file
+  * (as it appears on disk) - the file is a radix tree whose leaves point
+  * to blocks of these structures.
+  */
+-struct v2_disk_dqblk {
++struct v2_disk_dqblk_r0 {
+       __le32 dqb_id;          /* id this quota applies to */
+       __le32 dqb_ihardlimit;  /* absolute limit on allocated inodes */
+       __le32 dqb_isoftlimit;  /* preferred inode limit */
+       __le32 dqb_curinodes;   /* current # allocated inodes */
+-      __le32 dqb_bhardlimit;  /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+-      __le32 dqb_bsoftlimit;  /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
++      __le32 dqb_bhardlimit;  /* absolute limit on disk space */
++      __le32 dqb_bsoftlimit;  /* preferred limit on disk space */
++      __le64 dqb_curspace;    /* current space occupied (in bytes) */
++      __le64 dqb_btime;       /* time limit for excessive disk use */
++      __le64 dqb_itime;       /* time limit for excessive inode use */
++};
++
++struct v2_disk_dqblk_r1 {
++      __le32 dqb_id;          /* id this quota applies to */
++      __le32 dqb_padding;     /* padding field */
++      __le64 dqb_ihardlimit;  /* absolute limit on allocated inodes */
++      __le64 dqb_isoftlimit;  /* preferred inode limit */
++      __le64 dqb_curinodes;   /* current # allocated inodes */
++      __le64 dqb_bhardlimit;  /* absolute limit on disk space */
++      __le64 dqb_bsoftlimit;  /* preferred limit on disk space */
+       __le64 dqb_curspace;    /* current space occupied (in bytes) */
+       __le64 dqb_btime;       /* time limit for excessive disk use */
+       __le64 dqb_itime;       /* time limit for excessive inode use */
+ };
++union v2_disk_dqblk {
++      struct v2_disk_dqblk_r0 r0;
++      struct v2_disk_dqblk_r1 r1;
++};
++
+ /*
+  * Here are header structures as written on disk and their in-memory copies
+  */
+@@ -59,7 +82,7 @@ struct v2_disk_dqinfo {
+ /*
+  *  Structure of header of block with quota structures. It is padded to 16 bytes so
+- *  there will be space for exactly 21 quota-entries in a block
++ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
+  */
+ struct v2_disk_dqdbheader {
+       __le32 dqdh_next_free;  /* Number of next block with free entry */
+@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader {
+ #define V2_DQBLKSIZE  (1 << V2_DQBLKSIZE_BITS)        /* Size of block with quota structures */
+ #define V2_DQTREEOFF  1               /* Offset of tree in file in blocks */
+ #define V2_DQTREEDEPTH        4               /* Depth of quota tree */
+-#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))     /* Number of entries in one blocks */
+ #endif /* _LINUX_QUOTAIO_V2_H */
index 9e7a8ed..8495289 100644 (file)
@@ -19,3 +19,4 @@ raid5-merge-ios-rhel5.patch
 raid5-zerocopy-rhel5.patch
 md-rebuild-policy.patch
 jbd-journal-chksum-2.6.18-vanilla.patch
+quota-large-limits-rhel5.patch
index 72adc21..070f943 100644 (file)
@@ -10,7 +10,9 @@ export_symbol_numa-2.6-fc5.patch
 blkdev_tunables-2.6-sles10.patch
 jbd-stats-2.6-sles10.patch
 i_filter_data.patch
+quota-fix-oops-in-invalidate_dquots.patch
 jbd-journal-chksum-2.6-sles10.patch
 proc-sleep-2.6.16-sles10.patch 
 export-nr_free_buffer_pages.patch 
 fmode-exec-2.6-sles10.patch
+quota-large-limits-sles10.patch
index eba2991..fe32803 100644 (file)
@@ -11,3 +11,4 @@ export-2.6.18-vanilla.patch
 8kstack-2.6.12.patch
 export-show_task-2.6.18-vanilla.patch 
 sd_iostats-2.6.22-vanilla.patch
+quota-large-limits-rhel5.patch
index 81d357b..49fd0bf 100644 (file)
@@ -51,7 +51,6 @@
 #include <lustre_sec.h>
 #include "ldlm_internal.h"
 
-
 /* @priority: if non-zero, move the selected to the list head
  * @create: if zero, only search in existed connections
  */
@@ -2166,33 +2165,37 @@ void target_committed_to_req(struct ptlrpc_request *req)
 
 EXPORT_SYMBOL(target_committed_to_req);
 
-#ifdef HAVE_QUOTA_SUPPORT
 int target_handle_qc_callback(struct ptlrpc_request *req)
 {
         struct obd_quotactl *oqctl;
         struct client_obd *cli = &req->rq_export->exp_obd->u.cli;
 
         oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
-        if (oqctl == NULL)
+        if (oqctl == NULL) {
+                CERROR("Can't unpack obd_quotactl\n");
                 RETURN(-EPROTO);
+        }
 
         cli->cl_qchk_stat = oqctl->qc_stat;
 
         return 0;
 }
 
+#ifdef HAVE_QUOTA_SUPPORT
 int target_handle_dqacq_callback(struct ptlrpc_request *req)
 {
 #ifdef __KERNEL__
         struct obd_device *obd = req->rq_export->exp_obd;
         struct obd_device *master_obd;
+        struct obd_device_target *obt;
         struct lustre_quota_ctxt *qctxt;
-        struct qunit_data *qdata;
-        void* rep;
-        struct qunit_data_old *qdata_old;
+        struct qunit_data *qdata = NULL;
         int rc = 0;
         ENTRY;
 
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_DROP_QUOTA_REQ))
+                RETURN(rc);
+
         rc = req_capsule_server_pack(&req->rq_pill);
         if (rc) {
                 CERROR("packing reply failed!: rc = %d\n", rc);
@@ -2201,52 +2204,73 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req)
 
         LASSERT(req->rq_export);
 
-        /* fixed for bug10707 */
-        if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) &&
-            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
-                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
-                rep = req_capsule_server_get(&req->rq_pill,
-                                             &RMF_QUNIT_DATA);
-                LASSERT(rep);
-                qdata = req_capsule_client_swab_get(&req->rq_pill,
-                                                    &RMF_QUNIT_DATA,
-                                          (void*)lustre_swab_qdata);
-        } else {
-                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
-                rep = req_capsule_server_get(&req->rq_pill, &RMF_QUNIT_DATA);
-                LASSERT(rep);
-                qdata_old = req_capsule_client_swab_get(&req->rq_pill,
-                                                        &RMF_QUNIT_DATA,
-                                           (void*)lustre_swab_qdata_old);
-                qdata = lustre_quota_old_to_new(qdata_old);
+        OBD_ALLOC(qdata, sizeof(struct qunit_data));
+        if (!qdata)
+                RETURN(-ENOMEM);
+        rc = quota_get_qdata(req, qdata, QUOTA_REQUEST, QUOTA_EXPORT);
+        if (rc < 0) {
+                CDEBUG(D_ERROR, "Can't unpack qunit_data(rc: %d)\n", rc);
+                GOTO(out, rc);
         }
 
-        if (qdata == NULL)
-                RETURN(-EPROTO);
-
         /* we use the observer */
-        LASSERT(obd->obd_observer && obd->obd_observer->obd_observer);
+        if (!obd->obd_observer || !obd->obd_observer->obd_observer) {
+                CERROR("Can't find the observer, it is recovering\n");
+                req->rq_status = -EIO;
+                GOTO(send_reply, rc = -EIO);
+        }
+
         master_obd = obd->obd_observer->obd_observer;
-        qctxt = &master_obd->u.obt.obt_qctxt;
+        obt = &master_obd->u.obt;
+        qctxt = &obt->obt_qctxt;
+
+        if (!qctxt->lqc_setup || !qctxt->lqc_valid) {
+                /* quota_type has not been processed yet, return EAGAIN
+                 * until we know whether or not quotas are supposed to
+                 * be enabled */
+                CDEBUG(D_QUOTA, "quota_type not processed yet, return "
+                       "-EAGAIN\n");
+                req->rq_status = -EAGAIN;
+                rc = ptlrpc_reply(req);
+                GOTO(out, rc);
+        }
+
+        down_read(&obt->obt_rwsem);
+        if (qctxt->lqc_lqs_hash == NULL) {
+                up_read(&obt->obt_rwsem);
+                /* the lqs hash of the quota context has not been set up
+                 * yet, return EAGAIN until the quota context is
+                 * ready */
+                CDEBUG(D_QUOTA, "quota_ctxt is not ready yet, return "
+                       "-EAGAIN\n");
+                req->rq_status = -EAGAIN;
+                rc = ptlrpc_reply(req);
+                GOTO(out, rc);
+        }
 
         LASSERT(qctxt->lqc_handler);
         rc = qctxt->lqc_handler(master_obd, qdata,
                                 lustre_msg_get_opc(req->rq_reqmsg));
+        up_read(&obt->obt_rwsem);
         if (rc && rc != -EDQUOT)
                 CDEBUG(rc == -EBUSY  ? D_QUOTA : D_ERROR,
                        "dqacq failed! (rc:%d)\n", rc);
+        req->rq_status = rc;
 
-        /* the qd_count might be changed in lqc_handler */
-        if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) &&
-            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
-                memcpy(rep, qdata, sizeof(*qdata));
-        } else {
-                qdata_old = lustre_quota_new_to_old(qdata);
-                memcpy(rep, qdata_old, sizeof(*qdata_old));
+        /* for historical reasons there are three forms of qunit, so
+         * convert ours into whichever form the slave expects */
+        rc = quota_copy_qdata(req, qdata, QUOTA_REPLY, QUOTA_EXPORT);
+        if (rc < 0) {
+                CDEBUG(D_ERROR, "Can't pack qunit_data(rc: %d)\n", rc);
+                GOTO(out, rc);
         }
-        req->rq_status = rc;
-        rc = ptlrpc_reply(req);
 
+        /* Block the quota req. b=14840 */
+        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_BLOCK_QUOTA_REQ, obd_timeout);
+send_reply:
+        rc = ptlrpc_reply(req);
+out:
+        OBD_FREE(qdata, sizeof(struct qunit_data));
         RETURN(rc);
 #else
         return 0;
index 27fa1af..50d4504 100644 (file)
@@ -268,7 +268,8 @@ int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
         }
         lock->l_destroyed = 1;
 
-        if (lock->l_export && lock->l_export->exp_lock_hash)
+        if (lock->l_export && lock->l_export->exp_lock_hash &&
+            !hlist_unhashed(&lock->l_exp_hash))
                 lustre_hash_del(lock->l_export->exp_lock_hash,
                                 &lock->l_remote_handle, &lock->l_exp_hash);
 
index 35f3bcf..b809cba 100644 (file)
@@ -1882,7 +1882,8 @@ void ldlm_revoke_lock_cb(void *obj, void *data)
         LASSERT(!lock->l_blocking_lock);
 
         lock->l_flags |= LDLM_FL_AST_SENT;
-        if (lock->l_export && lock->l_export->exp_lock_hash)
+        if (lock->l_export && lock->l_export->exp_lock_hash &&
+            !hlist_unhashed(&lock->l_exp_hash))
                 lustre_hash_del(lock->l_export->exp_lock_hash,
                                 &lock->l_remote_handle, &lock->l_exp_hash);
         list_add_tail(&lock->l_rk_ast, rpc_list);
index 116d0c9..326a8c0 100644 (file)
@@ -22,7 +22,7 @@ LUSTRE_LIBS = libllite.a \
               $(top_builddir)/lustre/obdclass/liblustreclass.a \
               $(top_builddir)/lustre/lvfs/liblvfs.a
 
-if QUOTA
+if LIBLUSTRE
 QUOTA_LIBS = $(top_builddir)/lustre/quota/libquota.a
 endif
 
index 07b7ddd..38fb136 100644 (file)
@@ -320,7 +320,7 @@ int llu_objects_destroy(struct ptlrpc_request *req, struct inode *dir)
                 }
         }
 
-        rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL);
+        rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL);
         OBDO_FREE(oa);
         if (rc)
                 CERROR("obd destroy objid 0x"LPX64" error %d\n",
index 0de50cf..fc9bc5d 100644 (file)
@@ -77,14 +77,14 @@ void *inter_module_get(char *arg)
                 return ldlm_namespace_cleanup;
         else if (!strcmp(arg, "ldlm_replay_locks"))
                 return ldlm_replay_locks;
-#ifdef HAVE_QUOTA_SUPPORT
-        else if (!strcmp(arg, "osc_quota_interface"))
-                return &osc_quota_interface;
         else if (!strcmp(arg, "mdc_quota_interface"))
                 return &mdc_quota_interface;
+        else if (!strcmp(arg, "lmv_quota_interface"))
+                return &lmv_quota_interface;
+        else if (!strcmp(arg, "osc_quota_interface"))
+                return &osc_quota_interface;
         else if (!strcmp(arg, "lov_quota_interface"))
                 return &lov_quota_interface;
-#endif
         else
                 return NULL;
 }
index 061f82e..4366c86 100644 (file)
@@ -500,16 +500,6 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
         RETURN(rc);
 }
 
-#define QCTL_COPY(out, in)              \
-do {                                    \
-        Q_COPY(out, in, qc_cmd);        \
-        Q_COPY(out, in, qc_type);       \
-        Q_COPY(out, in, qc_id);         \
-        Q_COPY(out, in, qc_stat);       \
-        Q_COPY(out, in, qc_dqinfo);     \
-        Q_COPY(out, in, qc_dqblk);      \
-} while (0)
-
 int ll_send_mgc_param(struct obd_export *mgc, char *string)
 {
         struct mgs_send_param *msp;
@@ -1011,7 +1001,8 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 struct obd_quotactl *oqctl;
                 int rc, error = 0;
 
-                if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+                if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+                    sbi->ll_flags & LL_SBI_RMT_CLIENT)
                         RETURN(-EPERM);
 
                 OBD_ALLOC_PTR(oqctl);
@@ -1035,7 +1026,8 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 struct if_quotacheck *check;
                 int rc;
 
-                if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+                if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+                    sbi->ll_flags & LL_SBI_RMT_CLIENT)
                         RETURN(-EPERM);
 
                 OBD_ALLOC_PTR(check);
@@ -1063,47 +1055,39 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 OBD_FREE_PTR(check);
                 RETURN(rc);
         }
-#ifdef HAVE_QUOTA_SUPPORT
         case OBD_IOC_QUOTACTL: {
                 struct if_quotactl *qctl;
-                struct obd_quotactl *oqctl;
-
-                int cmd, type, id, rc = 0;
+                int cmd, type, id, valid, rc = 0;
 
                 OBD_ALLOC_PTR(qctl);
                 if (!qctl)
                         RETURN(-ENOMEM);
 
-                OBD_ALLOC_PTR(oqctl);
-                if (!oqctl) {
-                        OBD_FREE_PTR(qctl);
-                        RETURN(-ENOMEM);
-                }
                 if (copy_from_user(qctl, (void *)arg, sizeof(*qctl)))
                         GOTO(out_quotactl, rc = -EFAULT);
 
                 cmd = qctl->qc_cmd;
                 type = qctl->qc_type;
                 id = qctl->qc_id;
+                valid = qctl->qc_valid;
+
                 switch (cmd) {
+                case LUSTRE_Q_INVALIDATE:
+                case LUSTRE_Q_FINVALIDATE:
                 case Q_QUOTAON:
                 case Q_QUOTAOFF:
                 case Q_SETQUOTA:
                 case Q_SETINFO:
-                        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
+                        if (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+                            sbi->ll_flags & LL_SBI_RMT_CLIENT)
                                 GOTO(out_quotactl, rc = -EPERM);
                         break;
                 case Q_GETQUOTA:
                         if (((type == USRQUOTA && current->euid != id) ||
                              (type == GRPQUOTA && !in_egroup_p(id))) &&
-                            !cfs_capable(CFS_CAP_SYS_ADMIN))
+                            (!cfs_capable(CFS_CAP_SYS_ADMIN) ||
+                             sbi->ll_flags & LL_SBI_RMT_CLIENT))
                                 GOTO(out_quotactl, rc = -EPERM);
-
-                        /* XXX: dqb_valid is borrowed as a flag to mark that
-                         *      only mds quota is wanted */
-                        if (qctl->qc_dqblk.dqb_valid)
-                                qctl->obd_uuid = sbi->ll_md_exp->exp_obd->
-                                                        u.cli.cl_target_uuid;
                         break;
                 case Q_GETINFO:
                         break;
@@ -1112,69 +1096,76 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         GOTO(out_quotactl, rc = -ENOTTY);
                 }
 
-                QCTL_COPY(oqctl, qctl);
-
-                if (qctl->obd_uuid.uuid[0]) {
-                        struct obd_device *obd;
-                        struct obd_uuid *uuid = &qctl->obd_uuid;
-
-                        obd = class_find_client_notype(uuid,
-                                         &sbi->ll_dt_exp->exp_obd->obd_uuid);
-                        if (!obd)
-                                GOTO(out_quotactl, rc = -ENOENT);
+                if (valid != QC_GENERAL) {
+                        if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+                                GOTO(out_quotactl, rc = -EOPNOTSUPP);
 
                         if (cmd == Q_GETINFO)
-                                oqctl->qc_cmd = Q_GETOINFO;
+                                qctl->qc_cmd = Q_GETOINFO;
                         else if (cmd == Q_GETQUOTA)
-                                oqctl->qc_cmd = Q_GETOQUOTA;
+                                qctl->qc_cmd = Q_GETOQUOTA;
                         else
                                 GOTO(out_quotactl, rc = -EINVAL);
 
-                        if (sbi->ll_md_exp->exp_obd == obd) {
-                                rc = obd_quotactl(sbi->ll_md_exp, oqctl);
-                        } else {
-                                int i;
-                                struct obd_export *exp;
-                                struct lov_obd *lov = &sbi->ll_dt_exp->
-                                                            exp_obd->u.lov;
-
-                                for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                                        if (!lov->lov_tgts[i] ||
-                                            !lov->lov_tgts[i]->ltd_active)
-                                                continue;
-                                        exp = lov->lov_tgts[i]->ltd_exp;
-                                        if (exp->exp_obd == obd) {
-                                                rc = obd_quotactl(exp, oqctl);
-                                                break;
-                                        }
-                                }
+                        switch (valid) {
+                        case QC_MDTIDX:
+                                rc = obd_iocontrol(OBD_IOC_QUOTACTL,
+                                                   sbi->ll_md_exp,
+                                                   sizeof(*qctl), qctl, NULL);
+                                break;
+                        case QC_OSTIDX:
+                                rc = obd_iocontrol(OBD_IOC_QUOTACTL,
+                                                   sbi->ll_dt_exp,
+                                                   sizeof(*qctl), qctl, NULL);
+                                break;
+                        case QC_UUID:
+                                rc = obd_iocontrol(OBD_IOC_QUOTACTL,
+                                                   sbi->ll_md_exp,
+                                                   sizeof(*qctl), qctl, NULL);
+                                if (rc == -EAGAIN)
+                                        rc = obd_iocontrol(OBD_IOC_QUOTACTL,
+                                                           sbi->ll_dt_exp,
+                                                           sizeof(*qctl), qctl,
+                                                           NULL);
+                                break;
+                        default:
+                                rc = -EINVAL;
+                                break;
                         }
 
-                        oqctl->qc_cmd = cmd;
-                        QCTL_COPY(qctl, oqctl);
-
-                        if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
-                                rc = -EFAULT;
-
-                        GOTO(out_quotactl, rc);
-                }
-
-                rc = obd_quotactl(sbi->ll_md_exp, oqctl);
-                if (rc && rc != -EBUSY && cmd == Q_QUOTAON) {
-                        oqctl->qc_cmd = Q_QUOTAOFF;
-                        obd_quotactl(sbi->ll_md_exp, oqctl);
+                        if (rc)
+                                GOTO(out_quotactl, rc);
+                        else
+                                qctl->qc_cmd = cmd;
+                } else {
+                        struct obd_quotactl *oqctl;
+
+                        OBD_ALLOC_PTR(oqctl);
+                        if (!oqctl)
+                                GOTO(out_quotactl, rc = -ENOMEM);
+
+                        QCTL_COPY(oqctl, qctl);
+                        rc = obd_quotactl(sbi->ll_md_exp, oqctl);
+                        if (rc) {
+                                if (rc != -EBUSY && cmd == Q_QUOTAON) {
+                                        oqctl->qc_cmd = Q_QUOTAOFF;
+                                        obd_quotactl(sbi->ll_md_exp, oqctl);
+                                }
+                                OBD_FREE_PTR(oqctl);
+                                GOTO(out_quotactl, rc);
+                        } else {
+                                QCTL_COPY(qctl, oqctl);
+                                OBD_FREE_PTR(oqctl);
+                        }
                 }
 
-                QCTL_COPY(qctl, oqctl);
-
                 if (copy_to_user((void *)arg, qctl, sizeof(*qctl)))
                         rc = -EFAULT;
+
         out_quotactl:
                 OBD_FREE_PTR(qctl);
-                OBD_FREE_PTR(oqctl);
                 RETURN(rc);
         }
-#endif /* HAVE_QUOTA_SUPPORT */
         case OBD_IOC_GETNAME: {
                 struct obd_device *obd = class_exp2obd(sbi->ll_dt_exp);
                 if (!obd)
@@ -1202,6 +1193,27 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 RETURN(0);
         }
 #endif
+        case LL_IOC_GETOBDCOUNT: {
+                int count;
+
+                if (copy_from_user(&count, (int *)arg, sizeof(int)))
+                        RETURN(-EFAULT);
+
+                if (!count) {
+                        /* get ost count */
+                        struct lov_obd *lov = &sbi->ll_dt_exp->exp_obd->u.lov;
+                        count = lov->desc.ld_tgt_count;
+                } else {
+                        /* get mdt count */
+                        struct lmv_obd *lmv = &sbi->ll_md_exp->exp_obd->u.lmv;
+                        count = lmv->desc.ld_tgt_count;
+                }
+
+                if (copy_to_user((int *)arg, &count, sizeof(int)))
+                        RETURN(-EFAULT);
+
+                RETURN(0);
+        }
         default:
                 RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg));
         }
index 818008a..1a7bd1f 100644 (file)
@@ -157,9 +157,10 @@ static void ll_delete_capa(struct obd_capa *ocapa)
         }
 
         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
-        list_del(&ocapa->c_list);
+        list_del_init(&ocapa->c_list);
         capa_count[CAPA_SITE_CLIENT]--;
-        free_capa(ocapa);
+        /* drop the reference taken when the capa was allocated */
+        capa_put(ocapa);
 }
 
 /* three places where client capa is deleted:
@@ -238,7 +239,6 @@ static int capa_thread_main(void *unused)
                         capa_get(ocapa);
                         ll_capa_renewed++;
                         spin_unlock(&capa_lock);
-
                         rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
                                            ll_update_capa);
                         spin_lock(&capa_lock);
@@ -259,7 +259,7 @@ static int capa_thread_main(void *unused)
                                 break;
                         }
 
-                        if (atomic_read(&ocapa->c_refc)) {
+                        if (atomic_read(&ocapa->c_refc) > 1) {
                                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
                                            "expired(c_refc %d), don't release",
                                            atomic_read(&ocapa->c_refc));
@@ -312,27 +312,6 @@ void ll_capa_thread_stop(void)
                    ll_capa_thread.t_flags & SVC_STOPPED);
 }
 
-static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
-{
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct obd_capa *ocapa;
-
-        /* inside capa_lock */
-        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
-                if ((capa_opc(&ocapa->c_capa) & opc) != opc)
-                        continue;
-
-                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
-                                  ll_inode2fid(inode)));
-                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
-
-                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
-                return ocapa;
-        }
-
-        return NULL;
-}
-
 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
@@ -353,14 +332,17 @@ struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
                         continue;
                 if ((opc & CAPA_OPC_OSS_WRITE) &&
                     capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
-                        found = 1; break;
+                        found = 1;
+                        break;
                 } else if ((opc & CAPA_OPC_OSS_READ) &&
                            capa_opc_supported(&ocapa->c_capa,
                                               CAPA_OPC_OSS_READ)) {
-                        found = 1; break;
+                        found = 1;
+                        break;
                 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
                            capa_opc_supported(&ocapa->c_capa, opc)) {
-                        found = 1; break;
+                        found = 1;
+                        break;
                 }
         }
 
@@ -429,12 +411,33 @@ static struct obd_capa *do_add_mds_capa(struct inode *inode,
 
                 DEBUG_CAPA(D_SEC, capa, "update MDS");
 
-                free_capa(ocapa);
+                capa_put(ocapa);
                 ocapa = old;
         }
         return ocapa;
 }
 
+static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct obd_capa *ocapa;
+
+        /* inside capa_lock */
+        list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
+                if ((capa_opc(&ocapa->c_capa) & opc) != opc)
+                        continue;
+
+                LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
+                                  ll_inode2fid(inode)));
+                LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
+
+                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
+                return ocapa;
+        }
+
+        return NULL;
+}
+
 static inline void inode_add_oss_capa(struct inode *inode,
                                       struct obd_capa *ocapa)
 {
@@ -479,7 +482,7 @@ static struct obd_capa *do_add_oss_capa(struct inode *inode,
 
                 DEBUG_CAPA(D_SEC, capa, "update OSS");
 
-                free_capa(ocapa);
+                capa_put(ocapa);
                 ocapa = old;
         }
 
@@ -496,7 +499,7 @@ struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
         /* truncate capa won't renew */
         if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
                 set_capa_expiry(ocapa);
-                list_del(&ocapa->c_list);
+                list_del_init(&ocapa->c_list);
                 sort_add_capa(ocapa, ll_capa_list);
 
                 update_capa_timer(ocapa, capa_renewal_time(ocapa));
@@ -547,18 +550,18 @@ int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
                         }
                 }
 
-                list_del(&ocapa->c_list);
+                list_del_init(&ocapa->c_list);
                 sort_add_capa(ocapa, &ll_idle_capas);
                 spin_unlock(&capa_lock);
 
                 capa_put(ocapa);
                 iput(inode);
-                return rc;
+                RETURN(rc);
         }
 
         spin_lock(&ocapa->c_lock);
         LASSERT(!memcmp(&ocapa->c_capa, capa,
-                        offsetof(struct lustre_capa, lc_flags)));
+                        offsetof(struct lustre_capa, lc_opc)));
         ocapa->c_capa = *capa;
         set_capa_expiry(ocapa);
         spin_unlock(&ocapa->c_lock);
@@ -616,10 +619,13 @@ void ll_truncate_free_capa(struct obd_capa *ocapa)
         LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate");
 
+        /* drop the reference taken when the capa was looked up */
         capa_put(ocapa);
-        spin_lock(&capa_lock);
-        ll_delete_capa(ocapa);
-        spin_unlock(&capa_lock);
+        if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) {
+                spin_lock(&capa_lock);
+                ll_delete_capa(ocapa);
+                spin_unlock(&capa_lock);
+        }
 }
 
 void ll_clear_inode_capas(struct inode *inode)
index 3ed9c85..4542588 100644 (file)
@@ -232,7 +232,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT_VERSION  | OBD_CONNECT_MDS_CAPA |
                                   OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET|
                                   OBD_CONNECT_FID      | OBD_CONNECT_AT |
-                                  OBD_CONNECT_LOV_V3;
+                                  OBD_CONNECT_LOV_V3 | OBD_CONNECT_RMT_CLIENT;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
@@ -263,13 +263,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
 
         /* real client */
         data->ocd_connect_flags |= OBD_CONNECT_REAL;
-        if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-                data->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT;
-                data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT;
-        } else {
-                data->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT;
-                data->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT;
-        }
+        if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+                data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
 
         err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data, NULL);
         if (err == -EBUSY) {
@@ -347,21 +342,16 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
         if (data->ocd_connect_flags & OBD_CONNECT_JOIN)
                 sbi->ll_flags |= LL_SBI_JOIN;
 
-        if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
-                if (!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT)) {
-                        /* sometimes local client claims to be remote, but mdt
-                         * will disagree when client gss not applied. */
-                        LCONSOLE_INFO("client claims to be remote, but server "
-                                      "rejected, forced to be local.\n");
-                        sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
+        if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) {
+                if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
+                        sbi->ll_flags |= LL_SBI_RMT_CLIENT;
+                        LCONSOLE_INFO("client is set as remote by default.\n");
                 }
         } else {
-                if (!(data->ocd_connect_flags & OBD_CONNECT_LCL_CLIENT)) {
-                        /* with gss applied, remote client can not claim to be
-                         * local, so mdt maybe force client to be remote. */
-                        LCONSOLE_INFO("client claims to be local, but server "
-                                      "rejected, forced to be remote.\n");
-                        sbi->ll_flags |= LL_SBI_RMT_CLIENT;
+                if (sbi->ll_flags & LL_SBI_RMT_CLIENT) {
+                        sbi->ll_flags &= ~LL_SBI_RMT_CLIENT;
+                        LCONSOLE_INFO("client claims to be remote, but server "
+                                      "rejected, forced to be local.\n");
                 }
         }
 
@@ -385,9 +375,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
                                   OBD_CONNECT_CANCELSET | OBD_CONNECT_FID      |
                                   OBD_CONNECT_SRVLOCK   | OBD_CONNECT_TRUNCLOCK|
-                                  OBD_CONNECT_AT;
-        if (sbi->ll_flags & LL_SBI_OSS_CAPA)
-                data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA;
+                                  OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT |
+                                  OBD_CONNECT_OSS_CAPA;
 
         if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) {
                 /* OBD_CONNECT_CKSUM should always be set, even if checksums are
@@ -406,6 +395,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 #endif
+        if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
+                data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE;
+
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
                data->ocd_version, data->ocd_grant);
@@ -471,7 +463,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
         err = md_getattr(sbi->ll_md_exp, &sbi->ll_root_fid, oc, valid, 0,
                          &request);
         if (oc)
-                free_capa(oc);
+                capa_put(oc);
         if (err) {
                 CERROR("md_getattr failed for root: rc = %d\n", err);
                 GOTO(out_lock_cn_cb, err);
@@ -2114,6 +2106,8 @@ int ll_process_config(struct lustre_cfg *lcfg)
            proc fns must be able to handle that! */
         rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars,
                                       lcfg, sb);
+        if (rc > 0)
+               rc = 0;
         return(rc);
 }
 
index 0933e2f..91c81c3 100644 (file)
@@ -1047,6 +1047,7 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
         struct lov_stripe_md *lsm = NULL;
         struct obd_trans_info oti = { 0 };
         struct obdo *oa;
+        struct obd_capa *oc = NULL;
         int rc;
         ENTRY;
 
@@ -1101,7 +1102,14 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
                 }
         }
 
-        rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir));
+        if (body->valid & OBD_MD_FLOSSCAPA) {
+                rc = md_unpack_capa(ll_i2mdexp(dir), request, &RMF_CAPA2, &oc);
+                if (rc)
+                        GOTO(out_free_memmd, rc);
+        }
+
+        rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir), oc);
+        capa_put(oc);
         OBDO_FREE(oa);
         if (rc)
                 CERROR("obd destroy objid "LPX64" error %d\n",
index ad6c65f..004218e 100644 (file)
@@ -725,12 +725,13 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 {
         struct obd_device    *obddev = class_exp2obd(exp);
         struct lmv_obd       *lmv = &obddev->u.lmv;
-        int                   i;
+        int                   i = 0;
         int                   rc = 0;
         int                   set = 0;
+        int                   count = lmv->desc.ld_tgt_count;
         ENTRY;
 
-        if (lmv->desc.ld_tgt_count == 0)
+        if (count == 0)
                 RETURN(-ENOTTY);
 
         switch (cmd) {
@@ -743,7 +744,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
                 memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
                 LASSERT(data->ioc_plen1 == sizeof(struct obd_statfs));
 
-                if ((index >= lmv->desc.ld_tgt_count))
+                if ((index >= count))
                         RETURN(-ENODEV);
 
                 if (!lmv->tgts[index].ltd_active)
@@ -764,8 +765,54 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
                         RETURN(-EFAULT);
                 break;
         }
+        case OBD_IOC_QUOTACTL: {
+                struct if_quotactl *qctl = karg;
+                struct lmv_tgt_desc *tgt = NULL;
+                struct obd_quotactl *oqctl;
+
+                if (qctl->qc_valid == QC_MDTIDX) {
+                        if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+                                RETURN(-EINVAL);
+
+                        tgt = &lmv->tgts[qctl->qc_idx];
+                        if (!tgt->ltd_exp)
+                                RETURN(-EINVAL);
+                } else if (qctl->qc_valid == QC_UUID) {
+                        for (i = 0; i < count; i++) {
+                                tgt = &lmv->tgts[i];
+                                if (!obd_uuid_equals(&tgt->ltd_uuid,
+                                                     &qctl->obd_uuid))
+                                        continue;
+
+                                if (tgt->ltd_exp == NULL)
+                                        RETURN(-EINVAL);
+
+                                break;
+                        }
+                } else {
+                        RETURN(-EINVAL);
+                }
+
+                if (i >= count)
+                        RETURN(-EAGAIN);
+
+                LASSERT(tgt && tgt->ltd_exp);
+                OBD_ALLOC_PTR(oqctl);
+                if (!oqctl)
+                        RETURN(-ENOMEM);
+
+                QCTL_COPY(oqctl, qctl);
+                rc = obd_quotactl(tgt->ltd_exp, oqctl);
+                if (rc == 0) {
+                        QCTL_COPY(qctl, oqctl);
+                        qctl->qc_valid = QC_MDTIDX;
+                        qctl->obd_uuid = tgt->ltd_uuid;
+                }
+                OBD_FREE_PTR(oqctl);
+                break;
+        }
         default : {
-                for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+                for (i = 0; i < count; i++) {
                         int err;
 
                         if (lmv->tgts[i].ltd_exp == NULL)
@@ -773,7 +820,9 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 
                         err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len,
                                             karg, uarg);
-                        if (err) {
+                        if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) {
+                                RETURN(err);
+                        } else if (err) {
                                 if (lmv->tgts[i].ltd_active) {
                                         CERROR("error: iocontrol MDC %s on MDT"
                                                "idx %d cmd %x: err = %d\n",
@@ -2837,6 +2886,18 @@ static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *oc,
         RETURN(rc);
 }
 
+/**
+ * Unpack a capability from a request on behalf of the LMV layer.
+ *
+ * The call is forwarded to md_unpack_capa() on the export of the first
+ * target, lmv->tgts[0]; req, field and oc are passed through unchanged.
+ *
+ * Returns whatever md_unpack_capa() returned.
+ */
+int lmv_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req,
+                    const struct req_msg_field *field, struct obd_capa **oc)
+{
+        struct lmv_obd       *lmv = &exp->exp_obd->u.lmv;
+        struct obd_export    *tgt_exp;
+        int                   rc;
+        ENTRY;
+
+        tgt_exp = lmv->tgts[0].ltd_exp;
+        rc = md_unpack_capa(tgt_exp, req, field, oc);
+        RETURN(rc);
+}
+
 int lmv_intent_getattr_async(struct obd_export *exp,
                              struct md_enqueue_info *minfo,
                              struct ldlm_enqueue_info *einfo)
@@ -2960,11 +3021,15 @@ struct md_ops lmv_md_ops = {
         .m_set_open_replay_data = lmv_set_open_replay_data,
         .m_clear_open_replay_data = lmv_clear_open_replay_data,
         .m_renew_capa           = lmv_renew_capa,
+        .m_unpack_capa          = lmv_unpack_capa,
         .m_get_remote_perm      = lmv_get_remote_perm,
         .m_intent_getattr_async = lmv_intent_getattr_async,
         .m_revalidate_lock      = lmv_revalidate_lock
 };
 
+static quota_interface_t *quota_interface;
+extern quota_interface_t lmv_quota_interface;
+
 int __init lmv_init(void)
 {
         struct lprocfs_static_vars lvars;
@@ -2979,10 +3044,18 @@ int __init lmv_init(void)
         }
 
         lprocfs_lmv_init_vars(&lvars);
+
+        request_module("lquota");
+        quota_interface = PORTAL_SYMBOL_GET(lmv_quota_interface);
+        init_obd_quota_ops(quota_interface, &lmv_obd_ops);
+
         rc = class_register_type(&lmv_obd_ops, &lmv_md_ops,
                                  lvars.module_vars, LUSTRE_LMV_NAME, NULL);
-        if (rc)
+        if (rc) {
+                if (quota_interface)
+                        PORTAL_SYMBOL_PUT(lmv_quota_interface);
                 cfs_mem_cache_destroy(lmv_object_cache);
+        }
 
         return rc;
 }
@@ -2990,6 +3063,9 @@ int __init lmv_init(void)
 #ifdef __KERNEL__
 static void lmv_exit(void)
 {
+        if (quota_interface)
+                PORTAL_SYMBOL_PUT(lmv_quota_interface);
+
         class_unregister_type(LUSTRE_LMV_NAME);
 
         LASSERTF(atomic_read(&lmv_object_count) == 0,
index e876da0..ea90841 100644 (file)
@@ -931,6 +931,8 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
 
                 rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars,
                                               lcfg, obd);
+               if (rc > 0)
+                       rc = 0;
                 GOTO(out, rc);
         }
         case LCFG_POOL_NEW:
@@ -1130,7 +1132,7 @@ do {
 
 static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                        struct lov_stripe_md *lsm, struct obd_trans_info *oti,
-                       struct obd_export *md_exp)
+                       struct obd_export *md_exp, void *capa)
 {
         struct lov_request_set *set;
         struct obd_info oinfo;
@@ -1163,7 +1165,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                         oti->oti_logcookies = set->set_cookies + req->rq_stripe;
 
                 err = obd_destroy(lov->lov_tgts[req->rq_idx]->ltd_exp,
-                                  req->rq_oi.oi_oa, NULL, oti, NULL);
+                                  req->rq_oi.oi_oa, NULL, oti, NULL, capa);
                 err = lov_update_common_set(set, req, err);
                 if (err) {
                         CERROR("error: destroying objid "LPX64" subobj "
@@ -1901,7 +1903,7 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 {
         struct obd_device *obddev = class_exp2obd(exp);
         struct lov_obd *lov = &obddev->u.lov;
-        int i, rc = 0, count = lov->desc.ld_tgt_count;
+        int i = 0, rc = 0, count = lov->desc.ld_tgt_count;
         struct obd_uuid *uuidp;
         ENTRY;
 
@@ -1995,6 +1997,53 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
         case LL_IOC_LOV_SETEA:
                 rc = lov_setea(exp, karg, uarg);
                 break;
+        case OBD_IOC_QUOTACTL: {
+                struct if_quotactl *qctl = karg;
+                struct lov_tgt_desc *tgt = NULL;
+                struct obd_quotactl *oqctl;
+
+                if (qctl->qc_valid == QC_OSTIDX) {
+                        if (qctl->qc_idx < 0 || count <= qctl->qc_idx)
+                                RETURN(-EINVAL);
+
+                        tgt = lov->lov_tgts[qctl->qc_idx];
+                        if (!tgt || !tgt->ltd_exp)
+                                RETURN(-EINVAL);
+                } else if (qctl->qc_valid == QC_UUID) {
+                        for (i = 0; i < count; i++) {
+                                tgt = lov->lov_tgts[i];
+                                if (!tgt ||
+                                    !obd_uuid_equals(&tgt->ltd_uuid,
+                                                     &qctl->obd_uuid))
+                                        continue;
+
+                                if (tgt->ltd_exp == NULL)
+                                        RETURN(-EINVAL);
+
+                                break;
+                        }
+                } else {
+                        RETURN(-EINVAL);
+                }
+
+                if (i >= count)
+                        RETURN(-EAGAIN);
+
+                LASSERT(tgt && tgt->ltd_exp);
+                OBD_ALLOC_PTR(oqctl);
+                if (!oqctl)
+                        RETURN(-ENOMEM);
+
+                QCTL_COPY(oqctl, qctl);
+                rc = obd_quotactl(tgt->ltd_exp, oqctl);
+                if (rc == 0) {
+                        QCTL_COPY(qctl, oqctl);
+                        qctl->qc_valid = QC_OSTIDX;
+                        qctl->obd_uuid = tgt->ltd_uuid;
+                }
+                OBD_FREE_PTR(oqctl);
+                break;
+        }
         default: {
                 int set = 0;
 
index ba95f06..176968f 100644 (file)
@@ -620,7 +620,8 @@ cleanup:
                         continue;
 
                 sub_exp = lov->lov_tgts[req->rq_idx]->ltd_exp;
-                err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL);
+                err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL,
+                                  NULL);
                 if (err)
                         CERROR("Failed to uncreate objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
index 1b5311b..b80a28d 100644 (file)
@@ -60,7 +60,7 @@ sources: fsfilt_$(BACKINGFS).c
 else #SERVER
 sources:
 
-endif
+endif #SERVER
 
 ldiskfs_sed_flags = \
        -e "s/dx_hash_info/ext3_dx_hash_info/g" \
@@ -104,8 +104,7 @@ install-data-hook: $(install_data_hook)
 DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \
        lvfs_internal.h lvfs_linux.c lvfs_userfs.c \
        upcall_cache.c prng.c lvfs_lib.c \
-       lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c \
-        # quotacheck_test.c quotactl_test.c fsfilt_ext3_quota.h
+       lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c
 
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ 
 CLEANFILES = fsfilt-*.c fsfilt_ldiskfs*.c fsfilt_extN.c sources
index 25ed99c..26ed65b 100644 (file)
@@ -67,6 +67,8 @@
 #include <linux/ext3_extents.h>
 #endif
 
+#include "lustre_quota_fmt.h"
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15)
 #define FSFILT_DATA_TRANS_BLOCKS(sb)      EXT3_DATA_TRANS_BLOCKS
 #define FSFILT_DELETE_TRANS_BLOCKS(sb)    EXT3_DELETE_TRANS_BLOCKS
@@ -723,9 +725,7 @@ static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
         int rc;
 
         memset(&sfs, 0, sizeof(sfs));
-
         rc = ll_do_statfs(sb, &sfs);
-
         if (!rc && sfs.f_bfree < sfs.f_ffree) {
                 sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree;
                 sfs.f_ffree = sfs.f_bfree;
@@ -883,7 +883,6 @@ static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base,
         pblock = ext3_mb_new_blocks(handle, &ar, err);
         *count = ar.len;
         return pblock;
-
 }
 #endif
 
@@ -1315,19 +1314,37 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize,
 
 static int fsfilt_ext3_setup(struct super_block *sb)
 {
+#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,6)) && \
+     defined(HAVE_QUOTA_SUPPORT)) || defined(S_PDIROPS)
+        struct ext3_sb_info *sbi = EXT3_SB(sb);
 #if 0
-        EXT3_SB(sb)->dx_lock = fsfilt_ext3_dx_lock;
-        EXT3_SB(sb)->dx_unlock = fsfilt_ext3_dx_unlock;
+        sbi->dx_lock = fsfilt_ext3_dx_lock;
+        sbi->dx_unlock = fsfilt_ext3_dx_unlock;
+#endif /* 0 */
 #endif
 #ifdef S_PDIROPS
         CWARN("Enabling PDIROPS\n");
-        set_opt(EXT3_SB(sb)->s_mount_opt, PDIROPS);
+        set_opt(sbi->s_mount_opt, PDIROPS);
         sb->s_flags |= S_PDIROPS;
 #endif
         if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
                 CWARN("filesystem doesn't have dir_index feature enabled\n");
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)) && defined(HAVE_QUOTA_SUPPORT)
-        set_opt(EXT3_SB(sb)->s_mount_opt, QUOTA);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,6)) && defined(HAVE_QUOTA_SUPPORT)
+        /* enable journaled quota support: install the reserved user and
+         * group quota file names and select the QFMT_VFS_V0 format; if
+         * either name cannot be duplicated, -ENOMEM is returned */
+        /* the names are kfreed in ext3_put_super() */
+        sbi->s_qf_names[USRQUOTA] = kstrdup("lquota.user.reserved", GFP_KERNEL);
+        if (!sbi->s_qf_names[USRQUOTA])
+                return -ENOMEM;
+        sbi->s_qf_names[GRPQUOTA] = kstrdup("lquota.group.reserved", GFP_KERNEL);
+        if (!sbi->s_qf_names[GRPQUOTA]) {
+                kfree(sbi->s_qf_names[USRQUOTA]);
+                sbi->s_qf_names[USRQUOTA] = NULL; /* drop stale pointer */
+                return -ENOMEM;
+        }
+        sbi->s_jquota_fmt = QFMT_VFS_V0;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13))
+        set_opt(sbi->s_mount_opt, QUOTA);
+#endif /* LINUX_VERSION_CODE >= 2.6.13 */
 #endif
         return 0;
 }
@@ -1363,8 +1380,7 @@ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs)
         return 0;
 }
 
-static const char *op_quotafile[] = { "lquota.user", "lquota.group" };
-
+#ifdef HAVE_QUOTA_SUPPORT
 #define DQINFO_COPY(out, in)                    \
 do {                                            \
         Q_COPY(out, in, dqi_bgrace);            \
@@ -1386,8 +1402,6 @@ do {                                            \
         Q_COPY(out, in, dqb_valid);             \
 } while (0)
 
-
-
 static int fsfilt_ext3_quotactl(struct super_block *sb,
                                 struct obd_quotactl *oqc)
 {
@@ -1419,10 +1433,15 @@ static int fsfilt_ext3_quotactl(struct super_block *sb,
                                 continue;
 
                         if (oqc->qc_cmd == Q_QUOTAON) {
+                                char *name[MAXQUOTAS] = LUSTRE_OPQFILES_NAMES_V2;
+
+                                LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2);
+
                                 if (!qcop->quota_on)
                                         GOTO(out, rc = -ENOSYS);
-                                rc = qcop->quota_on(sb, i, oqc->qc_id,
-                                                    (char *)op_quotafile[i]);
+
+                                rc = qcop->quota_on(sb, i, QFMT_VFS_V0,
+                                                    name[i]);
                         } else if (oqc->qc_cmd == Q_QUOTAOFF) {
                                 if (!qcop->quota_off)
                                         GOTO(out, rc = -ENOSYS);
@@ -1455,14 +1474,38 @@ static int fsfilt_ext3_quotactl(struct super_block *sb,
                 if (!qcop->get_dqblk)
                         GOTO(out, rc = -ENOSYS);
                 rc = qcop->get_dqblk(sb, oqc->qc_type, oqc->qc_id, dqblk);
+                if (!rc)
+                        dqblk->dqb_valid = QIF_LIMITS | QIF_USAGE;
                 break;
         case Q_SYNC:
                 if (!sb->s_qcop->quota_sync)
                         GOTO(out, rc = -ENOSYS);
                 qcop->quota_sync(sb, oqc->qc_type);
                 break;
+        case Q_FINVALIDATE:
+                CDEBUG(D_WARNING, "invalidating operational quota files\n");
+                for (i = 0; i < MAXQUOTAS; i++) {
+                        struct file *fp;
+                        char *name[MAXQUOTAS] = LUSTRE_OPQFILES_NAMES_V2;
+
+                        LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2);
+
+                        if (!Q_TYPESET(oqc, i))
+                                continue;
+
+                        fp = filp_open(name[i], O_CREAT | O_TRUNC | O_RDWR, 0644);
+                        if (IS_ERR(fp)) {
+                                rc = PTR_ERR(fp);
+                                CERROR("error invalidating operational quota file"
+                                       " %s (rc:%d)\n", name[i], rc);
+                        } else {
+                                filp_close(fp, 0);
+                        }
+
+                }
+                break;
         default:
-                CERROR("unsupported quotactl command: %d", oqc->qc_cmd);
+                CERROR("unsupported quotactl command: %d\n", oqc->qc_cmd);
                 LBUG();
         }
 out:
@@ -1473,26 +1516,26 @@ out:
         OBD_FREE_PTR(dqblk);
 
         if (rc)
-                CDEBUG(D_QUOTA, "quotactl command %#x, id %u, type %d "
+                CDEBUG(D_QUOTA, "quotactl command %#x, id %u, type %u "
                                 "failed: %d\n",
                        oqc->qc_cmd, oqc->qc_id, oqc->qc_type, rc);
         RETURN(rc);
 }
 
 struct chk_dqblk{
-        struct hlist_node       dqb_hash;        /* quotacheck hash */
-        struct list_head        dqb_list;        /* in list also */
-        qid_t                   dqb_id;          /* uid/gid */
-        short                   dqb_type;        /* USRQUOTA/GRPQUOTA */
-        __u32                   dqb_bhardlimit;  /* block hard limit */
-        __u32                   dqb_bsoftlimit;  /* block soft limit */
-        qsize_t                 dqb_curspace;    /* current space */
-        __u32                   dqb_ihardlimit;  /* inode hard limit */
-        __u32                   dqb_isoftlimit;  /* inode soft limit */
-        __u32                   dqb_curinodes;   /* current inodes */
-        __u64                   dqb_btime;       /* block grace time */
-        __u64                   dqb_itime;       /* inode grace time */
-        __u32                   dqb_valid;       /* flag for above fields */
+        struct hlist_node       dqb_hash;        /**< quotacheck hash node */
+        struct list_head        dqb_list;        /**< also linked on a list */
+        qid_t                   dqb_id;          /**< uid/gid */
+        short                   dqb_type;        /**< USRQUOTA/GRPQUOTA */
+        qsize_t                 dqb_bhardlimit;  /**< block hard limit */
+        qsize_t                 dqb_bsoftlimit;  /**< block soft limit */
+        qsize_t                 dqb_curspace;    /**< current space */
+        qsize_t                 dqb_ihardlimit;  /**< inode hard limit */
+        qsize_t                 dqb_isoftlimit;  /**< inode soft limit */
+        qsize_t                 dqb_curinodes;   /**< current inodes */
+        __u64                   dqb_btime;       /**< block grace time */
+        __u64                   dqb_itime;       /**< inode grace time */
+        __u32                   dqb_valid;       /**< flag for above fields */
 };
 
 static inline unsigned int chkquot_hash(qid_t id, int type)
@@ -1568,7 +1611,7 @@ cqget(struct super_block *sb, struct hlist_head *hash, struct list_head *list,
         return cdqb;
 }
 
-static inline int quota_onoff(struct super_block *sb, int cmd, int type)
+static inline int quota_onoff(struct super_block *sb, int cmd, int type, int qfmt)
 {
         struct obd_quotactl *oqctl;
         int rc;
@@ -1578,7 +1621,7 @@ static inline int quota_onoff(struct super_block *sb, int cmd, int type)
                 RETURN(-ENOMEM);
 
         oqctl->qc_cmd = cmd;
-        oqctl->qc_id = QFMT_LDISKFS;
+        oqctl->qc_id = qfmt;
         oqctl->qc_type = type;
         rc = fsfilt_ext3_quotactl(sb, oqctl);
 
@@ -1700,24 +1743,8 @@ static int add_inode_quota(struct inode *inode, struct qchk_ctxt *qctxt,
         return rc;
 }
 
-static int v2_write_dqheader(struct file *f, int type)
-{
-        static const __u32 quota_magics[] = V2_INITQMAGICS;
-        static const __u32 quota_versions[] = V2_INITQVERSIONS;
-        struct v2_disk_dqheader dqhead;
-        loff_t offset = 0;
-
-        CLASSERT(ARRAY_SIZE(quota_magics) == ARRAY_SIZE(quota_versions));
-        LASSERT(0 <= type && type < ARRAY_SIZE(quota_magics));
-
-        dqhead.dqh_magic = cpu_to_le32(quota_magics[type]);
-        dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
-
-        return cfs_user_write(f, (char *)&dqhead, sizeof(dqhead), &offset);
-}
-
 /* write dqinfo struct in a new quota file */
-static int v2_write_dqinfo(struct file *f, int type, struct if_dqinfo *info)
+static int v3_write_dqinfo(struct file *f, int type, struct if_dqinfo *info)
 {
         struct v2_disk_dqinfo dqinfo;
         __u32 blocks = V2_DQTREEOFF + 1;
@@ -1741,6 +1768,22 @@ static int v2_write_dqinfo(struct file *f, int type, struct if_dqinfo *info)
         return cfs_user_write(f, (char *)&dqinfo, sizeof(dqinfo), &offset);
 }
 
+/**
+ * Write the header of a quota file.
+ *
+ * The magic and version numbers selected by type are converted to
+ * little-endian and written at offset 0 of f.
+ *
+ * Returns the result of cfs_user_write().
+ */
+static int v3_write_dqheader(struct file *f, int type)
+{
+        static const __u32 quota_magics[] = V2_INITQMAGICS;
+        static const __u32 quota_versions[] = V2_INITQVERSIONS_R1;
+        struct v2_disk_dqheader hdr;
+        loff_t pos = 0;
+
+        /* both tables must cover the same quota types, and type must
+         * index into them */
+        CLASSERT(ARRAY_SIZE(quota_magics) == ARRAY_SIZE(quota_versions));
+        LASSERT(0 <= type && type < ARRAY_SIZE(quota_magics));
+
+        hdr.dqh_version = cpu_to_le32(quota_versions[type]);
+        hdr.dqh_magic = cpu_to_le32(quota_magics[type]);
+
+        return cfs_user_write(f, (char *)&hdr, sizeof(hdr), &pos);
+}
+
 static int create_new_quota_files(struct qchk_ctxt *qctxt,
                                   struct obd_quotactl *oqc)
 {
@@ -1751,32 +1794,36 @@ static int create_new_quota_files(struct qchk_ctxt *qctxt,
                 struct if_dqinfo *info = qctxt->qckt_first_check[i]?
                                          NULL : &qctxt->qckt_dqinfo[i];
                 struct file *file;
+                const char *name[MAXQUOTAS] = LUSTRE_OPQFILES_NAMES_V2;
 
                 if (!Q_TYPESET(oqc, i))
                         continue;
 
-                file = filp_open(op_quotafile[i], O_RDWR | O_CREAT | O_TRUNC,
-                                 0644);
+                LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2);
+
+                file = filp_open(name[i], O_RDWR | O_CREAT | O_TRUNC, 0644);
                 if (IS_ERR(file)) {
                         rc = PTR_ERR(file);
                         CERROR("can't create %s file: rc = %d\n",
-                               op_quotafile[i], rc);
+                               name[i], rc);
                         GOTO(out, rc);
                 }
 
                 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
-                        CERROR("file %s is not regular", op_quotafile[i]);
+                        CERROR("file %s is not regular", name[i]);
                         filp_close(file, 0);
                         GOTO(out, rc = -EINVAL);
                 }
 
-                rc = v2_write_dqheader(file, i);
+                DQUOT_DROP(file->f_dentry->d_inode);
+
+                rc = v3_write_dqheader(file, i);
                 if (rc) {
                         filp_close(file, 0);
                         GOTO(out, rc);
                 }
 
-                rc = v2_write_dqinfo(file, i, info);
+                rc = v3_write_dqinfo(file, i, info);
                 filp_close(file, 0);
                 if (rc)
                         GOTO(out, rc);
@@ -1872,12 +1919,12 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb,
                 if (!Q_TYPESET(oqc, i))
                         continue;
 
-                rc = quota_onoff(sb, Q_QUOTAON, i);
+                rc = quota_onoff(sb, Q_QUOTAON, i, oqc->qc_id);
                 if (!rc || rc == -EBUSY) {
                         rc = read_old_dqinfo(sb, i, qctxt->qckt_dqinfo);
                         if (rc)
                                 GOTO(out, rc);
-                } else if (rc == -ENOENT) {
+                } else if (rc == -ENOENT || rc == -EINVAL || rc == -EEXIST) {
                         qctxt->qckt_first_check[i] = 1;
                 } else if (rc) {
                         GOTO(out, rc);
@@ -1945,14 +1992,14 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb,
         }
 #endif
         /* turn off quota cause we are to dump chk_dqblk to files */
-        quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type);
+        quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type, oqc->qc_id);
 
         rc = create_new_quota_files(qctxt, oqc);
         if (rc)
                 GOTO(out, rc);
 
         /* we use vfs functions to set dqblk, so turn quota on */
-        rc = quota_onoff(sb, Q_QUOTAON, oqc->qc_type);
+        rc = quota_onoff(sb, Q_QUOTAON, oqc->qc_type, oqc->qc_id);
 out:
         /* dump and free chk_dqblk */
         rc = prune_chkquots(sb, qctxt, rc);
@@ -1960,7 +2007,7 @@ out:
 
         /* turn off quota, `lfs quotacheck` will turn on when all
          * nodes quotacheck finish. */
-        quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type);
+        quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type, oqc->qc_id);
 
         oqc->qc_stat = rc;
         if (rc)
@@ -1969,7 +2016,6 @@ out:
         RETURN(rc);
 }
 
-#ifdef HAVE_QUOTA_SUPPORT
 static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type,
                                  int cmd)
 {
@@ -1994,9 +2040,15 @@ static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type,
         case QFILE_INIT_INFO:
                 rc = lustre_init_quota_info(lqi, type);
                 break;
+        case QFILE_CONVERT:
+                rc = -ENOTSUPP;
+                CERROR("quota CONVERT command is not supported\n");
+                break;
         default:
-                CERROR("Unsupported admin quota file cmd %d\n", cmd);
-                LBUG();
+                rc = -ENOTSUPP;
+                CERROR("Unsupported admin quota file cmd %d\n"
+                       "Are lquota.ko and fsfilt_ldiskfs.ko modules in sync?\n",
+                       cmd);
                 break;
         }
         RETURN(rc);
@@ -2076,13 +2128,13 @@ static struct fsfilt_operations fsfilt_ext3_ops = {
         .fs_setup               = fsfilt_ext3_setup,
         .fs_send_bio            = fsfilt_ext3_send_bio,
         .fs_get_op_len          = fsfilt_ext3_get_op_len,
-        .fs_quotactl            = fsfilt_ext3_quotactl,
-        .fs_quotacheck          = fsfilt_ext3_quotacheck,
 #ifdef HAVE_DISK_INODE_VERSION
         .fs_get_version         = fsfilt_ext3_get_version,
         .fs_set_version         = fsfilt_ext3_set_version,
 #endif
 #ifdef HAVE_QUOTA_SUPPORT
+        .fs_quotactl            = fsfilt_ext3_quotactl,
+        .fs_quotacheck          = fsfilt_ext3_quotacheck,
         .fs_quotainfo           = fsfilt_ext3_quotainfo,
         .fs_qids                = fsfilt_ext3_qids,
         .fs_dquot               = fsfilt_ext3_dquot,
index 2f58e2a..83db369 100644 (file)
@@ -184,9 +184,7 @@ static int fsfilt_reiserfs_statfs(struct super_block *sb,
         int rc;
 
         memset(&sfs, 0, sizeof(sfs));
-
         rc = ll_do_statfs(sb, &sfs);
-
         statfs_pack(osfs, &sfs);
         return rc;
 }
index b0ddb5c..ee713e4 100644 (file)
@@ -39,7 +39,6 @@
  * from linux/fs/quota_v2.c
  */
 
-
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
 #include <asm/uaccess.h>
 
 #include <lustre_quota.h>
+#include <obd_support.h>
 #include "lustre_quota_fmt.h"
 
-typedef char *dqbuf_t;
+#ifdef HAVE_QUOTA_SUPPORT
+
+static const uint lustre_initqversions[][MAXQUOTAS] = { /* [version][type] */
+        [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2
+};
+
+static const int lustre_dqstrinblk[] = { /* indexed by quota version */
+        [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2
+};
 
-#define GETIDINDEX(id, depth) (((id) >> ((LUSTRE_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct lustre_disk_dqblk *)(((char *)buf)+sizeof(struct lustre_disk_dqdbheader)))
+static const int lustre_disk_dqblk_sz[] = { /* on-disk dqblk size, by version */
+        [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2)
+};
 
-static int check_quota_file(struct file *f, struct inode *inode, int type)
+int check_quota_file(struct file *f, struct inode *inode, int type, 
+                     lustre_quota_version_t version)
 {
         struct lustre_disk_dqheader dqhead;
         mm_segment_t fs;
         ssize_t size;
         loff_t offset = 0;
         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
-        static const uint quota_versions[] = LUSTRE_INITQVERSIONS;
+        const uint *quota_versions = lustre_initqversions[version];
 
         if (f) {
                 fs = get_fs();
@@ -90,27 +100,26 @@ static int check_quota_file(struct file *f, struct inode *inode, int type)
 #endif
         }
         if (size != sizeof(struct lustre_disk_dqheader))
-                return 0;
+                return -EINVAL;
         if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
             le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
-                return 0;
-        return 1;
+                return -EINVAL;
+        return 0;
 }
 
-/* Check whether given file is really lustre admin quotafile */
+/**
+ * Check whether given file is really lustre admin quotafile
+ *
+ * The file is taken from lqi->qi_files[type] and handed to
+ * check_quota_file() together with the quota version stored in
+ * lqi->qi_version.
+ *
+ * Returns the value returned by check_quota_file().
+ */
 int lustre_check_quota_file(struct lustre_quota_info *lqi, int type)
 {
         struct file *f = lqi->qi_files[type];
-        return check_quota_file(f, NULL, type);
+        return check_quota_file(f, NULL, type, lqi->qi_version);
 }
 
-/* Read information header from quota file */
-int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
+int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info)
 {
         mm_segment_t fs;
         struct lustre_disk_dqinfo dinfo;
-        struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
-        struct file *f = lqi->qi_files[type];
         ssize_t size;
         loff_t offset = LUSTRE_DQINFOOFF;
 
@@ -120,9 +129,9 @@ int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
                              sizeof(struct lustre_disk_dqinfo), &offset);
         set_fs(fs);
         if (size != sizeof(struct lustre_disk_dqinfo)) {
-                CDEBUG(D_WARNING, "Can't read info structure on device %s.\n",
+                CDEBUG(D_ERROR, "Can't read info structure on device %s.\n",
                        f->f_vfsmnt->mnt_sb->s_id);
-                return -1;
+                return -EINVAL;
         }
         info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
         info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
@@ -133,7 +142,17 @@ int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
         return 0;
 }
 
-/* Write information header to quota file */
+/**
+ * Read the information header of the quota file of the given type.
+ *
+ * Thin wrapper: lqi->qi_files[type] is read into lqi->qi_info[type]
+ * by lustre_read_quota_file_info(), whose result is returned.
+ */
+int lustre_read_quota_info(struct lustre_quota_info *lqi, int type)
+{
+        struct lustre_mem_dqinfo *info = &lqi->qi_info[type];
+        struct file *f = lqi->qi_files[type];
+
+        return lustre_read_quota_file_info(f, info);
+}
+
+/**
+ * Write information header to quota file
+ */
 int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
 {
         mm_segment_t fs;
@@ -164,33 +183,44 @@ int lustre_write_quota_info(struct lustre_quota_info *lqi, int type)
         return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct lustre_disk_dqblk *d)
+/**
+ * Convert an on-disk quota block into its in-memory representation.
+ *
+ * d is interpreted as a struct lustre_disk_dqblk_v2; each limit, usage
+ * and time field is converted with le64_to_cpu() and stored in m.
+ * version must be LUSTRE_QUOTA_V2 (asserted).
+ */
+void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
+                 lustre_quota_version_t version)
 {
-        m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
-        m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
-        m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
-        m->dqb_itime = le64_to_cpu(d->dqb_itime);
-        m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-        m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
-        m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
-        m->dqb_btime = le64_to_cpu(d->dqb_btime);
+        struct lustre_disk_dqblk_v2 *src = d;
+
+        LASSERT(version == LUSTRE_QUOTA_V2);
+
+        /* block fields */
+        m->dqb_bhardlimit = le64_to_cpu(src->dqb_bhardlimit);
+        m->dqb_bsoftlimit = le64_to_cpu(src->dqb_bsoftlimit);
+        m->dqb_curspace = le64_to_cpu(src->dqb_curspace);
+        m->dqb_btime = le64_to_cpu(src->dqb_btime);
+
+        /* inode fields */
+        m->dqb_ihardlimit = le64_to_cpu(src->dqb_ihardlimit);
+        m->dqb_isoftlimit = le64_to_cpu(src->dqb_isoftlimit);
+        m->dqb_curinodes = le64_to_cpu(src->dqb_curinodes);
+        m->dqb_itime = le64_to_cpu(src->dqb_itime);
 }
 
-static void mem2diskdqb(struct lustre_disk_dqblk *d, struct mem_dqblk *m,
-                        qid_t id)
+/**
+ * Convert an in-memory quota block into its on-disk representation.
+ *
+ * d is filled in as a struct lustre_disk_dqblk_v2: the fields of m are
+ * stored with cpu_to_le64() and id with cpu_to_le32().
+ * version must be LUSTRE_QUOTA_V2 (asserted).
+ *
+ * Always returns 0.
+ */
+static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m,
+                       qid_t id, lustre_quota_version_t version)
 {
-        d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
-        d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
-        d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
-        d->dqb_itime = cpu_to_le64(m->dqb_itime);
-        d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-        d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
-        d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-        d->dqb_btime = cpu_to_le64(m->dqb_btime);
-        d->dqb_id = cpu_to_le32(id);
+        struct lustre_disk_dqblk_v2 *dst = d;
+
+        LASSERT(version == LUSTRE_QUOTA_V2);
+
+        dst->dqb_id = cpu_to_le32(id);
+
+        /* block fields */
+        dst->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+        dst->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+        dst->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+        dst->dqb_btime = cpu_to_le64(m->dqb_btime);
+
+        /* inode fields */
+        dst->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+        dst->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+        dst->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+        dst->dqb_itime = cpu_to_le64(m->dqb_itime);
+
+        return 0;
 }
 
-static dqbuf_t getdqbuf(void)
+dqbuf_t getdqbuf(void)
 {
         dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS);
         if (!buf)
@@ -199,12 +229,12 @@ static dqbuf_t getdqbuf(void)
         return buf;
 }
 
-static inline void freedqbuf(dqbuf_t buf)
+void freedqbuf(dqbuf_t buf)     /* release a buffer obtained from getdqbuf() */
 {
         kfree(buf);
 }
 
-static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
+ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
 {
         mm_segment_t fs;
         ssize_t ret;
@@ -218,7 +248,7 @@ static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf)
         return ret;
 }
 
-static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
+ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
 {
         mm_segment_t fs;
         ssize_t ret;
@@ -229,18 +259,17 @@ static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf)
         ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset);
         set_fs(fs);
         return ret;
-
 }
 
-static void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info)
+void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info) /* sets DQF_INFO_DIRTY_B in dqi_flags */
 {
         set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
 }
 
-#define lustre_info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
-
-/* Remove empty block from list and return it */
-static int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
+/**
+ * Remove empty block from list and return it
+ */
+int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
 {
         dqbuf_t buf = getdqbuf();
         struct lustre_disk_dqdbheader *dh =
@@ -256,7 +285,8 @@ static int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info)
                 info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
         } else {
                 memset(buf, 0, LUSTRE_DQBLKSIZE);
-                if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0) /* Assure block allocation... */
+                /* Assure block allocation... */
+                if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0)
                         goto out_buf;
                 blk = info->dqi_blocks++;
         }
@@ -267,9 +297,11 @@ out_buf:
         return ret;
 }
 
-/* Insert empty block to the list */
-static int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
-                          dqbuf_t buf, uint blk)
+/**
+ * Insert empty block to the list
+ */
+int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
+                   dqbuf_t buf, uint blk)
 {
         struct lustre_disk_dqdbheader *dh =
             (struct lustre_disk_dqdbheader *)buf;
@@ -286,10 +318,12 @@ static int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
         return 0;
 }
 
-/* Remove given block from the list of blocks with free entries */
-static int remove_free_dqentry(struct file *filp,
-                               struct lustre_mem_dqinfo *info, dqbuf_t buf,
-                               uint blk)
+/**
+ * Remove given block from the list of blocks with free entries
+ */
+int remove_free_dqentry(struct file *filp,
+                        struct lustre_mem_dqinfo *info, dqbuf_t buf,
+                        uint blk)
 {
         dqbuf_t tmpbuf = getdqbuf();
         struct lustre_disk_dqdbheader *dh =
@@ -321,7 +355,8 @@ static int remove_free_dqentry(struct file *filp,
         }
         freedqbuf(tmpbuf);
         dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
-        if (write_blk(filp, blk, buf) < 0)      /* No matter whether write succeeds block is out of list */
+        if (write_blk(filp, blk, buf) < 0)
+                /* No matter whether write succeeds block is out of list */
                 CDEBUG(D_ERROR, 
                        "VFS: Can't write block (%u) with free entries.\n", blk);
         return 0;
@@ -330,10 +365,12 @@ out_buf:
         return err;
 }
 
-/* Insert given block to the beginning of list with free entries */
-static int insert_free_dqentry(struct file *filp,
-                               struct lustre_mem_dqinfo *info, dqbuf_t buf,
-                               uint blk)
+/**
+ * Insert given block to the beginning of list with free entries
+ */
+int insert_free_dqentry(struct file *filp,
+                        struct lustre_mem_dqinfo *info, dqbuf_t buf,
+                        uint blk)
 {
         dqbuf_t tmpbuf = getdqbuf();
         struct lustre_disk_dqdbheader *dh =
@@ -363,16 +400,23 @@ out_buf:
         return err;
 }
 
-/* Find space for dquot */
-static uint find_free_dqentry(struct lustre_dquot *dquot, int *err)
+
+
+/**
+ * Find space for dquot
+ */
+static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, 
+                              lustre_quota_version_t version)
 {
         struct lustre_quota_info *lqi = dquot->dq_info;
         struct file *filp = lqi->qi_files[dquot->dq_type];
         struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type];
         uint blk, i;
         struct lustre_disk_dqdbheader *dh;
-        struct lustre_disk_dqblk *ddquot;
-        struct lustre_disk_dqblk fakedquot;
+        void *ddquot;
+        int dqblk_sz = lustre_disk_dqblk_sz[version];
+        int dqstrinblk = lustre_dqstrinblk[version];
+        char fakedquot[dqblk_sz];
         dqbuf_t buf;
 
         *err = 0;
@@ -381,7 +425,7 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err)
                 return 0;
         }
         dh = (struct lustre_disk_dqdbheader *)buf;
-        ddquot = GETENTRIES(buf);
+        ddquot = GETENTRIES(buf, version);
         if (info->dqi_free_entry) {
                 blk = info->dqi_free_entry;
                 if ((*err = read_blk(filp, blk, buf)) < 0)
@@ -394,10 +438,14 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err)
                         return 0;
                 }
                 memset(buf, 0, LUSTRE_DQBLKSIZE);
-                info->dqi_free_entry = blk;     /* This is enough as block is already zeroed and entry list is empty... */
+                info->dqi_free_entry = blk; /* This is enough as block is 
+                                               already zeroed and entry list
+                                               is empty... */
                 lustre_mark_info_dirty(info);
         }
-        if (le16_to_cpu(dh->dqdh_entries) + 1 >= LUSTRE_DQSTRINBLK)     /* Block will be full? */
+
+        /* Will block be full */
+        if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk)
                 if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) {
                         CDEBUG(D_ERROR, 
                                "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n",
@@ -405,12 +453,13 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err)
                         goto out_buf;
                 }
         dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1);
-        memset(&fakedquot, 0, sizeof(struct lustre_disk_dqblk));
+        memset(fakedquot, 0, dqblk_sz);
         /* Find free structure in block */
-        for (i = 0; i < LUSTRE_DQSTRINBLK && 
-             memcmp(&fakedquot, ddquot + i, sizeof(fakedquot)); i++) ;
+        for (i = 0; i < dqstrinblk &&
+             memcmp(fakedquot, (char*)ddquot + i * dqblk_sz, 
+                    sizeof(fakedquot)); i++);
 
-        if (i == LUSTRE_DQSTRINBLK) {
+        if (i == dqstrinblk) {
                 CDEBUG(D_ERROR, 
                        "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
                 *err = -EIO;
@@ -426,7 +475,7 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err)
         dquot->dq_off =
             (blk << LUSTRE_DQBLKSIZE_BITS) +
             sizeof(struct lustre_disk_dqdbheader) +
-            i * sizeof(struct lustre_disk_dqblk);
+            i * dqblk_sz;
         freedqbuf(buf);
         return blk;
 out_buf:
@@ -434,8 +483,11 @@ out_buf:
         return 0;
 }
 
-/* Insert reference to structure into the trie */
-static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth)
+/**
+ * Insert reference to structure into the trie
+ */
+static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, 
+                          lustre_quota_version_t version)
 {
         struct lustre_quota_info *lqi = dquot->dq_info;
         struct file *filp = lqi->qi_files[dquot->dq_type];
@@ -476,9 +528,9 @@ static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth)
                         goto out_buf;
                 }
 
-                newblk = find_free_dqentry(dquot, &ret);
+                newblk = find_free_dqentry(dquot, &ret, version);
         } else
-                ret = do_insert_tree(dquot, &newblk, depth + 1);
+                ret = do_insert_tree(dquot, &newblk, depth + 1, version);
         if (newson && ret >= 0) {
                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
                 ret = write_blk(filp, *treeblk, buf);
@@ -489,27 +541,37 @@ out_buf:
         return ret;
 }
 
-/* Wrapper for inserting quota structure into tree */
-static inline int dq_insert_tree(struct lustre_dquot *dquot)
+/**
+ * Wrapper for inserting quota structure into tree: starts the insertion
+ * at the root block (LUSTRE_DQTREEOFF) with depth 0 and returns the
+ * result of do_insert_tree().
+ */
+static inline int dq_insert_tree(struct lustre_dquot *dquot,
+                                 lustre_quota_version_t version)
 {
-        int tmp = LUSTRE_DQTREEOFF;
-        return do_insert_tree(dquot, &tmp, 0);
+        /* do_insert_tree() takes a uint *, so the block number must be uint
+         * (same as lustre_delete_dquot()) */
+        uint tmp = LUSTRE_DQTREEOFF;
+
+        return do_insert_tree(dquot, &tmp, 0, version);
 }
 
-/*
- *     We don't have to be afraid of deadlocks as we never have quotas on quota files...
+/**
+ * We don't have to be afraid of deadlocks as we never have quotas on
+ * quota files...
  */
-static int lustre_write_dquot(struct lustre_dquot *dquot)
+static int lustre_write_dquot(struct lustre_dquot *dquot, 
+                              lustre_quota_version_t version)
 {
         int type = dquot->dq_type;
         struct file *filp;
         mm_segment_t fs;
         loff_t offset;
         ssize_t ret;
-        struct lustre_disk_dqblk ddquot, empty;
+        int dqblk_sz = lustre_disk_dqblk_sz[version];
+        char ddquot[dqblk_sz], empty[dqblk_sz];
+
+        ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version);
+        if (ret < 0)
+                return ret;
 
         if (!dquot->dq_off)
-                if ((ret = dq_insert_tree(dquot)) < 0) {
+                if ((ret = dq_insert_tree(dquot, version)) < 0) {
                         CDEBUG(D_ERROR,
                                "VFS: Error %Zd occurred while creating quota.\n",
                                ret);
@@ -517,19 +579,18 @@ static int lustre_write_dquot(struct lustre_dquot *dquot)
                 }
         filp = dquot->dq_info->qi_files[type];
         offset = dquot->dq_off;
-        mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
         /* Argh... We may need to write structure full of zeroes but that would be
          * treated as an empty place by the rest of the code. Format change would
          * be definitely cleaner but the problems probably are not worth it */
-        memset(&empty, 0, sizeof(struct lustre_disk_dqblk));
-        if (!memcmp(&empty, &ddquot, sizeof(struct lustre_disk_dqblk)))
-                ddquot.dqb_itime = cpu_to_le64(1);
+        memset(empty, 0, dqblk_sz);
+        if (!memcmp(empty, ddquot, dqblk_sz))
+                ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime = cpu_to_le64(1);
         fs = get_fs();
         set_fs(KERNEL_DS);
-        ret = filp->f_op->write(filp, (char *)&ddquot,
-                                sizeof(struct lustre_disk_dqblk), &offset);
+        ret = filp->f_op->write(filp, ddquot,
+                                dqblk_sz, &offset);
         set_fs(fs);
-        if (ret != sizeof(struct lustre_disk_dqblk)) {
+        if (ret != dqblk_sz) {
                 CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n",
                        filp->f_dentry->d_sb->s_id);
                 if (ret >= 0)
@@ -540,14 +601,18 @@ static int lustre_write_dquot(struct lustre_dquot *dquot)
         return ret;
 }
 
-/* Free dquot entry in data block */
-static int free_dqentry(struct lustre_dquot *dquot, uint blk)
+/**
+ * Free dquot entry in data block
+ */
+static int free_dqentry(struct lustre_dquot *dquot, uint blk, 
+                        lustre_quota_version_t version)
 {
         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
         struct lustre_mem_dqinfo *info =
             &dquot->dq_info->qi_info[dquot->dq_type];
         struct lustre_disk_dqdbheader *dh;
         dqbuf_t buf = getdqbuf();
+        int dqstrinblk = lustre_dqstrinblk[version];
         int ret = 0;
 
         if (!buf)
@@ -573,10 +638,9 @@ static int free_dqentry(struct lustre_dquot *dquot, uint blk)
                         goto out_buf;
                 }
         } else {
-                memset(buf +
-                       (dquot->dq_off & ((1 << LUSTRE_DQBLKSIZE_BITS) - 1)), 0,
-                       sizeof(struct lustre_disk_dqblk));
-                if (le16_to_cpu(dh->dqdh_entries) == LUSTRE_DQSTRINBLK - 1) {
+                memset(buf + (dquot->dq_off & ((1<<LUSTRE_DQBLKSIZE_BITS) - 1)),
+                       0, lustre_disk_dqblk_sz[version]);
+                if (le16_to_cpu(dh->dqdh_entries) == dqstrinblk - 1) {
                         /* Insert will write block itself */
                         if ((ret =
                              insert_free_dqentry(filp, info, buf, blk)) < 0) {
@@ -597,8 +661,11 @@ out_buf:
         return ret;
 }
 
-/* Remove reference to dquot from tree */
-static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth)
+/**
+ * Remove reference to dquot from tree
+ */
+static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, 
+                       lustre_quota_version_t version)
 {
         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
         struct lustre_mem_dqinfo *info =
@@ -616,14 +683,15 @@ static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth)
         }
         newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
         if (depth == LUSTRE_DQTREEDEPTH - 1) {
-                ret = free_dqentry(dquot, newblk);
+                ret = free_dqentry(dquot, newblk, version);
                 newblk = 0;
         } else
-                ret = remove_tree(dquot, &newblk, depth + 1);
+                ret = remove_tree(dquot, &newblk, depth + 1, version);
         if (ret >= 0 && !newblk) {
                 int i;
                 ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
-                for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++) ;     /* Block got empty? */
+                for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++)
+                        /* Block got empty? */ ;
                 /* don't put the root block into free blk list! */
                 if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) {
                         put_free_dqblk(filp, info, buf, *blk);
@@ -637,24 +705,34 @@ out_buf:
         return ret;
 }
 
-/* Delete dquot from tree */
-static int lustre_delete_dquot(struct lustre_dquot *dquot)
+/**
+ * Delete dquot from tree
+ *
+ * A dquot whose dq_off is 0 was never allocated in the file, so nothing is
+ * removed and 0 is returned.  Otherwise the removal starts at block
+ * LUSTRE_DQTREEOFF, depth 0, and the result of remove_tree() is returned.
+ */
+static int lustre_delete_dquot(struct lustre_dquot *dquot, 
+                                lustre_quota_version_t version)
 {
         uint tmp = LUSTRE_DQTREEOFF;
 
         if (!dquot->dq_off)     /* Even not allocated? */
                 return 0;
-        return remove_tree(dquot, &tmp, 0);
+        return remove_tree(dquot, &tmp, 0, version);
 }
 
-/* Find entry in block */
-static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk)
+/**
+ * Find entry in block
+ */
+static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, 
+                                 lustre_quota_version_t version)
 {
         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
         dqbuf_t buf = getdqbuf();
         loff_t ret = 0;
         int i;
-        struct lustre_disk_dqblk *ddquot = GETENTRIES(buf);
+        struct lustre_disk_dqblk_v2 *ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
+        int dqblk_sz = lustre_disk_dqblk_sz[version];
+        int dqstrinblk = lustre_dqstrinblk[version];
+
+        LASSERT(version == LUSTRE_QUOTA_V2);
 
         if (!buf)
                 return -ENOMEM;
@@ -663,20 +741,20 @@ static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk)
                 goto out_buf;
         }
         if (dquot->dq_id)
-                for (i = 0;
-                     i < LUSTRE_DQSTRINBLK
-                     && le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++) ;
+                for (i = 0; i < dqstrinblk && 
+                     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id;
+                     i++) ;
         else {                  /* ID 0 as a bit more complicated searching... */
-                struct lustre_disk_dqblk fakedquot;
+                char fakedquot[dqblk_sz];
 
-                memset(&fakedquot, 0, sizeof(struct lustre_disk_dqblk));
-                for (i = 0; i < LUSTRE_DQSTRINBLK; i++)
+                memset(fakedquot, 0, sizeof(fakedquot));
+                for (i = 0; i < dqstrinblk; i++)
                         if (!le32_to_cpu(ddquot[i].dqb_id)
-                            && memcmp(&fakedquot, ddquot + i,
-                                      sizeof(struct lustre_disk_dqblk)))
+                            && memcmp(fakedquot, ddquot + i,
+                                      dqblk_sz))
                                 break;
         }
-        if (i == LUSTRE_DQSTRINBLK) {
+        if (i == dqstrinblk) {
                 CDEBUG(D_ERROR,
                        "VFS: Quota for id %u referenced but not present.\n",
                        dquot->dq_id);
@@ -686,14 +764,17 @@ static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk)
                 ret =
                     (blk << LUSTRE_DQBLKSIZE_BITS) +
                     sizeof(struct lustre_disk_dqdbheader) +
-                    i * sizeof(struct lustre_disk_dqblk);
+                    i * dqblk_sz;
 out_buf:
         freedqbuf(buf);
         return ret;
 }
 
-/* Find entry for given id in the tree */
-static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth)
+/**
+ * Find entry for given id in the tree
+ */
+static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, 
+                                lustre_quota_version_t version)
 {
         struct file *filp = dquot->dq_info->qi_files[dquot->dq_type];
         dqbuf_t buf = getdqbuf();
@@ -711,18 +792,21 @@ static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth)
         if (!blk)               /* No reference? */
                 goto out_buf;
         if (depth < LUSTRE_DQTREEDEPTH - 1)
-                ret = find_tree_dqentry(dquot, blk, depth + 1);
+                ret = find_tree_dqentry(dquot, blk, depth + 1, version);
         else
-                ret = find_block_dqentry(dquot, blk);
+                ret = find_block_dqentry(dquot, blk, version);
 out_buf:
         freedqbuf(buf);
         return ret;
 }
 
-/* Find entry for given id in the tree - wrapper function */
-static inline loff_t find_dqentry(struct lustre_dquot *dquot)
+/**
+ * Find entry for given id in the tree - wrapper function
+ *
+ * Starts the lookup at block LUSTRE_DQTREEOFF, depth 0, and returns the
+ * result of find_tree_dqentry() unchanged.  lustre_read_dquot() treats a
+ * result of 0 as "entry not present" and a negative result as an error.
+ */
+static inline loff_t find_dqentry(struct lustre_dquot *dquot, 
+                                  lustre_quota_version_t version)
 {
-        return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0);
+        return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version);
 }
 
 int lustre_read_dquot(struct lustre_dquot *dquot)
@@ -731,8 +815,8 @@ int lustre_read_dquot(struct lustre_dquot *dquot)
         struct file *filp;
         mm_segment_t fs;
         loff_t offset;
-        struct lustre_disk_dqblk ddquot, empty;
-        int ret = 0;
+        int ret = 0, dqblk_sz;
+        lustre_quota_version_t version;
 
         /* Invalidated quota? */
         if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
@@ -740,7 +824,11 @@ int lustre_read_dquot(struct lustre_dquot *dquot)
                 return -EIO;
         }
 
-        offset = find_dqentry(dquot);
+        version = dquot->dq_info->qi_version;
+        LASSERT(version == LUSTRE_QUOTA_V2);
+        dqblk_sz = lustre_disk_dqblk_sz[version];
+
+        offset = find_dqentry(dquot, version);
         if (offset <= 0) {      /* Entry not present? */
                 if (offset < 0)
                         CDEBUG(D_ERROR,
@@ -748,42 +836,46 @@ int lustre_read_dquot(struct lustre_dquot *dquot)
                                dquot->dq_id);
                 dquot->dq_off = 0;
                 set_bit(DQ_FAKE_B, &dquot->dq_flags);
-                memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+                memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk));
                 ret = offset;
         } else {
+                char ddquot[dqblk_sz], empty[dqblk_sz];
+
                 dquot->dq_off = offset;
                 fs = get_fs();
                 set_fs(KERNEL_DS);
-                if ((ret = filp->f_op->read(filp, (char *)&ddquot,
-                                            sizeof(struct lustre_disk_dqblk),
-                                            &offset)) !=
-                    sizeof(struct lustre_disk_dqblk)) {
+                if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) !=
+                    dqblk_sz) {
                         if (ret >= 0)
                                 ret = -EIO;
                         CDEBUG(D_ERROR,
                                "VFS: Error while reading quota structure for id %u.\n",
                                dquot->dq_id);
-                        memset(&ddquot, 0, sizeof(struct lustre_disk_dqblk));
+                        memset(ddquot, 0, dqblk_sz);
                 } else {
                         ret = 0;
                         /* We need to escape back all-zero structure */
-                        memset(&empty, 0, sizeof(struct lustre_disk_dqblk));
-                        empty.dqb_itime = cpu_to_le64(1);
-                        if (!memcmp(&empty, &ddquot,
-                                    sizeof(struct lustre_disk_dqblk)))
-                                ddquot.dqb_itime = 0;
+                        memset(empty, 0, dqblk_sz);
+                        ((struct lustre_disk_dqblk_v2 *)empty)->dqb_itime = cpu_to_le64(1);
+                        if (!memcmp(empty, ddquot, dqblk_sz))
+                                ((struct lustre_disk_dqblk_v2 *)empty)->dqb_itime = cpu_to_le64(0);
                 }
                 set_fs(fs);
-                disk2memdqb(&dquot->dq_dqb, &ddquot);
+                disk2memdqb(&dquot->dq_dqb, ddquot, version);
         }
 
         return ret;
 }
 
-/* Commit changes of dquot to disk - it might also mean deleting it when quota became fake */
+/**
+ * Commit changes of dquot to disk - it might also mean deleting
+ * it when quota became fake.
+ */
 int lustre_commit_dquot(struct lustre_dquot *dquot)
 {
         int rc = 0;
+        lustre_quota_version_t version = dquot->dq_info->qi_version;
+
         /* always clear the flag so we don't loop on an IO error... */
         clear_bit(DQ_MOD_B, &dquot->dq_flags);
 
@@ -791,9 +883,9 @@ int lustre_commit_dquot(struct lustre_dquot *dquot)
          * over all cluster, so keep the fake dquot entry on disk is
          * meaningless, just remove it */
         if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
-                rc = lustre_delete_dquot(dquot);
+                rc = lustre_delete_dquot(dquot, version);
         else
-                rc = lustre_write_dquot(dquot);
+                rc = lustre_write_dquot(dquot, version);
 
         if (rc < 0)
                 return rc;
@@ -804,21 +896,20 @@ int lustre_commit_dquot(struct lustre_dquot *dquot)
         return rc;
 }
 
-/* We need to export this function to initialize quotafile, because we haven't
- * user level check utility */
-int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
+int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics)
 {
-        struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
+        static const uint quota_magics[] = LUSTRE_INITQMAGICS;
+        static const uint fake_magics[] = LUSTRE_BADQMAGICS;
+        const uint* quota_versions = lustre_initqversions[lqi->qi_version];
         struct lustre_disk_dqheader dqhead;
-        struct file *fp = lqi->qi_files[type];
         ssize_t size;
         loff_t offset = 0;
+        struct file *fp = lqi->qi_files[type];
         int rc = 0;
-        static const uint quota_magics[] = LUSTRE_INITQMAGICS;
-        static const uint quota_versions[] = LUSTRE_INITQVERSIONS;
 
         /* write quotafile header */
-        dqhead.dqh_magic = cpu_to_le32(quota_magics[type]);
+        dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
+                                       fake_magics[type] : quota_magics[type]);
         dqhead.dqh_version = cpu_to_le32(quota_versions[type]);
         size = fp->f_op->write(fp, (char *)&dqhead,
                                sizeof(struct lustre_disk_dqheader), &offset);
@@ -827,6 +918,21 @@ int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
                 CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc);
                 rc = size;
         }
+
+        return rc;
+}
+
+/**
+ * We need to export this function to initialize quotafile, because we haven't
+ * user level check utility
+ */
+int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
+                                   int fakemagics)
+{
+        struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type];
+        int rc;
+
+        rc = lustre_init_quota_header(lqi, type, fakemagics);
         if (rc)
                 return rc;
 
@@ -839,13 +945,13 @@ int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
         return lustre_write_quota_info(lqi, type);
 }
 
-struct dqblk {
-        struct list_head link;
-        uint blk;
-};
+int lustre_init_quota_info(struct lustre_quota_info *lqi, int type)
+{
+        return lustre_init_quota_info_generic(lqi, type, 0); /* fakemagics == 0 */
+}
 
-static ssize_t quota_read(struct file *file, struct inode *inode, int type,
-                          uint blk, dqbuf_t buf)
+ssize_t quota_read(struct file *file, struct inode *inode, int type,
+                   uint blk, dqbuf_t buf)
 {
         if (file) {
                 return read_blk(file, blk, buf);
@@ -913,8 +1019,8 @@ out_buf:
         return ret;
 }
 
-static int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
-                             uint blk, int depth, struct list_head *list)
+int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, 
+                      uint blk, int depth, struct list_head *list)
 {
         dqbuf_t buf = getdqbuf();
         loff_t ret = 0;
@@ -935,7 +1041,7 @@ static int walk_tree_dqentry(struct file *filp, struct inode *inode, int type,
                         continue;
 
                 if (depth < LUSTRE_DQTREEDEPTH - 1)
-                        ret = walk_tree_dqentry(filp, inode, type, blk, 
+                        ret = walk_tree_dqentry(filp, inode, type, blk,
                                                 depth + 1, list);
                 else
                         ret = walk_block_dqentry(filp, inode, type, blk, list);
@@ -945,67 +1051,71 @@ out_buf:
         return ret;
 }
 
-/* Walk through the quota file (v2 format) to get all ids with quota limit */
+/**
+ * Walk through the quota file (v2 format) to get all ids with quota limit
+ */
 int lustre_get_qids(struct file *fp, struct inode *inode, int type,
                     struct list_head *list)
 {
         struct list_head blk_list;
         struct dqblk *blk_item, *tmp;
         dqbuf_t buf = NULL;
-        struct lustre_disk_dqblk *ddquot;
+        struct lustre_disk_dqblk_v2 *ddquot;
         int rc;
+        lustre_quota_version_t version;
+
+        ENTRY;
 
-        if (!check_quota_file(fp, inode, type)) {
+        if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0)
+                version = LUSTRE_QUOTA_V2;
+        else {
                 CDEBUG(D_ERROR, "unknown quota file format!\n");
-                return -EINVAL;
+                RETURN(-EINVAL);
         }
+
         if (!list_empty(list)) {
                 CDEBUG(D_ERROR, "not empty list\n");
-                return -EINVAL;
+                RETURN(-EINVAL);
         }
 
         INIT_LIST_HEAD(&blk_list);
         rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list);
         if (rc) {
                 CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc);
-                goto out_free;
+                GOTO(out_free, rc);
         }
         if (list_empty(&blk_list))
-                return 0;
+                RETURN(0);
 
         buf = getdqbuf();
         if (!buf)
-                return -ENOMEM;
-        ddquot = GETENTRIES(buf);
+                RETURN(-ENOMEM);
+        ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version);
 
         list_for_each_entry(blk_item, &blk_list, link) {
                 loff_t ret = 0;
-                int i;
-                struct lustre_disk_dqblk fakedquot;
+                int i, dqblk_sz = lustre_disk_dqblk_sz[version];
+                char fakedquot[dqblk_sz];
 
                 memset(buf, 0, LUSTRE_DQBLKSIZE);
                 if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) {
                         CDEBUG(D_ERROR,
                                "VFS: Can't read quota tree block %u.\n",
                                blk_item->blk);
-                        rc = ret;
-                        goto out_free;
+                        GOTO(out_free, rc = ret);
                 }
 
-                memset(&fakedquot, 0, sizeof(struct lustre_disk_dqblk));
-                for (i = 0; i < LUSTRE_DQSTRINBLK; i++) {
+                memset(fakedquot, 0, dqblk_sz);
+                for (i = 0; i < lustre_dqstrinblk[version]; i++) {
                         struct dquot_id *dqid;
                         /* skip empty entry */
-                        if (!memcmp
-                            (&fakedquot, ddquot + i,
-                             sizeof(struct lustre_disk_dqblk)))
+                        if (!memcmp(fakedquot, ddquot + i, dqblk_sz))
                                 continue;
 
                         dqid = kmalloc(sizeof(*dqid), GFP_NOFS);
-                        if (!dqid) {
-                                rc = -ENOMEM;
-                                goto out_free;
-                        }
+                        if (!dqid) 
+                                GOTO(out_free, rc = -ENOMEM);
+
                         dqid->di_id = le32_to_cpu(ddquot[i].dqb_id);
                         INIT_LIST_HEAD(&dqid->di_link);
                         list_add(&dqid->di_link, list);
@@ -1019,13 +1129,16 @@ out_free:
         }
         if (buf)
                 freedqbuf(buf);
-        return rc;
+
+        RETURN(rc);
 }
 
-EXPORT_SYMBOL(lustre_check_quota_file);
+
 EXPORT_SYMBOL(lustre_read_quota_info);
 EXPORT_SYMBOL(lustre_write_quota_info);
+EXPORT_SYMBOL(lustre_check_quota_file);
 EXPORT_SYMBOL(lustre_read_dquot);
 EXPORT_SYMBOL(lustre_commit_dquot);
 EXPORT_SYMBOL(lustre_init_quota_info);
 EXPORT_SYMBOL(lustre_get_qids);
+#endif
index ffdac51..4072509 100644 (file)
@@ -41,6 +41,8 @@
 #ifndef _LUSTRE_QUOTA_FMT_H
 #define _LUSTRE_QUOTA_FMT_H
 
+#ifdef HAVE_QUOTA_SUPPORT
+
 #include <linux/types.h>
 #include <linux/quota.h>
 
  * Same with quota v2's magic
  */
 #define LUSTRE_INITQMAGICS {\
-       0xd9c01f11,     /* USRQUOTA */\
-       0xd9c01927      /* GRPQUOTA */\
+        0xd9c01f11,     /** USRQUOTA */\
+        0xd9c01927      /** GRPQUOTA */\
+}
+
+/* Invalid magics that mark quota file as inconsistent */
+#define LUSTRE_BADQMAGICS {\
+        0xbadbadba,     /** USRQUOTA */\
+        0xbadbadba      /** GRPQUOTA */\
 }
 
-#define LUSTRE_INITQVERSIONS {\
-       0,              /* USRQUOTA */\
-       0               /* GRPQUOTA */\
+/* for version 2 of lustre_disk_dqblk */
+#define LUSTRE_INITQVERSIONS_V2 {\
+        1,             /* USRQUOTA */\
+        1              /* GRPQUOTA */\
 }
 
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
- * to blocks of these structures.
+ * to blocks of these structures. This is the version 2 layout.
  */
-struct lustre_disk_dqblk {
-        __u32 dqb_id;           /* id this quota applies to */
-        __u32 dqb_ihardlimit;   /* absolute limit on allocated inodes */
-        __u32 dqb_isoftlimit;   /* preferred inode limit */
-        __u32 dqb_curinodes;    /* current # allocated inodes */
-        __u32 dqb_bhardlimit;   /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-        __u32 dqb_bsoftlimit;   /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
-        __u64 dqb_curspace;     /* current space occupied (in bytes) */
-        __u64 dqb_btime;        /* time limit for excessive disk use */
-        __u64 dqb_itime;        /* time limit for excessive inode use */
+struct lustre_disk_dqblk_v2 {
+        __u32 dqb_id;           /**< id this quota applies to */
+        __u32 padding;          /**< explicit pad so the 64-bit fields start at offset 8 */
+        __u64 dqb_ihardlimit;   /**< absolute limit on allocated inodes */
+        __u64 dqb_isoftlimit;   /**< preferred inode limit */
+        __u64 dqb_curinodes;    /**< current # allocated inodes */
+        __u64 dqb_bhardlimit;   /**< absolute limit on disk space (in QUOTABLOCK_SIZE) */
+        __u64 dqb_bsoftlimit;   /**< preferred limit on disk space (in QUOTABLOCK_SIZE) */
+        __u64 dqb_curspace;     /**< current space occupied (in bytes) */
+        __u64 dqb_btime;        /**< time limit for excessive disk use */
+        __u64 dqb_itime;        /**< time limit for excessive inode use */
 };
 
+/* Number of entries in one block (14 entries) */
+#define LUSTRE_DQSTRINBLK_V2 \
+                ((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) \
+               / sizeof(struct lustre_disk_dqblk_v2))
+
+/* Address of the first entry in block buffer buf, just past the block header.
+ * Arguments are parenthesized so expression arguments expand correctly. */
+#define GETENTRIES_V2(buf) (((char *)(buf))+sizeof(struct lustre_disk_dqdbheader))
+
+/* Version-dispatching form of GETENTRIES_V2; evaluates to 0 for any version
+ * other than LUSTRE_QUOTA_V2. */
+#define GETENTRIES(buf,version) (((version) == LUSTRE_QUOTA_V2) ? \
+                                GETENTRIES_V2(buf) : 0)
+
 /*
  * Here are header structures as written on disk and their in-memory copies
  */
@@ -117,6 +136,62 @@ static void lprocfs_quotfmt_test_init_vars(struct lprocfs_static_vars *lvars) {}
 #define LUSTRE_DQBLKSIZE       (1 << LUSTRE_DQBLKSIZE_BITS)    /* Size of block with quota structures */
 #define LUSTRE_DQTREEOFF       1       /* Offset of tree in file in blocks */
 #define LUSTRE_DQTREEDEPTH     4       /* Depth of quota tree */
-#define LUSTRE_DQSTRINBLK      ((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) / sizeof(struct lustre_disk_dqblk)) /* Number of entries in one blocks */
 
+typedef char *dqbuf_t;
+
+#define GETIDINDEX(id, depth) (((id) >> ((LUSTRE_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
+
+#define MAX_UL (0xffffffffUL)
+
+#define lustre_info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
+
+struct dqblk {
+        struct list_head link;
+        uint blk;
+};
+
+/* the following come from lustre_fmt_common.c */
+dqbuf_t getdqbuf(void);
+void freedqbuf(dqbuf_t buf);
+void disk2memdqb(struct lustre_mem_dqblk *m, void *d,
+                        enum lustre_quota_version version);
+void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info);
+int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, 
+                             int fakemagics);
+int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type,
+                                   int fakemagics);
+int lustre_read_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info);
+int lustre_write_quota_info(struct lustre_quota_info *lqi, int type);
+ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf);
+ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf);
+int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info);
+int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info,
+                          dqbuf_t buf, uint blk);
+int remove_free_dqentry(struct file *filp,
+                               struct lustre_mem_dqinfo *info, dqbuf_t buf,
+                               uint blk);
+int insert_free_dqentry(struct file *filp,
+                               struct lustre_mem_dqinfo *info, dqbuf_t buf,
+                               uint blk);
+ssize_t quota_read(struct file *file, struct inode *inode, int type,
+                   uint blk, dqbuf_t buf);
+int walk_tree_dqentry(struct file *filp, struct inode *inode, int type,
+                      uint blk, int depth, struct list_head *list);
+int check_quota_file(struct file *f, struct inode *inode, int type,
+                     lustre_quota_version_t version);
+int lustre_check_quota_file(struct lustre_quota_info *lqi, int type);
+int lustre_read_dquot(struct lustre_dquot *dquot);
+int lustre_commit_dquot(struct lustre_dquot *dquot);
+int lustre_init_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_get_qids(struct file *fp, struct inode *inode, int type,
+                    struct list_head *list);
+
+#define LUSTRE_ADMIN_QUOTAFILES_V2 {\
+        "admin_quotafile_v2.usr",       /* user admin quotafile */\
+        "admin_quotafile_v2.grp"        /* group admin quotafile */\
+}
+
+#define LUSTRE_OPQFILES_NAMES_V2 { "lquota_v2.user", "lquota_v2.group" }
 #endif                          /* lustre_quota_fmt.h */
+#endif
index de6c32e..b360570 100644 (file)
@@ -57,6 +57,8 @@
 
 #include "lustre_quota_fmt.h"
 
+#ifdef HAVE_QUOTA_SUPPORT
+
 char *test_quotafile[2] = { "usrquota_test", "grpquota_test" };
 
 static int quotfmt_initialize(struct lustre_quota_info *lqi,
@@ -65,7 +67,7 @@ static int quotfmt_initialize(struct lustre_quota_info *lqi,
 {
         struct lustre_disk_dqheader dqhead;
         static const uint quota_magics[] = LUSTRE_INITQMAGICS;
-        static const uint quota_versions[] = LUSTRE_INITQVERSIONS;
+        static const uint quota_versions[] = LUSTRE_INITQVERSIONS_V2;
         struct file *fp;
         struct inode *parent_inode = tgt->obd_lvfs_ctxt.pwd->d_inode;
         size_t size;
@@ -107,7 +109,7 @@ static int quotfmt_initialize(struct lustre_quota_info *lqi,
                                        sizeof(struct lustre_disk_dqheader),
                                        &offset);
                 if (size != sizeof(struct lustre_disk_dqheader)) {
-                        CERROR("error writing quoafile header %s (rc = %d)\n",
+                        CERROR("error writing quotafile header %s (rc = %d)\n",
                                name, rc);
                         rc = size;
                         break;
@@ -166,7 +168,7 @@ static int quotfmt_test_1(struct lustre_quota_info *lqi)
         ENTRY;
 
         for (i = 0; i < MAXQUOTAS; i++) {
-                if (!lustre_check_quota_file(lqi, i))
+                if (lustre_check_quota_file(lqi, i))
                         RETURN(-EINVAL);
         }
         RETURN(0);
@@ -256,7 +258,7 @@ static void put_rand_dquot(struct lustre_dquot *dquot)
 static int write_check_dquot(struct lustre_quota_info *lqi)
 {
         struct lustre_dquot *dquot;
-        struct mem_dqblk dqblk;
+        struct lustre_mem_dqblk dqblk;
         int rc = 0;
         ENTRY;
 
@@ -541,3 +543,5 @@ MODULE_LICENSE("GPL");
 
 module_init(quotfmt_test_init);
 module_exit(quotfmt_test_exit);
+
+#endif /* HAVE_QUOTA_SUPPORT */
index 62b85bf..c259f2b 100644 (file)
@@ -78,25 +78,6 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
 void mdc_enter_request(struct client_obd *cli);
 void mdc_exit_request(struct client_obd *cli);
 
-static inline int client_is_remote(struct obd_export *exp)
-{
-        struct obd_import *imp = class_exp2cliimp(exp);
-
-        if (imp->imp_connect_flags_orig & OBD_CONNECT_RMT_CLIENT) {
-                if (!(imp->imp_connect_data.ocd_connect_flags &
-                    OBD_CONNECT_RMT_CLIENT))
-                        return 0;
-                else
-                        return 1;
-        } else {
-                if (!(imp->imp_connect_data.ocd_connect_flags &
-                    OBD_CONNECT_LCL_CLIENT))
-                        return 1;
-                else
-                        return 0;
-        }
-}
-
 /* mdc/mdc_locks.c */
 int mdc_set_lock_data(struct obd_export *exp,
                       __u64 *lockh, void *data);
index 4789295..0ca79b9 100644 (file)
 #include <lustre_param.h>
 #include "mdc_internal.h"
 
-quota_interface_t *quota_interface;
-
 #define REQUEST_MINOR 244
 
+static quota_interface_t *quota_interface;
 extern quota_interface_t mdc_quota_interface;
 
 static int mdc_cleanup(struct obd_device *obd);
 
-static struct obd_capa *mdc_unpack_capa(struct ptlrpc_request *req,
-                                        const struct req_msg_field *field)
+int mdc_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req,
+                    const struct req_msg_field *field, struct obd_capa **oc)
 {
         struct lustre_capa *capa;
-        struct obd_capa *oc;
+        struct obd_capa *c;
+        ENTRY;
 
         /* swabbed already in mdc_enqueue */
         capa = req_capsule_server_get(&req->rq_pill, field);
         if (capa == NULL)
-                return ERR_PTR(-EPROTO);
+                RETURN(-EPROTO);
 
-        oc = alloc_capa(CAPA_SITE_CLIENT);
-        if (!oc) {
+        c = alloc_capa(CAPA_SITE_CLIENT);
+        if (IS_ERR(c)) {
                 CDEBUG(D_INFO, "alloc capa failed!\n");
-                return ERR_PTR(-ENOMEM);
+                RETURN(PTR_ERR(c));
+        } else {
+                c->c_capa = *capa;
+                *oc = c;
+                RETURN(0);
         }
-        oc->c_capa = *capa;
-
-        return oc;
 }
 
 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
@@ -116,12 +117,9 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid,
                 GOTO(out, rc = -EPROTO);
 
         if (body->valid & OBD_MD_FLMDSCAPA) {
-                struct obd_capa *oc;
-
-                oc = mdc_unpack_capa(req, &RMF_CAPA1);
-                if (IS_ERR(oc))
-                        GOTO(out, rc = PTR_ERR(oc));
-                *pc = oc;
+                rc = mdc_unpack_capa(NULL, req, &RMF_CAPA1, pc);
+                if (rc)
+                        GOTO(out, rc);
         }
 
         *rootfid = body->fid1;
@@ -584,28 +582,34 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req,
                 }
         }
         if (md->body->valid & OBD_MD_FLMDSCAPA) {
-                struct obd_capa *oc = mdc_unpack_capa(req, &RMF_CAPA1);
+                struct obd_capa *oc = NULL;
 
-                if (IS_ERR(oc))
-                        GOTO(out, rc = PTR_ERR(oc));
+                rc = mdc_unpack_capa(NULL, req, &RMF_CAPA1, &oc);
+                if (rc)
+                        GOTO(out, rc);
                 md->mds_capa = oc;
         }
 
         if (md->body->valid & OBD_MD_FLOSSCAPA) {
-                struct obd_capa *oc = mdc_unpack_capa(req, &RMF_CAPA2);
+                struct obd_capa *oc = NULL;
 
-                if (IS_ERR(oc))
-                        GOTO(out, rc = PTR_ERR(oc));
+                rc = mdc_unpack_capa(NULL, req, &RMF_CAPA2, &oc);
+                if (rc)
+                        GOTO(out, rc);
                 md->oss_capa = oc;
         }
 
         EXIT;
 out:
         if (rc) {
-                if (md->oss_capa)
-                        free_capa(md->oss_capa);
-                if (md->mds_capa)
-                        free_capa(md->mds_capa);
+                if (md->oss_capa) {
+                        capa_put(md->oss_capa);
+                        md->oss_capa = NULL;
+                }
+                if (md->mds_capa) {
+                        capa_put(md->mds_capa);
+                        md->mds_capa = NULL;
+                }
 #ifdef CONFIG_FS_POSIX_ACL
                 posix_acl_release(md->posix_acl);
 #endif
@@ -1689,6 +1693,8 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf)
         default:
                 rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars,
                                               lcfg, obd);
+               if (rc > 0)
+                       rc = 0;
                 break;
         }
         return(rc);
@@ -1862,13 +1868,12 @@ struct md_ops mdc_md_ops = {
         .m_set_open_replay_data = mdc_set_open_replay_data,
         .m_clear_open_replay_data = mdc_clear_open_replay_data,
         .m_renew_capa       = mdc_renew_capa,
+        .m_unpack_capa      = mdc_unpack_capa,
         .m_get_remote_perm  = mdc_get_remote_perm,
         .m_intent_getattr_async = mdc_intent_getattr_async,
         .m_revalidate_lock      = mdc_revalidate_lock
 };
 
-extern quota_interface_t mdc_quota_interface;
-
 int __init mdc_init(void)
 {
         int rc;
index f1568ea..bfecc0c 100644 (file)
@@ -1,6 +1,6 @@
 MODULES := mdd
 mdd-objs := mdd_object.o mdd_lov.o mdd_orphans.o mdd_lproc.o mdd_dir.o
-mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o
+mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o mdd_quota.o
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
 
index b84f3b4..26a905f 100644 (file)
@@ -146,7 +146,7 @@ static int mdd_process_config(const struct lu_env *env,
 
                 lprocfs_mdd_init_vars(&lvars);
                 rc = class_process_proc_param(PARAM_MDD, lvars.obd_vars, cfg,m);
-                if (rc == -ENOSYS)
+                if (rc > 0 || rc == -ENOSYS)
                         /* we don't understand; pass it on */
                         rc = next->ld_ops->ldo_process_config(env, next, cfg);
                 break;
@@ -406,6 +406,25 @@ const struct md_device_operations mdd_ops = {
         .mdo_maxsize_get    = mdd_maxsize_get,
         .mdo_init_capa_ctxt = mdd_init_capa_ctxt,
         .mdo_update_capa_key= mdd_update_capa_key,
+#ifdef HAVE_QUOTA_SUPPORT
+        .mdo_quota          = {
+                .mqo_notify      = mdd_quota_notify,
+                .mqo_setup       = mdd_quota_setup,
+                .mqo_cleanup     = mdd_quota_cleanup,
+                .mqo_recovery    = mdd_quota_recovery,
+                .mqo_check       = mdd_quota_check,
+                .mqo_on          = mdd_quota_on,
+                .mqo_off         = mdd_quota_off,
+                .mqo_setinfo     = mdd_quota_setinfo,
+                .mqo_getinfo     = mdd_quota_getinfo,
+                .mqo_setquota    = mdd_quota_setquota,
+                .mqo_getquota    = mdd_quota_getquota,
+                .mqo_getoinfo    = mdd_quota_getoinfo,
+                .mqo_getoquota   = mdd_quota_getoquota,
+                .mqo_invalidate  = mdd_quota_invalidate,
+                .mqo_finvalidate = mdd_quota_finvalidate
+        }
+#endif
 };
 
 static struct lu_device_type_operations mdd_device_type_ops = {
index 3d96f45..7450c1e 100644 (file)
@@ -514,10 +514,13 @@ static int __mdd_index_insert(const struct lu_env *env, struct mdd_object *pobj,
         ENTRY;
 
         if (dt_try_as_dir(env, next)) {
+                struct md_ucred  *uc = md_ucred(env);
+
                 rc = next->do_index_ops->dio_insert(env, next,
                                                     __mdd_fid_rec(env, lf),
                                                     (const struct dt_key *)name,
-                                                    handle, capa);
+                                                    handle, capa, uc->mu_cap &
+                                                    CFS_CAP_SYS_RESOURCE_MASK);
         } else {
                 rc = -ENOTDIR;
         }
@@ -570,10 +573,13 @@ __mdd_index_insert_only(const struct lu_env *env, struct mdd_object *pobj,
         ENTRY;
 
         if (dt_try_as_dir(env, next)) {
+                struct md_ucred  *uc = md_ucred(env);
+
                 rc = next->do_index_ops->dio_insert(env, next,
                                                     __mdd_fid_rec(env, lf),
                                                     (const struct dt_key *)name,
-                                                    handle, capa);
+                                                    handle, capa, uc->mu_cap &
+                                                    CFS_CAP_SYS_RESOURCE_MASK);
         } else {
                 rc = -ENOTDIR;
         }
@@ -591,13 +597,35 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj,
         struct mdd_device *mdd = mdo2mdd(src_obj);
         struct dynlock_handle *dlh;
         struct thandle *handle;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, rec_pending = 0;
+#endif
         int rc;
         ENTRY;
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_tobj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        quota_opc = FSFILT_OP_LINK;
+                        mdd_quota_wrapper(la_tmp, qids);
+                        /* get block quota for parent */
+                        lquota_chkquota(mds_quota_interface_ref, obd,
+                                        qids[USRQUOTA], qids[GRPQUOTA], 1,
+                                        &rec_pending, NULL, LQUOTA_FLAGS_BLK);
+                }
+        }
+#endif
+
         mdd_txn_param_build(env, mdd, MDD_TXN_LINK_OP);
         handle = mdd_trans_start(env, mdd);
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out_pending, rc = PTR_ERR(handle));
 
         dlh = mdd_pdo_write_lock(env, mdd_tobj, name, MOR_TGT_CHILD);
         if (dlh == NULL)
@@ -632,6 +660,19 @@ out_unlock:
         mdd_pdo_write_unlock(env, mdd_tobj, dlh);
 out_trans:
         mdd_trans_stop(env, mdd, rc, handle);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc) {
+                if (rec_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qids[USRQUOTA], qids[GRPQUOTA],
+                                              1, 1);
+                /* Trigger dqacq for the parent owner. If failed,
+                 * the next call for lquota_chkquota will process it. */
+                lquota_adjust(mds_quota_interface_ref, obd, 0, qids, rc,
+                              quota_opc);
+        }
+#endif
         return rc;
 }
 
@@ -689,6 +730,13 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj,
         struct mdd_device *mdd = mdo2mdd(pobj);
         struct dynlock_handle *dlh;
         struct thandle    *handle;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qcids[MAXQUOTAS] = { 0, 0 };
+        unsigned int qpids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0;
+#endif
         int rc, is_dir;
         ENTRY;
 
@@ -738,6 +786,23 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj,
                 GOTO(cleanup, rc);
 
         rc = mdd_finish_unlink(env, mdd_cobj, ma, handle);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota && ma->ma_valid & MA_INODE &&
+            ma->ma_attr.la_nlink == 0) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_pobj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        mdd_quota_wrapper(la_tmp, qpids);
+                        if (mdd_cobj->mod_count == 0) {
+                                quota_opc = FSFILT_OP_UNLINK;
+                                mdd_quota_wrapper(&ma->ma_attr, qcids);
+                        } else {
+                                quota_opc = FSFILT_OP_UNLINK_PARTIAL_PARENT;
+                        }
+                }
+        }
+#endif
 
         if (rc == 0)
                 obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp,
@@ -749,6 +814,13 @@ cleanup:
         mdd_pdo_write_unlock(env, mdd_pobj, dlh);
 out_trans:
         mdd_trans_stop(env, mdd, rc, handle);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc)
+                /* Trigger dqrel on the owner of child and parent. If failed,
+                 * the next call for lquota_chkquota will process it. */
+                lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc,
+                              quota_opc);
+#endif
         return rc;
 }
 
@@ -785,13 +857,41 @@ static int mdd_name_insert(const struct lu_env *env,
         struct dynlock_handle *dlh;
         struct thandle *handle;
         int is_dir = S_ISDIR(ma->ma_attr.la_mode);
+#ifdef HAVE_QUOTA_SUPPORT
+        struct md_ucred *uc = md_ucred(env);
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, rec_pending = 0;
+        cfs_cap_t save = uc->mu_cap;
+#endif
         int rc;
         ENTRY;
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                if (!(ma->ma_attr_flags & MDS_QUOTA_IGNORE)) {
+                        struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                        rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA);
+                        if (!rc) {
+                                quota_opc = FSFILT_OP_LINK;
+                                mdd_quota_wrapper(la_tmp, qids);
+                                /* get block quota for parent */
+                                lquota_chkquota(mds_quota_interface_ref, obd,
+                                                qids[USRQUOTA], qids[GRPQUOTA],
+                                                1, &rec_pending, NULL,
+                                                LQUOTA_FLAGS_BLK);
+                        }
+                } else {
+                        uc->mu_cap |= CFS_CAP_SYS_RESOURCE_MASK;
+                }
+        }
+#endif
         mdd_txn_param_build(env, mdd, MDD_TXN_INDEX_INSERT_OP);
         handle = mdd_trans_start(env, mdo2mdd(pobj));
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out_pending, rc = PTR_ERR(handle));
 
         dlh = mdd_pdo_write_lock(env, mdd_obj, name, MOR_TGT_PARENT);
         if (dlh == NULL)
@@ -823,6 +923,23 @@ out_unlock:
         mdd_pdo_write_unlock(env, mdd_obj, dlh);
 out_trans:
         mdd_trans_stop(env, mdo2mdd(pobj), rc, handle);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                if (quota_opc) {
+                        if (rec_pending)
+                                lquota_pending_commit(mds_quota_interface_ref,
+                                                      obd, qids[USRQUOTA],
+                                                      qids[GRPQUOTA], 1, 1);
+                        /* Trigger dqacq for the parent owner. If failed,
+                         * the next call for lquota_chkquota will process it*/
+                        lquota_adjust(mds_quota_interface_ref, obd, 0, qids,
+                                      rc, quota_opc);
+                } else {
+                        uc->mu_cap = save;
+                }
+        }
+#endif
         return rc;
 }
 
@@ -858,13 +975,30 @@ static int mdd_name_remove(const struct lu_env *env,
         struct dynlock_handle *dlh;
         struct thandle *handle;
         int is_dir = S_ISDIR(ma->ma_attr.la_mode);
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0;
+#endif
         int rc;
         ENTRY;
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        quota_opc = FSFILT_OP_UNLINK_PARTIAL_PARENT;
+                        mdd_quota_wrapper(la_tmp, qids);
+                }
+        }
+#endif
         mdd_txn_param_build(env, mdd, MDD_TXN_INDEX_DELETE_OP);
         handle = mdd_trans_start(env, mdd);
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out_pending, rc = PTR_ERR(handle));
 
         dlh = mdd_pdo_write_lock(env, mdd_obj, name, MOR_TGT_PARENT);
         if (dlh == NULL)
@@ -896,6 +1030,14 @@ out_unlock:
         mdd_pdo_write_unlock(env, mdd_obj, dlh);
 out_trans:
         mdd_trans_stop(env, mdd, rc, handle);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        /* Trigger dqrel for the parent owner.
+         * If failed, the next call for lquota_chkquota will process it. */
+        if (quota_opc)
+                lquota_adjust(mds_quota_interface_ref, obd, 0, qids, rc,
+                              quota_opc);
+#endif
         return rc;
 }
 
@@ -939,13 +1081,35 @@ static int mdd_rename_tgt(const struct lu_env *env,
         struct mdd_device *mdd = mdo2mdd(pobj);
         struct dynlock_handle *dlh;
         struct thandle *handle;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qcids[MAXQUOTAS] = { 0, 0 };
+        unsigned int qpids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, rec_pending = 0;
+#endif
         int rc;
         ENTRY;
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota && !tobj) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_tpobj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        quota_opc = FSFILT_OP_LINK;
+                        mdd_quota_wrapper(la_tmp, qpids);
+                        /* get block quota for target parent */
+                        lquota_chkquota(mds_quota_interface_ref, obd,
+                                        qpids[USRQUOTA], qpids[GRPQUOTA], 1,
+                                        &rec_pending, NULL, LQUOTA_FLAGS_BLK);
+                }
+        }
+#endif
         mdd_txn_param_build(env, mdd, MDD_TXN_RENAME_TGT_OP);
         handle = mdd_trans_start(env, mdd);
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out_pending, rc = PTR_ERR(handle));
 
         dlh = mdd_pdo_write_lock(env, mdd_tpobj, name, MOR_TGT_PARENT);
         if (dlh == NULL)
@@ -998,6 +1162,14 @@ static int mdd_rename_tgt(const struct lu_env *env,
                 rc = mdd_finish_unlink(env, mdd_tobj, ma, handle);
                 if (rc)
                         GOTO(cleanup, rc);
+
+#ifdef HAVE_QUOTA_SUPPORT
+                if (mds->mds_quota && ma->ma_valid & MA_INODE &&
+                    ma->ma_attr.la_nlink == 0 && mdd_tobj->mod_count == 0) {
+                        quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD;
+                        mdd_quota_wrapper(&ma->ma_attr, qcids);
+                }
+#endif
         }
         EXIT;
 cleanup:
@@ -1006,6 +1178,22 @@ cleanup:
         mdd_pdo_write_unlock(env, mdd_tpobj, dlh);
 out_trans:
         mdd_trans_stop(env, mdd, rc, handle);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                if (rec_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qpids[USRQUOTA],
+                                              qpids[GRPQUOTA],
+                                              1, 1);
+                if (quota_opc)
+                        /* Trigger dqrel/dqacq on the target owner of child and
+                         * parent. If failed, the next call for lquota_chkquota
+                         * will process it. */
+                        lquota_adjust(mds_quota_interface_ref, obd, qcids,
+                                      qpids, rc, quota_opc);
+        }
+#endif
         return rc;
 }
 
@@ -1284,6 +1472,14 @@ static int mdd_create(const struct lu_env *env,
         char                   *name = lname->ln_name;
         int rc, created = 0, initialized = 0, inserted = 0, lmm_size = 0;
         int got_def_acl = 0;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qcids[MAXQUOTAS] = { 0, 0 };
+        unsigned int qpids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, block_count = 0;
+        int inode_pending = 0, block_pending = 0, parent_pending = 0;
+#endif
         ENTRY;
 
         /*
@@ -1327,6 +1523,51 @@ static int mdd_create(const struct lu_env *env,
         if (rc)
                 RETURN(rc);
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_pobj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        int same = 0;
+
+                        quota_opc = FSFILT_OP_CREATE;
+                        mdd_quota_wrapper(&ma->ma_attr, qcids);
+                        mdd_quota_wrapper(la_tmp, qpids);
+                        /* get file quota for child */
+                        lquota_chkquota(mds_quota_interface_ref, obd,
+                                        qcids[USRQUOTA], qcids[GRPQUOTA], 1,
+                                        &inode_pending, NULL, 0);
+                        switch (ma->ma_attr.la_mode & S_IFMT) {
+                        case S_IFLNK:
+                        case S_IFDIR:
+                                block_count = 2;
+                                break;
+                        case S_IFREG:
+                                block_count = 1;
+                                break;
+                        }
+                        if (qcids[USRQUOTA] == qpids[USRQUOTA] &&
+                            qcids[GRPQUOTA] == qpids[GRPQUOTA]) {
+                                block_count += 1;
+                                same = 1;
+                        }
+                        /* get block quota for child and parent */
+                        if (block_count)
+                                lquota_chkquota(mds_quota_interface_ref, obd,
+                                                qcids[USRQUOTA], qcids[GRPQUOTA],
+                                                block_count,
+                                                &block_pending, NULL,
+                                                LQUOTA_FLAGS_BLK);
+                        if (!same)
+                                lquota_chkquota(mds_quota_interface_ref, obd,
+                                                qpids[USRQUOTA], qpids[GRPQUOTA], 1,
+                                                &parent_pending, NULL,
+                                                LQUOTA_FLAGS_BLK);
+                }
+        }
+#endif
+
         /*
          * No RPC inside the transaction, so OST objects should be created at
          * first.
@@ -1335,7 +1576,7 @@ static int mdd_create(const struct lu_env *env,
                 rc = mdd_lov_create(env, mdd, mdd_pobj, son, &lmm, &lmm_size,
                                     spec, attr);
                 if (rc)
-                        RETURN(rc);
+                        GOTO(out_pending, rc);
         }
 
         if (!S_ISLNK(attr->la_mode)) {
@@ -1422,6 +1663,7 @@ static int mdd_create(const struct lu_env *env,
         }
 
         if (S_ISLNK(attr->la_mode)) {
+                struct md_ucred  *uc = md_ucred(env);
                 struct dt_object *dt = mdd_object_child(son);
                 const char *target_name = spec->u.sp_symname;
                 int sym_len = strlen(target_name);
@@ -1430,7 +1672,9 @@ static int mdd_create(const struct lu_env *env,
 
                 buf = mdd_buf_get_const(env, target_name, sym_len);
                 rc = dt->do_body_ops->dbo_write(env, dt, buf, &pos, handle,
-                                                mdd_object_capa(env, son));
+                                                mdd_object_capa(env, son),
+                                                uc->mu_cap &
+                                                CFS_CAP_SYS_RESOURCE_MASK);
 
                 if (rc == sym_len)
                         rc = 0;
@@ -1479,6 +1723,27 @@ out_trans:
 out_free:
         /* finis lov_create stuff, free all temporary data */
         mdd_lov_create_finish(env, mdd, lmm, lmm_size, spec);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc) {
+                if (inode_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qcids[USRQUOTA], qcids[GRPQUOTA],
+                                              1, 0);
+                if (block_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qcids[USRQUOTA], qcids[GRPQUOTA],
+                                              block_count, 1);
+                if (parent_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qpids[USRQUOTA], qpids[GRPQUOTA],
+                                              1, 1);
+                /* Trigger dqacq on the owner of child and parent. If failed,
+                 * the next call for lquota_chkquota will process it. */
+                lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc,
+                              quota_opc);
+        }
+#endif
         return rc;
 }
 
@@ -1581,8 +1846,15 @@ static int mdd_rename(const struct lu_env *env,
         struct mdd_object *mdd_tobj = NULL;
         struct dynlock_handle *sdlh, *tdlh;
         struct thandle *handle;
-        int is_dir;
-        int rc;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qspids[MAXQUOTAS] = { 0, 0 };
+        unsigned int qtcids[MAXQUOTAS] = { 0, 0 };
+        unsigned int qtpids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, rec_pending = 0;
+#endif
+        int rc, is_dir;
         ENTRY;
 
         LASSERT(ma->ma_attr.la_mode & S_IFMT);
@@ -1591,10 +1863,34 @@ static int mdd_rename(const struct lu_env *env,
         if (tobj)
                 mdd_tobj = md2mdd_obj(tobj);
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_spobj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        mdd_quota_wrapper(la_tmp, qspids);
+                        if (!tobj) {
+                                rc = mdd_la_get(env, mdd_tpobj, la_tmp,
+                                                BYPASS_CAPA);
+                                if (!rc) {
+                                        quota_opc = FSFILT_OP_LINK;
+                                        mdd_quota_wrapper(la_tmp, qtpids);
+                                        /* get block quota for target parent */
+                                        lquota_chkquota(mds_quota_interface_ref,
+                                                        obd, qtpids[USRQUOTA],
+                                                        qtpids[GRPQUOTA], 1,
+                                                        &rec_pending, NULL,
+                                                        LQUOTA_FLAGS_BLK);
+                                }
+                        }
+                }
+        }
+#endif
         mdd_txn_param_build(env, mdd, MDD_TXN_RENAME_OP);
         handle = mdd_trans_start(env, mdd);
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out_pending, rc = PTR_ERR(handle));
 
         /* FIXME: Should consider tobj and sobj too in rename_lock. */
         rc = mdd_rename_order(env, mdd, mdd_spobj, mdd_tpobj);
@@ -1680,6 +1976,14 @@ static int mdd_rename(const struct lu_env *env,
                 mdd_write_unlock(env, mdd_tobj);
                 if (rc)
                         GOTO(cleanup, rc);
+
+#ifdef HAVE_QUOTA_SUPPORT
+                if (mds->mds_quota && ma->ma_valid & MA_INODE &&
+                    ma->ma_attr.la_nlink == 0 && mdd_tobj->mod_count == 0) {
+                        quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD;
+                        mdd_quota_wrapper(&ma->ma_attr, qtcids);
+                }
+#endif
         }
 
         la->la_valid = LA_CTIME | LA_MTIME;
@@ -1703,6 +2007,27 @@ cleanup_unlocked:
         mdd_trans_stop(env, mdd, rc, handle);
         if (mdd_sobj)
                 mdd_object_put(env, mdd_sobj);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                if (rec_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qtpids[USRQUOTA],
+                                              qtpids[GRPQUOTA],
+                                              1, 1);
+                /* Trigger dqrel on the source owner of parent.
+                 * If failed, the next call for lquota_chkquota will
+                 * process it. */
+                lquota_adjust(mds_quota_interface_ref, obd, 0, qspids, rc,
+                              FSFILT_OP_UNLINK_PARTIAL_PARENT);
+                if (quota_opc)
+                        /* Trigger dqrel/dqacq on the target owner of child and
+                         * parent. If failed, the next call for lquota_chkquota
+                         * will process it. */
+                        lquota_adjust(mds_quota_interface_ref, obd, qtcids,
+                                      qtpids, rc, quota_opc);
+        }
+#endif
         return rc;
 }
 
index a2cdc61..16de10a 100644 (file)
 #include <linux/sched.h>
 #include <linux/capability.h>
 #include <linux/dynlocks.h>
+#ifdef HAVE_QUOTA_SUPPORT
+# include <lustre_quota.h>
+#endif
+#include <lustre_fsfilt.h>
+
+#ifdef HAVE_QUOTA_SUPPORT
+/* quota stuff */
+extern quota_interface_t *mds_quota_interface_ref;
+
+/*
+ * Fill a quota-id pair from an attribute set: the user slot receives
+ * la->la_uid and the group slot receives la->la_gid.
+ *
+ * The slots are addressed with USRQUOTA/GRPQUOTA rather than literal
+ * 0/1, because that is how the callers read the array back
+ * (e.g. qids[USRQUOTA], qids[GRPQUOTA]).
+ *
+ * \param la    attributes to take the owner ids from
+ * \param qids  output array; must have room for both slots
+ */
+static inline void mdd_quota_wrapper(struct lu_attr *la, unsigned int *qids)
+{
+        qids[USRQUOTA] = la->la_uid;
+        qids[GRPQUOTA] = la->la_gid;
+}
+#endif
 
 enum mdd_txn_op {
         MDD_TXN_OBJECT_DESTROY_OP = 0,
@@ -146,6 +161,7 @@ struct mdd_thread_info {
         int                       mti_max_lmm_size;
         struct llog_cookie       *mti_max_cookie;
         int                       mti_max_cookie_size;
+        struct obd_quotactl       mti_oqctl;
 };
 
 struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env,
@@ -293,11 +309,44 @@ int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj);
 extern const struct md_dir_operations    mdd_dir_ops;
 extern const struct md_object_operations mdd_obj_ops;
 
+/* mdd_quota.c */
+#ifdef HAVE_QUOTA_SUPPORT
+int mdd_quota_notify(const struct lu_env *env, struct md_device *m);
+int mdd_quota_setup(const struct lu_env *env, struct md_device *m,
+                    void *data);
+int mdd_quota_cleanup(const struct lu_env *env, struct md_device *m);
+int mdd_quota_recovery(const struct lu_env *env, struct md_device *m);
+int mdd_quota_check(const struct lu_env *env, struct md_device *m,
+                    struct obd_export *exp, __u32 type);
+int mdd_quota_on(const struct lu_env *env, struct md_device *m,
+                 __u32 type, __u32 id);
+int mdd_quota_off(const struct lu_env *env, struct md_device *m,
+                  __u32 type, __u32 id);
+int mdd_quota_setinfo(const struct lu_env *env, struct md_device *m,
+                      __u32 type, __u32 id, struct obd_dqinfo *dqinfo);
+int mdd_quota_getinfo(const struct lu_env *env, const struct md_device *m,
+                      __u32 type, __u32 id, struct obd_dqinfo *dqinfo);
+int mdd_quota_setquota(const struct lu_env *env, struct md_device *m,
+                       __u32 type, __u32 id, struct obd_dqblk *dqblk);
+int mdd_quota_getquota(const struct lu_env *env, const struct md_device *m,
+                       __u32 type, __u32 id, struct obd_dqblk *dqblk);
+int mdd_quota_getoinfo(const struct lu_env *env, const struct md_device *m,
+                       __u32 type, __u32 id, struct obd_dqinfo *dqinfo);
+int mdd_quota_getoquota(const struct lu_env *env, const struct md_device *m,
+                        __u32 type, __u32 id, struct obd_dqblk *dqblk);
+int mdd_quota_invalidate(const struct lu_env *env, struct md_device *m,
+                         __u32 type);
+int mdd_quota_finvalidate(const struct lu_env *env, struct md_device *m,
+                          __u32 type);
+#endif
+
 /* mdd_trans.c */
 void mdd_txn_param_build(const struct lu_env *env, struct mdd_device *mdd,
                          enum mdd_txn_op);
 int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj,
                             struct md_attr *ma, enum mdd_txn_op);
+int mdd_setattr_txn_param_build(const struct lu_env *env, struct md_object *obj,
+                                struct md_attr *ma, enum mdd_txn_op);
 
 static inline void mdd_object_put(const struct lu_env *env,
                                   struct mdd_object *o)
index 38bc569..5e5bd18 100644 (file)
@@ -145,7 +145,7 @@ int mdd_init_obd(const struct lu_env *env, struct mdd_device *mdd,
 
         /*
          * Add here for obd notify mechanism, when adding a new ost, the mds
-         * will notify this mdd.
+         * will notify this mdd. The mds will be used for quota also.
          */
         obd->obd_upcall.onu_upcall = mdd_notify;
         obd->obd_upcall.onu_owner = mdd;
@@ -520,16 +520,11 @@ int mdd_lov_create(const struct lu_env *env, struct mdd_device *mdd,
                 oa->o_valid |= OBD_MD_FLFID | OBD_MD_FLGENER;
                 oinfo->oi_oa = oa;
                 oinfo->oi_md = lsm;
-                oinfo->oi_capa = mdo_capa_get(env, child, NULL,
-                                              CAPA_OPC_MDS_DEFAULT);
+                oinfo->oi_capa = NULL;
                 oinfo->oi_policy.l_extent.start = la->la_size;
                 oinfo->oi_policy.l_extent.end = OBD_OBJECT_EOF;
 
-                if (IS_ERR(oinfo->oi_capa))
-                        oinfo->oi_capa = NULL;
-
                 rc = obd_punch_rqset(lov_exp, oinfo, oti);
-                capa_put(oinfo->oi_capa);
                 if (rc) {
                         CERROR("Error setting attrs for "DFID": rc %d\n",
                                PFID(mdo2fid(child)), rc);
@@ -752,7 +747,6 @@ int mdd_lov_setattr_async(const struct lu_env *env, struct mdd_object *obj,
         struct obd_device   *obd = mdd2obd_dev(mdd);
         struct lu_attr      *tmp_la = &mdd_env_info(env)->mti_la;
         const struct lu_fid *fid = mdd_object_fid(obj);
-        struct obd_capa     *oc;
         int rc = 0;
         ENTRY;
 
@@ -762,15 +756,8 @@ int mdd_lov_setattr_async(const struct lu_env *env, struct mdd_object *obj,
         if (rc)
                 RETURN(rc);
 
-        oc = mdo_capa_get(env, obj, NULL, CAPA_OPC_MDS_DEFAULT);
-        if (IS_ERR(oc))
-                oc = NULL;
-
         rc = mdd_osc_setattr_async(obd, tmp_la->la_uid, tmp_la->la_gid, lmm,
                                    lmm_size, logcookies, fid_seq(fid),
-                                   fid_oid(fid), oc);
-
-        capa_put(oc);
-
+                                   fid_oid(fid), NULL);
         RETURN(rc);
 }
index 2d8bc67..9178114 100644 (file)
@@ -150,8 +150,29 @@ static int lprocfs_rd_atime_diff(char *page, char **start, off_t off,
         return snprintf(page, count, "%lu\n", mdd->mdd_atime_diff);
 }
 
+#ifdef HAVE_QUOTA_SUPPORT
+/*
+ * procfs read handler: \a data is the mdd device; the request is passed
+ * unchanged to lprocfs_quota_rd_type() on that device's obd device.
+ */
+static int mdd_lprocfs_quota_rd_type(char *page, char **start, off_t off,
+                                     int count, int *eof, void *data)
+{
+        struct mdd_device *m = data;
+        struct obd_device *obd = m->mdd_obd_dev;
+
+        return lprocfs_quota_rd_type(page, start, off, count, eof, obd);
+}
+
+/*
+ * procfs write handler: \a data is the mdd device; the request is passed
+ * unchanged to lprocfs_quota_wr_type() on that device's obd device.
+ */
+static int mdd_lprocfs_quota_wr_type(struct file *file, const char *buffer,
+                                     unsigned long count, void *data)
+{
+        struct mdd_device *m = data;
+        struct obd_device *obd = m->mdd_obd_dev;
+
+        return lprocfs_quota_wr_type(file, buffer, count, obd);
+}
+#endif
+
 static struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
         { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 },
+#ifdef HAVE_QUOTA_SUPPORT
+        { "quota_type",      mdd_lprocfs_quota_rd_type,
+                             mdd_lprocfs_quota_wr_type, 0 },
+#endif
         { 0 }
 };
 
index 5cf0a15..c13cdd6 100644 (file)
@@ -840,9 +840,18 @@ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
         struct llog_cookie *logcookies = NULL;
         int  rc, lmm_size = 0, cookie_size = 0;
         struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qnids[MAXQUOTAS] = { 0, 0 };
+        unsigned int qoids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, block_count = 0;
+        int inode_pending = 0, block_pending = 0;
+#endif
         ENTRY;
 
-        mdd_txn_param_build(env, mdd, MDD_TXN_ATTR_SET_OP);
+        mdd_setattr_txn_param_build(env, obj, (struct md_attr *)ma,
+                                    MDD_TXN_ATTR_SET_OP);
         handle = mdd_trans_start(env, mdd);
         if (IS_ERR(handle))
                 RETURN(PTR_ERR(handle));
@@ -871,6 +880,31 @@ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
         if (rc)
                 GOTO(cleanup, rc);
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota && la_copy->la_valid & (LA_UID | LA_GID)) {
+                struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
+
+                rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA);
+                if (!rc) {
+                        quota_opc = FSFILT_OP_SETATTR;
+                        mdd_quota_wrapper(la_copy, qnids);
+                        mdd_quota_wrapper(la_tmp, qoids);
+                        /* get file quota for new owner */
+                        lquota_chkquota(mds_quota_interface_ref, obd,
+                                        qnids[USRQUOTA], qnids[GRPQUOTA], 1,
+                                        &inode_pending, NULL, 0);
+                        block_count = (la_tmp->la_blocks + 7) >> 3;
+                        if (block_count)
+                                /* get block quota for new owner */
+                                lquota_chkquota(mds_quota_interface_ref, obd,
+                                                qnids[USRQUOTA],
+                                                qnids[GRPQUOTA],
+                                                block_count, &block_pending,
+                                                NULL, LQUOTA_FLAGS_BLK);
+                }
+        }
+#endif
+
         if (la_copy->la_valid & LA_FLAGS) {
                 rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy,
                                                   handle, 1);
@@ -913,6 +947,23 @@ cleanup:
                 rc = mdd_lov_setattr_async(env, mdd_obj, lmm, lmm_size,
                                            logcookies);
         }
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc) {
+                if (inode_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qnids[USRQUOTA], qnids[GRPQUOTA],
+                                              1, 0);
+                if (block_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qnids[USRQUOTA], qnids[GRPQUOTA],
+                                              block_count, 1);
+                /* Trigger dqrel/dqacq for original owner and new owner.
+                 * If failed, the next call for lquota_chkquota will
+                 * process it. */
+                lquota_adjust(mds_quota_interface_ref, obd, qnids, qoids, rc,
+                              quota_opc);
+        }
+#endif
         RETURN(rc);
 }
 
@@ -1020,6 +1071,12 @@ static int mdd_ref_del(const struct lu_env *env, struct md_object *obj,
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct mdd_device *mdd = mdo2mdd(obj);
         struct thandle *handle;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0;
+#endif
         int rc;
         ENTRY;
 
@@ -1062,11 +1119,26 @@ static int mdd_ref_del(const struct lu_env *env, struct md_object *obj,
                 GOTO(cleanup, rc);
 
         rc = mdd_finish_unlink(env, mdd_obj, ma, handle);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota && ma->ma_valid & MA_INODE &&
+            ma->ma_attr.la_nlink == 0 && mdd_obj->mod_count == 0) {
+                quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD;
+                mdd_quota_wrapper(&ma->ma_attr, qids);
+        }
+#endif
+
 
         EXIT;
 cleanup:
         mdd_write_unlock(env, mdd_obj);
         mdd_trans_stop(env, mdd, rc, handle);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc)
+                /* Trigger dqrel on the owner of child. If failed,
+                 * the next call for lquota_chkquota will process it */
+                lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc,
+                              quota_opc);
+#endif
         return rc;
 }
 
@@ -1105,13 +1177,45 @@ static int mdd_object_create(const struct lu_env *env,
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         const struct lu_fid *pfid = spec->u.sp_pfid;
         struct thandle *handle;
-        int rc;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0, block_count = 0;
+        int inode_pending = 0, block_pending = 0;
+#endif
+        int rc = 0;
         ENTRY;
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if (mds->mds_quota) {
+                quota_opc = FSFILT_OP_CREATE_PARTIAL_CHILD;
+                mdd_quota_wrapper(&ma->ma_attr, qids);
+                /* get file quota for child */
+                lquota_chkquota(mds_quota_interface_ref, obd, qids[USRQUOTA],
+                                qids[GRPQUOTA], 1, &inode_pending, NULL, 0);
+                switch (ma->ma_attr.la_mode & S_IFMT) {
+                case S_IFLNK:
+                case S_IFDIR:
+                        block_count = 2;
+                        break;
+                case S_IFREG:
+                        block_count = 1;
+                        break;
+                }
+                /* get block quota for child */
+                if (block_count)
+                        lquota_chkquota(mds_quota_interface_ref, obd,
+                                        qids[USRQUOTA], qids[GRPQUOTA],
+                                        block_count, &block_pending, NULL,
+                                        LQUOTA_FLAGS_BLK);
+        }
+#endif
+
         mdd_txn_param_build(env, mdd, MDD_TXN_OBJECT_CREATE_OP);
         handle = mdd_trans_start(env, mdd);
         if (IS_ERR(handle))
-                RETURN(PTR_ERR(handle));
+                GOTO(out_pending, rc = PTR_ERR(handle));
 
         mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
         rc = mdd_oc_sanity_check(env, mdd_obj, ma);
@@ -1167,6 +1271,23 @@ unlock:
         mdd_write_unlock(env, mdd_obj);
 
         mdd_trans_stop(env, mdd, rc, handle);
+out_pending:
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc) {
+                if (inode_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qids[USRQUOTA], qids[GRPQUOTA],
+                                              1, 0);
+                if (block_pending)
+                        lquota_pending_commit(mds_quota_interface_ref, obd,
+                                              qids[USRQUOTA], qids[GRPQUOTA],
+                                              block_count, 1);
+                /* Trigger dqacq on the owner of child. If failed,
+                 * the next call for lquota_chkquota will process it. */
+                lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc,
+                              FSFILT_OP_CREATE_PARTIAL_CHILD);
+        }
+#endif
         return rc;
 }
 
@@ -1336,6 +1457,12 @@ static int mdd_close(const struct lu_env *env, struct md_object *obj,
         int rc;
         struct mdd_object *mdd_obj = md2mdd_obj(obj);
         struct thandle    *handle;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct obd_device *obd = mdo2mdd(obj)->mdd_obd_dev;
+        struct mds_obd *mds = &obd->u.mds;
+        unsigned int qids[MAXQUOTAS] = { 0, 0 };
+        int quota_opc = 0;
+#endif
         ENTRY;
 
         rc = mdd_log_txn_param_build(env, obj, ma, MDD_TXN_UNLINK_OP);
@@ -1350,13 +1477,27 @@ static int mdd_close(const struct lu_env *env, struct md_object *obj,
         mdd_obj->mod_count --;
 
         rc = mdd_iattr_get(env, mdd_obj, ma);
-        if (rc == 0 && mdd_obj->mod_count == 0 && ma->ma_attr.la_nlink == 0)
+        if (rc == 0 && mdd_obj->mod_count == 0 && ma->ma_attr.la_nlink == 0) {
                 rc = mdd_object_kill(env, mdd_obj, ma);
-        else
+#ifdef HAVE_QUOTA_SUPPORT
+                if (mds->mds_quota) {
+                        quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD;
+                        mdd_quota_wrapper(&ma->ma_attr, qids);
+                }
+#endif
+        } else {
                 ma->ma_valid &= ~(MA_LOV | MA_COOKIE);
+        }
 
         mdd_write_unlock(env, mdd_obj);
         mdd_trans_stop(env, mdo2mdd(obj), rc, handle);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (quota_opc)
+                /* Trigger dqrel on the owner of child. If failed,
+                 * the next call for lquota_chkquota will process it */
+                lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc,
+                              quota_opc);
+#endif
         RETURN(rc);
 }
 
index 24a134e..940a4df 100644 (file)
@@ -89,7 +89,7 @@ static int orph_index_insert(const struct lu_env *env,
 
         rc = dor->do_index_ops->dio_insert(env, dor, (struct dt_rec *)offset,
                                            (struct dt_key *)key, th,
-                                           BYPASS_CAPA);
+                                           BYPASS_CAPA, 1);
         RETURN(rc);
 }
 
index 80e5e83..efbc52a 100644 (file)
@@ -386,7 +386,7 @@ int mdd_capa_get(const struct lu_env *env, struct md_object *obj,
                           capa->lc_opc);
         if (IS_ERR(oc)) {
                 rc = PTR_ERR(oc);
-        } else {
+        } else if (likely(oc != NULL)) {
                 capa_cpy(capa, oc);
                 capa_put(oc);
         }
diff --git a/lustre/mdd/mdd_quota.c b/lustre/mdd/mdd_quota.c
new file mode 100644 (file)
index 0000000..7bc92cc
--- /dev/null
@@ -0,0 +1,276 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mdd/mdd_quota.c
+ *
+ * Lustre Metadata Server (mdd) routines
+ *
+ * Author: Fan Yong <Yong.Fan@Sun.Com>
+ */
+
+#ifdef HAVE_QUOTA_SUPPORT
+
+#include "mdd_internal.h"
+
+/*
+ * Quota notification entry point for the mdd device \a m.
+ *
+ * Calls lquota_setinfo() on the mdd's obd device with the fixed argument
+ * (void *)1.  Always returns 0.
+ */
+int mdd_quota_notify(const struct lu_env *env, struct md_device *m)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        ENTRY;
+
+        lquota_setinfo(mds_quota_interface_ref, obd, (void *)1);
+
+        RETURN(0);
+}
+
+/*
+ * Set up quota for the mdd device \a m.
+ *
+ * Asserts that the obd device has obd_fsops set, asks the child dt device
+ * to initialise its quota context (handing \a obd and \a data through),
+ * then runs lquota_setup().
+ *
+ * \retval result of lquota_setup()
+ *
+ * NOTE(review): whatever dt_init_quota_ctxt() reports is not examined
+ * here -- confirm whether that hook can fail.
+ */
+int mdd_quota_setup(const struct lu_env *env, struct md_device *m,
+                    void *data)
+{
+        struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev);
+        struct obd_device *obd = mdd->mdd_obd_dev;
+        struct dt_device *dt = mdd->mdd_child;
+        int rc;
+        ENTRY;
+
+        LASSERT(obd->obd_fsops != NULL);
+        dt->dd_ops->dt_init_quota_ctxt(env, dt, (void *)obd, data);
+        rc = lquota_setup(mds_quota_interface_ref, obd);
+        RETURN(rc);
+}
+
+/*
+ * Tear down quota for the mdd device \a m.
+ *
+ * Both lquota_cleanup() and lquota_fs_cleanup() are always run, in that
+ * order.  The first non-zero result is returned; 0 means both succeeded.
+ */
+int mdd_quota_cleanup(const struct lu_env *env, struct md_device *m)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        int cleanup_rc;
+        int fs_cleanup_rc;
+        ENTRY;
+
+        cleanup_rc = lquota_cleanup(mds_quota_interface_ref, obd);
+        fs_cleanup_rc = lquota_fs_cleanup(mds_quota_interface_ref, obd);
+
+        RETURN(cleanup_rc != 0 ? cleanup_rc : fs_cleanup_rc);
+}
+
+/*
+ * Run quota recovery for the mdd device \a m.
+ *
+ * \retval result of lquota_recovery() on the mdd's obd device
+ */
+int mdd_quota_recovery(const struct lu_env *env, struct md_device *m)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        int result;
+        ENTRY;
+
+        result = lquota_recovery(mds_quota_interface_ref, obd);
+
+        RETURN(result);
+}
+
+/*
+ * Issue a quota check of the given \a type on behalf of export \a exp.
+ *
+ * The request is built in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl).  Only qc_type is set here; no other
+ * field of the scratch request is initialised by this function.
+ *
+ * \retval result of lquota_check()
+ */
+int mdd_quota_check(const struct lu_env *env, struct md_device *m,
+                    struct obd_export *exp, __u32 type)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_type = type;
+        result = lquota_check(mds_quota_interface_ref, obd, exp, qctl);
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_QUOTAON request for (\a type, \a id) through lquota_ctl().
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl); only qc_cmd, qc_type and qc_id are
+ * written before the call.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_on(const struct lu_env *env, struct md_device *m,
+                 __u32 type, __u32 id)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_QUOTAON;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_QUOTAOFF request for (\a type, \a id) through lquota_ctl().
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl); only qc_cmd, qc_type and qc_id are
+ * written before the call.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_off(const struct lu_env *env, struct md_device *m,
+                  __u32 type, __u32 id)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_QUOTAOFF;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_SETINFO request for (\a type, \a id) through lquota_ctl().
+ *
+ * \a dqinfo is copied by value into the per-env scratch request
+ * (mdd_env_info(env)->mti_oqctl) before the call; the caller's structure
+ * is not written to.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_setinfo(const struct lu_env *env, struct md_device *m,
+                      __u32 type, __u32 id, struct obd_dqinfo *dqinfo)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_dqinfo = *dqinfo;
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_SETINFO;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_GETINFO request for (\a type, \a id) through lquota_ctl() and
+ * copy the qc_dqinfo member of the request into \a dqinfo.
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl).  The const qualifier on \a m is cast
+ * away to reach its lu device.
+ *
+ * NOTE(review): \a dqinfo is overwritten whether or not lquota_ctl()
+ * succeeded, so callers should check the return value before trusting it.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_getinfo(const struct lu_env *env, const struct md_device *m,
+                      __u32 type, __u32 id, struct obd_dqinfo *dqinfo)
+{
+        struct md_device *md = (struct md_device *)m;
+        struct obd_device *obd = lu2mdd_dev(&md->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_GETINFO;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+        *dqinfo = qctl->qc_dqinfo;
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_SETQUOTA request for (\a type, \a id) through lquota_ctl().
+ *
+ * \a dqblk is copied by value into the per-env scratch request
+ * (mdd_env_info(env)->mti_oqctl) before the call; the caller's structure
+ * is not written to.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_setquota(const struct lu_env *env, struct md_device *m,
+                       __u32 type, __u32 id, struct obd_dqblk *dqblk)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_dqblk = *dqblk;
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_SETQUOTA;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_GETQUOTA request for (\a type, \a id) through lquota_ctl() and
+ * copy the qc_dqblk member of the request into \a dqblk.
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl).  The const qualifier on \a m is cast
+ * away to reach its lu device.
+ *
+ * NOTE(review): \a dqblk is overwritten whether or not lquota_ctl()
+ * succeeded, so callers should check the return value before trusting it.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_getquota(const struct lu_env *env, const struct md_device *m,
+                       __u32 type, __u32 id, struct obd_dqblk *dqblk)
+{
+        struct md_device *md = (struct md_device *)m;
+        struct obd_device *obd = lu2mdd_dev(&md->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_GETQUOTA;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+        *dqblk = qctl->qc_dqblk;
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_GETOINFO request for (\a type, \a id) through lquota_ctl() and
+ * copy the qc_dqinfo member of the request into \a dqinfo.
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl).  The const qualifier on \a m is cast
+ * away to reach its lu device.
+ *
+ * NOTE(review): \a dqinfo is overwritten whether or not lquota_ctl()
+ * succeeded, so callers should check the return value before trusting it.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_getoinfo(const struct lu_env *env, const struct md_device *m,
+                       __u32 type, __u32 id, struct obd_dqinfo *dqinfo)
+{
+        struct md_device *md = (struct md_device *)m;
+        struct obd_device *obd = lu2mdd_dev(&md->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_GETOINFO;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+        *dqinfo = qctl->qc_dqinfo;
+
+        RETURN(result);
+}
+
+/*
+ * Send a Q_GETOQUOTA request for (\a type, \a id) through lquota_ctl() and
+ * copy the qc_dqblk member of the request into \a dqblk.
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl).  The const qualifier on \a m is cast
+ * away to reach its lu device.
+ *
+ * NOTE(review): \a dqblk is overwritten whether or not lquota_ctl()
+ * succeeded, so callers should check the return value before trusting it.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_getoquota(const struct lu_env *env, const struct md_device *m,
+                        __u32 type, __u32 id, struct obd_dqblk *dqblk)
+{
+        struct md_device *md = (struct md_device *)m;
+        struct obd_device *obd = lu2mdd_dev(&md->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_id = id;
+        qctl->qc_type = type;
+        qctl->qc_cmd = Q_GETOQUOTA;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+        *dqblk = qctl->qc_dqblk;
+
+        RETURN(result);
+}
+
+/*
+ * Send a LUSTRE_Q_INVALIDATE request for quota \a type through
+ * lquota_ctl().
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl); only qc_cmd and qc_type are written
+ * before the call.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_invalidate(const struct lu_env *env, struct md_device *m,
+                         __u32 type)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_type = type;
+        qctl->qc_cmd = LUSTRE_Q_INVALIDATE;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+
+        RETURN(result);
+}
+
+/*
+ * Send a LUSTRE_Q_FINVALIDATE request for quota \a type through
+ * lquota_ctl().
+ *
+ * The request lives in the per-env scratch buffer
+ * (mdd_env_info(env)->mti_oqctl); only qc_cmd and qc_type are written
+ * before the call.
+ *
+ * \retval result of lquota_ctl()
+ */
+int mdd_quota_finvalidate(const struct lu_env *env, struct md_device *m,
+                          __u32 type)
+{
+        struct obd_device *obd = lu2mdd_dev(&m->md_lu_dev)->mdd_obd_dev;
+        struct obd_quotactl *qctl = &mdd_env_info(env)->mti_oqctl;
+        int result;
+        ENTRY;
+
+        qctl->qc_type = type;
+        qctl->qc_cmd = LUSTRE_Q_FINVALIDATE;
+        result = lquota_ctl(mds_quota_interface_ref, obd, qctl);
+
+        RETURN(result);
+}
+#endif
index 01ab561..2c0a827 100644 (file)
@@ -135,6 +135,20 @@ int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj,
         RETURN(rc);
 }
 
+/*
+ * Build the transaction parameters for a setattr on \a obj.
+ *
+ * Starts from the regular parameters for \a op (mdd_txn_param_build()) and,
+ * when the attribute update changes the owner (LA_UID) or group (LA_GID),
+ * replaces the credit count in mti_param with
+ * dto_txn_credits[DTO_ATTR_SET_CHOWN].
+ *
+ * \retval 0 always; nothing returned by mdd_txn_param_build() is examined
+ *           here.
+ */
+int mdd_setattr_txn_param_build(const struct lu_env *env, struct md_object *obj,
+                                struct md_attr *ma, enum mdd_txn_op op)
+{
+        struct mdd_device *mdd = mdo2mdd(&md2mdd_obj(obj)->mod_obj);
+        ENTRY;
+
+        mdd_txn_param_build(env, mdd, op);
+        /* ownership change: override the credits chosen for \a op */
+        if (ma->ma_attr.la_valid & (LA_UID | LA_GID))
+                mdd_env_info(env)->mti_param.tp_credits =
+                                        dto_txn_credits[DTO_ATTR_SET_CHOWN];
+
+        RETURN(0);
+}
+
 static void mdd_txn_init_dto_credits(const struct lu_env *env,
                                      struct mdd_device *mdd, int *dto_credits)
 {
@@ -161,16 +175,18 @@ int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd)
                 mdd->mdd_tod[op].mod_op = op;
                 switch(op) {
                         case MDD_TXN_OBJECT_DESTROY_OP:
+                                /* Unused now */
                                 *c = dt[DTO_OBJECT_DELETE];
                                 break;
                         case MDD_TXN_OBJECT_CREATE_OP:
-                                /* OI_INSERT + CREATE OBJECT */
+                                /* OI INSERT + CREATE OBJECT */
                                 *c = dt[DTO_INDEX_INSERT] +
-                                        dt[DTO_OBJECT_CREATE];
+                                     dt[DTO_OBJECT_CREATE];
                                 break;
                         case MDD_TXN_ATTR_SET_OP:
                                 /* ATTR set + XATTR(lsm, lmv) set */
-                                *c = dt[DTO_ATTR_SET] + dt[DTO_XATTR_SET];
+                                *c = dt[DTO_ATTR_SET_BASE] +
+                                     dt[DTO_XATTR_SET];
                                 break;
                         case MDD_TXN_XATTR_SET_OP:
                                 *c = dt[DTO_XATTR_SET];
@@ -191,7 +207,7 @@ int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd)
                         case MDD_TXN_RENAME_OP:
                                 /* 2 delete index + 1 insert + Unlink log */
                                 *c = 2 * dt[DTO_INDEX_DELETE] +
-                                        dt[DTO_INDEX_INSERT];
+                                         dt[DTO_INDEX_INSERT];
                                 break;
                         case MDD_TXN_RENAME_TGT_OP:
                                 /* index insert + index delete */
@@ -209,7 +225,7 @@ int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd)
                                  * CREATE_OBJECT CREDITS
                                  */
                                  *c = 2 * dt[DTO_INDEX_INSERT] +
-                                         dt[DTO_OBJECT_CREATE];
+                                          dt[DTO_OBJECT_CREATE];
                                 break;
                         default:
                                 CERROR("Invalid op %d init its credit\n", op);
index 5bf89a2..c888039 100644 (file)
@@ -60,7 +60,6 @@
 #include <obd_lov.h>
 #include <lustre_fsfilt.h>
 #include <lprocfs_status.h>
-#include <lustre_quota.h>
 #include <lustre_disk.h>
 #include <lustre_param.h>
 
@@ -87,9 +86,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
 
         snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
 
-        CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino/gen %lu/%u, sb %p\n",
-               ino, generation, mds->mds_obt.obt_sb);
-
         /* under ext3 this is neither supposed to return bad inodes
            nor NULL inodes. */
         result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
@@ -253,9 +249,6 @@ int mds_postrecov(struct obd_device *obd)
                    obd->obd_async_recov ? OBD_NOTIFY_SYNC_NONBLOCK :
                    OBD_NOTIFY_SYNC, NULL);
 
-        /* quota recovery */
-        lquota_recovery(mds_quota_interface_ref, obd);
-
         RETURN(rc);
 }
 
@@ -311,9 +304,6 @@ struct lvfs_callback_ops mds_lvfs_ops = {
         l_fid2dentry:     mds_lvfs_fid2dentry,
 };
 
-quota_interface_t *mds_quota_interface_ref;
-extern quota_interface_t mds_quota_interface;
-
 static void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt)
 {
         struct mds_obd *mds = &obd->u.mds;
@@ -480,9 +470,23 @@ static struct obd_ops mds_cmd_obd_ops = {
         //   .o_health_check    = mds_cmd_health_check,
 };
 
+quota_interface_t *mds_quota_interface_ref;
+extern quota_interface_t mds_quota_interface;
+
 static int __init mds_cmd_init(void)
 {
         struct lprocfs_static_vars lvars;
+        int rc;
+
+        request_module("lquota");
+        mds_quota_interface_ref = PORTAL_SYMBOL_GET(mds_quota_interface);
+        rc = lquota_init(mds_quota_interface_ref);
+        if (rc) {
+                if (mds_quota_interface_ref)
+                        PORTAL_SYMBOL_PUT(mds_quota_interface);
+                return rc;
+        }
+        init_obd_quota_ops(mds_quota_interface_ref, &mds_cmd_obd_ops);
 
         lprocfs_mds_init_vars(&lvars);
         class_register_type(&mds_cmd_obd_ops, NULL, lvars.module_vars,
@@ -493,9 +497,14 @@ static int __init mds_cmd_init(void)
 
 static void /*__exit*/ mds_cmd_exit(void)
 {
+        /* Undo the quota setup done in mds_cmd_init(): lquota_exit() is
+         * called unconditionally, while the symbol reference obtained with
+         * PORTAL_SYMBOL_GET() is only dropped if it was actually acquired. */
+        lquota_exit(mds_quota_interface_ref);
+        if (mds_quota_interface_ref)
+                PORTAL_SYMBOL_PUT(mds_quota_interface);
+
         class_unregister_type(LUSTRE_MDS_NAME);
 }
 
+EXPORT_SYMBOL(mds_quota_interface_ref);
 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
 MODULE_DESCRIPTION("Lustre Metadata Server (MDS)");
 MODULE_LICENSE("GPL");
index f11796d..58aac97 100644 (file)
@@ -130,162 +130,6 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer,
         return count;
 }
 
-#if 0
-static int lprocfs_wr_group_info(struct file *file, const char *buffer,
-                                 unsigned long count, void *data)
-{
-        struct obd_device *obd = data;
-        struct mds_obd *mds = &obd->u.mds;
-        struct mds_grp_downcall_data sparam, *param = &sparam;
-        int size = 0, rc = count;
-
-        if (count < sizeof(param)) {
-                CERROR("%s: invalid data size %lu\n", obd->obd_name, count);
-                return count;
-        }
-
-        if (copy_from_user(param, buffer, sizeof(*param)) ||
-            param->mgd_magic != MDS_GRP_DOWNCALL_MAGIC) {
-                CERROR("%s: MDS group downcall bad params\n", obd->obd_name);
-                return count;
-        }
-
-        if (param->mgd_ngroups > NGROUPS_MAX) {
-                CWARN("%s: uid %u groups %d more than maximum %d\n",
-                      obd->obd_name, param->mgd_uid, param->mgd_ngroups,
-                      NGROUPS_MAX);
-                param->mgd_ngroups = NGROUPS_MAX;
-        }
-
-        if (param->mgd_ngroups > 0) {
-                size = offsetof(struct mds_grp_downcall_data,
-                                mgd_groups[param->mgd_ngroups]);
-                OBD_ALLOC(param, size);
-                if (!param) {
-                        CERROR("%s: fail to alloc %d bytes for uid %u"
-                               " with %d groups\n", obd->obd_name, size,
-                               sparam.mgd_uid, sparam.mgd_ngroups);
-                        param = &sparam;
-                        param->mgd_ngroups = 0;
-                } else if (copy_from_user(param, buffer, size)) {
-                        CERROR("%s: uid %u bad supplementary group data\n",
-                               obd->obd_name, sparam.mgd_uid);
-                        OBD_FREE(param, size);
-                        param = &sparam;
-                        param->mgd_ngroups = 0;
-                }
-        }
-        rc = upcall_cache_downcall(mds->mds_group_hash, param->mgd_err,
-                                   param->mgd_uid, param->mgd_gid,
-                                   param->mgd_ngroups, param->mgd_groups);
-
-        if (param && param != &sparam)
-                OBD_FREE(param, size);
-
-        return rc;
-}
-
-static int lprocfs_rd_group_expire(char *page, char **start, off_t off,
-                                   int count, int *eof, void *data)
-{
-        struct obd_device *obd = data;
-
-        *eof = 1;
-        return snprintf(page, count, "%lu\n",
-                        obd->u.mds.mds_group_hash->uc_entry_expire / HZ);
-}
-
-static int lprocfs_wr_group_expire(struct file *file, const char *buffer,
-                                   unsigned long count, void *data)
-{
-        struct obd_device *obd = data;
-        int val, rc;
-
-        rc = lprocfs_write_helper(buffer, count, &val);
-        if (rc)
-                return rc;
-
-        if (val > 5)
-                obd->u.mds.mds_group_hash->uc_entry_expire = val * HZ;
-        else
-                CERROR("invalid expire time %u for group cache\n", val);
-
-        return count;
-}
-
-static int lprocfs_rd_group_acquire_expire(char *page, char **start, off_t off,
-                                           int count, int *eof, void *data)
-{
-        struct obd_device *obd = data;
-
-        *eof = 1;
-        return snprintf(page, count, "%lu\n",
-                        obd->u.mds.mds_group_hash->uc_acquire_expire / HZ);
-}
-
-static int lprocfs_wr_group_acquire_expire(struct file *file,const char *buffer,
-                                           unsigned long count, void *data)
-{
-        struct obd_device *obd = data;
-        int val, rc = 0;
-
-        rc = lprocfs_write_helper(buffer, count, &val);
-        if (rc)
-                return rc;
-
-        if (val > 2)
-                obd->u.mds.mds_group_hash->uc_acquire_expire = val * HZ;
-
-        return count;
-}
-
-static int lprocfs_rd_group_upcall(char *page, char **start, off_t off,
-                                   int count, int *eof, void *data)
-{
-        struct obd_device *obd = data;
-
-        *eof = 1;
-        return snprintf(page, count, "%s\n",
-                        obd->u.mds.mds_group_hash->uc_upcall);
-}
-
-static int lprocfs_wr_group_upcall(struct file *file, const char *buffer,
-                                   unsigned long count, void *data)
-{
-        struct obd_device *obd = data;
-        struct upcall_cache *hash = obd->u.mds.mds_group_hash;
-        char kernbuf[UC_CACHE_UPCALL_MAXPATH] = { '\0' };
-
-        if (count >= UC_CACHE_UPCALL_MAXPATH) {
-                CERROR("%s: group upcall too long\n", obd->obd_name);
-                return -EINVAL;
-        }
-
-        if (copy_from_user(kernbuf, buffer,
-                           min(count, UC_CACHE_UPCALL_MAXPATH - 1)))
-                return -EFAULT;
-
-        /* Remove any extraneous bits from the upcall (e.g. linefeeds) */
-        sscanf(kernbuf, "%s", hash->uc_upcall);
-
-        if (strcmp(hash->uc_name, obd->obd_name) != 0)
-                CWARN("%s: write to upcall name %s for MDS %s\n",
-                      obd->obd_name, hash->uc_upcall, obd->obd_name);
-        CWARN("%s: group upcall set to %s\n", obd->obd_name, hash->uc_upcall);
-
-        return count;
-}
-
-static int lprocfs_wr_group_flush(struct file *file, const char *buffer,
-                                  unsigned long count, void *data)
-{
-        struct obd_device *obd = data;
-
-        upcall_cache_flush_idle(obd->u.mds.mds_group_hash);
-        return count;
-}
-#endif
-
 static int lprocfs_wr_atime_diff(struct file *file, const char *buffer,
                                  unsigned long count, void *data)
 {
@@ -336,23 +180,6 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = {
         { "evict_ost_nids",  lprocfs_mds_rd_evictostnids,
                                                lprocfs_mds_wr_evictostnids, 0 },
         { "num_exports",     lprocfs_rd_num_exports, 0, 0 },
-#ifdef HAVE_QUOTA_SUPPORT
-        { "quota_bunit_sz",  lprocfs_rd_bunit, lprocfs_wr_bunit, 0 },
-        { "quota_btune_sz",  lprocfs_rd_btune, lprocfs_wr_btune, 0 },
-        { "quota_iunit_sz",  lprocfs_rd_iunit, lprocfs_wr_iunit, 0 },
-        { "quota_itune_sz",  lprocfs_rd_itune, lprocfs_wr_itune, 0 },
-        { "quota_type",      lprocfs_rd_type, lprocfs_wr_type, 0 },
-#endif
-#if 0
-        { "group_expire_interval", lprocfs_rd_group_expire,
-                             lprocfs_wr_group_expire, 0},
-        { "group_acquire_expire", lprocfs_rd_group_acquire_expire,
-                             lprocfs_wr_group_acquire_expire, 0},
-        { "group_upcall",    lprocfs_rd_group_upcall,
-                             lprocfs_wr_group_upcall, 0},
-        { "group_flush",     0, lprocfs_wr_group_flush, 0},
-        { "group_info",      0, lprocfs_wr_group_info, 0 },
-#endif
         { "atime_diff",      lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 },
         { 0 }
 };
index cecf56d..a39e495 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/kmod.h>
 #include <linux/version.h>
 #include <linux/sched.h>
-#include <lustre_quota.h>
 #include <linux/mount.h>
 #include <lustre_mds.h>
 #include <obd_class.h>
@@ -157,7 +156,7 @@ out_pop:
 
 int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
                     struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                    struct obd_export *md_exp)
+                    struct obd_export *md_exp, void *capa)
 {
         struct mds_obd *mds = &exp->exp_obd->u.mds;
         struct inode *parent_inode = mds->mds_objects_dir->d_inode;
index 9cf0e71..c98aefa 100644 (file)
@@ -72,12 +72,10 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md **ea, struct obd_trans_info *oti);
 int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
                     struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                    struct obd_export *md_exp);
+                    struct obd_export *md_exp, void *capa);
 
 /* mds/handler.c */
 extern struct lvfs_callback_ops mds_lvfs_ops;
-/* quota stuff */
-extern quota_interface_t *mds_quota_interface_ref;
 
 /* mds/lproc_mds.c */
 enum {
index 7d0238e..1968b9c 100644 (file)
@@ -478,7 +478,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
         data->ocd_connect_flags = OBD_CONNECT_VERSION   | OBD_CONNECT_INDEX   |
                                   OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64 |
                                   OBD_CONNECT_OSS_CAPA  | OBD_CONNECT_FID     |
-                                  OBD_CONNECT_AT;
+                                  OBD_CONNECT_AT | OBD_CONNECT_CHANGE_QS;
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 #endif
@@ -799,7 +799,5 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched,
         rc = mds_lov_start_synchronize(obd, watched, data,
                                        !(ev == OBD_NOTIFY_SYNC));
 
-        lquota_recovery(mds_quota_interface_ref, obd);
-
         RETURN(rc);
 }
index fd12681..50869dc 100644 (file)
@@ -67,6 +67,9 @@
 #include <lustre_mds.h>
 #include <lustre_mdt.h>
 #include "mdt_internal.h"
+#ifdef HAVE_QUOTA_SUPPORT
+# include <lustre_quota.h>
+#endif
 #include <lustre_acl.h>
 #include <lustre_param.h>
 
@@ -309,7 +312,8 @@ static int mdt_getstatus(struct mdt_thread_info *info)
 
         repbody->valid |= OBD_MD_FLID;
 
-        if (mdt->mdt_opts.mo_mds_capa) {
+        if (mdt->mdt_opts.mo_mds_capa &&
+            info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) {
                 struct mdt_object  *root;
                 struct lustre_capa *capa;
 
@@ -320,7 +324,6 @@ static int mdt_getstatus(struct mdt_thread_info *info)
                 capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA1);
                 LASSERT(capa);
                 capa->lc_opc = CAPA_OPC_MDS_DEFAULT;
-
                 rc = mo_capa_get(info->mti_env, mdt_object_child(root), capa,
                                  0);
                 mdt_object_put(info->mti_env, root);
@@ -432,7 +435,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
         struct md_object        *next = mdt_object_child(o);
         const struct mdt_body   *reqbody = info->mti_body;
         struct ptlrpc_request   *req = mdt_info_req(info);
-        struct mdt_export_data  *med = &req->rq_export->exp_mdt_data;
         struct md_attr          *ma = &info->mti_attr;
         struct lu_attr          *la = &ma->ma_attr;
         struct req_capsule      *pill = info->mti_pill;
@@ -537,7 +539,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                        repbody->max_cookiesize);
         }
 
-        if (med->med_rmtclient && (reqbody->valid & OBD_MD_FLRMTPERM)) {
+        if (exp_connect_rmtclient(info->mti_exp) &&
+            reqbody->valid & OBD_MD_FLRMTPERM) {
                 void *buf = req_capsule_server_get(pill, &RMF_ACL);
 
                 /* mdt_getattr_lock only */
@@ -579,8 +582,9 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
         }
 #endif
 
-        if ((reqbody->valid & OBD_MD_FLMDSCAPA) &&
-            info->mti_mdt->mdt_opts.mo_mds_capa) {
+        if (reqbody->valid & OBD_MD_FLMDSCAPA &&
+            info->mti_mdt->mdt_opts.mo_mds_capa &&
+            info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) {
                 struct lustre_capa *capa;
 
                 capa = req_capsule_server_get(pill, &RMF_CAPA1);
@@ -596,7 +600,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
 
 static int mdt_renew_capa(struct mdt_thread_info *info)
 {
-        struct mdt_device  *mdt = info->mti_mdt;
         struct mdt_object  *obj = info->mti_object;
         struct mdt_body    *body;
         struct lustre_capa *capa, *c;
@@ -607,7 +610,8 @@ static int mdt_renew_capa(struct mdt_thread_info *info)
          * return directly, client will find body->valid OBD_MD_FLOSSCAPA
          * flag not set.
          */
-        if (!obj || !mdt->mdt_opts.mo_mds_capa)
+        if (!obj || !info->mti_mdt->mdt_opts.mo_oss_capa ||
+            !(info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA))
                 RETURN(0);
 
         body = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
@@ -1116,16 +1120,14 @@ static int mdt_connect(struct mdt_thread_info *info)
         if (rc == 0) {
                 LASSERT(req->rq_export != NULL);
                 info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev);
-                rc = mdt_init_idmap(info);
-                if (rc != 0) {
-                        struct obd_export *exp;
-
-                        exp = req->rq_export;
-                        /* if mdt_init_idmap failed, revocation for connect */
-                        obd_disconnect(class_export_get(exp));
-                }
-        } else
+                rc = mdt_init_sec_level(info);
+                if (rc == 0)
+                        rc = mdt_init_idmap(info);
+                if (rc != 0)
+                        obd_disconnect(class_export_get(req->rq_export));
+        } else {
                 rc = err_serious(rc);
+        }
         return rc;
 }
 
@@ -1262,7 +1264,7 @@ static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page,
                 memcpy(name, ent->lde_name, le16_to_cpu(ent->lde_namelen));
                 lname = mdt_name(info->mti_env, name,
                                  le16_to_cpu(ent->lde_namelen));
-                ma->ma_attr_flags |= MDS_PERM_BYPASS;
+                ma->ma_attr_flags |= (MDS_PERM_BYPASS | MDS_QUOTA_IGNORE);
                 rc = mdo_name_insert(info->mti_env,
                                      md_object_next(&object->mot_obj),
                                      lname, lf, ma);
@@ -1633,15 +1635,134 @@ static int mdt_sync(struct mdt_thread_info *info)
         RETURN(rc);
 }
 
+#ifdef HAVE_QUOTA_SUPPORT
+/*
+ * Handle a quotacheck request: validate it, pack the reply and forward the
+ * check for the requested quota type to the child md device through
+ * mdo_quota.mqo_check().
+ *
+ * \retval 0       when OBD_FAIL_MDS_QUOTACHECK_NET is triggered (request is
+ *                 not processed at all)
+ * \retval -EPROTO the request carries no RMF_OBD_QUOTACTL buffer
+ * \retval -EPERM  the request comes from a remote client
+ * \retval other   error from req_capsule_server_pack(), or whatever
+ *                 mqo_check() returns
+ */
 static int mdt_quotacheck_handle(struct mdt_thread_info *info)
 {
-        return err_serious(-EOPNOTSUPP);
+        struct obd_quotactl *oqctl;
+        struct req_capsule *pill = info->mti_pill;
+        struct obd_export *exp = info->mti_exp;
+        struct md_device *next = info->mti_mdt->mdt_child;
+        int rc;
+        ENTRY;
+
+        /* NOTE(review): presumably a fault-injection hook - confirm */
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_QUOTACHECK_NET))
+                RETURN(0);
+
+        oqctl = req_capsule_client_get(pill, &RMF_OBD_QUOTACTL);
+        if (oqctl == NULL)
+                RETURN(-EPROTO);
+
+        /* remote client has no permission for quotacheck */
+        if (unlikely(exp_connect_rmtclient(exp)))
+                RETURN(-EPERM);
+
+        rc = req_capsule_server_pack(pill);
+        if (rc)
+                RETURN(rc);
+
+        /* only qc_type of the request is passed down to the child device */
+        rc = next->md_ops->mdo_quota.mqo_check(info->mti_env, next, exp,
+                                               oqctl->qc_type);
+        RETURN(rc);
 }
 
+/*
+ * Handle a quotactl request: validate it, map the id for remote clients,
+ * dispatch qc_cmd to the matching md_quota_operations method of the child
+ * md device and copy the request structure into the reply.
+ *
+ * \retval 0       when OBD_FAIL_MDS_QUOTACTL_NET is triggered (request is
+ *                 not processed at all)
+ * \retval -EPROTO the request carries no RMF_OBD_QUOTACTL buffer
+ * \retval -EPERM  a remote client sent anything but Q_GETQUOTA/Q_GETINFO
+ * \retval -EINVAL a remote client used a quota type other than USRQUOTA or
+ *                 GRPQUOTA
+ * \retval -EACCES the remote client's id has no entry in the idmap table
+ * \retval -EFAULT qc_cmd is not one of the commands handled below
+ * \retval other   error from req_capsule_server_pack(), or whatever the
+ *                 selected mqo_*() method returns
+ */
 static int mdt_quotactl_handle(struct mdt_thread_info *info)
 {
-        return err_serious(-EOPNOTSUPP);
+        struct obd_quotactl *oqctl, *repoqc;
+        struct req_capsule *pill = info->mti_pill;
+        struct obd_export *exp = info->mti_exp;
+        struct md_device *next = info->mti_mdt->mdt_child;
+        const struct md_quota_operations *mqo = &next->md_ops->mdo_quota;
+        int id, rc;
+        ENTRY;
+
+        /* NOTE(review): presumably a fault-injection hook - confirm */
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_QUOTACTL_NET))
+                RETURN(0);
+
+        oqctl = req_capsule_client_get(pill, &RMF_OBD_QUOTACTL);
+        if (oqctl == NULL)
+                RETURN(-EPROTO);
+
+        /* by default the id from the request is used as is; remote clients
+         * get it translated through their idmap table below */
+        id = oqctl->qc_id;
+        if (exp_connect_rmtclient(exp)) {
+                struct ptlrpc_request *req = mdt_info_req(info);
+                struct mdt_export_data *med = mdt_req2med(req);
+                struct lustre_idmap_table *idmap = med->med_idmap;
+
+                /* remote clients may only query quota, never change it */
+                if (unlikely(oqctl->qc_cmd != Q_GETQUOTA &&
+                             oqctl->qc_cmd != Q_GETINFO))
+                        RETURN(-EPERM);
+
+
+                if (oqctl->qc_type == USRQUOTA)
+                        id = lustre_idmap_lookup_uid(NULL, idmap, 0,
+                                                     oqctl->qc_id);
+                else if (oqctl->qc_type == GRPQUOTA)
+                        id = lustre_idmap_lookup_gid(NULL, idmap, 0,
+                                                     oqctl->qc_id);
+                else
+                        RETURN(-EINVAL);
+
+                if (id == CFS_IDMAP_NOTFOUND) {
+                        CDEBUG(D_QUOTA, "no mapping for id %u\n",
+                               oqctl->qc_id);
+                        RETURN(-EACCES);
+                }
+        }
+
+        rc = req_capsule_server_pack(pill);
+        if (rc)
+                RETURN(rc);
+
+        repoqc = req_capsule_server_get(pill, &RMF_OBD_QUOTACTL);
+        LASSERT(repoqc != NULL);
+
+        /* The get-type commands are handed pointers into the request
+         * structure itself (qc_dqinfo / qc_dqblk); the whole structure is
+         * copied into the reply after the switch. */
+        switch (oqctl->qc_cmd) {
+        case Q_QUOTAON:
+                rc = mqo->mqo_on(info->mti_env, next, oqctl->qc_type, id);
+                break;
+        case Q_QUOTAOFF:
+                rc = mqo->mqo_off(info->mti_env, next, oqctl->qc_type, id);
+                break;
+        case Q_SETINFO:
+                rc = mqo->mqo_setinfo(info->mti_env, next, oqctl->qc_type, id,
+                                      &oqctl->qc_dqinfo);
+                break;
+        case Q_GETINFO:
+                rc = mqo->mqo_getinfo(info->mti_env, next, oqctl->qc_type, id,
+                                      &oqctl->qc_dqinfo);
+                break;
+        case Q_SETQUOTA:
+                rc = mqo->mqo_setquota(info->mti_env, next, oqctl->qc_type, id,
+                                       &oqctl->qc_dqblk);
+                break;
+        case Q_GETQUOTA:
+                rc = mqo->mqo_getquota(info->mti_env, next, oqctl->qc_type, id,
+                                       &oqctl->qc_dqblk);
+                break;
+        case Q_GETOINFO:
+                rc = mqo->mqo_getoinfo(info->mti_env, next, oqctl->qc_type, id,
+                                       &oqctl->qc_dqinfo);
+                break;
+        case Q_GETOQUOTA:
+                rc = mqo->mqo_getoquota(info->mti_env, next, oqctl->qc_type, id,
+                                        &oqctl->qc_dqblk);
+                break;
+        case LUSTRE_Q_INVALIDATE:
+                rc = mqo->mqo_invalidate(info->mti_env, next, oqctl->qc_type);
+                break;
+        case LUSTRE_Q_FINVALIDATE:
+                rc = mqo->mqo_finvalidate(info->mti_env, next, oqctl->qc_type);
+                break;
+        default:
+                /* the reply has been packed but is left unfilled here */
+                CERROR("unsupported mdt_quotactl command: %d\n",
+                       oqctl->qc_cmd);
+                RETURN(-EFAULT);
+        }
+
+        /* Copied regardless of rc.  oqctl->qc_id is not assigned anywhere in
+         * this function, so the reply carries the id as sent by the client
+         * rather than the mapped one. */
+        *repoqc = *oqctl;
+        RETURN(rc);
 }
+#endif
 
 /*
  * OBD PING and other handlers.
@@ -2381,6 +2502,15 @@ static void mdt_thread_info_init(struct ptlrpc_request *req,
         info->mti_env = req->rq_svc_thread->t_env;
         ci = md_capainfo(info->mti_env);
         memset(ci, 0, sizeof *ci);
+        if (req->rq_export) {
+                if (exp_connect_rmtclient(req->rq_export))
+                        ci->mc_auth = LC_ID_CONVERT;
+                else if (req->rq_export->exp_connect_flags &
+                         OBD_CONNECT_MDS_CAPA)
+                        ci->mc_auth = LC_ID_PLAIN;
+                else
+                        ci->mc_auth = LC_ID_NONE;
+        }
 
         info->mti_fail_id = OBD_FAIL_MDS_ALL_REPLY_NET;
         info->mti_transno = lustre_msg_get_transno(req->rq_reqmsg);
@@ -3803,7 +3933,7 @@ err_mdt_svc:
 static void mdt_stack_fini(const struct lu_env *env,
                            struct mdt_device *m, struct lu_device *top)
 {
-        struct obd_device       *obd = m->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device       *obd = mdt2obd_dev(m);
         struct lustre_cfg_bufs  *bufs;
         struct lustre_cfg       *lcfg;
         struct mdt_thread_info  *info;
@@ -3951,7 +4081,7 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
         struct md_device *next = m->mdt_child;
         struct lu_device *d    = &m->mdt_md_dev.md_lu_dev;
         struct lu_site   *ls   = d->ld_site;
-        struct obd_device *obd = m->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device *obd = mdt2obd_dev(m);
         ENTRY;
 
         /* At this point, obd exports might still be on the "obd_zombie_exports"
@@ -3972,8 +4102,10 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
         target_recovery_fini(obd);
         mdt_stop_ptlrpc_service(m);
         obd_zombie_barrier();
+#ifdef HAVE_QUOTA_SUPPORT
+        next->md_ops->mdo_quota.mqo_cleanup(env, next);
+#endif
         mdt_fs_cleanup(env, m);
-
         upcall_cache_cleanup(m->mdt_identity_cache);
         m->mdt_identity_cache = NULL;
 
@@ -4018,6 +4150,8 @@ static void fsoptions_to_mdt_flags(struct mdt_device *m, char *options)
 {
         char *p = options;
 
+        m->mdt_opts.mo_mds_capa = 1;
+        m->mdt_opts.mo_oss_capa = 1;
 #ifdef CONFIG_FS_POSIX_ACL
         /* ACLs should be enabled by default (b=13829) */
         m->mdt_opts.mo_acl = 1;
@@ -4065,11 +4199,14 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         struct obd_device         *obd;
         const char                *dev = lustre_cfg_string(cfg, 0);
         const char                *num = lustre_cfg_string(cfg, 2);
-        struct lustre_mount_info  *lmi;
+        struct lustre_mount_info  *lmi = NULL;
         struct lustre_sb_info     *lsi;
         struct lu_site            *s;
         struct md_site            *mite;
         const char                *identity_upcall = "NONE";
+#ifdef HAVE_QUOTA_SUPPORT
+        struct md_device          *next;
+#endif
         int                        rc;
         ENTRY;
 
@@ -4107,7 +4244,6 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         } else {
                 lsi = s2lsi(lmi->lmi_sb);
                 fsoptions_to_mdt_flags(m, lsi->lsi_lmd->lmd_opts);
-                server_put_mount_2(dev, lmi->lmi_mnt);
         }
 
         rwlock_init(&m->mdt_sptlrpc_lock);
@@ -4123,7 +4259,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
 
         OBD_ALLOC_PTR(mite);
         if (mite == NULL)
-                RETURN(-ENOMEM);
+                GOTO(err_lmi, rc = -ENOMEM);
 
         s = &mite->ms_lu;
 
@@ -4229,11 +4365,21 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         if (rc)
                 GOTO(err_capa, rc);
 
+#ifdef HAVE_QUOTA_SUPPORT
+        next = m->mdt_child;
+        rc = next->md_ops->mdo_quota.mqo_setup(env, next, lmi->lmi_mnt);
+        if (rc)
+                GOTO(err_fs_cleanup, rc);
+#endif
+
+        server_put_mount_2(dev, lmi->lmi_mnt);
+        lmi = NULL;
+
         target_recovery_init(obd, mdt_recovery_handle);
 
         rc = mdt_start_ptlrpc_service(m);
         if (rc)
-                GOTO(err_fs_cleanup, rc);
+                GOTO(err_recovery, rc);
 
         ping_evictor_start();
 
@@ -4257,8 +4403,12 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
 err_stop_service:
         ping_evictor_stop();
         mdt_stop_ptlrpc_service(m);
-err_fs_cleanup:
+err_recovery:
         target_recovery_fini(obd);
+#ifdef HAVE_QUOTA_SUPPORT
+        next->md_ops->mdo_quota.mqo_cleanup(env, next);
+err_fs_cleanup:
+#endif
         mdt_fs_cleanup(env, m);
 err_capa:
         cfs_timer_disarm(&m->mdt_ck_timer);
@@ -4284,6 +4434,9 @@ err_fini_site:
         lu_site_fini(s);
 err_free_site:
         OBD_FREE_PTR(mite);
+err_lmi:
+        if (lmi) 
+                server_put_mount_2(dev, lmi->lmi_mnt);
         return (rc);
 }
 
@@ -4333,7 +4486,7 @@ static int mdt_process_config(const struct lu_env *env,
                 lprocfs_mdt_init_vars(&lvars);
                 rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars,
                                               cfg, obd);
-                if (rc == -ENOSYS)
+                if (rc > 0 || rc == -ENOSYS)
                         /* we don't understand; pass it on */
                         rc = next->ld_ops->ldo_process_config(env, next, cfg);
                 break;
@@ -4434,8 +4587,6 @@ static int mdt_connect_internal(struct obd_export *exp,
                                 struct mdt_device *mdt,
                                 struct obd_connect_data *data)
 {
-        __u64 flags;
-
         if (data != NULL) {
                 data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED;
                 data->ocd_ibits_known &= MDS_INODELOCK_FULL;
@@ -4453,12 +4604,6 @@ static int mdt_connect_internal(struct obd_export *exp,
                 if (!mdt->mdt_opts.mo_user_xattr)
                         data->ocd_connect_flags &= ~OBD_CONNECT_XATTR;
 
-                if (!mdt->mdt_opts.mo_mds_capa)
-                        data->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA;
-
-                if (!mdt->mdt_opts.mo_oss_capa)
-                        data->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA;
-
                 spin_lock(&exp->exp_lock);
                 exp->exp_connect_flags = data->ocd_connect_flags;
                 spin_unlock(&exp->exp_lock);
@@ -4475,28 +4620,6 @@ static int mdt_connect_internal(struct obd_export *exp,
         }
 #endif
 
-        flags = OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT;
-        if ((exp->exp_connect_flags & flags) == flags) {
-                CWARN("%s: both local and remote client flags are set\n",
-                      mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
-                return -EBADE;
-        }
-
-        if (mdt->mdt_opts.mo_mds_capa &&
-            ((exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) == 0)) {
-                CWARN("%s: MDS requires capability support, but client not\n",
-                      mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
-                return -EBADE;
-        }
-
-        if (mdt->mdt_opts.mo_oss_capa &&
-            ((exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA) == 0)) {
-                CWARN("%s: MDS requires OSS capability support, "
-                      "but client not\n",
-                      mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
-                return -EBADE;
-        }
-
         if ((exp->exp_connect_flags & OBD_CONNECT_FID) == 0) {
                 CWARN("%s: MDS requires FID support, but client not\n",
                       mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name);
@@ -4707,7 +4830,7 @@ static int mdt_destroy_export(struct obd_export *export)
         ENTRY;
 
         med = &export->exp_mdt_data;
-        if (med->med_rmtclient)
+        if (exp_connect_rmtclient(export))
                 mdt_cleanup_idmap(med);
 
         target_destroy_export(export);
@@ -4814,6 +4937,10 @@ static int mdt_upcall(const struct lu_env *env, struct md_device *md,
                         CDEBUG(D_INFO, "get max mdsize %d max cookiesize %d\n",
                                      m->mdt_max_mdsize, m->mdt_max_cookiesize);
                         mdt_allow_cli(m, CONFIG_SYNC);
+#ifdef HAVE_QUOTA_SUPPORT
+                        if (md->md_lu_dev.ld_obd->obd_recovering == 0)
+                                next->md_ops->mdo_quota.mqo_recovery(env, next);
+#endif
                         break;
                 case MD_NO_TRANS:
                         mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
@@ -4836,11 +4963,21 @@ static int mdt_obd_notify(struct obd_device *host,
                           struct obd_device *watched,
                           enum obd_notify_event ev, void *data)
 {
+        struct mdt_device *mdt = mdt_dev(host->obd_lu_dev);
+#ifdef HAVE_QUOTA_SUPPORT
+        struct md_device *next = mdt->mdt_child;
+#endif
         ENTRY;
 
         switch (ev) {
         case OBD_NOTIFY_CONFIG:
-                mdt_allow_cli(mdt_dev(host->obd_lu_dev), (unsigned long)data);
+                mdt_allow_cli(mdt, (unsigned long)data);
+
+#ifdef HAVE_QUOTA_SUPPORT
+               /* quota_type has been processed, we can now handle
+                * incoming quota requests */
+                next->md_ops->mdo_quota.mqo_notify(NULL, next);
+#endif
                 break;
         default:
                 CDEBUG(D_INFO, "Unhandled notification %#x\n", ev);
@@ -4888,7 +5025,10 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
 {
         struct lu_device *ld = md2lu_dev(mdt->mdt_child);
-        struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device *obd = mdt2obd_dev(mdt);
+#ifdef HAVE_QUOTA_SUPPORT
+        struct md_device *next = mdt->mdt_child;
+#endif
         int rc, lost;
         ENTRY;
         /* if some clients didn't participate in recovery then we can possibly
@@ -4897,6 +5037,9 @@ int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
         mdt_seq_adjust(env, mdt, lost);
 
         rc = ld->ld_ops->ldo_recovery_complete(env, ld);
+#ifdef HAVE_QUOTA_SUPPORT
+        next->md_ops->mdo_quota.mqo_recovery(env, next);
+#endif
         RETURN(rc);
 }
 
@@ -5118,8 +5261,10 @@ DEF_MDT_HNDL_F(HABEO_CORPUS,              DONE_WRITING, mdt_done_writing),
 DEF_MDT_HNDL_F(0           |HABEO_REFERO, PIN,          mdt_pin),
 DEF_MDT_HNDL_0(0,                         SYNC,         mdt_sync),
 DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR,    mdt_is_subdir),
+#ifdef HAVE_QUOTA_SUPPORT
 DEF_MDT_HNDL_F(0,                         QUOTACHECK,   mdt_quotacheck_handle),
 DEF_MDT_HNDL_F(0,                         QUOTACTL,     mdt_quotactl_handle)
+#endif
 };
 
 #define DEF_OBD_HNDL(flags, name, fn)                   \
index 3243e65..21cd41b 100644 (file)
@@ -285,10 +285,8 @@ __u32 mdt_identity_get_perm(struct md_identity *identity,
 int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o,
                          void *buf)
 {
-        struct ptlrpc_request   *req = mdt_info_req(info);
         struct md_ucred         *uc = mdt_ucred(info);
         struct md_object        *next = mdt_object_child(o);
-        struct mdt_export_data  *med = mdt_req2med(req);
         struct mdt_remote_perm  *perm = buf;
 
         ENTRY;
@@ -296,7 +294,7 @@ int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o,
         /* remote client request always pack ptlrpc_user_desc! */
         LASSERT(perm);
 
-        if (!med->med_rmtclient)
+        if (!exp_connect_rmtclient(info->mti_exp))
                 RETURN(-EBADE);
 
         if ((uc->mu_valid != UCRED_OLD) && (uc->mu_valid != UCRED_NEW))
index 08f38c0..162a02f 100644 (file)
 
 #include "mdt_internal.h"
 
-int mdt_init_idmap(struct mdt_thread_info *info)
+#define mdt_init_sec_none(reply, exp)                                   \
+do { /* clear remote-client and capability flags, then store them */    \
+        (reply)->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT |        \
+                                        OBD_CONNECT_RMT_CLIENT_FORCE |  \
+                                        OBD_CONNECT_MDS_CAPA |          \
+                                        OBD_CONNECT_OSS_CAPA);          \
+        spin_lock(&(exp)->exp_lock); /* flags are copied under exp_lock */ \
+        (exp)->exp_connect_flags = (reply)->ocd_connect_flags;          \
+        spin_unlock(&(exp)->exp_lock);                                  \
+} while (0)
+
+int mdt_init_sec_level(struct mdt_thread_info *info)
 {
+        struct mdt_device *mdt = info->mti_mdt;
         struct ptlrpc_request *req = mdt_info_req(info);
         char *client = libcfs_nid2str(req->rq_peer.nid);
-        struct mdt_export_data *med = mdt_req2med(req);
-        struct obd_device *obd = req->rq_export->exp_obd;
+        struct obd_export *exp = req->rq_export;
+        struct obd_device *obd = exp->exp_obd;
         struct obd_connect_data *data, *reply;
         int rc = 0, remote;
         ENTRY;
@@ -91,26 +103,116 @@ int mdt_init_idmap(struct mdt_thread_info *info)
         if (data == NULL || reply == NULL)
                 RETURN(-EFAULT);
 
-        if (!req->rq_auth_gss || req->rq_auth_usr_mdt) {
-                med->med_rmtclient = 0;
-                reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT;
+        /* connection from MDT is always trusted */
+        if (req->rq_auth_usr_mdt) {
+                mdt_init_sec_none(reply, exp);
                 RETURN(0);
         }
 
-        remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT;
+        /* no GSS support case */
+        if (!req->rq_auth_gss) {
+                if (mdt->mdt_sec_level > LUSTRE_SEC_NONE) {
+                        CWARN("client %s -> target %s does not user GSS, "
+                              "can not run under security level %d.\n",
+                              client, obd->obd_name, mdt->mdt_sec_level);
+                        RETURN(-EACCES);
+                } else {
+                        mdt_init_sec_none(reply, exp);
+                        RETURN(0);
+                }
+        }
+
+        /* old version case */
+        if (unlikely(!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) ||
+                     !(data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) ||
+                     !(data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA))) {
+                if (mdt->mdt_sec_level > LUSTRE_SEC_NONE) {
+                        CWARN("client %s -> target %s uses old version, "
+                              "can not run under security level %d.\n",
+                              client, obd->obd_name, mdt->mdt_sec_level);
+                        RETURN(-EACCES);
+                } else {
+                        CWARN("client %s -> target %s uses old version, "
+                              "run under security level %d.\n",
+                              client, obd->obd_name, mdt->mdt_sec_level);
+                        mdt_init_sec_none(reply, exp);
+                        RETURN(0);
+                }
+        }
 
+        remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT_FORCE;
         if (remote) {
-                med->med_rmtclient = 1;
                 if (!req->rq_auth_remote)
                         CDEBUG(D_SEC, "client (local realm) %s -> target %s "
-                               "asked to be remote!\n", client, obd->obd_name);
+                               "asked to be remote.\n", client, obd->obd_name);
         } else if (req->rq_auth_remote) {
-                med->med_rmtclient = 1;
-                CDEBUG(D_SEC, "client (remote realm) %s -> target %s forced "
-                       "to be remote!\n", client, obd->obd_name);
+                remote = 1;
+                CDEBUG(D_SEC, "client (remote realm) %s -> target %s is set "
+                       "as remote by default.\n", client, obd->obd_name);
+        }
+
+        if (remote) {
+                if (!mdt->mdt_opts.mo_oss_capa) {
+                        CDEBUG(D_SEC, "client %s -> target %s is set as remote,"
+                               " but OSS capabilities are not enabled: %d.\n",
+                               client, obd->obd_name, mdt->mdt_opts.mo_oss_capa);
+                        RETURN(-EACCES);
+                }
+        } else {
+                if (req->rq_auth_uid == INVALID_UID) {
+                        CDEBUG(D_SEC, "client %s -> target %s: user is not "
+                               "authenticated!\n", client, obd->obd_name);
+                        RETURN(-EACCES);
+                }
         }
 
-        if (med->med_rmtclient) {
+        switch (mdt->mdt_sec_level) {
+        case LUSTRE_SEC_NONE:
+                if (!remote) {
+                        mdt_init_sec_none(reply, exp);
+                        break;
+                } else {
+                        CDEBUG(D_SEC, "client %s -> target %s is set as remote, "
+                               "can not run under security level %d.\n",
+                               client, obd->obd_name, mdt->mdt_sec_level);
+                        RETURN(-EACCES);
+                }
+        case LUSTRE_SEC_REMOTE:
+                if (!remote)
+                        mdt_init_sec_none(reply, exp);
+                break;
+        case LUSTRE_SEC_ALL:
+                if (!remote) {
+                        reply->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT |
+                                                      OBD_CONNECT_RMT_CLIENT_FORCE);
+                        if (!mdt->mdt_opts.mo_mds_capa)
+                                reply->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA;
+                        if (!mdt->mdt_opts.mo_oss_capa)
+                                reply->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA;
+
+                        spin_lock(&exp->exp_lock);
+                        exp->exp_connect_flags = reply->ocd_connect_flags;
+                        spin_unlock(&exp->exp_lock);
+                }
+                break;
+        default:
+                RETURN(-EINVAL);
+        }
+
+        RETURN(rc);
+}
+
+int mdt_init_idmap(struct mdt_thread_info *info)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        struct mdt_export_data *med = mdt_req2med(req);
+        struct obd_export *exp = req->rq_export;
+        char *client = libcfs_nid2str(req->rq_peer.nid);
+        struct obd_device *obd = exp->exp_obd;
+        int rc = 0;
+        ENTRY;
+
+        if (exp_connect_rmtclient(exp)) {
                 down(&med->med_idmap_sem);
                 if (!med->med_idmap)
                         med->med_idmap = lustre_idmap_init();
@@ -131,28 +233,16 @@ int mdt_init_idmap(struct mdt_thread_info *info)
                         RETURN(-ENOMEM);
                 }
 
-                reply->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT;
                 CDEBUG(D_SEC, "client %s -> target %s is remote.\n",
                        client, obd->obd_name);
-
                 /* NB, MDS_CONNECT establish root idmap too! */
                 rc = mdt_handle_idmap(info);
-        } else {
-                if (req->rq_auth_uid == INVALID_UID) {
-                        CDEBUG(D_SEC, "client %s -> target %s: user is not "
-                               "authenticated!\n", client, obd->obd_name);
-                        RETURN(-EACCES);
-                }
-                reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT;
         }
-
         RETURN(rc);
 }
 
 void mdt_cleanup_idmap(struct mdt_export_data *med)
 {
-        LASSERT(med->med_rmtclient);
-
         down(&med->med_idmap_sem);
         if (med->med_idmap != NULL) {
                 lustre_idmap_fini(med->med_idmap);
@@ -185,7 +275,7 @@ int mdt_handle_idmap(struct mdt_thread_info *info)
                 RETURN(0);
 
         med = mdt_req2med(req);
-        if (!med->med_rmtclient)
+        if (!exp_connect_rmtclient(info->mti_exp))
                 RETURN(0);
 
         opc = lustre_msg_get_opc(req->rq_reqmsg);
@@ -262,7 +352,7 @@ int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *req,
         gid_t gid, fsgid;
 
         /* Only remote client need desc_to_idmap. */
-        if (!med->med_rmtclient)
+        if (!exp_connect_rmtclient(req->rq_export))
                 return 0;
 
         uid = lustre_idmap_lookup_uid(NULL, idmap, 0, pud->pud_uid);
@@ -317,7 +407,7 @@ void mdt_body_reverse_idmap(struct mdt_thread_info *info, struct mdt_body *body)
         struct mdt_export_data    *med = mdt_req2med(req);
         struct lustre_idmap_table *idmap = med->med_idmap;
 
-        if (!med->med_rmtclient)
+        if (!exp_connect_rmtclient(info->mti_exp))
                 return;
 
         if (body->valid & OBD_MD_FLUID) {
@@ -366,7 +456,7 @@ int mdt_fix_attr_ucred(struct mdt_thread_info *info, __u32 op)
                  * done in cmm/mdd layer, here set all cases as uc->mu_fsgid. */
                 if ((attr->la_valid & LA_GID) && (attr->la_gid != -1))
                         attr->la_gid = uc->mu_fsgid;
-        } else if (med->med_rmtclient) {
+        } else if (exp_connect_rmtclient(info->mti_exp)) {
                 /* NB: -1 case will be handled by mdt_fix_attr() later. */
                 if ((attr->la_valid & LA_UID) && (attr->la_uid != -1)) {
                         uid_t uid = lustre_idmap_lookup_uid(uc, idmap, 0,
index 4251858..b5aaecb 100644 (file)
@@ -71,6 +71,7 @@
 #include <lvfs.h>
 #include <lustre_idmap.h>
 #include <lustre_eacl.h>
+#include <lustre_fsfilt.h>
 
 static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
 {
@@ -176,6 +177,7 @@ struct mdt_device {
 
         cfs_proc_dir_entry_t      *mdt_proc_entry;
         struct lprocfs_stats      *mdt_stats;
+        int                        mdt_sec_level;
 };
 
 #define MDT_SERVICE_WATCHDOG_FACTOR     (2000)
@@ -312,7 +314,7 @@ struct mdt_thread_info {
 
         /*
          * XXX: Part Three:
-         * The following members will be filled explictly
+         * The following members will be filled explicitly
          * with zero in mdt_reint_unpack(), because they are only used
          * by reint requests (including mdt_reint_open()).
          */
@@ -369,6 +371,7 @@ struct mdt_thread_info {
 
         /* Ops object filename */
         struct lu_name             mti_name;
+        struct md_attr             mti_tmp_attr;
 };
 
 typedef void (*mdt_cb_t)(const struct mdt_device *mdt, __u64 transno,
@@ -599,21 +602,16 @@ int mdt_init_ucred_reint(struct mdt_thread_info *);
 void mdt_exit_ucred(struct mdt_thread_info *);
 
 /* mdt_idmap.c */
+int mdt_init_sec_level(struct mdt_thread_info *);
 int mdt_init_idmap(struct mdt_thread_info *);
-
 void mdt_cleanup_idmap(struct mdt_export_data *);
-
 int mdt_handle_idmap(struct mdt_thread_info *);
-
 int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *,
                               struct ptlrpc_user_desc *);
-
 void mdt_body_reverse_idmap(struct mdt_thread_info *,
                             struct mdt_body *);
-
 int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *,
                                   struct mdt_remote_perm *);
-
 int mdt_fix_attr_ucred(struct mdt_thread_info *, __u32);
 
 static inline struct mdt_device *mdt_dev(struct lu_device *d)
@@ -778,11 +776,11 @@ static inline void mdt_set_capainfo(struct mdt_thread_info *info, int offset,
                                     const struct lu_fid *fid,
                                     struct lustre_capa *capa)
 {
-        struct mdt_device *dev = info->mti_mdt;
         struct md_capainfo *ci;
 
         LASSERT(offset >= 0 && offset <= MD_CAPAINFO_MAX);
-        if (!dev->mdt_opts.mo_mds_capa)
+        if (!info->mti_mdt->mdt_opts.mo_mds_capa ||
+            !(info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA))
                 return;
 
         ci = md_capainfo(info->mti_env);
@@ -815,5 +813,9 @@ static inline void mdt_dump_capainfo(struct mdt_thread_info *info)
         }
 }
 
+static inline struct obd_device *mdt2obd_dev(const struct mdt_device *mdt)
+{       /* Accessor: the ld_obd of the lu_device embedded in this MDT. */
+        return mdt->mdt_md_dev.md_lu_dev.ld_obd;
+}
 #endif /* __KERNEL__ */
 #endif /* _MDT_H */
index c2abdf1..d3bbed9 100644 (file)
@@ -92,12 +92,12 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
                           void *buf)
 {
         struct ptlrpc_request   *req = mdt_info_req(info);
-        struct mdt_export_data  *med = mdt_req2med(req);
         struct mdt_device       *mdt = info->mti_mdt;
         struct ptlrpc_user_desc *pud = req->rq_user_desc;
         struct md_ucred         *ucred = mdt_ucred(info);
         lnet_nid_t               peernid = req->rq_peer.nid;
         __u32                    perm = 0;
+        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
         int                      setuid;
         int                      setgid;
         int                      rc = 0;
@@ -123,7 +123,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
         }
 
         /* sanity check: we expect the uid which client claimed is true */
-        if (med->med_rmtclient) {
+        if (remote) {
                 if (req->rq_auth_mapped_uid == INVALID_UID) {
                         CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
                         RETURN(-EACCES);
@@ -153,7 +153,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
         }
 
         if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                if (med->med_rmtclient) {
+                if (remote) {
                         CDEBUG(D_SEC, "remote client must run with identity_get "
                                "enabled!\n");
                         RETURN(-EACCES);
@@ -169,7 +169,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
                                             pud->pud_uid);
                 if (IS_ERR(identity)) {
                         if (unlikely(PTR_ERR(identity) == -EREMCHG &&
-                                     !med->med_rmtclient)) {
+                                     !remote)) {
                                 ucred->mu_identity = NULL;
                                 perm = CFS_SETUID_PERM | CFS_SETGID_PERM |
                                        CFS_SETGRP_PERM;
@@ -181,8 +181,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
                 } else {
                         ucred->mu_identity = identity;
                         perm = mdt_identity_get_perm(ucred->mu_identity,
-                                                     med->med_rmtclient,
-                                                     peernid);
+                                                     remote, peernid);
                 }
         }
 
@@ -211,7 +210,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
         /*
          * NB: remote client not allowed to setgroups anyway.
          */
-        if (!med->med_rmtclient && perm & CFS_SETGRP_PERM) {
+        if (!remote && perm & CFS_SETGRP_PERM) {
                 if (pud->pud_ngroups) {
                         /* setgroups for local client */
                         ucred->mu_ginfo = groups_alloc(pud->pud_ngroups);
@@ -241,11 +240,14 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type,
         /* XXX: need to process root_squash here. */
         mdt_root_squash(info);
 
-        /* remove fs privilege for non-root user */
+        /* remove fs privilege for non-root user. */
         if (ucred->mu_fsuid)
                 ucred->mu_cap = pud->pud_cap & ~CFS_CAP_FS_MASK;
         else
                 ucred->mu_cap = pud->pud_cap;
+        if (remote && !(perm & CFS_RMTOWN_PERM))
+                ucred->mu_cap &= ~(CFS_CAP_SYS_RESOURCE_MASK |
+                                   CFS_CAP_CHOWN_MASK);
         ucred->mu_valid = UCRED_NEW;
 
         EXIT;
@@ -269,13 +271,13 @@ out:
 int mdt_check_ucred(struct mdt_thread_info *info)
 {
         struct ptlrpc_request   *req = mdt_info_req(info);
-        struct mdt_export_data  *med = mdt_req2med(req);
         struct mdt_device       *mdt = info->mti_mdt;
         struct ptlrpc_user_desc *pud = req->rq_user_desc;
         struct md_ucred         *ucred = mdt_ucred(info);
         struct md_identity      *identity = NULL;
         lnet_nid_t               peernid = req->rq_peer.nid;
         __u32                    perm = 0;
+        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
         int                      setuid;
         int                      setgid;
         int                      rc = 0;
@@ -290,7 +292,7 @@ int mdt_check_ucred(struct mdt_thread_info *info)
 
         /* sanity check: if we use strong authentication, we expect the
          * uid which client claimed is true */
-        if (med->med_rmtclient) {
+        if (remote) {
                 if (req->rq_auth_mapped_uid == INVALID_UID) {
                         CDEBUG(D_SEC, "remote user not mapped, deny access!\n");
                         RETURN(-EACCES);
@@ -320,7 +322,7 @@ int mdt_check_ucred(struct mdt_thread_info *info)
         }
 
         if (is_identity_get_disabled(mdt->mdt_identity_cache)) {
-                if (med->med_rmtclient) {
+                if (remote) {
                         CDEBUG(D_SEC, "remote client must run with identity_get "
                                "enabled!\n");
                         RETURN(-EACCES);
@@ -331,7 +333,7 @@ int mdt_check_ucred(struct mdt_thread_info *info)
         identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid);
         if (IS_ERR(identity)) {
                 if (unlikely(PTR_ERR(identity) == -EREMCHG &&
-                             !med->med_rmtclient)) {
+                             !remote)) {
                         RETURN(0);
                 } else {
                         CDEBUG(D_SEC, "Deny access without identity: uid %u\n",
@@ -340,7 +342,7 @@ int mdt_check_ucred(struct mdt_thread_info *info)
                }
         }
 
-        perm = mdt_identity_get_perm(identity, med->med_rmtclient, peernid);
+        perm = mdt_identity_get_perm(identity, remote, peernid);
         /* find out the setuid/setgid attempt */
         setuid = (pud->pud_uid != pud->pud_fsuid);
         setgid = (pud->pud_gid != pud->pud_fsgid ||
@@ -404,7 +406,7 @@ static int old_init_ucred(struct mdt_thread_info *info,
         /* XXX: need to process root_squash here. */
         mdt_root_squash(info);
 
-        /* remove fs privilege for non-root user */
+        /* remove fs privilege for non-root user. */
         if (uc->mu_fsuid)
                 uc->mu_cap = body->capability & ~CFS_CAP_FS_MASK;
         else
@@ -444,7 +446,7 @@ static int old_init_ucred_reint(struct mdt_thread_info *info)
         /* XXX: need to process root_squash here. */
         mdt_root_squash(info);
 
-        /* remove fs privilege for non-root user */
+        /* remove fs privilege for non-root user. */
         if (uc->mu_fsuid)
                 uc->mu_cap &= ~CFS_CAP_FS_MASK;
         uc->mu_valid = UCRED_OLD;
@@ -571,6 +573,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
 {
         struct mdt_body       *repbody;
         const struct lu_attr *la = &ma->ma_attr;
+        int rc;
         ENTRY;
 
         repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
@@ -605,6 +608,21 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
                 repbody->valid |= OBD_MD_FLCOOKIE;
         }
 
+        if (info->mti_mdt->mdt_opts.mo_oss_capa &&
+            info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA &&
+            repbody->valid & OBD_MD_FLEASIZE) {
+                struct lustre_capa *capa;
+
+                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
+                LASSERT(capa);
+                capa->lc_opc = CAPA_OPC_OSS_DESTROY;
+                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
+                if (rc)
+                        RETURN(rc);
+
+                repbody->valid |= OBD_MD_FLOSSCAPA;
+        }
+
         RETURN(0);
 }
 
index 0e95718..37c1375 100644 (file)
@@ -425,6 +425,39 @@ static int lprocfs_mdt_wr_evict_client(struct file *file, const char *buffer,
         return count;
 }
 
+static int lprocfs_rd_sec_level(char *page, char **start, off_t off,
+                                int count, int *eof, void *data)
+{
+        /* Print the MDT's mdt_sec_level as "<decimal>\n" into page. */
+        struct obd_device *owner = data;
+        int level = mdt_dev(owner->obd_lu_dev)->mdt_sec_level;
+        return snprintf(page, count, "%d\n", level);
+}
+
+static int lprocfs_wr_sec_level(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *owner = data;
+        struct mdt_device *target = mdt_dev(owner->obd_lu_dev);
+        int level, err;
+
+        err = lprocfs_write_helper(buffer, count, &level);
+        if (err != 0) /* helper reported failure: hand its code back */
+                return err;
+
+        /* Only levels within [LUSTRE_SEC_NONE, LUSTRE_SEC_ALL] are accepted. */
+        if (level < LUSTRE_SEC_NONE || level > LUSTRE_SEC_ALL)
+                return -EINVAL;
+        if (level == LUSTRE_SEC_SPECIFY) { /* in range, yet refused for now */
+                CWARN("security level %d will be supported in future.\n",
+                      LUSTRE_SEC_SPECIFY);
+                return -EINVAL;
+        }
+
+        target->mdt_sec_level = level;
+        return count;
+}
+
 static int lprocfs_rd_cos(char *page, char **start, off_t off,
                               int count, int *eof, void *data)
 {
@@ -470,6 +503,8 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
         { "site_stats",                 lprocfs_rd_site_stats,           0, 0 },
         { "evict_client",               0, lprocfs_mdt_wr_evict_client,     0 },
         { "hash_stats",                 lprocfs_obd_rd_hash,    0, 0 },
+        { "sec_level",                  lprocfs_rd_sec_level,
+                                        lprocfs_wr_sec_level,               0 },
         { "commit_on_sharing",          lprocfs_rd_cos, lprocfs_wr_cos, 0 },
         { 0 }
 };
index 4c4690d..916e3e0 100644 (file)
@@ -501,8 +501,8 @@ static int mdt_finish_open(struct mdt_thread_info *info,
                            int flags, int created, struct ldlm_reply *rep)
 {
         struct ptlrpc_request   *req = mdt_info_req(info);
+        struct obd_export       *exp = req->rq_export;
         struct mdt_export_data  *med = &req->rq_export->exp_mdt_data;
-        struct mdt_device       *mdt = info->mti_mdt;
         struct md_attr          *ma  = &info->mti_attr;
         struct lu_attr          *la  = &ma->ma_attr;
         struct mdt_file_data    *mfd;
@@ -521,7 +521,7 @@ static int mdt_finish_open(struct mdt_thread_info *info,
         islnk = S_ISLNK(la->la_mode);
         mdt_pack_attr2body(info, repbody, la, mdt_object_fid(o));
 
-        if (med->med_rmtclient) {
+        if (exp_connect_rmtclient(exp)) {
                 void *buf = req_capsule_server_get(info->mti_pill, &RMF_ACL);
 
                 rc = mdt_pack_remote_perm(info, o, buf);
@@ -534,7 +534,7 @@ static int mdt_finish_open(struct mdt_thread_info *info,
                 }
         }
 #ifdef CONFIG_FS_POSIX_ACL
-        else if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) {
+        else if (exp->exp_connect_flags & OBD_CONNECT_ACL) {
                 const struct lu_env *env = info->mti_env;
                 struct md_object *next = mdt_object_child(o);
                 struct lu_buf *buf = &info->mti_buf;
@@ -564,26 +564,26 @@ static int mdt_finish_open(struct mdt_thread_info *info,
         }
 #endif
 
-        if (mdt->mdt_opts.mo_mds_capa) {
+        if (info->mti_mdt->mdt_opts.mo_mds_capa &&
+            exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) {
                 struct lustre_capa *capa;
 
                 capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA1);
                 LASSERT(capa);
                 capa->lc_opc = CAPA_OPC_MDS_DEFAULT;
-                capa->lc_uid = 0;
                 rc = mo_capa_get(info->mti_env, mdt_object_child(o), capa, 0);
                 if (rc)
                         RETURN(rc);
                 repbody->valid |= OBD_MD_FLMDSCAPA;
         }
-        if (mdt->mdt_opts.mo_oss_capa &&
+        if (info->mti_mdt->mdt_opts.mo_oss_capa &&
+            exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA &&
             S_ISREG(lu_object_attr(&o->mot_obj.mo_lu))) {
                 struct lustre_capa *capa;
 
                 capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
                 LASSERT(capa);
                 capa->lc_opc = CAPA_OPC_OSS_DEFAULT | capa_open_opc(flags);
-                capa->lc_uid = 0;
                 rc = mo_capa_get(info->mti_env, mdt_object_child(o), capa, 0);
                 if (rc)
                         RETURN(rc);
index 4853c3e..1286919 100644 (file)
@@ -101,7 +101,7 @@ int mdt_record_write(const struct lu_env *env,
 
         LASSERTF(dt != NULL, "dt is NULL when we want to write record\n");
         LASSERT(th != NULL);
-        rc = dt->do_body_ops->dbo_write(env, dt, buf, pos, th, BYPASS_CAPA);
+        rc = dt->do_body_ops->dbo_write(env, dt, buf, pos, th, BYPASS_CAPA, 1);
         if (rc == buf->lb_len)
                 rc = 0;
         else if (rc >= 0)
@@ -329,7 +329,7 @@ static int mdt_clients_data_init(const struct lu_env *env,
 {
         struct lr_server_data  *lsd = &mdt->mdt_lsd;
         struct lsd_client_data *lcd = NULL;
-        struct obd_device      *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device      *obd = mdt2obd_dev(mdt);
         loff_t off;
         int cl_idx;
         int rc = 0;
@@ -423,7 +423,7 @@ static int mdt_server_data_init(const struct lu_env *env,
 {
         struct lr_server_data  *lsd = &mdt->mdt_lsd;
         struct lsd_client_data *lcd = NULL;
-        struct obd_device      *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device      *obd = mdt2obd_dev(mdt);
         struct mdt_thread_info *mti;
         struct dt_object       *obj;
         struct lu_attr         *la;
@@ -561,7 +561,7 @@ static int mdt_server_data_update(const struct lu_env *env,
 void mdt_cb_new_client(const struct mdt_device *mdt, __u64 transno,
                                   void *data, int err)
 {
-        struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device *obd = mdt2obd_dev(mdt);
 
         target_client_add_cb(obd, transno, data, err);
 }
@@ -573,7 +573,7 @@ int mdt_client_new(const struct lu_env *env, struct mdt_device *mdt)
         struct mdt_export_data *med;
         struct lsd_client_data *lcd;
         struct lr_server_data  *lsd = &mdt->mdt_lsd;
-        struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device *obd = mdt2obd_dev(mdt);
         struct thandle *th;
         loff_t off;
         int rc;
@@ -649,7 +649,7 @@ int mdt_client_add(const struct lu_env *env,
         struct mdt_thread_info *mti;
         struct mdt_export_data *med;
         unsigned long *bitmap = mdt->mdt_client_bitmap;
-        struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device *obd = mdt2obd_dev(mdt);
         struct lr_server_data *lsd = &mdt->mdt_lsd;
         int rc = 0;
         ENTRY;
@@ -691,7 +691,7 @@ int mdt_client_del(const struct lu_env *env, struct mdt_device *mdt)
         struct mdt_thread_info *mti;
         struct mdt_export_data *med;
         struct lsd_client_data *lcd;
-        struct obd_device      *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+        struct obd_device      *obd = mdt2obd_dev(mdt);
         struct thandle *th;
         loff_t off;
         int rc = 0;
@@ -918,7 +918,7 @@ static int mdt_txn_commit_cb(const struct lu_env *env,
                              struct thandle *txn, void *cookie)
 {
         struct mdt_device *mdt = cookie;
-        struct obd_device *obd = md2lu_dev(&mdt->mdt_md_dev)->ld_obd;
+        struct obd_device *obd = mdt2obd_dev(mdt);
         struct mdt_txn_info *txi;
         int i;
 
index d42e20f..2fb2fde 100644 (file)
@@ -80,7 +80,8 @@ static int mdt_create_pack_capa(struct mdt_thread_info *info, int rc,
         if (repbody->valid & OBD_MD_FLMDSCAPA)
                 RETURN(rc);
 
-        if (rc == 0 && info->mti_mdt->mdt_opts.mo_mds_capa) {
+        if (rc == 0 && info->mti_mdt->mdt_opts.mo_mds_capa &&
+            info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) {
                 struct lustre_capa *capa;
 
                 capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA1);
@@ -291,7 +292,6 @@ out_unlock:
 static int mdt_reint_setattr(struct mdt_thread_info *info,
                              struct mdt_lock_handle *lhc)
 {
-        struct mdt_device       *mdt = info->mti_mdt;
         struct md_attr          *ma = &info->mti_attr;
         struct mdt_reint_record *rr = &info->mti_rr;
         struct ptlrpc_request   *req = mdt_info_req(info);
@@ -387,7 +387,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
 
         mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo));
 
-        if (mdt->mdt_opts.mo_oss_capa &&
+        if (info->mti_mdt->mdt_opts.mo_oss_capa &&
+            info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA &&
             S_ISREG(lu_object_attr(&mo->mot_obj.mo_lu)) &&
             (ma->ma_attr.la_valid & LA_SIZE)) {
                 struct lustre_capa *capa;
index 47cce00..5a93bfe 100644 (file)
@@ -128,6 +128,8 @@ int mdt_getxattr(struct mdt_thread_info *info)
         struct mdt_body        *repbody = NULL;
         struct md_object       *next;
         struct lu_buf          *buf;
+        __u32                   remote = exp_connect_rmtclient(info->mti_exp);
+        __u32                   perm;
         int                     easize, rc;
         ENTRY;
 
@@ -147,11 +149,11 @@ int mdt_getxattr(struct mdt_thread_info *info)
         next = mdt_object_child(info->mti_object);
 
         if (info->mti_body->valid & OBD_MD_FLRMTRGETFACL) {
-                __u32 perm = mdt_identity_get_perm(uc->mu_identity,
-                                                   med->med_rmtclient,
-                                                   req->rq_peer.nid);
+                if (unlikely(!remote))
+                        GOTO(out, rc = err_serious(-EINVAL));
 
-                LASSERT(med->med_rmtclient);
+                perm = mdt_identity_get_perm(uc->mu_identity, remote,
+                                             req->rq_peer.nid);
                 if (!(perm & CFS_RMTACL_PERM))
                         GOTO(out, rc = err_serious(-EPERM));
 
@@ -197,7 +199,9 @@ int mdt_getxattr(struct mdt_thread_info *info)
                 if (rc > 0 && flags != CFS_IC_NOTHING) {
                         int rc1;
 
-                        LASSERT(med->med_rmtclient);
+                        if (unlikely(!remote))
+                                GOTO(out, rc = -EINVAL);
+
                         rc1 = lustre_posix_acl_xattr_id2client(uc,
                                         med->med_idmap,
                                         (posix_acl_xattr_header *)(buf->lb_buf),
@@ -275,7 +279,6 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
                        struct mdt_lock_handle *unused)
 {
         struct ptlrpc_request   *req = mdt_info_req(info);
-        struct mdt_export_data  *med = mdt_req2med(req);
         struct md_ucred         *uc  = mdt_ucred(info);
         const char               user_string[] = "user.";
         const char               trust_string[] = "trusted.";
@@ -294,6 +297,8 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
         __u64                    lockpart;
         int                      rc;
         posix_acl_xattr_header  *new_xattr = NULL;
+        __u32                    remote = exp_connect_rmtclient(info->mti_exp);
+        __u32                    perm;
         ENTRY;
 
         CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1));
@@ -311,11 +316,11 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
                 RETURN(rc);
 
         if (valid & OBD_MD_FLRMTRSETFACL) {
-                __u32 perm = mdt_identity_get_perm(uc->mu_identity,
-                                                   med->med_rmtclient,
-                                                   req->rq_peer.nid);
+                if (unlikely(!remote))
+                        GOTO(out, rc = err_serious(-EINVAL));
 
-                LASSERT(med->med_rmtclient);
+                perm = mdt_identity_get_perm(uc->mu_identity, remote,
+                                             req->rq_peer.nid);
                 if (!(perm & CFS_RMTACL_PERM))
                         GOTO(out, rc = err_serious(-EPERM));
         }
@@ -368,7 +373,9 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
                         xattr = req_capsule_client_get(pill, &RMF_EADATA);
 
                         if (valid & OBD_MD_FLRMTLSETFACL) {
-                                LASSERT(med->med_rmtclient);
+                                if (unlikely(!remote))
+                                        GOTO(out_unlock, rc = -EINVAL);
+
                                 xattr_len = mdt_rmtlsetfacl(info, child,
                                                 xattr_name,
                                                 (ext_acl_xattr_header *)xattr,
index 421df58..b73386f 100644 (file)
@@ -113,10 +113,11 @@ static inline int capa_on_server(struct obd_capa *ocapa)
 static inline void capa_delete(struct obd_capa *ocapa)
 {
         LASSERT(capa_on_server(ocapa));
-        hlist_del(&ocapa->u.tgt.c_hash);
-        list_del(&ocapa->c_list);
+        hlist_del_init(&ocapa->u.tgt.c_hash);
+        list_del_init(&ocapa->c_list);
         capa_count[ocapa->c_site]--;
-        free_capa(ocapa);
+        /* drop the reference taken at allocation */
+        capa_put(ocapa);
 }
 
 void cleanup_capa_hash(struct hlist_head *hash)
@@ -200,7 +201,7 @@ struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa)
         struct list_head *list = &capa_list[CAPA_SITE_SERVER];
 
         ocapa = alloc_capa(CAPA_SITE_SERVER);
-        if (!ocapa)
+        if (IS_ERR(ocapa))
                 return NULL;
 
         spin_lock(&capa_lock);
@@ -210,25 +211,18 @@ struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa)
                 set_capa_expiry(ocapa);
                 hlist_add_head(&ocapa->u.tgt.c_hash, head);
                 list_add_tail(&ocapa->c_list, list);
-                capa_count[CAPA_SITE_SERVER]++;
                 capa_get(ocapa);
-
+                capa_count[CAPA_SITE_SERVER]++;
                 if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE)
                         capa_delete_lru(list);
-
-                DEBUG_CAPA(D_SEC, &ocapa->c_capa, "new");
-                                        
                 spin_unlock(&capa_lock);
                 return ocapa;
+        } else {
+                capa_get(old);
+                spin_unlock(&capa_lock);
+                capa_put(ocapa);
+                return old;
         }
-
-        capa_get(old);
-        spin_unlock(&capa_lock);
-
-        DEBUG_CAPA(D_SEC, &old->c_capa, "update");
-
-        free_capa(ocapa);
-        return old;
 }
 
 struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa,
@@ -278,6 +272,110 @@ int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key)
 
         return 0;
 }
+
+/*
+ * Encrypt one 16-byte block with AES.
+ *
+ * d      - destination buffer, 16 bytes are written
+ * s      - source buffer, 16 bytes are read
+ * key    - raw key material
+ * keylen - number of bytes available at key
+ *
+ * Only the first crypto_tfm_alg_min_keysize() bytes of key are used as the
+ * cipher key; keylen is only checked to be at least that large.
+ *
+ * Returns 0 on success, -EINVAL when keylen is negative or shorter than the
+ * minimum key size, -EFAULT (or the ERR_PTR()-encoded error) when the "aes"
+ * transform cannot be allocated, otherwise the error returned by the setkey
+ * or encrypt call.
+ *
+ * NOTE(review): both buffers are wrapped in scatterlists via virt_to_page();
+ * confirm every caller passes memory for which that mapping is valid.
+ */
+int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen)
+{
+        struct ll_crypto_cipher *tfm;
+        struct scatterlist sd = {
+                .page   = virt_to_page(d),
+                .offset = (unsigned long)(d) % CFS_PAGE_SIZE,
+                .length = 16,
+        };
+        struct scatterlist ss = {
+                .page   = virt_to_page(s),
+                .offset = (unsigned long)(s) % CFS_PAGE_SIZE,
+                .length = 16,
+        };
+        struct blkcipher_desc desc;
+        unsigned int min;
+        int rc;
+        ENTRY;
+
+        tfm = ll_crypto_alloc_blkcipher("aes", 0, 0 );
+        /* Accept both failure conventions: NULL and an ERR_PTR() value.
+         * NOTE(review): which one applies depends on the compat wrapper,
+         * which is not visible here. */
+        if (tfm == NULL || IS_ERR(tfm)) {
+                CERROR("failed to load transform for aes\n");
+                RETURN(tfm == NULL ? -EFAULT : (int)PTR_ERR(tfm));
+        }
+
+        min = crypto_tfm_alg_min_keysize(tfm);
+        /* Check the sign first: a negative keylen would be converted to a
+         * large unsigned value and slip past the comparison with min. */
+        if (keylen < 0 || (unsigned int)keylen < min) {
+                CERROR("keylen at least %u bits for aes\n", min * 8);
+                GOTO(out, rc = -EINVAL);
+        }
+
+        /* only the minimum-size prefix of the supplied key is installed */
+        rc = ll_crypto_blkcipher_setkey(tfm, key, min);
+        if (rc) {
+                CERROR("failed to setting key for aes\n");
+                GOTO(out, rc);
+        }
+
+        desc.tfm   = tfm;
+        desc.info  = NULL;
+        desc.flags = 0;
+        rc = ll_crypto_blkcipher_encrypt(&desc, &sd, &ss, 16);
+        if (rc) {
+                CERROR("failed to encrypt for aes\n");
+                GOTO(out, rc);
+        }
+
+        EXIT;
+
+out:
+        /* the transform is per-call; release it on every exit path */
+        ll_crypto_free_blkcipher(tfm);
+        return rc;
+}
+
+/*
+ * Decrypt one 16-byte block with AES.
+ *
+ * d      - destination buffer, 16 bytes are written
+ * s      - source buffer, 16 bytes are read
+ * key    - raw key material
+ * keylen - number of bytes available at key
+ *
+ * Only the first crypto_tfm_alg_min_keysize() bytes of key are used as the
+ * cipher key; keylen is only checked to be at least that large.
+ *
+ * Returns 0 on success, -EINVAL when keylen is negative or shorter than the
+ * minimum key size, -EFAULT (or the ERR_PTR()-encoded error) when the "aes"
+ * transform cannot be allocated, otherwise the error returned by the setkey
+ * or decrypt call.
+ *
+ * NOTE(review): both buffers are wrapped in scatterlists via virt_to_page();
+ * confirm every caller passes memory for which that mapping is valid.
+ */
+int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen)
+{
+        struct ll_crypto_cipher *tfm;
+        struct scatterlist sd = {
+                .page   = virt_to_page(d),
+                .offset = (unsigned long)(d) % CFS_PAGE_SIZE,
+                .length = 16,
+        };
+        struct scatterlist ss = {
+                .page   = virt_to_page(s),
+                .offset = (unsigned long)(s) % CFS_PAGE_SIZE,
+                .length = 16,
+        };
+        struct blkcipher_desc desc;
+        unsigned int min;
+        int rc;
+        ENTRY;
+
+        tfm = ll_crypto_alloc_blkcipher("aes", 0, 0 );
+        /* Accept both failure conventions: NULL and an ERR_PTR() value.
+         * NOTE(review): which one applies depends on the compat wrapper,
+         * which is not visible here. */
+        if (tfm == NULL || IS_ERR(tfm)) {
+                CERROR("failed to load transform for aes\n");
+                RETURN(tfm == NULL ? -EFAULT : (int)PTR_ERR(tfm));
+        }
+
+        min = crypto_tfm_alg_min_keysize(tfm);
+        /* Check the sign first: a negative keylen would be converted to a
+         * large unsigned value and slip past the comparison with min. */
+        if (keylen < 0 || (unsigned int)keylen < min) {
+                CERROR("keylen at least %u bits for aes\n", min * 8);
+                GOTO(out, rc = -EINVAL);
+        }
+
+        /* only the minimum-size prefix of the supplied key is installed */
+        rc = ll_crypto_blkcipher_setkey(tfm, key, min);
+        if (rc) {
+                CERROR("failed to setting key for aes\n");
+                GOTO(out, rc);
+        }
+
+        desc.tfm   = tfm;
+        desc.info  = NULL;
+        desc.flags = 0;
+        rc = ll_crypto_blkcipher_decrypt(&desc, &sd, &ss, 16);
+        if (rc) {
+                CERROR("failed to decrypt for aes\n");
+                GOTO(out, rc);
+        }
+
+        EXIT;
+
+out:
+        /* the transform is per-call; release it on every exit path */
+        ll_crypto_free_blkcipher(tfm);
+        return rc;
+}
 #endif
 
 void capa_cpy(void *capa, struct obd_capa *ocapa)
@@ -287,22 +385,11 @@ void capa_cpy(void *capa, struct obd_capa *ocapa)
         spin_unlock(&ocapa->c_lock);
 }
 
-char *dump_capa_content(char *buf, char *key, int len)
-{
-        int i, n = 0;
-
-        for (i = 0; i < len; i++)
-                n += sprintf(buf + n, "%02x", (unsigned char) key[i]);
-        return buf;
-}
-
 EXPORT_SYMBOL(init_capa_hash);
 EXPORT_SYMBOL(cleanup_capa_hash);
-
 EXPORT_SYMBOL(capa_add);
 EXPORT_SYMBOL(capa_lookup);
-
 EXPORT_SYMBOL(capa_hmac);
+EXPORT_SYMBOL(capa_encrypt_id);
+EXPORT_SYMBOL(capa_decrypt_id);
 EXPORT_SYMBOL(capa_cpy);
-
-EXPORT_SYMBOL(dump_capa_content);
index 0803a15..d5bbe44 100644 (file)
@@ -405,7 +405,6 @@ EXPORT_SYMBOL(class_name2obd);
 EXPORT_SYMBOL(class_uuid2dev);
 EXPORT_SYMBOL(class_uuid2obd);
 EXPORT_SYMBOL(class_find_client_obd);
-EXPORT_SYMBOL(class_find_client_notype);
 EXPORT_SYMBOL(class_devices_in_group);
 EXPORT_SYMBOL(class_conn2export);
 EXPORT_SYMBOL(class_exp2obd);
index f317b95..854b5c6 100644 (file)
@@ -506,18 +506,6 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
         return NULL;
 }
 
-struct obd_device *class_find_client_notype(struct obd_uuid *tgt_uuid,
-                                            struct obd_uuid *grp_uuid)
-{
-        struct obd_device *obd;
-
-        obd = class_find_client_obd(tgt_uuid, LUSTRE_MDC_NAME, NULL);
-        if (!obd)
-                obd = class_find_client_obd(tgt_uuid, LUSTRE_OSC_NAME,
-                                            grp_uuid);
-        return obd;
-}
-
 /* Iterate the obd_device list looking devices have grp_uuid. Start
    searching at *next, and if a device is found, the next index to look
    at is saved in *next. If next is NULL, then the first matching device
index abaa9df..60ee61c 100644 (file)
@@ -760,7 +760,7 @@ static int llog_lvfs_destroy(struct llog_handle *handle)
         if (rc)
                 GOTO(out, rc);
 
-        rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL);
+        rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL, NULL);
  out:
         OBDO_FREE(oa);
         RETURN(rc);
index b9de3be..a347c0d 100644 (file)
@@ -1275,6 +1275,7 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_uuid);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, quota_adjust_qunit);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_new);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_rem);
@@ -1377,6 +1378,7 @@ int lprocfs_alloc_md_stats(struct obd_device *obd,
         LPROCFS_MD_OP_INIT(num_private_stats, stats, lock_match);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, cancel_unused);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, renew_capa);
+        LPROCFS_MD_OP_INIT(num_private_stats, stats, unpack_capa);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, get_remote_perm);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_getattr_async);
         LPROCFS_MD_OP_INIT(num_private_stats, stats, revalidate_lock);
@@ -1856,7 +1858,7 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count,
         __u64 whole, frac = 0, units;
         unsigned frac_d = 1;
 
-        if (count > (sizeof(kernbuf) - 1) )
+        if (count > (sizeof(kernbuf) - 1))
                 return -EINVAL;
 
         if (copy_from_user(kernbuf, buffer, count))
index 42d8cf1..42798fb 100644 (file)
@@ -1063,7 +1063,7 @@ EXPORT_SYMBOL(lu_context_key_register);
 
 static void key_fini(struct lu_context *ctx, int index)
 {
-        if (ctx->lc_value[index] != NULL) {
+        if (ctx->lc_value != NULL && ctx->lc_value[index] != NULL) {
                 struct lu_context_key *key;
 
                 key = lu_keys[index];
index c4021ba..128343e 100644 (file)
@@ -888,6 +888,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
         int i, keylen, vallen;
         int matched = 0, j = 0;
         int rc = 0;
+        int skip = 0;
         ENTRY;
 
         if (lcfg->lcfg_command != LCFG_PARAM) {
@@ -943,6 +944,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
                         CERROR("%s: unknown param %s\n",
                                (char *)lustre_cfg_string(lcfg, 0), key);
                         /* rc = -EINVAL;       continue parsing other params */
+                        skip++;
                 } else {
                         LCONSOLE_INFO("%s.%.*s: set parameter %.*s=%s\n",
                                       lustre_cfg_string(lcfg, 0),
@@ -953,6 +955,8 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 
         if (rc > 0)
                 rc = 0;
+        if (!rc && skip)
+                rc = skip;
         RETURN(rc);
 #else
         CDEBUG(D_CONFIG, "liblustre can't process params.\n");
index 52609b0..ebc3bba 100644 (file)
@@ -153,7 +153,7 @@ int echo_create(struct obd_export *exp, struct obdo *oa,
 
 int echo_destroy(struct obd_export *exp, struct obdo *oa,
                  struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                 struct obd_export *md_exp)
+                 struct obd_export *md_exp, void *capa)
 {
         struct obd_device *obd = class_exp2obd(exp);
 
index 225ec5a..01b9572 100644 (file)
@@ -1301,7 +1301,7 @@ static int echo_create_object(struct echo_device *ed, int on_target,
 
  failed:
         if (created && rc)
-                obd_destroy(ec->ec_exp, oa, lsm, oti, NULL);
+                obd_destroy(ec->ec_exp, oa, lsm, oti, NULL, NULL);
         if (lsm)
                 obd_free_memmd(ec->ec_exp, &lsm);
         if (rc)
@@ -1637,7 +1637,7 @@ static int echo_client_brw_ioctl(int rw, struct obd_export *exp,
         struct obd_device *obd = class_exp2obd(exp);
         struct echo_device *ed = obd2echo_dev(obd);
         struct echo_client_obd *ec = ed->ed_ec;
-        struct obd_trans_info dummy_oti = { .oti_thread_id = -1 };
+        struct obd_trans_info dummy_oti = { .oti_thread = NULL };
         struct echo_object *eco;
         int rc;
         int async = 1;
@@ -1762,7 +1762,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp,
                         oa->o_gr = FILTER_GROUP_ECHO;
                         oa->o_valid |= OBD_MD_FLGROUP;
                         rc = obd_destroy(ec->ec_exp, oa, eco->eo_lsm,
-                                         &dummy_oti, NULL);
+                                         &dummy_oti, NULL, NULL);
                         if (rc == 0)
                                 eco->eo_deleted = 1;
                         echo_put_object(eco);
index de162b1..593f37b 100644 (file)
@@ -1915,7 +1915,8 @@ static int filter_iobuf_pool_init(struct filter_obd *filter)
  * If we haven't allocated a pool entry for this thread before, do so now. */
 void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti)
 {
-        int thread_id                    = oti ? oti->oti_thread_id : -1;
+        int thread_id                    = (oti && oti->oti_thread) ?
+                                           oti->oti_thread->t_id : -1;
         struct filter_iobuf  *pool       = NULL;
         struct filter_iobuf **pool_place = NULL;
 
@@ -2042,7 +2043,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
         rwlock_init(&filter->fo_sptlrpc_lock);
         sptlrpc_rule_set_init(&filter->fo_sptlrpc_rset);
 
-        filter->fo_fl_oss_capa = 0;
+        filter->fo_fl_oss_capa = 1;
         CFS_INIT_LIST_HEAD(&filter->fo_capa_keys);
         filter->fo_capa_hash = init_capa_hash();
         if (filter->fo_capa_hash == NULL)
@@ -2920,9 +2921,7 @@ static int filter_destroy_export(struct obd_export *exp)
                        exp->exp_obd->obd_name, exp->exp_client_uuid.uuid,
                        exp, exp->exp_filter_data.fed_pending);
 
-        /* Not ported yet the b1_6 quota functionality
-         * lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
-         */
+        lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
 
         target_destroy_export(exp);
         ldlm_destroy_export(exp);
@@ -3299,43 +3298,52 @@ out_unlock:
 int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
                    struct obd_trans_info *oti)
 {
+        struct obdo *oa = oinfo->oi_oa;
+        struct lustre_capa *capa = oinfo_capa(oinfo);
         struct ldlm_res_id res_id;
         struct filter_mod_data *fmd;
         struct lvfs_run_ctxt saved;
         struct filter_obd *filter;
         struct ldlm_resource *res;
         struct dentry *dentry;
+        __u64 opc = CAPA_OPC_META_WRITE;
         int rc;
         ENTRY;
 
-        osc_build_res_name(oinfo->oi_oa->o_id, oinfo->oi_oa->o_gr, &res_id);
-        rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo),
-                              oinfo_capa(oinfo), CAPA_OPC_META_WRITE);
+        if (oa->o_valid & OBD_FL_TRUNC)
+                opc |= CAPA_OPC_OSS_TRUNC;
+        rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa), capa, opc);
         if (rc)
                 RETURN(rc);
 
+        if (oa->o_valid & (OBD_MD_FLUID | OBD_MD_FLGID)) {
+                rc = filter_capa_fixoa(exp, oa, obdo_mdsno(oa), capa);
+                if (rc)
+                        RETURN(rc);
+        }
+
+        osc_build_res_name(oa->o_id, oa->o_gr, &res_id);
         /* This would be very bad - accidentally truncating a file when
          * changing the time or similar - bug 12203. */
-        if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE &&
+        if (oa->o_valid & OBD_MD_FLSIZE &&
             oinfo->oi_policy.l_extent.end != OBD_OBJECT_EOF) {
                 static char mdsinum[48];
 
-                if (oinfo->oi_oa->o_valid & OBD_MD_FLFID)
+                if (oa->o_valid & OBD_MD_FLFID)
                         snprintf(mdsinum, sizeof(mdsinum) - 1,
-                                 " of inode "LPU64"/%u", oinfo->oi_oa->o_fid,
-                                 oinfo->oi_oa->o_generation);
+                                 " of inode "LPU64"/%u", oa->o_fid,
+                                 oa->o_generation);
                 else
                         mdsinum[0] = '\0';
 
                 CERROR("%s: setattr from %s trying to truncate objid "LPU64
                        " %s\n",
                        exp->exp_obd->obd_name, obd_export_nid2str(exp),
-                       oinfo->oi_oa->o_id, mdsinum);
+                       oa->o_id, mdsinum);
                 RETURN(-EPERM);
         }
 
-        dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa,
-                                    __FUNCTION__, 1);
+        dentry = __filter_oa2dentry(exp->exp_obd, oa, __FUNCTION__, 1);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
 
@@ -3343,16 +3351,16 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
         push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
         lock_kernel();
 
-        if (oinfo->oi_oa->o_valid &
+        if (oa->o_valid &
             (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) {
-                fmd = filter_fmd_get(exp,oinfo->oi_oa->o_id,oinfo->oi_oa->o_gr);
+                fmd = filter_fmd_get(exp, oa->o_id, oa->o_gr);
                 if (fmd && fmd->fmd_mactime_xid < oti->oti_xid)
                         fmd->fmd_mactime_xid = oti->oti_xid;
                 filter_fmd_put(exp, fmd);
         }
 
         /* setting objects attributes (including owner/group) */
-        rc = filter_setattr_internal(exp, dentry, oinfo->oi_oa, oti);
+        rc = filter_setattr_internal(exp, dentry, oa, oti);
         if (rc)
                 GOTO(out_unlock, rc);
 
@@ -3366,10 +3374,10 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
                 ldlm_resource_putref(res);
         }
 
-        oinfo->oi_oa->o_valid = OBD_MD_FLID;
+        oa->o_valid = OBD_MD_FLID;
 
         /* Quota release need uid/gid info */
-        obdo_from_inode(oinfo->oi_oa, dentry->d_inode,
+        obdo_from_inode(oa, dentry->d_inode,
                         FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
 
         EXIT;
@@ -3469,7 +3477,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
 
         for (id = last; id > oa->o_id; id--) {
                 doa.o_id = id;
-                rc = filter_destroy(exp, &doa, NULL, NULL, NULL);
+                rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL);
                 if (rc && rc != -ENOENT) /* this is pretty fatal... */
                         CEMERG("error destroying precreate objid "LPU64": %d\n",
                                id, rc);
@@ -3888,7 +3896,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
 
 int filter_destroy(struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md *md, struct obd_trans_info *oti,
-                   struct obd_export *md_exp)
+                   struct obd_export *md_exp, void *capa)
 {
         unsigned int qcids[MAXQUOTAS] = {0, 0};
         struct obd_device *obd;
@@ -3903,6 +3911,11 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
 
         LASSERT(oa->o_valid & OBD_MD_FLGROUP);
 
+        rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa),
+                              (struct lustre_capa *)capa, CAPA_OPC_OSS_DESTROY);
+        if (rc)
+                RETURN(rc);
+
         obd = exp->exp_obd;
         filter = &obd->u.filter;
 
@@ -4047,9 +4060,8 @@ cleanup:
         qcids[GRPQUOTA] = oa->o_gid;
         rc2 = lquota_adjust(filter_quota_interface_ref, obd, qcids, NULL, rc,
                             FSFILT_OP_UNLINK);
-
         if (rc2)
-                CDEBUG(D_QUOTA, "filter adjust qunit! (rc:%d)\n", rc2);
+                CERROR("filter adjust qunit! (rc:%d)\n", rc2);
         return rc;
 }
 
@@ -4071,13 +4083,10 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo,
                ", o_size = "LPD64"\n", oinfo->oi_oa->o_id,
                oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start);
 
-        rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo),
-                              oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC);
-        if (rc)
-                RETURN(rc);
-
         oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start;
+        oinfo->oi_oa->o_valid |= OBD_FL_TRUNC;
         rc = filter_setattr(exp, oinfo, oti);
+        oinfo->oi_oa->o_valid &= ~OBD_FL_TRUNC;
         RETURN(rc);
 }
 
@@ -4246,6 +4255,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
 
         if (KEY_IS(KEY_REVIMP_UPD)) {
                 filter_revimp_update(exp);
+                lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd);
                 RETURN(0);
         }
 
@@ -4273,7 +4283,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
         rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse);
         llog_ctxt_put(ctxt);
 
-        lquota_setinfo(filter_quota_interface_ref, exp, obd);
+        lquota_setinfo(filter_quota_interface_ref, obd, exp);
 
         RETURN(rc);
 }
@@ -4414,6 +4424,8 @@ static int filter_process_config(struct obd_device *obd, obd_count len,
 
                 rc = class_process_proc_param(PARAM_OST, lvars.obd_vars,
                                               lcfg, obd);
+               if (rc > 0)
+                       rc = 0;
                 break;
         }
 
index 1fbddb6..bc43a6a 100644 (file)
@@ -128,6 +128,9 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
         if (!filter->fo_fl_oss_capa)
                 RETURN(0);
 
+        if (!(exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA))
+                RETURN(0);
+
         if (capa == NULL) {
                 if (fid)
                         CERROR("mdsno/fid/opc "LPU64"/"DFID"/"LPX64
@@ -164,8 +167,13 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
                 RETURN(rc);
         }
 
+        if (capa_is_expired_sec(capa)) {
+                DEBUG_CAPA(D_ERROR, capa, "expired");
+                RETURN(-ESTALE);
+        }
+
         spin_lock(&capa_lock);
-        list_for_each_entry(k, &filter->fo_capa_keys, k_list)
+        list_for_each_entry(k, &filter->fo_capa_keys, k_list) {
                 if (k->k_key.lk_mdsid == mdsid) {
                         keys_ready = 1;
                         if (k->k_key.lk_keyid == capa_keyid(capa)) {
@@ -174,6 +182,7 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
                                 break;
                         }
                 }
+        }
         spin_unlock(&capa_lock);
 
         if (!keys_ready) {
@@ -212,6 +221,64 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
         RETURN(0);
 }
 
+/*
+ * Overwrite oa->o_uid / oa->o_gid with the IDs carried in encrypted form by
+ * the capability.
+ *
+ * Nothing is done (0 is returned) when the export did not negotiate
+ * OBD_CONNECT_OSS_CAPA, or when the capability flags are not exactly
+ * LC_ID_CONVERT.  Otherwise the capability key matching both mdsid and the
+ * capability's key id is looked up in fo_capa_keys, the capability's uid and
+ * gid fields are decrypted as one 16-byte block, and the first and third
+ * 32-bit words of the result become o_uid and o_gid.
+ *
+ * Returns 0 on success or when there is nothing to do, -EACCES when the
+ * export uses OSS capabilities but capa is NULL, -ESTALE when no matching
+ * key is known, or the error returned by capa_decrypt_id().
+ */
+int filter_capa_fixoa(struct obd_export *exp, struct obdo *oa, __u64 mdsid,
+                      struct lustre_capa *capa)
+{
+        int rc = 0;
+        ENTRY;
+
+        if (!(exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA))
+                RETURN(0);
+
+        if (unlikely(!capa))
+                RETURN(-EACCES);
+
+        if (capa_flags(capa) == LC_ID_CONVERT) {
+                struct obd_device *obd = exp->exp_obd;
+                struct filter_obd *filter = &obd->u.filter;
+                struct filter_capa_key *k;
+                /* NOTE(review): assumes lk_key holds at least
+                 * CAPA_HMAC_KEY_MAX_LEN bytes, which is the length the
+                 * original call already advertised to capa_decrypt_id(). */
+                __u8 key[CAPA_HMAC_KEY_MAX_LEN];
+                int found = 0;
+
+                spin_lock(&capa_lock);
+                list_for_each_entry(k, &filter->fo_capa_keys, k_list) {
+                        if (k->k_key.lk_mdsid == mdsid &&
+                            k->k_key.lk_keyid == capa_keyid(capa)) {
+                                /* Copy the key while capa_lock is held so
+                                 * the list entry is never touched after
+                                 * the lock is dropped. */
+                                memcpy(key, k->k_key.lk_key, sizeof(key));
+                                found = 1;
+                                break;
+                        }
+                }
+                spin_unlock(&capa_lock);
+
+                if (found) {
+                        union {
+                                __u64 id64;
+                                __u32 id32[2];
+                        } uid, gid;
+                        __u32 d[4], s[4];
+
+                        /* lay the two 64-bit fields out as one 16-byte
+                         * cipher block: uid words first, then gid words */
+                        uid.id64 = capa_uid(capa);
+                        gid.id64 = capa_gid(capa);
+                        s[0] = uid.id32[0];
+                        s[1] = uid.id32[1];
+                        s[2] = gid.id32[0];
+                        s[3] = gid.id32[1];
+
+                        rc = capa_decrypt_id(d, s, key,
+                                             CAPA_HMAC_KEY_MAX_LEN);
+                        if (unlikely(rc))
+                                RETURN(rc);
+
+                        /* words 1 and 3 of the plaintext are not used */
+                        oa->o_uid = d[0];
+                        oa->o_gid = d[2];
+                } else {
+                        DEBUG_CAPA(D_ERROR, capa, "no matched capability key for");
+                        rc = -ESTALE;
+                }
+        }
+
+        RETURN(rc);
+}
+
 void filter_free_capa_keys(struct filter_obd *filter)
 {
         struct filter_capa_key *key, *n;
index fb82e44..e5db720 100644 (file)
@@ -150,7 +150,7 @@ int filter_common_setup(struct obd_device *, struct lustre_cfg *lcfg,
                         void *option);
 int filter_destroy(struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md *md, struct obd_trans_info *,
-                   struct obd_export *);
+                   struct obd_export *, void *);
 int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                             struct obdo *oa, struct obd_trans_info *oti);
 int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
@@ -244,6 +244,8 @@ static inline __u64 obdo_mdsno(struct obdo *oa)
 int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key);
 int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid,
                      struct lustre_capa *capa, __u64 opc);
+int filter_capa_fixoa(struct obd_export *exp, struct obdo *oa, __u64 mdsid,
+                      struct lustre_capa *capa);
 void filter_free_capa_keys(struct filter_obd *filter);
 
 void blacklist_add(uid_t uid);
index 5bab0b6..07b2f9c 100644 (file)
@@ -328,8 +328,11 @@ void filter_invalidate_cache(struct obd_device *obd, struct obd_ioobj *obj,
         LASSERT(inode != NULL);
 
         for (i = 0, rnb = nb; i < obj->ioo_bufcnt; i++, rnb++) {
-                obd_off start = rnb->offset >> CFS_PAGE_SHIFT;
-                obd_off end = (rnb->offset + rnb->len) >> CFS_PAGE_SHIFT;
+                obd_off start;
+                obd_off end;
+
+                start = rnb->offset >> CFS_PAGE_SHIFT;
+                end = (rnb->offset + rnb->len) >> CFS_PAGE_SHIFT;
                 invalidate_mapping_pages(inode->i_mapping, start, end);
                 /* just to avoid warnings */
                 start = 0;
@@ -650,6 +653,13 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
                 GOTO(cleanup, rc = -ENOENT);
         }
 
+        if (oa->o_valid & (OBD_MD_FLUID | OBD_MD_FLGID) &&
+            dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) {
+                rc = filter_capa_fixoa(exp, oa, obdo_mdsno(oa), capa);
+                if (rc)
+                        GOTO(cleanup, rc);
+        }
+
         rc = filter_map_remote_to_local(objcount, obj, nb, npages, res);
         if (rc)
                 GOTO(cleanup, rc);
index 8940804..aa49c15 100644 (file)
@@ -246,7 +246,8 @@ void filter_free_iobuf(struct filter_iobuf *iobuf)
 void filter_iobuf_put(struct filter_obd *filter, struct filter_iobuf *iobuf,
                       struct obd_trans_info *oti)
 {
-        int thread_id = oti ? oti->oti_thread_id : -1;
+        int thread_id = (oti && oti->oti_thread) ?
+                        oti->oti_thread->t_id : -1;
 
         if (unlikely(thread_id < 0)) {
                 filter_free_iobuf(iobuf);
@@ -556,7 +557,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         struct obd_device *obd = exp->exp_obd;
         struct filter_obd *fo = &obd->u.filter;
         void *wait_handle;
-        int   total_size = 0, rc2;
+        int total_size = 0;
+        int rec_pending = 0;
         unsigned int qcids[MAXQUOTAS] = {0, 0};
         ENTRY;
 
@@ -567,21 +569,11 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         if (rc != 0)
                 GOTO(cleanup, rc);
 
-        /* Unfortunately, if quota master is too busy to handle the
-         * pre-dqacq in time and quota hash on ost is used up, we
-         * have to wait for the completion of in flight dqacq/dqrel,
-         * then try again */
-        if ((rc2 = lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid,
-                                   oa->o_gid, niocount)) == QUOTA_RET_ACQUOTA) {
-                OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
-                lquota_acquire(filter_quota_interface_ref, obd, oa->o_uid,
-                               oa->o_gid);
-        }
-
-        if (rc2 < 0) {
-                rc = rc2;
-                GOTO(cleanup, rc);
-        }
+        /* Try to acquire enough quota for the write here, and let ldiskfs
+         * decide whether it is out of quota or not (b=14783). */
+        lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid,
+                        oa->o_gid, niocount, &rec_pending, oti,
+                        LQUOTA_FLAGS_BLK);
 
         iobuf = filter_iobuf_get(&obd->u.filter, oti);
         if (IS_ERR(iobuf))
@@ -595,9 +587,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         iobuf->dr_ignore_quota = 0;
         for (i = 0, lnb = res; i < niocount; i++, lnb++) {
                 loff_t this_size;
+                __u32 flags = lnb->flags;
 
                 /* If overwriting an existing block, we don't need a grant */
-                if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
+                if (!(flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
                     filter_range_is_mapped(inode, lnb->offset, lnb->len))
                         lnb->rc = 0;
 
@@ -627,10 +620,15 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
                 if (this_size > iattr.ia_size)
                         iattr.ia_size = this_size;
 
-                /* if one page is a write-back page from client cache, or it's
-                 * written by root, then mark the whole io request as ignore
-                 * quota request */
-                if (lnb->flags & (OBD_BRW_FROM_GRANT | OBD_BRW_NOQUOTA))
+                /* If one page is a write-back page from client cache and
+                 * not from direct_io, or it's written by root, then mark
+                 * the whole io request as ignore-quota. The root override
+                 * (OBD_BRW_NOQUOTA) is not honored for remote clients. */
+                if (exp_connect_rmtclient(exp))
+                        flags &= ~OBD_BRW_NOQUOTA;
+                if ((flags & OBD_BRW_NOQUOTA) ||
+                    (flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) ==
+                     OBD_BRW_FROM_GRANT)
                         iobuf->dr_ignore_quota = 1;
         }
 
@@ -721,6 +719,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         fsfilt_check_slow(obd, now, "commitrw commit");
 
 cleanup:
+        if (rec_pending)
+                lquota_pending_commit(filter_quota_interface_ref, obd, oa->o_uid,
+                                      oa->o_gid, niocount, 1);
+
         filter_grant_commit(exp, niocount, res);
 
         switch (cleanup_phase) {
index 0e769ad..2462a07 100644 (file)
@@ -173,7 +173,7 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt,
         oa->o_lcookie = *cookie;
         oid = oa->o_id;
 
-        rc = filter_destroy(exp, oa, NULL, NULL, NULL);
+        rc = filter_destroy(exp, oa, NULL, NULL, NULL, NULL);
         OBDO_FREE(oa);
         if (rc == -ENOENT) {
                 CDEBUG(D_RPCTRACE, "object already removed, send cookie\n");
index cbe0753..7810acc 100644 (file)
@@ -143,7 +143,6 @@ int lprocfs_filter_wr_readcache(struct file *file, const char *buffer,
         return count;
 }
 
-
 int lprocfs_filter_rd_fmd_max_num(char *page, char **start, off_t off,
                                   int count, int *eof, void *data)
 {
@@ -242,6 +241,37 @@ static int lprocfs_filter_rd_capa_count(char *page, char **start, off_t off,
                         capa_count[CAPA_SITE_SERVER]);
 }
 
+static int lprocfs_rd_sec_level(char *page, char **start, off_t off,
+                                int count, int *eof, void *data)
+{
+        struct obd_device *obd = data; /* @data is the obd device */
+
+        return snprintf(page, count, "%d\n", obd->u.filter.fo_sec_level);
+}
+
+static int lprocfs_wr_sec_level(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *obd = data; /* @data is the obd device */
+        int val, rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val); /* should set val */
+        if (rc)
+                return rc;
+
+        if (val > LUSTRE_SEC_ALL || val < LUSTRE_SEC_NONE) /* out of range */
+                return -EINVAL;
+
+        if (val == LUSTRE_SEC_SPECIFY) { /* in range, but refused for now */
+                CWARN("security level %d will be supported in future.\n",
+                      LUSTRE_SEC_SPECIFY);
+                return -EINVAL;
+        }
+
+        obd->u.filter.fo_sec_level = val;
+        return count; /* success returns @count */
+}
+
 static int lprocfs_filter_rd_cache(char *page, char **start, off_t off,
                                    int count, int *eof, void *data)
 {
@@ -318,11 +348,8 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = {
                           lprocfs_filter_rd_readcache,
                           lprocfs_filter_wr_readcache, 0 },
 #ifdef HAVE_QUOTA_SUPPORT
-        { "quota_bunit_sz", lprocfs_rd_bunit, lprocfs_wr_bunit, 0},
-        { "quota_btune_sz", lprocfs_rd_btune, lprocfs_wr_btune, 0},
-        { "quota_iunit_sz", lprocfs_rd_iunit, lprocfs_wr_iunit, 0},
-        { "quota_itune_sz", lprocfs_rd_itune, lprocfs_wr_itune, 0},
-        { "quota_type",     lprocfs_rd_type, lprocfs_wr_type, 0},
+        { "quota_type",     lprocfs_quota_rd_type,
+                            lprocfs_quota_wr_type, 0},
 #endif
         { "client_cache_count", lprocfs_filter_rd_fmd_max_num,
                           lprocfs_filter_wr_fmd_max_num, 0 },
@@ -331,6 +358,8 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = {
         { "capa",         lprocfs_filter_rd_capa,
                           lprocfs_filter_wr_capa, 0 },
         { "capa_count",   lprocfs_filter_rd_capa_count, 0, 0 },
+        { "sec_level",    lprocfs_rd_sec_level,
+                          lprocfs_wr_sec_level,            0 },
         { "read_cache_enable", lprocfs_filter_rd_cache, lprocfs_filter_wr_cache, 0},
         { "writethrough_cache_enable", lprocfs_filter_rd_wcache,
                           lprocfs_filter_wr_wcache, 0},
index be6badb..6085101 100644 (file)
@@ -280,11 +280,7 @@ struct osc_page {
          * True for a `temporary page' created by read-ahead code, probably
          * outside of any DLM lock.
          */
-                              ops_temp:1,
-        /**
-         * True iff page was created by a user with `appropriate privileges'.
-         */
-                              ops_ignore_quota:1;
+                              ops_temp:1;
         /**
          * Linkage into a per-osc_object list of pages in flight. For
          * debugging.
index 86fe589..4b4ae0a 100644 (file)
@@ -302,9 +302,12 @@ static int osc_io_commit_write(const struct lu_env *env,
                                const struct cl_page_slice *slice,
                                unsigned from, unsigned to)
 {
-        LASSERT(to > 0);
-
+        struct osc_page       *opg = cl2osc_page(slice);
+        struct osc_object     *obj = cl2osc(opg->ops_cl.cpl_obj);
+        struct osc_async_page *oap = &opg->ops_oap;
         ENTRY;
+
+        LASSERT(to > 0);
         /*
          * XXX instead of calling osc_page_touch() here and in
          * osc_io_fault_start() it might be more logical to introduce
@@ -312,6 +315,10 @@ static int osc_io_commit_write(const struct lu_env *env,
          * fault code calls.
          */
         osc_page_touch(env, cl2osc_page(slice), to);
+        if (!client_is_remote(osc_export(obj)) &&
+            cfs_capable(CFS_CAP_SYS_RESOURCE))
+                oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+
         RETURN(0);
 }
 
index d42e4f9..a583eef 100644 (file)
@@ -180,16 +180,22 @@ static int osc_page_cache_add(const struct lu_env *env,
         struct osc_io     *oio = osc_env_io(env);
         int result;
         int brw_flags;
+        int noquota = 0;
 
         LINVRNT(osc_page_protected(env, opg, CLM_WRITE, 0));
         ENTRY;
 
         /* Set the OBD_BRW_SRVLOCK before the page is queued. */
         brw_flags = oio->oi_lockless ? OBD_BRW_SRVLOCK : 0;
+        if (!client_is_remote(osc_export(obj)) &&
+            cfs_capable(CFS_CAP_SYS_RESOURCE)) {
+                brw_flags |= OBD_BRW_NOQUOTA;
+                noquota = OBD_BRW_NOQUOTA;
+        }
 
         osc_page_transfer_get(opg, "transfer\0cache");
         result = osc_queue_async_io(env, osc_export(obj), NULL, obj->oo_oinfo,
-                                    &opg->ops_oap, OBD_BRW_WRITE,
+                                    &opg->ops_oap, OBD_BRW_WRITE | noquota,
                                     0, 0, brw_flags, 0);
         if (result != 0)
                 osc_page_transfer_put(env, opg);
@@ -467,7 +473,6 @@ struct cl_page *osc_page_init(const struct lu_env *env,
 
                 opg->ops_from = 0;
                 opg->ops_to   = CFS_PAGE_SIZE;
-                opg->ops_ignore_quota = !!cfs_capable(CFS_CAP_SYS_RESOURCE);
 
                 result = osc_prep_async_page(osc_export(osc),
                                              NULL, osc->oo_oinfo, vmpage,
@@ -500,9 +505,18 @@ void osc_io_submit_page(const struct lu_env *env,
         LINVRNT(osc_page_protected(env, opg,
                                    crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1));
 
+        oap->oap_page_off   = opg->ops_from;
+        oap->oap_count      = opg->ops_to - opg->ops_from;
+        oap->oap_brw_flags |= OBD_BRW_SYNC;
+        if (oio->oi_lockless)
+                oap->oap_brw_flags |= OBD_BRW_SRVLOCK;
+
         oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
-        if (opg->ops_ignore_quota)
+        if (!client_is_remote(osc_export(cl2osc(opg->ops_cl.cpl_obj))) &&
+            cfs_capable(CFS_CAP_SYS_RESOURCE)) {
+                oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
                 oap->oap_cmd |= OBD_BRW_NOQUOTA;
+        }
 
         oap->oap_async_flags |= OSC_FLAGS;
         if (oap->oap_cmd & OBD_BRW_READ)
@@ -510,10 +524,6 @@ void osc_io_submit_page(const struct lu_env *env,
         else if (!(oap->oap_brw_page.flag & OBD_BRW_FROM_GRANT))
                 osc_enter_cache_try(env, cli, oap->oap_loi, oap, 1);
 
-        oap->oap_page_off   = opg->ops_from;
-        oap->oap_count      = opg->ops_to - opg->ops_from;
-        oap->oap_brw_flags |= oio->oi_lockless ? OBD_BRW_SRVLOCK : 0;
-
         osc_oap_to_pending(oap);
         osc_page_transfer_get(opg, "transfer\0imm");
         osc_page_transfer_add(env, opg, crt);
index 3a27a42..329200b 100644 (file)
@@ -694,7 +694,7 @@ static int osc_can_send_destroy(struct client_obd *cli)
  * cookies to the MDS after committing destroy transactions. */
 static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                        struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                       struct obd_export *md_export)
+                       struct obd_export *md_export, void *capa)
 {
         struct client_obd     *cli = &exp->exp_obd->u.cli;
         struct ptlrpc_request *req;
@@ -717,6 +717,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
                 RETURN(-ENOMEM);
         }
 
+        osc_set_capa_size(req, &RMF_CAPA1, (struct obd_capa *)capa);
         rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
                                0, &cancels, count);
         if (rc) {
@@ -734,6 +735,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
         LASSERT(body);
         body->oa = *oa;
 
+        osc_pack_capa(req, body, (struct obd_capa *)capa);
         ptlrpc_request_set_replen(req);
 
         if (!osc_can_send_destroy(cli)) {
@@ -1048,7 +1050,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
                                 struct lov_stripe_md *lsm, obd_count page_count,
                                 struct brw_page **pga,
                                 struct ptlrpc_request **reqp,
-                                struct obd_capa *ocapa)
+                                struct obd_capa *ocapa, int reserve)
 {
         struct ptlrpc_request   *req;
         struct ptlrpc_bulk_desc *desc;
@@ -1075,7 +1077,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
                 opc = OST_READ;
                 req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW);
         }
-
         if (req == NULL)
                 RETURN(-ENOMEM);
 
@@ -1219,6 +1220,8 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
         aa->aa_ppga = pga;
         aa->aa_cli = cli;
         CFS_INIT_LIST_HEAD(&aa->aa_oaps);
+        if (ocapa && reserve)
+                aa->aa_ocapa = capa_get(ocapa);
 
         *reqp = req;
         RETURN(0);
@@ -1448,7 +1451,7 @@ static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa,
 
 restart_bulk:
         rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
-                                  page_count, pga, &req, ocapa);
+                                  page_count, pga, &req, ocapa, 0);
         if (rc != 0)
                 return (rc);
 
@@ -1495,18 +1498,13 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
         }
 
         DEBUG_REQ(D_ERROR, request, "redo for recoverable error");
-/*
-        body = lustre_msg_buf(request->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
-                ocapa = lustre_unpack_capa(request->rq_reqmsg,
-                                           REQ_REC_OFF + 3);
-*/
+
         rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
                                         OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
                                   aa->aa_cli, aa->aa_oa,
                                   NULL /* lsm unused by osc currently */,
                                   aa->aa_page_count, aa->aa_ppga,
-                                  &new_req, NULL /* ocapa */);
+                                  &new_req, aa->aa_ocapa, 0);
         if (rc)
                 RETURN(rc);
 
@@ -1544,6 +1542,9 @@ int osc_brw_redo_request(struct ptlrpc_request *request,
                 }
         }
 
+        new_aa->aa_ocapa = aa->aa_ocapa;
+        aa->aa_ocapa = NULL;
+
         /* use ptlrpc_set_add_req is safe because interpret functions work
          * in check_set context. only one way exist with access to request
          * from different thread got -EINTR - this way protected with
@@ -1944,6 +1945,11 @@ static int brw_interpret(const struct lu_env *env,
                         RETURN(0);
         }
 
+        if (aa->aa_ocapa) {
+                capa_put(aa->aa_ocapa);
+                aa->aa_ocapa = NULL;
+        }
+
         cli = aa->aa_cli;
 
         client_obd_list_lock(&cli->cl_loi_list_lock);
@@ -2052,7 +2058,7 @@ static struct ptlrpc_request *osc_build_req(const struct lu_env *env,
 
         sort_brw_pages(pga, page_count);
         rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
-                                  pga, &req, crattr.cra_capa);
+                                  pga, &req, crattr.cra_capa, 1);
         if (rc != 0) {
                 CERROR("prep_req failed: %d\n", rc);
                 GOTO(out, req = ERR_PTR(rc));
@@ -2560,6 +2566,9 @@ int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         oap->oap_page = page;
         oap->oap_obj_off = offset;
+        if (!client_is_remote(exp) &&
+            cfs_capable(CFS_CAP_SYS_RESOURCE))
+                oap->oap_brw_flags = OBD_BRW_NOQUOTA;
 
         LASSERT(!(offset & ~CFS_PAGE_MASK));
 
@@ -2605,7 +2614,6 @@ int osc_queue_async_io(const struct lu_env *env,
                 RETURN(-EBUSY);
 
         /* check if the file's owner/group is over quota */
-#ifdef HAVE_QUOTA_SUPPORT
         if ((cmd & OBD_BRW_WRITE) && !(cmd & OBD_BRW_NOQUOTA)) {
                 struct cl_object *obj;
                 struct cl_attr    attr; /* XXX put attr into thread info */
@@ -2622,7 +2630,6 @@ int osc_queue_async_io(const struct lu_env *env,
                 if (rc)
                         RETURN(rc);
         }
-#endif
 
         if (loi == NULL)
                 loi = lsm->lsm_oinfo[0];
@@ -3964,6 +3971,8 @@ int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg)
         default:
                 rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars,
                                               lcfg, obd);
+               if (rc > 0)
+                       rc = 0;
                 break;
         }
 
index 422d255..57e2f71 100644 (file)
@@ -141,7 +141,7 @@ static int   osd_fid_lookup    (const struct lu_env *env,
                                 const struct lu_fid *fid);
 static void  osd_inode_getattr (const struct lu_env *env,
                                 struct inode *inode, struct lu_attr *attr);
-static void  osd_inode_setattr (const struct lu_env *env,
+static int   osd_inode_setattr (const struct lu_env *env,
                                 struct inode *inode, const struct lu_attr *attr);
 static int   osd_param_is_sane (const struct osd_device *dev,
                                 const struct txn_param *param);
@@ -154,7 +154,8 @@ static int   osd_index_insert  (const struct lu_env *env,
                                 const struct dt_rec *rec,
                                 const struct dt_key *key,
                                 struct thandle *handle,
-                                struct lustre_capa *capa);
+                                struct lustre_capa *capa,
+                                int ingore_quota);
 static int   osd_index_delete  (const struct lu_env *env,
                                 struct dt_object *dt, const struct dt_key *key,
                                 struct thandle *handle,
@@ -236,6 +237,31 @@ struct osd_thandle {
 
 };
 
+#ifdef HAVE_QUOTA_SUPPORT
+static inline void
+osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save)
+{
+        struct md_ucred    *uc = md_ucred(env);
+
+        LASSERT(uc != NULL);
+
+        save->oc_uid = current->fsuid; /* stash current fsuid/fsgid/caps */
+        save->oc_gid = current->fsgid;
+        save->oc_cap = current->cap_effective;
+        current->fsuid         = uc->mu_fsuid; /* adopt md_ucred(env)'s */
+        current->fsgid         = uc->mu_fsgid;
+        current->cap_effective = uc->mu_cap;
+}
+
+static inline void
+osd_pop_ctxt(struct osd_ctxt *save)
+{
+        current->fsuid         = save->oc_uid; /* put back values in @save */
+        current->fsgid         = save->oc_gid;
+        current->cap_effective = save->oc_cap;
+}
+#endif /* HAVE_QUOTA_SUPPORT */
+
 /*
  * Invariants, assertions.
  */
@@ -758,46 +784,161 @@ static int osd_init_capa_ctxt(const struct lu_env *env, struct dt_device *d,
         RETURN(0);
 }
 
-/* Note: we did not count into QUOTA here, If we mount with --data_journal
- * we may need more*/
-static const int osd_dto_credits[DTO_NR] = {
-        /*
-         * Insert/Delete. IAM EXT3_INDEX_EXTRA_TRANS_BLOCKS(8) +
-         * EXT3_SINGLEDATA_TRANS_BLOCKS 8 XXX Note: maybe iam need more,since
-         * iam have more level than Ext3 htree
+/**
+ * Init sb and lvfs ctxt from @data. Concurrency: serialized by callers.
+ */
+static void osd_init_quota_ctxt(const struct lu_env *env, struct dt_device *d,
+                               struct dt_quota_ctxt *ctxt, void *data)
+{
+        struct obd_device *obd = (void *)ctxt; /* NOTE: ctxt used as an obd */
+        struct vfsmount *mnt = (struct vfsmount *)data;
+        ENTRY;
+
+        obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb;
+        OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+        obd->obd_lvfs_ctxt.pwdmnt = mnt;
+        obd->obd_lvfs_ctxt.pwd = mnt->mnt_root;
+        obd->obd_lvfs_ctxt.fs = get_ds();
+
+        EXIT;
+}
+
+/**
+ * Note: quota credits are not counted here.
+ * If we mount with --data_journal we may need more.
+ */
+static const int osd_dto_credits_noquota[DTO_NR] = {
+        /**
+         * Insert/Delete.
+         * INDEX_EXTRA_TRANS_BLOCKS(8) +
+         * SINGLEDATA_TRANS_BLOCKS(8)
+         * XXX Note: IAM may need more, since IAM has more levels than
+         *           the EXT3 htree.
          */
         [DTO_INDEX_INSERT]  = 16,
         [DTO_INDEX_DELETE]  = 16,
+        /**
+         * Unused now
+         */
         [DTO_IDNEX_UPDATE]  = 16,
-        /*
-         * Create a object. Same as create object in Ext3 filesystem, but did
-         * not count QUOTA i EXT3_DATA_TRANS_BLOCKS(12) +
-         * INDEX_EXTRA_BLOCKS(8) + 3(inode bits,groups, GDT)
+        /**
+         * Create an object. The same as creating an object in EXT3.
+         * DATA_TRANS_BLOCKS(14) +
+         * INDEX_EXTRA_BLOCKS(8) +
+         * 3(inode bits, groups, GDT)
+         */
+        [DTO_OBJECT_CREATE] = 25,
+        /**
+         * Unused now
+         */
+        [DTO_OBJECT_DELETE] = 25,
+        /**
+         * Attr set credits.
+         * 3(inode bits, group, GDT)
          */
-        [DTO_OBJECT_CREATE] = 23,
-        [DTO_OBJECT_DELETE] = 23,
+        [DTO_ATTR_SET_BASE] = 3,
+        /**
+         * Xattr set. The same as xattr of EXT3.
+         * DATA_TRANS_BLOCKS(14)
+         * XXX Note: in the original MDS implementation INDEX_EXTRA_TRANS_BLOCKS
+         *           are also counted in. Do not know why?
+         */
+        [DTO_XATTR_SET]     = 14,
+        [DTO_LOG_REC]       = 14,
+        /**
+         * credits for inode change during write.
+         */
+        [DTO_WRITE_BASE]    = 3,
+        /**
+         * credits for single block write.
+         */
+        [DTO_WRITE_BLOCK]   = 14,
+        /**
+         * Attr set credits for chown.
+         * 3 (inode bit, group, GDT)
+         */
+        [DTO_ATTR_SET_CHOWN]= 3
+};
+
+/**
+ * Note: quota credits are counted here.
+ * If we mount with --data_journal we may need more.
+ */
+static const int osd_dto_credits_quota[DTO_NR] = {
+        /**
+         * INDEX_EXTRA_TRANS_BLOCKS(8) +
+         * SINGLEDATA_TRANS_BLOCKS(8) +
+         * 2 * QUOTA_TRANS_BLOCKS(2)
+         */
+        [DTO_INDEX_INSERT]  = 20,
+        /**
+         * INDEX_EXTRA_TRANS_BLOCKS(8) +
+         * SINGLEDATA_TRANS_BLOCKS(8) +
+         * 2 * QUOTA_TRANS_BLOCKS(2)
+         */
+        [DTO_INDEX_DELETE]  = 20,
+        /**
+         * Unused now.
+         */
+        [DTO_IDNEX_UPDATE]  = 16,
         /*
-         * Attr set credits 3 inode, group, GDT
+         * Create an object. Same as creating an object in an EXT3 filesystem.
+         * DATA_TRANS_BLOCKS(16) +
+         * INDEX_EXTRA_BLOCKS(8) +
+         * 3(inode bits, groups, GDT) +
+         * 2 * QUOTA_INIT_BLOCKS(25)
          */
-        [DTO_ATTR_SET]      = 3,
+        [DTO_OBJECT_CREATE] = 77,
         /*
-         * XATTR_SET. SAME AS XATTR of EXT3 EXT3_DATA_TRANS_BLOCKS XXX Note:
-         * in original MDS implmentation EXT3_INDEX_EXTRA_TRANS_BLOCKS are
-         * also counted in. Do not know why?
+         * Unused now.
+         * DATA_TRANS_BLOCKS(16) +
+         * INDEX_EXTRA_BLOCKS(8) +
+         * 3(inode bits, groups, GDT) +
+         * QUOTA(?)
+         */
+        [DTO_OBJECT_DELETE] = 27,
+        /**
+         * Attr set credits.
+         * 3 (inode bit, group, GDT)
+         */
+        [DTO_ATTR_SET_BASE] = 3,
+        /**
+         * Xattr set. The same as xattr of EXT3.
+         * DATA_TRANS_BLOCKS(16)
+         * XXX Note: in the original MDS implementation INDEX_EXTRA_TRANS_BLOCKS
+         *           are also counted in. Do not know why?
          */
         [DTO_XATTR_SET]     = 16,
         [DTO_LOG_REC]       = 16,
-        /* creadits for inode change during write */
+        /**
+         * credits for inode change during write.
+         */
         [DTO_WRITE_BASE]    = 3,
-        /* credits for single block write */
-        [DTO_WRITE_BLOCK]   = 12
+        /**
+         * credits for single block write.
+         */
+        [DTO_WRITE_BLOCK]   = 16,
+        /**
+         * Attr set credits for chown.
+         * 3 (inode bit, group, GDT) +
+         * 2 * QUOTA_INIT_BLOCKS(25) +
+         * 2 * QUOTA_DEL_BLOCKS(9)
+         */
+        [DTO_ATTR_SET_CHOWN]= 71
 };
 
 static int osd_credit_get(const struct lu_env *env, struct dt_device *d,
                           enum dt_txn_op op)
 {
-        LASSERT(0 <= op && op < ARRAY_SIZE(osd_dto_credits));
-        return osd_dto_credits[op];
+        LASSERT(ARRAY_SIZE(osd_dto_credits_noquota) ==
+                ARRAY_SIZE(osd_dto_credits_quota));
+        LASSERT(0 <= op && op < ARRAY_SIZE(osd_dto_credits_noquota));
+#ifdef HAVE_QUOTA_SUPPORT
+        if (test_opt(osd_sb(osd_dt_dev(d)), QUOTA))
+                return osd_dto_credits_quota[op];
+        else /* body is the return after #endif */
+#endif /* HAVE_QUOTA_SUPPORT */
+                return osd_dto_credits_noquota[op];
 }
 
 static const struct dt_device_operations osd_dt_ops = {
@@ -811,6 +952,7 @@ static const struct dt_device_operations osd_dt_ops = {
         .dt_commit_async   = osd_commit_async,
         .dt_credit_get     = osd_credit_get,
         .dt_init_capa_ctxt = osd_init_capa_ctxt,
+        .dt_init_quota_ctxt= osd_init_quota_ctxt,
 };
 
 static void osd_object_read_lock(const struct lu_env *env,
@@ -878,6 +1020,7 @@ static int capa_is_sane(const struct lu_env *env,
                         struct lustre_capa_key *keys)
 {
         struct osd_thread_info *oti = osd_oti_get(env);
+        struct lustre_capa *tcapa = &oti->oti_capa;
         struct obd_capa *oc;
         int i, rc = 0;
         ENTRY;
@@ -892,6 +1035,11 @@ static int capa_is_sane(const struct lu_env *env,
                 RETURN(rc);
         }
 
+        if (capa_is_expired_sec(capa)) {
+                DEBUG_CAPA(D_ERROR, capa, "expired");
+                RETURN(-ESTALE);
+        }
+
         spin_lock(&capa_lock);
         for (i = 0; i < 2; i++) {
                 if (keys[i].lk_keyid == capa->lc_keyid) {
@@ -906,11 +1054,11 @@ static int capa_is_sane(const struct lu_env *env,
                 RETURN(-ESTALE);
         }
 
-        rc = capa_hmac(oti->oti_capa.lc_hmac, capa, oti->oti_capa_key.lk_key);
+        rc = capa_hmac(tcapa->lc_hmac, capa, oti->oti_capa_key.lk_key);
         if (rc)
                 RETURN(rc);
-        if (memcmp(oti->oti_capa.lc_hmac, capa->lc_hmac, sizeof(capa->lc_hmac)))
-        {
+
+        if (memcmp(tcapa->lc_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) {
                 DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch");
                 RETURN(-EACCES);
         }
@@ -926,6 +1074,7 @@ static int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
 {
         const struct lu_fid *fid = lu_object_fid(&dt->do_lu);
         struct osd_device *dev = osd_dev(dt->do_lu.lo_dev);
+        struct md_capainfo *ci;
         int rc;
 
         if (!dev->od_fl_capa)
@@ -934,6 +1083,13 @@ static int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
         if (capa == BYPASS_CAPA)
                 return 0;
 
+        ci = md_capainfo(env);
+        if (unlikely(!ci))
+                return 0;
+
+        if (ci->mc_auth == LC_ID_NONE)
+                return 0;
+
         if (!capa) {
                 CERROR("no capability is provided for fid "DFID"\n", PFID(fid));
                 return -EACCES;
@@ -984,6 +1140,7 @@ static int osd_attr_set(const struct lu_env *env,
                         struct lustre_capa *capa)
 {
         struct osd_object *obj = osd_dt_obj(dt);
+        int rc;
 
         LASSERT(handle != NULL);
         LASSERT(dt_object_exists(dt));
@@ -993,11 +1150,12 @@ static int osd_attr_set(const struct lu_env *env,
                 return -EACCES;
 
         spin_lock(&obj->oo_guard);
-        osd_inode_setattr(env, obj->oo_inode, attr);
+        rc = osd_inode_setattr(env, obj->oo_inode, attr);
         spin_unlock(&obj->oo_guard);
 
-        mark_inode_dirty(obj->oo_inode);
-        return 0;
+        if (!rc)
+                mark_inode_dirty(obj->oo_inode);
+        return rc;
 }
 
 static struct timespec *osd_inode_time(const struct lu_env *env,
@@ -1012,8 +1170,8 @@ static struct timespec *osd_inode_time(const struct lu_env *env,
         return t;
 }
 
-static void osd_inode_setattr(const struct lu_env *env,
-                              struct inode *inode, const struct lu_attr *attr)
+static int osd_inode_setattr(const struct lu_env *env,
+                             struct inode *inode, const struct lu_attr *attr)
 {
         __u64 bits;
 
@@ -1021,6 +1179,24 @@ static void osd_inode_setattr(const struct lu_env *env,
 
         LASSERT(!(bits & LA_TYPE)); /* Huh? You want too much. */
 
+#ifdef HAVE_QUOTA_SUPPORT
+        if ((bits & LA_UID && attr->la_uid != inode->i_uid) ||
+            (bits & LA_GID && attr->la_gid != inode->i_gid)) {
+                struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt;
+                struct iattr iattr;
+                int rc;
+
+                iattr.ia_valid = bits & (LA_UID | LA_GID);
+                iattr.ia_uid = attr->la_uid;
+                iattr.ia_gid = attr->la_gid;
+                osd_push_ctxt(env, save);
+                rc = DQUOT_TRANSFER(inode, &iattr) ? -EDQUOT : 0;
+                osd_pop_ctxt(save);
+                if (rc != 0)
+                        return rc;
+        }
+#endif
+
         if (bits & LA_ATIME)
                 inode->i_atime  = *osd_inode_time(env, inode, attr->la_atime);
         if (bits & LA_CTIME)
@@ -1031,8 +1207,14 @@ static void osd_inode_setattr(const struct lu_env *env,
                 LDISKFS_I(inode)->i_disksize = attr->la_size;
                 i_size_write(inode, attr->la_size);
         }
+# if 0
+        /*
+         * OSD should not change "i_blocks" which is used by quota.
+         * "i_blocks" should be changed by ldiskfs only.
+         * Disable this assignment until SOM to fix some EA field. */
         if (bits & LA_BLOCKS)
                 inode->i_blocks = attr->la_blocks;
+#endif
         if (bits & LA_MODE)
                 inode->i_mode   = (inode->i_mode & S_IFMT) |
                         (attr->la_mode & ~S_IFMT);
@@ -1051,6 +1233,7 @@ static void osd_inode_setattr(const struct lu_env *env,
                 li->i_flags = (li->i_flags & ~LDISKFS_FL_USER_MODIFIABLE) |
                         (attr->la_flags & LDISKFS_FL_USER_MODIFIABLE);
         }
+        return 0;
 }
 
 /*
@@ -1087,6 +1270,9 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
         struct osd_thandle *oth;
         struct inode       *parent;
         struct inode       *inode;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct osd_ctxt    *save = &info->oti_ctxt;
+#endif
 
         LINVRNT(osd_invariant(obj));
         LASSERT(obj->oo_inode == NULL);
@@ -1101,7 +1287,13 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
                 parent = osd->od_obj_area->d_inode;
         LASSERT(parent->i_op != NULL);
 
+#ifdef HAVE_QUOTA_SUPPORT
+        osd_push_ctxt(info->oti_env, save);
+#endif
         inode = ldiskfs_create_inode(oth->ot_handle, parent, mode);
+#ifdef HAVE_QUOTA_SUPPORT
+        osd_pop_ctxt(save);
+#endif
         if (!IS_ERR(inode)) {
                 obj->oo_inode = inode;
                 result = 0;
@@ -1271,13 +1463,16 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt,
         }
         if (result == 0) {
                 struct osd_inode_id *id = &info->oti_id;
+                struct md_ucred     *uc = md_ucred(env);
 
                 LASSERT(obj->oo_inode != NULL);
+                LASSERT(uc != NULL);
 
                 id->oii_ino = obj->oo_inode->i_ino;
                 id->oii_gen = obj->oo_inode->i_generation;
 
-                result = osd_oi_insert(info, &osd->od_oi, fid, id, th);
+                result = osd_oi_insert(info, &osd->od_oi, fid, id, th,
+                                       uc->mu_cap & CFS_CAP_SYS_RESOURCE_MASK);
         }
 
         LASSERT(ergo(result == 0, dt_object_exists(dt)));
@@ -1471,6 +1666,7 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env,
         struct lustre_capa_key *key = &info->oti_capa_key;
         struct lustre_capa *capa = &info->oti_capa;
         struct obd_capa *oc;
+        struct md_capainfo *ci;
         int rc;
         ENTRY;
 
@@ -1484,10 +1680,41 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env,
         if (old && osd_object_auth(env, dt, old, opc))
                 RETURN(ERR_PTR(-EACCES));
 
+        ci = md_capainfo(env);
+        if (unlikely(!ci))
+                RETURN(ERR_PTR(-ENOENT));
+
+        switch (ci->mc_auth) {
+        case LC_ID_NONE:
+                RETURN(NULL);
+        case LC_ID_PLAIN:
+                capa->lc_uid = obj->oo_inode->i_uid;
+                capa->lc_gid = obj->oo_inode->i_gid;
+                capa->lc_flags = LC_ID_PLAIN;
+                break;
+        case LC_ID_CONVERT: {
+                __u32 d[4], s[4];
+
+                s[0] = obj->oo_inode->i_uid;
+                get_random_bytes(&(s[1]), sizeof(__u32));
+                s[2] = obj->oo_inode->i_gid;
+                get_random_bytes(&(s[3]), sizeof(__u32));
+                rc = capa_encrypt_id(d, s, key->lk_key, CAPA_HMAC_KEY_MAX_LEN);
+                if (unlikely(rc))
+                        RETURN(ERR_PTR(rc));
+
+                capa->lc_uid   = ((__u64)d[1] << 32) | d[0];
+                capa->lc_gid   = ((__u64)d[3] << 32) | d[2];
+                capa->lc_flags = LC_ID_CONVERT;
+                break;
+        }
+        default:
+                RETURN(ERR_PTR(-EINVAL));
+        }
+
         capa->lc_fid = *fid;
         capa->lc_opc = opc;
-        capa->lc_uid = 0;
-        capa->lc_flags = dev->od_capa_alg << 24;
+        capa->lc_flags |= dev->od_capa_alg << 24;
         capa->lc_timeout = dev->od_capa_timeout;
         capa->lc_expiry = 0;
 
@@ -1587,11 +1814,15 @@ static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
 
 static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                          const struct lu_buf *buf, loff_t *pos,
-                         struct thandle *handle, struct lustre_capa *capa)
+                         struct thandle *handle, struct lustre_capa *capa,
+                         int ignore_quota)
 {
         struct inode       *inode = osd_dt_obj(dt)->oo_inode;
         struct osd_thandle *oh;
         ssize_t             result;
+#ifdef HAVE_QUOTA_SUPPORT
+        cfs_cap_t           save = current->cap_effective;
+#endif
 
         LASSERT(handle != NULL);
 
@@ -1600,8 +1831,17 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
 
         oh = container_of(handle, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle->h_transaction != NULL);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (ignore_quota)
+                current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+        else
+                current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+#endif
         result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len,
                                              pos, oh->ot_handle);
+#ifdef HAVE_QUOTA_SUPPORT
+        current->cap_effective = save;
+#endif
         if (result == 0)
                 result = buf->lb_len;
         return result;
@@ -1795,12 +2035,16 @@ static int osd_index_lookup(const struct lu_env *env, struct dt_object *dt,
 
 static int osd_index_insert(const struct lu_env *env, struct dt_object *dt,
                             const struct dt_rec *rec, const struct dt_key *key,
-                            struct thandle *th, struct lustre_capa *capa)
+                            struct thandle *th, struct lustre_capa *capa,
+                            int ignore_quota)
 {
         struct osd_object     *obj = osd_dt_obj(dt);
         struct iam_path_descr *ipd;
         struct osd_thandle    *oh;
         struct iam_container  *bag = &obj->oo_dir->od_container;
+#ifdef HAVE_QUOTA_SUPPORT
+        cfs_cap_t              save = current->cap_effective;
+#endif
         int rc;
 
         ENTRY;
@@ -1820,8 +2064,17 @@ static int osd_index_insert(const struct lu_env *env, struct dt_object *dt,
         oh = container_of0(th, struct osd_thandle, ot_super);
         LASSERT(oh->ot_handle != NULL);
         LASSERT(oh->ot_handle->h_transaction != NULL);
+#ifdef HAVE_QUOTA_SUPPORT
+        if (ignore_quota)
+                current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+        else
+                current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+#endif
         rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key,
                         (struct iam_rec *)rec, ipd);
+#ifdef HAVE_QUOTA_SUPPORT
+        current->cap_effective = save;
+#endif
         osd_ipd_put(env, bag, ipd);
         LINVRNT(osd_invariant(obj));
         RETURN(rc);
@@ -2135,7 +2388,8 @@ static int osd_index_compat_insert(const struct lu_env *env,
                                    struct dt_object *dt,
                                    const struct dt_rec *rec,
                                    const struct dt_key *key, struct thandle *th,
-                                   struct lustre_capa *capa)
+                                   struct lustre_capa *capa,
+                                   int ignore_quota)
 {
         struct osd_object     *obj = osd_dt_obj(dt);
 
@@ -2392,7 +2646,7 @@ static int osd_process_config(const struct lu_env *env,
         RETURN(err);
 }
 extern void ldiskfs_orphan_cleanup (struct super_block * sb,
-                                   struct ldiskfs_super_block * es);
+                                    struct ldiskfs_super_block * es);
 
 static int osd_recovery_complete(const struct lu_env *env,
                                  struct lu_device *d)
index bcfcd91..e187323 100644 (file)
 
 struct inode;
 
+#define OSD_COUNTERS (0)
+
+#ifdef HAVE_QUOTA_SUPPORT
+/*
+ * Context record handed to osd_push_ctxt()/osd_pop_ctxt(); one instance is
+ * embedded in struct osd_thread_info as oti_ctxt.
+ * NOTE(review): presumably holds the uid, gid and capability set that are
+ * saved while the pushed context is active -- confirm in osd_push_ctxt().
+ */
+struct osd_ctxt {
+        __u32 oc_uid;
+        __u32 oc_gid;
+        __u32 oc_cap;
+};
+#endif
+
 /*
  * osd device.
  */
@@ -145,6 +155,9 @@ struct osd_thread_info {
         int                    oti_r_locks;
         int                    oti_w_locks;
         int                    oti_txns;
+#ifdef HAVE_QUOTA_SUPPORT
+        struct osd_ctxt        oti_ctxt;
+#endif
 };
 
 #ifdef LPROCFS
index a2e086c..79d4082 100644 (file)
@@ -195,7 +195,7 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi,
 
 int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi,
                   const struct lu_fid *fid, const struct osd_inode_id *id0,
-                  struct thandle *th)
+                  struct thandle *th, int ignore_quota)
 {
         struct dt_object    *idx;
         struct osd_inode_id *id;
@@ -210,7 +210,8 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi,
         id->oii_gen = cpu_to_be32(id0->oii_gen);
         return idx->do_index_ops->dio_insert(info->oti_env, idx,
                                              (const struct dt_rec *)id,
-                                             key, th, BYPASS_CAPA);
+                                             key, th, BYPASS_CAPA,
+                                             ignore_quota);
 }
 
 int osd_oi_delete(struct osd_thread_info *info,
index b1d2dc8..8e02eb2 100644 (file)
@@ -98,7 +98,7 @@ int  osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi,
                    const struct lu_fid *fid, struct osd_inode_id *id);
+/* \a ignore_quota is forwarded to the index object's dio_insert() call. */
 int  osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi,
                    const struct lu_fid *fid, const struct osd_inode_id *id,
-                   struct thandle *th);
+                   struct thandle *th, int ignore_quota);
 int  osd_oi_delete(struct osd_thread_info *info,
                    struct osd_oi *oi, const struct lu_fid *fid,
                    struct thandle *th);
index abb4f66..8b2b7fb 100644 (file)
@@ -69,6 +69,18 @@ static int oss_num_create_threads;
 CFS_MODULE_PARM(oss_num_create_threads, "i", int, 0444,
                 "number of OSS create threads to start");
 
+/**
+ * Do not return server-side uid/gid to remote client
+ *
+ * When exp_connect_rmtclient(\a exp) is true, o_uid and o_gid in \a oa are
+ * overwritten with -1 and OBD_MD_FLUID | OBD_MD_FLGID are cleared from
+ * o_valid, so the reply no longer marks the ids as valid.  For any other
+ * export \a oa is left untouched.
+ */
+static void ost_drop_id(struct obd_export *exp, struct  obdo *oa)
+{
+        if (exp_connect_rmtclient(exp)) {
+                oa->o_uid = -1;
+                oa->o_gid = -1;
+                oa->o_valid &= ~(OBD_MD_FLUID | OBD_MD_FLGID);
+        }
+}
+
 void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
 {
         struct oti_req_ack_lock *ack_lock;
@@ -95,6 +107,7 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req,
 {
         struct ost_body *body, *repbody;
         __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        struct lustre_capa *capa = NULL;
         int rc;
         ENTRY;
 
@@ -115,6 +128,9 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req,
                 ldlm_request_cancel(req, dlm, 0);
         }
 
+        if (body->oa.o_valid & OBD_MD_FLOSSCAPA)
+                capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 2);
+
         rc = lustre_pack_reply(req, 2, size, NULL);
         if (rc)
                 RETURN(rc);
@@ -124,7 +140,7 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req,
         repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                  sizeof(*repbody));
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
-        req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL);
+        req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL, capa);
         RETURN(0);
 }
 
@@ -154,6 +170,7 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req)
                 oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg,
                                                    REQ_REC_OFF + 1);
         req->rq_status = obd_getattr(exp, &oinfo);
+        ost_drop_id(exp, &repbody->oa);
         RETURN(0);
 }
 
@@ -320,6 +337,7 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req,
                 ost_punch_lock_put(exp, oinfo.oi_oa, &lh);
         }
         repbody->oa = *oinfo.oi_oa;
+        ost_drop_id(exp, &repbody->oa);
         RETURN(rc);
 }
 
@@ -348,6 +366,7 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req)
         memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
         req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size,
                                   repbody->oa.o_blocks, capa);
+        ost_drop_id(exp, &repbody->oa);
         RETURN(0);
 }
 
@@ -378,6 +397,7 @@ static int ost_setattr(struct obd_export *exp, struct ptlrpc_request *req,
                 oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg,
                                                    REQ_REC_OFF + 1);
         req->rq_status = obd_setattr(exp, &oinfo, oti);
+        ost_drop_id(exp, &repbody->oa);
         RETURN(0);
 }
 
@@ -792,6 +812,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                          sizeof(*repbody));
                 memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
+                ost_drop_id(exp, &repbody->oa);
         }
 
 out_lock:
@@ -843,6 +864,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         obd_count                client_cksum = 0, server_cksum = 0;
         cksum_type_t             cksum_type = OBD_CKSUM_CRC32;
         int                      no_reply = 0;
+        __u32                    o_uid = 0, o_gid = 0;
         ENTRY;
 
         req->rq_bulk_write = 1;
@@ -970,6 +992,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 body->oa.o_valid &= ~OBD_MD_FLGRANT;
         }
 
+        if (exp_connect_rmtclient(exp)) {
+                o_uid = body->oa.o_uid;
+                o_gid = body->oa.o_gid;
+        }
         npages = OST_THREAD_POOL_SIZE;
         rc = obd_preprw(OBD_BRW_WRITE, exp, &body->oa, objcount,
                         ioo, remote_nb, &npages, local_nb, oti, capa);
@@ -1065,6 +1091,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         /* Must commit after prep above in all cases */
         rc = obd_commitrw(OBD_BRW_WRITE, exp, &repbody->oa, objcount, ioo,
                           remote_nb, npages, local_nb, oti, rc);
+        if (exp_connect_rmtclient(exp)) {
+                repbody->oa.o_uid = o_uid;
+                repbody->oa.o_gid = o_gid;
+        }
 
         if (unlikely(client_cksum != server_cksum && rc == 0)) {
                 int  new_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type);
@@ -1230,26 +1260,25 @@ static int ost_get_info(struct obd_export *exp, struct ptlrpc_request *req)
         RETURN(rc);
 }
 
+#ifdef HAVE_QUOTA_SUPPORT
 static int ost_handle_quotactl(struct ptlrpc_request *req)
 {
         struct obd_quotactl *oqctl, *repoqc;
-        __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*repoqc) };
         int rc;
         ENTRY;
 
-        oqctl = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqctl),
-                                   lustre_swab_obd_quotactl);
+        oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
         if (oqctl == NULL)
                 GOTO(out, rc = -EPROTO);
 
-        rc = lustre_pack_reply(req, 2, size, NULL);
+        rc = req_capsule_server_pack(&req->rq_pill);
         if (rc)
                 GOTO(out, rc);
 
-        repoqc = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repoqc));
-
+        repoqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
         req->rq_status = obd_quotactl(req->rq_export, oqctl);
         *repoqc = *oqctl;
+
 out:
         RETURN(rc);
 }
@@ -1265,15 +1294,38 @@ static int ost_handle_quotacheck(struct ptlrpc_request *req)
                 RETURN(-EPROTO);
 
         rc = req_capsule_server_pack(&req->rq_pill);
-        if (rc) {
-                CERROR("ost: out of memory while packing quotacheck reply\n");
+        if (rc)
                 RETURN(-ENOMEM);
-        }
 
         req->rq_status = obd_quotacheck(req->rq_export, oqctl);
         RETURN(0);
 }
 
+/**
+ * Handle an OST_QUOTA_ADJUST_QUNIT request.
+ *
+ * Fetches the client's quota_adjust_qunit buffer, packs the reply, calls
+ * obd_quota_adjust_qunit() on the export with the obd's quota context and
+ * stores that call's result in req->rq_status, then copies the request
+ * buffer into the reply buffer.
+ *
+ * \retval 0       reply packed; the operation status is in req->rq_status
+ * \retval -EPROTO request buffer missing
+ * \retval other   error returned by req_capsule_server_pack()
+ */
+static int ost_handle_quota_adjust_qunit(struct ptlrpc_request *req)
+{
+        struct quota_adjust_qunit *oqaq, *repoqa;
+        struct lustre_quota_ctxt *qctxt;
+        int rc;
+        ENTRY;
+
+        qctxt = &req->rq_export->exp_obd->u.obt.obt_qctxt;
+        oqaq = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT);
+        if (oqaq == NULL)
+                GOTO(out, rc = -EPROTO);
+
+        rc = req_capsule_server_pack(&req->rq_pill);
+        if (rc)
+                GOTO(out, rc);
+
+        /* NOTE(review): repoqa is dereferenced below without a NULL check */
+        repoqa = req_capsule_server_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT);
+        req->rq_status = obd_quota_adjust_qunit(req->rq_export, oqaq, qctxt);
+        /* reply body is a copy of the request buffer as it stands after the call */
+        *repoqa = *oqaq;
+
+ out:
+        RETURN(rc);
+}
+#endif
+
 static int ost_llog_handle_connect(struct obd_export *exp,
                                    struct ptlrpc_request *req)
 {
@@ -1286,6 +1338,122 @@ static int ost_llog_handle_connect(struct obd_export *exp,
         RETURN(rc);
 }
 
+/*
+ * Clear the remote-client and OSS-capability connect flags in \a reply and
+ * copy the resulting flag word into \a exp under exp_lock.
+ * Both arguments are evaluated more than once; pass side-effect-free
+ * expressions.
+ */
+#define ost_init_sec_none(reply, exp)                                   \
+do {                                                                    \
+        (reply)->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT |        \
+                                        OBD_CONNECT_RMT_CLIENT_FORCE |  \
+                                        OBD_CONNECT_OSS_CAPA);          \
+        spin_lock(&(exp)->exp_lock);                                    \
+        (exp)->exp_connect_flags = (reply)->ocd_connect_flags;          \
+        spin_unlock(&(exp)->exp_lock);                                  \
+} while (0)
+
+/**
+ * Settle, at connect time, which of the remote-client and OSS-capability
+ * connect flags the export keeps, based on how the request was authenticated
+ * and on the filter's configured security level (fo_sec_level).
+ *
+ * MDT connections always have the flags cleared via ost_init_sec_none().
+ * Non-GSS connections, and clients that did not offer both
+ * OBD_CONNECT_RMT_CLIENT and OBD_CONNECT_OSS_CAPA, have them cleared too,
+ * unless fo_sec_level is above LUSTRE_SEC_NONE, in which case the connection
+ * is refused.
+ *
+ * \retval 0        flags settled
+ * \retval -EFAULT  connect data missing from the request or the reply
+ * \retval -EACCES  client not acceptable under the current settings
+ * \retval -EINVAL  unknown fo_sec_level
+ */
+static int ost_init_sec_level(struct ptlrpc_request *req)
+{
+        struct obd_export *exp = req->rq_export;
+        struct req_capsule *pill = &req->rq_pill;
+        struct obd_device *obd = exp->exp_obd;
+        struct filter_obd *filter = &obd->u.filter;
+        char *client = libcfs_nid2str(req->rq_peer.nid);
+        struct obd_connect_data *data, *reply;
+        int rc = 0, remote;
+        ENTRY;
+
+        data = req_capsule_client_get(pill, &RMF_CONNECT_DATA);
+        reply = req_capsule_server_get(pill, &RMF_CONNECT_DATA);
+        if (data == NULL || reply == NULL)
+                RETURN(-EFAULT);
+
+        /* connection from MDT is always trusted */
+        if (req->rq_auth_usr_mdt) {
+                ost_init_sec_none(reply, exp);
+                RETURN(0);
+        }
+
+        /* no GSS support case */
+        if (!req->rq_auth_gss) {
+                if (filter->fo_sec_level > LUSTRE_SEC_NONE) {
+                        CWARN("client %s -> target %s does not use GSS, "
+                              "can not run under security level %d.\n",
+                              client, obd->obd_name, filter->fo_sec_level);
+                        RETURN(-EACCES);
+                } else {
+                        ost_init_sec_none(reply, exp);
+                        RETURN(0);
+                }
+        }
+
+        /* old version case */
+        if (unlikely(!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) ||
+                     !(data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA))) {
+                if (filter->fo_sec_level > LUSTRE_SEC_NONE) {
+                        CWARN("client %s -> target %s uses old version, "
+                              "can not run under security level %d.\n",
+                              client, obd->obd_name, filter->fo_sec_level);
+                        RETURN(-EACCES);
+                } else {
+                        CWARN("client %s -> target %s uses old version, "
+                              "run under security level %d.\n",
+                              client, obd->obd_name, filter->fo_sec_level);
+                        ost_init_sec_none(reply, exp);
+                        RETURN(0);
+                }
+        }
+
+        /*
+         * A client is treated as remote if it asked for that explicitly
+         * (OBD_CONNECT_RMT_CLIENT_FORCE) or if it authenticated as remote.
+         */
+        remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT_FORCE;
+        if (remote) {
+                if (!req->rq_auth_remote)
+                        CDEBUG(D_SEC, "client (local realm) %s -> target %s "
+                               "asked to be remote.\n", client, obd->obd_name);
+        } else if (req->rq_auth_remote) {
+                remote = 1;
+                CDEBUG(D_SEC, "client (remote realm) %s -> target %s is set "
+                       "as remote by default.\n", client, obd->obd_name);
+        }
+
+        /* remote clients are refused unless OSS capabilities are enabled */
+        if (remote) {
+                if (!filter->fo_fl_oss_capa) {
+                        CDEBUG(D_SEC, "client %s -> target %s is set as remote,"
+                               " but OSS capabilities are not enabled: %d.\n",
+                               client, obd->obd_name, filter->fo_fl_oss_capa);
+                        RETURN(-EACCES);
+                }
+        }
+
+        switch (filter->fo_sec_level) {
+        case LUSTRE_SEC_NONE:
+                if (!remote) {
+                        ost_init_sec_none(reply, exp);
+                        break;
+                } else {
+                        CDEBUG(D_SEC, "client %s -> target %s is set as remote, "
+                               "can not run under security level %d.\n",
+                               client, obd->obd_name, filter->fo_sec_level);
+                        RETURN(-EACCES);
+                }
+        case LUSTRE_SEC_REMOTE:
+                /* local clients lose all three flags; remote ones keep them */
+                if (!remote)
+                        ost_init_sec_none(reply, exp);
+                break;
+        case LUSTRE_SEC_ALL:
+                /*
+                 * local clients drop the remote-client flags and keep
+                 * OBD_CONNECT_OSS_CAPA only when fo_fl_oss_capa is set
+                 */
+                if (!remote) {
+                        reply->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT |
+                                                      OBD_CONNECT_RMT_CLIENT_FORCE);
+                        if (!filter->fo_fl_oss_capa)
+                                reply->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA;
+
+                        spin_lock(&exp->exp_lock);
+                        exp->exp_connect_flags = reply->ocd_connect_flags;
+                        spin_unlock(&exp->exp_lock);
+                }
+                break;
+        default:
+                RETURN(-EINVAL);
+        }
+
+        RETURN(rc);
+}
+
 static int filter_export_check_flavor(struct filter_obd *filter,
                                       struct obd_export *exp,
                                       struct ptlrpc_request *req)
@@ -1382,8 +1550,11 @@ int ost_msg_check_version(struct lustre_msg *msg)
         case OST_SYNC:
         case OST_SET_INFO:
         case OST_GET_INFO:
+#ifdef HAVE_QUOTA_SUPPORT
         case OST_QUOTACHECK:
         case OST_QUOTACTL:
+        case OST_QUOTA_ADJUST_QUNIT:
+#endif
                 rc = lustre_msg_check_version(msg, LUSTRE_OST_VERSION);
                 if (rc)
                         CERROR("bad opc %u version %08x, expecting %08x\n",
@@ -1487,12 +1658,14 @@ int ost_handle(struct ptlrpc_request *req)
                 if (OBD_FAIL_CHECK(OBD_FAIL_OST_CONNECT_NET2))
                         RETURN(0);
                 if (!rc) {
-                        struct obd_export *exp = req->rq_export;
+                        rc = ost_init_sec_level(req);
+                        if (!rc) {
+                                struct obd_export *exp = req->rq_export;
 
-                        obd = exp->exp_obd;
-
-                        rc = filter_export_check_flavor(&obd->u.filter,
-                                                        exp, req);
+                                obd = exp->exp_obd;
+                                rc = filter_export_check_flavor(&obd->u.filter,
+                                                                exp, req);
+                        }
                 }
                 break;
         }
@@ -1598,6 +1771,7 @@ int ost_handle(struct ptlrpc_request *req)
                 DEBUG_REQ(D_INODE, req, "get_info");
                 rc = ost_get_info(req->rq_export, req);
                 break;
+#ifdef HAVE_QUOTA_SUPPORT
         case OST_QUOTACHECK:
                 CDEBUG(D_INODE, "quotacheck\n");
                 req_capsule_set(&req->rq_pill, &RQF_OST_QUOTACHECK);
@@ -1612,6 +1786,12 @@ int ost_handle(struct ptlrpc_request *req)
                         RETURN(0);
                 rc = ost_handle_quotactl(req);
                 break;
+        case OST_QUOTA_ADJUST_QUNIT:
+                CDEBUG(D_INODE, "quota_adjust_qunit\n");
+                req_capsule_set(&req->rq_pill, &RQF_OST_QUOTA_ADJUST_QUNIT);
+                rc = ost_handle_quota_adjust_qunit(req);
+                break;
+#endif
         case OBD_PING:
                 DEBUG_REQ(D_INODE, req, "ping");
                 req_capsule_set(&req->rq_pill, &RQF_OBD_PING);
index f975d74..e663cea 100644 (file)
@@ -105,6 +105,10 @@ static const struct req_msg_field *quotactl_only[] = {
         &RMF_PTLRPC_BODY,
         &RMF_OBD_QUOTACTL
 };
+static const struct req_msg_field *quota_adjust_qunit_only[] = {
+        &RMF_PTLRPC_BODY,
+        &RMF_QUOTA_ADJUST_QUNIT
+};
 
 static const struct req_msg_field *qunit_data_only[] = {
         &RMF_PTLRPC_BODY,
@@ -240,7 +244,9 @@ static const struct req_msg_field *mds_last_unlink_server[] = {
         &RMF_PTLRPC_BODY,
         &RMF_MDT_BODY,
         &RMF_MDT_MD,
-        &RMF_LOGCOOKIES
+        &RMF_LOGCOOKIES,
+        &RMF_CAPA1,
+        &RMF_CAPA2
 };
 
 static const struct req_msg_field *mds_reint_setattr_client[] = {
@@ -465,7 +471,8 @@ static const struct req_msg_field *ost_body_capa[] = {
 static const struct req_msg_field *ost_destroy_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_OST_BODY,
-        &RMF_DLM_REQ
+        &RMF_DLM_REQ,
+        &RMF_CAPA1
 };
 
 
@@ -518,10 +525,10 @@ static const struct req_msg_field *ost_get_fiemap_server[] = {
 static const struct req_format *req_formats[] = {
         &RQF_OBD_PING,
         &RQF_SEC_CTX,
-        &RQF_SEQ_QUERY,
-        &RQF_FLD_QUERY,
         &RQF_MGS_TARGET_REG,
         &RQF_MGS_SET_INFO,
+        &RQF_SEQ_QUERY,
+        &RQF_FLD_QUERY,
         &RQF_MDS_CONNECT,
         &RQF_MDS_DISCONNECT,
         &RQF_MDS_SET_INFO,
@@ -552,10 +559,12 @@ static const struct req_format *req_formats[] = {
         &RQF_MDS_QUOTACHECK,
         &RQF_MDS_QUOTACTL,
         &RQF_MDS_QUOTA_DQACQ,
+        &RQF_QC_CALLBACK,
         &RQF_OST_CONNECT,
         &RQF_OST_DISCONNECT,
         &RQF_OST_QUOTACHECK,
         &RQF_OST_QUOTACTL,
+        &RQF_OST_QUOTA_ADJUST_QUNIT,
         &RQF_OST_GETATTR,
         &RQF_OST_SETATTR,
         &RQF_OST_CREATE,
@@ -670,6 +679,12 @@ const struct req_msg_field RMF_OBD_QUOTACTL =
                     sizeof(struct obd_quotactl), lustre_swab_obd_quotactl);
 EXPORT_SYMBOL(RMF_OBD_QUOTACTL);
 
+const struct req_msg_field RMF_QUOTA_ADJUST_QUNIT =
+        DEFINE_MSGF("quota_adjust_qunit", 0,
+                    sizeof(struct quota_adjust_qunit),
+                    lustre_swab_quota_adjust_qunit);
+EXPORT_SYMBOL(RMF_QUOTA_ADJUST_QUNIT);
+
 const struct req_msg_field RMF_QUNIT_DATA =
         DEFINE_MSGF("qunit_data", 0,
                     sizeof(struct qunit_data), NULL);
@@ -878,6 +893,14 @@ const struct req_format RQF_MGS_SET_INFO =
                          mgs_set_info);
 EXPORT_SYMBOL(RQF_MGS_SET_INFO);
 
+const struct req_format RQF_SEQ_QUERY =
+        DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
+EXPORT_SYMBOL(RQF_SEQ_QUERY);
+
+const struct req_format RQF_FLD_QUERY =
+        DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
+EXPORT_SYMBOL(RQF_FLD_QUERY);
+
 const struct req_format RQF_LOG_CANCEL =
         DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty);
 EXPORT_SYMBOL(RQF_LOG_CANCEL);
@@ -898,6 +921,11 @@ const struct req_format RQF_OST_QUOTACTL =
         DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only);
 EXPORT_SYMBOL(RQF_OST_QUOTACTL);
 
+const struct req_format RQF_OST_QUOTA_ADJUST_QUNIT =
+        DEFINE_REQ_FMT0("OST_QUOTA_ADJUST_QUNIT", quota_adjust_qunit_only,
+                        quota_adjust_qunit_only);
+EXPORT_SYMBOL(RQF_OST_QUOTA_ADJUST_QUNIT);
+
 const struct req_format RQF_QC_CALLBACK =
         DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty);
 EXPORT_SYMBOL(RQF_QC_CALLBACK);
@@ -906,14 +934,6 @@ const struct req_format RQF_MDS_QUOTA_DQACQ =
         DEFINE_REQ_FMT0("MDS_QUOTA_DQACQ", qunit_data_only, qunit_data_only);
 EXPORT_SYMBOL(RQF_MDS_QUOTA_DQACQ);
 
-const struct req_format RQF_SEQ_QUERY =
-        DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
-EXPORT_SYMBOL(RQF_SEQ_QUERY);
-
-const struct req_format RQF_FLD_QUERY =
-        DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server);
-EXPORT_SYMBOL(RQF_FLD_QUERY);
-
 const struct req_format RQF_MDS_GETSTATUS =
         DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa);
 EXPORT_SYMBOL(RQF_MDS_GETSTATUS);
index 519593c..29b805c 100644 (file)
@@ -72,6 +72,7 @@ struct ll_rpc_opcode {
         { OST_SET_INFO,     "ost_set_info" },
         { OST_QUOTACHECK,   "ost_quotacheck" },
         { OST_QUOTACTL,     "ost_quotactl" },
+        { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" },
         { MDS_GETATTR,      "mds_getattr" },
         { MDS_GETATTR_NAME, "mds_getattr_lock" },
         { MDS_CLOSE,        "mds_close" },
@@ -106,7 +107,7 @@ struct ll_rpc_opcode {
         { MGS_SET_INFO,     "mgs_set_info" },
         { OBD_PING,         "obd_ping" },
         { OBD_LOG_CANCEL,   "llog_origin_handle_cancel" },
-        { OBD_QC_CALLBACK,  "obd_qc_callback" },
+        { OBD_QC_CALLBACK,  "obd_quota_callback" },
         { LLOG_ORIGIN_HANDLE_CREATE,     "llog_origin_handle_create" },
         { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" },
         { LLOG_ORIGIN_HANDLE_READ_HEADER,"llog_origin_handle_read_header" },
@@ -120,7 +121,9 @@ struct ll_rpc_opcode {
         { SEQ_QUERY,        "seq_query" },
         { SEC_CTX_INIT,     "sec_ctx_init" },
         { SEC_CTX_INIT_CONT,"sec_ctx_init_cont" },
-        { SEC_CTX_FINI,     "sec_ctx_fini" }
+        { SEC_CTX_FINI,     "sec_ctx_fini" },
+        { QUOTA_DQACQ,      "quota_acquire" },
+        { QUOTA_DQREL,      "quota_release" }
 };
 
 struct ll_eopcode {
@@ -132,12 +135,13 @@ struct ll_eopcode {
         { LDLM_EXTENT_ENQUEUE,  "ldlm_extent_enqueue" },
         { LDLM_FLOCK_ENQUEUE,   "ldlm_flock_enqueue" },
         { LDLM_IBITS_ENQUEUE,   "ldlm_ibits_enqueue" },
+        { MDS_REINT_SETATTR,    "mds_reint_setattr" },
         { MDS_REINT_CREATE,     "mds_reint_create" },
         { MDS_REINT_LINK,       "mds_reint_link" },
-        { MDS_REINT_OPEN,       "mds_reint_open" },
-        { MDS_REINT_SETATTR,    "mds_reint_setattr" },
-        { MDS_REINT_RENAME,     "mds_reint_rename" },
         { MDS_REINT_UNLINK,     "mds_reint_unlink" },
+        { MDS_REINT_RENAME,     "mds_reint_rename" },
+        { MDS_REINT_OPEN,       "mds_reint_open" },
+        { MDS_REINT_SETXATTR,   "mds_reint_setxattr" },
         { BRW_READ_BYTES,       "read_bytes" },
         { BRW_WRITE_BYTES,      "write_bytes" },
 };
@@ -145,15 +149,19 @@ struct ll_eopcode {
 const char *ll_opcode2str(__u32 opcode)
 {
         /* When one of the assertions below fail, chances are that:
-         *     1) A new opcode was added in lustre_idl.h, but was
-         *        is missing from the table above.
+         *     1) A new opcode was added in include/lustre/lustre_idl.h,
+         *        but is missing from the table above.
          * or  2) The opcode space was renumbered or rearranged,
          *        and the opcode_offset() function in
          *        ptlrpc_internal.h needs to be modified.
          */
         __u32 offset = opcode_offset(opcode);
-        LASSERT(offset < LUSTRE_MAX_OPCODES);
-        LASSERT(ll_rpc_opcode_table[offset].opcode == opcode);
+        LASSERTF(offset < LUSTRE_MAX_OPCODES,
+                 "offset %u >= LUSTRE_MAX_OPCODES %u\n",
+                 offset, LUSTRE_MAX_OPCODES);
+        LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode,
+                 "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
+                 offset, ll_rpc_opcode_table[offset].opcode, opcode);
         return ll_rpc_opcode_table[offset].opname;
 }
 
index 5cf4caa..61fbe25 100644 (file)
@@ -733,6 +733,7 @@ void *lustre_swab_buf(struct lustre_msg *msg, int index, int min_size,
 {
         void *ptr = NULL;
 
+        LASSERT(msg != NULL);
         switch (msg->lm_magic) {
         case LUSTRE_MSG_MAGIC_V2:
         case LUSTRE_MSG_MAGIC_V2_SWABBED:
@@ -1744,6 +1745,15 @@ void lustre_swab_obd_quotactl (struct obd_quotactl *q)
         lustre_swab_obd_dqblk (&q->qc_dqblk);
 }
 
+/*
+ * Byte-swap every field of a struct quota_adjust_qunit in place:
+ * the two 32-bit fields first, then the three 64-bit ones.
+ */
+void lustre_swab_quota_adjust_qunit (struct quota_adjust_qunit *q)
+{
+        /* 32-bit members */
+        __swab32s (&q->qaq_flags);
+        __swab32s (&q->qaq_id);
+
+        /* 64-bit members, padding included */
+        __swab64s (&q->qaq_bunit_sz);
+        __swab64s (&q->qaq_iunit_sz);
+        __swab64s (&q->padding1);
+}
+
 void lustre_swab_mds_remote_perm (struct mds_remote_perm *p)
 {
         __swab32s (&p->rp_uid);
@@ -2104,54 +2114,92 @@ void lustre_swab_qdata(struct qunit_data *d)
         __swab32s (&d->qd_id);
         __swab32s (&d->qd_flags);
         __swab64s (&d->qd_count);
-}
-
-void lustre_swab_qdata_old(struct qunit_data_old *d)
-{
-        __swab32s (&d->qd_id);
-        __swab32s (&d->qd_type);
-        __swab32s (&d->qd_count);
-        __swab32s (&d->qd_isblk);
+        __swab64s (&d->qd_qunit);
+        __swab64s (&d->padding);
 }
 
 #ifdef __KERNEL__
-struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d)
+
+/**
+ * Get qdata from a request (req/rep).
+ *
+ * \param request  the struct ptlrpc_request, passed as void *
+ * \param qdata    destination; overwritten with the buffer contents when
+ *                 OBD_CONNECT_CHANGE_QS was negotiated
+ * \param is_req   QUOTA_REQUEST reads the request buffer (REQ_REC_OFF),
+ *                 any other value reads the reply buffer (REPLY_REC_OFF)
+ * \param is_exp   non-zero takes the connect flags from req->rq_export,
+ *                 zero takes them from req->rq_import
+ *
+ * \retval 0        success (without OBD_CONNECT_CHANGE_QS only the
+ *                  CHANGE_QS mark is cleared in \a qdata)
+ * \retval -EINVAL  OBD_CONNECT_CHANGE_QS set without OBD_CONNECT_QUOTA64
+ * \retval -EPROTO  the message buffer could not be obtained
+ */
+int quota_get_qdata(void *request, struct qunit_data *qdata,
+                    int is_req, int is_exp)
 {
-        struct qunit_data_old tmp;
-        struct qunit_data *ret;
-        ENTRY;
+        struct ptlrpc_request *req = (struct ptlrpc_request *)request;
+        struct qunit_data *new;
+        __u64  flags;
+        int rc = 0;
 
-        if (!d)
-                return NULL;
+        LASSERT(req);
+        LASSERT(qdata);
 
-        tmp = *d;
-        ret = (struct qunit_data *)d;
-        ret->qd_id = tmp.qd_id;
-        ret->qd_flags = (tmp.qd_type ? QUOTA_IS_GRP : 0) | (tmp.qd_isblk ? QUOTA_IS_BLOCK : 0);
-        ret->qd_count = tmp.qd_count;
-        RETURN(ret);
+        /* read the connect flags only after req has been asserted */
+        flags = is_exp ? req->rq_export->exp_connect_flags :
+                         req->rq_import->imp_connect_data.ocd_connect_flags;
+
+        /* support for quota64 and change_qs */
+        if (flags & OBD_CONNECT_CHANGE_QS) {
+                if (!(flags & OBD_CONNECT_QUOTA64)) {
+                        CDEBUG(D_ERROR, "Wire protocol for qunit is broken!\n");
+                        return -EINVAL;
+                }
+                if (is_req == QUOTA_REQUEST)
+                        new = lustre_swab_reqbuf(req, REQ_REC_OFF,
+                                                 sizeof(struct qunit_data),
+                                                 lustre_swab_qdata);
+                else
+                        new = lustre_swab_repbuf(req, REPLY_REC_OFF,
+                                                 sizeof(struct qunit_data),
+                                                 lustre_swab_qdata);
+                if (new == NULL)
+                        GOTO(out, rc = -EPROTO);
+                *qdata = *new;
+                QDATA_SET_CHANGE_QS(qdata);
+                return 0;
+        } else {
+                QDATA_CLR_CHANGE_QS(qdata);
+        }
 
+out:
+        return rc;
 }
-EXPORT_SYMBOL(lustre_quota_old_to_new);
+EXPORT_SYMBOL(quota_get_qdata);
 
-struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d)
+/**
+ * Copy qdata into a request (req/rep).
+ *
+ * \param request  the struct ptlrpc_request, passed as void *
+ * \param qdata    source data, copied with memcpy() into the message
+ * \param is_req   QUOTA_REQUEST writes into rq_reqmsg at REQ_REC_OFF,
+ *                 any other value writes into rq_repmsg at REPLY_REC_OFF
+ * \param is_exp   non-zero takes the connect flags from req->rq_export,
+ *                 zero takes them from req->rq_import
+ *
+ * \retval 0        success (nothing is copied without
+ *                  OBD_CONNECT_CHANGE_QS)
+ * \retval -EINVAL  OBD_CONNECT_CHANGE_QS set without OBD_CONNECT_QUOTA64
+ * \retval -EPROTO  the message buffer could not be obtained
+ */
+int quota_copy_qdata(void *request, struct qunit_data *qdata,
+                     int is_req, int is_exp)
 {
-        struct qunit_data tmp;
-        struct qunit_data_old *ret;
-        ENTRY;
+        struct ptlrpc_request *req = (struct ptlrpc_request *)request;
+        void *target;
+        __u64  flags;
+        int rc = 0;
 
-        if (!d)
-                return NULL;
+        LASSERT(req);
+        LASSERT(qdata);
 
-        tmp = *d;
-        ret = (struct qunit_data_old *)d;
-        ret->qd_id = tmp.qd_id;
-        ret->qd_type = ((tmp.qd_flags & QUOTA_IS_GRP) ? GRPQUOTA : USRQUOTA);
-        ret->qd_count = (__u32)tmp.qd_count;
-        ret->qd_isblk = ((tmp.qd_flags & QUOTA_IS_BLOCK) ? 1 : 0);
-        RETURN(ret);
+        /* read the connect flags only after req has been asserted */
+        flags = is_exp ? req->rq_export->exp_connect_flags :
+                         req->rq_import->imp_connect_data.ocd_connect_flags;
+
+        /* support for quota64 and change_qs */
+        if (flags & OBD_CONNECT_CHANGE_QS) {
+                if (!(flags & OBD_CONNECT_QUOTA64)) {
+                        CERROR("Wire protocol for qunit is broken!\n");
+                        return -EINVAL;
+                }
+                if (is_req == QUOTA_REQUEST)
+                        target = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF,
+                                                sizeof(struct qunit_data));
+                else
+                        target = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
+                                                sizeof(struct qunit_data));
+                if (!target)
+                        GOTO(out, rc = -EPROTO);
+                memcpy(target, qdata, sizeof(*qdata));
+                return 0;
+        }
+
+out:
+        return rc;
 }
-EXPORT_SYMBOL(lustre_quota_new_to_old);
+EXPORT_SYMBOL(quota_copy_qdata);
 #endif /* __KERNEL__ */
 
 static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
@@ -2222,11 +2270,12 @@ void lustre_swab_lustre_capa(struct lustre_capa *c)
 {
         lustre_swab_lu_fid(&c->lc_fid);
         __swab64s (&c->lc_opc);
-        __swab32s (&c->lc_uid);
+        __swab64s (&c->lc_uid);
+        __swab64s (&c->lc_gid);
         __swab32s (&c->lc_flags);
         __swab32s (&c->lc_keyid);
         __swab32s (&c->lc_timeout);
-        __swab64s (&c->lc_expiry);
+        __swab32s (&c->lc_expiry);
 }
 
 void lustre_swab_lustre_capa_key (struct lustre_capa_key *k)
index 76e0727..d8e7c86 100644 (file)
@@ -264,7 +264,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc);
 EXPORT_SYMBOL(lustre_swab_ldlm_request);
 EXPORT_SYMBOL(lustre_swab_ldlm_reply);
 EXPORT_SYMBOL(lustre_swab_qdata);
-EXPORT_SYMBOL(lustre_swab_qdata_old);
+EXPORT_SYMBOL(lustre_swab_quota_adjust_qunit);
 EXPORT_SYMBOL(lustre_msg_get_flags);
 EXPORT_SYMBOL(lustre_msg_add_flags);
 EXPORT_SYMBOL(lustre_msg_set_flags);
index 3f2458d..0045ddd 100644 (file)
@@ -110,7 +110,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
          */
         list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
-                
+
                 /* If need to resend the last sent transno (because a
                    reconnect has occurred), then stop on the matching
                    req and send it again. If, however, the last sent
@@ -257,7 +257,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
                 spin_lock(&imp->imp_lock);
                 imp->imp_deactive = 0;
                 spin_unlock(&imp->imp_lock);
-                
+
                 CDEBUG(D_HA, "setting import %s VALID\n",
                        obd2cli_tgt(imp->imp_obd));
                 rc = ptlrpc_recover_import(imp, NULL);
index 9db1780..4f72364 100644 (file)
@@ -2271,35 +2271,6 @@ EXPORT_SYMBOL(sec2target_str);
  * crypto API helper/alloc blkciper     *
  ****************************************/
 
-#ifdef __KERNEL__
-#ifndef HAVE_ASYNC_BLOCK_CIPHER
-struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname,
-                                                   u32 type, u32 mask)
-{
-        char        buf[CRYPTO_MAX_ALG_NAME + 1];
-        const char *pan = algname;
-        u32         flag = 0;
-
-        if (strncmp("cbc(", algname, 4) == 0)
-                flag |= CRYPTO_TFM_MODE_CBC;
-        else if (strncmp("ecb(", algname, 4) == 0)
-                flag |= CRYPTO_TFM_MODE_ECB;
-        if (flag) {
-                char *vp = strnchr(algname, CRYPTO_MAX_ALG_NAME, ')');
-                if (vp) {
-                        memcpy(buf, algname + 4, vp - algname - 4);
-                        buf[vp - algname - 4] = '\0';
-                        pan = buf;
-                } else {
-                        flag = 0;
-                }
-        }
-        return crypto_alloc_tfm(pan, flag);
-}
-EXPORT_SYMBOL(ll_crypto_alloc_blkcipher);
-#endif
-#endif
-
 /****************************************
  * initialize/finalize                  *
  ****************************************/
index 2381e72..d8d737d 100644 (file)
@@ -1415,7 +1415,6 @@ static int ptlrpc_main(void *arg)
         struct ptlrpc_thread   *thread = data->thread;
         struct obd_device      *dev = data->dev;
         struct ptlrpc_reply_state *rs;
-        struct lc_watchdog     *watchdog;
 #ifdef WITH_GROUP_INFO
         struct group_info *ginfo = NULL;
 #endif
@@ -1483,9 +1482,10 @@ static int ptlrpc_main(void *arg)
          */
         cfs_waitq_signal(&thread->t_ctl_waitq);
 
-        watchdog = lc_watchdog_add(max_t(int, obd_timeout, AT_OFF ? 0 :
-                                   at_get(&svc->srv_at_estimate)) *
-                                   svc->srv_watchdog_factor, NULL, NULL);
+        thread->t_watchdog = lc_watchdog_add(max_t(int, obd_timeout, AT_OFF ? 0 :
+                                                   at_get(&svc->srv_at_estimate))
+                                             *  svc->srv_watchdog_factor,
+                                             NULL, NULL);
 
         spin_lock(&svc->srv_lock);
         svc->srv_threads_running++;
@@ -1493,8 +1493,8 @@ static int ptlrpc_main(void *arg)
         spin_unlock(&svc->srv_lock);
         cfs_waitq_signal(&svc->srv_free_rs_waitq);
 
-        CDEBUG(D_NET, "service thread %d (#%d)started\n", thread->t_id,
-              svc->srv_threads_running);
+        CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
+               svc->srv_threads_running);
 
         /* XXX maintain a list of all managed devices: insert here */
 
@@ -1504,7 +1504,7 @@ static int ptlrpc_main(void *arg)
                 struct l_wait_info lwi = LWI_TIMEOUT(svc->srv_rqbd_timeout,
                                                      ptlrpc_retry_rqbds, svc);
 
-                lc_watchdog_disable(watchdog);
+                lc_watchdog_disable(thread->t_watchdog);
 
                 cond_resched();
 
@@ -1521,7 +1521,7 @@ static int ptlrpc_main(void *arg)
                               svc->srv_at_check,
                               &lwi);
 
-                lc_watchdog_touch_ms(watchdog, max_t(int, obd_timeout,
+                lc_watchdog_touch_ms(thread->t_watchdog, max_t(int, obd_timeout,
                                      AT_OFF ? 0 :
                                      at_get(&svc->srv_at_estimate)) *
                                      svc->srv_watchdog_factor);
@@ -1568,7 +1568,8 @@ static int ptlrpc_main(void *arg)
                 }
         }
 
-        lc_watchdog_delete(watchdog);
+        lc_watchdog_delete(thread->t_watchdog);
+        thread->t_watchdog = NULL;
 
 out_srv_fini:
         /*
index b1308fb..d35ec0d 100644 (file)
@@ -129,7 +129,9 @@ void lustre_assert_wire_constants(void)
                  (long long)OST_QUOTACHECK);
         LASSERTF(OST_QUOTACTL == 19, " found %lld\n",
                  (long long)OST_QUOTACTL);
-        LASSERTF(OST_LAST_OPC == 20, " found %lld\n",
+        LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, " found %lld\n",
+                 (long long)OST_QUOTA_ADJUST_QUNIT);
+        LASSERTF(OST_LAST_OPC == 21, " found %lld\n",
                  (long long)OST_LAST_OPC);
         LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n",
                  (long long)OBD_OBJECT_EOF);
@@ -253,9 +255,9 @@ void lustre_assert_wire_constants(void)
                  (long long)OBD_QC_CALLBACK);
         LASSERTF(OBD_LAST_OPC == 403, " found %lld\n",
                  (long long)OBD_LAST_OPC);
-        LASSERTF(QUOTA_DQACQ == 601, " found %lld\n",
+        LASSERTF(QUOTA_DQACQ == 901, " found %lld\n",
                  (long long)QUOTA_DQACQ);
-        LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
+        LASSERTF(QUOTA_DQREL == 902, " found %lld\n",
                  (long long)QUOTA_DQREL);
         LASSERTF(MGS_CONNECT == 250, " found %lld\n",
                  (long long)MGS_CONNECT);
@@ -461,8 +463,8 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL);
         CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL);
         CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL);
-        CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x00010000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00020000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00010000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x00020000ULL);
         CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL);
         CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL);
         CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL);
@@ -2145,7 +2147,7 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm));
 
         /* Checks for struct qunit_data */
-        LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n",
+        LASSERTF((int)sizeof(struct qunit_data) == 32, " found %lld\n",
                  (long long)(int)sizeof(struct qunit_data));
         LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n",
                  (long long)(int)offsetof(struct qunit_data, qd_id));
@@ -2159,26 +2161,38 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct qunit_data, qd_count));
         LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct qunit_data *)0)->qd_count));
-
-        /* Checks for struct qunit_data_old */
-        LASSERTF((int)sizeof(struct qunit_data_old) == 16, " found %lld\n",
-                 (long long)(int)sizeof(struct qunit_data_old));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_id) == 0, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_id));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_id) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_id));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_type) == 4, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_type));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_type) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_type));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_count) == 8, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_count));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_count) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_count));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_isblk) == 12, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_isblk));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_isblk) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_isblk));
+        LASSERTF((int)offsetof(struct qunit_data, qd_qunit) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, qd_qunit));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_qunit) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->qd_qunit));
+        LASSERTF((int)offsetof(struct qunit_data, padding) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, padding));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->padding) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->padding));
+
+        /* Checks for struct quota_adjust_qunit */
+        LASSERTF((int)sizeof(struct quota_adjust_qunit) == 32, " found %lld\n",
+                 (long long)(int)sizeof(struct quota_adjust_qunit));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_flags) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_flags));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_id) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_id));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, padding1) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, padding1));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->padding1) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->padding1));
 
         /* Checks for struct mgs_target_info */
         LASSERTF((int)sizeof(struct mgs_target_info) == 4544, " found %lld\n",
index e42dff1..f052b42 100644 (file)
@@ -1,7 +1,7 @@
 MODULES := lquota
 
 lquota-objs := quota_check.o quota_context.o quota_ctl.o quota_interface.o
-lquota-objs += quota_master.o
+lquota-objs += quota_master.o quota_adjust_qunit.o lproc_quota.o
 
 @INCLUDE_RULES@
 
index e070f4c..9a20d28 100644 (file)
 
 if LIBLUSTRE
 noinst_LIBRARIES = libquota.a
-libquota_a_SOURCES = quota_check.c quota_ctl.c quota_interface.c
+libquota_a_SOURCES = quota_check.c quota_ctl.c quota_interface.c quota_adjust_qunit.c
 libquota_a_CPPFLAGS = $(LLCPPFLAGS)
 libquota_a_CFLAGS = $(LLCFLAGS)
 endif
 
-if MODULES
+if QUOTA
 modulefs_DATA = lquota$(KMODEXT)
 endif
 
diff --git a/lustre/quota/lproc_quota.c b/lustre/quota/lproc_quota.c
new file mode 100644 (file)
index 0000000..a76807a
--- /dev/null
@@ -0,0 +1,667 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+#define DEBUG_SUBSYSTEM S_LQUOTA
+
+#include <linux/version.h>
+#include <lprocfs_status.h>
+#include <obd.h>
+#include <linux/seq_file.h>
+#include <lustre_fsfilt.h>
+
+#include "quota_internal.h"
+
+#ifdef HAVE_QUOTA_SUPPORT
+
+#ifdef LPROCFS
+/*
+ * procfs read handler: format lqc_bunit_sz of the obd_device given in
+ * @data into @page (at most @count bytes) and return snprintf()'s result.
+ */
+int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        return snprintf(page, count, "%lu\n", obt->obt_qctxt.lqc_bunit_sz);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_bunit);
+
+/*
+ * procfs write handler for lqc_bunit_sz.
+ *
+ * The value obtained from lprocfs_write_helper() must be positive, a
+ * multiple of QUOTABLOCK_SIZE and strictly greater than the current
+ * lqc_btune_sz.
+ *
+ * Returns @count on success, the helper's error code if it fails, or
+ * -EINVAL when the value is rejected.
+ */
+int lprocfs_quota_wr_bunit(struct file *file, const char *buffer,
+                           unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* val is signed while lqc_btune_sz is printed as unsigned long:
+         * without the explicit sign check a negative multiple of
+         * QUOTABLOCK_SIZE would be converted to a huge unsigned value,
+         * pass the comparison below and be stored as the unit size */
+        if (val <= 0 || val % QUOTABLOCK_SIZE ||
+            val <= obd->u.obt.obt_qctxt.lqc_btune_sz)
+                return -EINVAL;
+
+        obd->u.obt.obt_qctxt.lqc_bunit_sz = val;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_bunit);
+
+/*
+ * procfs read handler: format lqc_btune_sz of the obd_device given in
+ * @data into @page (at most @count bytes) and return snprintf()'s result.
+ */
+int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        return snprintf(page, count, "%lu\n", obt->obt_qctxt.lqc_btune_sz);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_btune);
+
+/*
+ * procfs write handler for lqc_btune_sz.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper() if it
+ * fails, or -EINVAL when the value is out of range.
+ */
+int lprocfs_quota_wr_btune(struct file *file, const char *buffer,
+                           unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+        int tune;
+        int err;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        err = lprocfs_write_helper(buffer, count, &tune);
+        if (err)
+                return err;
+
+        /* must exceed the minimum and be a multiple of QUOTABLOCK_SIZE */
+        if (tune <= QUOTABLOCK_SIZE * MIN_QLIMIT || tune % QUOTABLOCK_SIZE)
+                return -EINVAL;
+        /* and must stay strictly below the current lqc_bunit_sz */
+        if (tune >= obt->obt_qctxt.lqc_bunit_sz)
+                return -EINVAL;
+
+        obt->obt_qctxt.lqc_btune_sz = tune;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_btune);
+
+/*
+ * procfs read handler: format lqc_iunit_sz of the obd_device given in
+ * @data into @page (at most @count bytes) and return snprintf()'s result.
+ */
+int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        return snprintf(page, count, "%lu\n", obt->obt_qctxt.lqc_iunit_sz);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_iunit);
+
+/*
+ * procfs write handler for lqc_iunit_sz.
+ *
+ * The value obtained from lprocfs_write_helper() must be positive and
+ * strictly greater than the current lqc_itune_sz.
+ *
+ * Returns @count on success, the helper's error code if it fails, or
+ * -EINVAL when the value is rejected.
+ */
+int lprocfs_quota_wr_iunit(struct file *file, const char *buffer,
+                           unsigned long count, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        int val, rc;
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* val is signed while lqc_itune_sz is printed as unsigned long:
+         * a negative val would be converted to a huge unsigned value and
+         * slip past the comparison, so reject it explicitly first */
+        if (val <= 0 || val <= obd->u.obt.obt_qctxt.lqc_itune_sz)
+                return -EINVAL;
+
+        obd->u.obt.obt_qctxt.lqc_iunit_sz = val;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_iunit);
+
+/*
+ * procfs read handler: format lqc_itune_sz of the obd_device given in
+ * @data into @page (at most @count bytes) and return snprintf()'s result.
+ */
+int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        return snprintf(page, count, "%lu\n", obt->obt_qctxt.lqc_itune_sz);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_itune);
+
+/*
+ * procfs write handler for lqc_itune_sz.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper() if it
+ * fails, or -EINVAL when the value is out of range.
+ */
+int lprocfs_quota_wr_itune(struct file *file, const char *buffer,
+                           unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+        int tune;
+        int err;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        err = lprocfs_write_helper(buffer, count, &tune);
+        if (err)
+                return err;
+
+        /* must be above MIN_QLIMIT ... */
+        if (tune <= MIN_QLIMIT)
+                return -EINVAL;
+        /* ... and strictly below the current lqc_iunit_sz */
+        if (tune >= obt->obt_qctxt.lqc_iunit_sz)
+                return -EINVAL;
+
+        obt->obt_qctxt.lqc_itune_sz = tune;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_itune);
+
+#define USER_QUOTA      1
+#define GROUP_QUOTA     2
+
+#define MAX_STYPE_SIZE  5
+
+/*
+ * procfs read handler: report which quota types are flagged in lqc_flags.
+ *
+ * Writes "u" if LQC_USRQUOTA_FLAG is set, "g" if LQC_GRPQUOTA_FLAG is
+ * set, then always "3" and a newline, into @page (at most @count bytes).
+ * Returns snprintf()'s result.
+ */
+int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        struct obd_device_target *obt;
+        int oq_type;
+
+        LASSERT(obd != NULL);
+
+        obt = &obd->u.obt;
+
+        /* Collect the needed information */
+        oq_type = obt->obt_qctxt.lqc_flags;
+
+        /* Transform the collected data into a user-readable string;
+         * formatting straight into @page needs no bounded scratch buffer */
+        return snprintf(page, count, "%s%s3\n",
+                        (oq_type & LQC_USRQUOTA_FLAG) ? "u" : "",
+                        (oq_type & LQC_GRPQUOTA_FLAG) ? "g" : "");
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_type);
+
+/*
+ * Turn quota on for @obd.
+ *
+ * @type must be USRQUOTA, GRPQUOTA or UGQUOTA.  When @is_master is set,
+ * mds_admin_quota_on() is called first under mds_qonoff_sem; if that
+ * succeeds (or is skipped) fsfilt_quotactl() is issued with Q_QUOTAON on
+ * @sb and, on success, the matching bits are set in lqc_flags.
+ *
+ * obt_quotachecking is used as a try-lock: the function proceeds only if
+ * decrementing it reaches zero, and the counter is restored on exit.
+ *
+ * Returns 0 on success or if the requested types are already flagged on,
+ * -ENOMEM if the request cannot be allocated, -EBUSY if the try-lock is
+ * not obtained, or the error of the failing quota call.
+ */
+static int auto_quota_on(struct obd_device *obd, int type,
+                         struct super_block *sb, int is_master)
+{
+        struct obd_quotactl *oqctl;
+        struct lvfs_run_ctxt saved;
+        int rc = 0, id;
+        struct obd_device_target *obt;
+        ENTRY;
+
+        LASSERT(type == USRQUOTA || type == GRPQUOTA || type == UGQUOTA);
+
+        obt = &obd->u.obt;
+
+        OBD_ALLOC_PTR(oqctl);
+        if (!oqctl)
+                RETURN(-ENOMEM);
+
+        if (!atomic_dec_and_test(&obt->obt_quotachecking)) {
+                CDEBUG(D_INFO, "other people are doing quotacheck\n");
+                /* leave through "out": it undoes the decrement above and
+                 * frees oqctl, which a direct return would leak */
+                rc = -EBUSY;
+                goto out;
+        }
+
+        id = UGQUOTA2LQC(type);
+        /* quota already turned on */
+        if ((obt->obt_qctxt.lqc_flags & id) == id) {
+                rc = 0;
+                goto out;
+        }
+
+        oqctl->qc_type = type;
+        oqctl->qc_cmd = Q_QUOTAON;
+        oqctl->qc_id = obt->obt_qfmt;
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        if (is_master) {
+                struct mds_obd *mds = &obd->u.mds;
+
+                down(&mds->mds_qonoff_sem);
+                /* turn on cluster wide quota */
+                rc = mds_admin_quota_on(obd, oqctl);
+                if (rc)
+                        CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR,
+                               "auto-enable admin quota failed. rc=%d\n", rc);
+                up(&mds->mds_qonoff_sem);
+
+        }
+        if (!rc) {
+                /* turn on local quota */
+                rc = fsfilt_quotactl(obd, sb, oqctl);
+                if (rc)
+                        CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR,
+                               "auto-enable local quota failed. rc=%d\n", rc);
+                else
+                        obt->obt_qctxt.lqc_flags |= UGQUOTA2LQC(type);
+        }
+
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+out:
+        atomic_inc(&obt->obt_quotachecking);
+
+        OBD_FREE_PTR(oqctl);
+        RETURN(rc);
+}
+
+/*
+ * procfs write handler selecting the quota types to enable.
+ *
+ * Up to MAX_STYPE_SIZE bytes are copied from user space and scanned:
+ * 'u' selects user quota, 'g' group quota, '1' and '2' only trigger a
+ * warning, every other byte is skipped.  If anything was selected,
+ * auto_quota_on() is called; its result is not reported to the writer.
+ *
+ * Returns @count, -EINVAL if @count exceeds MAX_STYPE_SIZE, or -EFAULT
+ * if the user buffer cannot be copied.
+ */
+int lprocfs_quota_wr_type(struct file *file, const char *buffer,
+                          unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+        char stype[MAX_STYPE_SIZE + 1] = "";
+        unsigned long pos;
+        int selected = 0;
+        int is_mds;
+
+        LASSERT(obd != NULL);
+
+        obt = &obd->u.obt;
+        is_mds = !strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME);
+
+        if (count > MAX_STYPE_SIZE)
+                return -EINVAL;
+
+        if (copy_from_user(stype, buffer, count))
+                return -EFAULT;
+
+        for (pos = 0; pos < count; pos++) {
+                char c = stype[pos];
+
+                if (c == 'u')
+                        selected |= USER_QUOTA;
+                else if (c == 'g')
+                        selected |= GROUP_QUOTA;
+                else if (c == '1' || c == '2')
+                        CWARN("quota_type options 1 and 2 are obsolete, "
+                              "they will be ignored\n");
+                /* '3' is the only valid version spec and needs no action;
+                 * anything else (e.g. a trailing \n) is skipped */
+        }
+
+        /* USER_QUOTA/GROUP_QUOTA bits minus one give the type argument */
+        if (selected != 0)
+                auto_quota_on(obd, selected - 1, obt->obt_sb, is_mds);
+
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_type);
+
+/*
+ * procfs read handler: format lqc_switch_seconds of the obd_device given
+ * in @data into @page (at most @count bytes); returns snprintf()'s result.
+ */
+int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        return snprintf(page, count, "%d\n",
+                        obt->obt_qctxt.lqc_switch_seconds);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_switch_seconds);
+
+/*
+ * procfs write handler for lqc_switch_seconds; only values above 10 are
+ * accepted.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper() if it
+ * fails, or -EINVAL for a value of 10 or less.
+ */
+int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer,
+                                    unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int seconds;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &seconds);
+        if (err)
+                return err;
+
+        if (!(seconds > 10))
+                return -EINVAL;
+
+        obd->u.obt.obt_qctxt.lqc_switch_seconds = seconds;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_switch_seconds);
+
+/*
+ * procfs read handler: format lqc_sync_blk of the obd_device given in
+ * @data into @page (at most @count bytes); returns snprintf()'s result.
+ */
+int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off,
+                              int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_device_target *obt;
+
+        LASSERT(obd != NULL);
+        obt = &obd->u.obt;
+
+        return snprintf(page, count, "%d\n", obt->obt_qctxt.lqc_sync_blk);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_sync_blk);
+
+/*
+ * procfs write handler for lqc_sync_blk; negative values are refused.
+ *
+ * Returns @count on success, the error from lprocfs_write_helper() if it
+ * fails, or -EINVAL for a negative value.
+ */
+int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer,
+                              unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int blocks;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &blocks);
+        if (err)
+                return err;
+
+        if (blocks < 0)
+                return -EINVAL;
+
+        obd->u.obt.obt_qctxt.lqc_sync_blk = blocks;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_sync_blk);
+
+/**
+ * procfs read handler for "quota_switch_qs".
+ *
+ * Reports in words whether lqc_switch_qs is set: "enabled" when it is
+ * non-zero, "disabled" otherwise. \a start, \a off and \a eof are not used.
+ *
+ * Returns whatever snprintf() returns for the formatted text.
+ */
+int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        const char *state;
+
+        LASSERT(obd != NULL);
+
+        if (obd->u.obt.obt_qctxt.lqc_switch_qs)
+                state = "enabled";
+        else
+                state = "disabled";
+
+        return snprintf(page, count, "changing qunit size is %s\n", state);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_switch_qs);
+
+/**
+ * procfs write handler for "quota_switch_qs".
+ *
+ * Parses an integer from \a buffer with lprocfs_write_helper() and
+ * normalizes it to a flag: any non-zero value stores 1 in lqc_switch_qs,
+ * zero stores 0.
+ *
+ * Returns \a count on success or the helper's error code if parsing failed.
+ */
+int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int enable;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &enable);
+        if (err != 0)
+                return err;
+
+        obd->u.obt.obt_qctxt.lqc_switch_qs = enable ? 1 : 0;
+
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_switch_qs);
+
+/**
+ * procfs read handler for "quota_boundary_factor".
+ *
+ * Prints lqc_cqs_boundary_factor with "%lu" followed by a newline.
+ * \a start, \a off and \a eof are not used.
+ *
+ * Returns whatever snprintf() returns for the formatted text.
+ */
+int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off,
+                                     int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct lustre_quota_ctxt *qctxt;
+
+        LASSERT(obd != NULL);
+        qctxt = &obd->u.obt.obt_qctxt;
+
+        return snprintf(page, count, "%lu\n", qctxt->lqc_cqs_boundary_factor);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_boundary_factor);
+
+/**
+ * procfs write handler for "quota_boundary_factor".
+ *
+ * Parses an integer from \a buffer with lprocfs_write_helper() and stores it
+ * in lqc_cqs_boundary_factor. Values below 2 are rejected.
+ *
+ * Returns \a count on success, the helper's error code if parsing failed,
+ * or -EINVAL if the value is less than 2.
+ */
+int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer,
+                                     unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int factor;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &factor);
+        if (err != 0)
+                return err;
+
+        if (factor >= 2) {
+                obd->u.obt.obt_qctxt.lqc_cqs_boundary_factor = factor;
+                return count;
+        }
+
+        return -EINVAL;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_boundary_factor);
+
+/**
+ * procfs read handler for "quota_least_bunit".
+ *
+ * Prints lqc_cqs_least_bunit with "%lu" followed by a newline.
+ * \a start, \a off and \a eof are not used.
+ *
+ * Returns whatever snprintf() returns for the formatted text.
+ */
+int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct lustre_quota_ctxt *qctxt;
+
+        LASSERT(obd != NULL);
+        qctxt = &obd->u.obt.obt_qctxt;
+
+        return snprintf(page, count, "%lu\n", qctxt->lqc_cqs_least_bunit);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_least_bunit);
+
+/**
+ * procfs write handler for "quota_least_bunit".
+ *
+ * Parses an integer from \a buffer with lprocfs_write_helper() and stores it
+ * in lqc_cqs_least_bunit. The value must be at least PTLRPC_MAX_BRW_SIZE
+ * and strictly smaller than the current lqc_bunit_sz.
+ *
+ * Returns \a count on success, the helper's error code if parsing failed,
+ * or -EINVAL if the value is outside that range.
+ */
+int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer,
+                                 unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int least;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &least);
+        if (err != 0)
+                return err;
+
+        if (least >= PTLRPC_MAX_BRW_SIZE &&
+            least < obd->u.obt.obt_qctxt.lqc_bunit_sz) {
+                obd->u.obt.obt_qctxt.lqc_cqs_least_bunit = least;
+                return count;
+        }
+
+        return -EINVAL;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_least_bunit);
+
+/**
+ * procfs read handler for "quota_least_iunit".
+ *
+ * Prints lqc_cqs_least_iunit with "%lu" followed by a newline.
+ * \a start, \a off and \a eof are not used.
+ *
+ * Returns whatever snprintf() returns for the formatted text.
+ */
+int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct lustre_quota_ctxt *qctxt;
+
+        LASSERT(obd != NULL);
+        qctxt = &obd->u.obt.obt_qctxt;
+
+        return snprintf(page, count, "%lu\n", qctxt->lqc_cqs_least_iunit);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_least_iunit);
+
+/**
+ * procfs write handler for "quota_least_iunit".
+ *
+ * Parses an integer from \a buffer with lprocfs_write_helper() and stores it
+ * in lqc_cqs_least_iunit. The value must be at least 1 and strictly smaller
+ * than the current lqc_iunit_sz.
+ *
+ * Returns \a count on success, the helper's error code if parsing failed,
+ * or -EINVAL if the value is outside that range.
+ */
+int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer,
+                                 unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int least;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &least);
+        if (err != 0)
+                return err;
+
+        if (least >= 1 && least < obd->u.obt.obt_qctxt.lqc_iunit_sz) {
+                obd->u.obt.obt_qctxt.lqc_cqs_least_iunit = least;
+                return count;
+        }
+
+        return -EINVAL;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_least_iunit);
+
+/**
+ * procfs read handler for "quota_qs_factor".
+ *
+ * Prints lqc_cqs_qs_factor with "%lu" followed by a newline.
+ * \a start, \a off and \a eof are not used.
+ *
+ * Returns whatever snprintf() returns for the formatted text.
+ */
+int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        struct lustre_quota_ctxt *qctxt;
+
+        LASSERT(obd != NULL);
+        qctxt = &obd->u.obt.obt_qctxt;
+
+        return snprintf(page, count, "%lu\n", qctxt->lqc_cqs_qs_factor);
+}
+EXPORT_SYMBOL(lprocfs_quota_rd_qs_factor);
+
+/**
+ * procfs write handler for "quota_qs_factor".
+ *
+ * Parses an integer from \a buffer with lprocfs_write_helper() and stores it
+ * in lqc_cqs_qs_factor. Values below 2 are rejected.
+ *
+ * Returns \a count on success, the helper's error code if parsing failed,
+ * or -EINVAL if the value is less than 2.
+ */
+int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int factor;
+        int err;
+
+        LASSERT(obd != NULL);
+
+        err = lprocfs_write_helper(buffer, count, &factor);
+        if (err != 0)
+                return err;
+
+        if (factor >= 2) {
+                obd->u.obt.obt_qctxt.lqc_cqs_qs_factor = factor;
+                return count;
+        }
+
+        return -EINVAL;
+}
+EXPORT_SYMBOL(lprocfs_quota_wr_qs_factor);
+
+/**
+ * proc entries created for every obd that sets up quota proc files; passed
+ * to lprocfs_register() by lquota_proc_setup().
+ *
+ * The table is handed over as a bare pointer with no element count, so it
+ * must end with a sentinel entry whose name is NULL.
+ * NOTE(review): the consumer is not visible here; it is expected to stop at
+ * the first entry with a NULL name -- confirm against lprocfs_add_vars().
+ */
+struct lprocfs_vars lprocfs_quota_common_vars[] = {
+        { "quota_bunit_sz", lprocfs_quota_rd_bunit,
+                            lprocfs_quota_wr_bunit, 0},
+        { "quota_btune_sz", lprocfs_quota_rd_btune,
+                            lprocfs_quota_wr_btune, 0},
+        { "quota_iunit_sz", lprocfs_quota_rd_iunit,
+                            lprocfs_quota_wr_iunit, 0},
+        { "quota_itune_sz", lprocfs_quota_rd_itune,
+                            lprocfs_quota_wr_itune, 0},
+        { "quota_type",     lprocfs_quota_rd_type,
+                            lprocfs_quota_wr_type, 0},
+        { "quota_switch_seconds",  lprocfs_quota_rd_switch_seconds,
+                                   lprocfs_quota_wr_switch_seconds, 0 },
+        { "quota_sync_blk", lprocfs_quota_rd_sync_blk,
+                            lprocfs_quota_wr_sync_blk, 0},
+        /* end-of-table sentinel */
+        { NULL }
+};
+
+/**
+ * Additional proc entries created only when lquota_proc_setup() is called
+ * with is_master set; passed to lprocfs_add_vars().
+ *
+ * The table is handed over as a bare pointer with no element count, so it
+ * must end with a sentinel entry whose name is NULL.
+ * NOTE(review): the consumer is not visible here; it is expected to stop at
+ * the first entry with a NULL name -- confirm against lprocfs_add_vars().
+ */
+struct lprocfs_vars lprocfs_quota_master_vars[] = {
+        { "quota_switch_qs", lprocfs_quota_rd_switch_qs,
+                             lprocfs_quota_wr_switch_qs, 0 },
+        { "quota_boundary_factor", lprocfs_quota_rd_boundary_factor,
+                                   lprocfs_quota_wr_boundary_factor, 0 },
+        { "quota_least_bunit", lprocfs_quota_rd_least_bunit,
+                               lprocfs_quota_wr_least_bunit, 0 },
+        { "quota_least_iunit", lprocfs_quota_rd_least_iunit,
+                               lprocfs_quota_wr_least_iunit, 0 },
+        { "quota_qs_factor",   lprocfs_quota_rd_qs_factor,
+                               lprocfs_quota_wr_qs_factor, 0 },
+        /* end-of-table sentinel */
+        { NULL }
+};
+
+/**
+ * Create the per-obd quota proc directory, its tunables and its statistics.
+ *
+ * Registers a directory named after obd->obd_name under
+ * lquota_type_proc_dir, populated from lprocfs_quota_common_vars. When
+ * \a is_master is non-zero the entries of lprocfs_quota_master_vars are
+ * added as well. A stats object is then allocated, its counters are
+ * initialized and it is registered as "stats" in the same directory.
+ *
+ * Returns 0 on success, the error from lprocfs_register() or
+ * lprocfs_add_vars(), or -ENOMEM if the stats cannot be allocated. On the
+ * latter two failures the proc directory is removed again.
+ */
+int lquota_proc_setup(struct obd_device *obd, int is_master)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(lquota_type_proc_dir && obd);
+        qctxt->lqc_proc_dir = lprocfs_register(obd->obd_name,
+                                               lquota_type_proc_dir,
+                                               lprocfs_quota_common_vars, obd);
+        if (IS_ERR(qctxt->lqc_proc_dir)) {
+                rc = PTR_ERR(qctxt->lqc_proc_dir);
+                CERROR("error %d setting up lprocfs for %s\n", rc,
+                       obd->obd_name);
+                /* do not leave an error pointer behind for cleanup */
+                qctxt->lqc_proc_dir = NULL;
+                GOTO(out, rc);
+        }
+
+        if (is_master) {
+                rc = lprocfs_add_vars(qctxt->lqc_proc_dir,
+                                      lprocfs_quota_master_vars, obd);
+                if (rc) {
+                        CERROR("error %d setting up lprocfs for %s"
+                               "(quota master)\n", rc, obd->obd_name);
+                        GOTO(out_free_proc, rc);
+                }
+        }
+
+        qctxt->lqc_stats = lprocfs_alloc_stats(LQUOTA_LAST_STAT -
+                                               LQUOTA_FIRST_STAT, 0);
+        if (!qctxt->lqc_stats)
+                GOTO(out_free_proc, rc = -ENOMEM);
+
+        /* every counter below uses LPROCFS_CNTR_AVGMINMAX with unit "us" */
+
+        /* acquire/release requests, sync and async */
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_SYNC_ACQ,
+                             LPROCFS_CNTR_AVGMINMAX, "sync_acq_req", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_SYNC_REL,
+                             LPROCFS_CNTR_AVGMINMAX, "sync_rel_req", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_ASYNC_ACQ,
+                             LPROCFS_CNTR_AVGMINMAX, "async_acq_req", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_ASYNC_REL,
+                             LPROCFS_CNTR_AVGMINMAX, "async_rel_req", "us");
+
+        /* waits in lquota_chkquota and lquota_pending_commit */
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_CHK_BLK,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "wait_for_blk_quota(lquota_chkquota)", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_CHK_INO,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "wait_for_ino_quota(lquota_chkquota)", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_COMMIT_BLK,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "wait_for_blk_quota(lquota_pending_commit)",
+                             "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_COMMIT_INO,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "wait_for_ino_quota(lquota_pending_commit)",
+                             "us");
+
+        /* wait / no-wait cases in qctxt_wait_pending_dqacq */
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_PENDING_BLK_QUOTA,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "wait_for_pending_blk_quota_req"
+                             "(qctxt_wait_pending_dqacq)", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_PENDING_INO_QUOTA,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "wait_for_pending_ino_quota_req"
+                             "(qctxt_wait_pending_dqacq)", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_NOWAIT_PENDING_BLK_QUOTA,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "nowait_for_pending_blk_quota_req"
+                             "(qctxt_wait_pending_dqacq)", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_NOWAIT_PENDING_INO_QUOTA,
+                             LPROCFS_CNTR_AVGMINMAX,
+                             "nowait_for_pending_ino_quota_req"
+                             "(qctxt_wait_pending_dqacq)", "us");
+
+        /* quota control and qunit adjustment */
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_QUOTA_CTL,
+                             LPROCFS_CNTR_AVGMINMAX, "quota_ctl", "us");
+        lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_ADJUST_QUNIT,
+                             LPROCFS_CNTR_AVGMINMAX, "adjust_qunit", "us");
+
+        /* NOTE(review): the result of this registration is not checked */
+        lprocfs_register_stats(qctxt->lqc_proc_dir, "stats", qctxt->lqc_stats);
+
+        RETURN(rc);
+
+out_free_proc:
+        lprocfs_remove(&qctxt->lqc_proc_dir);
+out:
+        RETURN(rc);
+}
+
+/**
+ * Tear down what lquota_proc_setup() created for a quota context.
+ *
+ * Frees the stats object if one is attached and removes the proc directory.
+ *
+ * Returns 0 on success, or -EINVAL if \a qctxt is NULL or has no proc
+ * directory.
+ */
+int lquota_proc_cleanup(struct lustre_quota_ctxt *qctxt)
+{
+        if (qctxt == NULL || qctxt->lqc_proc_dir == NULL)
+                return -EINVAL;
+
+        if (qctxt->lqc_stats)
+                lprocfs_free_stats(&qctxt->lqc_stats);
+        lprocfs_remove(&qctxt->lqc_proc_dir);
+
+        return 0;
+}
+
+#endif  /* LPROCFS */
+#endif
diff --git a/lustre/quota/quota_adjust_qunit.c b/lustre/quota/quota_adjust_qunit.c
new file mode 100644 (file)
index 0000000..20ee26b
--- /dev/null
@@ -0,0 +1,419 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LQUOTA
+
+#ifdef __KERNEL__
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/init.h>
+# include <linux/fs.h>
+# include <linux/jbd.h>
+# include <linux/quota.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/smp_lock.h>
+#  include <linux/buffer_head.h>
+#  include <linux/workqueue.h>
+#  include <linux/mount.h>
+# else
+#  include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
+#endif
+
+#include <obd_class.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_cfg.h>
+#include <obd_ost.h>
+#include <lustre_fsfilt.h>
+#include <linux/lustre_quota.h>
+#include <class_hash.h>
+#include "quota_internal.h"
+
+#ifdef HAVE_QUOTA_SUPPORT
+
+#ifdef __KERNEL__
+/**
+ * This function is in charge of recording lqs_ino_rec and lqs_blk_rec.
+ * It is called when a lquota slave checks a quota request
+ * (check_cur_qunit) and when it finishes a quota request
+ * (dqacq_completion). The caller must hold lqs->lqs_lock.
+ *
+ * qd_count is added to the record when checking an acquire or finishing a
+ * release, and subtracted when checking a release or finishing an acquire.
+ *
+ * is_chk: whether it is checking quota; otherwise, it is finishing
+ * is_acq: whether it is acquiring; otherwise, it is releasing
+ */
+void quota_compute_lqs(struct qunit_data *qdata, struct lustre_qunit_size *lqs,
+                      int is_chk, int is_acq)
+{
+        int is_blk;
+        int add;
+
+        LASSERT(qdata && lqs);
+        LASSERT_SPIN_LOCKED(&lqs->lqs_lock);
+        is_blk = QDATA_IS_BLK(qdata);
+
+        /* check+acquire and finish+release add; the mixed cases subtract */
+        add = (!is_chk == !is_acq);
+
+        if (is_blk) {
+                if (add)
+                        lqs->lqs_blk_rec += qdata->qd_count;
+                else
+                        lqs->lqs_blk_rec -= qdata->qd_count;
+        } else {
+                if (add)
+                        lqs->lqs_ino_rec += qdata->qd_count;
+                else
+                        lqs->lqs_ino_rec -= qdata->qd_count;
+        }
+}
+
+/**
+ * Fill a quota_adjust_qunit from a qunit_data.
+ *
+ * Copies the flags and the id. qd_qunit is copied into qaq_bunit_sz when
+ * QDATA_IS_ADJBLK() holds and into qaq_iunit_sz when QDATA_IS_ADJINO()
+ * holds; a unit-size field whose flag is not set is left untouched.
+ */
+void qdata_to_oqaq(struct qunit_data *qdata, struct quota_adjust_qunit *oqaq)
+{
+        LASSERT(qdata);
+        LASSERT(oqaq);
+
+        oqaq->qaq_id    = qdata->qd_id;
+        oqaq->qaq_flags = qdata->qd_flags;
+
+        if (QDATA_IS_ADJINO(qdata))
+                oqaq->qaq_iunit_sz = qdata->qd_qunit;
+        if (QDATA_IS_ADJBLK(qdata))
+                oqaq->qaq_bunit_sz = qdata->qd_qunit;
+}
+
+/**
+ * Look up the lustre_qunit_size matching a request in qctxt->lqc_lqs_hash.
+ *
+ * The hash key is \a oqaq when it is given; otherwise a temporary key is
+ * allocated, filled from \a qdata with qdata_to_oqaq() and freed again
+ * before returning. At least one of \a qdata and \a oqaq must be non-NULL,
+ * and *lqs_return must be NULL on entry.
+ *
+ * The lookup result (possibly NULL) is stored in *lqs_return.
+ *
+ * Returns 0, or -ENOMEM if the temporary key cannot be allocated.
+ */
+int quota_search_lqs(struct qunit_data *qdata, struct quota_adjust_qunit *oqaq,
+                     struct lustre_quota_ctxt *qctxt,
+                     struct lustre_qunit_size **lqs_return)
+{
+        struct quota_adjust_qunit *key = oqaq;
+        ENTRY;
+
+        LASSERT(*lqs_return == NULL);
+        LASSERT(oqaq || qdata);
+
+        if (key == NULL) {
+                /* no ready-made key: build one from qdata */
+                OBD_ALLOC_PTR(key);
+                if (key == NULL)
+                        RETURN(-ENOMEM);
+                qdata_to_oqaq(qdata, key);
+        }
+
+        *lqs_return = lustre_hash_lookup(qctxt->lqc_lqs_hash, key);
+        if (*lqs_return != NULL)
+                LQS_DEBUG((*lqs_return), "show lqs\n");
+
+        /* only a key allocated here is released */
+        if (key != oqaq)
+                OBD_FREE_PTR(key);
+        RETURN(0);
+}
+
+/**
+ * Allocate a new lustre_qunit_size, initialize it from the quota context
+ * and insert it into qctxt->lqc_lqs_hash.
+ *
+ * The hash key is built from \a qdata when \a oqaq is NULL, otherwise it is
+ * a copy of \a oqaq. At least one of the two must be non-NULL, and
+ * *lqs_return must be NULL on entry.
+ *
+ * On success an extra reference is taken and the new lqs is stored in
+ * *lqs_return. On failure the lqs is freed and *lqs_return is left
+ * untouched.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
+ * lustre_hash_add_unique().
+ */
+int quota_create_lqs(struct qunit_data *qdata, struct quota_adjust_qunit *oqaq,
+                     struct lustre_quota_ctxt *qctxt,
+                     struct lustre_qunit_size **lqs_return)
+{
+        struct lustre_qunit_size *lqs = NULL;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(*lqs_return == NULL);
+        LASSERT(oqaq || qdata);
+
+        OBD_ALLOC_PTR(lqs);
+        if (!lqs)
+                GOTO(out, rc = -ENOMEM);
+
+        /* the key is stored inside the lqs itself */
+        if (!oqaq)
+                qdata_to_oqaq(qdata, &lqs->lqs_key);
+        else
+                lqs->lqs_key = *oqaq;
+
+        spin_lock_init(&lqs->lqs_lock);
+        lqs->lqs_bwrite_pending = 0;
+        lqs->lqs_iwrite_pending = 0;
+        lqs->lqs_ino_rec = 0;
+        lqs->lqs_blk_rec = 0;
+        lqs->lqs_id = lqs->lqs_key.qaq_id;
+        lqs->lqs_flags = QAQ_IS_GRP(&lqs->lqs_key);
+        /* start from the context-wide unit and tune sizes */
+        lqs->lqs_bunit_sz = qctxt->lqc_bunit_sz;
+        lqs->lqs_iunit_sz = qctxt->lqc_iunit_sz;
+        lqs->lqs_btune_sz = qctxt->lqc_btune_sz;
+        lqs->lqs_itune_sz = qctxt->lqc_itune_sz;
+        lqs->lqs_ctxt = qctxt;
+        /* the shrink timestamps are only reset when lqc_handler is set */
+        if (qctxt->lqc_handler) {
+                lqs->lqs_last_bshrink  = 0;
+                lqs->lqs_last_ishrink  = 0;
+        }
+        lqs_initref(lqs);
+        rc = lustre_hash_add_unique(qctxt->lqc_lqs_hash,
+                                    &lqs->lqs_key, &lqs->lqs_hash);
+        LQS_DEBUG(lqs, "create lqs\n");
+        if (!rc) {
+                /* extra reference for the pointer handed to the caller */
+                lqs_getref(lqs);
+                *lqs_return = lqs;
+        }
+out:
+        if (rc && lqs)
+                OBD_FREE_PTR(lqs);
+        RETURN(rc);
+}
+
+/**
+ * Apply a qunit-size adjustment request to the matching lqs.
+ *
+ * The lqs for \a oqaq is looked up and created if missing. If both unit
+ * sizes in \a oqaq are zero and both the ADJBLK and ADJINO flags are set,
+ * the lqs (if any) is released instead and the function returns.
+ *
+ * Otherwise, under lqs_lock, the block and/or inode unit size is set to the
+ * requested value and the tune size to half of it. When qctxt->lqc_handler
+ * is set, a shrink records the current time, and an enlargement is applied
+ * only if no shrink was recorded or more than lqc_switch_seconds have
+ * passed since the last one.
+ *
+ * Returns a negative error from quota_create_lqs(), or a value with
+ * LQS_BLK_DECREASE / LQS_BLK_INCREASE / LQS_INO_DECREASE /
+ * LQS_INO_INCREASE ORed in for the changes that were applied.
+ */
+int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq,
+                           struct lustre_quota_ctxt *qctxt)
+{
+        struct lustre_qunit_size *lqs = NULL;
+        unsigned long *lbunit, *liunit, *lbtune, *litune;
+        signed long b_tmp = 0, i_tmp = 0;
+        cfs_time_t time_limit = 0;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(qctxt);
+search_lqs:
+        rc = quota_search_lqs(NULL, oqaq, qctxt, &lqs);
+
+        /* delete the lqs, because the user ran lfs quota 0 0 0 0 */
+        if (!oqaq->qaq_bunit_sz && !oqaq->qaq_iunit_sz && QAQ_IS_ADJBLK(oqaq) &&
+            QAQ_IS_ADJINO(oqaq)) {
+                if (lqs) {
+                        LQS_DEBUG(lqs, "release lqs\n");
+                        /* this is for quota_search_lqs */
+                        lqs_putref(lqs);
+                        /* kill lqs */
+                        lqs_putref(lqs);
+                }
+                RETURN(rc);
+        }
+
+        if (!lqs) {
+                rc = quota_create_lqs(NULL, oqaq, qctxt, &lqs);
+                /* -EALREADY is answered by searching again */
+                if (rc == -EALREADY)
+                        goto search_lqs;
+                if (rc < 0)
+                        RETURN(rc);
+        }
+
+        lbunit = &lqs->lqs_bunit_sz;
+        liunit = &lqs->lqs_iunit_sz;
+        lbtune = &lqs->lqs_btune_sz;
+        litune = &lqs->lqs_itune_sz;
+
+        CDEBUG(D_QUOTA, "before: bunit: %lu, iunit: %lu.\n", *lbunit, *liunit);
+        spin_lock(&lqs->lqs_lock);
+        /* adjust the slave's block qunit size */
+        if (QAQ_IS_ADJBLK(oqaq)) {
+                cfs_duration_t sec = cfs_time_seconds(qctxt->lqc_switch_seconds);
+
+                /* > 0: the request shrinks the unit, < 0: it enlarges it */
+                b_tmp = *lbunit - oqaq->qaq_bunit_sz;
+
+                if (qctxt->lqc_handler && b_tmp > 0)
+                        lqs->lqs_last_bshrink = cfs_time_current();
+
+                if (qctxt->lqc_handler && b_tmp < 0) {
+                        time_limit = cfs_time_add(lqs->lqs_last_bshrink, sec);
+                        /* enlarge only if never shrunk or long enough ago */
+                        if (!lqs->lqs_last_bshrink ||
+                            cfs_time_after(cfs_time_current(), time_limit)) {
+                                *lbunit = oqaq->qaq_bunit_sz;
+                                *lbtune = (*lbunit) / 2;
+                        } else {
+                                /* too soon: report no block change */
+                                b_tmp = 0;
+                        }
+                } else {
+                        *lbunit = oqaq->qaq_bunit_sz;
+                        *lbtune = (*lbunit) / 2;
+                }
+        }
+
+        /* adjust the slave's file qunit size */
+        if (QAQ_IS_ADJINO(oqaq)) {
+                /* > 0: the request shrinks the unit, < 0: it enlarges it */
+                i_tmp = *liunit - oqaq->qaq_iunit_sz;
+
+                if (qctxt->lqc_handler && i_tmp > 0)
+                        lqs->lqs_last_ishrink  = cfs_time_current();
+
+                if (qctxt->lqc_handler && i_tmp < 0) {
+                        time_limit = cfs_time_add(lqs->lqs_last_ishrink,
+                                                  cfs_time_seconds(qctxt->
+                                                  lqc_switch_seconds));
+                        /* enlarge only if never shrunk or long enough ago */
+                        if (!lqs->lqs_last_ishrink ||
+                            cfs_time_after(cfs_time_current(), time_limit)) {
+                                *liunit = oqaq->qaq_iunit_sz;
+                                *litune = (*liunit) / 2;
+                        } else {
+                                /* too soon: report no inode change */
+                                i_tmp = 0;
+                        }
+                } else {
+                        *liunit = oqaq->qaq_iunit_sz;
+                        *litune = (*liunit) / 2;
+                }
+        }
+        spin_unlock(&lqs->lqs_lock);
+        CDEBUG(D_QUOTA, "after: bunit: %lu, iunit: %lu.\n", *lbunit, *liunit);
+
+        lqs_putref(lqs);
+
+        /* tell the caller which sizes changed and in which direction */
+        if (b_tmp > 0)
+                rc |= LQS_BLK_DECREASE;
+        else if (b_tmp < 0)
+                rc |= LQS_BLK_INCREASE;
+
+        if (i_tmp > 0)
+                rc |= LQS_INO_DECREASE;
+        else if (i_tmp < 0)
+                rc |= LQS_INO_INCREASE;
+
+        RETURN(rc);
+}
+
+/**
+ * Handle a block qunit-size adjustment on this export's obd.
+ *
+ * Applies the request with quota_adjust_slave_lqs(). If that reports a
+ * change (positive return), qctxt_adjust_qunit() is called for the user or
+ * group id carried in \a oqaq; -EDQUOT, -EBUSY and -EAGAIN from that call
+ * are logged at debug level and treated as success.
+ *
+ * \a oqaq must be non-NULL and have the ADJBLK flag set.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int filter_quota_adjust_qunit(struct obd_export *exp,
+                              struct quota_adjust_qunit *oqaq,
+                              struct lustre_quota_ctxt *qctxt)
+{
+        struct obd_device *obd = exp->exp_obd;
+        unsigned int uid = 0, gid = 0;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(oqaq);
+        LASSERT(QAQ_IS_ADJBLK(oqaq));
+
+        rc = quota_adjust_slave_lqs(oqaq, qctxt);
+        if (rc < 0) {
+                CERROR("adjust mds slave's qunit size failed!(rc:%d)\n", rc);
+                RETURN(rc);
+        }
+
+        /* nothing changed, so there is nothing to re-adjust */
+        if (rc == 0)
+                RETURN(rc);
+
+        if (QAQ_IS_GRP(oqaq))
+                gid = oqaq->qaq_id;
+        else
+                uid = oqaq->qaq_id;
+
+        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 0, NULL);
+        switch (rc) {
+        case -EDQUOT:
+        case -EBUSY:
+        case -EAGAIN:
+                CDEBUG(D_QUOTA, "rc: %d.\n", rc);
+                rc = 0;
+                break;
+        default:
+                break;
+        }
+        if (rc)
+                CERROR("slave adjust block quota failed!(rc:%d)\n", rc);
+
+        RETURN(rc);
+}
+#endif /* __KERNEL__ */
+#endif
+
+/**
+ * Send an OST_QUOTA_ADJUST_QUNIT request carrying \a oqaq over \a exp.
+ *
+ * If the export's connect flags lack OBD_CONNECT_CHANGE_QS, nothing is
+ * sent and 0 is returned. The export's obd type must be LUSTRE_OSC_NAME.
+ * \a qctxt is not used.
+ *
+ * Returns 0 on success or when the operation is unsupported, -EINVAL for a
+ * non-OSC export, -ENOMEM if the request cannot be allocated, or the error
+ * from ptlrpc_queue_wait().
+ */
+int client_quota_adjust_qunit(struct obd_export *exp,
+                              struct quota_adjust_qunit *oqaq,
+                              struct lustre_quota_ctxt *qctxt)
+{
+        struct ptlrpc_request *req;
+        struct quota_adjust_qunit *body;
+        int rc = 0;
+        ENTRY;
+
+        /* the peer does not support this operation: silently skip it */
+        if ((exp->exp_connect_flags & OBD_CONNECT_CHANGE_QS) == 0) {
+                CDEBUG(D_QUOTA, "osc: %s don't support change qunit size\n",
+                       exp->exp_obd->obd_name);
+                RETURN(rc);
+        }
+
+        if (strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME) != 0)
+                RETURN(-EINVAL);
+
+        req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
+                                        &RQF_OST_QUOTA_ADJUST_QUNIT,
+                                        LUSTRE_OST_VERSION,
+                                        OST_QUOTA_ADJUST_QUNIT);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        /* copy the caller's request into the message buffer */
+        body = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT);
+        *body = *oqaq;
+
+        ptlrpc_request_set_replen(req);
+
+        rc = ptlrpc_queue_wait(req);
+        if (rc != 0)
+                CERROR("%s: %s failed: rc = %d\n", exp->exp_obd->obd_name,
+                       __FUNCTION__, rc);
+
+        ptlrpc_req_finished(req);
+        RETURN(rc);
+}
+
+/**
+ * Forward a block qunit-size adjustment to every active target of a LOV.
+ *
+ * \a oqaq must have the ADJBLK flag set; otherwise an error is logged and
+ * -EFAULT is returned. obd_quota_adjust_qunit() is called on each active
+ * target's export with a NULL quota context; \a qctxt itself is not used.
+ *
+ * Empty target slots are skipped like inactive ones.
+ * NOTE(review): this assumes lov_tgts[] may contain NULL entries -- confirm
+ * against the other LOV target iterators.
+ *
+ * Returns 0 if every call succeeded, otherwise the first error reported by
+ * a target that is still active after the call.
+ */
+int lov_quota_adjust_qunit(struct obd_export *exp,
+                           struct quota_adjust_qunit *oqaq,
+                           struct lustre_quota_ctxt *qctxt)
+{
+        struct obd_device *obd = class_exp2obd(exp);
+        struct lov_obd *lov = &obd->u.lov;
+        int i, rc = 0;
+        ENTRY;
+
+        if (!QAQ_IS_ADJBLK(oqaq)) {
+                CERROR("bad qaq_flags %x for lov obd.\n", oqaq->qaq_flags);
+                RETURN(-EFAULT);
+        }
+
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                int err;
+
+                /* do not dereference an empty slot */
+                if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active) {
+                        CDEBUG(D_HA, "ost %d is inactive\n", i);
+                        continue;
+                }
+
+                err = obd_quota_adjust_qunit(lov->lov_tgts[i]->ltd_exp, oqaq,
+                                             NULL);
+                /* keep the first error from a target that is still active */
+                if (err && lov->lov_tgts[i]->ltd_active && !rc)
+                        rc = err;
+        }
+        RETURN(rc);
+}
index 20ffb9b..97061cb 100644 (file)
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
+
+
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_LQUOTA
 
 #ifdef __KERNEL__
 # include <linux/version.h>
@@ -62,6 +64,7 @@
 #include <lustre_quota.h>
 #include "quota_internal.h"
 
+#ifdef HAVE_QUOTA_SUPPORT
 #ifdef __KERNEL__
 static int target_quotacheck_callback(struct obd_export *exp,
                                       struct obd_quotactl *oqctl)
@@ -71,7 +74,7 @@ static int target_quotacheck_callback(struct obd_export *exp,
         int                    rc;
         ENTRY;
 
-        req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_QC_CALLBACK,
+        req = ptlrpc_request_alloc_pack(exp->exp_imp_reverse, &RQF_QC_CALLBACK,
                                         LUSTRE_OBD_VERSION, OBD_QC_CALLBACK);
         if (req == NULL)
                 RETURN(-ENOMEM);
@@ -99,7 +102,7 @@ static int target_quotacheck_thread(void *data)
         ptlrpc_daemonize("quotacheck");
 
         exp = qta->qta_exp;
-        obd = exp->exp_obd;
+        obd = qta->qta_obd;
         oqctl = &qta->qta_oqctl;
 
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
@@ -118,9 +121,9 @@ static int target_quotacheck_thread(void *data)
         return rc;
 }
 
-int target_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl)
+int target_quota_check(struct obd_device *obd, struct obd_export *exp,
+                       struct obd_quotactl *oqctl)
 {
-        struct obd_device *obd = exp->exp_obd;
         struct obd_device_target *obt = &obd->u.obt;
         struct quotacheck_thread_args *qta;
         int rc = 0;
@@ -136,7 +139,9 @@ int target_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl)
                 GOTO(out, rc = -ENOMEM);
 
         qta->qta_exp = exp;
+        qta->qta_obd = obd;
         qta->qta_oqctl = *oqctl;
+        qta->qta_oqctl.qc_id = obt->obt_qfmt; /* override qfmt version */
         qta->qta_sb = obt->obt_sb;
         qta->qta_sem = &obt->obt_quotachecking;
 
@@ -166,27 +171,31 @@ out:
 }
 
 #endif /* __KERNEL__ */
+#endif /* HAVE_QUOTA_SUPPORT */
 
-int client_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl)
+int client_quota_check(struct obd_device *unused, struct obd_export *exp,
+                       struct obd_quotactl *oqctl)
 {
-        struct client_obd     *cli = &exp->exp_obd->u.cli;
-        struct ptlrpc_request *req;
-        struct obd_quotactl   *body;
-        int                    ver, opc, rc;
+        struct client_obd       *cli = &exp->exp_obd->u.cli;
+        struct ptlrpc_request   *req;
+        struct obd_quotactl     *body;
+        const struct req_format *rf;
+        int                      ver, opc, rc;
         ENTRY;
 
         if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) {
+                rf  = &RQF_MDS_QUOTACHECK;
                 ver = LUSTRE_MDS_VERSION;
                 opc = MDS_QUOTACHECK;
         } else if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+                rf  = &RQF_OST_QUOTACHECK;
                 ver = LUSTRE_OST_VERSION;
                 opc = OST_QUOTACHECK;
         } else {
                 RETURN(-EINVAL);
         }
 
-        req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
-                                        &RQF_MDS_QUOTACHECK, ver, opc);
+        req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), rf, ver, opc);
         if (req == NULL)
                 RETURN(-ENOMEM);
 
@@ -220,18 +229,44 @@ int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk)
         qchk->obd_uuid = cli->cl_target_uuid;
         /* FIXME change strncmp to strcmp and save the strlen op */
         if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME,
-            strlen(LUSTRE_OSC_NAME)))
+                    strlen(LUSTRE_OSC_NAME)) == 0)
                 memcpy(qchk->obd_type, LUSTRE_OST_NAME,
                        strlen(LUSTRE_OST_NAME));
         else if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME,
-                 strlen(LUSTRE_MDC_NAME)))
+                         strlen(LUSTRE_MDC_NAME)) == 0)
                 memcpy(qchk->obd_type, LUSTRE_MDS_NAME,
                        strlen(LUSTRE_MDS_NAME));
 
         RETURN(rc);
 }
 
-int lov_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl)
+/**
+ * Run obd_quotacheck() on every target of an LMV.
+ *
+ * Targets are visited in index order. An inactive target aborts the walk
+ * immediately with -EIO. The first parameter is not used.
+ *
+ * Returns 0 if every call succeeded, -EIO if an inactive target was met,
+ * otherwise the first error reported by a target that is still active
+ * after the call.
+ */
+int lmv_quota_check(struct obd_device *unused, struct obd_export *exp,
+                    struct obd_quotactl *oqctl)
+{
+        struct obd_device *obd = class_exp2obd(exp);
+        struct lmv_obd *lmv = &obd->u.lmv;
+        int i, rc = 0;
+        ENTRY;
+
+        for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+                struct lmv_tgt_desc *tgt = &lmv->tgts[i];
+                int err;
+
+                if (!tgt->ltd_active) {
+                        CERROR("lmv idx %d inactive\n", i);
+                        RETURN(-EIO);
+                }
+
+                err = obd_quotacheck(tgt->ltd_exp, oqctl);
+                if (err == 0)
+                        continue;
+
+                /* keep only the first error, and only from active targets */
+                if (tgt->ltd_active && rc == 0)
+                        rc = err;
+        }
+
+        RETURN(rc);
+}
+
+int lov_quota_check(struct obd_device *unused, struct obd_export *exp,
+                    struct obd_quotactl *oqctl)
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
index 92313d2..4aac3ae 100644 (file)
@@ -44,7 +44,7 @@
 # define EXPORT_SYMTAB
 #endif
 
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_LQUOTA
 
 #include <linux/version.h>
 #include <linux/fs.h>
 #include <obd_class.h>
 #include <lustre_quota.h>
 #include <lustre_fsfilt.h>
+#include <class_hash.h>
+#include <lprocfs_status.h>
 #include "quota_internal.h"
 
-unsigned long default_bunit_sz = 100 * 1024 * 1024;       /* 100M bytes */
-unsigned long default_btune_ratio = 50;                   /* 50 percentage */
-unsigned long default_iunit_sz = 5000;       /* 5000 inodes */
-unsigned long default_itune_ratio = 50;      /* 50 percentage */
+#ifdef HAVE_QUOTA_SUPPORT
+
+static lustre_hash_ops_t lqs_hash_ops;
+
+unsigned long default_bunit_sz = 128 * 1024 * 1024; /* 128 MiB, in bytes */
+unsigned long default_btune_ratio = 50;             /* 50 percent */
+unsigned long default_iunit_sz = 5120;              /* 5120 inodes */
+unsigned long default_itune_ratio = 50;             /* 50 percent */
 
 cfs_mem_cache_t *qunit_cachep = NULL;
 struct list_head qunit_hash[NR_DQHASH];
 spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED;
 
+/* keep enum qunit_state in sync with qunit_state_names[], which it indexes */
+enum qunit_state {
+        /**
+         * the qunit has been created
+         */
+        QUNIT_CREATED      = 0,
+        /**
+         * the qunit has been added to the qunit hash, which means
+         * a quota request is about to be sent or is in flight
+         */
+        QUNIT_IN_HASH      = 1,
+        /**
+         * the qunit has been removed from the qunit hash, which
+         * means its quota request has been handled and has come back
+         */
+        QUNIT_RM_FROM_HASH = 2,
+        /**
+         * the qunit may wake up all threads waiting for it
+         */
+        QUNIT_FINISHED     = 3,
+};
+
+static const char *qunit_state_names[] = { /* indexed by enum qunit_state */
+        [QUNIT_CREATED]      = "CREATED",
+        [QUNIT_IN_HASH]      = "IN_HASH",
+        [QUNIT_RM_FROM_HASH] = "RM_FROM_HASH",
+        [QUNIT_FINISHED]     = "FINISHED",
+};
+
 struct lustre_qunit {
-        struct list_head lq_hash;               /* Hash list in memory */
-        atomic_t lq_refcnt;                     /* Use count */
-        struct lustre_quota_ctxt *lq_ctxt;      /* Quota context this applies to */
-        struct qunit_data lq_data;              /* See qunit_data */
-        unsigned int lq_opc;                    /* QUOTA_DQACQ, QUOTA_DQREL */
-        struct list_head lq_waiters;            /* All write threads waiting for this qunit */
+        struct list_head lq_hash;          /** Linkage in a qunit_hash[] list */
+        atomic_t lq_refcnt;                /** Reference count */
+        struct lustre_quota_ctxt *lq_ctxt; /** Quota context this applies to */
+        struct qunit_data lq_data;         /** See struct qunit_data */
+        unsigned int lq_opc;               /** QUOTA_DQACQ or QUOTA_DQREL */
+        cfs_waitq_t lq_waitq;              /** Threads waiting for this qunit */
+        spinlock_t lq_lock;                /** Protects the whole structure */
+        enum qunit_state lq_state;         /** Current state of this qunit */
+        int lq_rc;                         /** Result code kept for lq_data */
 };
 
+#define QUNIT_SET_STATE(qunit, state)                                    \
+do {    /* log the lq_state transition and apply it under lq_lock */     \
+        spin_lock(&(qunit)->lq_lock);                                    \
+        QDATA_DEBUG((&(qunit)->lq_data), "qunit(%p) lq_state(%s->%s), " \
+                    "lq_rc(%d)\n",                                       \
+                    (qunit), qunit_state_names[(qunit)->lq_state],       \
+                    qunit_state_names[state], (qunit)->lq_rc);           \
+        (qunit)->lq_state = (state);                                     \
+        spin_unlock(&(qunit)->lq_lock);                                  \
+} while (0)
+
+#define QUNIT_SET_STATE_AND_RC(qunit, state, rc)                         \
+do {    /* store rc first so the logged lq_rc is the new value */        \
+        spin_lock(&(qunit)->lq_lock);                                    \
+        (qunit)->lq_rc = (rc);                                           \
+        QDATA_DEBUG((&(qunit)->lq_data), "qunit(%p) lq_state(%s->%s), " \
+                    "lq_rc(%d)\n",                                       \
+                    (qunit), qunit_state_names[(qunit)->lq_state],       \
+                    qunit_state_names[state], (qunit)->lq_rc);           \
+        (qunit)->lq_state = (state);                                     \
+        spin_unlock(&(qunit)->lq_lock);                                  \
+} while (0)
+
+
 int should_translate_quota (struct obd_import *imp)
 {
         ENTRY;
 
         LASSERT(imp);
-        if ((imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) &&
-            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT))
+        if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64)
                 RETURN(0);
         else
                 RETURN(1);
@@ -135,66 +196,13 @@ static inline int
 qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
 {
         unsigned int id = qdata->qd_id;
-        unsigned int type = qdata->qd_flags & QUOTA_IS_GRP;
+        unsigned int type = QDATA_IS_GRP(qdata);
 
         unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id;
         tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
         return tmp;
 }
 
-/* compute the remaining quota for certain gid or uid b=11693 */
-int compute_remquota(struct obd_device *obd,
-                     struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata)
-{
-        struct super_block *sb = qctxt->lqc_sb;
-        __u64 usage, limit;
-        struct obd_quotactl *qctl;
-        int ret = QUOTA_RET_OK;
-        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
-        ENTRY;
-
-        if (!sb_any_quota_enabled(sb))
-                RETURN(QUOTA_RET_NOQUOTA);
-
-        /* ignore root user */
-        if (qdata->qd_id == 0 && qdata_type == USRQUOTA)
-                RETURN(QUOTA_RET_NOLIMIT);
-
-        OBD_ALLOC_PTR(qctl);
-        if (qctl == NULL)
-                RETURN(-ENOMEM);
-
-        /* get fs quota usage & limit */
-        qctl->qc_cmd = Q_GETQUOTA;
-        qctl->qc_id = qdata->qd_id;
-        qctl->qc_type = qdata_type;
-        ret = fsfilt_quotactl(obd, sb, qctl);
-        if (ret) {
-                if (ret == -ESRCH)      /* no limit */
-                        ret = QUOTA_RET_NOLIMIT;
-                else
-                        CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)",
-                               ret);
-                GOTO(out, ret);
-        }
-
-        usage = qctl->qc_dqblk.dqb_curspace;
-        limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
-        if (!limit){            /* no limit */
-                ret = QUOTA_RET_NOLIMIT;
-                GOTO(out, ret);
-        }
-
-        if (limit >= usage)
-                qdata->qd_count = limit - usage;
-        else
-                qdata->qd_count = 0;
-        EXIT;
-out:
-        OBD_FREE_PTR(qctl);
-        return ret;
-}
-
 /* caller must hold qunit_hash_lock */
 static inline struct lustre_qunit *find_qunit(unsigned int hashent,
                                               struct lustre_quota_ctxt *qctxt,
@@ -207,7 +215,9 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent,
         list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) {
                 tmp = &qunit->lq_data;
                 if (qunit->lq_ctxt == qctxt &&
-                    qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags)
+                    qdata->qd_id == tmp->qd_id &&
+                    (qdata->qd_flags & LQUOTA_QUNIT_FLAGS) ==
+                    (tmp->qd_flags & LQUOTA_QUNIT_FLAGS))
                         return qunit;
         }
         return NULL;
@@ -218,9 +228,9 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent,
  * @qdata: the type of quota unit to be checked
  *
  * return: 1 - need acquire qunit;
- *        2 - need release qunit;
- *        0 - need do nothing.
- *      < 0 - error.
+ *         2 - need release qunit;
+ *         0 - need do nothing.
+ *       < 0 - error.
  */
 static int
 check_cur_qunit(struct obd_device *obd,
@@ -228,16 +238,23 @@ check_cur_qunit(struct obd_device *obd,
 {
         struct super_block *sb = qctxt->lqc_sb;
         unsigned long qunit_sz, tune_sz;
-        __u64 usage, limit;
+        __u64 usage, limit, limit_org, pending_write = 0;
+        long long record = 0;
         struct obd_quotactl *qctl;
+        struct lustre_qunit_size *lqs = NULL;
         int ret = 0;
-        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
-        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
         ENTRY;
 
         if (!sb_any_quota_enabled(sb))
                 RETURN(0);
 
+        spin_lock(&qctxt->lqc_lock);
+        if (!qctxt->lqc_valid){
+                spin_unlock(&qctxt->lqc_lock);
+                RETURN(0);
+        }
+        spin_unlock(&qctxt->lqc_lock);
+
         OBD_ALLOC_PTR(qctl);
         if (qctl == NULL)
                 RETURN(-ENOMEM);
@@ -245,7 +262,7 @@ check_cur_qunit(struct obd_device *obd,
         /* get fs quota usage & limit */
         qctl->qc_cmd = Q_GETQUOTA;
         qctl->qc_id = qdata->qd_id;
-        qctl->qc_type = qdata_type;
+        qctl->qc_type = QDATA_IS_GRP(qdata);
         ret = fsfilt_quotactl(obd, sb, qctl);
         if (ret) {
                 if (ret == -ESRCH)      /* no limit */
@@ -255,40 +272,145 @@ check_cur_qunit(struct obd_device *obd,
                 GOTO(out, ret);
         }
 
-        if (is_blk) {
+        if (QDATA_IS_BLK(qdata)) {
                 usage = qctl->qc_dqblk.dqb_curspace;
                 limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS;
-                qunit_sz = qctxt->lqc_bunit_sz;
-                tune_sz = qctxt->lqc_btune_sz;
-
-                LASSERT(!(qunit_sz % QUOTABLOCK_SIZE));
         } else {
                 usage = qctl->qc_dqblk.dqb_curinodes;
                 limit = qctl->qc_dqblk.dqb_ihardlimit;
-                qunit_sz = qctxt->lqc_iunit_sz;
-                tune_sz = qctxt->lqc_itune_sz;
         }
 
-        /* ignore the no quota limit case */
+        /* ignore the no quota limit case; and it can avoid creating
+         * unnecessary lqs for uid/gid */
         if (!limit)
                 GOTO(out, ret = 0);
 
+ search_lqs:
+        quota_search_lqs(qdata, NULL, qctxt, &lqs);
+        if (!lqs) {
+                CDEBUG(D_QUOTA, "Can't find the lustre qunit size!\n");
+                ret = quota_create_lqs(qdata, NULL, qctxt, &lqs);
+                if (ret == -EALREADY) {
+                        ret = 0;
+                        goto search_lqs;
+                }
+                if (ret < 0)
+                        GOTO (out, ret);
+        }
+        spin_lock(&lqs->lqs_lock);
+
+        if (QDATA_IS_BLK(qdata)) {
+                qunit_sz = lqs->lqs_bunit_sz;
+                tune_sz  = lqs->lqs_btune_sz;
+                pending_write = lqs->lqs_bwrite_pending * CFS_PAGE_SIZE;
+                record   = lqs->lqs_blk_rec;
+                LASSERT(!(qunit_sz % QUOTABLOCK_SIZE));
+        } else {
+                /* we didn't need change inode qunit size now */
+                qunit_sz = lqs->lqs_iunit_sz;
+                tune_sz  = lqs->lqs_itune_sz;
+                pending_write = lqs->lqs_iwrite_pending;
+                record   = lqs->lqs_ino_rec;
+        }
+
         /* we don't count the MIN_QLIMIT */
-        if ((limit == MIN_QLIMIT && !is_blk) ||
-            (toqb(limit) == MIN_QLIMIT && is_blk))
+        if ((limit == MIN_QLIMIT && !QDATA_IS_BLK(qdata)) ||
+            (toqb(limit) == MIN_QLIMIT && QDATA_IS_BLK(qdata)))
                 limit = 0;
 
+        usage += pending_write;
+        limit_org = limit;
+        /* when a releasing quota req is sent, before it returned
+           limit is assigned a small value. limit will overflow */
+        if (limit + record < 0)
+                usage -= record;
+        else
+                limit += record;
+
         LASSERT(qdata->qd_count == 0);
         if (limit <= usage + tune_sz) {
-                while (qdata->qd_count + limit <= usage + tune_sz)
+                while (qdata->qd_count + limit <=
+                       usage + tune_sz)
                         qdata->qd_count += qunit_sz;
                 ret = 1;
-        } else if (limit > usage + qunit_sz + tune_sz) {
-                while (limit - qdata->qd_count > usage + qunit_sz + tune_sz)
+        } else if (limit > usage + qunit_sz + tune_sz &&
+                   limit_org > qdata->qd_count + qunit_sz) {
+                while (limit - qdata->qd_count > usage + qunit_sz + tune_sz &&
+                       limit_org > qdata->qd_count + qunit_sz)
                         qdata->qd_count += qunit_sz;
                 ret = 2;
+                /* if there are other pending writes for this uid/gid, releasing
+                 * quota is put off until the last pending write b=16645 */
+                if (ret == 2 && pending_write) {
+                        CDEBUG(D_QUOTA, "delay quota release\n");
+                        ret = 0;
+                }
         }
+        CDEBUG(D_QUOTA, "type: %c, limit: "LPU64", usage: "LPU64
+               ", pending_write: "LPU64", record: "LPD64
+               ", qunit_sz: %lu, tune_sz: %lu, ret: %d.\n",
+               QDATA_IS_BLK(qdata) ? 'b' : 'i', limit, usage, pending_write,
+               record, qunit_sz, tune_sz, ret);
         LASSERT(ret == 0 || qdata->qd_count);
+
+        spin_unlock(&lqs->lqs_lock);
+        lqs_putref(lqs);
+        EXIT;
+ out:
+        OBD_FREE_PTR(qctl);
+        return ret;
+}
+
+/**
+ * Compute the remaining quota for certain gid or uid b=11693
+ */
+int compute_remquota(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+                     struct qunit_data *qdata, int isblk)
+{
+        struct super_block *sb = qctxt->lqc_sb;
+        __u64 usage, limit;
+        struct obd_quotactl *qctl;
+        int ret = QUOTA_RET_OK;
+        ENTRY;
+
+        if (!sb_any_quota_enabled(sb))
+                RETURN(QUOTA_RET_NOQUOTA);
+
+        /* ignore root user */
+        if (qdata->qd_id == 0 && QDATA_IS_GRP(qdata) == USRQUOTA)
+                RETURN(QUOTA_RET_NOLIMIT);
+
+        OBD_ALLOC_PTR(qctl);
+        if (qctl == NULL)
+                RETURN(-ENOMEM);
+
+        /* get fs quota usage & limit */
+        qctl->qc_cmd = Q_GETQUOTA;
+        qctl->qc_id = qdata->qd_id;
+        qctl->qc_type = QDATA_IS_GRP(qdata);
+        ret = fsfilt_quotactl(obd, sb, qctl);
+        if (ret) {
+                if (ret == -ESRCH)      /* no limit */
+                        ret = QUOTA_RET_NOLIMIT;
+                else
+                        CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)",
+                               ret);
+                GOTO(out, ret);
+        }
+
+        usage = isblk ? qctl->qc_dqblk.dqb_curspace :
+                qctl->qc_dqblk.dqb_curinodes;
+        limit = isblk ? qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS :
+                qctl->qc_dqblk.dqb_ihardlimit;
+        if (!limit){            /* no limit */
+                ret = QUOTA_RET_NOLIMIT;
+                GOTO(out, ret);
+        }
+
+        if (limit >= usage)
+                qdata->qd_count = limit - usage;
+        else
+                qdata->qd_count = 0;
         EXIT;
 out:
         OBD_FREE_PTR(qctl);
@@ -319,12 +441,13 @@ static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt,
                 RETURN(NULL);
 
         CFS_INIT_LIST_HEAD(&qunit->lq_hash);
-        CFS_INIT_LIST_HEAD(&qunit->lq_waiters);
+        init_waitqueue_head(&qunit->lq_waitq);
         atomic_set(&qunit->lq_refcnt, 1);
         qunit->lq_ctxt = qctxt;
         memcpy(&qunit->lq_data, qdata, sizeof(*qdata));
         qunit->lq_opc = opc;
-
+        qunit->lq_lock = SPIN_LOCK_UNLOCKED;
+        QUNIT_SET_STATE_AND_RC(qunit, QUNIT_CREATED, 0);
         RETURN(qunit);
 }
 
@@ -351,96 +474,74 @@ insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit)
         struct list_head *head;
 
         LASSERT(list_empty(&qunit->lq_hash));
+        qunit_get(qunit);
         head = qunit_hash + qunit_hashfn(qctxt, &qunit->lq_data);
         list_add(&qunit->lq_hash, head);
+        QUNIT_SET_STATE(qunit, QUNIT_IN_HASH);
+}
+
+static void compute_lqs_after_removing_qunit(struct lustre_qunit *qunit)
+{
+        struct lustre_qunit_size *lqs = NULL;
+        /* recompute the lqs for this qunit's opc; no-op if no lqs is found */
+        quota_search_lqs(&qunit->lq_data, NULL, qunit->lq_ctxt, &lqs);
+        if (lqs) {
+                spin_lock(&lqs->lqs_lock);
+                if (qunit->lq_opc == QUOTA_DQACQ)
+                        quota_compute_lqs(&qunit->lq_data, lqs, 0, 1);
+                if (qunit->lq_opc == QUOTA_DQREL)
+                        quota_compute_lqs(&qunit->lq_data, lqs, 0, 0);
+                spin_unlock(&lqs->lqs_lock);
+                /* drop the reference taken by quota_search_lqs() above */
+                lqs_putref(lqs);
+                /* drop the reference that was taken for schedule_dqacq() */
+                lqs_putref(lqs);
+        }
+
 }
 
 static void remove_qunit_nolock(struct lustre_qunit *qunit)
 {
         LASSERT(!list_empty(&qunit->lq_hash));
+        LASSERT_SPIN_LOCKED(&qunit_hash_lock);
+        /* caller holds qunit_hash_lock: unhash, log state, drop one ref */
         list_del_init(&qunit->lq_hash);
+        QUNIT_SET_STATE(qunit, QUNIT_RM_FROM_HASH);
+        qunit_put(qunit);
 }
 
-struct qunit_waiter {
-        struct list_head qw_entry;
-        cfs_waitq_t      qw_waitq;
-        int qw_rc;
-};
-
 #define INC_QLIMIT(limit, count) (limit == MIN_QLIMIT) ? \
                                  (limit = count) : (limit += count)
 
 
-/* FIXME check if this mds is the master of specified id */
-static int
-is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
-          unsigned int id, int type)
+static inline int is_master(struct lustre_quota_ctxt *qctxt)
 {
         return qctxt->lqc_handler ? 1 : 0;
 }
 
 static int
 schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
-               struct qunit_data *qdata, int opc, int wait);
-
-static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
-                                       struct qunit_data *qdata, int opc, int wait)
-{
-        int rc = 0;
-        unsigned long factor;
-        struct qunit_data tmp_qdata;
-        ENTRY;
-
-        LASSERT(qdata && qdata->qd_count);
-        QDATA_DEBUG(qdata, "%s quota split.\n",
-                    (qdata->qd_flags & QUOTA_IS_BLOCK) ? "block" : "inode");
-        if (qdata->qd_flags & QUOTA_IS_BLOCK)
-                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz *
-                        qctxt->lqc_bunit_sz;
-        else
-                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz *
-                        qctxt->lqc_iunit_sz;
-
-        if (qctxt->lqc_import && should_translate_quota(qctxt->lqc_import) &&
-            qdata->qd_count > factor) {
-                        tmp_qdata = *qdata;
-                tmp_qdata.qd_count = factor;
-                        qdata->qd_count -= tmp_qdata.qd_count;
-                QDATA_DEBUG((&tmp_qdata), "be split.\n");
-                rc = schedule_dqacq(obd, qctxt, &tmp_qdata, opc, wait);
-        } else{
-                QDATA_DEBUG(qdata, "don't be split.\n");
-                rc = schedule_dqacq(obd, qctxt, qdata, opc, wait);
-        }
-
-        RETURN(rc);
-}
+               struct qunit_data *qdata, int opc, int wait,
+               struct obd_trans_info *oti);
 
 static int
-dqacq_completion(struct obd_device *obd,
-                 struct lustre_quota_ctxt *qctxt,
+dqacq_completion(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
                  struct qunit_data *qdata, int rc, int opc)
 {
         struct lustre_qunit *qunit = NULL;
         struct super_block *sb = qctxt->lqc_sb;
-        unsigned long qunit_sz;
-        struct qunit_waiter *qw, *tmp;
         int err = 0;
-        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
-        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
-        __u64 qd_tmp = qdata->qd_count;
-        unsigned long div_r;
+        struct quota_adjust_qunit *oqaq = NULL;
+        int rc1 = 0;
         ENTRY;
 
         LASSERT(qdata);
-        qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz;
-        div_r = do_div(qd_tmp, qunit_sz);
-        LASSERTF(!div_r, "qunit_sz: %lu, return qunit_sz: "LPU64"\n",
-                 qunit_sz, qd_tmp);
+        QDATA_DEBUG(qdata, "obd(%s): complete %s quota req\n",
+                    obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel");
 
         /* update local operational quota file */
         if (rc == 0) {
-                __u32 count = QUSG(qdata->qd_count, is_blk);
+                __u64 count = QUSG(qdata->qd_count, QDATA_IS_BLK(qdata));
                 struct obd_quotactl *qctl;
                 __u64 *hardlimit;
 
@@ -453,14 +554,14 @@ dqacq_completion(struct obd_device *obd,
                  * set fs quota limit */
                 qctl->qc_cmd = Q_GETQUOTA;
                 qctl->qc_id = qdata->qd_id;
-                qctl->qc_type = qdata_type;
+                qctl->qc_type = QDATA_IS_GRP(qdata);
                 err = fsfilt_quotactl(obd, sb, qctl);
                 if (err) {
                         CERROR("error get quota fs limit! (rc:%d)\n", err);
                         GOTO(out_mem, err);
                 }
 
-                if (is_blk) {
+                if (QDATA_IS_BLK(qdata)) {
                         qctl->qc_dqblk.dqb_valid = QIF_BLIMITS;
                         hardlimit = &qctl->qc_dqblk.dqb_bhardlimit;
                 } else {
@@ -468,20 +569,24 @@ dqacq_completion(struct obd_device *obd,
                         hardlimit = &qctl->qc_dqblk.dqb_ihardlimit;
                 }
 
+                CDEBUG(D_QUOTA, "hardlimt: "LPU64"\n", *hardlimit);
+
+                if (*hardlimit == 0)
+                        goto out_mem;
+
                 switch (opc) {
                 case QUOTA_DQACQ:
-                        CDEBUG(D_QUOTA, "%s(acq):count: %d, hardlimt: "LPU64
-                               ",type: %s.\n", obd->obd_name, count, *hardlimit,
-                               qdata_type ? "grp": "usr");
                         INC_QLIMIT(*hardlimit, count);
                         break;
                 case QUOTA_DQREL:
-                        CDEBUG(D_QUOTA, "%s(rel):count: %d, hardlimt: "LPU64
-                               ",type: %s.\n", obd->obd_name, count, *hardlimit,
-                               qdata_type ? "grp": "usr");
                         LASSERTF(count < *hardlimit,
-                                 "count: %d, hardlimit: "LPU64".\n",
-                                 count, *hardlimit);
+                                 "id(%u) flag(%u) type(%c) isblk(%c) "
+                                 "count("LPU64") qd_qunit("LPU64") "
+                                 "hardlimit("LPU64").\n",
+                                 qdata->qd_id, qdata->qd_flags,
+                                 QDATA_IS_GRP(qdata) ? 'g' : 'u',
+                                 QDATA_IS_BLK(qdata) ? 'b': 'i',
+                                 qdata->qd_count, qdata->qd_qunit, *hardlimit);
                         *hardlimit -= count;
                         break;
                 default:
@@ -516,40 +621,56 @@ out:
         /* this qunit has been removed by qctxt_cleanup() */
         if (!qunit) {
                 spin_unlock(&qunit_hash_lock);
+                QDATA_DEBUG(qdata, "%s is discarded because qunit isn't found\n",
+                            opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
                 RETURN(err);
         }
 
         LASSERT(opc == qunit->lq_opc);
+        /* remove this qunit from the qunit hash so that new processes cannot
+         * start waiting on qunit->lq_waitq */
         remove_qunit_nolock(qunit);
+        spin_unlock(&qunit_hash_lock);
 
-        /* wake up all waiters */
-        list_for_each_entry_safe(qw, tmp, &qunit->lq_waiters, qw_entry) {
-                list_del_init(&qw->qw_entry);
-                qw->qw_rc = rc;
-                wake_up(&qw->qw_waitq);
-        }
+        compute_lqs_after_removing_qunit(qunit);
 
-        spin_unlock(&qunit_hash_lock);
+        /* wake up all waiters */
+        QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, rc);
+        wake_up_all(&qunit->lq_waitq);
 
         qunit_put(qunit);
+        if (rc < 0 && rc != -EDQUOT)
+                 RETURN(err);
 
         /* don't reschedule in such cases:
-         *   - acq/rel failure, but not for quota recovery.
+         *   - acq/rel failure and qunit isn't changed,
+         *     but not for quota recovery.
          *   - local dqacq/dqrel.
          *   - local disk io failure.
          */
-        if (err || (rc && rc != -EBUSY) ||
-            is_master(obd, qctxt, qdata->qd_id, qdata_type))
+         OBD_ALLOC_PTR(oqaq);
+         if (!oqaq)
+                 RETURN(-ENOMEM);
+         qdata_to_oqaq(qdata, oqaq);
+         /* adjust the qunit size in slaves */
+         rc1 = quota_adjust_slave_lqs(oqaq, qctxt);
+         OBD_FREE_PTR(oqaq);
+         if (rc1 < 0) {
+                 CERROR("adjust slave's qunit size failed!(rc:%d)\n", rc1);
+                 RETURN(rc1);
+         }
+         if (err || (rc && rc != -EBUSY && rc1 == 0) || is_master(qctxt))
                 RETURN(err);
 
         /* reschedule another dqacq/dqrel if needed */
         qdata->qd_count = 0;
-        rc = check_cur_qunit(obd, qctxt, qdata);
-        if (rc > 0) {
+        qdata->qd_flags &= LQUOTA_QUNIT_FLAGS;
+        rc1 = check_cur_qunit(obd, qctxt, qdata);
+        if (rc1 > 0) {
                 int opc;
-                opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
-                rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0);
-                QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc);
+                opc = rc1 == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
+                rc1 = schedule_dqacq(obd, qctxt, qdata, opc, 0, NULL);
+                QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc1);
         }
         RETURN(err);
 }
@@ -564,158 +685,250 @@ static int dqacq_interpret(const struct lu_env *env,
 {
         struct dqacq_async_args *aa = (struct dqacq_async_args *)data;
         struct lustre_quota_ctxt *qctxt = aa->aa_ctxt;
+        struct obd_device_target *obt = qctxt->lqc_obt;
         struct lustre_qunit *qunit = aa->aa_qunit;
         struct obd_device *obd = req->rq_import->imp_obd;
         struct qunit_data *qdata = NULL;
-        struct qunit_data_old *qdata_old = NULL;
+        int rc1 = 0;
         ENTRY;
 
         LASSERT(req);
         LASSERT(req->rq_import);
 
-        if ((req->rq_import->imp_connect_data.ocd_connect_flags &
-             OBD_CONNECT_QUOTA64) &&
-            !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) {
-                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
-
-                qdata = req_capsule_server_swab_get(&req->rq_pill,
-                                                    &RMF_QUNIT_DATA,
-                                          (void*)lustre_swab_qdata);
-        } else {
-                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
+        /* qunit exists in several forms (for historical reasons), so we need
+         * to adjust qunit from slaves to the same form here */
+        OBD_ALLOC(qdata, sizeof(struct qunit_data));
+        if (!qdata)
+                RETURN(-ENOMEM);
 
-                qdata = req_capsule_server_swab_get(&req->rq_pill,
-                                                    &RMF_QUNIT_DATA,
-                                       (void*)lustre_swab_qdata_old);
-                qdata = lustre_quota_old_to_new(qdata_old);
-        }
-        if (qdata == NULL) {
-                DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data");
-                RETURN(-EPROTO);
+        down_read(&obt->obt_rwsem);
+        /* if a quota req times out or is dropped, we should update quota
+         * statistics, which will be handled in dqacq_completion. In
+         * this situation we should get qdata from the request instead of
+         * the reply */
+        rc1 = quota_get_qdata(req, qdata,
+                              (rc != 0) ? QUOTA_REQUEST : QUOTA_REPLY,
+                              QUOTA_IMPORT);
+        if (rc1 < 0) {
+                DEBUG_REQ(D_ERROR, req,
+                          "error unpacking qunit_data(rc: %d)\n", rc1);
+                GOTO(exit, rc = rc1);
         }
 
-        LASSERT(qdata->qd_id == qunit->lq_data.qd_id &&
-                (qdata->qd_flags & QUOTA_IS_GRP) ==
-                 (qunit->lq_data.qd_flags & QUOTA_IS_GRP) &&
-                (qdata->qd_count == qunit->lq_data.qd_count ||
-                 qdata->qd_count == 0));
+        QDATA_DEBUG(qdata, "qdata: interpret rc(%d).\n", rc);
+        QDATA_DEBUG((&qunit->lq_data), "lq_data: \n");
 
-        QDATA_DEBUG(qdata, "%s interpret rc(%d).\n",
-                    lustre_msg_get_opc(req->rq_reqmsg) == QUOTA_DQACQ ?
-                    "DQACQ" : "DQREL", rc);
+        if (qdata->qd_id != qunit->lq_data.qd_id ||
+            OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RET_QDATA)) {
+                CDEBUG(D_ERROR, "the returned qd_id isn't expected!"
+                       "(qdata: %u, lq_data: %u)\n", qdata->qd_id,
+                       qunit->lq_data.qd_id);
+                qdata->qd_id = qunit->lq_data.qd_id;
+                rc = -EPROTO;
+        }
+        if (QDATA_IS_GRP(qdata) != QDATA_IS_GRP(&qunit->lq_data)) {
+                CDEBUG(D_ERROR, "the returned grp/usr isn't expected!"
+                       "(qdata: %u, lq_data: %u)\n", qdata->qd_flags,
+                       qunit->lq_data.qd_flags);
+                if (QDATA_IS_GRP(&qunit->lq_data))
+                        QDATA_SET_GRP(qdata);
+                else
+                        QDATA_CLR_GRP(qdata);
+                rc = -EPROTO;
+        }
+        if (qdata->qd_count > qunit->lq_data.qd_count) {
+                CDEBUG(D_ERROR, "the returned qd_count isn't expected!"
+                       "(qdata: "LPU64", lq_data: "LPU64")\n", qdata->qd_count,
+                       qunit->lq_data.qd_count);
+                rc = -EPROTO;
+        }
 
         rc = dqacq_completion(obd, qctxt, qdata, rc,
                               lustre_msg_get_opc(req->rq_reqmsg));
 
+exit:
+        up_read(&obt->obt_rwsem);
+        OBD_FREE(qdata, sizeof(struct qunit_data));
+
         RETURN(rc);
 }
 
-static int got_qunit(struct qunit_waiter *waiter)
+/**
+ * check if quota master is online
+ */
+int check_qm(struct lustre_quota_ctxt *qctxt)
 {
-        int rc = 0;
+        int rc;
         ENTRY;
-        spin_lock(&qunit_hash_lock);
-        rc = list_empty(&waiter->qw_entry);
-        spin_unlock(&qunit_hash_lock);
+
+        spin_lock(&qctxt->lqc_lock);
+        /* quit waiting when mds is back or qctxt is cleaned up */
+        rc = qctxt->lqc_import || !qctxt->lqc_valid;
+        spin_unlock(&qctxt->lqc_lock);
+
+        RETURN(rc);
+}
+
+static int got_qunit(struct lustre_qunit *qunit)
+{
+        int rc;
+        ENTRY;
+
+        spin_lock(&qunit->lq_lock);
+        switch (qunit->lq_state) {
+        case QUNIT_IN_HASH:
+        case QUNIT_RM_FROM_HASH:
+                rc = 0;
+                break;
+        case QUNIT_FINISHED:
+                rc = 1;
+                break;
+        default:
+                rc = 0;
+                CERROR("invalid qunit state %d\n", qunit->lq_state);
+        }
+        spin_unlock(&qunit->lq_lock);
         RETURN(rc);
 }
 
 static int
-schedule_dqacq(struct obd_device *obd,
-               struct lustre_quota_ctxt *qctxt,
-               struct qunit_data *qdata, int opc, int wait)
+schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
+               struct qunit_data *qdata, int opc, int wait,
+               struct obd_trans_info *oti)
 {
         struct lustre_qunit *qunit, *empty;
-        struct qunit_waiter qw;
         struct l_wait_info lwi = { 0 };
         struct ptlrpc_request *req;
-        struct qunit_data *reqdata;
         struct dqacq_async_args *aa;
-       unsigned long factor;   
+        struct obd_import *imp = NULL;
+        struct lustre_qunit_size *lqs = NULL;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
         int rc = 0;
         ENTRY;
 
-        CFS_INIT_LIST_HEAD(&qw.qw_entry);
-        init_waitqueue_head(&qw.qw_waitq);
-        qw.qw_rc = 0;
-
+        LASSERT(opc == QUOTA_DQACQ || opc == QUOTA_DQREL);
+        do_gettimeofday(&work_start);
         if ((empty = alloc_qunit(qctxt, qdata, opc)) == NULL)
                 RETURN(-ENOMEM);
 
         spin_lock(&qunit_hash_lock);
-
         qunit = dqacq_in_flight(qctxt, qdata);
         if (qunit) {
                 if (wait)
-                        list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
+                        qunit_get(qunit);
                 spin_unlock(&qunit_hash_lock);
+                qunit_put(empty);
 
-                free_qunit(empty);
                 goto wait_completion;
         }
         qunit = empty;
         insert_qunit_nolock(qctxt, qunit);
-        if (wait)
-                list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
         spin_unlock(&qunit_hash_lock);
 
         LASSERT(qunit);
 
+        quota_search_lqs(qdata, NULL, qctxt, &lqs);
+        if (lqs) {
+                spin_lock(&lqs->lqs_lock);
+                quota_compute_lqs(qdata, lqs, 1, (opc == QUOTA_DQACQ) ? 1 : 0);
+                /* when this qdata is returned from mds, lqs_putref is called */
+                lqs_getref(lqs);
+                spin_unlock(&lqs->lqs_lock);
+                /* this is for quota_search_lqs */
+                lqs_putref(lqs);
+        } else {
+                CDEBUG(D_ERROR, "Can't find the lustre qunit size!\n");
+        }
+
+        QDATA_DEBUG(qdata, "obd(%s): send %s quota req\n",
+                    obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel");
         /* master is going to dqacq/dqrel from itself */
-        if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP))
-        {
+        if (is_master(qctxt)) {
                 int rc2;
                 QDATA_DEBUG(qdata, "local %s.\n",
                             opc == QUOTA_DQACQ ? "DQACQ" : "DQREL");
+                QDATA_SET_CHANGE_QS(qdata);
                 rc = qctxt->lqc_handler(obd, qdata, opc);
                 rc2 = dqacq_completion(obd, qctxt, qdata, rc, opc);
-                RETURN((rc && rc != -EDQUOT) ? rc : rc2);
+                do_gettimeofday(&work_end);
+                timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+                if (opc == QUOTA_DQACQ)
+                        lprocfs_counter_add(qctxt->lqc_stats,
+                                            wait ? LQUOTA_SYNC_ACQ : LQUOTA_ASYNC_ACQ,
+                                            timediff);
+                else
+                        lprocfs_counter_add(qctxt->lqc_stats,
+                                            wait ? LQUOTA_SYNC_REL : LQUOTA_ASYNC_REL,
+                                            timediff);
+                RETURN(rc ? rc : rc2);
+        }
+
+        spin_lock(&qctxt->lqc_lock);
+        if (!qctxt->lqc_import) {
+                spin_unlock(&qctxt->lqc_lock);
+                QDATA_DEBUG(qdata, "lqc_import is invalid.\n");
+
+                spin_lock(&qunit_hash_lock);
+                remove_qunit_nolock(qunit);
+                spin_unlock(&qunit_hash_lock);
+
+                compute_lqs_after_removing_qunit(qunit);
+
+                QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, -EAGAIN);
+                wake_up_all(&qunit->lq_waitq);
+
+                qunit_put(qunit);
+                spin_lock(&qctxt->lqc_lock);
+                if (wait && !qctxt->lqc_import) {
+                        spin_unlock(&qctxt->lqc_lock);
+
+                        LASSERT(oti && oti->oti_thread &&
+                                oti->oti_thread->t_watchdog);
+
+                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
+                        CDEBUG(D_QUOTA, "sleep for quota master\n");
+                        l_wait_event(qctxt->lqc_wait_for_qmaster,
+                                     check_qm(qctxt), &lwi);
+                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
+                        lc_watchdog_touch(oti->oti_thread->t_watchdog);
+                } else {
+                        spin_unlock(&qctxt->lqc_lock);
+                }
+
+                RETURN(-EAGAIN);
         }
+        imp = class_import_get(qctxt->lqc_import);
+        spin_unlock(&qctxt->lqc_lock);
 
         /* build dqacq/dqrel request */
-        LASSERT(qctxt->lqc_import);
+        LASSERT(imp);
 
-        req = ptlrpc_request_alloc_pack(qctxt->lqc_import, &RQF_MDS_QUOTA_DQACQ,
+        req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_QUOTA_DQACQ,
                                         LUSTRE_MDS_VERSION, opc);
+        class_import_put(imp);
         if (req == NULL) {
+                CDEBUG(D_ERROR, "Can't alloc request\n");
                 dqacq_completion(obd, qctxt, qdata, -ENOMEM, opc);
                 RETURN(-ENOMEM);
         }
 
-       if (qdata->qd_flags & QUOTA_IS_BLOCK)
-               factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz *
-                         qctxt->lqc_bunit_sz;
-        else
-                factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz *
-                         qctxt->lqc_iunit_sz;
-
-        LASSERT(!should_translate_quota(qctxt->lqc_import) ||
-                qdata->qd_count <= factor);
-        if (should_translate_quota(qctxt->lqc_import))
-        {
-                struct qunit_data_old *reqdata_old, *tmp;
-
-                reqdata_old = req_capsule_client_get(&req->rq_pill,
-                                                     &RMF_QUNIT_DATA);
-
-                tmp = lustre_quota_new_to_old(qdata);
-                *reqdata_old = *tmp;
-                req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER,
-                                     sizeof(*reqdata_old));
-                CDEBUG(D_QUOTA, "qd_count is 32bit!\n");
-        } else {
-                reqdata = req_capsule_client_get(&req->rq_pill,
-                                                 &RMF_QUNIT_DATA);
-
-                *reqdata = *qdata;
-                req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER,
-                                     sizeof(*reqdata));
-                CDEBUG(D_QUOTA, "qd_count is 64bit!\n");
-        }
         ptlrpc_request_set_replen(req);
+        req->rq_no_resend = req->rq_no_delay = 1;
+        rc = quota_copy_qdata(req, qdata, QUOTA_REQUEST, QUOTA_IMPORT);
+        if (rc < 0) {
+                CDEBUG(D_ERROR, "Can't pack qunit_data(rc: %d)\n", rc);
+                ptlrpc_req_finished(req);
+                dqacq_completion(obd, qctxt, qdata, -EPROTO, opc);
+                RETURN(rc);
+        }
+
+        if (wait && qunit)
+                qunit_get(qunit);
 
         CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
-        aa = (struct dqacq_async_args *)&req->rq_async_args;
+        aa = ptlrpc_req_async_args(req);
         aa->aa_ctxt = qctxt;
         aa->aa_qunit = qunit;
 
@@ -727,22 +940,45 @@ schedule_dqacq(struct obd_device *obd,
 wait_completion:
         if (wait && qunit) {
                 struct qunit_data *p = &qunit->lq_data;
-                QDATA_DEBUG(p, "wait for dqacq.\n");
 
-                l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
-                if (qw.qw_rc == 0)
+                QDATA_DEBUG(p, "qunit(%p) is waiting for dqacq.\n", qunit);
+                l_wait_event(qunit->lq_waitq, got_qunit(qunit), &lwi);
+                /* rc = -EAGAIN means a quota req is finished;
+                 * rc = -EDQUOT means out of quota;
+                 * rc = -EBUSY means recovery is happening;
+                 * any other rc < 0 means a real error, which callers of
+                 * schedule_dqacq should take care of */
+                spin_lock(&qunit->lq_lock);
+                if (qunit->lq_rc == 0)
                         rc = -EAGAIN;
-
-                CDEBUG(D_QUOTA, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
+                else
+                        rc = qunit->lq_rc;
+                spin_unlock(&qunit->lq_lock);
+                CDEBUG(D_QUOTA, "qunit(%p) finishes waiting. (rc:%d)\n",
+                       qunit, rc);
+                qunit_put(qunit);
         }
+
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        if (opc == QUOTA_DQACQ)
+                lprocfs_counter_add(qctxt->lqc_stats,
+                                    wait ? LQUOTA_SYNC_ACQ : LQUOTA_ASYNC_ACQ,
+                                    timediff);
+        else
+                lprocfs_counter_add(qctxt->lqc_stats,
+                                    wait ? LQUOTA_SYNC_REL : LQUOTA_ASYNC_REL,
+                                    timediff);
+
         RETURN(rc);
 }
 
 int
 qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
-                   uid_t uid, gid_t gid, __u32 isblk, int wait)
+                   uid_t uid, gid_t gid, __u32 isblk, int wait,
+                   struct obd_trans_info *oti)
 {
-        int ret, rc = 0, i = USRQUOTA;
+        int rc = 0, i = USRQUOTA;
         __u32 id[MAXQUOTAS] = { uid, gid };
         struct qunit_data qdata[MAXQUOTAS];
         ENTRY;
@@ -753,20 +989,26 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
 
         for (i = 0; i < MAXQUOTAS; i++) {
                 qdata[i].qd_id = id[i];
-                qdata[i].qd_flags = 0;
-                qdata[i].qd_flags |= i;
-                qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
+                qdata[i].qd_flags = i;
+                if (isblk)
+                        QDATA_SET_BLK(&qdata[i]);
                 qdata[i].qd_count = 0;
 
-                ret = check_cur_qunit(obd, qctxt, &qdata[i]);
-                if (ret > 0) {
+                rc = check_cur_qunit(obd, qctxt, &qdata[i]);
+                if (rc > 0) {
                         int opc;
                         /* need acquire or release */
-                        opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
-                        ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i],
-                                                          opc, wait);
-                        if (!rc)
-                                rc = ret;
+                        opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
+                        rc = schedule_dqacq(obd, qctxt, &qdata[i], opc,
+                                            wait,oti);
+                        if (rc < 0)
+                                RETURN(rc);
+                } else if (wait == 1) {
+                        /* when wait equals 1, it means mds_quota_acquire
+                         * or filter_quota_acquire is calling it. */
+                        rc = qctxt_wait_pending_dqacq(qctxt, id[i], i, isblk);
+                        if (rc < 0)
+                                RETURN(rc);
                 }
         }
 
@@ -778,93 +1020,174 @@ qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id,
                          unsigned short type, int isblk)
 {
         struct lustre_qunit *qunit = NULL;
-        struct qunit_waiter qw;
         struct qunit_data qdata;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
         struct l_wait_info lwi = { 0 };
+        int rc = 0;
         ENTRY;
 
-        CFS_INIT_LIST_HEAD(&qw.qw_entry);
-        init_waitqueue_head(&qw.qw_waitq);
-        qw.qw_rc = 0;
-
+        do_gettimeofday(&work_start);
         qdata.qd_id = id;
-        qdata.qd_flags = 0;
-        qdata.qd_flags |= type;
-        qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0;
+        qdata.qd_flags = type;
+        if (isblk)
+                QDATA_SET_BLK(&qdata);
         qdata.qd_count = 0;
 
         spin_lock(&qunit_hash_lock);
-
         qunit = dqacq_in_flight(qctxt, &qdata);
         if (qunit)
-                list_add_tail(&qw.qw_entry, &qunit->lq_waiters);
-
+                /* grab reference on this qunit to handle races with
+                 * dqacq_completion(). Otherwise, this qunit could be freed just
+                 * after we release the qunit_hash_lock */
+                qunit_get(qunit);
         spin_unlock(&qunit_hash_lock);
 
         if (qunit) {
-                struct qunit_data *p = &qdata;
-                QDATA_DEBUG(p, "wait for dqacq completion.\n");
-                l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi);
-                QDATA_DEBUG(p, "wait dqacq done. (rc:%d)\n", qw.qw_rc);
+                struct qunit_data *p = &qunit->lq_data;
+
+                QDATA_DEBUG(p, "qunit(%p) is waiting for dqacq.\n", qunit);
+                l_wait_event(qunit->lq_waitq, got_qunit(qunit), &lwi);
+                CDEBUG(D_QUOTA, "qunit(%p) finishes waiting. (rc:%d)\n",
+                       qunit, qunit->lq_rc);
+                qunit_put(qunit);
+                do_gettimeofday(&work_end);
+                timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+                lprocfs_counter_add(qctxt->lqc_stats,
+                                    isblk ? LQUOTA_WAIT_PENDING_BLK_QUOTA :
+                                            LQUOTA_WAIT_PENDING_INO_QUOTA,
+                                    timediff);
+                /* keep same as schedule_dqacq() b=17030 */
+                spin_lock(&qunit->lq_lock);
+                if (qunit->lq_rc == 0)
+                        rc = -EAGAIN;
+                else
+                        rc = qunit->lq_rc;
+                spin_unlock(&qunit->lq_lock);
+        } else {
+                do_gettimeofday(&work_end);
+                timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+                lprocfs_counter_add(qctxt->lqc_stats,
+                                    isblk ? LQUOTA_NOWAIT_PENDING_BLK_QUOTA :
+                                            LQUOTA_NOWAIT_PENDING_INO_QUOTA,
+                                    timediff);
         }
-        RETURN(0);
+
+        RETURN(rc);
 }
 
 int
-qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb,
-           dqacq_handler_t handler)
+qctxt_init(struct obd_device *obd, dqacq_handler_t handler)
 {
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct obd_device_target *obt = &obd->u.obt;
+        struct super_block *sb = obt->obt_sb;
         int rc = 0;
         ENTRY;
 
+        LASSERT(qctxt);
+
         rc = ptlrpcd_addref();
         if (rc)
                 RETURN(rc);
 
+        cfs_waitq_init(&qctxt->lqc_wait_for_qmaster);
+        spin_lock_init(&qctxt->lqc_lock);
+        spin_lock(&qctxt->lqc_lock);
         qctxt->lqc_handler = handler;
         qctxt->lqc_sb = sb;
+        qctxt->lqc_obt = obt;
         qctxt->lqc_import = NULL;
         qctxt->lqc_recovery = 0;
-        qctxt->lqc_atype = 0;
-        qctxt->lqc_status= 0;
+        qctxt->lqc_switch_qs = 1; /* Change qunit size in default setting */
+        qctxt->lqc_valid = 1;
+        qctxt->lqc_cqs_boundary_factor = 4;
+        qctxt->lqc_cqs_least_bunit = PTLRPC_MAX_BRW_SIZE;
+        qctxt->lqc_cqs_least_iunit = 2;
+        qctxt->lqc_cqs_qs_factor = 2;
+        qctxt->lqc_flags = 0;
+        QUOTA_MASTER_UNREADY(qctxt);
         qctxt->lqc_bunit_sz = default_bunit_sz;
         qctxt->lqc_btune_sz = default_bunit_sz / 100 * default_btune_ratio;
         qctxt->lqc_iunit_sz = default_iunit_sz;
         qctxt->lqc_itune_sz = default_iunit_sz * default_itune_ratio / 100;
+        qctxt->lqc_switch_seconds = 300; /* enlarging will wait 5 minutes
+                                          * after the last shrinking */
+        qctxt->lqc_sync_blk = 0;
+        spin_unlock(&qctxt->lqc_lock);
+
+        qctxt->lqc_lqs_hash = lustre_hash_init("LQS_HASH", 7, 7,
+                                               &lqs_hash_ops, 0);
+        if (!qctxt->lqc_lqs_hash) {
+                CERROR("initialize hash lqs for %s error!\n", obd->obd_name);
+                RETURN(-ENOMEM);
+        }
 
-        RETURN(0);
+#ifdef LPROCFS
+        rc = lquota_proc_setup(obd, is_master(qctxt));
+        if (rc)
+                CERROR("initialize proc for %s error!\n", obd->obd_name);
+#endif
+
+        RETURN(rc);
 }
 
 void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force)
 {
         struct lustre_qunit *qunit, *tmp;
-        struct qunit_waiter *qw, *tmp2;
+        struct list_head tmp_list;
+        struct obd_device_target *obt = qctxt->lqc_obt;
         int i;
         ENTRY;
 
-        spin_lock(&qunit_hash_lock);
+        CFS_INIT_LIST_HEAD(&tmp_list);
 
+        spin_lock(&qctxt->lqc_lock);
+        qctxt->lqc_valid = 0;
+        spin_unlock(&qctxt->lqc_lock);
+
+        spin_lock(&qunit_hash_lock);
         for (i = 0; i < NR_DQHASH; i++) {
                 list_for_each_entry_safe(qunit, tmp, &qunit_hash[i], lq_hash) {
                         if (qunit->lq_ctxt != qctxt)
                                 continue;
-
                         remove_qunit_nolock(qunit);
-                        /* wake up all waiters */
-                        list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters,
-                                                 qw_entry) {
-                                list_del_init(&qw->qw_entry);
-                                qw->qw_rc = 0;
-                                wake_up(&qw->qw_waitq);
-                        }
-                        qunit_put(qunit);
+                        list_add(&qunit->lq_hash, &tmp_list);
                 }
         }
-
         spin_unlock(&qunit_hash_lock);
 
+        list_for_each_entry_safe(qunit, tmp, &tmp_list, lq_hash) {
+                list_del_init(&qunit->lq_hash);
+                compute_lqs_after_removing_qunit(qunit);
+
+                /* wake up all waiters */
+                QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, 0);
+                wake_up_all(&qunit->lq_waitq);
+                qunit_put(qunit);
+        }
+
+        down_write(&obt->obt_rwsem);
+        lustre_hash_exit(qctxt->lqc_lqs_hash);
+        qctxt->lqc_lqs_hash = NULL;
+        up_write(&obt->obt_rwsem);
+
+        /* after qctxt_cleanup, qctxt might be freed, and then check_qm() is
+         * unpredictable. So we must wait until lqc_wait_for_qmaster is empty */
+        while (cfs_waitq_active(&qctxt->lqc_wait_for_qmaster)) {
+                cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
+                cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE,
+                                     cfs_time_seconds(1));
+        }
+
         ptlrpcd_decref();
 
+#ifdef LPROCFS
+        if (lquota_proc_cleanup(qctxt))
+                CERROR("cleanup proc error!\n");
+#endif
+
         EXIT;
 }
 
@@ -919,24 +1242,27 @@ static int qslave_recovery_main(void *arg)
                 list_for_each_entry_safe(dqid, tmp, &id_list, di_link) {
                         list_del_init(&dqid->di_link);
                         /* skip slave recovery on itself */
-                        if (is_master(obd, qctxt, dqid->di_id, type))
+                        if (is_master(qctxt))
                                 goto free;
                         if (rc && rc != -EBUSY)
                                 goto free;
 
                         qdata.qd_id = dqid->di_id;
-                        qdata.qd_flags = 0;
-                        qdata.qd_flags |= type;
-                        qdata.qd_flags |= QUOTA_IS_BLOCK;
+                        qdata.qd_flags = type;
+                        QDATA_SET_BLK(&qdata);
                         qdata.qd_count = 0;
 
                         ret = check_cur_qunit(obd, qctxt, &qdata);
                         if (ret > 0) {
                                 int opc;
                                 opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL;
-                                rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0);
-                        } else
+                                rc = schedule_dqacq(obd, qctxt, &qdata, opc,
+                                                    0, NULL);
+                                if (rc == -EDQUOT)
+                                        rc = 0;
+                        } else {
                                 rc = 0;
+                        }
 
                         if (rc)
                                 CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR,
@@ -974,3 +1300,102 @@ qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt)
 exit:
         EXIT;
 }
+
+
+/**
+ * lqs<->qctxt hash operations
+ */
+
+/**
+ * hash a quota id: multiply it by 5381 (group) or 5387 (otherwise), then mask
+ */
+static unsigned
+lqs_hash(lustre_hash_t *lh, void *key, unsigned mask)
+{
+        struct quota_adjust_qunit *lqs_key;
+        unsigned hash;
+        ENTRY;
+
+        LASSERT(key);
+        lqs_key = (struct quota_adjust_qunit *)key;
+        hash = (QAQ_IS_GRP(lqs_key) ? 5381 : 5387) * lqs_key->qaq_id;
+
+        RETURN(hash & mask);
+}
+
+static int
+lqs_compare(void *key, struct hlist_node *hnode)
+{
+        struct quota_adjust_qunit *lqs_key;
+        struct lustre_qunit_size *q;
+        int rc;
+        ENTRY;
+
+        LASSERT(key);
+        lqs_key = (struct quota_adjust_qunit *)key;
+        q = hlist_entry(hnode, struct lustre_qunit_size, lqs_hash);
+
+        spin_lock(&q->lqs_lock);
+        rc = ((lqs_key->qaq_id == q->lqs_id) &&
+              (QAQ_IS_GRP(lqs_key) == LQS_IS_GRP(q)));
+        spin_unlock(&q->lqs_lock);
+
+        RETURN(rc);
+}
+
+static void *
+lqs_get(struct hlist_node *hnode)
+{
+        struct lustre_qunit_size *q = 
+            hlist_entry(hnode, struct lustre_qunit_size, lqs_hash);
+        ENTRY;
+
+        atomic_inc(&q->lqs_refcount);
+        CDEBUG(D_QUOTA, "lqs=%p refcount %d\n",
+               q, atomic_read(&q->lqs_refcount));
+
+        RETURN(q);
+}
+
+static void *
+lqs_put(struct hlist_node *hnode)
+{
+        struct lustre_qunit_size *q = 
+            hlist_entry(hnode, struct lustre_qunit_size, lqs_hash);
+        ENTRY;
+
+        LASSERT(atomic_read(&q->lqs_refcount) > 0);
+        atomic_dec(&q->lqs_refcount);
+        CDEBUG(D_QUOTA, "lqs=%p refcount %d\n",
+               q, atomic_read(&q->lqs_refcount));
+
+        RETURN(q);
+}
+
+static void
+lqs_exit(struct hlist_node *hnode)
+{
+        struct lustre_qunit_size *q;
+        ENTRY;
+
+        q = hlist_entry(hnode, struct lustre_qunit_size, lqs_hash);
+        /* 
+         * Nothing should be left. User of lqs put it and
+         * lqs also was deleted from table by this time
+         * so we should have 0 refs.
+         */
+        LASSERTF(atomic_read(&q->lqs_refcount) == 0, 
+                 "Busy lqs %p with %d refs\n", q,
+                 atomic_read(&q->lqs_refcount));
+        OBD_FREE_PTR(q);
+        EXIT;
+}
+
+static lustre_hash_ops_t lqs_hash_ops = {
+        .lh_hash    = lqs_hash,
+        .lh_compare = lqs_compare,
+        .lh_get     = lqs_get,
+        .lh_put     = lqs_put,
+        .lh_exit    = lqs_exit
+};
+#endif /* HAVE_QUOTA_SUPPORT */
index 2cb9c9d..826e9e5 100644 (file)
@@ -36,7 +36,7 @@
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_LQUOTA
 
 #ifdef __KERNEL__
 # include <linux/version.h>
@@ -44,7 +44,6 @@
 # include <linux/init.h>
 # include <linux/fs.h>
 # include <linux/jbd.h>
-# include <linux/ext3_fs.h>
 # include <linux/quota.h>
 # include <linux/smp_lock.h>
 # include <linux/buffer_head.h>
 #include <lustre_quota.h>
 #include "quota_internal.h"
 
+#ifdef HAVE_QUOTA_SUPPORT
 #ifdef __KERNEL__
-int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
+int mds_quota_ctl(struct obd_device *obd, struct obd_export *unused,
+                  struct obd_quotactl *oqctl)
 {
-        struct obd_device *obd = exp->exp_obd;
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
         int rc = 0;
         ENTRY;
 
+        do_gettimeofday(&work_start);
         switch (oqctl->qc_cmd) {
         case Q_QUOTAON:
                 rc = mds_quota_on(obd, oqctl);
                 break;
         case Q_QUOTAOFF:
-                mds_quota_off(obd, oqctl);
+                rc = mds_quota_off(obd, oqctl);
                 break;
         case Q_SETINFO:
                 rc = mds_set_dqinfo(obd, oqctl);
@@ -93,6 +98,12 @@ int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
         case Q_GETOQUOTA:
                 rc = mds_get_obd_quota(obd, oqctl);
                 break;
+        case LUSTRE_Q_INVALIDATE:
+                rc = mds_quota_invalidate(obd, oqctl);
+                break;
+        case LUSTRE_Q_FINVALIDATE:
+                rc = mds_quota_finvalidate(obd, oqctl);
+                break;
         default:
                 CERROR("%s: unsupported mds_quotactl command: %d\n",
                        obd->obd_name, oqctl->qc_cmd);
@@ -103,19 +114,29 @@ int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                 CDEBUG(D_INFO, "mds_quotactl admin quota command %d, id %u, "
                                "type %d, failed: rc = %d\n",
                        oqctl->qc_cmd, oqctl->qc_id, oqctl->qc_type, rc);
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        lprocfs_counter_add(qctxt->lqc_stats, LQUOTA_QUOTA_CTL, timediff);
 
         RETURN(rc);
 }
 
-int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
+int filter_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                     struct obd_quotactl *oqctl)
 {
         struct obd_device *obd = exp->exp_obd;
         struct obd_device_target *obt = &obd->u.obt;
         struct lvfs_run_ctxt saved;
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
         int rc = 0;
         ENTRY;
 
+        do_gettimeofday(&work_start);
         switch (oqctl->qc_cmd) {
+        case Q_FINVALIDATE:
         case Q_QUOTAON:
         case Q_QUOTAOFF:
                 if (!atomic_dec_and_test(&obt->obt_quotachecking)) {
@@ -124,6 +145,15 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                         rc = -EBUSY;
                         break;
                 }
+                if (oqctl->qc_cmd == Q_FINVALIDATE &&
+                    (obt->obt_qctxt.lqc_flags & UGQUOTA2LQC(oqctl->qc_type))) {
+                        /* obt_quotachecking was taken by atomic_dec_and_test()
+                         * above; give it back before bailing out */
+                        atomic_inc(&obt->obt_quotachecking);
+                        rc = -EBUSY;
+                        break;
+                }
+                oqctl->qc_id = obt->obt_qfmt; /* override qfmt version */
         case Q_GETOINFO:
         case Q_GETOQUOTA:
         case Q_GETQUOTA:
@@ -137,18 +164,21 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                                                  1);
 
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-                rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
+                rc = fsfilt_quotactl(obd, obt->obt_sb, oqctl);
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
 
-                if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF) {
-                        if (!rc)
-                                obt->obt_qctxt.lqc_status = 
-                                        (oqctl->qc_cmd == Q_QUOTAON) ? 1 : 0;
+                if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF ||
+                    oqctl->qc_cmd == Q_FINVALIDATE) {
+                        if (!rc && oqctl->qc_cmd == Q_QUOTAON)
+                                obt->obt_qctxt.lqc_flags |= UGQUOTA2LQC(oqctl->qc_type);
+                        if (!rc && oqctl->qc_cmd == Q_QUOTAOFF)
+                                obt->obt_qctxt.lqc_flags &= ~UGQUOTA2LQC(oqctl->qc_type);
                         atomic_inc(&obt->obt_quotachecking);
                 }
                 break;
         case Q_SETQUOTA:
-                qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, 
+                /* currently, it is only used for nullifying the quota */
+                qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt,
                                          oqctl->qc_id, oqctl->qc_type, 1);
 
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
@@ -170,14 +200,14 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                 LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0);
 
                 /* There might be a pending dqacq/dqrel (which is going to
-                 * clear stale limits on slave). we should wait for it's 
+                 * clear stale limits on slave). we should wait for its
                  * completion then initialize limits */
-                qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, 
+                qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt,
                                          oqctl->qc_id, oqctl->qc_type, 1);
 
                 if (!oqctl->qc_dqblk.dqb_bhardlimit)
                         goto adjust;
-                
+
                 LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT);
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
@@ -200,8 +230,13 @@ adjust:
                 else
                         gid = oqctl->qc_id;
 
-                rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, 
-                                        uid, gid, 1, 0);
+                rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt,
+                                        uid, gid, 1, 0, NULL);
+                if (rc == -EDQUOT || rc == -EBUSY) {
+                        CDEBUG(D_QUOTA, "rc: %d.\n", rc);
+                        rc = 0;
+                }
+
                 break;
                 }
         default:
@@ -209,30 +244,37 @@ adjust:
                        obd->obd_name, oqctl->qc_cmd);
                 RETURN(-EFAULT);
         }
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        lprocfs_counter_add(qctxt->lqc_stats, LQUOTA_QUOTA_CTL, timediff);
 
         RETURN(rc);
 }
 #endif /* __KERNEL__ */
+#endif
 
-int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
+int client_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                     struct obd_quotactl *oqctl)
 {
-        struct ptlrpc_request *req;
-        struct obd_quotactl   *oqc;
-        int                    ver, opc, rc;
+        struct ptlrpc_request   *req;
+        struct obd_quotactl     *oqc;
+        const struct req_format *rf;
+        int                      ver, opc, rc;
         ENTRY;
 
         if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) {
+                rf  = &RQF_MDS_QUOTACTL;
                 ver = LUSTRE_MDS_VERSION,
                 opc = MDS_QUOTACTL;
         } else if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME)) {
+                rf  = &RQF_OST_QUOTACTL;
                 ver = LUSTRE_OST_VERSION,
                 opc = OST_QUOTACTL;
         } else {
                 RETURN(-EINVAL);
         }
 
-        req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
-                                        &RQF_MDS_QUOTACTL, ver, opc);
+        req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), rf, ver, opc);
         if (req == NULL)
                 RETURN(-ENOMEM);
 
@@ -242,30 +284,65 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
         ptlrpc_request_set_replen(req);
 
         rc = ptlrpc_queue_wait(req);
-        if (!rc) {
+        if (rc) {
+                CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc);
+                GOTO(out, rc);
+        }
+
+        oqc = NULL;
+        if (req->rq_repmsg)
                 oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
-                if (oqc == NULL)
-                        GOTO(out, rc = -EPROTO);
 
-                *oqctl = *oqc;
+        if (oqc == NULL) {
+                CERROR ("Can't unpack obd_quotactl\n");
+                GOTO(out, rc = -EPROTO);
         }
+
+        *oqctl = *oqc;
+        EXIT;
 out:
         ptlrpc_req_finished(req);
-        RETURN (rc);
+        return rc;
+}
+
+/**
+ * For lmv, the request only needs to be sent to the master MDT (tgts[0]); the
+ * master is expected to handle the slave MDTs. -EIO if it is absent or inactive.
+ */
+int lmv_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                  struct obd_quotactl *oqctl)
+{
+        struct obd_device *obd = class_exp2obd(exp);
+        struct lmv_obd *lmv = &obd->u.lmv;
+        struct lmv_tgt_desc *tgt = &lmv->tgts[0];
+        int rc;
+        ENTRY;
+
+        if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) {
+                CERROR("master lmv inactive\n");
+                RETURN(-EIO);
+        }
+
+        rc = obd_quotactl(tgt->ltd_exp, oqctl);
+        RETURN(rc);
 }
 
-int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
+int lov_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                  struct obd_quotactl *oqctl)
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct lov_obd *lov = &obd->u.lov;
         __u64 curspace = 0;
-        __u32 bhardlimit = 0;
+        __u64 bhardlimit = 0;
         int i, rc = 0;
         ENTRY;
 
-        if (oqctl->qc_cmd != Q_QUOTAON && oqctl->qc_cmd != Q_QUOTAOFF &&
-            oqctl->qc_cmd != Q_GETOQUOTA && oqctl->qc_cmd != Q_INITQUOTA &&
-            oqctl->qc_cmd != Q_SETQUOTA) {
+        if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON &&
+            oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF &&
+            oqctl->qc_cmd != Q_GETOQUOTA &&
+            oqctl->qc_cmd != Q_INITQUOTA &&
+            oqctl->qc_cmd != LUSTRE_Q_SETQUOTA &&
+            oqctl->qc_cmd != Q_FINVALIDATE) {
                 CERROR("bad quota opc %x for lov obd", oqctl->qc_cmd);
                 RETURN(-EFAULT);
         }
@@ -277,11 +354,10 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                         if (oqctl->qc_cmd == Q_GETOQUOTA) {
                                 CERROR("ost %d is inactive\n", i);
                                 rc = -EIO;
-                                break;
                         } else {
                                 CDEBUG(D_HA, "ost %d is inactive\n", i);
-                                continue;
                         }
+                        continue;
                 }
 
                 err = obd_quotactl(lov->lov_tgts[i]->ltd_exp, oqctl);
index e035ceb..f4374b7 100644 (file)
@@ -37,7 +37,7 @@
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_LQUOTA
 
 #ifdef __KERNEL__
 # include <linux/version.h>
 # include <linux/init.h>
 # include <linux/fs.h>
 # include <linux/jbd.h>
-# include <linux/ext3_fs.h>
-# include <linux/smp_lock.h>
-# include <linux/buffer_head.h>
-# include <linux/workqueue.h>
-# include <linux/mount.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/smp_lock.h>
+#  include <linux/buffer_head.h>
+#  include <linux/workqueue.h>
+#  include <linux/mount.h>
+# else
+#  include <linux/locks.h>
+# endif
 #else /* __KERNEL__ */
 # include <liblustre.h>
 #endif
 #include <lprocfs_status.h>
 #include "quota_internal.h"
 
-
 #ifdef __KERNEL__
-/* quota proc file handling functions */
-#ifdef LPROCFS
-int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        LASSERT(obd != NULL);
-
-        return snprintf(page, count, "%lu\n", 
-                        obd->u.obt.obt_qctxt.lqc_bunit_sz);
-}
-EXPORT_SYMBOL(lprocfs_rd_bunit);
-
-int lprocfs_rd_iunit(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        LASSERT(obd != NULL);
-
-        return snprintf(page, count, "%lu\n", 
-                        obd->u.obt.obt_qctxt.lqc_iunit_sz);
-}
-EXPORT_SYMBOL(lprocfs_rd_iunit);
-
-int lprocfs_wr_bunit(struct file *file, const char *buffer,
-                     unsigned long count, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        int val, rc;
-        LASSERT(obd != NULL);
-
-        rc = lprocfs_write_helper(buffer, count, &val);
-
-        if (rc)
-                return rc;
-
-        if (val % QUOTABLOCK_SIZE ||
-            val <= obd->u.obt.obt_qctxt.lqc_btune_sz)
-                return -EINVAL;
-
-        obd->u.obt.obt_qctxt.lqc_bunit_sz = val;
-        return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_bunit);
-
-int lprocfs_wr_iunit(struct file *file, const char *buffer,
-                     unsigned long count, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        int val, rc;
-        LASSERT(obd != NULL);
-
-        rc = lprocfs_write_helper(buffer, count, &val);
-        if (rc)
-                return rc;
-
-        if (val <= obd->u.obt.obt_qctxt.lqc_itune_sz)
-                return -EINVAL;
-
-        obd->u.obt.obt_qctxt.lqc_iunit_sz = val;
-        return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_iunit);
-
-int lprocfs_rd_btune(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        LASSERT(obd != NULL);
-
-        return snprintf(page, count, "%lu\n", 
-                        obd->u.obt.obt_qctxt.lqc_btune_sz);
-}
-EXPORT_SYMBOL(lprocfs_rd_btune);
-
-int lprocfs_rd_itune(char *page, char **start, off_t off, int count, 
-                     int *eof, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        LASSERT(obd != NULL);
-
-        return snprintf(page, count, "%lu\n", 
-                        obd->u.obt.obt_qctxt.lqc_itune_sz);
-}
-EXPORT_SYMBOL(lprocfs_rd_itune);
-
-int lprocfs_wr_btune(struct file *file, const char *buffer,
-                     unsigned long count, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        int val, rc;
-        LASSERT(obd != NULL);
-
-        rc = lprocfs_write_helper(buffer, count, &val);
-        if (rc)
-                return rc;
-        
-        if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || 
-            val >= obd->u.obt.obt_qctxt.lqc_bunit_sz)
-                return -EINVAL;
-
-        obd->u.obt.obt_qctxt.lqc_btune_sz = val;
-        return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_btune);
-
-int lprocfs_wr_itune(struct file *file, const char *buffer,
-                     unsigned long count, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        int val, rc;
-        LASSERT(obd != NULL);
-        rc = lprocfs_write_helper(buffer, count, &val);
-        if (rc)
-                return rc;
-
-        if (val <= MIN_QLIMIT || 
-            val >= obd->u.obt.obt_qctxt.lqc_iunit_sz)
-                return -EINVAL;
 
-        obd->u.obt.obt_qctxt.lqc_itune_sz = val;
-        return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_itune);
-
-#define USER_QUOTA      1
-#define GROUP_QUOTA     2
-
-#define MAX_STYPE_SIZE  4
-int lprocfs_rd_type(char *page, char **start, off_t off, int count, 
-                    int *eof, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        char stype[MAX_STYPE_SIZE + 1] = "";
-        int type = obd->u.obt.obt_qctxt.lqc_atype;
-        LASSERT(obd != NULL);
-
-        if (type == 0) {
-                strcpy(stype, "off");
-        } else {
-                if (type & USER_QUOTA)
-                        strcat(stype, "u");
-                if (type & GROUP_QUOTA)
-                        strcat(stype, "g");
-        }
-        
-        return snprintf(page, count, "%s\n", stype);
-}
-EXPORT_SYMBOL(lprocfs_rd_type);
+#ifdef HAVE_QUOTA_SUPPORT
 
-static int auto_quota_on(struct obd_device *obd, int type,
-                         struct super_block *sb, int is_master)
-{
-        struct obd_quotactl *oqctl;
-        struct lvfs_run_ctxt saved;
-        int rc;
-        ENTRY;
-
-        LASSERT(type == USRQUOTA || type == GRPQUOTA || type == UGQUOTA);
-
-        /* quota already turned on */
-        if (obd->u.obt.obt_qctxt.lqc_status)
-                RETURN(0);
-
-        OBD_ALLOC_PTR(oqctl);
-        if (!oqctl)
-                RETURN(-ENOMEM);
-
-        oqctl->qc_type = type;
-        oqctl->qc_cmd = Q_QUOTAON;
-        oqctl->qc_id = QFMT_LDISKFS;
-
-        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-        if (!is_master)
-                goto local_quota;
-
-        /* turn on cluster wide quota */
-        rc = mds_admin_quota_on(obd, oqctl);
-        if (rc) {
-                CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, 
-                       "auto-enable admin quota failed. rc=%d\n", rc);
-                GOTO(out_pop, rc);
-        }
-local_quota:
-        /* turn on local quota */
-        rc = fsfilt_quotactl(obd, sb, oqctl);
-        if (rc) {
-                CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, 
-                       "auto-enable local quota failed. rc=%d\n", rc);
-                if (is_master)
-                         mds_quota_off(obd, oqctl);
-        } else {
-                obd->u.obt.obt_qctxt.lqc_status = 1;
-        }
-out_pop:
-        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
-        OBD_FREE_PTR(oqctl);
-        RETURN(rc);
-}
-
-
-int lprocfs_wr_type(struct file *file, const char *buffer,
-                    unsigned long count, void *data)
-{
-        struct obd_device *obd = (struct obd_device *)data;
-        struct obd_device_target *obt = &obd->u.obt;
-        int type = 0;
-        char stype[MAX_STYPE_SIZE + 1] = "";
-        LASSERT(obd != NULL);
-
-        if (copy_from_user(stype, buffer, MAX_STYPE_SIZE))
-                return -EFAULT;
-
-        if (strchr(stype, 'u'))
-                type |= USER_QUOTA;
-        if (strchr(stype, 'g'))
-                type |= GROUP_QUOTA;
-        
-        obt->obt_qctxt.lqc_atype = type;
-
-        if (type == 0)
-                return count;
-
-        if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME))
-                auto_quota_on(obd, type - 1, obt->obt_sb, 1);
-        else if (!strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME))
-                auto_quota_on(obd, type - 1, obt->obt_sb, 0);
-        else 
-                return -EFAULT;
-
-        return count;
-}
-EXPORT_SYMBOL(lprocfs_wr_type);
-#endif /* LPROCFS */
+static cfs_time_t last_print = 0;
+static spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED;
 
 static int filter_quota_setup(struct obd_device *obd)
 {
@@ -310,41 +80,73 @@ static int filter_quota_setup(struct obd_device *obd)
         struct obd_device_target *obt = &obd->u.obt;
         ENTRY;
 
+        init_rwsem(&obt->obt_rwsem);
+        obt->obt_qfmt = LUSTRE_QUOTA_V2;
         atomic_set(&obt->obt_quotachecking, 1);
-        rc = qctxt_init(&obt->obt_qctxt, obt->obt_sb, NULL);
-        if (rc) {
+        rc = qctxt_init(obd, NULL);
+        if (rc)
                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
-                RETURN(rc);
-        }
 
         RETURN(rc);
 }
 
 static int filter_quota_cleanup(struct obd_device *obd)
 {
+        ENTRY;
         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
-        return 0;
+        RETURN(0);
 }
 
-static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd)
+static int filter_quota_setinfo(struct obd_device *obd, void *data)
 {
+        struct obd_export *exp = data;
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
         struct obd_import *imp;
+        ENTRY;
 
         /* setup the quota context import */
-        obd->u.obt.obt_qctxt.lqc_import = exp->exp_imp_reverse;
+        spin_lock(&qctxt->lqc_lock);
+        qctxt->lqc_import = exp->exp_imp_reverse;
+        spin_unlock(&qctxt->lqc_lock);
+        CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated now, \n",
+               obd->obd_name,exp->exp_imp_reverse, obd);
 
-        /* make imp's connect flags equal relative exp's connect flags 
+        /* make imp's connect flags equal relative exp's connect flags
          * adding it to avoid the scan export list
          */
-        imp = exp->exp_imp_reverse;
-        if (imp)
-                imp->imp_connect_data.ocd_connect_flags |= 
-                        (exp->exp_connect_flags & OBD_CONNECT_QUOTA64);
+        imp = qctxt->lqc_import;
+        if (likely(imp))
+                imp->imp_connect_data.ocd_connect_flags |=
+                        (exp->exp_connect_flags &
+                         (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS));
 
+        cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster);
         /* start quota slave recovery thread. (release high limits) */
-        qslave_start_recovery(obd, &obd->u.obt.obt_qctxt);
-        return 0;
+        qslave_start_recovery(obd, qctxt);
+        RETURN(0);
 }
+
+static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        ENTRY;
+
+        /* the export may be destroyed before lquota has been set up, b=14896 */
+        if (!obd->obd_set_up)
+                RETURN(0);
+
+        /* when exp->exp_imp_reverse is destroyed, an lqc_import that still
+         * points to it must be cleared (under lqc_lock), b=12374 */
+        if (qctxt->lqc_import && qctxt->lqc_import == exp->exp_imp_reverse) {
+                spin_lock(&qctxt->lqc_lock);
+                qctxt->lqc_import = NULL;
+                spin_unlock(&qctxt->lqc_lock);
+                CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
+                       obd->obd_name, obd);
+        }
+        RETURN(0);
+}
+
 static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
 {
         ENTRY;
@@ -352,10 +154,12 @@ static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
         if (!sb_any_quota_enabled(obd->u.obt.obt_sb))
                 RETURN(0);
 
-        if (ignore)
+        if (ignore) {
+                CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n");
                 cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
-        else
+        } else {
                 cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
+        }
 
         RETURN(0);
 }
@@ -363,6 +167,7 @@ static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore)
 static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
 {
         struct obd_device_target *obt = &obd->u.obt;
+        struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
         int err, cnt, rc = 0;
         struct obd_quotactl *oqctl;
         ENTRY;
@@ -370,15 +175,42 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
         if (!sb_any_quota_enabled(obt->obt_sb))
                 RETURN(0);
 
-        oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
-
         OBD_ALLOC_PTR(oqctl);
         if (!oqctl) {
                 CERROR("Not enough memory!");
                 RETURN(-ENOMEM);
         }
 
+        /* set over quota flags for a uid/gid */
+        oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA;
+        oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA);
+
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+                struct quota_adjust_qunit oqaq_tmp;
+                struct lustre_qunit_size *lqs = NULL;
+
+                oqaq_tmp.qaq_flags = cnt;
+                oqaq_tmp.qaq_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid;
+
+                quota_search_lqs(NULL, &oqaq_tmp, qctxt, &lqs);
+                if (lqs) {
+                        spin_lock(&lqs->lqs_lock);
+                        if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) {
+                                oa->o_flags |= (cnt == USRQUOTA) ?
+                                        OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
+                                spin_unlock(&lqs->lqs_lock);
+                                CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), "
+                                       "sync_blk(%d)\n", lqs->lqs_bunit_sz,
+                                       qctxt->lqc_sync_blk);
+                                /* this is for quota_search_lqs */
+                                lqs_putref(lqs);
+                                continue;
+                        }
+                        spin_unlock(&lqs->lqs_lock);
+                        /* this is for quota_search_lqs */
+                        lqs_putref(lqs);
+                }
+
                 memset(oqctl, 0, sizeof(*oqctl));
 
                 oqctl->qc_cmd = Q_GETQUOTA;
@@ -388,14 +220,13 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
                 if (err) {
                         if (!rc)
                                 rc = err;
+                        oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA :
+                                                             OBD_MD_FLGRPQUOTA);
                         continue;
                 }
 
-                /* set over quota flags for a uid/gid */
-                oa->o_valid |= (cnt == USRQUOTA) ?
-                               OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA;
                 if (oqctl->qc_dqblk.dqb_bhardlimit &&
-                   (toqb(oqctl->qc_dqblk.dqb_curspace) >
+                   (toqb(oqctl->qc_dqblk.dqb_curspace) >=
                     oqctl->qc_dqblk.dqb_bhardlimit))
                         oa->o_flags |= (cnt == USRQUOTA) ?
                                 OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA;
@@ -404,58 +235,288 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa)
         RETURN(rc);
 }
 
-static int filter_quota_acquire(struct obd_device *obd, unsigned int uid,
-                                unsigned int gid)
+/**
+ * check whether the remaining quota of the given uid and gid can satisfy a
+ * block_write or inode_create rpc; return QUOTA_RET_ACQUOTA if quota is needed
+ */
+static int quota_check_common(struct obd_device *obd, unsigned int uid,
+                              unsigned int gid, int count, int cycle, int isblk)
 {
         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
-        int rc;
+        int i;
+        __u32 id[MAXQUOTAS] = { uid, gid };
+        struct qunit_data qdata[MAXQUOTAS];
+        int rc = 0, rc2[2] = { 0, 0 };
         ENTRY;
 
-        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 1);
-        RETURN(rc == -EAGAIN);
-}
+        CLASSERT(MAXQUOTAS < 4);
+        if (!sb_any_quota_enabled(qctxt->lqc_sb))
+                RETURN(rc);
 
-static int mds_quota_init(void)
-{
-        return lustre_dquot_init();
+        spin_lock(&qctxt->lqc_lock);
+        if (!qctxt->lqc_valid){
+                spin_unlock(&qctxt->lqc_lock);
+                RETURN(rc);
+        }
+        spin_unlock(&qctxt->lqc_lock);
+
+        for (i = 0; i < MAXQUOTAS; i++) {
+                struct lustre_qunit_size *lqs = NULL;
+
+                qdata[i].qd_id = id[i];
+                qdata[i].qd_flags = i;
+                if (isblk)
+                        QDATA_SET_BLK(&qdata[i]);
+                qdata[i].qd_count = 0;
+
+                /* ignore root user */
+                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
+                        continue;
+
+                quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
+                if (!lqs)
+                        continue;
+
+                rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk);
+                spin_lock(&lqs->lqs_lock);
+                if (!cycle) {
+                        rc = QUOTA_RET_INC_PENDING;
+                        if (isblk)
+                                lqs->lqs_bwrite_pending += count;
+                        else
+                                lqs->lqs_iwrite_pending += count;
+                }
+                if (rc2[i] == QUOTA_RET_OK) {
+                        if (isblk && qdata[i].qd_count <
+                            lqs->lqs_bwrite_pending * CFS_PAGE_SIZE)
+                                rc2[i] = QUOTA_RET_ACQUOTA;
+                        if (!isblk && qdata[i].qd_count <
+                            lqs->lqs_iwrite_pending)
+                                rc2[i] = QUOTA_RET_ACQUOTA;
+                }
+                spin_unlock(&lqs->lqs_lock);
+                CDEBUG(D_QUOTA, "count: %d, write pending: %lu, qd_count: "LPU64
+                       ".\n", count,
+                       isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending,
+                       qdata[i].qd_count);
+
+                /* When cycle is zero, lqs_*_pending will be changed. We will
+                 * get reference of the lqs here and put reference of lqs in
+                 * quota_pending_commit b=14784 */
+                if (!cycle)
+                        lqs_getref(lqs);
+
+                /* this is for quota_search_lqs */
+                lqs_putref(lqs);
+        }
+
+        if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA)
+                RETURN(rc | QUOTA_RET_ACQUOTA);
+        else
+                RETURN(rc);
 }
 
-static int mds_quota_exit(void)
+static int quota_chk_acq_common(struct obd_device *obd, unsigned int uid,
+                                unsigned int gid, int count, int *pending,
+                                quota_acquire acquire,
+                                struct obd_trans_info *oti, int isblk)
 {
-        lustre_dquot_exit();
-        return 0;
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
+        struct l_wait_info lwi = { 0 };
+        int rc = 0, cycle = 0, count_err = 1;
+        ENTRY;
+
+        CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name);
+        *pending = 0;
+        /* Unfortunately, if quota master is too busy to handle the
+         * pre-dqacq in time and quota hash on ost is used up, we
+         * have to wait for the completion of in flight dqacq/dqrel,
+         * in order to get enough quota for write b=12588 */
+        do_gettimeofday(&work_start);
+        while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk)) &
+               QUOTA_RET_ACQUOTA) {
+
+                spin_lock(&qctxt->lqc_lock);
+                if (!qctxt->lqc_import && oti) {
+                        spin_unlock(&qctxt->lqc_lock);
+
+                        LASSERT(oti && oti->oti_thread &&
+                                oti->oti_thread->t_watchdog);
+
+                        lc_watchdog_disable(oti->oti_thread->t_watchdog);
+                        CDEBUG(D_QUOTA, "sleep for quota master\n");
+                        l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt),
+                                     &lwi);
+                        CDEBUG(D_QUOTA, "wake up when quota master is back\n");
+                        lc_watchdog_touch(oti->oti_thread->t_watchdog);
+                } else {
+                        spin_unlock(&qctxt->lqc_lock);
+                }
+
+                if (rc & QUOTA_RET_INC_PENDING)
+                        *pending = 1;
+
+                cycle++;
+                if (isblk)
+                        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90);
+                /* after acquire(), we should run quota_check_common again
+                 * so that we confirm there are enough quota to finish write */
+                rc = acquire(obd, uid, gid, oti, isblk);
+
+                /* please refer to dqacq_completion for the cases below */
+                /* a new request is finished, try again */
+                if (rc == -EAGAIN) {
+                        CDEBUG(D_QUOTA, "finish a quota req, try again\n");
+                        continue;
+                }
+
+                /* it is out of quota already */
+                if (rc == -EDQUOT) {
+                        CDEBUG(D_QUOTA, "out of quota,  return -EDQUOT\n");
+                        break;
+                }
+
+                /* -EBUSY and others, wait a second and try again */
+                if (rc < 0) {
+                        cfs_waitq_t        waitq;
+                        struct l_wait_info lwi;
+
+                        if (oti && oti->oti_thread && oti->oti_thread->t_watchdog)
+                                lc_watchdog_touch(oti->oti_thread->t_watchdog);
+                        CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc,
+                               count_err++);
+
+                        init_waitqueue_head(&waitq);
+                        lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL,
+                                          NULL);
+                        l_wait_event(waitq, 0, &lwi);
+                }
+
+                if (rc < 0 || cycle % 10 == 2) {
+                        spin_lock(&last_print_lock);
+                        if (last_print == 0 ||
+                            cfs_time_before((last_print + cfs_time_seconds(30)),
+                                            cfs_time_current())) {
+                                last_print = cfs_time_current();
+                                spin_unlock(&last_print_lock);
+                                CWARN("still haven't managed to acquire quota "
+                                      "space from the quota master after %d "
+                                      "retries (err=%d, rc=%d)\n",
+                                      cycle, count_err - 1, rc);
+                        } else {
+                                spin_unlock(&last_print_lock);
+                        }
+                }
+
+                CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc,
+                       cycle);
+        }
+
+        if (!cycle && rc & QUOTA_RET_INC_PENDING)
+                *pending = 1;
+
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        lprocfs_counter_add(qctxt->lqc_stats,
+                            isblk ? LQUOTA_WAIT_FOR_CHK_BLK :
+                                    LQUOTA_WAIT_FOR_CHK_INO,
+                            timediff);
+
+        RETURN(rc);
 }
 
-/* check whether the left quota of certain uid and uid can satisfy a write rpc
- * when need to acquire quota, return QUOTA_RET_ACQUOTA */
-static int filter_quota_check(struct obd_device *obd, unsigned int uid, 
-                              unsigned int gid, int npage)
+/**
+ * when a block_write or inode_create rpc is finished, adjust the record for
+ * pending blocks and inodes
+ */
+static int quota_pending_commit(struct obd_device *obd, unsigned int uid,
+                                unsigned int gid, int count, int isblk)
 {
         struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        struct timeval work_start;
+        struct timeval work_end;
+        long timediff;
         int i;
         __u32 id[MAXQUOTAS] = { uid, gid };
         struct qunit_data qdata[MAXQUOTAS];
-        int rc;
         ENTRY;
 
+        CDEBUG(D_QUOTA, "commit pending quota for  %s\n", obd->obd_name);
         CLASSERT(MAXQUOTAS < 4);
         if (!sb_any_quota_enabled(qctxt->lqc_sb))
                 RETURN(0);
 
+        do_gettimeofday(&work_start);
         for (i = 0; i < MAXQUOTAS; i++) {
+                struct lustre_qunit_size *lqs = NULL;
+
                 qdata[i].qd_id = id[i];
                 qdata[i].qd_flags = i;
-                qdata[i].qd_flags |= QUOTA_IS_BLOCK;
+                if (isblk)
+                        QDATA_SET_BLK(&qdata[i]);
                 qdata[i].qd_count = 0;
 
-                qctxt_wait_pending_dqacq(qctxt, id[i], i, 1);
-                rc = compute_remquota(obd, qctxt, &qdata[i]);
-                if (rc == QUOTA_RET_OK && 
-                    qdata[i].qd_count < npage * CFS_PAGE_SIZE)
-                        RETURN(QUOTA_RET_ACQUOTA);
+                if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i]))
+                        continue;
+
+                quota_search_lqs(&qdata[i], NULL, qctxt, &lqs);
+                if (lqs) {
+                        int flag = 0;
+                        CDEBUG(D_QUOTA, "pending: %lu, count: %d.\n",
+                               isblk ? lqs->lqs_bwrite_pending :
+                               lqs->lqs_iwrite_pending, count);
+                        spin_lock(&lqs->lqs_lock);
+                        if (isblk) {
+                                if (lqs->lqs_bwrite_pending >= count) {
+                                        lqs->lqs_bwrite_pending -= count;
+                                        spin_unlock(&lqs->lqs_lock);
+                                        flag = 1;
+                                } else {
+                                        spin_unlock(&lqs->lqs_lock);
+                                        CDEBUG(D_ERROR,
+                                               "there are too many blocks!\n");
+                                }
+                        } else {
+                                if (lqs->lqs_iwrite_pending >= count) {
+                                        lqs->lqs_iwrite_pending -= count;
+                                        spin_unlock(&lqs->lqs_lock);
+                                        flag = 1;
+                                } else {
+                                        spin_unlock(&lqs->lqs_lock);
+                                        CDEBUG(D_ERROR,
+                                               "there are too many files!\n");
+                                }
+                        }
+
+                        lqs_putref(lqs);
+                        /* When lqs_*_pending is changed back, we'll putref lqs
+                         * here b=14784 */
+                        if (flag)
+                                lqs_putref(lqs);
+                }
         }
+        do_gettimeofday(&work_end);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
+        lprocfs_counter_add(qctxt->lqc_stats,
+                            isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK :
+                                    LQUOTA_WAIT_FOR_COMMIT_INO,
+                            timediff);
 
-        RETURN(rc);
+        RETURN(0);
+}
+
+static int mds_quota_init(void)
+{
+        return lustre_dquot_init();
+}
+
+static int mds_quota_exit(void)
+{
+        lustre_dquot_exit();
+        return 0;
 }
 
 static int mds_quota_setup(struct obd_device *obd)
@@ -465,41 +526,69 @@ static int mds_quota_setup(struct obd_device *obd)
         int rc;
         ENTRY;
 
+        init_rwsem(&obt->obt_rwsem);
+        obt->obt_qfmt = LUSTRE_QUOTA_V2;
+        mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2;
         atomic_set(&obt->obt_quotachecking, 1);
         /* initialize quota master and quota context */
         sema_init(&mds->mds_qonoff_sem, 1);
-        rc = qctxt_init(&obt->obt_qctxt, obt->obt_sb, dqacq_handler);
+        rc = qctxt_init(obd, dqacq_handler);
         if (rc) {
                 CERROR("initialize quota context failed! (rc:%d)\n", rc);
                 RETURN(rc);
         }
-
+        mds->mds_quota = 1;
         RETURN(rc);
 }
 
 static int mds_quota_cleanup(struct obd_device *obd)
 {
+        ENTRY;
+        obd->u.mds.mds_quota = 0;
         qctxt_cleanup(&obd->u.obt.obt_qctxt, 0);
         RETURN(0);
 }
 
+static int mds_quota_setinfo(struct obd_device *obd, void *data)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        ENTRY;
+
+        if (data != NULL)
+                QUOTA_MASTER_READY(qctxt);
+        else
+                QUOTA_MASTER_UNREADY(qctxt);
+        RETURN(0);
+}
+
 static int mds_quota_fs_cleanup(struct obd_device *obd)
 {
         struct mds_obd *mds = &obd->u.mds;
-        int i;
+        struct obd_quotactl oqctl;
         ENTRY;
 
-        /* close admin quota files */
+        memset(&oqctl, 0, sizeof(oqctl));
+        oqctl.qc_type = UGQUOTA;
+
         down(&mds->mds_qonoff_sem);
-        for (i = 0; i < MAXQUOTAS; i++) {
-                if (mds->mds_quota_info.qi_files[i]) {
-                        filp_close(mds->mds_quota_info.qi_files[i], 0);
-                        mds->mds_quota_info.qi_files[i] = NULL;
-                }
-        }
+        mds_admin_quota_off(obd, &oqctl);
         up(&mds->mds_qonoff_sem);
         RETURN(0);
 }
+
+static int quota_acquire_common(struct obd_device *obd, unsigned int uid,
+                                unsigned int gid, struct obd_trans_info *oti,
+                                int isblk)
+{
+        struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt;
+        int rc;
+        ENTRY;
+
+        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, isblk, 1, oti);
+        RETURN(rc);
+}
+
+#endif /* HAVE_QUOTA_SUPPORT */
 #endif /* __KERNEL__ */
 
 struct osc_quota_info {
@@ -548,6 +637,7 @@ static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
 {
         unsigned int hashent = hashfn(cli, id, type);
         struct osc_quota_info *oqi;
+        ENTRY;
 
         LASSERT_SPIN_LOCKED(&qinfo_list_lock);
         list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
@@ -555,7 +645,7 @@ static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
                     oqi->oqi_id == id && oqi->oqi_type == type)
                         return oqi;
         }
-        return NULL;
+        RETURN(NULL);
 }
 
 static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
@@ -581,8 +671,7 @@ static void free_qinfo(struct osc_quota_info *oqi)
         OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
 }
 
-int osc_quota_chkdq(struct client_obd *cli,
-                    unsigned int uid, unsigned int gid)
+int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid)
 {
         unsigned int id;
         int cnt, rc = QUOTA_OK;
@@ -604,8 +693,7 @@ int osc_quota_chkdq(struct client_obd *cli,
         RETURN(rc);
 }
 
-int osc_quota_setdq(struct client_obd *cli,
-                    unsigned int uid, unsigned int gid,
+int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid,
                     obd_flag valid, obd_flag flags)
 {
         unsigned int id;
@@ -713,6 +801,7 @@ int osc_quota_exit(void)
 }
 
 #ifdef __KERNEL__
+#ifdef HAVE_QUOTA_SUPPORT
 quota_interface_t mds_quota_interface = {
         .quota_init     = mds_quota_init,
         .quota_exit     = mds_quota_exit,
@@ -720,9 +809,13 @@ quota_interface_t mds_quota_interface = {
         .quota_cleanup  = mds_quota_cleanup,
         .quota_check    = target_quota_check,
         .quota_ctl      = mds_quota_ctl,
-        .quota_fs_cleanup       =mds_quota_fs_cleanup,
+        .quota_setinfo  = mds_quota_setinfo,
+        .quota_fs_cleanup = mds_quota_fs_cleanup,
         .quota_recovery = mds_quota_recovery,
         .quota_adjust   = mds_quota_adjust,
+        .quota_chkquota = quota_chk_acq_common,
+        .quota_acquire  = quota_acquire_common,
+        .quota_pending_commit = quota_pending_commit,
 };
 
 quota_interface_t filter_quota_interface = {
@@ -731,12 +824,16 @@ quota_interface_t filter_quota_interface = {
         .quota_check    = target_quota_check,
         .quota_ctl      = filter_quota_ctl,
         .quota_setinfo  = filter_quota_setinfo,
+        .quota_clearinfo = filter_quota_clearinfo,
         .quota_enforce  = filter_quota_enforce,
         .quota_getflag  = filter_quota_getflag,
-        .quota_acquire  = filter_quota_acquire,
+        .quota_acquire  = quota_acquire_common,
         .quota_adjust   = filter_quota_adjust,
-        .quota_chkquota = filter_quota_check,
+        .quota_chkquota = quota_chk_acq_common,
+        .quota_adjust_qunit   = filter_quota_adjust_qunit,
+        .quota_pending_commit = quota_pending_commit,
 };
+#endif
 #endif /* __KERNEL__ */
 
 quota_interface_t mdc_quota_interface = {
@@ -745,6 +842,11 @@ quota_interface_t mdc_quota_interface = {
         .quota_poll_check = client_quota_poll_check,
 };
 
+quota_interface_t lmv_quota_interface = {
+        .quota_ctl      = lmv_quota_ctl,
+        .quota_check    = lmv_quota_check,
+};
+
 quota_interface_t osc_quota_interface = {
         .quota_ctl      = client_quota_ctl,
         .quota_check    = client_quota_check,
@@ -754,22 +856,42 @@ quota_interface_t osc_quota_interface = {
         .quota_chkdq    = osc_quota_chkdq,
         .quota_setdq    = osc_quota_setdq,
         .quota_cleanup  = osc_quota_cleanup,
+        .quota_adjust_qunit = client_quota_adjust_qunit,
 };
 
 quota_interface_t lov_quota_interface = {
-        .quota_check    = lov_quota_check,
         .quota_ctl      = lov_quota_ctl,
+        .quota_check    = lov_quota_check,
+        .quota_adjust_qunit = lov_quota_adjust_qunit,
 };
 
 #ifdef __KERNEL__
+
+cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL;
+
 static int __init init_lustre_quota(void)
 {
-        int rc = qunit_cache_init();
+#ifdef HAVE_QUOTA_SUPPORT
+        int rc = 0;
+
+        lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME,
+                                                proc_lustre_root,
+                                                NULL, NULL);
+        if (IS_ERR(lquota_type_proc_dir)) {
+                CERROR("LProcFS failed in lquota-init\n");
+                rc = PTR_ERR(lquota_type_proc_dir);
+                return rc;
+        }
+
+        rc = qunit_cache_init();
         if (rc)
                 return rc;
+
         PORTAL_SYMBOL_REGISTER(filter_quota_interface);
         PORTAL_SYMBOL_REGISTER(mds_quota_interface);
+#endif
         PORTAL_SYMBOL_REGISTER(mdc_quota_interface);
+        PORTAL_SYMBOL_REGISTER(lmv_quota_interface);
         PORTAL_SYMBOL_REGISTER(osc_quota_interface);
         PORTAL_SYMBOL_REGISTER(lov_quota_interface);
         return 0;
@@ -777,13 +899,19 @@ static int __init init_lustre_quota(void)
 
 static void /*__exit*/ exit_lustre_quota(void)
 {
-        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
-        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
         PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface);
+        PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface);
         PORTAL_SYMBOL_UNREGISTER(osc_quota_interface);
         PORTAL_SYMBOL_UNREGISTER(lov_quota_interface);
+#ifdef HAVE_QUOTA_SUPPORT
+        PORTAL_SYMBOL_UNREGISTER(filter_quota_interface);
+        PORTAL_SYMBOL_UNREGISTER(mds_quota_interface);
 
         qunit_cache_cleanup();
+
+        if (lquota_type_proc_dir)
+                lprocfs_remove(&lquota_type_proc_dir);
+#endif
 }
 
 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
@@ -792,9 +920,12 @@ MODULE_LICENSE("GPL");
 
 cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota);
 
+#ifdef HAVE_QUOTA_SUPPORT
 EXPORT_SYMBOL(mds_quota_interface);
 EXPORT_SYMBOL(filter_quota_interface);
+#endif
 EXPORT_SYMBOL(mdc_quota_interface);
+EXPORT_SYMBOL(lmv_quota_interface);
 EXPORT_SYMBOL(osc_quota_interface);
 EXPORT_SYMBOL(lov_quota_interface);
 #endif /* __KERNEL */
index d896fa7..e9073be 100644 (file)
 
 #include <lustre_quota.h>
 
+#ifdef HAVE_QUOTA_SUPPORT
+
 /* QUSG covnert bytes to blocks when counting block quota */
 #define QUSG(count, isblk)      (isblk ? toqb(count) : count)
 
-/* This flag is set in qc_stat to distinguish if the current getquota 
+/* This flag is set in qc_stat to distinguish if the current getquota
  * operation is for quota recovery */
 #define QUOTA_RECOVERING    0x01
+#define OBD_LQUOTA_DEVICENAME  "lquota"
 
 #ifdef __KERNEL__
 
 #define DQUOT_DEBUG(dquot, fmt, arg...)                                       \
         CDEBUG(D_QUOTA, "refcnt(%u) id(%u) type(%u) off(%llu) flags(%lu) "    \
-               "bhardlimit(%u) curspace("LPX64") ihardlimit(%u) "             \
-               "curinodes(%u): " fmt, dquot->dq_refcnt,                       \
+               "bhardlimit("LPU64") curspace("LPU64") ihardlimit("LPU64") "   \
+               "curinodes("LPU64"): " fmt, dquot->dq_refcnt,                  \
                dquot->dq_id, dquot->dq_type, dquot->dq_off,  dquot->dq_flags, \
                dquot->dq_dqb.dqb_bhardlimit, dquot->dq_dqb.dqb_curspace,      \
                dquot->dq_dqb.dqb_ihardlimit, dquot->dq_dqb.dqb_curinodes,     \
                qinfo->qi_info[1].dqi_free_entry, ## arg);
 
 #define QDATA_DEBUG(qd, fmt, arg...)                                          \
-        CDEBUG(D_QUOTA, "id(%u) type(%lu) count("LPU64") isblk(%lu):"         \
-               fmt, qd->qd_id, qd->qd_flags & QUOTA_IS_GRP, qd->qd_count,     \
-               (qd->qd_flags & QUOTA_IS_BLOCK) >> 1,       \
+        CDEBUG(D_QUOTA, "id(%u) flag(%u) type(%c) isblk(%c) count("LPU64") "  \
+               "qd_qunit("LPU64"): " fmt, qd->qd_id, qd->qd_flags,            \
+               QDATA_IS_GRP(qd) ? 'g' : 'u', QDATA_IS_BLK(qd) ? 'b': 'i',     \
+               qd->qd_count,                                                  \
+               (QDATA_IS_ADJBLK(qd) | QDATA_IS_ADJINO(qd)) ? qd->qd_qunit : 0,\
                ## arg);
 
+#define QAQ_DEBUG(qaq, fmt, arg...)                                           \
+        CDEBUG(D_QUOTA, "id(%u) flag(%u) type(%c) bunit("LPU64") "            \
+               "iunit("LPU64"): " fmt, qaq->qaq_id, qaq->qaq_flags,           \
+               QAQ_IS_GRP(qaq) ? 'g': 'u', qaq->qaq_bunit_sz,                 \
+               qaq->qaq_iunit_sz, ## arg);
+
+#define LQS_DEBUG(lqs, fmt, arg...)                                           \
+        CDEBUG(D_QUOTA, "lqs(%p) id(%u) flag(%lu) type(%c) bunit(%lu) "       \
+               "btune(%lu) iunit(%lu) itune(%lu) lqs_bwrite_pending(%lu) "    \
+               "lqs_iwrite_pending(%lu) ino_rec("LPD64") blk_rec("LPD64" ) "  \
+               "refcount(%d): "                                               \
+               fmt, lqs, lqs->lqs_id, lqs->lqs_flags,                         \
+               LQS_IS_GRP(lqs) ? 'g' : 'u',                                   \
+               lqs->lqs_bunit_sz, lqs->lqs_btune_sz, lqs->lqs_iunit_sz,       \
+               lqs->lqs_itune_sz, lqs->lqs_bwrite_pending,                    \
+               lqs->lqs_iwrite_pending, lqs->lqs_ino_rec,                     \
+               lqs->lqs_blk_rec, atomic_read(&lqs->lqs_refcount), ## arg);
+
 
 /* quota_context.c */
 void qunit_cache_cleanup(void);
 int qunit_cache_init(void);
 int qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt,
-                       uid_t uid, gid_t gid, __u32 isblk, int wait);
+                       uid_t uid, gid_t gid, __u32 isblk, int wait,
+                       struct obd_trans_info *oti);
 int qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id,
                              unsigned short type, int isblk);
-int qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb,
-               dqacq_handler_t handler);
+int qctxt_init(struct obd_device *obd, dqacq_handler_t handler);
 void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force);
-void qslave_start_recovery(struct obd_device *obd, 
+void qslave_start_recovery(struct obd_device *obd,
                            struct lustre_quota_ctxt *qctxt);
 int compute_remquota(struct obd_device *obd,
-                     struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata);
+                     struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata,
+                     int isblk);
+int check_qm(struct lustre_quota_ctxt *qctxt);
 /* quota_master.c */
 int lustre_dquot_init(void);
 void lustre_dquot_exit(void);
@@ -97,27 +122,89 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[],
 int filter_quota_adjust(struct obd_device *obd, unsigned int qcids[],
                         unsigned int qpids[], int rc, int opc);
 int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_quota_get_version(struct obd_device *obd, lustre_quota_version_t *ver);
+int mds_quota_invalidate(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_quota_finvalidate(struct obd_device *obd, struct obd_quotactl *oqctl);
+
 int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl);
+int mds_admin_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_set_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_get_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl);
 int mds_quota_recovery(struct obd_device *obd);
 int mds_get_obd_quota(struct obd_device *obd, struct obd_quotactl *oqctl);
+int dquot_create_oqaq(struct lustre_quota_ctxt *qctxt, struct lustre_dquot
+                      *dquot, __u32 ost_num, __u32 mdt_num, int type,
+                      struct quota_adjust_qunit *oqaq);
 #endif
 
 /* quota_ctl.c */
-int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl);
-int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl);
-int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl);
-int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl);
+int mds_quota_ctl(struct obd_device *obd, struct obd_export *exp,
+                  struct obd_quotactl *oqctl);
+int filter_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                     struct obd_quotactl *oqctl);
 
 /* quota_chk.c */
-int target_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl);
-int client_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl);
-int lov_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl);
-int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
+int target_quota_check(struct obd_device *obd, struct obd_export *exp,
+                       struct obd_quotactl *oqctl);
+
+int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq, struct
+                          lustre_quota_ctxt *qctxt);
+void qdata_to_oqaq(struct qunit_data *qdata,
+                   struct quota_adjust_qunit *oqaq);
+#ifdef __KERNEL__
+int quota_search_lqs(struct qunit_data *qdata,
+                     struct quota_adjust_qunit *oqaq,
+                     struct lustre_quota_ctxt *qctxt,
+                     struct lustre_qunit_size **lqs_return);
+int quota_create_lqs(struct qunit_data *qdata,
+                     struct quota_adjust_qunit *oqaq,
+                     struct lustre_quota_ctxt *qctxt,
+                     struct lustre_qunit_size **lqs_return);
+void quota_compute_lqs(struct qunit_data *qdata, struct lustre_qunit_size *lqs,
+                       int is_chk, int is_acq);
+
 
+extern int quote_get_qdata(struct ptlrpc_request *req, struct qunit_data *qdata,
+                           int is_req, int is_exp);
+extern int quote_copy_qdata(struct ptlrpc_request *req, struct qunit_data *qdata,
+                            int is_req, int is_exp);
+int filter_quota_adjust_qunit(struct obd_export *exp,
+                              struct quota_adjust_qunit *oqaq,
+                              struct lustre_quota_ctxt *qctxt);
+int lquota_proc_setup(struct obd_device *obd, int is_master);
+int lquota_proc_cleanup(struct lustre_quota_ctxt *qctxt);
+
+extern cfs_proc_dir_entry_t *lquota_type_proc_dir;
+#endif
+
+#define LQS_BLK_DECREASE 1
+#define LQS_BLK_INCREASE 2
+#define LQS_INO_DECREASE 4
+#define LQS_INO_INCREASE 8
+
+
+#endif
+int client_quota_adjust_qunit(struct obd_export *exp,
+                              struct quota_adjust_qunit *oqaq,
+                              struct lustre_quota_ctxt *qctxt);
+int lov_quota_adjust_qunit(struct obd_export *exp,
+                           struct quota_adjust_qunit *oqaq,
+                           struct lustre_quota_ctxt *qctxt);
+int client_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                     struct obd_quotactl *oqctl);
+int lmv_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                  struct obd_quotactl *oqctl);
+int lov_quota_ctl(struct obd_device *unused, struct obd_export *exp,
+                  struct obd_quotactl *oqctl);
+int client_quota_check(struct obd_device *unused, struct obd_export *exp,
+                       struct obd_quotactl *oqctl);
+int lmv_quota_check(struct obd_device *unused, struct obd_export *exp,
+                    struct obd_quotactl *oqctl);
+int lov_quota_check(struct obd_device *unused, struct obd_export *exp,
+                    struct obd_quotactl *oqctl);
+int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
 #endif
index 5ffdf8d..512fc49 100644 (file)
@@ -44,7 +44,7 @@
 # define EXPORT_SYMTAB
 #endif
 
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_LQUOTA
 
 #include <linux/version.h>
 #include <linux/fs.h>
@@ -62,8 +62,9 @@
 
 #include "quota_internal.h"
 
-/* lock ordering: 
- * mds->mds_qonoff_sem > dquot->dq_sem */
+#ifdef HAVE_QUOTA_SUPPORT
+
+/* lock ordering: mds->mds_qonoff_sem > dquot->dq_sem */
 static struct list_head lustre_dquot_hash[NR_DQHASH];
 static spinlock_t dquot_hash_lock = SPIN_LOCK_UNLOCKED;
 
@@ -198,7 +199,7 @@ static struct lustre_dquot *lustre_dqget(struct obd_device *obd,
 
         if ((empty = alloc_dquot(lqi, id, type)) == NULL)
                 RETURN(ERR_PTR(-ENOMEM));
-        
+
         spin_lock(&dquot_hash_lock);
         if ((dquot = find_dquot(hashent, lqi, id, type)) != NULL) {
                 dquot->dq_refcnt++;
@@ -226,24 +227,134 @@ static struct lustre_dquot *lustre_dqget(struct obd_device *obd,
         RETURN(dquot);
 }
 
+static void init_oqaq(struct quota_adjust_qunit *oqaq,
+                      struct lustre_quota_ctxt *qctxt,
+                      qid_t id, int type)
+{
+        struct lustre_qunit_size *lqs = NULL;
+
+        oqaq->qaq_id = id;
+        oqaq->qaq_flags = type;
+        quota_search_lqs(NULL, oqaq, qctxt, &lqs);
+        if (lqs) {
+                spin_lock(&lqs->lqs_lock);
+                oqaq->qaq_bunit_sz = lqs->lqs_bunit_sz;
+                oqaq->qaq_iunit_sz = lqs->lqs_iunit_sz;
+                oqaq->qaq_flags    = lqs->lqs_flags;
+                spin_unlock(&lqs->lqs_lock);
+                lqs_putref(lqs);
+        } else {
+                CDEBUG(D_QUOTA, "Can't find the lustre qunit size!\n");
+                oqaq->qaq_bunit_sz = qctxt->lqc_bunit_sz;
+                oqaq->qaq_iunit_sz = qctxt->lqc_iunit_sz;
+        }
+}
+
+int dqacq_adjust_qunit_sz(struct obd_device *obd, qid_t id, int type,
+                          __u32 is_blk)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt;
+        struct obd_device *lov_mds_obd = class_exp2obd(mds->mds_osc_exp);
+        struct lov_obd *lov = &lov_mds_obd->u.lov;
+        __u32 ost_num = lov->desc.ld_tgt_count, mdt_num = 1;
+        struct quota_adjust_qunit *oqaq = NULL;
+        unsigned int uid = 0, gid = 0;
+        struct lustre_quota_info *info = &mds->mds_quota_info;
+        struct lustre_dquot *dquot = NULL;
+        int adjust_res = 0;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(mds);
+        dquot = lustre_dqget(obd, info, id, type);
+        if (IS_ERR(dquot))
+                RETURN(PTR_ERR(dquot));
+
+        OBD_ALLOC_PTR(oqaq);
+        if (!oqaq)
+                GOTO(out, rc = -ENOMEM);
+
+        down(&dquot->dq_sem);
+        init_oqaq(oqaq, qctxt, id, type);
+
+        rc = dquot_create_oqaq(qctxt, dquot, ost_num, mdt_num,
+                               is_blk ? LQUOTA_FLAGS_ADJBLK :
+                               LQUOTA_FLAGS_ADJINO, oqaq);
+
+        if (rc < 0) {
+                CDEBUG(D_ERROR, "create oqaq failed! (rc:%d)\n", rc);
+                GOTO(out_sem, rc);
+        }
+        QAQ_DEBUG(oqaq, "show oqaq.\n")
+
+        if (!QAQ_IS_ADJBLK(oqaq) && !QAQ_IS_ADJINO(oqaq))
+                GOTO(out_sem, rc);
+
+        /* adjust the mds slave qunit size */
+        adjust_res = quota_adjust_slave_lqs(oqaq, qctxt);
+        if (adjust_res <= 0) {
+                if (adjust_res < 0) {
+                        rc = adjust_res;
+                        CDEBUG(D_ERROR, "adjust mds slave's qunit size failed! \
+                               (rc:%d)\n", rc);
+                } else {
+                        CDEBUG(D_QUOTA, "qunit doesn't need to be adjusted.\n");
+                }
+                GOTO(out_sem, rc);
+        }
+
+        if (type)
+                gid = dquot->dq_id;
+        else
+                uid = dquot->dq_id;
+
+        up(&dquot->dq_sem);
+
+        rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, is_blk, 0, NULL);
+        if (rc == -EDQUOT || rc == -EBUSY) {
+                CDEBUG(D_QUOTA, "rc: %d.\n", rc);
+                rc = 0;
+        }
+        if (rc) {
+                CDEBUG(D_ERROR, "mds fail to adjust file quota! \
+                               (rc:%d)\n", rc);
+                GOTO(out, rc);
+        }
+
+        /* only when block qunit is reduced, broadcast to osts */
+        if ((adjust_res & LQS_BLK_DECREASE) && QAQ_IS_ADJBLK(oqaq))
+                rc = obd_quota_adjust_qunit(mds->mds_osc_exp, oqaq, qctxt);
+
+out:
+        lustre_dqput(dquot);
+        if (oqaq)
+                OBD_FREE_PTR(oqaq);
+
+        RETURN(rc);
+out_sem:
+       up(&dquot->dq_sem);
+       goto out;
+}
+
 int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
 {
         struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt;
         struct lustre_quota_info *info = &mds->mds_quota_info;
         struct lustre_dquot *dquot = NULL;
         __u64 *usage = NULL;
-        __u32 hlimit = 0, slimit = 0;
-        __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP;
-        __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1;
+        __u64 hlimit = 0, slimit = 0;
         time_t *time = NULL;
         unsigned int grace = 0;
+        struct lustre_qunit_size *lqs = NULL;
         int rc = 0;
         ENTRY;
 
         if (OBD_FAIL_CHECK(OBD_FAIL_OBD_DQACQ))
                 RETURN(-EIO);
 
-        dquot = lustre_dqget(obd, info, qdata->qd_id, qdata_type);
+        dquot = lustre_dqget(obd, info, qdata->qd_id, QDATA_IS_GRP(qdata));
         if (IS_ERR(dquot))
                 RETURN(PTR_ERR(dquot));
 
@@ -258,14 +369,14 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
                 GOTO(out, rc = -EBUSY);
         }
 
-        if (is_blk) {
-                grace = info->qi_info[qdata_type].dqi_bgrace;
+        if (QDATA_IS_BLK(qdata)) {
+                grace = info->qi_info[QDATA_IS_GRP(qdata)].dqi_bgrace;
                 usage = &dquot->dq_dqb.dqb_curspace;
                 hlimit = dquot->dq_dqb.dqb_bhardlimit;
                 slimit = dquot->dq_dqb.dqb_bsoftlimit;
                 time = &dquot->dq_dqb.dqb_btime;
         } else {
-                grace = info->qi_info[qdata_type].dqi_igrace;
+                grace = info->qi_info[QDATA_IS_GRP(qdata)].dqi_igrace;
                 usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes;
                 hlimit = dquot->dq_dqb.dqb_ihardlimit;
                 slimit = dquot->dq_dqb.dqb_isoftlimit;
@@ -281,12 +392,21 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
 
         switch (opc) {
         case QUOTA_DQACQ:
-                if (hlimit && 
-                    QUSG(*usage + qdata->qd_count, is_blk) > hlimit)
-                        GOTO(out, rc = -EDQUOT);
+                if (hlimit &&
+                    QUSG(*usage + qdata->qd_count, QDATA_IS_BLK(qdata)) > hlimit)
+                {
+                        if (QDATA_IS_CHANGE_QS(qdata) &&
+                            QUSG(*usage, QDATA_IS_BLK(qdata)) < hlimit)
+                                qdata->qd_count = (hlimit -
+                                        QUSG(*usage, QDATA_IS_BLK(qdata)))
+                                        * (QDATA_IS_BLK(qdata) ?
+                                           QUOTABLOCK_SIZE : 1);
+                        else
+                                GOTO(out, rc = -EDQUOT);
+                }
 
                 if (slimit &&
-                    QUSG(*usage + qdata->qd_count, is_blk) > slimit) {
+                    QUSG(*usage + qdata->qd_count, QDATA_IS_BLK(qdata)) > slimit) {
                         if (*time && cfs_time_current_sec() >= *time)
                                 GOTO(out, rc = -EDQUOT);
                         else if (!*time)
@@ -304,7 +424,7 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc)
                         *usage -= qdata->qd_count;
 
                 /* (usage <= soft limit) but not (usage < soft limit) */
-                if (!slimit || QUSG(*usage, is_blk) <= slimit)
+                if (!slimit || QUSG(*usage, QDATA_IS_BLK(qdata)) <= slimit)
                         *time = 0;
                 break;
         default:
@@ -317,6 +437,37 @@ out:
         up(&dquot->dq_sem);
         up(&mds->mds_qonoff_sem);
         lustre_dqput(dquot);
+        if (rc != -EDQUOT)
+                dqacq_adjust_qunit_sz(obd, qdata->qd_id, QDATA_IS_GRP(qdata),
+                                      QDATA_IS_BLK(qdata));
+
+        quota_search_lqs(qdata, NULL, qctxt, &lqs);
+        if (QDATA_IS_BLK(qdata)) {
+                if (!lqs) {
+                        CDEBUG(D_INFO, "Can't find the lustre qunit size!\n");
+                        qdata->qd_qunit  = qctxt->lqc_bunit_sz;
+                } else {
+                        spin_lock(&lqs->lqs_lock);
+                        qdata->qd_qunit  = lqs->lqs_bunit_sz;
+                        spin_unlock(&lqs->lqs_lock);
+                }
+                QDATA_SET_ADJBLK(qdata);
+        } else {
+                if (!lqs) {
+                        CDEBUG(D_INFO, "Can't find the lustre qunit size!\n");
+                        qdata->qd_qunit  = qctxt->lqc_iunit_sz;
+                } else {
+                        spin_lock(&lqs->lqs_lock);
+                        qdata->qd_qunit  = lqs->lqs_iunit_sz;
+                        spin_unlock(&lqs->lqs_lock);
+                }
+                QDATA_SET_ADJINO(qdata);
+        }
+
+        QDATA_DEBUG(qdata, "alloc/release qunit in dqacq_handler\n");
+        if (lqs)
+                lqs_putref(lqs);
+
         return rc;
 }
 
@@ -327,25 +478,73 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[],
         int rc2 = 0;
         ENTRY;
 
-        if (rc && rc != -EDQUOT)
+        if (rc && rc != -EDQUOT && rc != ENOLCK)
                 RETURN(0);
 
         switch (opc) {
-        case FSFILT_OP_RENAME:
-                /* acquire/release block quota on owner of original parent */
-                rc2 = qctxt_adjust_qunit(obd, qctxt, qpids[2], qpids[3], 1, 0);
-                /* fall-through */
         case FSFILT_OP_SETATTR:
-                /* acquire/release file quota on original owner */
-                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 0, 0);
-                /* fall-through */
-        case FSFILT_OP_CREATE:
+                /* release file quota on original owner */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 0, 0,
+                                          NULL);
+                /* release block quota on original owner */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0,
+                                          NULL);
+                /* acquire file quota on current owner */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0,
+                                          NULL);
+                /* acquire block quota on current owner */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0,
+                                          NULL);
+                break;
+        case FSFILT_OP_UNLINK_PARTIAL_CHILD:
+                /* release file quota on child */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0,
+                                          NULL);
+                /* release block quota on child */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0,
+                                          NULL);
+                break;
+        case FSFILT_OP_CREATE_PARTIAL_CHILD:
+                /* acquire file quota on child */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0,
+                                          NULL);
+                /* acquire block quota on child */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0,
+                                          NULL);
+                break;
+        case FSFILT_OP_LINK:
+                /* acquire block quota on parent */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0,
+                                          NULL);
+                break;
         case FSFILT_OP_UNLINK:
-                /* acquire/release file/block quota on owner of child (or current owner) */
-                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0);
-                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0);
-                /* acquire/release block quota on owner of parent (or original owner) */
-                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0);
+                /* release block quota on parent */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0,
+                                          NULL);
+                /* release file quota on child */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0,
+                                          NULL);
+                if (qpids[0] != qcids[0] || qpids[1] != qcids[1])
+                        /* release block quota on child */
+                        rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0],
+                                                  qcids[1], 1, 0, NULL);
+                break;
+        case FSFILT_OP_UNLINK_PARTIAL_PARENT:
+                /* release block quota on parent */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0,
+                                          NULL);
+                break;
+        case FSFILT_OP_CREATE:
+                /* acquire block quota on parent */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0,
+                                          NULL);
+                /* acquire file quota on child */
+                rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0,
+                                          NULL);
+                if (qpids[0] != qcids[0] || qpids[1] != qcids[1])
+                        /* acquire block quota on child */
+                        rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0],
+                                                  qcids[1], 1, 0, NULL);
                 break;
         default:
                 LBUG();
@@ -353,7 +552,8 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[],
         }
 
         if (rc2)
-                CERROR("mds adjust qunit failed! (opc:%d rc:%d)\n", opc, rc2);
+                CDEBUG(rc2 == -EAGAIN ? D_QUOTA: D_ERROR,
+                       "mds adjust qunit failed! (opc:%d rc:%d)\n", opc, rc2);
         RETURN(0);
 }
 
@@ -370,50 +570,122 @@ int filter_quota_adjust(struct obd_device *obd, unsigned int qcids[],
         switch (opc) {
         case FSFILT_OP_SETATTR:
                 /* acquire/release block quota on original & current owner */
-                rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0);
-                rc2 = qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0);
+                rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0,
+                                        NULL);
+                rc2 = qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0,
+                                         NULL);
                 break;
         case FSFILT_OP_UNLINK:
                 /* release block quota on this owner */
         case FSFILT_OP_CREATE: /* XXX for write operation on obdfilter */
                 /* acquire block quota on this owner */
-                rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0);
+                rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0,
+                                        NULL);
                 break;
         default:
                 LBUG();
                 break;
         }
 
-        if (rc || rc2)
-                CERROR("filter adjust qunit failed! (opc:%d rc%d)\n",
-                       opc, rc ?: rc2);
+        if (rc || rc2) {
+                if (!rc)
+                        rc = rc2;
+                CDEBUG(rc == -EAGAIN ? D_QUOTA: D_ERROR,
+                       "filter adjust qunit failed! (opc:%d rc%d)\n",
+                       opc, rc);
+        }
+
         RETURN(0);
 }
 
-#define LUSTRE_ADMIN_QUOTAFILES {\
-       "admin_quotafile.usr",  /* user admin quotafile */\
-       "admin_quotafile.grp"   /* group admin quotafile */\
-}
 static const char prefix[] = "OBJECTS/";
 
+int mds_quota_invalidate(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_info *qinfo = &mds->mds_quota_info;
+        int rc = 0, i;
+        char *quotafile[] = LUSTRE_ADMIN_QUOTAFILES_V2;
+        char name[64];
+        struct lvfs_run_ctxt saved;
+
+        LASSERT(qinfo->qi_version == LUSTRE_QUOTA_V2);
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        down(&mds->mds_qonoff_sem);
+
+        for (i = 0; i < MAXQUOTAS; i++) {
+                struct file *fp;
+
+                if (!Q_TYPESET(oqctl, i))
+                        continue;
+
+                /* has the quota file already been opened? */
+                if (qinfo->qi_files[i]) {
+                        rc = -EBUSY;
+                        goto out;
+                }
+
+                LASSERT(strlen(quotafile[i]) + sizeof(prefix) <= sizeof(name));
+                sprintf(name, "%s%s", prefix, quotafile[i]);
+
+                fp = filp_open(name, O_CREAT | O_TRUNC | O_RDWR, 0644);
+                if (IS_ERR(fp)) {
+                        rc = PTR_ERR(fp);
+                        CERROR("error invalidating admin quotafile %s (rc:%d)\n",
+                               name, rc);
+                }
+                else
+                        filp_close(fp, 0);
+        }
+
+out:
+        up(&mds->mds_qonoff_sem);
+
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        return rc;
+}
+
+int mds_quota_finvalidate(struct obd_device *obd, struct obd_quotactl *oqctl)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        int rc;
+        struct lvfs_run_ctxt saved;
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        down(&mds->mds_qonoff_sem);
+
+        oqctl->qc_cmd = Q_FINVALIDATE;
+        oqctl->qc_id = obd->u.obt.obt_qfmt;
+        rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
+        if (!rc)
+                rc = obd_quotactl(mds->mds_osc_exp, oqctl);
+
+        up(&mds->mds_qonoff_sem);
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        return rc;
+}
+
 int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct lustre_quota_info *qinfo = &mds->mds_quota_info;
-        const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES;
+        const char *quotafile[] = LUSTRE_ADMIN_QUOTAFILES_V2;
         struct lvfs_run_ctxt saved;
         char name[64];
         int i, rc = 0;
-        struct dentry *dparent = mds->mds_objects_dir;
-        struct inode *iparent = dparent->d_inode;
         ENTRY;
 
-        LASSERT(iparent);
+        LASSERT(qinfo->qi_version == LUSTRE_QUOTA_V2);
+
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
 
         down(&mds->mds_qonoff_sem);
-        for (i = 0; i < MAXQUOTAS; i++) {
-                struct dentry *de;
+
+        for (i = 0; i < MAXQUOTAS && !rc; i++) {
                 struct file *fp;
 
                 if (!Q_TYPESET(oqctl, i))
@@ -426,33 +698,44 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl)
                         continue;
                 }
 
-                /* lookup quota file */
-                rc = 0;
-                LOCK_INODE_MUTEX(iparent);
-                de = lookup_one_len(quotafiles[i], dparent,
-                                    strlen(quotafiles[i]));
-                UNLOCK_INODE_MUTEX(iparent);
-                if (IS_ERR(de) || de->d_inode == NULL || 
-                    !S_ISREG(de->d_inode->i_mode))
-                        rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT;
-                if (!IS_ERR(de))
-                        dput(de);
-
-                if (rc && rc != -ENOENT) {
-                        CERROR("error lookup quotafile %s! (rc:%d)\n",
+                LASSERT(strlen(quotafile[i]) + sizeof(prefix) <= sizeof(name));
+                sprintf(name, "%s%s", prefix, quotafile[i]);
+
+                /* check if quota file exists and is correct */
+                fp = filp_open(name, O_RDONLY, 0);
+                if (!IS_ERR(fp)) {
+                        /* irregular file is not the right place for quota */
+                        if (!S_ISREG(fp->f_dentry->d_inode->i_mode)) {
+                                CERROR("admin quota file %s is not "
+                                       "regular!", name);
+                                filp_close(fp, 0);
+                                rc = -EINVAL;
+                                break;
+                        }
+                        qinfo->qi_files[i] = fp;
+                        rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_CHK);
+                        qinfo->qi_files[i] = 0;
+                        filp_close(fp, 0);
+                }
+                else
+                        rc = PTR_ERR(fp);
+
+                if (!rc)
+                        continue;
+
+                /* -EINVAL may be returned by quotainfo for bad quota file */
+                if (rc != -ENOENT && rc != -EINVAL) {
+                        CERROR("error opening old quota file %s (%d)\n",
                                name, rc);
                         break;
-                } else if (!rc) {
-                        continue;
                 }
 
-                LASSERT(strlen(quotafiles[i]) + sizeof(prefix) <= sizeof(name));
-                sprintf(name, "%s%s", prefix, quotafiles[i]);
+                CDEBUG(D_INFO, "%s new quota file %s\n", name,
+                       rc == -ENOENT ? "creating" : "overwriting");
 
-                LASSERT(rc == -ENOENT);
-                /* create quota file */
-                fp = filp_open(name, O_CREAT | O_EXCL, 0644);
-                if (IS_ERR(fp) || !S_ISREG(fp->f_dentry->d_inode->i_mode)) {
+                /* create quota file overwriting old if needed */
+                fp = filp_open(name, O_CREAT | O_TRUNC | O_RDWR, 0644);
+                if (IS_ERR(fp)) {
                         rc = PTR_ERR(fp);
                         CERROR("error creating admin quotafile %s (rc:%d)\n",
                                name, rc);
@@ -460,15 +743,14 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl)
                 }
 
                 qinfo->qi_files[i] = fp;
-                rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_INIT_INFO);
-                filp_close(fp, 0);
-                qinfo->qi_files[i] = NULL;
 
-                if (rc) {
+                rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_INIT_INFO);
+                if (rc)
                         CERROR("error init %s admin quotafile! (rc:%d)\n",
                                i == USRQUOTA ? "user" : "group", rc);
-                        break;
-                }
+
+                filp_close(fp, 0);
+                qinfo->qi_files[i] = NULL;
         }
         up(&mds->mds_qonoff_sem);
 
@@ -476,7 +758,7 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl)
         RETURN(rc);
 }
 
-static int close_quota_files(struct obd_quotactl *oqctl, 
+static int close_quota_files(struct obd_quotactl *oqctl,
                              struct lustre_quota_info *qinfo)
 {
         int i, rc = 0;
@@ -499,13 +781,12 @@ int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct lustre_quota_info *qinfo = &mds->mds_quota_info;
-        const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES;
+        const char *quotafile[] = LUSTRE_ADMIN_QUOTAFILES_V2;
         char name[64];
         int i, rc = 0;
-        struct inode *iparent = mds->mds_objects_dir->d_inode;
         ENTRY;
 
-        LASSERT(iparent);
+        LASSERT(qinfo->qi_version == LUSTRE_QUOTA_V2);
 
         /* open admin quota files and read quotafile info */
         for (i = 0; i < MAXQUOTAS; i++) {
@@ -514,27 +795,33 @@ int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
                 if (!Q_TYPESET(oqctl, i))
                         continue;
 
-                LASSERT(strlen(quotafiles[i]) + sizeof(prefix) <= sizeof(name));
-                sprintf(name, "%s%s", prefix, quotafiles[i]);
+                LASSERT(strlen(quotafile[i])
+                        + sizeof(prefix) <= sizeof(name));
+                sprintf(name, "%s%s", prefix, quotafile[i]);
 
                 if (qinfo->qi_files[i] != NULL) {
                         rc = -EBUSY;
                         break;
                 }
 
-                fp = filp_open(name, O_RDWR | O_EXCL, 0644);
+                fp = filp_open(name, O_RDWR, 0);
                 if (IS_ERR(fp) || !S_ISREG(fp->f_dentry->d_inode->i_mode)) {
-                        rc = PTR_ERR(fp);
-                        CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR,
-                               "open %s failed! (rc:%d)\n", name, rc);
+                        rc = IS_ERR(fp) ? PTR_ERR(fp) : -EINVAL;
+                        CERROR("error open/create %s! (rc:%d)\n", name, rc);
                         break;
                 }
                 qinfo->qi_files[i] = fp;
 
+                rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_CHK);
+                if (rc) {
+                        CERROR("invalid quota file %s! (rc:%d)\n", name, rc);
+                        break;
+                }
+
                 rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_RD_INFO);
                 if (rc) {
-                        CERROR("error read quotainfo of %s! (rc:%d)\n",
-                               name, rc);
+                        CERROR("error read quotainfo of %s! (rc:%d)\n", name,
+                               rc);
                         break;
                 }
         }
@@ -545,8 +832,8 @@ int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
         RETURN(rc);
 }
 
-static int mds_admin_quota_off(struct obd_device *obd, 
-                               struct obd_quotactl *oqctl)
+int mds_admin_quota_off(struct obd_device *obd,
+                        struct obd_quotactl *oqctl)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct lustre_quota_info *qinfo = &mds->mds_quota_info;
@@ -584,7 +871,7 @@ int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
 
         rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
         if (!rc)
-                obt->obt_qctxt.lqc_status = 1;
+                obt->obt_qctxt.lqc_flags |= UGQUOTA2LQC(oqctl->qc_type);
 out:
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         up(&mds->mds_qonoff_sem);
@@ -614,7 +901,7 @@ int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl)
         rc = obd_quotactl(mds->mds_osc_exp, oqctl);
         rc2 = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
         if (!rc2)
-                obt->obt_qctxt.lqc_status = 0;
+                obt->obt_qctxt.lqc_flags &= ~UGQUOTA2LQC(oqctl->qc_type);
 
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         up(&mds->mds_qonoff_sem);
@@ -671,10 +958,124 @@ out:
         RETURN(rc);
 }
 
+int dquot_create_oqaq(struct lustre_quota_ctxt *qctxt,
+                      struct lustre_dquot *dquot, __u32 ost_num, __u32 mdt_num,
+                      int type, struct quota_adjust_qunit *oqaq)
+{
+        __u64 bunit_curr_o, iunit_curr_o;
+        unsigned long shrink_qunit_limit = qctxt->lqc_cqs_boundary_factor;
+        unsigned long cqs_factor = qctxt->lqc_cqs_qs_factor;
+        __u64 blimit = dquot->dq_dqb.dqb_bhardlimit ?
+                dquot->dq_dqb.dqb_bhardlimit : dquot->dq_dqb.dqb_bsoftlimit;
+        __u64 ilimit = dquot->dq_dqb.dqb_ihardlimit ?
+                dquot->dq_dqb.dqb_ihardlimit : dquot->dq_dqb.dqb_isoftlimit;
+        int rc = 0;
+        ENTRY;
+
+        if (!dquot || !oqaq)
+                RETURN(-EINVAL);
+        LASSERT_SEM_LOCKED(&dquot->dq_sem);
+        LASSERT(oqaq->qaq_iunit_sz);
+        LASSERT(oqaq->qaq_bunit_sz);
+
+        /* don't change qunit size */
+        if (!qctxt->lqc_switch_qs)
+                RETURN(rc);
+
+        bunit_curr_o = oqaq->qaq_bunit_sz;
+        iunit_curr_o = oqaq->qaq_iunit_sz;
+
+        if (dquot->dq_type == GRPQUOTA)
+                QAQ_SET_GRP(oqaq);
+
+        if ((type & LQUOTA_FLAGS_ADJBLK) && blimit) {
+                __u64 b_limitation =
+                        oqaq->qaq_bunit_sz * ost_num * shrink_qunit_limit;
+                /* enlarge block qunit size */
+                while (blimit >
+                       QUSG(dquot->dq_dqb.dqb_curspace + 2 * b_limitation, 1)) {
+                        oqaq->qaq_bunit_sz =
+                                QUSG(oqaq->qaq_bunit_sz * cqs_factor, 1)
+                                << QUOTABLOCK_BITS;
+                        b_limitation = oqaq->qaq_bunit_sz * ost_num *
+                                shrink_qunit_limit;
+                }
+
+                if (oqaq->qaq_bunit_sz > qctxt->lqc_bunit_sz)
+                        oqaq->qaq_bunit_sz = qctxt->lqc_bunit_sz;
+
+                /* shrink block qunit size */
+                while (blimit <
+                       QUSG(dquot->dq_dqb.dqb_curspace + b_limitation, 1)) {
+                        do_div(oqaq->qaq_bunit_sz , cqs_factor);
+                        oqaq->qaq_bunit_sz = QUSG(oqaq->qaq_bunit_sz, 1) <<
+                                QUOTABLOCK_BITS;
+                        b_limitation = oqaq->qaq_bunit_sz * ost_num *
+                                shrink_qunit_limit;
+                        if (oqaq->qaq_bunit_sz <  qctxt->lqc_cqs_least_bunit)
+                                break;
+                }
+
+                if (oqaq->qaq_bunit_sz < qctxt->lqc_cqs_least_bunit)
+                        oqaq->qaq_bunit_sz = qctxt->lqc_cqs_least_bunit;
+
+                if (bunit_curr_o != oqaq->qaq_bunit_sz)
+                        QAQ_SET_ADJBLK(oqaq);
+
+        }
+
+        if ((type & LQUOTA_FLAGS_ADJINO) && ilimit) {
+                __u64 i_limitation =
+                        oqaq->qaq_iunit_sz * mdt_num * shrink_qunit_limit;
+                /* enlarge file qunit size */
+                while (ilimit > dquot->dq_dqb.dqb_curinodes
+                       + 2 * i_limitation) {
+                        oqaq->qaq_iunit_sz = oqaq->qaq_iunit_sz * cqs_factor;
+                        i_limitation = oqaq->qaq_iunit_sz * mdt_num *
+                                shrink_qunit_limit;
+                }
+
+                if (oqaq->qaq_iunit_sz > qctxt->lqc_iunit_sz)
+                        oqaq->qaq_iunit_sz = qctxt->lqc_iunit_sz;
+
+                /* shrink file qunit size */
+                while (ilimit < dquot->dq_dqb.dqb_curinodes
+                       + i_limitation) {
+                        do_div(oqaq->qaq_iunit_sz, cqs_factor);
+                        i_limitation = oqaq->qaq_iunit_sz * mdt_num *
+                                       shrink_qunit_limit;
+                        if (oqaq->qaq_iunit_sz < qctxt->lqc_cqs_least_iunit)
+                                break;
+                }
+
+                if (oqaq->qaq_iunit_sz < qctxt->lqc_cqs_least_iunit)
+                        oqaq->qaq_iunit_sz = qctxt->lqc_cqs_least_iunit;
+
+                if (iunit_curr_o != oqaq->qaq_iunit_sz)
+                        QAQ_SET_ADJINO(oqaq);
+
+        }
+
+        if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit &&
+            !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) {
+                oqaq->qaq_bunit_sz = 0;
+                oqaq->qaq_iunit_sz = 0;
+                QAQ_SET_ADJBLK(oqaq);
+                QAQ_SET_ADJINO(oqaq);
+        }
+
+        QAQ_DEBUG(oqaq, "the oqaq computed\n");
+
+        RETURN(rc);
+}
+
 static int mds_init_slave_ilimits(struct obd_device *obd,
-                                  struct obd_quotactl *oqctl, int set)
+                                  struct obd_quotactl *oqctl, int set,
+                                  struct quota_adjust_qunit *oqaq)
 {
         /* XXX: for file limits only adjust local now */
+        struct obd_device_target *obt = &obd->u.obt;
+        struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
         unsigned int uid = 0, gid = 0;
         struct obd_quotactl *ioqc = NULL;
         int flag;
@@ -683,21 +1084,29 @@ static int mds_init_slave_ilimits(struct obd_device *obd,
 
         /* if we are going to set zero limit, needn't init slaves */
         if (!oqctl->qc_dqblk.dqb_ihardlimit && !oqctl->qc_dqblk.dqb_isoftlimit &&
-            set)
+            !set)
                 RETURN(0);
 
         OBD_ALLOC_PTR(ioqc);
         if (!ioqc)
                 RETURN(-ENOMEM);
-        
-        flag = oqctl->qc_dqblk.dqb_ihardlimit || 
-               oqctl->qc_dqblk.dqb_isoftlimit || set;
+
+        flag = oqctl->qc_dqblk.dqb_ihardlimit ||
+               oqctl->qc_dqblk.dqb_isoftlimit || !set;
         ioqc->qc_cmd = flag ? Q_INITQUOTA : Q_SETQUOTA;
         ioqc->qc_id = oqctl->qc_id;
         ioqc->qc_type = oqctl->qc_type;
         ioqc->qc_dqblk.dqb_valid = QIF_ILIMITS;
         ioqc->qc_dqblk.dqb_ihardlimit = flag ? MIN_QLIMIT : 0;
 
+        if (QAQ_IS_ADJINO(oqaq)) {
+                /* adjust the mds slave's inode qunit size */
+                rc = quota_adjust_slave_lqs(oqaq, qctxt);
+                if (rc < 0)
+                        CDEBUG(D_ERROR, "adjust mds slave's inode qunit size \
+                               failed! (rc:%d)\n", rc);
+        }
+
         /* set local limit to MIN_QLIMIT */
         rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
         if (rc)
@@ -709,9 +1118,15 @@ static int mds_init_slave_ilimits(struct obd_device *obd,
         else
                 gid = oqctl->qc_id;
 
-        rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 0, 0);
+        rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 0, 0,
+                                NULL);
+        if (rc == -EDQUOT || rc == -EBUSY) {
+                CDEBUG(D_QUOTA, "rc: %d.\n", rc);
+                rc = 0;
+        }
         if (rc) {
-                CERROR("error mds adjust local file quota! (rc:%d)\n", rc);
+                CDEBUG(D_QUOTA,"error mds adjust local file quota! (rc:%d)\n",
+                       rc);
                 GOTO(out, rc);
         }
         /* FIXME initialize all slaves in CMD */
@@ -723,31 +1138,41 @@ out:
 }
 
 static int mds_init_slave_blimits(struct obd_device *obd,
-                                  struct obd_quotactl *oqctl, int set)
+                                  struct obd_quotactl *oqctl, int set,
+                                  struct quota_adjust_qunit *oqaq)
 {
+        struct obd_device_target *obt = &obd->u.obt;
+        struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt;
         struct mds_obd *mds = &obd->u.mds;
         struct obd_quotactl *ioqc;
         unsigned int uid = 0, gid = 0;
+        int rc, rc1 = 0;
         int flag;
-        int rc;
         ENTRY;
 
         /* if we are going to set zero limit, needn't init slaves */
         if (!oqctl->qc_dqblk.dqb_bhardlimit && !oqctl->qc_dqblk.dqb_bsoftlimit &&
-            set)
+            !set)
                 RETURN(0);
 
         OBD_ALLOC_PTR(ioqc);
         if (!ioqc)
                 RETURN(-ENOMEM);
 
-        flag = oqctl->qc_dqblk.dqb_bhardlimit || 
-               oqctl->qc_dqblk.dqb_bsoftlimit || set;
+        flag = oqctl->qc_dqblk.dqb_bhardlimit ||
+               oqctl->qc_dqblk.dqb_bsoftlimit || !set;
         ioqc->qc_cmd = flag ? Q_INITQUOTA : Q_SETQUOTA;
         ioqc->qc_id = oqctl->qc_id;
         ioqc->qc_type = oqctl->qc_type;
         ioqc->qc_dqblk.dqb_valid = QIF_BLIMITS;
         ioqc->qc_dqblk.dqb_bhardlimit = flag ? MIN_QLIMIT : 0;
+        if (QAQ_IS_ADJBLK(oqaq)) {
+                /* adjust the mds slave's block qunit size */
+                rc1 = quota_adjust_slave_lqs(oqaq, qctxt);
+                if (rc1 < 0)
+                        CERROR("adjust mds slave's block qunit size failed!"
+                               "(rc:%d)\n", rc1);
+        }
 
         rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
         if (rc)
@@ -759,14 +1184,26 @@ static int mds_init_slave_blimits(struct obd_device *obd,
         else
                 gid = oqctl->qc_id;
 
-        rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 1, 0);
+        /* initialize the limit on all slaves */
+        rc = obd_quotactl(mds->mds_osc_exp, ioqc);
+
+        rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 1, 0,
+                                NULL);
+        if (rc == -EDQUOT || rc == -EBUSY) {
+                CDEBUG(D_QUOTA, "rc: %d.\n", rc);
+                rc = 0;
+        }
         if (rc) {
                 CERROR("error mds adjust local block quota! (rc:%d)\n", rc);
                 GOTO(out, rc);
         }
 
-        /* initialize all slave's limit */
-        rc = obd_quotactl(mds->mds_osc_exp, ioqc);
+        /* adjust all slaves' qunit size when setting quota;
+         * this will create an lqs for every ost, which will indicate
+         * whether a certain uid/gid has quota set or not */
+        QAQ_SET_ADJBLK(oqaq);
+        rc = obd_quota_adjust_qunit(mds->mds_osc_exp, oqaq, qctxt);
+
         EXIT;
 out:
         OBD_FREE_PTR(ioqc);
@@ -776,15 +1213,27 @@ out:
 int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
 {
         struct mds_obd *mds = &obd->u.mds;
+        struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt;
+        struct obd_device *lov_obd = class_exp2obd(mds->mds_osc_exp);
+        struct lov_obd *lov = &lov_obd->u.lov;
+        struct quota_adjust_qunit *oqaq = NULL;
         struct lustre_quota_info *qinfo = &mds->mds_quota_info;
-        __u32 ihardlimit, isoftlimit, bhardlimit, bsoftlimit;
+        __u64 ihardlimit, isoftlimit, bhardlimit, bsoftlimit;
         time_t btime, itime;
         struct lustre_dquot *dquot;
         struct obd_dqblk *dqblk = &oqctl->qc_dqblk;
-        int set, rc;
+        /* orig_set: a limit was set before this call; now_set: the new
+         * limits are non-zero (we are setting, not cancelling, quota) */
+        int orig_set, now_set;
+        int rc, rc2 = 0, flag = 0;
         ENTRY;
 
+        OBD_ALLOC_PTR(oqaq);
+        if (!oqaq)
+                RETURN(-ENOMEM);
         down(&mds->mds_qonoff_sem);
+        init_oqaq(oqaq, qctxt, oqctl->qc_id, oqctl->qc_type);
+
         if (qinfo->qi_files[oqctl->qc_type] == NULL)
                 GOTO(out_sem, rc = -ESRCH);
 
@@ -819,18 +1268,20 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
                 dquot->dq_dqb.dqb_bhardlimit = dqblk->dqb_bhardlimit;
                 dquot->dq_dqb.dqb_bsoftlimit = dqblk->dqb_bsoftlimit;
                 /* clear usage (limit pool) */
-                if (!dquot->dq_dqb.dqb_bhardlimit && 
+                if (!dquot->dq_dqb.dqb_bhardlimit &&
                     !dquot->dq_dqb.dqb_bsoftlimit)
                         dquot->dq_dqb.dqb_curspace = 0;
 
                 /* clear grace time */
-                if (!dqblk->dqb_bsoftlimit || 
+                if (!dqblk->dqb_bsoftlimit ||
                     toqb(dquot->dq_dqb.dqb_curspace) <= dqblk->dqb_bsoftlimit)
                         dquot->dq_dqb.dqb_btime = 0;
                 /* set grace only if user hasn't provided his own */
                 else if (!(dqblk->dqb_valid & QIF_BTIME))
-                        dquot->dq_dqb.dqb_btime = cfs_time_current_sec() + 
+                        dquot->dq_dqb.dqb_btime = cfs_time_current_sec() +
                                 qinfo->qi_info[dquot->dq_type].dqi_bgrace;
+
+                flag |= LQUOTA_FLAGS_ADJBLK;
         }
 
         if (dqblk->dqb_valid & QIF_ILIMITS) {
@@ -847,7 +1298,16 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
                 else if (!(dqblk->dqb_valid & QIF_ITIME))
                         dquot->dq_dqb.dqb_itime = cfs_time_current_sec() +
                                 qinfo->qi_info[dquot->dq_type].dqi_igrace;
+
+                flag |= LQUOTA_FLAGS_ADJINO;
         }
+        QAQ_DEBUG(oqaq, "before dquot_create_oqaq\n");
+        rc = dquot_create_oqaq(qctxt, dquot, lov->desc.ld_tgt_count, 1,
+                               flag, oqaq);
+        QAQ_DEBUG(oqaq, "after dquot_create_oqaq\n");
+        if (rc < 0)
+                CDEBUG(D_QUOTA, "adjust qunit size failed! (rc:%d)\n", rc);
+
 
         rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT);
 
@@ -859,38 +1319,47 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
         }
 
         up(&mds->mds_qonoff_sem);
-        if (dqblk->dqb_valid & QIF_ILIMITS) {
-                set = !(ihardlimit || isoftlimit);
-                rc = mds_init_slave_ilimits(obd, oqctl, set);
+        orig_set = ihardlimit || isoftlimit;
+        now_set  = dqblk->dqb_ihardlimit || dqblk->dqb_isoftlimit;
+        if (dqblk->dqb_valid & QIF_ILIMITS && orig_set != now_set) {
+                down(&dquot->dq_sem);
+                dquot->dq_dqb.dqb_curinodes = 0;
+                up(&dquot->dq_sem);
+                rc = mds_init_slave_ilimits(obd, oqctl, orig_set, oqaq);
                 if (rc) {
                         CERROR("init slave ilimits failed! (rc:%d)\n", rc);
                         goto revoke_out;
                 }
         }
 
-        if (dqblk->dqb_valid & QIF_BLIMITS) {
-                set = !(bhardlimit || bsoftlimit);
-                rc = mds_init_slave_blimits(obd, oqctl, set);
+        orig_set = bhardlimit || bsoftlimit;
+        now_set  = dqblk->dqb_bhardlimit || dqblk->dqb_bsoftlimit;
+        if (dqblk->dqb_valid & QIF_BLIMITS && orig_set != now_set) {
+                down(&dquot->dq_sem);
+                dquot->dq_dqb.dqb_curspace = 0;
+                up(&dquot->dq_sem);
+                rc = mds_init_slave_blimits(obd, oqctl, orig_set, oqaq);
                 if (rc) {
                         CERROR("init slave blimits failed! (rc:%d)\n", rc);
                         goto revoke_out;
                 }
         }
-        down(&mds->mds_qonoff_sem);
 
 revoke_out:
+        down(&mds->mds_qonoff_sem);
+        down(&dquot->dq_sem);
         if (rc) {
                 /* cancel previous setting */
-                down(&dquot->dq_sem);
                 dquot->dq_dqb.dqb_ihardlimit = ihardlimit;
                 dquot->dq_dqb.dqb_isoftlimit = isoftlimit;
                 dquot->dq_dqb.dqb_bhardlimit = bhardlimit;
                 dquot->dq_dqb.dqb_bsoftlimit = bsoftlimit;
                 dquot->dq_dqb.dqb_btime = btime;
                 dquot->dq_dqb.dqb_itime = itime;
-                fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT);
-                up(&dquot->dq_sem);
         }
+        rc2 = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT);
+        up(&dquot->dq_sem);
+
 out:
         down(&dquot->dq_sem);
         dquot->dq_status &= ~DQ_STATUS_SET;
@@ -899,14 +1368,18 @@ out:
         EXIT;
 out_sem:
         up(&mds->mds_qonoff_sem);
-        return rc;
+
+        if (oqaq)
+                OBD_FREE_PTR(oqaq);
+
+        return rc ? rc : rc2;
 }
 
 static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl)
 {
         struct obd_quotactl *soqc;
         struct lvfs_run_ctxt saved;
-        int rc;
+        int rc, rc1;
         ENTRY;
 
         OBD_ALLOC_PTR(soqc);
@@ -917,26 +1390,29 @@ static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl)
         soqc->qc_id = oqctl->qc_id;
         soqc->qc_type = oqctl->qc_type;
 
+        /* get block usage from OSS */
+        soqc->qc_dqblk.dqb_curspace = 0;
         rc = obd_quotactl(obd->u.mds.mds_osc_exp, soqc);
-        if (rc)
-               GOTO(out, rc);
-
-        oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace;
+        if (!rc) {
+                oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace;
+                oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
+        }
 
-        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        /* get block/inode usage from MDS */
         soqc->qc_dqblk.dqb_curspace = 0;
-        rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, soqc);
+        soqc->qc_dqblk.dqb_curinodes = 0;
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        rc1 = fsfilt_quotactl(obd, obd->u.obt.obt_sb, soqc);
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        if (!rc1) {
+                oqctl->qc_dqblk.dqb_curspace += soqc->qc_dqblk.dqb_curspace;
+                oqctl->qc_dqblk.dqb_curinodes = soqc->qc_dqblk.dqb_curinodes;
+                oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
+        }
 
-        if (rc)
-                GOTO(out, rc);
-
-        oqctl->qc_dqblk.dqb_curinodes += soqc->qc_dqblk.dqb_curinodes;
-        oqctl->qc_dqblk.dqb_curspace += soqc->qc_dqblk.dqb_curspace;
-        EXIT;
-out:
         OBD_FREE_PTR(soqc);
-        return rc;
+
+        RETURN(rc ? : rc1);
 }
 
 int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
@@ -949,6 +1425,7 @@ int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
         ENTRY;
 
         down(&mds->mds_qonoff_sem);
+        dqblk->dqb_valid = 0;
         if (qinfo->qi_files[oqctl->qc_type] == NULL)
                 GOTO(out, rc = -ESRCH);
 
@@ -963,6 +1440,7 @@ int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
         dqblk->dqb_bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit;
         dqblk->dqb_btime = dquot->dq_dqb.dqb_btime;
         dqblk->dqb_itime = dquot->dq_dqb.dqb_itime;
+        dqblk->dqb_valid |= QIF_LIMITS | QIF_TIMES;
         up(&dquot->dq_sem);
 
         lustre_dqput(dquot);
@@ -997,7 +1475,7 @@ static int
 dquot_recovery(struct obd_device *obd, unsigned int id, unsigned short type)
 {
         struct mds_obd *mds = &obd->u.mds;
-        struct lustre_quota_info *qinfo= &obd->u.mds.mds_quota_info;
+        struct lustre_quota_info *qinfo= &mds->mds_quota_info;
         struct lustre_dquot *dquot;
         struct obd_quotactl *qctl;
         __u64 total_limits = 0;
@@ -1030,7 +1508,7 @@ dquot_recovery(struct obd_device *obd, unsigned int id, unsigned short type)
         qctl->qc_type = type;
         qctl->qc_id = id;
         qctl->qc_stat = QUOTA_RECOVERING;
-        rc = obd_quotactl(obd->u.mds.mds_osc_exp, qctl);
+        rc = obd_quotactl(mds->mds_osc_exp, qctl);
         if (rc)
                 GOTO(out, rc);
         total_limits = qctl->qc_dqblk.dqb_bhardlimit;
@@ -1094,7 +1572,7 @@ static int qmaster_recovery_main(void *arg)
                         continue;
                 }
                 CFS_INIT_LIST_HEAD(&id_list);
-                rc = fsfilt_qids(obd, qinfo->qi_files[type], NULL, type, 
+                rc = fsfilt_qids(obd, qinfo->qi_files[type], NULL, type,
                                  &id_list);
                 up(&mds->mds_qonoff_sem);
 
@@ -1142,3 +1620,5 @@ int mds_quota_recovery(struct obd_device *obd)
         wait_for_completion(&data.comp);
         RETURN(rc);
 }
+
+#endif /* HAVE_QUOTA_SUPPORT */
index 90cca7c..09fbb63 100755 (executable)
@@ -36,7 +36,6 @@ if [ "$ACC_SM_ONLY" ]; then
     done
 fi
 LFSCK="no" # bug 13698
-SANITY_QUOTA="no" # bug 13058
 
 LIBLUSTRETESTS=${LIBLUSTRETESTS:-../liblustre/tests}
 
index 99a3ccb..9ef06ad 100644 (file)
@@ -59,6 +59,10 @@ MOUNTOPT=""
     MKFSOPT=$MKFSOPT" -i $MDSISIZE"
 [ "x$MKFSOPT" != "x" ] &&
     MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$SECLEVEL" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param mdt.sec_level=$SECLEVEL"
+[ "x$MDSCAPA" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param mdt.capa=$MDSCAPA"
 [ "x$mdsfailover_HOST" != "x" ] &&
     MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
 [ "x$STRIPE_BYTES" != "x" ] &&
@@ -76,6 +80,10 @@ MOUNTOPT=""
     MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
 [ "x$MKFSOPT" != "x" ] &&
     MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$SECLEVEL" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param ost.sec_level=$SECLEVEL"
+[ "x$OSSCAPA" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param ost.capa=$OSSCAPA"
 [ "x$ostfailover_HOST" != "x" ] &&
     MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
 OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT"
index 10ba95f..6358789 100644 (file)
@@ -69,8 +69,10 @@ MOUNTOPT=""
     MKFSOPT=$MKFSOPT" -i $MDSISIZE"
 [ "x$MKFSOPT" != "x" ] &&
     MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$SECLEVEL" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param mdt.sec_level=$SECLEVEL"
 [ "x$MDSCAPA" != "x" ] &&
-    MKFSOPT="--param mdt.capa=$MDSCAPA"
+    MOUNTOPT=$MOUNTOPT" --param mdt.capa=$MDSCAPA"
 [ "x$mdsfailover_HOST" != "x" ] &&
     MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
 [ "x$STRIPE_BYTES" != "x" ] &&
@@ -88,8 +90,10 @@ MOUNTOPT=""
     MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
 [ "x$MKFSOPT" != "x" ] &&
     MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
+[ "x$SECLEVEL" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param ost.sec_level=$SECLEVEL"
 [ "x$OSSCAPA" != "x" ] &&
-    MKFSOPT="--param ost.capa=$OSSCAPA"
+    MOUNTOPT=$MOUNTOPT" --param ost.capa=$OSSCAPA"
 [ "x$ostfailover_HOST" != "x" ] &&
     MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
 OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT"
index f958d58..6422b79 100644 (file)
@@ -50,8 +50,10 @@ MKFSOPT=""
     MKFSOPT=$MKFSOPT" -i $MDSISIZE"
 [ "x$MKFSOPT" != "x" ] &&
     MKFSOPT="--mkfsoptions=\\\"$MKFSOPT\\\""
+[ "x$SECLEVEL" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param mdt.sec_level=$SECLEVEL"
 [ "x$MDSCAPA" != "x" ] &&
-    MKFSOPT="--param mdt.capa=$MDSCAPA"
+    MOUNTOPT=$MOUNTOPT" --param mdt.capa=$MDSCAPA"
 [ "x$mdsfailover_HOST" != "x" ] &&
     MDSOPT=$MDSOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`"
 [ "x$STRIPE_BYTES" != "x" ] &&
@@ -69,8 +71,10 @@ MKFSOPT=""
     MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE"
 [ "x$MKFSOPT" != "x" ] &&
     MKFSOPT="--mkfsoptions=\\\"$MKFSOPT\\\""
+[ "x$SECLEVEL" != "x" ] &&
+    MOUNTOPT=$MOUNTOPT" --param ost.sec_level=$SECLEVEL"
 [ "x$OSSCAPA" != "x" ] &&
-    MKFSOPT="--param ost.capa=$OSSCAPA"
+    MOUNTOPT=$MOUNTOPT" --param ost.capa=$OSSCAPA"
 [ "x$ostfailover_HOST" != "x" ] &&
     OSTOPT=$OSTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
 OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $OSTOPT $OST_MKFS_OPTS"
@@ -79,6 +83,7 @@ MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop,user_xattr,acl"}
 OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"}
 
 #client
+MOUNTOPT=""
 MOUNT=${MOUNT:-/mnt/${FSNAME}}
 MOUNT1=${MOUNT1:-$MOUNT}
 MOUNT2=${MOUNT2:-${MOUNT}2}
index cb3dc82..740ce41 100644 (file)
@@ -17,7 +17,8 @@ SRCDIR=`dirname $0`
 export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin
 
 ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT="$SANITY_QUOTA_EXCEPT"
+# enable test_23 after bug 16542 is fixed.
+ALWAYS_EXCEPT="10 23 $SANITY_QUOTA_EXCEPT"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 case `uname -r` in
@@ -36,10 +37,8 @@ TSTID2=${TSTID2:-60001}
 TSTUSR=${TSTUSR:-"quota_usr"}
 TSTUSR2=${TSTUSR2:-"quota_2usr"}
 BLK_SZ=1024
-BUNIT_SZ=${BUNIT_SZ:-1000}     # default 1000 quota blocks
-BTUNE_SZ=${BTUNE_SZ:-500}      # default 50% of BUNIT_SZ
-IUNIT_SZ=${IUNIT_SZ:-10}       # default 10 files
-ITUNE_SZ=${ITUNE_SZ:-5}                # default 50% of IUNIT_SZ
+BUNIT_SZ=${BUNIT_SZ:-1024}     # min block quota unit(kB)
+IUNIT_SZ=${IUNIT_SZ:-10}       # min inode quota unit
 MAX_DQ_TIME=604800
 MAX_IQ_TIME=604800
 
@@ -48,11 +47,12 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+DIRECTIO=${DIRECTIO:-$LUSTRE/tests/directio}
 
 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
 
-[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11"
+[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21"
 
 QUOTALOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log}
 
@@ -63,11 +63,11 @@ DIR2=${DIR2:-$MOUNT2}
 
 check_and_setup_lustre
 
-LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1`
-OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd`
+LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1`
+OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd`
 
-SHOW_QUOTA_USER="$LFS quota -u $TSTUSR $DIR"
-SHOW_QUOTA_GROUP="$LFS quota -g $TSTUSR $DIR"
+SHOW_QUOTA_USER="$LFS quota -v -u $TSTUSR $DIR"
+SHOW_QUOTA_GROUP="$LFS quota -v -g $TSTUSR $DIR"
 SHOW_QUOTA_INFO="$LFS quota -t $DIR"
 
 # control the time of tests
@@ -81,74 +81,59 @@ eval ONLY_99=true
 
 # set_blk_tunables(btune_sz)
 set_blk_tunesz() {
+       local btune=$(($1 * BLK_SZ))
        # set btune size on all obdfilters
-       do_facet ost1 "set -x; for i in /proc/fs/lustre/obdfilter/*/quota_btune_sz; do
-               echo $(($1 * BLK_SZ)) >> \\\$i;
-       done"
+       do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_btune_sz=$btune"
        # set btune size on mds
-       do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_btune_sz; do
-               echo $(($1 * BLK_SZ)) >> \\\$i;
-       done"
+       do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_btune_sz=$btune"
 }
 
 # set_blk_unitsz(bunit_sz)
 set_blk_unitsz() {
-       do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_bunit_sz; do
-               echo $(($1 * BLK_SZ)) >> \\\$i;
-       done"
-       do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_bunit_sz; do
-               echo $(($1 * BLK_SZ)) >> \\\$i;
-       done"
+       local bunit=$(($1 * BLK_SZ))
+       # set bunit size on all obdfilters
+       do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_bunit_sz=$bunit"
+       # set bunit size on mds
+       do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_bunit_sz=$bunit"
 }
 
 # set_file_tunesz(itune_sz)
 set_file_tunesz() {
-       # set iunit and itune size on all obdfilters
-       do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_itune_sz; do
-               echo $1 >> \\\$i;
-       done"
-       # set iunit and itune size on mds
-       do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_itune_sz; do
-               echo $1 >> \\\$i;
-       done"
+       local itune=$1
+       # set itune size on all obdfilters
+       do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_itune_sz=$itune"
+       # set itune size on mds
+       do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_itune_sz=$itune"
 }
 
 # set_file_unitsz(iunit_sz)
 set_file_unitsz() {
-       do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_iunit_sz; do
-               echo $1 >> \\\$i;
-       done"
-       do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_iunit_sz; do
-               echo $1 >> \\\$i;
-       done"
+       local iunit=$1
+       # set iunit size on all obdfilters
+       do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_iunit_sz=$iunit"
+       # set iunit size on mds
+       do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_iunit_sz=$iunit"
 }
 
-# These are for test on local machine,if run sanity-quota.sh on 
-# real cluster, ltest should have setup the test environment: 
-#
-# - create test user/group on all servers with same id.
-# - set unit size/tune on all servers size to reasonable value.
-pre_test() {
-       if [ -z "$NOSETUP" ]; then
-               # set block tunables
-               set_blk_tunesz $BTUNE_SZ
-               set_blk_unitsz $BUNIT_SZ
-               # set file tunables
-               set_file_tunesz $ITUNE_SZ
-               set_file_unitsz $IUNIT_SZ
-       fi
-}
-pre_test
-
-post_test() {
-       if [ -z "$NOSETUP" ]; then
-               # restore block tunables to default size
-               set_blk_unitsz $((1024 * 100))
-               set_blk_tunesz $((1024 * 50))
-               # restore file tunables to default size
-               set_file_unitsz 5000
-               set_file_tunesz 2500
-       fi
+lustre_fail() {
+       local fail_node=$1
+       local fail_loc=$2
+
+       case $fail_node in
+           "mds" )
+               do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc" ;;
+           "ost" )
+               for num in `seq $OSTCOUNT`; do
+                   do_facet ost$num "lctl set_param fail_loc=$fail_loc"
+               done ;;
+           "mds_ost" )
+               do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc" ;
+               for num in `seq $OSTCOUNT`; do
+                   do_facet ost$num "lctl set_param fail_loc=$fail_loc"
+               done ;;
+           * ) echo "usage: lustre_fail fail_node fail_loc" ;
+               return 1 ;;
+       esac
 }
 
 RUNAS="runas -u $TSTID"
@@ -158,126 +143,232 @@ FAIL_ON_ERROR=true check_runas_id $TSTID2 $RUNAS2
 
 FAIL_ON_ERROR=false
 
+run_test_with_stat() {
+       (($# != 2)) && error "the number of arguments is wrong"
+
+       do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.stats=0" > /dev/null
+       for j in `seq $OSTCOUNT`; do
+           do_facet ost$j "lctl set_param lquota.${FSNAME}-OST*.stats=0" > /dev/null
+       done
+       run_test "$@"
+       if [ ${STAT:-"yes"} != "no" -a -z "$LAST_SKIPPED" ]; then
+           echo "statistics info begin ***************************************"
+           do_facet $SINGLEMDS "lctl get_param lquota.mdd_obd-${FSNAME}-MDT*.stats"
+           for j in `seq $OSTCOUNT`; do
+               do_facet ost$j "lctl get_param lquota.${FSNAME}-OST*.stats"
+           done
+           echo "statistics info end   ***************************************"
+       fi
+}
+
 # set quota
 test_0() {
        $LFS quotaoff -ug $DIR
        $LFS quotacheck -ug $DIR
 
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+       lctl set_param debug="+quota"
+       do_facet $SINGLEMDS "lctl set_param debug=+quota"
+       for num in `seq $OSTCOUNT`; do
+           do_facet ost$num "lctl set_param debug=+quota"
+       done
 }
-run_test 0 "Set quota ============================="
+run_test_with_stat 0 "Set quota ============================="
 
-# block hard limit (normal use and out of quota)
-test_1() {
+# test for specific quota limitation, qunit, qtune $1=block_quota_limit
+test_1_sub() {
+       LIMIT=$1
        mkdir -p $DIR/$tdir
        chmod 0777 $DIR/$tdir
+       TESTFILE="$DIR/$tdir/$tfile-0"
 
-       LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 5)) # 5 bunits each sever
-       TESTFILE=$DIR/$tdir/$tfile-0    
-       
-       echo "  User quota (limit: $LIMIT kbytes)"
-       $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR
+       wait_delete_completed
+
+       # test for user
+       log "  User quota (limit: $LIMIT kbytes)"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+       sleep 3
        $SHOW_QUOTA_USER
-       
+
        $LFS setstripe $TESTFILE -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
 
-       echo "    Write ..."
+       log "    Write ..."
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(usr) write failure, but expect success"
-       echo "    Done"
-       echo "    Write out of block quota ..."
+       log "    Done"
+       log "    Write out of block quota ..."
        # this time maybe cache write,  ignore it's failure
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true
        # flush cache, ensure noquota flag is setted on client
-       sync; sleep 1; sync;
+       cancel_lru_locks osc
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(usr) write success, but expect EDQUOT"
 
        rm -f $TESTFILE
-       
-       echo "  Group quota (limit: $LIMIT kbytes)"
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR           # clear user limit
-       $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR
+       sync; sleep 1; sync;
+       OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'`
+       OST0_QUOTA_USED=`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'`
+       echo $OST0_QUOTA_USED
+       [ $OST0_QUOTA_USED -ne 0 ] && \
+           ($SHOW_QUOTA_USER; error "quota deleted isn't released")
+       $SHOW_QUOTA_USER
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+       # test for group
+       log "--------------------------------------"
+       log "  Group quota (limit: $LIMIT kbytes)"
+       $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+       sleep 3
        $SHOW_QUOTA_GROUP
-       TESTFILE=$DIR/$tdir/$tfile-1    
+       TESTFILE="$DIR/$tdir/$tfile-1"
 
        $LFS setstripe $TESTFILE -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
 
-       echo "    Write ..."
+       log "    Write ..."
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(grp) write failure, but expect success"
-       echo "    Done"
-       echo "    Write out of block quota ..."
+       log "    Done"
+       log "    Write out of block quota ..."
        # this time maybe cache write, ignore it's failure
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true
-       sync; sleep 1; sync;
+       cancel_lru_locks osc
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(grp) write success, but expect EDQUOT"
 
        # cleanup
        rm -f $TESTFILE
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+       sync; sleep 1; sync;
+       OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'`
+       OST0_QUOTA_USED=`$LFS quota -o $OST0_UUID -g $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'`
+       echo $OST0_QUOTA_USED
+       [ $OST0_QUOTA_USED -ne 0 ] && \
+           ($SHOW_QUOTA_USER; error "quota deleted isn't released")
+       $SHOW_QUOTA_GROUP
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 }
-run_test 1 "Block hard limit (normal use and out of quota) ==="
 
-# file hard limit (normal use and out of quota)
-test_2() {
+# block hard limit (normal use and out of quota)
+test_1() {
+       for i in `seq 1 $cycle`; do
+           # define blk_qunit is between 1M and 4M
+           blk_qunit=$(( $RANDOM % 3072 + 1024 ))
+           blk_qtune=$(( $RANDOM % $blk_qunit ))
+           # other osts and mds will occupy at 1M blk quota
+           b_limit=$(( ($RANDOM - 16384) / 8 +  $OSTCOUNT * $blk_qunit * 4 ))
+           set_blk_tunesz $blk_qtune
+           set_blk_unitsz $blk_qunit
+           echo "cycle: $i(total $cycle) bunit:$blk_qunit, btune:$blk_qtune, blimit:$b_limit"
+           test_1_sub $b_limit
+           echo "=================================================="
+           set_blk_unitsz $((128 * 1024))
+           set_blk_tunesz $((128 * 1024 / 2))
+       done
+}
+run_test_with_stat 1 "Block hard limit (normal use and out of quota) ==="
+
+# test for specific quota limitation, qunit, qtune $1=file_quota_limit
+test_2_sub() {
+       LIMIT=$1
        mkdir -p $DIR/$tdir
        chmod 0777 $DIR/$tdir
+       TESTFILE="$DIR/$tdir/$tfile-0"
 
-       LIMIT=$(($IUNIT_SZ * 10)) # 10 iunits on mds
-       TESTFILE=$DIR/$tdir/$tfile-0    
+       wait_delete_completed
 
-       echo "  User quota (limit: $LIMIT files)"
-       $LFS setquota -u $TSTUSR 0 0 0 $LIMIT $DIR
+       # test for user
+       log "  User quota (limit: $LIMIT files)"
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I $LIMIT $DIR
+       sleep 3
        $SHOW_QUOTA_USER
 
-       echo "    Create $LIMIT files ..."
+       log "    Create $LIMIT files ..."
        $RUNAS createmany -m ${TESTFILE} $LIMIT || \
-           error "(usr) create failure, but expect success"
-       echo "    Done"
-       echo "    Create out of file quota ..."
+               error "(usr) create failure, but expect success"
+       log "    Done"
+       log "    Create out of file quota ..."
        $RUNAS touch ${TESTFILE}_xxx && \
-               error "(usr) touch success, but expect EDQUOT"
+               error "(usr) touch success, but expect EDQUOT"
 
        unlinkmany ${TESTFILE} $LIMIT
-       rm ${TESTFILE}_xxx
+       rm -f ${TESTFILE}_xxx
+       sync; sleep 1; sync;
+
+       MDS_UUID=`do_facet $SINGLEMDS $LCTL dl | grep -m1 " mdt " | awk '{print $((NF-1))}'`
+       MDS_QUOTA_USED=`$LFS quota -o $MDS_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'`
+       echo $MDS_QUOTA_USED
+       [ $MDS_QUOTA_USED -ne 0 ] && \
+           ($SHOW_QUOTA_USER; error "quota deleted isn't released")
+       $SHOW_QUOTA_USER
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 
-       echo "  Group quota (limit: $LIMIT files)"
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR           # clear user limit
-       $LFS setquota -g $TSTUSR 0 0 0 $LIMIT $DIR
+       # test for group
+       log "--------------------------------------"
+       log "  Group quota (limit: $LIMIT FILE)"
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I $LIMIT $DIR
+       sleep 3
        $SHOW_QUOTA_GROUP
        TESTFILE=$DIR/$tdir/$tfile-1
 
-       echo "    Create $LIMIT files ..."
+       log "    Create $LIMIT files ..."
        $RUNAS createmany -m ${TESTFILE} $LIMIT || \
-               error "(grp) create failure, but expect success"
+               error "(usr) create failure, but expect success"
+       log "    Done"
+       log "    Create out of file quota ..."
+       $RUNAS touch ${TESTFILE}_xxx && \
+               error "(usr) touch success, but expect EDQUOT"
 
-       echo "    Done"
-       echo "    Create out of file quota ..."
-        $RUNAS touch ${TESTFILE}_xxx && \
-                error "(grp) touch success, but expect EDQUOT"
+       unlinkmany ${TESTFILE} $LIMIT
+       rm -f ${TESTFILE}_xxx
+       sync; sleep 1; sync;
 
-       $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(grp) touch success, but expect EDQUOT"
+       MDS_UUID=`do_facet $SINGLEMDS $LCTL dl | grep -m1 " mdt " | awk '{print $((NF-1))}'`
+       MDS_QUOTA_USED=`$LFS quota -o $MDS_UUID -g $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'`
+       echo $MDS_QUOTA_USED
+       [ $MDS_QUOTA_USED -ne 0 ] && \
+           ($SHOW_QUOTA_USER; error "quota deleted isn't released")
+       $SHOW_QUOTA_GROUP
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+}
 
-       # cleanup
-       unlinkmany ${TESTFILE} $LIMIT
-       rm ${TESTFILE}_xxx
+# file hard limit (normal use and out of quota)
+test_2() {
+       for i in `seq 1 $cycle`; do
+           if [ $i -eq 1 ]; then
+               ino_qunit=52
+               ino_qtune=41
+               i_limit=11
+           else
+               # define ino_qunit is between 10 and 100
+               ino_qunit=$(( $RANDOM % 90 + 10 ))
+               ino_qtune=$(( $RANDOM % $ino_qunit ))
+               # RANDOM's maximum is 32767
+               i_limit=$(( $RANDOM % 990 + 10 ))
+           fi
 
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+           set_file_tunesz $ino_qtune
+           set_file_unitsz $ino_qunit
+           echo "cycle: $i(total $cycle) iunit:$ino_qunit, itune:$ino_qtune, ilimit:$i_limit"
+           test_2_sub $i_limit
+           echo "=================================================="
+           set_file_unitsz 5120
+           set_file_tunesz 2560
+       done
 }
-run_test 2 "File hard limit (normal use and out of quota) ==="
+run_test_with_stat 2 "File hard limit (normal use and out of quota) ==="
 
 test_block_soft() {
        TESTFILE=$1
        TIMER=$(($2 * 3 / 2))
        OFFSET=0
 
+       wait_delete_completed
+
        echo "    Write to exceed soft limit"
        RUNDD="$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ"
        $RUNDD count=$((BUNIT_SZ+1)) || \
-               error "write failure, but expect success"
+               error "write failure, but expect success"
        OFFSET=$((OFFSET + BUNIT_SZ + 1))
-       sync; sleep 1; sync;
+       cancel_lru_locks osc
 
        $SHOW_QUOTA_USER
        $SHOW_QUOTA_GROUP
@@ -285,38 +376,38 @@ test_block_soft() {
 
        echo "    Write before timer goes off"
        $RUNDD count=$BUNIT_SZ seek=$OFFSET || \
-               error "write failure, but expect success"
+               error "write failure, but expect success"
        OFFSET=$((OFFSET + BUNIT_SZ))
-       sync; sleep 1; sync;
+       cancel_lru_locks osc
        echo "    Done"
-       
-        echo "    Sleep $TIMER seconds ..."
-        sleep $TIMER
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
-        $SHOW_QUOTA_INFO
+       echo "    Sleep $TIMER seconds ..."
+       sleep $TIMER
+
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_INFO
 
        echo "    Write after timer goes off"
        # maybe cache write, ignore.
-       sync; sleep 1; sync;
        $RUNDD count=$BUNIT_SZ seek=$OFFSET || true
        OFFSET=$((OFFSET + BUNIT_SZ))
-       sync; sleep 1; sync;
+       cancel_lru_locks osc
        $RUNDD count=$BUNIT_SZ seek=$OFFSET && \
-               error "write success, but expect EDQUOT"
+               error "write success, but expect EDQUOT"
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
-        $SHOW_QUOTA_INFO
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_INFO
 
        echo "    Unlink file to stop timer"
        rm -f $TESTFILE
+       sync; sleep 1; sync
        echo "    Done"
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
-        $SHOW_QUOTA_INFO
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_INFO
 
        echo "    Write ..."
        $RUNDD count=$BUNIT_SZ || error "write failure, but expect success"
@@ -324,6 +415,7 @@ test_block_soft() {
 
        # cleanup
        rm -f $TESTFILE
+       sync; sleep 3; sync;
 }
 
 # block soft limit (start timer, timer goes off, stop timer)
@@ -331,7 +423,8 @@ test_3() {
        mkdir -p $DIR/$tdir
        chmod 0777 $DIR/$tdir
 
-       LIMIT=$(( $BUNIT_SZ * 2 )) # 1 bunit on mds and 1 bunit on the ost
+       # 1 bunit on mds and 1 bunit on every ost
+       LIMIT=$(( $BUNIT_SZ * ($OSTCOUNT + 1) ))
        GRACE=10
 
        echo "  User quota (soft limit: $LIMIT kbytes  grace: $GRACE seconds)"
@@ -340,11 +433,11 @@ test_3() {
        $LFS setstripe $TESTFILE -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
 
-       $LFS setquota -t -u $GRACE $MAX_IQ_TIME $DIR
-       $LFS setquota -u $TSTUSR $LIMIT 0 0 0 $DIR
+       $LFS setquota -t -u --block-grace $GRACE --inode-grace $MAX_IQ_TIME $DIR
+       $LFS setquota -u $TSTUSR -b $LIMIT -B 0 -i 0 -I 0 $DIR
 
        test_block_soft $TESTFILE $GRACE
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 
        echo "  Group quota (soft limit: $LIMIT kbytes  grace: $GRACE seconds)"
        TESTFILE=$DIR/$tdir/$tfile-1
@@ -352,19 +445,21 @@ test_3() {
        $LFS setstripe $TESTFILE -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
 
-       $LFS setquota -t -g $GRACE $MAX_IQ_TIME $DIR
-       $LFS setquota -g $TSTUSR $LIMIT 0 0 0 $DIR
+       $LFS setquota -t -g --block-grace $GRACE --inode-grace $MAX_IQ_TIME $DIR
+       $LFS setquota -g $TSTUSR -b $LIMIT -B 0 -i 0 -I 0 $DIR
 
        test_block_soft $TESTFILE $GRACE
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 }
-run_test 3 "Block soft limit (start timer, timer goes off, stop timer) ==="
+run_test_with_stat 3 "Block soft limit (start timer, timer goes off, stop timer) ==="
 
 test_file_soft() {
        TESTFILE=$1
        LIMIT=$2
        TIMER=$(($3 * 3 / 2))
 
+       wait_delete_completed
+
        echo "    Create files to exceed soft limit"
        $RUNAS createmany -m ${TESTFILE}_ $((LIMIT + 1)) || \
                error "create failure, but expect success"
@@ -379,23 +474,24 @@ test_file_soft() {
 
        echo "    Sleep $TIMER seconds ..."
        sleep $TIMER
-       
+
        $SHOW_QUOTA_USER
        $SHOW_QUOTA_GROUP
        $SHOW_QUOTA_INFO
-       
+
        echo "    Create file after timer goes off"
-       $RUNAS createmany -m ${TESTFILE}_after_ $((IUNIT_SZ - 2)) || \
-               error "create ${TESTFILE}_after failure, but expect success"
+       # The minimum inode qunit is 2, so at most 3 inodes of quota
+       # (qunit: 2 + qtune: 1) are left here.
+       $RUNAS touch ${TESTFILE}_after ${TESTFILE}_after1 ${TESTFILE}_after2 || true
        sync; sleep 1; sync
-       $RUNAS touch ${TESTFILE}_after && \
+       $RUNAS touch ${TESTFILE}_after3 && \
                error "create after timer expired, but expect EDQUOT"
        sync; sleep 1; sync
 
        $SHOW_QUOTA_USER
        $SHOW_QUOTA_GROUP
        $SHOW_QUOTA_INFO
-       
+
        echo "    Unlink files to stop timer"
        find `dirname $TESTFILE` -name "`basename ${TESTFILE}`*" | xargs rm -f
        echo "    Done"
@@ -408,6 +504,7 @@ test_file_soft() {
 
        # cleanup
        rm -f ${TESTFILE}_xxx
+       sync; sleep 3; sync;
 }
 
 # file soft limit (start timer, timer goes off, stop timer)
@@ -420,66 +517,70 @@ test_4a() {       # was test_4
        GRACE=5
 
        echo "  User quota (soft limit: $LIMIT files  grace: $GRACE seconds)"
-       $LFS setquota -t -u $MAX_DQ_TIME $GRACE $DIR
-       $LFS setquota -u $TSTUSR 0 0 $LIMIT 0 $DIR
+       $LFS setquota -t -u --block-grace $MAX_DQ_TIME --inode-grace $GRACE $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i $LIMIT -I 0 $DIR
        $SHOW_QUOTA_USER
 
        test_file_soft $TESTFILE $LIMIT $GRACE
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 
        echo "  Group quota (soft limit: $LIMIT files  grace: $GRACE seconds)"
-       $LFS setquota -t -g $MAX_DQ_TIME $GRACE $DIR
-       $LFS setquota -g $TSTUSR 0 0 $LIMIT 0 $DIR
+       $LFS setquota -t -g --block-grace $MAX_DQ_TIME --inode-grace $GRACE $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i $LIMIT -I 0 $DIR
        $SHOW_QUOTA_GROUP
        TESTFILE=$DIR/$tdir/$tfile-1
 
        test_file_soft $TESTFILE $LIMIT $GRACE
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 
        # cleanup
-       $LFS setquota -t -u $MAX_DQ_TIME $MAX_IQ_TIME $DIR
-       $LFS setquota -t -g $MAX_DQ_TIME $MAX_IQ_TIME $DIR
+       $LFS setquota -t -u --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR
+       $LFS setquota -t -g --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR
 }
-run_test 4a "File soft limit (start timer, timer goes off, stop timer) ==="
+run_test_with_stat 4a "File soft limit (start timer, timer goes off, stop timer) ==="
 
 test_4b() {    # was test_4a
-        GR_STR1="1w3d"
-        GR_STR2="1000s"
-        GR_STR3="5s"
-        GR_STR4="1w2d3h4m5s"
-        GR_STR5="5c"
-        GR_STR6="1111111111111111"
-
-        # test of valid grace strings handling
-        echo "  Valid grace strings test"
-        $LFS setquota -t -u $GR_STR1 $GR_STR2 $DIR
-        $LFS quota -u -t $DIR | grep "Block grace time: $GR_STR1"
-        $LFS setquota -t -g $GR_STR3 $GR_STR4 $DIR
-        $LFS quota -g -t $DIR | grep "Inode grace time: $GR_STR4"
-
-        # test of invalid grace strings handling
-        echo "  Invalid grace strings test"
-        ! $LFS setquota -t -u $GR_STR4 $GR_STR5 $DIR
-        ! $LFS setquota -t -g $GR_STR4 $GR_STR6 $DIR
-
-        # cleanup
-        $LFS setquota -t -u $MAX_DQ_TIME $MAX_IQ_TIME $DIR
-        $LFS setquota -t -g $MAX_DQ_TIME $MAX_IQ_TIME $DIR
+       GR_STR1="1w3d"
+       GR_STR2="1000s"
+       GR_STR3="5s"
+       GR_STR4="1w2d3h4m5s"
+       GR_STR5="5c"
+       GR_STR6="1111111111111111"
+
+       wait_delete_completed
+
+       # test of valid grace strings handling
+       echo "  Valid grace strings test"
+       $LFS setquota -t -u --block-grace $GR_STR1 --inode-grace $GR_STR2 $DIR
+       $LFS quota -u -t $DIR | grep "Block grace time: $GR_STR1"
+       $LFS setquota -t -g --block-grace $GR_STR3 --inode-grace $GR_STR4 $DIR
+       $LFS quota -g -t $DIR | grep "Inode grace time: $GR_STR4"
+
+       # test of invalid grace strings handling
+       echo "  Invalid grace strings test"
+       ! $LFS setquota -t -u --block-grace $GR_STR4 --inode-grace $GR_STR5 $DIR
+       ! $LFS setquota -t -g --block-grace $GR_STR4 --inode-grace $GR_STR6 $DIR
+
+       # cleanup
+       $LFS setquota -t -u --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR
+       $LFS setquota -t -g --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR
 }
-run_test 4b "Grace time strings handling ==="
+run_test_with_stat 4b "Grace time strings handling ==="
 
 # chown & chgrp (chown & chgrp successfully even out of block/file quota)
 test_5() {
        mkdir -p $DIR/$tdir
        BLIMIT=$(( $BUNIT_SZ * $((OSTCOUNT + 1)) * 10)) # 10 bunits on each server
        ILIMIT=$(( $IUNIT_SZ * 10 )) # 10 iunits on mds
-       
+
+       wait_delete_completed
+
        echo "  Set quota limit (0 $BLIMIT 0 $ILIMIT) for $TSTUSR.$TSTUSR"
-       $LFS setquota -u $TSTUSR 0 $BLIMIT 0 $ILIMIT $DIR
-       $LFS setquota -g $TSTUSR 0 $BLIMIT 0 $ILIMIT $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B $BLIMIT -i 0 -I $ILIMIT $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B $BLIMIT -i 0 -I $ILIMIT $DIR
        $SHOW_QUOTA_USER
        $SHOW_QUOTA_GROUP
-       
+
        echo "  Create more than $ILIMIT files and more than $BLIMIT kbytes ..."
        createmany -m $DIR/$tdir/$tfile-0_ $((ILIMIT + 1)) || \
                error "touch failure, expect success"
@@ -493,11 +594,12 @@ test_5() {
 
        # cleanup
        unlinkmany $DIR/$tdir/$tfile-0_ $((ILIMIT + 1))
+       sync; sleep 3; sync;
 
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 }
-run_test 5 "Chown & chgrp successfully even out of block/file quota ==="
+run_test_with_stat 5 "Chown & chgrp successfully even out of block/file quota ==="
 
 # block quota acquire & release
 test_6() {
@@ -506,16 +608,18 @@ test_6() {
                return 0;
        fi
 
+       wait_delete_completed
+
        mkdir -p $DIR/$tdir
        chmod 0777 $DIR/$tdir
 
        LIMIT=$((BUNIT_SZ * (OSTCOUNT + 1) * 5)) # 5 bunits per server
        FILEA="$DIR/$tdir/$tfile-0_a"
        FILEB="$DIR/$tdir/$tfile-0_b"
-       
+
        echo "  Set block limit $LIMIT kbytes to $TSTUSR.$TSTUSR"
-       $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR
-       $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
        $SHOW_QUOTA_USER
        $SHOW_QUOTA_GROUP
 
@@ -526,42 +630,44 @@ test_6() {
        chown $TSTUSR.$TSTUSR $FILEB
 
        echo "  Exceed quota limit ..."
-        RUNDD="$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ"
-        $RUNDD count=$((LIMIT - BUNIT_SZ * OSTCOUNT)) || \
-                error "write fileb failure, but expect success"
+       RUNDD="$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ"
+       $RUNDD count=$((LIMIT - BUNIT_SZ * OSTCOUNT)) || \
+               error "write fileb failure, but expect success"
 
-       sync; sleep 1; sync;
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
-        $RUNDD seek=$LIMIT count=$((BUNIT_SZ * OSTCOUNT)) && \
-                error "write fileb success, but expect EDQUOT"
-       sync; sleep 1; sync;
+       cancel_lru_locks osc
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+       $RUNDD seek=$LIMIT count=$((BUNIT_SZ * OSTCOUNT)) && \
+               error "write fileb success, but expect EDQUOT"
+       cancel_lru_locks osc
        echo "  Write to OST0 return EDQUOT"
        # this write maybe cache write, ignore it's failure
-        RUNDD="$RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ"
-        $RUNDD count=$(($BUNIT_SZ * 2)) || true
-       sync; sleep 1; sync;
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
-        $RUNDD count=$((BUNIT_SZ * 2)) seek=$((BUNIT_SZ *2)) && \
-                error "write filea success, but expect EDQUOT"
+       RUNDD="$RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ"
+       $RUNDD count=$(($BUNIT_SZ * 2)) || true
+       cancel_lru_locks osc
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+       $RUNDD count=$((BUNIT_SZ * 2)) seek=$((BUNIT_SZ *2)) && \
+               error "write filea success, but expect EDQUOT"
 
        echo "  Remove fileb to let OST1 release quota"
        rm -f $FILEB
-        sync; sleep 10; sync; # need to allow journal commit for small fs
+       sync; sleep 10; sync; # need to allow journal commit for small fs
 
        echo "  Write to OST0"
        $RUNDD count=$((LIMIT - BUNIT_SZ * OSTCOUNT)) || \
-               error "write filea failure, expect success"
+               error "write filea failure, expect success"
        echo "  Done"
 
        # cleanup
        rm -f $FILEA
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR
-       $LFS setquota -g $TSTUSR 0 0 0 0 $DIR
+       sync; sleep 3; sync;
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
        return 0
 }
-run_test 6 "Block quota acquire & release ========="
+run_test_with_stat 6 "Block quota acquire & release ========="
 
 # quota recovery (block quota only by now)
 test_7()
@@ -569,23 +675,25 @@ test_7()
        mkdir -p $DIR/$tdir
        chmod 0777 $DIR/$tdir
 
-       LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever
+       wait_delete_completed
+
+       LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) ))
        TESTFILE="$DIR/$tdir/$tfile-0"
-       
-       $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR
-       
+
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+
        $LFS setstripe $TESTFILE -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
 
        echo "  Write to OST0..."
        $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ || \
                error "write failure, but expect success"
-       
-       #define OBD_FAIL_OBD_DQACQ               0x604
-       echo 0x604 > /proc/sys/lustre/fail_loc
+
+       #define OBD_FAIL_OBD_DQACQ             0x604
+       lustre_fail mds  0x604
        echo "  Remove files on OST0"
        rm -f $TESTFILE
-       echo 0 > /proc/sys/lustre/fail_loc
+       lustre_fail mds  0
 
        echo "  Trigger recovery..."
        OSC0_UUID="`$LCTL dl | awk '$3 ~ /osc/ { print $1 }'`"
@@ -598,20 +706,20 @@ test_7()
 
        # check limits
        PATTERN="`echo $DIR | sed 's/\//\\\\\//g'`"
-       TOTAL_LIMIT="`$LFS quota -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`"
+       TOTAL_LIMIT="`$LFS quota -v -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`"
        [ $TOTAL_LIMIT -eq $LIMIT ] || error "total limits not recovery!"
        echo "  total limits = $TOTAL_LIMIT"
-       
-        OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'`
-        [ -z "$OST0_UUID" ] && OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'`
-       OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $3 }'`"
+
+       OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'`
+       [ -z "$OST0_UUID" ] && OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'`
+       OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $2 }'`"
        [ $OST0_LIMIT -eq $BUNIT_SZ ] || error "high limits not released!"
        echo "  limits on $OST0_UUID = $OST0_LIMIT"
 
        # cleanup
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 }
-run_test 7 "Quota recovery (only block limit) ======"
+run_test_with_stat 7 "Quota recovery (only block limit) ======"
 
 # run dbench with quota enabled
 test_8() {
@@ -622,9 +730,9 @@ test_8() {
        wait_delete_completed
 
        echo "  Set enough high limit for user: $TSTUSR"
-       $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR
        echo "  Set enough high limit for group: $TSTUSR"
-       $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR
+       $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR
 
        chmod 0777 $DIR/$tdir
        local duration=""
@@ -633,162 +741,140 @@ test_8() {
 
        sync; sleep 3; sync;
 
-       return 0 
+       return 0
 }
-run_test 8 "Run dbench with quota enabled ==========="
+run_test_with_stat 8 "Run dbench with quota enabled ==========="
 
 # run for fixing bug10707, it needs a big room. test for 64bit
 KB=1024
 GB=$((KB * 1024 * 1024))
-FSIZE=$((OSTCOUNT * 9 / 2))
 # Use this as dd bs to decrease time
 # inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS);
 blksize=$((1 << 21)) # 2Mb
+size_file=$((GB * 9 / 2))
+# This check is used only by test_9 and test_10.
+OST0_MIN=4900000 #4.67G
+check_whether_skip () {
+    OST0_SIZE=`$LFS df $DIR | awk '/\[OST:0\]/ {print $4}'`
+    log "OST0_SIZE: $OST0_SIZE  required: $OST0_MIN"
+    if [ $OST0_SIZE -lt $OST0_MIN ]; then
+       echo "WARN: OST0 has less than $OST0_MIN free, skip this test."
+       return 0
+    else
+       return 1
+    fi
+}
 
 test_9() {
-       chmod 0777 $DIR/$tdir
-        lustrefs_size=`(echo 0; df -t lustre -P | awk '{print $4}') | tail -n 1`
-        size_file=$((FSIZE * GB))
-        echo "lustrefs_size:$lustrefs_size  size_file:$((size_file / KB))"
-        if [ $((lustrefs_size * KB)) -lt $size_file ]; then
-               skip "less than $size_file bytes free"
-               return 0;
-        fi
+       check_whether_skip && return 0
 
-        set_blk_unitsz $((1024 * 100))
-        set_blk_tunesz $((1024 * 50))
-
-        # set the D_QUOTA flag
-       debugsave
-       sysctl -w lnet.debug="+quota"
-
-        TESTFILE="$DIR/$tdir/$tfile-0"
+       wait_delete_completed
 
-        BLK_LIMIT=$((100 * KB * KB)) # 100G
-        FILE_LIMIT=1000000
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
 
-        echo "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR"
-        $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR
-        echo "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR"
-        $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR
+       mkdir -p $DIR/$tdir
+       chmod 0777 $DIR/$tdir
+       TESTFILE="$DIR/$tdir/$tfile-0"
 
-        echo "  Set stripe"
-        [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE -c $OSTCOUNT
-        touch $TESTFILE
-        chown $TSTUSR.$TSTUSR $TESTFILE
+       BLK_LIMIT=$((100 * KB * KB)) # 100G
+       FILE_LIMIT=1000000
+       echo "  Set block limit $BLK_LIMIT kbytes to $TSTUSR.$TSTUSR"
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
+       log "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR"
+       $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR
+       log "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR"
+       $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR
 
-        echo "    Write the big file of $FSIZE G ..."
-        $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$blksize count=$((size_file / blksize)) || \
-               error "(usr) write $FSIZE G file failure, but expect success"
+       echo "  Set stripe"
+       $LFS setstripe $TESTFILE -c 1
+       touch $TESTFILE
+       chown $TSTUSR.$TSTUSR $TESTFILE
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
 
-        echo "    delete the big file of $FSIZE G..." 
-        $RUNAS rm -f $TESTFILE
+       log "    Write the big file of 4.5G ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$blksize count=$((size_file / blksize)) || \
+              error "(usr) write 4.5G file failure, but expect success"
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
 
-        echo "    write the big file of 2 G..."
-        $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$blksize count=$((2 * GB / blksize)) || \
-               error "(usr) write 2 G file failure, but expect seccess"
+       log "    delete the big file of 4.5G..."
+       $RUNAS rm -f $TESTFILE
+       sync; sleep 3; sync;
 
-        echo "    delete the big file of 2 G..."
-        $RUNAS rm -f $TESTFILE 
-        RC=$?
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
 
-        set_blk_tunesz $BTUNE_SZ
-        set_blk_unitsz $BUNIT_SZ
+       RC=$?
 
-       debugrestore
-       wait_delete_completed
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
 
-        return $RC
+       return $RC
 }
-run_test 9 "run for fixing bug10707(64bit) ==========="
+run_test_with_stat 9 "run for fixing bug10707(64bit) ==========="
 
 # run for fixing bug10707, it need a big room. test for 32bit
+# Version 2.0 does not support a 32-bit qd_count, so this test is obsolete.
 test_10() {
        mkdir -p $DIR/$tdir
        chmod 0777 $DIR/$tdir
-       lustrefs_size=`(echo 0; df -t lustre -P | awk '{print $4}') | tail -n 1`
-       size_file=$((FSIZE * GB))
-       echo "lustrefs_size:$lustrefs_size  size_file:$((size_file / KB))"
-       if [ $((lustrefs_size * KB)) -lt $size_file ]; then
-               skip "less than $size_file bytes free"
-               return 0;
-       fi
+       check_whether_skip && return 0
 
-       sync; sleep 10; sync;
+       wait_delete_completed
 
-       set_blk_unitsz $((1024 * 100))
-       set_blk_tunesz $((1024 * 50))
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
 
-       # set the D_QUOTA flag
-       debugsave
-       sysctl -w lnet.debug="+quota"
-       
        # make qd_count 32 bit
-       sysctl -w lustre.fail_loc=0xA00
+       lustre_fail mds_ost 0xA00
 
        TESTFILE="$DIR/$tdir/$tfile-0"
 
        BLK_LIMIT=$((100 * KB * KB)) # 100G
        FILE_LIMIT=1000000
 
-       echo "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR"
-       $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR
-       echo "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR"
-       $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR
-       
+       log "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR"
+       $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR
+       log "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR"
+       $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR
+
        echo "  Set stripe"
-       [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE -c $OSTCOUNT
+       $LFS setstripe $TESTFILE -c 1
        touch $TESTFILE
        chown $TSTUSR.$TSTUSR $TESTFILE
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
-
-        echo "    Write the big file of $FSIZE G ..."
-        $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$blksize count=$((size_file / blksize)) || \
-               error "(usr) write $FSIZE G file failure, but expect success"
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
 
-        echo "    delete the big file of $FSIZE G..."
-        $RUNAS rm -f $TESTFILE 
+       log "    Write the big file of 4.5 G ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$blksize count=$((size_file / blksize)) || \
+               error "(usr) write 4.5 G file failure, but expect success"
 
-        $SHOW_QUOTA_USER
-        $SHOW_QUOTA_GROUP
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
 
-       echo "    write the big file of 2 G..."
-       $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$blksize count=$((2 * GB / blkzise)) || \
-               error "(usr) write 2 G file failure, but expect success" 
+       log "    delete the big file of 4.5 G..."
+       $RUNAS rm -f $TESTFILE
+       sync; sleep 3; sync;
 
-       echo "    delete the big file of 2 G..."
-       $RUNAS rm -f $TESTFILE 
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
 
        RC=$?
 
-       # clear the flage
-       debugrestore
-
        # make qd_count 64 bit
-       sysctl -w lustre.fail_loc=0
+       lustre_fail mds_ost 0
 
-       set_blk_tunesz $BTUNE_SZ
-       set_blk_unitsz $BUNIT_SZ
-
-       wait_delete_completed
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
 
        return $RC
 }
-run_test 10 "run for fixing bug10707(32bit) ==========="
+#run_test_with_stat 10 "run for fixing bug10707(32bit) ==========="
 
 test_11() {
        wait_delete_completed
@@ -796,14 +882,14 @@ test_11() {
        #prepare the test
        block_limit=`(echo 0; df -t lustre -P | awk '{print $(NF - 4)}') | tail -n 1`
        echo $block_limit
-       orig_dbr=`cat /proc/sys/vm/dirty_background_ratio`
-       orig_dec=`cat /proc/sys/vm/dirty_expire_centisecs`
-       orig_dr=`cat /proc/sys/vm/dirty_ratio`
-       orig_dwc=`cat /proc/sys/vm/dirty_writeback_centisecs`
-       echo 1  > /proc/sys/vm/dirty_background_ratio
-       echo 30 > /proc/sys/vm/dirty_expire_centisecs
-       echo 1  > /proc/sys/vm/dirty_ratio
-       echo 50 > /proc/sys/vm/dirty_writeback_centisecs
+       orig_dbr=`sysctl -n vm.dirty_background_ratio`
+       orig_dec=`sysctl -n vm.dirty_expire_centisecs`
+       orig_dr=`sysctl -n vm.dirty_ratio`
+       orig_dwc=`sysctl -n vm.dirty_writeback_centisecs`
+       sysctl -w vm.dirty_background_ratio=1
+       sysctl -w vm.dirty_expire_centisecs=30
+       sysctl -w vm.dirty_ratio=1
+       sysctl -w vm.dirty_writeback_centisecs=50
        TESTDIR="$DIR/$tdir"
        local RV=0
 
@@ -819,7 +905,7 @@ test_11() {
           echo -n "    create a file for uid "
           for j in `seq 1 30`; do
               echo -n "$j "
-               # 30MB per dd for a total of 900MB (if space even permits)
+              # 30MB per dd for a total of 900MB (if space even permits)
               runas -u $j dd if=/dev/zero of=$TESTDIR/$tfile  bs=$blksize count=15 > /dev/null 2>&1 &
           done
           echo ""
@@ -838,7 +924,7 @@ test_11() {
                 RV=2
                 break
             fi
-             LAST_USED=$USED
+            LAST_USED=$USED
           done
           echo "    removing the test files..."
           rm -f $TESTDIR/$tfile
@@ -848,16 +934,16 @@ test_11() {
        echo "Test took $SECS sec"
 
        #clean
-       echo $orig_dbr > /proc/sys/vm/dirty_background_ratio
-       echo $orig_dec > /proc/sys/vm/dirty_expire_centisecs
-       echo $orig_dr  > /proc/sys/vm/dirty_ratio
-       echo $orig_dwc > /proc/sys/vm/dirty_writeback_centisecs
+       sysctl -w vm.dirty_background_ratio=$orig_dbr
+       sysctl -w vm.dirty_expire_centisecs=$orig_dec
+       sysctl -w vm.dirty_ratio=$orig_dr
+       sysctl -w vm.dirty_writeback_centisecs=$orig_dwc
        if [ $RV -ne 0 ]; then
-           error "Nothing was written for $SECS sec ... aborting"
+          error "Nothing was written for $SECS sec ... aborting"
        fi
        return $RV
 }
-run_test 11 "run for fixing bug10912 ==========="
+run_test_with_stat 11 "run for fixing bug10912 ==========="
 
 
 # test a deadlock between quota and journal b=11693
@@ -868,106 +954,115 @@ test_12() {
        [ "$(grep $DIR2 /proc/mounts)" ] || mount_client $DIR2 || \
                { skip "Need lustre mounted on $MOUNT2 " && retutn 0; }
 
+       if [ $OSTCOUNT -lt 2 ]; then
+               skip "$OSTCOUNT < 2, too few osts"
+               return 0;
+       fi
+
        LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever
        TESTFILE="$DIR/$tdir/$tfile-0"
        TESTFILE2="$DIR2/$tdir/$tfile-1"
-       
+
+       wait_delete_completed
+
        echo "   User quota (limit: $LIMIT kbytes)"
-       $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
 
-       $LFS setstripe $TESTFILE -i 0 -c 1 
+       $LFS setstripe $TESTFILE -i 0 -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
-       $LFS setstripe $TESTFILE2 -i 0 -c 1
-        chown $TSTUSR2.$TSTUSR2 $TESTFILE2
+       $LFS setstripe $TESTFILE2 -i 1 -c 1
+       chown $TSTUSR2.$TSTUSR2 $TESTFILE2
 
        #define OBD_FAIL_OST_HOLD_WRITE_RPC      0x21f
-       sysctl -w lustre.fail_loc=0x0000021f        
+       lustre_fail ost 0x0000021f
 
        echo "   step1: write out of block quota ..."
-       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT*2)) & 
-       DDPID=$!
-       sleep 5
-       $RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 & 
+       $RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 &
        DDPID1=$!
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT*2)) &
+       DDPID=$!
 
        echo  "   step2: testing ......"
        count=0
        while [ true ]; do
-           if [ -z `ps -ef | awk '$2 == '${DDPID1}' { print $8 }'` ]; then break; fi
+           if ! ps -p ${DDPID1} > /dev/null 2>&1; then break; fi
            count=$[count+1]
            if [ $count -gt 64 ]; then
-               sysctl -w lustre.fail_loc=0
+               lustre_fail ost 0
                error "dd should be finished!"
            fi
            sleep 1
-       done    
+       done
        echo "(dd_pid=$DDPID1, time=$count)successful"
 
        #Recover fail_loc and dd will finish soon
-       sysctl -w lustre.fail_loc=0
+       lustre_fail ost 0
 
        echo  "   step3: testing ......"
        count=0
        while [ true ]; do
-           if [ -z `ps -ef | awk '$2 == '${DDPID}' { print $8 }'` ]; then break; fi
+           if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi
            count=$[count+1]
-           if [ $count -gt 100 ]; then
+           if [ $count -gt 150 ]; then
                error "dd should be finished!"
            fi
            sleep 1
-       done    
+       done
        echo "(dd_pid=$DDPID, time=$count)successful"
 
        rm -f $TESTFILE $TESTFILE2
-       
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR           # clear user limit
+       sync; sleep 3; sync;
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 }
-run_test 12 "test a deadlock between quota and journal ==="
+run_test_with_stat 12 "test a deadlock between quota and journal ==="
 
 # test multiple clients write block quota b=11693
 test_13() {
+       mkdir -p $DIR/$tdir
+       wait_delete_completed
+
        # one OST * 10 + (mds + other OSTs)
        LIMIT=$((BUNIT_SZ * 10 + (BUNIT_SZ * OSTCOUNT)))
        TESTFILE="$DIR/$tdir/$tfile"
-       mkdir -p $DIR/$tdir
 
        echo "   User quota (limit: $LIMIT kbytes)"
-       $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
        $SHOW_QUOTA_USER
-       
+
        $LFS setstripe $TESTFILE -i 0 -c 1
        chown $TSTUSR.$TSTUSR $TESTFILE
        $LFS setstripe $TESTFILE.2 -i 0 -c 1
-        chown $TSTUSR.$TSTUSR $TESTFILE.2
+       chown $TSTUSR.$TSTUSR $TESTFILE.2
 
        echo "   step1: write out of block quota ..."
        # one bunit will give mds
-       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & 
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] &
        DDPID=$!
-       $RUNAS dd if=/dev/zero of=$TESTFILE.2 bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & 
+       $RUNAS dd if=/dev/zero of=$TESTFILE.2 bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] &
        DDPID1=$!
 
        echo  "   step2: testing ......"
        count=0
        while [ true ]; do
-           if [ -z `ps -ef | awk '$2 == '${DDPID}' { print $8 }'` ]; then break; fi
+           if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi
            count=$[count+1]
            if [ $count -gt 64 ]; then
                error "dd should be finished!"
            fi
            sleep 1
-       done    
+       done
        echo "(dd_pid=$DDPID, time=$count)successful"
 
        count=0
        while [ true ]; do
-           if [ -z `ps -ef | awk '$2 == '${DDPID1}' { print $8 }'` ]; then break; fi
+           if ! ps -p ${DDPID1} > /dev/null 2>&1 ; then break; fi
            count=$[count+1]
            if [ $count -gt 64 ]; then
                error "dd should be finished!"
            fi
            sleep 1
-       done    
+       done
        echo "(dd_pid=$DDPID1, time=$count)successful"
 
        sync; sleep 5; sync;
@@ -980,13 +1075,14 @@ test_13() {
                error "files too small $fz + $fz2 < $((BUNIT_SZ * BLK_SZ * 10))"
 
        rm -f $TESTFILE $TESTFILE.2
-       
-       $LFS setquota -u $TSTUSR 0 0 0 0 $DIR           # clear user limit
+       sync; sleep 3; sync;
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
 }
-run_test 13 "test multiple clients write block quota ==="
+run_test_with_stat 13 "test multiple clients write block quota ==="
 
 check_if_quota_zero(){
-        line=`$LFS quota -$1 $2 $DIR | wc -l`
+       line=`$LFS quota -v -$1 $2 $DIR | wc -l`
        for i in `seq 3 $line`; do
            if [ $i -eq 3 ]; then
                field="3 4 6 7"
@@ -994,67 +1090,684 @@ check_if_quota_zero(){
                field="3 5"
            fi
            for j in $field; do
-               tmp=`$LFS quota -$1 $2 $DIR | sed -n ${i}p | 
-                     awk  '{print $'"$j"'}'`
-               [ -n "$tmp" ] && [ $tmp -ne 0 ] && $LFS quota -$1 $2 $DIR && \
+               tmp=`$LFS quota -v -$1 $2 $DIR | sed -n ${i}p |
+                    awk  '{print $'"$j"'}'`
+               [ -n "$tmp" ] && [ $tmp -ne 0 ] && $LFS quota -v -$1 $2 $DIR && \
                    error "quota on $2 isn't clean"
            done
        done
        echo "pass check_if_quota_zero"
 }
 
-pre_test_14 () {
-        # reboot the lustre
-        cd $T_PWD; sh llmountcleanup.sh || error "llmountcleanup failed"
-        sh llmount.sh
-        pre_test
-        run_test 0 "reboot lustre"
-}
-
-pre_test_14 
-
 test_14a() {   # was test_14 b=12223 -- setting quota on root
        TESTFILE="$DIR/$tdir/$tfile"
+
+       # reboot the lustre
+       sync; sleep 5; sync
+       cleanup_and_setup_lustre
+       test_0
+
        mkdir -p $DIR/$tdir
 
        # out of root's file and block quota
-        $LFS setquota -u root 10 10 10 10 $DIR
+       $LFS setquota -u root -b 10 -B 10 -i 10 -I 10 $DIR
        createmany -m ${TESTFILE} 20 || \
            error "unexpected: user(root) create files failly!"
        dd if=/dev/zero of=$TESTFILE bs=4k count=4096 || \
            error "unexpected: user(root) write files failly!"
        chmod 666 $TESTFILE
        $RUNAS dd if=/dev/zero of=${TESTFILE} seek=4096 bs=4k count=4096 && \
-           error "unexpected: user(quota_usr) write a file successfully!"      
+           error "unexpected: user(quota_usr) write a file successfully!"
 
        # trigger the llog
        chmod 777 $DIR
-       for i in `seq 1 10`; do $RUNAS touch ${TESTFILE}a_$i; done 
-       for i in `seq 1 10`; do $RUNAS rm -f ${TESTFILE}a_$i; done 
+       for i in `seq 1 10`; do $RUNAS touch ${TESTFILE}a_$i; done
+       for i in `seq 1 10`; do $RUNAS rm -f ${TESTFILE}a_$i; done
 
        # do the check
-       dmesg | tail | grep "\-122" |grep llog_obd_origin_add && error "err -122 not found in dmesg" 
-       $LFS setquota -u root 0 0 0 0 $DIR
+       dmesg | tail | grep "\-122" |grep llog_obd_origin_add && error "err -122 not found in dmesg"
+       $LFS setquota -u root -b 0 -B 0 -i 0 -I 0 $DIR
        #check_if_quota_zero u root
 
-       # clean 
+       # clean
        unlinkmany ${TESTFILE} 15
        rm -f $TESTFILE
+       sync; sleep 3; sync;
+}
+run_test_with_stat 14a "test setting quota on root ==="
+
+# save quota version (both administrative and operational quotas)
+quota_save_version() {
+       do_facet mgs "lctl conf_param ${FSNAME}-MDT*.mdd.quota_type=$1"
+       do_facet mgs "lctl conf_param ${FSNAME}-OST*.ost.quota_type=$1"
+       sleep 5
+}
+
+test_15(){
+       LIMIT=$((24 * 1024 * 1024 * 1024 * 1024)) # 24 TB
+       PATTERN="`echo $DIR | sed 's/\//\\\\\//g'`"
+
+       wait_delete_completed
+
+       # test for user
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+       TOTAL_LIMIT="`$LFS quota -v -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`"
+       [ $TOTAL_LIMIT -eq $LIMIT ] || error "  (user)total limits = $TOTAL_LIMIT; limit = $LIMIT, failed!"
+       echo "  (user)total limits = $TOTAL_LIMIT; limit = $LIMIT, successful!"
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+
+       # test for group
+       $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+       TOTAL_LIMIT="`$LFS quota -v -g $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`"
+       [ $TOTAL_LIMIT -eq $LIMIT ] || error "  (group)total limits = $TOTAL_LIMIT; limit = $LIMIT, failed!"
+       echo "  (group)total limits = $TOTAL_LIMIT; limit = $LIMIT, successful!"
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+       $LFS quotaoff -ug $DIR
+       do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_type=ug" | grep "error writing" && \
+                error "fail to set version for $SINGLEMDS"
+       for j in `seq $OSTCOUNT`; do
+               do_facet ost$j "lctl set_param lquota.${FSNAME}-OST*.quota_type=ug" | grep "error writing" && \
+                        error "fail to set version for ost$j"
+       done
+
+       echo "invalidating quota files"
+       $LFS quotainv -ug $DIR
+       $LFS quotainv -ugf $DIR
+       $LFS quotacheck -ug $DIR
+}
+run_test_with_stat 15 "set block quota more than 4T ==="
+
+# $1=u/g $2=with qunit adjust or not
+test_16_tub() {
+       LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 4))
+       TESTFILE="$DIR/$tdir/$tfile"
+       mkdir -p $DIR/$tdir
+
+       wait_delete_completed
+
+       echo "  User quota (limit: $LIMIT kbytes)"
+       if [ $1 == "u" ]; then
+           $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+           $SHOW_QUOTA_USER
+       else
+           $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+           $SHOW_QUOTA_GROUP
+       fi
+
+       $LFS setstripe $TESTFILE -c 1
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       echo "    Write ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((BUNIT_SZ * 4)) || \
+           error "(usr) write failure, but expect success"
+       echo "    Done"
+       echo "    Write out of block quota ..."
+       # this time maybe cache write,  ignore it's failure
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) || true
+       # flush cache, ensure noquota flag is setted on client
+       cancel_lru_locks osc
+       if [ $2 -eq 1 ]; then
+           $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) || \
+               error "(write failure, but expect success"
+       else
+           $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) && \
+               error "(write success, but expect EDQUOT"
+       fi
+
+       rm -f $TESTFILE
+       sync; sleep 3; sync;
+       $LFS setquota -$1 $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+}
+
+# test without adjusting qunit
+# 2.0 version does not support WITHOUT_CHANGE_QS, so such test is obsolete
+test_16 () {
+       set_blk_tunesz $((BUNIT_SZ * 2))
+       set_blk_unitsz $((BUNIT_SZ * 4))
+       for i in u g; do
+           for j in 0 1; do
+               # define OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS    0xA01
+               echo " grp/usr: $i, adjust qunit: $j"
+               echo "-------------------------------"
+               [ $j -eq 1 ] && lustre_fail mds_ost 0
+               [ $j -eq 0 ] && lustre_fail mds_ost 0xA01
+               test_16_tub $i $j
+           done
+       done
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+}
+#run_test_with_stat 16 "test without adjusting qunit"
+
+# Regression test for bug14526: failed returned quota reqs shouldn't ruin lustre. Two 10M writes run with a one-shot fault armed on ost.
+test_17() {
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
+
+       wait_delete_completed
+
+       # arm OBD_FAIL_QUOTA_RET_QDATA | OBD_FAIL_ONCE (0x80000A02) on ost
+       lustre_fail ost 0x80000A02
+
+       TESTFILE="$DIR/$tdir/$tfile-a"
+       TESTFILE2="$DIR/$tdir/$tfile-b"
+       mkdir -p $DIR/$tdir
+
+       BLK_LIMIT=$((100 * 1024)) # 100M
+
+       log "  Set enough high limit(block:$BLK_LIMIT) for user: $TSTUSR"
+       $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR
+       log "  Set enough high limit(block:$BLK_LIMIT) for group: $TSTUSR"
+       $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR
+
+       touch $TESTFILE
+       chown $TSTUSR.$TSTUSR $TESTFILE
+       touch $TESTFILE2
+       chown $TSTUSR.$TSTUSR $TESTFILE2
+
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+
+       log "    Write the test file1 ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE  bs=$BLK_SZ count=$(( 10 * 1024 )) \
+           || echo "write 10M file failure"   # only reported; a failure here does not fail the test
+
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+
+       log "    write the test file2 ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE2  bs=$BLK_SZ count=$(( 10 * 1024 )) \
+           || error "write 10M file failure"   # unlike the first write, this one must succeed
+
+       $SHOW_QUOTA_USER
+       $SHOW_QUOTA_GROUP
+
+       rm -f $TESTFILE $TESTFILE2
+       RC=$?   # status of the rm above; it is what the test returns
+       sync; sleep 3; sync;
+
+       # pass 0 to lustre_fail for ost (presumably clears the fail_loc armed above; helper is defined elsewhere)
+       lustre_fail ost 0
+
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+
+       return $RC
+}
+run_test_with_stat 17 "run for fixing bug14526 ==========="
+
+# test when mds takes a long time to handle a quota req so that
+# the ost has dropped it, the ost still could work well b=14840
+test_18() {
+       LIMIT=$((100 * 1024 * 1024)) # 100G
+       TESTFILE="$DIR/$tdir/$tfile"
+       mkdir -p $DIR/$tdir
+
+       wait_delete_completed
+
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
+
+       log "   User quota (limit: $LIMIT kbytes)"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT
+       $SHOW_QUOTA_USER
+
+       $LFS setstripe $TESTFILE -i 0 -c 1
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       #define OBD_FAIL_MDS_BLOCK_QUOTA_REQ      0x142
+       lustre_fail mds 0x142
+
+       log "   step1: write 100M block ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 100)) &
+       DDPID=$!
+
+       sleep 5
+       lustre_fail mds 0
+
+       echo  "   step2: testing ......"
+       count=0
+       timeout=$(lctl get_param -n timeout)
+       while [ true ]; do
+           if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi
+           count=$[count+1]
+           if [ $count -gt $((4 * $timeout)) ]; then
+               error "count=$count dd should be finished!"
+           fi
+           sleep 1
+       done
+       log "(dd_pid=$DDPID, time=$count, timeout=$timeout)"
+       if [ $count -lt $(($timeout - 10)) ]; then
+           error " should take longer!"
+       else
+           echo " successful"
+       fi
+
+       testfile_size=$(stat -c %s $TESTFILE)
+       [ $testfile_size -ne $((BLK_SZ * 1024 * 100)) ] && \
+           error "verifying file failed!"
+       rm -f $TESTFILE
+       sync; sleep 3; sync;
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+}
+run_test_with_stat 18 "run for fixing bug14840 ==========="
+
+# test when mds drops a quota req, the ost still could work well b=14840
+test_18a() {
+       LIMIT=$((100 * 1024 * 1024)) # 100G
+       TESTFILE="$DIR/$tdir/$tfile-a"
+       mkdir -p $DIR/$tdir
+
+       wait_delete_completed
+
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
+
+       log "   User quota (limit: $LIMIT kbytes)"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT
+       $SHOW_QUOTA_USER
+
+       $LFS setstripe $TESTFILE -i 0 -c 1
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       #define OBD_FAIL_MDS_DROP_QUOTA_REQ | OBD_FAIL_ONCE   0x80000143
+       lustre_fail mds 0x80000143
+
+       log "   step1: write 100M block ..."
+       $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 100)) &
+       DDPID=$!
+
+       echo  "   step2: testing ......"
+       count=0
+       timeout=$(lctl get_param -n timeout)
+       while [ true ]; do
+           if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi
+           count=$[count+1]
+           if [ $count -gt $((6 * $timeout)) ]; then
+               lustre_fail mds 0
+               error "count=$count dd should be finished!"
+           fi
+           sleep 1
+       done
+       log "(dd_pid=$DDPID, time=$count, timeout=$timeout)"
+       if [ $count -lt $(($timeout - 10)) ]; then
+           lustre_fail mds 0
+           error " should take longer!"
+       else
+           echo " successful"
+       fi
+
+       lustre_fail mds 0
+
+       rm -f $TESTFILE
+       sync; sleep 3; sync;
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+}
+run_test_with_stat 18a "run for fixing bug14840 ==========="
+
+# test when mds do failover, the ost still could work well without trigger
+# watchdog b=14840
+test_18bc_sub() {
+       type=$1
+
+       LIMIT=$((110 * 1024 )) # 110M
+       TESTFILE="$DIR/$tdir/$tfile"
+       mkdir -p $DIR/$tdir
+
+       wait_delete_completed
+
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
+
+       log "   User quota (limit: $LIMIT kbytes)"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT
+       $SHOW_QUOTA_USER
+
+       $LFS setstripe $TESTFILE -i 0 -c 1
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       timeout=$(sysctl -n lustre.timeout)
+
+       if [ $type = "directio" ]; then
+           log "   write 100M block(directio) ..."
+           $RUNAS $DIRECTIO write $TESTFILE 0 100 $((BLK_SZ * 1024)) &
+       else
+           log "   write 100M block(normal) ..."
+           $RUNAS dd if=/dev/zero of=$TESTFILE bs=$((BLK_SZ * 1024)) count=100 &
+       fi
+
+       DDPID=$!
+       do_facet $SINGLEMDS "$LCTL conf_param ${FSNAME}-MDT*.mdd.quota_type=ug"
+
+       log "failing mds for $((2 * timeout)) seconds"
+       fail $SINGLEMDS $((2 * timeout))
+
+       # check if quotaon successful
+       $LFS quota -u $TSTUSR $MOUNT 2>&1 | grep -q "quotas are not enabled"
+       if [ $? -eq 0 ]; then
+           error "quotaon failed!"
+           rm -rf $TESTFILE
+           return
+       fi
+
+       count=0
+       while [ true ]; do
+           if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi
+           if [ $((++count % (2 * timeout) )) -eq 0 ]; then
+               log "it took $count second"
+           fi
+           sleep 1
+       done
+       log "(dd_pid=$DDPID, time=$count, timeout=$timeout)"
+       sync; sleep 1; sync
+
+       testfile_size=$(stat -c %s $TESTFILE)
+       [ $testfile_size -ne $((BLK_SZ * 1024 * 100)) ] && \
+           error "verifying file failed!"
+       $SHOW_QUOTA_USER
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+       rm -rf $TESTFILE
+       sync; sleep 1; sync
+}
+
+# b=14840: run test_18bc_sub with the normal and the directio writer, then scan
+# the dmesg of ost1 for "Watchdog triggered" lines logged after this test's start marker
+test_18b() {
+       test_18bc_sub normal
+       test_18bc_sub directio
+       # fail if ost1 logged a watchdog message after the line matching $MSG
+       MSG="test 18b: run for fixing bug14840"   # marker the awk script waits for before it starts printing
+       do_facet ost1 "dmesg > $TMP/lustre-log-${TESTNAME}.log"   # dump written on ost1
+       do_facet client cat > $TMP/lustre-log-${TESTNAME}.awk <<-EOF
+               /$MSG/ {
+                   start = 1;
+               }
+               /Watchdog triggered/ {
+                   if (start) {
+                       print \$0;
+                   }
+               }
+       EOF
+       watchdog=`do_facet ost1 awk -f $TMP/lustre-log-${TESTNAME}.awk $TMP/lustre-log-${TESTNAME}.log`
+       if [ -n "$watchdog" ]; then error "$watchdog"; fi   # any printed line is reported as the failure text
+}
+run_test_with_stat 18b "run for fixing bug14840(mds failover, no watchdog) ==========="
+
+# test when mds does failover, the ost still could work well
+# this test will prevent OST_DISCONNET from happening b=14840
+test_18c() {
+       # define OBD_FAIL_OST_DISCONNECT_NET 0x202(disable ost_disconnect for osts)
+       lustre_fail ost  0x202
+       test_18bc_sub normal
+       test_18bc_sub directio
+       lustre_fail ost  0
+}
+run_test_with_stat 18c "run for fixing bug14840(mds failover, OST_DISCONNECT is disabled) ==========="
+
+run_to_block_limit() {
+       local LIMIT=$((($OSTCOUNT + 1) * $BUNIT_SZ))
+       local TESTFILE=$1
+       wait_delete_completed
+
+       # set 1 Mb quota unit size
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
+
+       # bind file to a single OST
+       $LFS setstripe -c 1 $TESTFILE
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       echo "  User quota (limit: $LIMIT kbytes)"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT
+       $SHOW_QUOTA_USER
+       echo "  Updating quota limits"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT
+       $SHOW_QUOTA_USER
+
+       RUNDD="$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ"
+       $RUNDD count=$BUNIT_SZ || error "(usr) write failure, but expect success"
+       # for now page cache of TESTFILE may still be dirty,
+       # let's push it to the corresponding OST, this will also
+       # cache NOQUOTA on the client from OST's reply
+       cancel_lru_locks osc
+       $RUNDD seek=$BUNIT_SZ && error "(usr) write success, should be EDQUOT"
+}
+
+test_19() {
+       # 1 Mb bunit per each MDS/OSS
+       local TESTFILE="$DIR/$tdir/$tfile"
+       mkdir -p $DIR/$tdir
+
+       run_to_block_limit $TESTFILE
+       $SHOW_QUOTA_USER
+
+       # cleanup
+       rm -f $TESTFILE
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+
+}
+run_test_with_stat 19 "test if administrative limits updates do not zero operational limits (14790) ==="
+
+# 15754: set the four limits of user $TSTUSR with the long options and the
+# t/g/m/k suffixes, then check that "lfs quota -v" prints the matching plain
+# numbers on the line for $MOUNT.
+test_20()
+{
+       # limit strings as given to setquota ...
+       LSTR=(1t 2g 3m 4k)
+       # ... and the values expected back in the report
+       LVAL=($((1024 * 1024 * 1024)) $((2 * 1024 * 1024)) $((3 * 1024 * 1024)) $((4 * 1024)))
+
+       $LFS setquota -u $TSTUSR --block-softlimit ${LSTR[0]} $MOUNT ||
+               error "could not set quota limits"
+
+       $LFS setquota -u $TSTUSR --block-hardlimit ${LSTR[1]} \
+                                --inode-softlimit ${LSTR[2]} \
+                                --inode-hardlimit ${LSTR[3]} $MOUNT ||
+               error "could not set quota limits"
+
+       $LFS quota -v -u $TSTUSR $MOUNT |
+           grep -E '^ *'$MOUNT' *[0-9]+\** *'${LVAL[0]}' *'${LVAL[1]}' *[0-9]+\** *'${LVAL[2]}' *'${LVAL[3]} ||
+               error "lfs quota output is unexpected"
+
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT ||
+               error "could not reset quota limits"
+}
+run_test_with_stat 20 "test if setquota specifiers work properly (15754)"
+
+test_21_sub() {
+       local testfile=$1
+       local blk_number=$2
+       local seconds=$3
+
+       time=$(($(date +%s) + seconds))
+       while [ $(date +%s) -lt $time ]; do
+           $RUNAS dd if=/dev/zero of=$testfile  bs=$BLK_SZ count=$blk_number > /dev/null 2>&1
+           rm -f $testfile
+       done
 }
-run_test 14a "test setting quota on root ==="
+
+# run for fixing bug16053, setquota shouldn't fail when writing and
+# deleting are happening
+# Two background writers (test_21_sub) rewrite a small and a large file for
+# 30 seconds while the user and group limits of $TSTUSR are raised once a
+# second; both writers must be gone within 60 seconds after that.
+test_21() {
+       # result of this test; kept local so the final return can no longer
+       # pick up an RC left behind by an earlier test
+       local RC=0
+
+       set_blk_tunesz 512
+       set_blk_unitsz 1024
+
+       wait_delete_completed
+
+       TESTFILE="$DIR/$tdir/$tfile"
+
+       BLK_LIMIT=$((10 * 1024 * 1024)) # 10G
+       FILE_LIMIT=1000000
+
+       log "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR"
+       $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $MOUNT
+       log "  Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR"
+       $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $MOUNT
+
+       # repeat writing on a 1M file
+       test_21_sub ${TESTFILE}_1 1024 30 &
+       DDPID1=$!
+       # repeat writing on a 128M file
+       test_21_sub ${TESTFILE}_2 $((1024 * 128)) 30 &
+       DDPID2=$!
+
+       # keep changing both limits while the writers are running
+       time=$(($(date +%s) + 30))
+       i=1
+       while [ $(date +%s) -lt $time ]; do
+           log "  Set quota for $i times"
+           $LFS setquota -u $TSTUSR -b 0 -B $((BLK_LIMIT + 1024 * i)) -i 0 -I $((FILE_LIMIT + i)) $MOUNT
+           $LFS setquota -g $TSTUSR -b 0 -B $((BLK_LIMIT + 1024 * i)) -i 0 -I $((FILE_LIMIT + i)) $MOUNT
+           i=$((i+1))
+           sleep 1
+       done
+
+       count=0
+       while [ true ]; do
+           if ! ps -p ${DDPID1} > /dev/null 2>&1; then break; fi
+           count=$[count+1]
+           if [ $count -gt 60 ]; then
+               error "dd should be finished!"
+           fi
+           sleep 1
+       done
+       echo "(dd_pid=$DDPID1, time=$count)successful"
+
+       count=0
+       while [ true ]; do
+           if ! ps -p ${DDPID2} > /dev/null 2>&1; then break; fi
+           count=$[count+1]
+           if [ $count -gt 60 ]; then
+               error "dd should be finished!"
+           fi
+           sleep 1
+       done
+       echo "(dd_pid=$DDPID2, time=$count)successful"
+
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+       $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+
+       return $RC
+}
+run_test_with_stat 21 "run for fixing bug16053 ==========="
+
+test_22() {
+       local SAVEREFORMAT
+
+       SAVEREFORMAT=$REFORMAT
+       $LFS quotaoff -ug $DIR || error "could not turn quotas off"
+
+       quota_save_version "ug"
+
+       REFORMAT="reformat"
+       stopall
+       mount
+       setupall
+       REFORMAT=$SAVEREFORMAT
+
+       echo "checking parameters"
+
+       do_facet $SINGLEMDS "lctl get_param mdd.${FSNAME}-MDT*.quota_type" | grep "ug" || error "admin failure"
+       do_facet ost1 "lctl get_param obdfilter.*.quota_type" | grep "ug" || error "op failure"
+
+       run_test 0 "reboot lustre"
+}
+run_test_with_stat 22 "test if quota_type saved as permanent parameter ===="
+
+# Direct-I/O quota check with a block hard limit of $1 kbytes for $TSTUSR:
+# writing the first half of the limit must succeed, writing the second half
+# must fail, and a 1-unit rewrite at the half-way offset must succeed.
+# After the file is deleted, the usage reported for the first obdfilter
+# device listed on ost1 must be back to 0.
+test_23_sub() {
+       mkdir -p $DIR/$tdir
+       chmod 0777 $DIR/$tdir
+       TESTFILE="$DIR/$tdir/$tfile-0"
+       rm -f $TESTFILE
+       local bs_unit=$((1024*1024))
+       LIMIT=$1
+
+       wait_delete_completed
+
+       # test for user
+       log "  User quota (limit: $LIMIT kbytes)"
+       $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR
+       sleep 3
+       $SHOW_QUOTA_USER
+
+       $LFS setstripe $TESTFILE -c 1
+       chown $TSTUSR.$TSTUSR $TESTFILE
+
+       log "    Step1: trigger quota with 0_DIRECT"
+       log "      Write half of file"
+       $RUNAS $DIRECTIO write $TESTFILE 0 $(($LIMIT/1024/2)) $bs_unit || error "(1) write failure, but expect success: $LIMIT"
+       log "      Write out of block quota ..."
+       $RUNAS $DIRECTIO write $TESTFILE $(($LIMIT/1024/2)) $(($LIMIT/1024/2)) $bs_unit && error "(2) write success, but expect EDQUOT: $LIMIT"
+       log "    Step1: done"
+
+       log "    Step2: rewrite should succeed"
+       $RUNAS $DIRECTIO write $TESTFILE $(($LIMIT/1024/2)) 1 $bs_unit || error "(3) write failure, but expect success: $LIMIT"
+       log "    Step2: done"
+
+       rm -f $TESTFILE
+       wait_delete_completed
+       # next-to-last column of the first obdfilter line of "lctl dl"
+       OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'`
+       OST0_QUOTA_USED=`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]]+[[:space:]]+/ { print $1 }'`
+       echo $OST0_QUOTA_USED
+       # a brace group, not a subshell: error() has to run in this shell so
+       # that its exit or failure state is not lost
+       [ $OST0_QUOTA_USED -ne 0 ] && \
+           { $SHOW_QUOTA_USER; error "quota deleted isn't released"; }
+       $SHOW_QUOTA_USER
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR
+}
+
+test_23() {
+       log "run for $((OSTCOUNT * 4))MB test file"
+       test_23_sub $((OSTCOUNT * 4 * 1024))
+
+       OST0_MIN=120000
+       check_whether_skip && return 0
+       log "run for $((OSTCOUNT * 40))MB test file"
+       test_23_sub $((OSTCOUNT * 40 * 1024))
+}
+run_test_with_stat 23 "run for fixing bug16125 ==========="
+
+test_24() {
+       local TESTFILE="$DIR/$tdir/$tfile"
+       mkdir -p $DIR/$tdir
+
+       run_to_block_limit $TESTFILE
+       $SHOW_QUOTA_USER | grep '*' || error "no matching *"
+
+       # cleanup
+       rm -f $TESTFILE
+       $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT
+
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
+       
+}
+run_test_with_stat 24 "test if lfs draws an asterix when limit is reached (16646) ==========="
 
 # turn off quota
 test_99()
 {
        $LFS quotaoff $DIR
+       lctl set_param debug="-quota"   # presumably removes the "quota" flag from the debug mask; verify against lctl
+
        return 0
 }
-run_test 99 "Quota off ==============================="
+run_test_with_stat 99 "Quota off ==============================="
 
 
 log "cleanup: ======================================================"
 cd $ORIG_PWD
-post_test
 check_and_cleanup_lustre
 echo '=========================== finished ==============================='
 [ -f "$QUOTALOG" ] && cat $QUOTALOG && grep -q FAIL $QUOTALOG && exit 1 || true
index 0bfb2f9..b65f722 100644 (file)
@@ -64,10 +64,14 @@ fi
 
 MDT="`do_facet $SINGLEMDS "lctl get_param -N mdt.\*MDT\*/stats 2>/dev/null | cut -d"." -f2" || true`"
 if [ ! -z "$MDT" ]; then
-        do_facet $SINGLEMDS "mkdir -p $CONFDIR"
+       do_facet $SINGLEMDS "mkdir -p $CONFDIR"
        IDENTITY_FLUSH=mdt.$MDT.identity_flush
        MDSCAPA=mdt.$MDT.capa
        CAPA_TIMEOUT=mdt.$MDT.capa_timeout
+       MDSSECLEVEL=mdt.$MDT.sec_level
+       LOCALMDT=$MDT
+else
+       LOCALMDT=""
 fi
 
 # for CLIENT_TYPE
@@ -121,25 +125,41 @@ sec_setup
 
 # run as different user
 test_0() {
-        umask 0022
+       umask 0022
 
-        chmod 0755 $DIR || error "chmod (1)"
-       rm -rf $DIR/$tdir || error "rm (1)"
+       chmod 0755 $DIR || error "chmod (1)"
+       rm -rf $DIR/* || error "rm (1)"
        mkdir -p $DIR/$tdir || error "mkdir (1)"
-       chown $USER0 $DIR/$tdir || error "chown (1)"
+
+       if [ "$CLIENT_TYPE" = "remote" ]; then
+               [ -z "$MDT" ] && skip "do not support do_facet operations." && return
+               do_facet $SINGLEMDS "echo '* 0 normtown' > $PERM_CONF"
+               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+               chown $USER0 $DIR/$tdir && error "chown (1)"
+               do_facet $SINGLEMDS "echo '* 0 rmtown' > $PERM_CONF"
+               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+       else
+               chown $USER0 $DIR/$tdir || error "chown (2)"
+       fi
+
        $RUNAS -u $ID0 ls $DIR || error "ls (1)"
         rm -f $DIR/f0 || error "rm (2)"
        $RUNAS -u $ID0 touch $DIR/f0 && error "touch (1)"
        $RUNAS -u $ID0 touch $DIR/$tdir/f1 || error "touch (2)"
        $RUNAS -u $ID1 touch $DIR/$tdir/f2 && error "touch (3)"
        touch $DIR/$tdir/f3 || error "touch (4)"
-       chown root $DIR/$tdir || error "chown (2)"
+       chown root $DIR/$tdir || error "chown (3)"
        chgrp $USER0 $DIR/$tdir || error "chgrp (1)"
        chmod 0775 $DIR/$tdir || error "chmod (2)"
        $RUNAS -u $ID0 touch $DIR/$tdir/f4 || error "touch (5)"
        $RUNAS -u $ID1 touch $DIR/$tdir/f5 && error "touch (6)"
        touch $DIR/$tdir/f6 || error "touch (7)"
-       rm -rf $DIR/$tdir || error "rm (3)"
+       rm -rf $DIR/* || error "rm (3)"
+
+       if [ "$CLIENT_TYPE" = "remote" ]; then
+               do_facet $SINGLEMDS "rm -f $PERM_CONF"
+               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+       fi
 }
 run_test 0 "uid permission ============================="
 
@@ -147,11 +167,11 @@ run_test 0 "uid permission ============================="
 test_1() {
        [ $GSS_SUP = 0 ] && skip "without GSS support." && return
        [ -z "$MDT" ] && skip "do not support do_facet operations." && return
-       [ "$CLIENT_TYPE" = "remote" ] && \
-               skip "test_1 for local client only" && return
 
-       do_facet $SINGLEMDS "rm -f $PERM_CONF"
-       do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+       if [ "$CLIENT_TYPE" = "remote" ]; then
+               do_facet $SINGLEMDS "echo '* 0 rmtown' > $PERM_CONF"
+               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+       fi
 
        rm -rf $DIR/$tdir
        mkdir -p $DIR/$tdir
@@ -159,7 +179,7 @@ test_1() {
        chown $USER0 $DIR/$tdir || error "chown (1)"
        $RUNAS -u $ID1 -v $ID0 touch $DIR/$tdir/f0 && error "touch (2)"
        echo "enable uid $ID1 setuid"
-       do_facet $SINGLEMDS "echo '* $ID1 setuid' > $PERM_CONF"
+       do_facet $SINGLEMDS "echo '* $ID1 setuid' >> $PERM_CONF"
        do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
        $RUNAS -u $ID1 -v $ID0 touch $DIR/$tdir/f1 || error "touch (3)"
 
@@ -196,6 +216,10 @@ test_2 () {
        [ -z "$(which setfacl 2>/dev/null)" ] && \
                skip "could not find setfacl" && return
        [ "$UID" != 0 ] && skip "must run as root" && return
+       [ -z "$MDT" ] && skip "do not support do_facet operations." && return
+
+       do_facet $SINGLEMDS "echo '* 0 rmtacl,rmtown' > $PERM_CONF"
+       do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
 
        sec_login root root
        sec_login bin bin
@@ -206,17 +230,8 @@ test_2 () {
        umask 0022
        cd $DIR
 
-       if [ ! -z "$MDT" ]; then
-               do_facet $SINGLEMDS "echo '* 0 rmtacl' > $PERM_CONF"
-               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
-       fi
-
-        if lfs rgetfacl $DIR; then
-                echo "performing cp ..."
-                run_rmtacl_subtest cp || error "cp"
-        else
-                echo "server doesn't permit current user 'lfs r{s,g}etfacl', skip cp test."
-        fi
+        echo "performing cp ..."
+        run_rmtacl_subtest cp || error "cp"
        echo "performing getfacl-noacl..."
        run_rmtacl_subtest getfacl-noacl || error "getfacl-noacl"
        echo "performing misc..."
@@ -233,13 +248,11 @@ test_2 () {
        run_rmtacl_subtest inheritance || error "inheritance"
        rm -f make-tree
 
-       if [ ! -z "$MDT" ]; then
-               do_facet $SINGLEMDS "rm -f $PERM_CONF"
-               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
-       fi
-
        cd $SAVE_PWD
        umask $SAVE_UMASK
+
+       do_facet $SINGLEMDS "rm -f $PERM_CONF"
+       do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
 }
 run_test 2 "rmtacl ============================="
 
@@ -255,22 +268,31 @@ run_test 3 "rootsquash ============================="
 # as for remote client, the groups of the specified uid on MDT
 # will be obtained by upcall /sbin/l_getidentity and used.
 test_4() {
+       if [ "$CLIENT_TYPE" = "remote" ]; then
+               [ -z "$MDT" ] && skip "do not support do_facet operations." && return
+               do_facet $SINGLEMDS "echo '* 0 rmtown' > $PERM_CONF"   # remote client: write a "rmtown" entry for uid 0
+               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"   # presumably makes the MDS re-read $PERM_CONF; verify
+       fi
+
        rm -rf $DIR/$tdir
         mkdir -p $DIR/$tdir
         chmod 0771 $DIR/$tdir
         chgrp $ID0 $DIR/$tdir
        $RUNAS -u $ID0 ls $DIR/$tdir || error "setgroups (1)"
-       if [ "$CLIENT_TYPE" != "remote" ]; then
+       if [ "$CLIENT_TYPE" = "local" ]; then
                if [ ! -z "$MDT" ]; then
                        do_facet $SINGLEMDS "echo '* $ID1 setgrp' > $PERM_CONF"
                        do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
-                       $RUNAS -u $ID1 -G1,2,$ID0 ls $DIR/$tdir || error "setgroups (2)"
-                       do_facet $SINGLEMDS "rm -f $PERM_CONF"
-                       do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+                       $RUNAS -u $ID1 -G1,2,$ID0 ls $DIR/$tdir || error "setgroups (2)"
                fi
        fi
        $RUNAS -u $ID1 -G1,2 ls $DIR/$tdir && error "setgroups (3)"
        rm -rf $DIR/$tdir
+
+       if [ ! -z "$MDT" ]; then
+               do_facet $SINGLEMDS "rm -f $PERM_CONF"   # removed for local and remote clients alike
+               do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1"
+       fi
 }
 run_test 4 "set supplementary group ==============="
 
@@ -282,6 +304,39 @@ mds_capability_timeout() {
         return 0
 }
 
+# Set the MDS sec level (the $MDSSECLEVEL parameter) on $SINGLEMDS.
+#
+# $1 - sec level: 0, 1 or 3 (the message printed for each value says what
+#      it selects).
+#
+# Returns 1 when no value is given, 2 when the value is not accepted and
+# 0 otherwise; the status of the set_param call itself is not checked.
+mds_sec_level_switch() {
+        if [ $# -lt 1 ]; then
+                echo "Miss mds sec level switch value"
+                return 1
+        fi
+
+        local level="$1"
+        local msg
+
+        case $level in
+                0) msg="Disable capa for all clients";;
+                1) msg="Enable capa for remote client";;
+                3) msg="Enable capa for all clients";;
+                *) echo "Invalid mds sec level switch value"
+                   return 2;;
+        esac
+        echo "$msg"
+
+        do_facet $SINGLEMDS "lctl set_param -n $MDSSECLEVEL=$level"
+        return 0
+}
+
+# Set the obdfilter sec_level parameter on every OST (ost1..ost$OSTCOUNT).
+#
+# $1 - sec level: 0 (capa disabled for all clients), 1 (capa enabled for
+#      remote clients) or 3 (capa enabled for all clients).
+#
+# Returns 0 on success, 1 if the value is missing, 2 if it is not one of
+# the accepted values, 3 if the obdfilter device name of an OST cannot be
+# obtained through "lctl get_param".
+oss_sec_level_switch() {
+        # keep all loop state private so a caller's $i is not clobbered
+        local i j OST
+
+        [ $# -lt 1 ] && echo "Miss oss sec level switch value" && return 1
+
+        case $1 in
+                0) echo "Disable capa for all clients";;
+                1) echo "Enable capa for remote client";;
+                3) echo "Enable capa for all clients";;
+                *) echo "Invalid oss sec level switch value" && return 2;;
+        esac
+
+        for i in `seq $OSTCOUNT`; do
+                # facets are numbered from 1, the pattern below uses i - 1
+                j=`expr $i - 1`
+                OST="`do_facet ost$i "lctl get_param -N obdfilter.\*OST\*$j/stats 2>/dev/null | cut -d"." -f2" || true`"
+                [ -z "$OST" ] && return 3
+                do_facet ost$i "lctl set_param -n obdfilter.$OST.sec_level=$1"
+        done
+        return 0
+}
+
 mds_capability_switch() {
         [ $# -lt 1 ] && echo "Miss mds capability switch value" && return 1
 
@@ -306,12 +361,25 @@ oss_capability_switch() {
 
        for i in `seq $OSTCOUNT`; do
                local j=`expr $i - 1`
-               local OST="`do_facet ost$i "lctl get_param -N obdfilter.\*OST\*$j/stats | cut -d"." -f2" || true`"
+               local OST="`do_facet ost$i "lctl get_param -N obdfilter.\*OST\*$j/stats 2>/dev/null | cut -d"." -f2" || true`"
+                [ -z "$OST" ] && return 3
                do_facet ost$i "lctl set_param -n obdfilter.$OST.capa=$1"
        done
         return 0
 }
 
+# Enable capabilities on the MDS: capability switch first, then sec level.
+# Returns 1 if the capability switch fails, 2 if the sec level switch fails.
+turn_mds_capa_on() {
+        if ! mds_capability_switch 3; then
+                return 1
+        fi
+        if ! mds_sec_level_switch 3; then
+                return 2
+        fi
+        return 0
+}
+
+# Enable capabilities on the OSTs: capability switch first, then sec level.
+# Returns 1 if the capability switch fails, 2 if the sec level switch fails.
+turn_oss_capa_on() {
+        if ! oss_capability_switch 1; then
+                return 1
+        fi
+        if ! oss_sec_level_switch 3; then
+                return 2
+        fi
+        return 0
+}
+
 turn_capability_on() {
         local capa_timeout=${1:-"1800"}
 
@@ -320,13 +388,22 @@ turn_capability_on() {
         # is turned on on all MDS/OSS servers before
         # client mount.
 
-        umount $MOUNT || return 1
+       turn_mds_capa_on || return 1
+       turn_oss_capa_on || return 2
+        mds_capability_timeout $capa_timeout || return 3
+        remount_client $MOUNT || return 4
+        return 0
+}
 
-        mds_capability_switch 3 || return 2
-        oss_capability_switch 1 || return 3
-        mds_capability_timeout $capa_timeout || return 4
+# Disable capabilities on the MDS: sec level first, then the capability
+# switch.  Returns 1 if the sec level switch fails, 2 if the capability
+# switch fails.
+turn_mds_capa_off() {
+        if ! mds_sec_level_switch 0; then
+                return 1
+        fi
+        if ! mds_capability_switch 0; then
+                return 2
+        fi
+        return 0
+}
 
-        mount_client $MOUNT || return 5
+turn_oss_capa_off() {
+       oss_sec_level_switch 0  || return 1
+        oss_capability_switch 0 || return 2
         return 0
 }
 
@@ -335,8 +412,8 @@ turn_capability_off() {
         # it in a live system. But, please turn off
         # capability of all OSS servers before MDS servers.
 
-        oss_capability_switch 0 || return 1
-        mds_capability_switch 0 || return 2
+       turn_oss_capa_off || return 1
+       turn_mds_capa_off || return 2
         return 0
 }
 
@@ -347,24 +424,29 @@ turn_capability_off() {
 test_5() {
         local file=$DIR/f5
 
+       [ $GSS_SUP = 0 ] && skip "without GSS support." && return
        [ -z "$MDT" ] && skip "do not support do_facet operations." && return
+       [ ! -z "$LOCALMDT" ] && skip "client should be separated from server." && return
+       rm -f $file
+
        turn_capability_off
        if [ $? != 0 ]; then
                error "turn_capability_off"
                return 1
        fi
-       rm -f $file
 
-        # Disable proc variable
-        mds_capability_switch 0
+        turn_oss_capa_on
        if [ $? != 0 ]; then
-               error "mds_capability_switch 0"
+               error "turn_oss_capa_on"
                return 2
        fi
-        oss_capability_switch 1
-       if [ $? != 0 ]; then
-               error "oss_capability_switch 1"
-               return 3
+
+       if [ "$CLIENT_TYPE" = "remote" ]; then
+               remount_client $MOUNT && return 3
+               turn_oss_capa_off
+               return 0
+       else
+               remount_client $MOUNT || return 4
        fi
 
         # proc variable disabled -- access to the objects in the filesystem
@@ -374,14 +456,15 @@ test_5() {
        $WTL $file 30
        if [ $? == 0 ]; then
                error "Write worked well even though secrets not supplied."
-               return 4
+               return 5
         fi
 
         turn_capability_on
        if [ $? != 0 ]; then
                error "turn_capability_on"
-               return 4
+               return 6
        fi
+
         sleep 5
 
         # proc variable enabled, secrets supplied -- write should work now
@@ -390,13 +473,13 @@ test_5() {
        $WTL $file 30
        if [ $? != 0 ]; then
                error "Write failed even though secrets supplied."
-               return 5
+               return 7
         fi
 
        turn_capability_off
        if [ $? != 0 ]; then
                error "turn_capability_off"
-               return 7
+               return 8
        fi
        rm -f $file
 }
@@ -409,12 +492,16 @@ run_test 5 "capa secrets ========================="
 test_6() {
         local file=$DIR/f6
 
+       [ $GSS_SUP = 0 ] && skip "without GSS support." && return
        [ -z "$MDT" ] && skip "do not support do_facet operations." && return
+       [ ! -z "$LOCALMDT" ] && skip "client should be separated from server." && return
+
        turn_capability_off
        if [ $? != 0 ]; then
                error "turn_capability_off"
                return 1
        fi
+
        rm -f $file
 
         turn_capability_on 30
@@ -422,6 +509,7 @@ test_6() {
                error "turn_capability_on 30"
                return 2
        fi
+
         # Token expiry
        $WTL $file 60
        if [ $? != 0 ]; then
@@ -435,14 +523,15 @@ test_6() {
                error "mds_capability_timeout 30"
                return 4
        fi
+
        $WTL $file 60 &
        local PID=$!
        sleep 5
 
         # To disable automatic renew, only need turn capa off on MDS.
-        mds_capability_switch 0
+       turn_mds_capa_off
        if [ $? != 0 ]; then
-               error "mds_capability_switch 0"
+               error "turn_mds_capa_off"
                return 5
        fi
 
index 0cff0fb..875513f 100644 (file)
@@ -3623,7 +3623,7 @@ test_80() { # bug 10718
         dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1M
         sync; sleep 1; sync
         BEFORE=`date +%s`
-        cancel_lru_locks OSC
+        cancel_lru_locks osc
         AFTER=`date +%s`
         DIFF=$((AFTER-BEFORE))
         if [ $DIFF -gt 1 ] ; then
@@ -5111,8 +5111,10 @@ test_123a() { # was test 123, statahead(bug 11401)
                SLOWOK=1
         fi
 
-        remount_client $MOUNT
         mkdir -p $DIR/$tdir
+        rm -rf $DIR/$tdir/*
+        cancel_lru_locks mdc
+        cancel_lru_locks osc
         error=0
         NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'`
         [ $NUMFREE -gt 100000 ] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000))
index b713dd7..f4b7a48 100644 (file)
@@ -179,6 +179,11 @@ init_test_env() {
 
 }
 
+# Choose the module file extension and the quota default from the running
+# kernel release.
+case `uname -r` in
+2.4.*)
+    EXT=".o"
+    USE_QUOTA=no
+    [ ! "$CLIENTONLY" ] && FSTYPE=ext3
+    ;;
+*)
+    EXT=".ko"
+    USE_QUOTA=yes
+    ;;
+esac
+
 load_module() {
     EXT=".ko"
     module=$1
@@ -227,10 +232,7 @@ load_modules() {
     load_module obdclass/obdclass
     load_module ptlrpc/ptlrpc
     load_module ptlrpc/gss/ptlrpc_gss
-    # Now, some modules depend on lquota without USE_QUOTA check,
-    # will fix later. Disable check "$USE_QUOTA" = "yes" temporary.
-    #[ "$USE_QUOTA" = "yes" ] && load_module quota/lquota
-    load_module quota/lquota
+    [ "$USE_QUOTA" = "yes" -a "$LQUOTA" != "no" ] && load_module quota/lquota
     load_module fid/fid
     load_module fld/fld
     load_module lmv/lmv
@@ -747,8 +749,10 @@ client_reconnect() {
 
 facet_failover() {
     facet=$1
+    sleep_time=$2
     echo "Failing $facet on node `facet_active_host $facet`"
     shutdown_facet $facet
+    [ -n "$sleep_time" ] && sleep $sleep_time
     reboot_facet $facet
     client_df &
     DFPID=$!
@@ -1124,7 +1128,7 @@ switch_identity() {
     local num=$1
     local switch=$2
     local j=`expr $num - 1`
-    local MDT="`do_facet mds$num lctl get_param -N mdt.*MDT*$j | cut -d"." -f2 2>/dev/null || true`"
+    local MDT="`(do_facet mds$num lctl get_param -N mdt.*MDT*$j 2>/dev/null | cut -d"." -f2 2>/dev/null) || true`"
 
     if [ -z "$MDT" ]; then
         return 2
@@ -1635,6 +1639,8 @@ basetest() {
     IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
 }
 
+# print a newline if the last test was skipped
+export LAST_SKIPPED=
 run_test() {
     assert_DIR
 
@@ -1642,38 +1648,46 @@ run_test() {
     if [ ! -z "$ONLY" ]; then
         testname=ONLY_$1
         if [ ${!testname}x != x ]; then
+            [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED=
             run_one $1 "$2"
             return $?
         fi
         testname=ONLY_$base
         if [ ${!testname}x != x ]; then
+            [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED=
             run_one $1 "$2"
             return $?
         fi
+        LAST_SKIPPED="y"
         echo -n "."
         return 0
     fi
     testname=EXCEPT_$1
     if [ ${!testname}x != x ]; then
+        LAST_SKIPPED="y"
         TESTNAME=test_$1 skip "skipping excluded test $1"
         return 0
     fi
     testname=EXCEPT_$base
     if [ ${!testname}x != x ]; then
+        LAST_SKIPPED="y"
         TESTNAME=test_$1 skip "skipping excluded test $1 (base $base)"
         return 0
     fi
     testname=EXCEPT_SLOW_$1
     if [ ${!testname}x != x ]; then
+        LAST_SKIPPED="y"
         TESTNAME=test_$1 skip "skipping SLOW test $1"
         return 0
     fi
     testname=EXCEPT_SLOW_$base
     if [ ${!testname}x != x ]; then
+        LAST_SKIPPED="y"
         TESTNAME=test_$1 skip "skipping SLOW test $1 (base $base)"
         return 0
     fi
 
+    LAST_SKIPPED=
     run_one $1 "$2"
 
     return $?
index f45a8ae..ae4c437 100644 (file)
@@ -194,6 +194,7 @@ static perm_type_t perm_types[] = {
         { "setgid", CFS_SETGID_PERM },
         { "setgrp", CFS_SETGRP_PERM },
         { "rmtacl", CFS_RMTACL_PERM },
+        { "rmtown", CFS_RMTOWN_PERM },
         { 0 }
 };
 
@@ -202,6 +203,7 @@ static perm_type_t noperm_types[] = {
         { "nosetgid", CFS_SETGID_PERM },
         { "nosetgrp", CFS_SETGRP_PERM },
         { "normtacl", CFS_RMTACL_PERM },
+        { "normtown", CFS_RMTOWN_PERM },
         { 0 }
 };
 
index f0d07ff..130f41c 100644 (file)
 #include <dirent.h>
 #include <time.h>
 #include <ctype.h>
+#ifdef HAVE_SYS_QUOTA_H
+# include <sys/quota.h>
+#endif
+
 /* For dirname() */
 #include <libgen.h>
 
@@ -83,13 +87,14 @@ static int lfs_osts(int argc, char **argv);
 static int lfs_df(int argc, char **argv);
 static int lfs_check(int argc, char **argv);
 static int lfs_catinfo(int argc, char **argv);
-#ifdef HAVE_QUOTA_SUPPORT
+#ifdef HAVE_SYS_QUOTA_H
 static int lfs_quotachown(int argc, char **argv);
 static int lfs_quotacheck(int argc, char **argv);
 static int lfs_quotaon(int argc, char **argv);
 static int lfs_quotaoff(int argc, char **argv);
 static int lfs_setquota(int argc, char **argv);
 static int lfs_quota(int argc, char **argv);
+static int lfs_quotainv(int argc, char **argv);
 #endif
 static int lfs_flushctx(int argc, char **argv);
 static int lfs_join(int argc, char **argv);
@@ -156,7 +161,7 @@ command_t cmdlist[] = {
          "report filesystem disk space usage or inodes usage"
          "of each MDS/OSD.\n"
          "Usage: df [-i] [-h] [path]"},
-#ifdef HAVE_QUOTA_SUPPORT
+#ifdef HAVE_SYS_QUOTA_H
         {"quotachown",lfs_quotachown, 0,
          "Change files' owner or group on the specified filesystem.\n"
          "usage: quotachown [-i] <filesystem>\n"
@@ -170,10 +175,24 @@ command_t cmdlist[] = {
         {"quotaoff", lfs_quotaoff, 0, "Turn filesystem quotas off.\n"
          "usage: quotaoff [ -ug ] <filesystem>"},
         {"setquota", lfs_setquota, 0, "Set filesystem quotas.\n"
-         "usage: setquota [ -u | -g ] <name> <block-softlimit> <block-hardlimit> <inode-softlimit> <inode-hardlimit> <filesystem>\n"
-         "       setquota -t [ -u | -g ] <block-grace> <inode-grace> <filesystem>"},
+         "usage: setquota [ -u | -g ] <name> -b <block-softlimit> -B <block-hardlimit> -i <inode-softlimit> -I <inode-hardlimit> <filesystem>\n"
+         "       setquota -t [ -u | -g ] <block-grace> <inode-grace> <filesystem>\n"
+         "       setquota [ -u | --user | -g | --group ] <name>\n"
+         "                [--block-softlimit <block-softlimit>]\n"
+         "                [--block-hardlimit <block-hardlimit>]\n"
+         "                [--inode-softlimit <inode-softlimit>]\n"
+         "                [--inode-hardlimit <inode-hardlimit>] <filesystem>\n"
+         "       setquota [-t] [ -u | --user | -g | --group ]\n"
+         "                [--block-grace <block-grace>]\n"
+         "                [--inode-grace <inode-grace>] <filesystem>\n"
+         "       -b can be used instead of --block-softlimit/--block-grace\n"
+         "       -B can be used instead of --block-hardlimit\n"
+         "       -i can be used instead of --inode-softlimit/--inode-grace\n"
+         "       -I can be used instead of --inode-hardlimit"},
         {"quota", lfs_quota, 0, "Display disk usage and limits.\n"
-         "usage: quota [ -o obd_uuid ] [{-u|-g  <name>}|-t] <filesystem>"},
+         "usage: quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [{-u|-g <name>}|-t] <filesystem>"},
+        {"quotainv", lfs_quotainv, 0, "Invalidate quota data.\n"
+         "usage: quotainv [-u|-g] <filesystem>"},
 #endif
         {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n"
          "usage: flushctx [-k] [mountpoint...]"},
@@ -267,7 +286,7 @@ static int lfs_setstripe(int argc, char **argv)
         {
                 optind = 0;
                 while ((c = getopt_long(argc, argv, "c:di:o:s:p:",
-                                                long_opts, NULL)) >= 0) {
+                                        long_opts, NULL)) >= 0) {
                         switch (c) {
                         case 0:
                                 /* Long options. */
@@ -313,13 +332,13 @@ static int lfs_setstripe(int argc, char **argv)
 
         if (optind == argc) {
                 fprintf(stderr, "error: %s: missing filename|dirname\n",
-                                argv[0]);
+                        argv[0]);
                 return CMD_HELP;
         }
 
         /* get the stripe size */
         if (stripe_size_arg != NULL) {
-                result = parse_size(stripe_size_arg, &st_size, &size_units);
+                result = parse_size(stripe_size_arg, &st_size, &size_units, 0);
                 if (result) {
                         fprintf(stderr, "error: %s: bad size '%s'\n",
                                 argv[0], stripe_size_arg);
@@ -392,9 +411,12 @@ static int set_time(time_t *time, time_t *set, char *str)
         return res;
 }
 
+/* Lookup kind passed as the 'type' argument of name2id()/id2name():
+ * USER selects the passwd lookup, any other value (GROUP) the other one. */
+#define USER 0
+#define GROUP 1
+
 static int name2id(unsigned int *id, char *name, int type)
 {
-        if (type == USRQUOTA) {
+        if (type == USER) {
                 struct passwd *entry;
 
                 if (!(entry = getpwnam(name))) {
@@ -421,7 +443,7 @@ static int name2id(unsigned int *id, char *name, int type)
 
 static int id2name(char **name, unsigned int id, int type)
 {
-        if (type == USRQUOTA) {
+        if (type == USER) {
                 struct passwd *entry;
 
                 if (!(entry = getpwuid(id))) {
@@ -582,7 +604,7 @@ static int lfs_find(int argc, char **argv)
                         new_fashion = 1;
                         param.gid = strtol(optarg, &endptr, 10);
                         if (optarg == endptr) {
-                                ret = name2id(&param.gid, optarg, GRPQUOTA);
+                                ret = name2id(&param.gid, optarg, GROUP);
                                 if (ret != 0) {
                                         fprintf(stderr, "Group/GID: %s cannot "
                                                 "be found.\n", optarg);
@@ -606,7 +628,7 @@ static int lfs_find(int argc, char **argv)
                         new_fashion = 1;
                         param.uid = strtol(optarg, &endptr, 10);
                         if (optarg == endptr) {
-                                ret = name2id(&param.uid, optarg, USRQUOTA);
+                                ret = name2id(&param.uid, optarg, USER);
                                 if (ret != 0) {
                                         fprintf(stderr, "User/UID: %s cannot "
                                                 "be found.\n", optarg);
@@ -723,7 +745,8 @@ static int lfs_find(int argc, char **argv)
 
                         if (param.size_sign)
                                 optarg++;
-                        ret = parse_size(optarg, &param.size,&param.size_units);
+                        ret = parse_size(optarg, &param.size,
+                                         &param.size_units, 0);
                         if (ret) {
                                 fprintf(stderr,"error: bad size '%s'\n",
                                         optarg);
@@ -1066,12 +1089,12 @@ static int mntdf(char *mntdir, int ishow, int cooked)
 
                 if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO ||
                     rc == -ENODATA || rc == 0) {
-                        showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked,
-                               "MDT", index, rc);
+                        showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf),
+                               ishow, cooked, "MDT", index, rc);
                 } else {
                         fprintf(stderr,
                                 "error: llapi_obd_statfs(%s): %s (%d)\n",
-                                uuid_buf.uuid, strerror(-rc), rc);
+                                obd_uuid2str(&uuid_buf), strerror(-rc), rc);
                         return rc;
                 }
                 if (rc == 0) {
@@ -1093,8 +1116,8 @@ static int mntdf(char *mntdir, int ishow, int cooked)
 
                 if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO ||
                     rc == -ENODATA || rc == 0) {
-                        showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked,
-                               "OST", index, rc);
+                        showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf),
+                               ishow, cooked, "OST", index, rc);
                 } else {
                         fprintf(stderr,
                                 "error: llapi_obd_statfs failed: %s (%d)\n",
@@ -1314,7 +1337,7 @@ out:
         return rc;
 }
 
-#ifdef HAVE_QUOTA_SUPPORT
+#ifdef HAVE_SYS_QUOTA_H
 static int lfs_quotachown(int argc, char **argv)
 {
 
@@ -1341,15 +1364,13 @@ static int lfs_quotachown(int argc, char **argv)
         return rc;
 }
 
-
 static int lfs_quotacheck(int argc, char **argv)
 {
         int c, check_type = 0;
         char *mnt;
         struct if_quotacheck qchk;
         struct if_quotactl qctl;
-        char *obd_type = qchk.obd_type;
-        char *obd_uuid = qchk.obd_uuid.uuid;
+        char *obd_type = (char *)qchk.obd_type;
         int rc;
 
         memset(&qchk, 0, sizeof(qchk));
@@ -1382,7 +1403,6 @@ static int lfs_quotacheck(int argc, char **argv)
 
         memset(&qctl, 0, sizeof(qctl));
         qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
-        qctl.qc_id = QFMT_LDISKFS;
         qctl.qc_type = check_type;
         rc = llapi_quotactl(mnt, &qctl);
         if (rc) {
@@ -1399,20 +1419,20 @@ static int lfs_quotacheck(int argc, char **argv)
         rc = llapi_poll_quotacheck(mnt, &qchk);
         if (rc) {
                 if (*obd_type)
-                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                        fprintf(stderr, "%s %s ", obd_type,
+                                obd_uuid2str(&qchk.obd_uuid));
                 fprintf(stderr, "quota check failed: %s\n", strerror(errno));
                 return rc;
         }
 
         memset(&qctl, 0, sizeof(qctl));
         qctl.qc_cmd = LUSTRE_Q_QUOTAON;
-        qctl.qc_id = QFMT_LDISKFS;
         qctl.qc_type = check_type;
         rc = llapi_quotactl(mnt, &qctl);
         if (rc) {
                 if (*obd_type)
-                        fprintf(stderr, "%s %s ",
-                                qctl.obd_type, qctl.obd_uuid.uuid);
+                        fprintf(stderr, "%s %s ", (char *)qctl.obd_type,
+                                obd_uuid2str(&qctl.obd_uuid));
                 fprintf(stderr, "%s turn on quota failed: %s\n",
                         argv[0], strerror(errno));
                 return rc;
@@ -1426,13 +1446,11 @@ static int lfs_quotaon(int argc, char **argv)
         int c;
         char *mnt;
         struct if_quotactl qctl;
-        char *obd_type = qctl.obd_type;
-        char *obd_uuid = qctl.obd_uuid.uuid;
+        char *obd_type = (char *)qctl.obd_type;
         int rc;
 
         memset(&qctl, 0, sizeof(qctl));
         qctl.qc_cmd = LUSTRE_Q_QUOTAON;
-        qctl.qc_id = QFMT_LDISKFS;
 
         optind = 0;
         while ((c = getopt(argc, argv, "ugf")) != -1) {
@@ -1466,7 +1484,8 @@ static int lfs_quotaon(int argc, char **argv)
         rc = llapi_quotactl(mnt, &qctl);
         if (rc) {
                 if (*obd_type)
-                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                        fprintf(stderr, "%s %s ", obd_type,
+                                obd_uuid2str(&qctl.obd_uuid));
                 fprintf(stderr, "%s failed: %s\n", argv[0], strerror(errno));
                 return rc;
         }
@@ -1479,8 +1498,7 @@ static int lfs_quotaoff(int argc, char **argv)
         int c;
         char *mnt;
         struct if_quotactl qctl;
-        char *obd_type = qctl.obd_type;
-        char *obd_uuid = qctl.obd_uuid.uuid;
+        char *obd_type = (char *)qctl.obd_type;
         int rc;
 
         memset(&qctl, 0, sizeof(qctl));
@@ -1513,9 +1531,15 @@ static int lfs_quotaoff(int argc, char **argv)
         mnt = argv[optind];
 
         rc = llapi_quotactl(mnt, &qctl);
+        if (rc == -1 && errno == ESRCH) {
+                fprintf(stderr, "\n%s quotas are not enabled.\n",
+                        qctl.qc_type == 0x00 ? "user" : "group");
+                return 0;
+        }
         if (rc) {
                 if (*obd_type)
-                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                        fprintf(stderr, "%s %s ", obd_type,
+                                obd_uuid2str(&qctl.obd_uuid));
                 fprintf(stderr, "quotaoff failed: %s\n", strerror(errno));
                 return rc;
         }
@@ -1523,6 +1547,54 @@ static int lfs_quotaoff(int argc, char **argv)
         return 0;
 }
 
+/*
+ * "lfs quotainv" handler: ask the filesystem to invalidate its quota data.
+ *
+ * Recognized options:
+ *   -u  select user quota
+ *   -g  select group quota
+ *   -f  issue LUSTRE_Q_FINVALIDATE instead of LUSTRE_Q_INVALIDATE
+ * The first non-option argument is the mount point passed to
+ * llapi_quotactl().
+ *
+ * Returns 0 on success, CMD_HELP on an unknown option or a missing mount
+ * point, and the non-zero llapi_quotactl() result otherwise.
+ */
+static int lfs_quotainv(int argc, char **argv)
+{
+        int c;
+        char *mnt;
+        struct if_quotactl qctl;
+        int rc;
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd = LUSTRE_Q_INVALIDATE;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ugf")) != -1) {
+                switch (c) {
+                case 'u':
+                        qctl.qc_type |= 0x01;
+                        break;
+                case 'g':
+                        qctl.qc_type |= 0x02;
+                        break;
+                case 'f':
+                        qctl.qc_cmd = LUSTRE_Q_FINVALIDATE;
+                        break;
+                default:
+                        /* getopt() returned '?'; the option character that
+                         * was actually rejected is kept in optopt */
+                        fprintf(stderr, "error: %s: option '-%c' "
+                                        "unrecognized\n", argv[0], optopt);
+                        return CMD_HELP;
+                }
+        }
+
+        /* turn the collected bit mask into the type value:
+         * 0x01 -> 0, 0x02 -> 1, 0x03 -> 2 */
+        if (qctl.qc_type)
+                qctl.qc_type--;
+        else /* by default, invalidate quota for both user & group */
+                qctl.qc_type = 0x02;
+
+        if (argc == optind)
+                return CMD_HELP;
+
+        mnt = argv[optind];
+
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                fprintf(stderr, "quotainv failed: %s\n", strerror(errno));
+                return rc;
+        }
+
+        return 0;
+}
+
 #define ARG2INT(nr, str, msg)                                           \
 do {                                                                    \
         char *endp;                                                     \
@@ -1592,87 +1664,224 @@ error:
         return ULONG_MAX;
 }
 
-int lfs_setquota(int argc, char **argv)
+/*
+ * Parse the string str with parse_size() and assign the result to nr.
+ * When parse_size() leaves the units at 0 (presumably: no unit suffix was
+ * given -- confirm against parse_size()), the value is multiplied by
+ * defscale.  On a parse error a message is printed and the *enclosing
+ * function* returns CMD_HELP.
+ */
+#define ARG2ULL(nr, str, defscale)                                      \
+do {                                                                    \
+        unsigned long long ull_limit, ull_units = 0;                    \
+        int ull_rc = parse_size(str, &ull_limit, &ull_units, 1);        \
+                                                                        \
+        if (ull_rc < 0) {                                               \
+                fprintf(stderr, "error: bad limit value %s\n", str);    \
+                return CMD_HELP;                                        \
+        }                                                               \
+        nr = ull_limit * ((ull_units != 0) ? 1 : (defscale));           \
+} while (0)
+
+static inline int has_times_option(int argc, char **argv)
 {
-        int c;
-        char *mnt;
+        int i;
+
+        for (i = 1; i < argc; i++)
+                if (!strcmp(argv[i], "-t"))
+                        return 1;
+
+        return 0;
+}
+
+int lfs_setquota_times(int argc, char **argv)
+{
+        int c, rc;
         struct if_quotactl qctl;
-        char *obd_type = qctl.obd_type;
-        char *obd_uuid = qctl.obd_uuid.uuid;
-        int rc;
+        char *mnt, *obd_type = (char *)qctl.obd_type;
+        struct obd_dqblk *dqb = &qctl.qc_dqblk;
+        struct obd_dqinfo *dqi = &qctl.qc_dqinfo;
+        struct option long_opts[] = {
+                {"user",            no_argument,       0, 'u'},
+                {"group",           no_argument,       0, 'g'},
+                {"block-grace",     required_argument, 0, 'b'},
+                {"inode-grace",     required_argument, 0, 'i'},
+                {"times",           no_argument,       0, 't'},
+                {0, 0, 0, 0}
+        };
 
         memset(&qctl, 0, sizeof(qctl));
-        qctl.qc_cmd = LUSTRE_Q_SETQUOTA;
+        qctl.qc_cmd  = LUSTRE_Q_SETINFO;
+        qctl.qc_type = UGQUOTA;
 
         optind = 0;
-        while ((c = getopt(argc, argv, "ugt")) != -1) {
+        while ((c = getopt_long(argc, argv, "ugb:i:t", long_opts, NULL)) != -1) {
                 switch (c) {
                 case 'u':
-                        qctl.qc_type |= 0x01;
-                        break;
                 case 'g':
-                        qctl.qc_type |= 0x02;
+                        if (qctl.qc_type != UGQUOTA) {
+                                fprintf(stderr, "error: -u and -g can't be used "
+                                                "more than once\n");
+                                return CMD_HELP;
+                        }
+                        qctl.qc_type = (c == 'u') ? USRQUOTA : GRPQUOTA;
                         break;
-                case 't':
-                        qctl.qc_cmd = LUSTRE_Q_SETINFO;
+                case 'b':
+                        if ((dqi->dqi_bgrace = str2sec(optarg)) == ULONG_MAX) {
+                                fprintf(stderr, "error: bad block-grace: %s\n",
+                                        optarg);
+                                return CMD_HELP;
+                        }
+                        dqb->dqb_valid |= QIF_BTIME;
                         break;
-                default:
-                        fprintf(stderr, "error: %s: option '-%c' "
-                                        "unrecognized\n", argv[0], c);
+                case 'i':
+                        if ((dqi->dqi_igrace = str2sec(optarg)) == ULONG_MAX) {
+                                fprintf(stderr, "error: bad inode-grace: %s\n",
+                                        optarg);
+                                return CMD_HELP;
+                        }
+                        dqb->dqb_valid |= QIF_ITIME;
+                        break;
+                case 't': /* Yes, of course! */
+                        break;
+                default: /* getopt prints error message for us when opterr != 0 */
                         return CMD_HELP;
                 }
         }
 
-        if (qctl.qc_type)
-                qctl.qc_type--;
-
         if (qctl.qc_type == UGQUOTA) {
-                fprintf(stderr, "error: user and group quotas can't be set "
-                                "both\n");
+                fprintf(stderr, "error: neither -u nor -g specified\n");
                 return CMD_HELP;
         }
 
-        if (qctl.qc_cmd == LUSTRE_Q_SETQUOTA) {
-                struct obd_dqblk *dqb = &qctl.qc_dqblk;
+        if (optind != argc - 1) {
+                fprintf(stderr, "error: unexpected parameters encountered\n");
+                return CMD_HELP;
+        }
 
-                if (optind + 6 != argc)
-                        return CMD_HELP;
+        mnt = argv[optind];
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                if (*obd_type)
+                        fprintf(stderr, "%s %s ", obd_type,
+                                obd_uuid2str(&qctl.obd_uuid));
+                fprintf(stderr, "setquota failed: %s\n", strerror(errno));
+                return rc;
+        }
 
-                rc = name2id(&qctl.qc_id, argv[optind++], qctl.qc_type);
-                if (rc) {
-                        fprintf(stderr, "error: find id for name %s failed: %s\n",
-                                argv[optind - 1], strerror(errno));
-                        return CMD_HELP;
-                }
+        return 0;
+}
 
-                ARG2INT(dqb->dqb_bsoftlimit, argv[optind++], "block-softlimit");
-                ARG2INT(dqb->dqb_bhardlimit, argv[optind++], "block-hardlimit");
-                ARG2INT(dqb->dqb_isoftlimit, argv[optind++], "inode-softlimit");
-                ARG2INT(dqb->dqb_ihardlimit, argv[optind++], "inode-hardlimit");
+/* Bits collected in lfs_setquota()'s limit_mask, one per limit option seen
+ * on the command line: -b sets BSLIMIT and -B sets BHLIMIT; ISLIMIT and
+ * IHLIMIT presumably correspond to -i and -I -- confirm in lfs_setquota(). */
+#define BSLIMIT (1 << 0)
+#define BHLIMIT (1 << 1)
+#define ISLIMIT (1 << 2)
+#define IHLIMIT (1 << 3)
 
-                dqb->dqb_valid = QIF_LIMITS;
-        } else {
-                struct obd_dqinfo *dqi = &qctl.qc_dqinfo;
+int lfs_setquota(int argc, char **argv)
+{
+        int c, rc;
+        struct if_quotactl qctl;
+        char *mnt, *obd_type = (char *)qctl.obd_type;
+        struct obd_dqblk *dqb = &qctl.qc_dqblk;
+        struct option long_opts[] = {
+                {"user",            required_argument, 0, 'u'},
+                {"group",           required_argument, 0, 'g'},
+                {"block-softlimit", required_argument, 0, 'b'},
+                {"block-hardlimit", required_argument, 0, 'B'},
+                {"inode-softlimit", required_argument, 0, 'i'},
+                {"inode-hardlimit", required_argument, 0, 'I'},
+                {0, 0, 0, 0}
+        };
+        unsigned limit_mask = 0;
 
-                if (optind + 3 != argc)
-                        return CMD_HELP;
+        if (has_times_option(argc, argv))
+                return lfs_setquota_times(argc, argv);
+
+        memset(&qctl, 0, sizeof(qctl));
+        qctl.qc_cmd  = LUSTRE_Q_SETQUOTA;
+        qctl.qc_type = UGQUOTA; /* UGQUOTA makes no sense for setquota,
+                                 * so it can be used as a marker that qc_type
+                                 * isn't reinitialized from command line */
 
-                if ((dqi->dqi_bgrace = str2sec(argv[optind++])) == ULONG_MAX) {
-                        fprintf(stderr, "error: bad %s: %s\n", "block-grace", argv[optind - 1]);
+        optind = 0;
+        while ((c = getopt_long(argc, argv, "u:g:b:B:i:I:", long_opts, NULL)) != -1) {
+                switch (c) {
+                case 'u':
+                case 'g':
+                        if (qctl.qc_type != UGQUOTA) {
+                                fprintf(stderr, "error: -u and -g can't be used"
+                                                " more than once\n");
+                                return CMD_HELP;
+                        }
+                        qctl.qc_type = (c == 'u') ? USRQUOTA : GRPQUOTA;
+                        rc = name2id(&qctl.qc_id, optarg,
+                                     (qctl.qc_type == USRQUOTA) ? USER : GROUP);
+                        if (rc) {
+                                fprintf(stderr, "error: unknown id %s\n",
+                                        optarg);
+                                return CMD_HELP;
+                        }
+                        break;
+                case 'b':
+                        ARG2ULL(dqb->dqb_bsoftlimit, optarg, 1024);
+                        dqb->dqb_bsoftlimit >>= 10;
+                        limit_mask |= BSLIMIT;
+                        break;
+                case 'B':
+                        ARG2ULL(dqb->dqb_bhardlimit, optarg, 1024);
+                        dqb->dqb_bhardlimit >>= 10;
+                        limit_mask |= BHLIMIT;
+                        break;
+                case 'i':
+                        ARG2ULL(dqb->dqb_isoftlimit, optarg, 1);
+                        limit_mask |= ISLIMIT;
+                        break;
+                case 'I':
+                        ARG2ULL(dqb->dqb_ihardlimit, optarg, 1);
+                        limit_mask |= IHLIMIT;
+                        break;
+                default: /* getopt prints error message for us when opterr != 0 */
                         return CMD_HELP;
                 }
-                if ((dqi->dqi_igrace = str2sec(argv[optind++])) == ULONG_MAX) {
-                        fprintf(stderr, "error: bad %s: %s\n", "inode-grace", argv[optind - 1]);
+        }
+
+        if (qctl.qc_type == UGQUOTA) {
+                fprintf(stderr, "error: neither -u nor -g are specified\n");
+                return CMD_HELP;
+        }
+
+        if (optind != argc - 1) {
+                fprintf(stderr, "error: unexpected parameters encountered\n");
+                return CMD_HELP;
+        }
+
+        mnt = argv[optind];
+
+        if ((!(limit_mask & BHLIMIT) ^ !(limit_mask & BSLIMIT)) ||
+            (!(limit_mask & IHLIMIT) ^ !(limit_mask & ISLIMIT))) {
+                /* sigh, we can't just set blimits/ilimits */
+                struct if_quotactl tmp_qctl = {.qc_cmd  = LUSTRE_Q_GETQUOTA,
+                                               .qc_type = qctl.qc_type,
+                                               .qc_id   = qctl.qc_id};
+
+                rc = llapi_quotactl(mnt, &tmp_qctl);
+                if (rc < 0) {
+                        fprintf(stderr, "error: getquota failed\n");
                         return CMD_HELP;
                 }
+
+                if (!(limit_mask & BHLIMIT))
+                        dqb->dqb_bhardlimit = tmp_qctl.qc_dqblk.dqb_bhardlimit;
+                if (!(limit_mask & BSLIMIT))
+                        dqb->dqb_bsoftlimit = tmp_qctl.qc_dqblk.dqb_bsoftlimit;
+                if (!(limit_mask & IHLIMIT))
+                        dqb->dqb_ihardlimit = tmp_qctl.qc_dqblk.dqb_ihardlimit;
+                if (!(limit_mask & ISLIMIT))
+                        dqb->dqb_isoftlimit = tmp_qctl.qc_dqblk.dqb_isoftlimit;
         }
 
-        mnt = argv[optind];
+        dqb->dqb_valid |= (limit_mask & (BHLIMIT | BSLIMIT)) ? QIF_BLIMITS : 0;
+        dqb->dqb_valid |= (limit_mask & (IHLIMIT | ISLIMIT)) ? QIF_ILIMITS : 0;
 
         rc = llapi_quotactl(mnt, &qctl);
         if (rc) {
                 if (*obd_type)
-                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
+                        fprintf(stderr, "%s %s ", obd_type,
+                                obd_uuid2str(&qctl.obd_uuid));
                 fprintf(stderr, "setquota failed: %s\n", strerror(errno));
                 return rc;
         }
@@ -1741,7 +1950,7 @@ static void print_quota_title(char *name, struct if_quotactl *qctl)
                "files", "quota", "limit", "grace");
 }
 
-static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
+static void print_quota(char *mnt, struct if_quotactl *qctl)
 {
         time_t now;
 
@@ -1752,10 +1961,10 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
                 struct obd_dqblk *dqb = &qctl->qc_dqblk;
 
                 if (dqb->dqb_bhardlimit &&
-                    toqb(dqb->dqb_curspace) > dqb->dqb_bhardlimit) {
+                    toqb(dqb->dqb_curspace) >= dqb->dqb_bhardlimit) {
                         bover = 1;
                 } else if (dqb->dqb_bsoftlimit &&
-                           toqb(dqb->dqb_curspace) > dqb->dqb_bsoftlimit) {
+                           toqb(dqb->dqb_curspace) >= dqb->dqb_bsoftlimit) {
                         if (dqb->dqb_btime > now) {
                                 bover = 2;
                         } else {
@@ -1764,10 +1973,10 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
                 }
 
                 if (dqb->dqb_ihardlimit &&
-                    dqb->dqb_curinodes > dqb->dqb_ihardlimit) {
+                    dqb->dqb_curinodes >= dqb->dqb_ihardlimit) {
                         iover = 1;
                 } else if (dqb->dqb_isoftlimit &&
-                           dqb->dqb_curinodes > dqb->dqb_isoftlimit) {
+                           dqb->dqb_curinodes >= dqb->dqb_isoftlimit) {
                         if (dqb->dqb_btime > now) {
                                 iover = 2;
                         } else {
@@ -1789,10 +1998,16 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
 
                         if (bover)
                                 diff2str(dqb->dqb_btime, timebuf, now);
-
-                        sprintf(numbuf[0], LPU64, toqb(dqb->dqb_curspace));
-                        sprintf(numbuf[1], LPU64, dqb->dqb_bsoftlimit);
-                        sprintf(numbuf[2], LPU64, dqb->dqb_bhardlimit);
+                        sprintf(numbuf[0], (dqb->dqb_valid & QIF_SPACE) ?
+                                LPU64 : "["LPU64"]", toqb(dqb->dqb_curspace));
+                        if (qctl->qc_valid == QC_GENERAL)
+                                sprintf(numbuf[1], (dqb->dqb_valid & QIF_BLIMITS)
+                                        ? LPU64 : "["LPU64"]",
+                                        dqb->dqb_bsoftlimit);
+                        else
+                                sprintf(numbuf[1], "%s", "");
+                        sprintf(numbuf[2], (dqb->dqb_valid & QIF_BLIMITS)
+                                ? LPU64 : "["LPU64"]", dqb->dqb_bhardlimit);
                         printf(" %7s%c %6s %7s %7s",
                                numbuf[0], bover ? '*' : ' ', numbuf[1],
                                numbuf[2], bover > 1 ? timebuf : "");
@@ -1800,10 +2015,17 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
                         if (iover)
                                 diff2str(dqb->dqb_itime, timebuf, now);
 
-                        sprintf(numbuf[0], LPU64, dqb->dqb_curinodes);
-                        sprintf(numbuf[1], LPU64, dqb->dqb_isoftlimit);
-                        sprintf(numbuf[2], LPU64, dqb->dqb_ihardlimit);
-                        if (!ost_only)
+                        sprintf(numbuf[0], (dqb->dqb_valid & QIF_INODES) ?
+                                LPU64 : "["LPU64"]", dqb->dqb_curinodes);
+                       if (qctl->qc_valid == QC_GENERAL)
+                                sprintf(numbuf[1], (dqb->dqb_valid & QIF_ILIMITS)
+                                        ? LPU64 : "["LPU64"]",
+                                        dqb->dqb_isoftlimit);
+                        else
+                                sprintf(numbuf[1], "%s", "");
+                        sprintf(numbuf[2], (dqb->dqb_valid & QIF_ILIMITS) ?
+                                LPU64 : "["LPU64"]", dqb->dqb_ihardlimit);
+                        if (qctl->qc_valid != QC_OSTIDX)
                                 printf(" %7s%c %6s %7s %7s",
                                        numbuf[0], iover ? '*' : ' ', numbuf[1],
                                        numbuf[2], iover > 1 ? timebuf : "");
@@ -1821,103 +2043,89 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
         }
 }
 
-static void print_mds_quota(char *mnt, struct if_quotactl *qctl)
+static int print_obd_quota(char *mnt, struct if_quotactl *qctl, int is_mdt)
 {
-        int rc;
+        int rc = 0, rc1 = 0, count = 0;
+        __u32 valid = qctl->qc_valid;
 
-        /* XXX: this is a flag to mark that only mds quota is wanted */
-        qctl->qc_dqblk.dqb_valid = 1;
-        rc = llapi_quotactl(mnt, qctl);
+        rc = llapi_get_obd_count(mnt, &count, is_mdt);
         if (rc) {
-                fprintf(stderr, "quotactl failed: %s\n", strerror(errno));
-                return;
-        }
-        qctl->qc_dqblk.dqb_valid = 0;
-
-        print_quota(qctl->obd_uuid.uuid, qctl, 0);
-}
-
-static void print_lov_quota(char *mnt, struct if_quotactl *qctl)
-{
-        DIR *dir;
-        struct obd_uuid *uuids = NULL, *uuidp;
-        int obdcount = 1024;
-        int i, rc;
-
-        dir = opendir(mnt);
-        if (!dir) {
-                fprintf(stderr, "open %s failed: %s\n", mnt, strerror(errno));
-                return;
-        }
-
-        uuids = (struct obd_uuid *)malloc(INIT_ALLOC_NUM_OSTS *
-                                          sizeof(struct obd_uuid));
-        if (uuids == NULL)
-                goto out;
-
-retry_get_uuids:
-        rc = llapi_lov_get_uuids(dirfd(dir), uuids, &obdcount);
-        if (rc != 0) {
-                struct obd_uuid *uuids_temp;
-
-                if (rc == -EOVERFLOW) {
-                        uuids_temp = realloc(uuids, obdcount *
-                                             sizeof(struct obd_uuid));
-                        if (uuids_temp != NULL)
-                                goto retry_get_uuids;
-                        else
-                                rc = -ENOMEM;
-                }
-
-                fprintf(stderr, "get ost uuid failed: %s\n", strerror(rc));
-                goto out;
+                fprintf(stderr, "can not get %s count: %s\n",
+                        is_mdt ? "mdt": "ost", strerror(errno));
+                return rc;
         }
 
-        for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
-                memcpy(&qctl->obd_uuid, uuidp, sizeof(*uuidp));
-
-                /* XXX clear this flag to get quota from osts */
-                qctl->qc_dqblk.dqb_valid = 0;
+        for (qctl->qc_idx = 0; qctl->qc_idx < count; qctl->qc_idx++) {
+                qctl->qc_valid = is_mdt ? QC_MDTIDX : QC_OSTIDX;
                 rc = llapi_quotactl(mnt, qctl);
                 if (rc) {
-                        fprintf(stderr, "%s quotactl failed: %s\n",
-                                uuidp->uuid, strerror(errno));
+                        /* It is remote client case. */
+                        if (errno == EOPNOTSUPP) {
+                                rc = 0;
+                                goto out;
+                        }
+
+                        if (!rc1)
+                                rc1 = rc;
+                        fprintf(stderr, "quotactl %s%d failed.\n",
+                                is_mdt ? "mdt": "ost", qctl->qc_idx);
                         continue;
                 }
 
-                print_quota(uuidp->uuid, qctl, 1);
+                print_quota(obd_uuid2str(&qctl->obd_uuid), qctl);
         }
 
 out:
-        closedir(dir);
-        return;
+        qctl->qc_valid = valid;
+        return rc ? : rc1;
 }
 
 static int lfs_quota(int argc, char **argv)
 {
         int c;
-        char *name = NULL, *mnt;
+        char *mnt, *name = NULL;
         struct if_quotactl qctl = { .qc_cmd = LUSTRE_Q_GETQUOTA,
-                                    .qc_type = 0x01 };
-        char *obd_type = qctl.obd_type;
-        char *obd_uuid = qctl.obd_uuid.uuid;
-        int rc;
+                                    .qc_type = UGQUOTA };
+        char *obd_type = (char *)qctl.obd_type;
+        char *obd_uuid = (char *)qctl.obd_uuid.uuid;
+        int rc, rc1 = 0, rc2 = 0, rc3 = 0, verbose = 0, pass = 0;
+        __u32 valid = QC_GENERAL, idx = 0;
 
         optind = 0;
-        while ((c = getopt(argc, argv, "ugto:")) != -1) {
+        while ((c = getopt(argc, argv, "ugto:i:I:v")) != -1) {
                 switch (c) {
                 case 'u':
-                        qctl.qc_type = 0x01;
+                        if (qctl.qc_type != UGQUOTA) {
+                                fprintf(stderr, "error: use either -u or -g\n");
+                                return CMD_HELP;
+                        }
+                        qctl.qc_type = USRQUOTA;
                         break;
                 case 'g':
-                        qctl.qc_type = 0x02;
+                        if (qctl.qc_type != UGQUOTA) {
+                                fprintf(stderr, "error: use either -u or -g\n");
+                                return CMD_HELP;
+                        }
+                        qctl.qc_type = GRPQUOTA;
                         break;
                 case 't':
                         qctl.qc_cmd = LUSTRE_Q_GETINFO;
                         break;
                 case 'o':
+                        valid = qctl.qc_valid = QC_UUID;
                         strncpy(obd_uuid, optarg, sizeof(qctl.obd_uuid));
                         break;
+                case 'i':
+                        valid = qctl.qc_valid = QC_MDTIDX;
+                        idx = qctl.qc_idx = atoi(optarg);
+                        break;
+                case 'I':
+                        valid = qctl.qc_valid = QC_OSTIDX;
+                        idx = qctl.qc_idx = atoi(optarg);
+                        break;
+                case 'v':
+                        verbose = 1;
+                        break;
                 default:
                         fprintf(stderr, "error: %s: option '-%c' "
                                         "unrecognized\n", argv[0], c);
@@ -1925,57 +2133,80 @@ static int lfs_quota(int argc, char **argv)
                 }
         }
 
-        if (qctl.qc_type)
-                qctl.qc_type--;
-
-
-        if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) {
+        /* current uid/gid info for "lfs quota /path/to/lustre/mount" */
+        if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && qctl.qc_type == UGQUOTA &&
+            optind == argc - 1) {
+ug_output:
+                memset(&qctl, 0, sizeof(qctl)); /* spoiled by print_*_quota */
+                qctl.qc_cmd = LUSTRE_Q_GETQUOTA;
+                qctl.qc_valid = valid;
+                qctl.qc_idx = idx;
+                if (pass++ == 0) {
+                        qctl.qc_type = USRQUOTA;
+                        qctl.qc_id = geteuid();
+                } else {
+                        qctl.qc_type = GRPQUOTA;
+                        qctl.qc_id = getegid();
+                }
+                rc = id2name(&name, qctl.qc_id,
+                             (qctl.qc_type == USRQUOTA) ? USER : GROUP);
+                if (rc)
+                        name = "<unknown>";
+        } else if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) {
                 if (optind + 2 != argc) {
                         fprintf(stderr, "error: missing quota argument(s)\n");
                         return CMD_HELP;
                 }
 
                 name = argv[optind++];
-                rc = name2id(&qctl.qc_id, name, qctl.qc_type);
+                rc = name2id(&qctl.qc_id, name,
+                             (qctl.qc_type == USRQUOTA) ? USER : GROUP);
                 if (rc) {
                         fprintf(stderr,"error: can't find id for name %s: %s\n",
                                 name, strerror(errno));
                         return CMD_HELP;
                 }
-                print_quota_title(name, &qctl);
         } else if (optind + 1 != argc) {
                 fprintf(stderr, "error: missing quota info argument(s)\n");
                 return CMD_HELP;
         }
 
+        if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA)
+                print_quota_title(name, &qctl);
+
         mnt = argv[optind];
 
-        rc = llapi_quotactl(mnt, &qctl);
-        if (rc) {
-                if (*obd_type)
-                        fprintf(stderr, "%s %s ", obd_type, obd_uuid);
-                fprintf(stderr, "quota failed: %s\n", strerror(errno));
-                return rc;
+        rc1 = llapi_quotactl(mnt, &qctl);
+        if (rc1 == -1 && errno == ESRCH) {
+                fprintf(stderr, "\n%s quotas are not enabled.\n", 
+                        qctl.qc_type == USRQUOTA ? "user" : "group");
+                goto out;
         }
+        if (rc1 && *obd_type)
+                fprintf(stderr, "%s %s ", obd_type, obd_uuid);
 
-        if (!name)
-                rc = id2name(&name, getuid(), qctl.qc_type);
+        if (qctl.qc_valid != QC_GENERAL)
+                mnt = obd_uuid2str(&qctl.obd_uuid);
 
-        if (*obd_uuid) {
-                mnt = "";
-                name = obd_uuid;
+        print_quota(mnt, &qctl);
+
+        if (qctl.qc_valid == QC_GENERAL && qctl.qc_cmd != LUSTRE_Q_GETINFO && verbose) {
+                rc2 = print_obd_quota(mnt, &qctl, 1);
+                rc3 = print_obd_quota(mnt, &qctl, 0);
         }
 
-        print_quota(mnt, &qctl, 0);
+        if (rc1 || rc2 || rc3)
+                printf("Some errors happened when getting quota info. "
+                       "Some devices may be not working or deactivated. "
+                       "The data in \"[]\" is inaccurate.\n");
 
-        if (!*obd_uuid && qctl.qc_cmd != LUSTRE_Q_GETINFO) {
-                print_mds_quota(mnt, &qctl);
-                print_lov_quota(mnt, &qctl);
-        }
+out:
+        if (pass == 1)
+                goto ug_output;
 
         return 0;
 }
-#endif /* HAVE_QUOTA_SUPPORT */
+#endif /* HAVE_SYS_QUOTA_H! */
 
 static int flushctx_ioctl(char *mp)
 {
index 9aad868..3856947 100644 (file)
@@ -160,18 +160,25 @@ void llapi_printf(int level, char *fmt, ...)
         va_end(args);
 }
 
+/**
+ * size_units is left unchanged if no unit specifier is used
+ */
 int parse_size(char *optarg, unsigned long long *size,
-               unsigned long long *size_units)
+               unsigned long long *size_units, int bytes_spec)
 {
         char *end;
 
-        *size = strtoul(optarg, &end, 0);
+        *size = strtoull(optarg, &end, 0);
 
         if (*end != '\0') {
                 if ((*end == 'b') && *(end+1) == '\0' &&
-                    (*size & (~0ULL << (64 - 9))) == 0) {
+                    (*size & (~0ULL << (64 - 9))) == 0 &&
+                    !bytes_spec) {
                         *size <<= 9;
                         *size_units = 1 << 9;
+                } else if ((*end == 'b') && *(end+1) == '\0' &&
+                           bytes_spec) {
+                        *size_units = 1;
                 } else if ((*end == 'k' || *end == 'K') &&
                            *(end+1) == '\0' && (*size &
                            (~0ULL << (64 - 10))) == 0) {
@@ -651,6 +658,24 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count)
         return rc;
 }
 
+int llapi_get_obd_count(char *mnt, int *count, int is_mdt)
+{
+        DIR *root;
+        int rc; 
+
+        root = opendir(mnt);
+        if (!root) {
+                llapi_err(LLAPI_MSG_ERROR, "open %s failed", mnt);
+                return -1;
+        }
+
+        *count = is_mdt;
+        rc = ioctl(dirfd(root), LL_IOC_GETOBDCOUNT, count);
+
+        closedir(root);
+        return rc; 
+}
+
 /* Here, param->obduuid points to a single obduuid, the index of which is
  * returned in param->obdindex */
 static int setup_obd_uuid(DIR *dir, char *dname, struct find_param *param)
@@ -1201,7 +1226,7 @@ err:
  * @mds indicates if this is MDS timestamps and there are attributes on OSTs.
  *
  * The result is -1 if it does not match, 0 if not yet clear, 1 if matches.
- * The table bolow gives the answers for the specified parameters (value and
+ * The table below gives the answers for the specified parameters (value and
  * sign), 1st column is the answer for the MDS value, the 2nd is for the OST:
  * --------------------------------------
  * 1 | file > limit; sign > 0 | -1 / -1 |
index d106c64..861e318 100755 (executable)
@@ -222,9 +222,9 @@ lmc_options = [
     ('quota', """
     quotaon: enable quota, only u|g|ug is supported now.
       iunit: the unit for slave to acquire/release inode quota from/to master.
-             Int type (>0), default value in Lustre is 5000 inodes.
+             Int type (>0), default value in Lustre is 5120 inodes.
       bunit: the unit for slave to acquire/release block quota from/to master.
-             Mbytes (>0), default value in Lustre is 100(Mbytes).
+             Mbytes (>0), default value in Lustre is 128(Mbytes).
       itune: used to tune the threthold. When inode quota usage reach the threthold,
              slave should acquire/release inode quota from/to master.
              Int type (100 > btune > 0), default value in Lustre is 50 (percentge).
index dc366bd..2611093 100644 (file)
@@ -69,6 +69,7 @@
 #define lustre_swab_llog_hdr NULL
 #define lustre_swab_llogd_body NULL
 #define lustre_swab_obd_quotactl NULL
+#define lustre_swab_quota_adjust_qunit NULL
 #define lustre_swab_mgs_target_info NULL
 #define lustre_swab_niobuf_remote NULL
 #define lustre_swab_obd_ioobj NULL
index 4ca8ec5..55d026b 100644 (file)
@@ -206,8 +206,8 @@ static void check_obd_connect_data(void)
         CHECK_CDEFINE(OBD_CONNECT_JOIN);
         CHECK_CDEFINE(OBD_CONNECT_ATTRFID);
         CHECK_CDEFINE(OBD_CONNECT_NODEVOH);
-        CHECK_CDEFINE(OBD_CONNECT_LCL_CLIENT);
         CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT);
+        CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT_FORCE);
         CHECK_CDEFINE(OBD_CONNECT_BRW_SIZE);
         CHECK_CDEFINE(OBD_CONNECT_QUOTA64);
         CHECK_CDEFINE(OBD_CONNECT_MDS_CAPA);
@@ -1051,17 +1051,8 @@ check_qunit_data(void)
         CHECK_MEMBER(qunit_data, qd_id);
         CHECK_MEMBER(qunit_data, qd_flags);
         CHECK_MEMBER(qunit_data, qd_count);
-}
-
-static void
-check_qunit_data_old(void)
-{
-        BLANK_LINE();
-        CHECK_STRUCT(qunit_data_old);
-        CHECK_MEMBER(qunit_data_old, qd_id);
-        CHECK_MEMBER(qunit_data_old, qd_type);
-        CHECK_MEMBER(qunit_data_old, qd_count);
-        CHECK_MEMBER(qunit_data_old, qd_isblk);
+        CHECK_MEMBER(qunit_data, qd_qunit);
+        CHECK_MEMBER(qunit_data, padding);
 }
 
 static void
@@ -1122,6 +1113,18 @@ check_posix_acl_xattr_header(void)
 }
 
 static void
+check_quota_adjust_qunit(void)
+{
+        BLANK_LINE();
+        CHECK_STRUCT(quota_adjust_qunit);
+        CHECK_MEMBER(quota_adjust_qunit, qaq_flags);
+        CHECK_MEMBER(quota_adjust_qunit, qaq_id);
+        CHECK_MEMBER(quota_adjust_qunit, qaq_bunit_sz);
+        CHECK_MEMBER(quota_adjust_qunit, qaq_iunit_sz);
+        CHECK_MEMBER(quota_adjust_qunit, padding1);
+}
+
+static void
 check_ll_user_fiemap(void)
 {
         BLANK_LINE();
@@ -1269,6 +1272,7 @@ main(int argc, char **argv)
         CHECK_VALUE(OST_SYNC);
         CHECK_VALUE(OST_QUOTACHECK);
         CHECK_VALUE(OST_QUOTACTL);
+        CHECK_VALUE(OST_QUOTA_ADJUST_QUNIT);
         CHECK_VALUE(OST_LAST_OPC);
 
         CHECK_DEFINE(OBD_OBJECT_EOF);
@@ -1411,7 +1415,7 @@ main(int argc, char **argv)
         check_llog_array_rec();
         check_mds_extent_desc();
         check_qunit_data();
-        check_qunit_data_old();
+        check_quota_adjust_qunit();
         check_mgs_target_info();
         check_lustre_disk_data();
         check_ll_user_fiemap();
index e56dd28..02e5746 100644 (file)
@@ -126,7 +126,9 @@ void lustre_assert_wire_constants(void)
                  (long long)OST_QUOTACHECK);
         LASSERTF(OST_QUOTACTL == 19, " found %lld\n",
                  (long long)OST_QUOTACTL);
-        LASSERTF(OST_LAST_OPC == 20, " found %lld\n",
+        LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, " found %lld\n",
+                 (long long)OST_QUOTA_ADJUST_QUNIT);
+        LASSERTF(OST_LAST_OPC == 21, " found %lld\n",
                  (long long)OST_LAST_OPC);
         LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n",
                  (long long)OBD_OBJECT_EOF);
@@ -250,9 +252,9 @@ void lustre_assert_wire_constants(void)
                  (long long)OBD_QC_CALLBACK);
         LASSERTF(OBD_LAST_OPC == 403, " found %lld\n",
                  (long long)OBD_LAST_OPC);
-        LASSERTF(QUOTA_DQACQ == 601, " found %lld\n",
+        LASSERTF(QUOTA_DQACQ == 901, " found %lld\n",
                  (long long)QUOTA_DQACQ);
-        LASSERTF(QUOTA_DQREL == 602, " found %lld\n",
+        LASSERTF(QUOTA_DQREL == 902, " found %lld\n",
                  (long long)QUOTA_DQREL);
         LASSERTF(MGS_CONNECT == 250, " found %lld\n",
                  (long long)MGS_CONNECT);
@@ -458,8 +460,8 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL);
         CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL);
         CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL);
-        CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x00010000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00020000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00010000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x00020000ULL);
         CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL);
         CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL);
         CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL);
@@ -2142,7 +2144,7 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm));
 
         /* Checks for struct qunit_data */
-        LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n",
+        LASSERTF((int)sizeof(struct qunit_data) == 32, " found %lld\n",
                  (long long)(int)sizeof(struct qunit_data));
         LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n",
                  (long long)(int)offsetof(struct qunit_data, qd_id));
@@ -2156,26 +2158,38 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct qunit_data, qd_count));
         LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct qunit_data *)0)->qd_count));
-
-        /* Checks for struct qunit_data_old */
-        LASSERTF((int)sizeof(struct qunit_data_old) == 16, " found %lld\n",
-                 (long long)(int)sizeof(struct qunit_data_old));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_id) == 0, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_id));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_id) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_id));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_type) == 4, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_type));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_type) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_type));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_count) == 8, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_count));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_count) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_count));
-        LASSERTF((int)offsetof(struct qunit_data_old, qd_isblk) == 12, " found %lld\n",
-                 (long long)(int)offsetof(struct qunit_data_old, qd_isblk));
-        LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_isblk) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_isblk));
+        LASSERTF((int)offsetof(struct qunit_data, qd_qunit) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, qd_qunit));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_qunit) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->qd_qunit));
+        LASSERTF((int)offsetof(struct qunit_data, padding) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct qunit_data, padding));
+        LASSERTF((int)sizeof(((struct qunit_data *)0)->padding) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct qunit_data *)0)->padding));
+
+        /* Checks for struct quota_adjust_qunit */
+        LASSERTF((int)sizeof(struct quota_adjust_qunit) == 32, " found %lld\n",
+                 (long long)(int)sizeof(struct quota_adjust_qunit));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_flags) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_flags));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_id) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_id));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz));
+        LASSERTF((int)offsetof(struct quota_adjust_qunit, padding1) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct quota_adjust_qunit, padding1));
+        LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->padding1) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->padding1));
 
         /* Checks for struct mgs_target_info */
         LASSERTF((int)sizeof(struct mgs_target_info) == 4544, " found %lld\n",