From cefa8cda2ba2d288ccaa4ec077a6c627592503ea Mon Sep 17 00:00:00 2001 From: fanyong Date: Sat, 15 Nov 2008 18:39:35 +0000 Subject: [PATCH] Land b_head_quota onto HEAD (20081116_0105) b=13058 i=johann i=yury.umanets --- lustre/ChangeLog | 26 + lustre/Makefile.in | 2 +- lustre/autoMakefile.am | 2 - lustre/autoconf/lustre-core.m4 | 119 +- lustre/cmm/cmm_device.c | 277 ++++ lustre/cmm/cmm_object.c | 2 +- lustre/cmm/mdc_device.c | 2 +- lustre/doc/lfs.1 | 49 +- lustre/fid/fid_store.c | 2 +- lustre/fld/fld_index.c | 2 +- lustre/include/class_hash.h | 4 +- lustre/include/dt_object.h | 16 +- lustre/include/linux/lustre_acl.h | 2 +- lustre/include/linux/lustre_compat25.h | 27 +- lustre/include/linux/lustre_fsfilt.h | 31 +- lustre/include/linux/lustre_user.h | 3 + lustre/include/lprocfs_status.h | 78 +- lustre/include/lustre/liblustreapi.h | 5 +- lustre/include/lustre/lustre_idl.h | 171 ++- lustre/include/lustre/lustre_user.h | 33 +- lustre/include/lustre_capa.h | 113 +- lustre/include/lustre_export.h | 15 +- lustre/include/lustre_lib.h | 3 +- lustre/include/lustre_net.h | 26 +- lustre/include/lustre_quota.h | 595 ++++++-- lustre/include/lustre_req_layout.h | 2 + lustre/include/lustre_sec.h | 9 + lustre/include/md_object.h | 104 +- lustre/include/obd.h | 31 +- lustre/include/obd_class.h | 55 +- lustre/include/obd_ost.h | 1 + lustre/include/obd_support.h | 4 +- .../quota-fix-oops-in-invalidate_dquots.patch | 127 ++ .../patches/quota-large-limits-rhel5.patch | 616 ++++++++ .../patches/quota-large-limits-sles10.patch | 616 ++++++++ lustre/kernel_patches/series/2.6-rhel5.series | 1 + lustre/kernel_patches/series/2.6-sles10.series | 2 + lustre/kernel_patches/series/2.6.22-vanilla.series | 1 + lustre/ldlm/ldlm_lib.c | 100 +- lustre/ldlm/ldlm_lock.c | 3 +- lustre/ldlm/ldlm_lockd.c | 3 +- lustre/liblustre/Makefile.am | 2 +- lustre/liblustre/file.c | 2 +- lustre/liblustre/lutil.c | 8 +- lustre/llite/dir.c | 164 ++- lustre/llite/llite_capa.c | 80 +- lustre/llite/llite_lib.c | 44 +- 
lustre/llite/namei.c | 10 +- lustre/lmv/lmv_obd.c | 88 +- lustre/lov/lov_obd.c | 55 +- lustre/lov/lov_request.c | 3 +- lustre/lvfs/autoMakefile.am | 5 +- lustre/lvfs/fsfilt_ext3.c | 180 ++- lustre/lvfs/fsfilt_reiserfs.c | 2 - lustre/lvfs/lustre_quota_fmt.c | 483 ++++--- lustre/lvfs/lustre_quota_fmt.h | 109 +- lustre/lvfs/quotafmt_test.c | 12 +- lustre/mdc/mdc_internal.h | 19 - lustre/mdc/mdc_request.c | 65 +- lustre/mdd/Makefile.in | 2 +- lustre/mdd/mdd_device.c | 21 +- lustre/mdd/mdd_dir.c | 347 ++++- lustre/mdd/mdd_internal.h | 49 + lustre/mdd/mdd_lov.c | 19 +- lustre/mdd/mdd_lproc.c | 21 + lustre/mdd/mdd_object.c | 151 +- lustre/mdd/mdd_orphans.c | 2 +- lustre/mdd/mdd_permission.c | 2 +- lustre/mdd/mdd_quota.c | 276 ++++ lustre/mdd/mdd_trans.c | 26 +- lustre/mds/handler.c | 29 +- lustre/mds/lproc_mds.c | 173 --- lustre/mds/mds_fs.c | 3 +- lustre/mds/mds_internal.h | 4 +- lustre/mds/mds_lov.c | 4 +- lustre/mdt/mdt_handler.c | 269 +++- lustre/mdt/mdt_identity.c | 4 +- lustre/mdt/mdt_idmap.c | 148 +- lustre/mdt/mdt_internal.h | 20 +- lustre/mdt/mdt_lib.c | 48 +- lustre/mdt/mdt_lproc.c | 35 + lustre/mdt/mdt_open.c | 14 +- lustre/mdt/mdt_recovery.c | 16 +- lustre/mdt/mdt_reint.c | 7 +- lustre/mdt/mdt_xattr.c | 29 +- lustre/obdclass/capa.c | 147 +- lustre/obdclass/class_obd.c | 1 - lustre/obdclass/genops.c | 12 - lustre/obdclass/llog_lvfs.c | 2 +- lustre/obdclass/lprocfs_status.c | 4 +- lustre/obdclass/lu_object.c | 2 +- lustre/obdclass/obd_config.c | 4 + lustre/obdecho/echo.c | 2 +- lustre/obdecho/echo_client.c | 6 +- lustre/obdfilter/filter.c | 72 +- lustre/obdfilter/filter_capa.c | 69 +- lustre/obdfilter/filter_internal.h | 4 +- lustre/obdfilter/filter_io.c | 14 +- lustre/obdfilter/filter_io_26.c | 46 +- lustre/obdfilter/filter_log.c | 2 +- lustre/obdfilter/lproc_obdfilter.c | 41 +- lustre/osc/osc_cl_internal.h | 6 +- lustre/osc/osc_io.c | 11 +- lustre/osc/osc_page.c | 24 +- lustre/osc/osc_request.c | 37 +- lustre/osd/osd_handler.c | 334 ++++- 
lustre/osd/osd_internal.h | 13 + lustre/osd/osd_oi.c | 5 +- lustre/osd/osd_oi.h | 2 +- lustre/ost/ost_handler.c | 210 ++- lustre/ptlrpc/layout.c | 44 +- lustre/ptlrpc/lproc_ptlrpc.c | 26 +- lustre/ptlrpc/pack_generic.c | 123 +- lustre/ptlrpc/ptlrpc_module.c | 2 +- lustre/ptlrpc/recover.c | 4 +- lustre/ptlrpc/sec.c | 29 - lustre/ptlrpc/service.c | 19 +- lustre/ptlrpc/wiretest.c | 66 +- lustre/quota/Makefile.in | 2 +- lustre/quota/autoMakefile.am | 4 +- lustre/quota/lproc_quota.c | 667 +++++++++ lustre/quota/quota_adjust_qunit.c | 419 ++++++ lustre/quota/quota_check.c | 65 +- lustre/quota/quota_context.c | 1047 ++++++++++---- lustre/quota/quota_ctl.c | 146 +- lustre/quota/quota_interface.c | 751 ++++++---- lustre/quota/quota_internal.h | 125 +- lustre/quota/quota_master.c | 760 ++++++++-- lustre/tests/acceptance-small.sh | 1 - lustre/tests/cfg/insanity-lmv.sh | 8 + lustre/tests/cfg/lmv.sh | 8 +- lustre/tests/cfg/local.sh | 9 +- lustre/tests/sanity-quota.sh | 1515 ++++++++++++++------ lustre/tests/sanity-sec.sh | 197 ++- lustre/tests/sanity.sh | 6 +- lustre/tests/test-framework.sh | 24 +- lustre/utils/l_getidentity.c | 2 + lustre/utils/lfs.c | 595 +++++--- lustre/utils/liblustreapi.c | 33 +- lustre/utils/lmc | 4 +- lustre/utils/req-layout.c | 1 + lustre/utils/wirecheck.c | 30 +- lustre/utils/wiretest.c | 66 +- 143 files changed, 11104 insertions(+), 3094 deletions(-) create mode 100644 lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch create mode 100644 lustre/kernel_patches/patches/quota-large-limits-rhel5.patch create mode 100644 lustre/kernel_patches/patches/quota-large-limits-sles10.patch create mode 100644 lustre/mdd/mdd_quota.c create mode 100644 lustre/quota/lproc_quota.c create mode 100644 lustre/quota/quota_adjust_qunit.c diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 6a4513f..0026ea9 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -66,6 +66,15 @@ Description: Hitting mdc_commit_close() ASSERTION Details : Properly handle 
request reference release in ll_release_openhandle(). +Severity : major +Bugzilla : 14840 +Description: quota recovery deadlock during mds failover +Details : This patch includes att18982, att18236, att18237 in bz14840. + Solve the problems: + 1. fix osts hang when mds does failover with quotaon + 2. prevent watchdog storm when osts threads wait for the + recovery of mds + Severity : normal Bugzilla : 15975 Frequency : only patchless client @@ -150,6 +159,23 @@ Details : Apply the MGS_CONNECT_SUPPORTED mask at reconnect time so the connect flags are properly negotiated. Severity : normal +Frequency : often +Bugzilla : 16125 +Description: quotas are not honored with O_DIRECT +Details : all writes with the flag O_DIRECT will use grants which leads to + this problem. Now using OBD_BRW_SYNC to guard this. + +Severity : normal +Bugzilla : 15058 +Description: add quota statistics +Details : 1. sort out quota proc entries and proc code. + 2. add quota statistics + +Severity : enhancement +Bugzilla : 13058 +Description: enable quota support for HEAD. + +Severity : normal Bugzilla : 16006 Description: Properly propagate oinfo flags from lov to osc for statfs Details : restore missing copy oi_flags to lov requests. 
diff --git a/lustre/Makefile.in b/lustre/Makefile.in index 82c5433..f1c44fa 100644 --- a/lustre/Makefile.in +++ b/lustre/Makefile.in @@ -6,9 +6,9 @@ subdir-m += ptlrpc subdir-m += osc subdir-m += obdecho subdir-m += mgc +subdir-m += quota @SERVER_TRUE@subdir-m += mds obdfilter ost mgs mdt cmm mdd osd @CLIENT_TRUE@subdir-m += mdc lmv llite fld -@QUOTA_TRUE@subdir-m += quota @INCLUDE_RULES@ diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index 3ad4024..51658ae 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -58,9 +58,7 @@ if CLIENT SUBDIRS += $(CLIENT_SUBDIRS) endif -if QUOTA SUBDIRS += $(QUOTA_SUBDIRS) -endif # this needs to be after the client subdirs if LIBLUSTRE diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 2a87e8f..317d3c2 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -703,6 +703,18 @@ LB_LINUX_CONFIG_IM([CRYPTO_SHA1],[],[ ]) ]) +# +# LC_CONFIG_RMTCLIENT +# +dnl FIXME +dnl the AES symbol usually tied with arch, e.g. CRYPTO_AES_586 +dnl FIXME +AC_DEFUN([LC_CONFIG_RMTCLIENT], +[LB_LINUX_CONFIG_IM([CRYPTO_AES],[],[ + AC_MSG_ERROR([Lustre remote client require that CONFIG_CRYPTO_AES is enabled in your kernel.]) +]) +]) + AC_DEFUN([LC_SUNRPC_CACHE], [AC_MSG_CHECKING([if sunrpc struct cache_head uses kref]) LB_LINUX_TRY_COMPILE([ @@ -784,11 +796,6 @@ AC_DEFUN([LC_CONFIG_GSS], [AC_MSG_WARN([kernel TWOFISH support is recommended by using GSS.])]) LB_LINUX_CONFIG_IM([CRYPTO_CAST6],[], [AC_MSG_WARN([kernel CAST6 support is recommended by using GSS.])]) - dnl FIXME - dnl the AES symbol usually tied with arch, e.g. 
CRYPTO_AES_586 - dnl FIXME - LB_LINUX_CONFIG_IM([CRYPTO_AES],[], - [AC_MSG_WARN([kernel AES support is recommended by using GSS.])]) AC_CHECK_LIB([gssapi], [gss_init_sec_context], [GSSAPI_LIBS="$GSSAPI_LDFLAGS -lgssapi"], @@ -1551,9 +1558,9 @@ AC_DEFUN([LC_PROG_LINUX], LC_CONFIG_PINGER LC_CONFIG_CHECKSUM LC_CONFIG_LIBLUSTRE_RECOVERY - LC_CONFIG_QUOTA LC_CONFIG_HEALTH_CHECK_WRITE LC_CONFIG_LRU_RESIZE + LC_QUOTA_MODULE LC_TASK_PPTR # RHEL4 patches @@ -1591,6 +1598,7 @@ AC_DEFUN([LC_PROG_LINUX], LC_FUNC_SET_FS_PWD LC_CAPA_CRYPTO + LC_CONFIG_RMTCLIENT LC_CONFIG_GSS LC_FUNC_MS_FLOCK_LOCK LC_FUNC_HAVE_CAN_SLEEP_ARG @@ -1599,6 +1607,7 @@ AC_DEFUN([LC_PROG_LINUX], LC_COOKIE_FOLLOW_LINK LC_FUNC_RCU LC_PERCPU_COUNTER + LC_QUOTA64 # does the kernel have VFS intent patches? LC_VFS_INTENT_PATCHES @@ -1645,7 +1654,7 @@ AC_DEFUN([LC_PROG_LINUX], # raid5-zerocopy patch LC_PAGE_CONSTANT - + # 2.6.22 LC_INVALIDATE_BDEV_2ARG LC_ASYNC_BLOCK_CIPHER @@ -1778,50 +1787,35 @@ fi # # LC_CONFIG_QUOTA # -# whether to enable quota support +# whether to enable quota support global control # AC_DEFUN([LC_CONFIG_QUOTA], [AC_ARG_ENABLE([quota], AC_HELP_STRING([--enable-quota], [enable quota support]), - [],[enable_quota='default']) -if test x$linux25 != xyes; then - enable_quota='no' -fi -LB_LINUX_CONFIG([QUOTA],[ - if test x$enable_quota = xdefault; then - enable_quota='yes' - fi -],[ - if test x$enable_quota = xdefault; then - enable_quota='no' - AC_MSG_WARN([quota is not enabled because the kernel lacks quota support]) - else - if test x$enable_quota = xyes; then - AC_MSG_ERROR([cannot enable quota because the kernel lacks quota support]) - fi - fi + [],[enable_quota='yes']) ]) -if test x$enable_quota != xno; then + +# whether to enable quota support(kernel modules) +AC_DEFUN([LC_QUOTA_MODULE], +[if test x$enable_quota != xno; then + LB_LINUX_CONFIG([QUOTA],[ + enable_quota_module='yes' AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support]) + ],[ + enable_quota_module='no' + 
AC_MSG_WARN([quota is not enabled because the kernel - lacks quota support]) + ]) fi ]) -# -# LC_CONFIG_SPLIT -# -# whether to enable split support -# -AC_DEFUN([LC_CONFIG_SPLIT], -[AC_MSG_CHECKING([whether to enable split support]) -AC_ARG_ENABLE([split], - AC_HELP_STRING([--enable-split], - [enable split support]), - [],[enable_split='no']) -AC_MSG_RESULT([$enable_split]) -if test x$enable_split != xno; then - AC_DEFINE(HAVE_SPLIT_SUPPORT, 1, [enable split support]) -fi +AC_DEFUN([LC_QUOTA], +[#check global +LC_CONFIG_QUOTA +#check for utils +AC_CHECK_HEADER(sys/quota.h, + [AC_DEFINE(HAVE_SYS_QUOTA_H, 1, [Define to 1 if you have <sys/quota.h>.])], + [AC_MSG_ERROR([don't find <sys/quota.h> in your system])]) ]) AC_DEFUN([LC_QUOTA_READ], @@ -1840,6 +1834,23 @@ LB_LINUX_TRY_COMPILE([ ]) # +# LC_CONFIG_SPLIT +# +# whether to enable split support +# +AC_DEFUN([LC_CONFIG_SPLIT], +[AC_MSG_CHECKING([whether to enable split support]) +AC_ARG_ENABLE([split], + AC_HELP_STRING([--enable-split], + [enable split support]), + [],[enable_split='no']) +AC_MSG_RESULT([$enable_split]) +if test x$enable_split != xno; then + AC_DEFINE(HAVE_SPLIT_SUPPORT, 1, [enable split support]) +fi +]) + +# # LC_COOKIE_FOLLOW_LINK # # kernel 2.6.13+ ->follow_link returns a cookie @@ -1942,6 +1953,30 @@ LB_LINUX_TRY_COMPILE([ ]) # +# LC_QUOTA64 +# linux kernel may have 64-bit limits support +# +AC_DEFUN([LC_QUOTA64], +[AC_MSG_CHECKING([if kernel has 64-bit quota limits support]) +LB_LINUX_TRY_COMPILE([ + #include + #include + #include + int versions[] = V2_INITQVERSIONS_R1; + struct v2_disk_dqblk_r1 dqblk_r1; +],[],[ + AC_DEFINE(HAVE_QUOTA64, 1, [have quota64]) + AC_MSG_RESULT([yes]) + +],[ + AC_MSG_WARN([You have got no 64-bit kernel quota support.]) + AC_MSG_WARN([Continuing with limited quota support.]) + AC_MSG_WARN([quotacheck is needed for filesystems with recent quota versions.]) + AC_MSG_RESULT([no]) +]) +]) + +# # LC_CONFIGURE # # other configure checks @@ -2046,7 +2081,7 @@ AM_CONDITIONAL(LIBLUSTRE_TESTS, test 
x$enable_liblustre_tests = xyes) AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests) AM_CONDITIONAL(CLIENT, test x$enable_client = xyes) AM_CONDITIONAL(SERVER, test x$enable_server = xyes) -AM_CONDITIONAL(QUOTA, test x$enable_quota = xyes) +AM_CONDITIONAL(QUOTA, test x$enable_quota_module = xyes) AM_CONDITIONAL(SPLIT, test x$enable_split = xyes) AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes) AM_CONDITIONAL(EXT2FS_DEVEL, test x$ac_cv_header_ext2fs_ext2fs_h = xyes) diff --git a/lustre/cmm/cmm_device.c b/lustre/cmm/cmm_device.c index d2c435d..839322a 100644 --- a/lustre/cmm/cmm_device.c +++ b/lustre/cmm/cmm_device.c @@ -53,6 +53,9 @@ #include #include "cmm_internal.h" #include "mdc_internal.h" +#ifdef HAVE_QUOTA_SUPPORT +# include +#endif static struct obd_ops cmm_obd_device_ops = { .o_owner = THIS_MODULE @@ -127,12 +130,286 @@ static int cmm_update_capa_key(const struct lu_env *env, RETURN(rc); } +#ifdef HAVE_QUOTA_SUPPORT +static int cmm_quota_notify(const struct lu_env *env, struct md_device *m) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_notify(env, + cmm_dev->cmm_child); + RETURN(rc); +} + +static int cmm_quota_setup(const struct lu_env *env, struct md_device *m, + void *data) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_setup(env, + cmm_dev->cmm_child, + data); + RETURN(rc); +} + +static int cmm_quota_cleanup(const struct lu_env *env, struct md_device *m) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. 
*/ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_cleanup(env, + cmm_dev->cmm_child); + RETURN(rc); +} + +static int cmm_quota_recovery(const struct lu_env *env, struct md_device *m) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_recovery(env, + cmm_dev->cmm_child); + RETURN(rc); +} + +static int cmm_quota_check(const struct lu_env *env, struct md_device *m, + struct obd_export *exp, __u32 type) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_check(env, + cmm_dev->cmm_child, + exp, type); + RETURN(rc); +} + +static int cmm_quota_on(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_on(env, + cmm_dev->cmm_child, + type, id); + RETURN(rc); +} + +static int cmm_quota_off(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_off(env, + cmm_dev->cmm_child, + type, id); + RETURN(rc); +} + +static int cmm_quota_setinfo(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. 
*/ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_setinfo(env, + cmm_dev->cmm_child, + type, id, dqinfo); + RETURN(rc); +} + +static int cmm_quota_getinfo(const struct lu_env *env, + const struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo) +{ + struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getinfo(env, + cmm_dev->cmm_child, + type, id, dqinfo); + RETURN(rc); +} + +static int cmm_quota_setquota(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_setquota(env, + cmm_dev->cmm_child, + type, id, dqblk); + RETURN(rc); +} + +static int cmm_quota_getquota(const struct lu_env *env, + const struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk) +{ + struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getquota(env, + cmm_dev->cmm_child, + type, id, dqblk); + RETURN(rc); +} + +static int cmm_quota_getoinfo(const struct lu_env *env, + const struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo) +{ + struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. 
*/ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getoinfo(env, + cmm_dev->cmm_child, + type, id, dqinfo); + RETURN(rc); +} + +static int cmm_quota_getoquota(const struct lu_env *env, + const struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk) +{ + struct cmm_device *cmm_dev = md2cmm_dev((struct md_device *)m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_getoquota(env, + cmm_dev->cmm_child, + type, id, dqblk); + RETURN(rc); +} + +static int cmm_quota_invalidate(const struct lu_env *env, struct md_device *m, + __u32 type) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. */ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_invalidate(env, + cmm_dev->cmm_child, + type); + RETURN(rc); +} + +static int cmm_quota_finvalidate(const struct lu_env *env, struct md_device *m, + __u32 type) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + /* disable quota for CMD case temporary. 
*/ + if (cmm_dev->cmm_tgt_count) + RETURN(-EOPNOTSUPP); + + rc = cmm_child_ops(cmm_dev)->mdo_quota.mqo_finvalidate(env, + cmm_dev->cmm_child, + type); + RETURN(rc); +} +#endif + static const struct md_device_operations cmm_md_ops = { .mdo_statfs = cmm_statfs, .mdo_root_get = cmm_root_get, .mdo_maxsize_get = cmm_maxsize_get, .mdo_init_capa_ctxt = cmm_init_capa_ctxt, .mdo_update_capa_key = cmm_update_capa_key, +#ifdef HAVE_QUOTA_SUPPORT + .mdo_quota = { + .mqo_notify = cmm_quota_notify, + .mqo_setup = cmm_quota_setup, + .mqo_cleanup = cmm_quota_cleanup, + .mqo_recovery = cmm_quota_recovery, + .mqo_check = cmm_quota_check, + .mqo_on = cmm_quota_on, + .mqo_off = cmm_quota_off, + .mqo_setinfo = cmm_quota_setinfo, + .mqo_getinfo = cmm_quota_getinfo, + .mqo_setquota = cmm_quota_setquota, + .mqo_getquota = cmm_quota_getquota, + .mqo_getoinfo = cmm_quota_getoinfo, + .mqo_getoquota = cmm_quota_getoquota, + .mqo_invalidate = cmm_quota_invalidate, + .mqo_finvalidate = cmm_quota_finvalidate + } +#endif }; extern struct lu_device_type mdc_device_type; diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c index 3309687..73c9dad 100644 --- a/lustre/cmm/cmm_object.c +++ b/lustre/cmm/cmm_object.c @@ -1274,5 +1274,5 @@ static const struct md_dir_operations cmr_dir_ops = { .mdo_link = cmr_link, .mdo_unlink = cmr_unlink, .mdo_rename = cmr_rename, - .mdo_rename_tgt = cmr_rename_tgt, + .mdo_rename_tgt = cmr_rename_tgt }; diff --git a/lustre/cmm/mdc_device.c b/lustre/cmm/mdc_device.c index 5507962..db2d0b1 100644 --- a/lustre/cmm/mdc_device.c +++ b/lustre/cmm/mdc_device.c @@ -146,7 +146,7 @@ static int mdc_obd_add(const struct lu_env *env, ocd->ocd_ibits_known = MDS_INODELOCK_UPDATE; ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_ACL | - OBD_CONNECT_LCL_CLIENT | + OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_IBITS | diff --git a/lustre/doc/lfs.1 b/lustre/doc/lfs.1 index 3b3b7fb..532a60b 100644 --- a/lustre/doc/lfs.1 +++ 
b/lustre/doc/lfs.1 @@ -44,13 +44,33 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the .br .B lfs quotaoff [-ug] .br -.B lfs setquota [-u|-g] - \fB +.B lfs quotainv [-ug] [-f] +.br +.B lfs setquota [-u|--user|-g|--group] + \fB[--block-softlimit ] + \fB[--block-hardlimit ] + \fB[--inode-softlimit ] + \fB[--inode-hardlimit ] \fB\fR .br -.B lfs setquota -t [-u|-g] +.B lfs setquota [-u|--user|-g|--group] + \fB[-b ] [-B ] + \fB[-i ] [-I ] + \fB\fR .br -.B lfs quota [-o obd_uuid] [-u|-g] +.B lfs setquota -t [-u|-g] + \fB[--block-grace ] + \fB[--inode-grace ] + \fB\fR +.br +.B lfs setquota -t [-u|-g] + \fB[-b ] [-i ] + \fB\fR +.br + +.B lfs quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] +.br +.B lfs quota .br .B lfs quota -t [-u|-g] .br @@ -121,14 +141,17 @@ To turn filesystem quotas on. Options specify quota for users (-u) groups (-g) a .B quotaoff [-ugf] To turn filesystem quotas off. Options specify quota for users (-u) groups (-g) and force (-f) .TP -.B setquota [-u|-g] -To set filesystem quotas for users or groups. Limits are specific as blocks and inodes, see EXAMPLES +.B quotainv [-ug] [-f] +Clear quota files (administrative quota files if used without -f, operational quota files otherwise), all of their quota entries, for (-u) users or (-g) groups; after quotainv one must use quotacheck before using quotas. DO NOT USE THIS COMMAND UNLESS YOU REALLY KNOW WHAT IT DOES. IT IS MAINLY FOR INTERNAL PURPOSES. +.TP +.B setquota [-u|-g] [--block-softlimit ] [--block-hardlimit ] [--inode-softlimit ] [--inode-hardlimit ] +To set filesystem quotas for users or groups. Limits can be specified with -b, -k, -m, -g, -t, -p suffixes which specify units of 1, 2^10, 2^20, 2^30, 2^40 and 2^50 accordingly. 
Block limits unit is kilobyte (1024) by default and block limits are always kilobyte-grained (even if specified in bytes), see EXAMPLES .TP -.B setquota -t [-u|-g] +.B setquota -t [-u|-g] [--block-grace ] [--inode-grace ] To set filesystem quota grace times for users or groups. Grace time is specified in "XXwXXdXXhXXmXXs" format or as an integer seconds value, see EXAMPLES .TP -.B quota [-o obd_uuid] [-u|-g] -To display disk usage and limits, either for the full filesystem, or for objects on a specific obd. A user or group name must be specified. +.B quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] +To display disk usage and limits, either for the full filesystem, or for objects on a specific obd. A user or group name can be specified. If both user and group are omitted quotas for current uid/gid are shown. -v provides more verbose (with per-obd statistics) output. .TP .B quota -t [-u|-g] To display block and inode grace times for user (-u) or group (-g) quotas @@ -141,7 +164,7 @@ Quit the interactive lfs session .SH EXAMPLES .TP .B $ lfs setstripe -s 128k -c 2 /mnt/lustre/file1 -This creats a file striped on two OSTs with 128kB on each stripe. +This creates a file striped on two OSTs with 128kB on each stripe. .TP .B $ lfs setstripe -d /mnt/lustre/dir This deletes a default stripe pattern on dir. New files will use the default striping pattern created therein. 
@@ -182,10 +205,10 @@ Turn quotas of user and group on .B $ lfs quotaoff -ug /mnt/lustre Turn quotas of user and group off .TP -.B $ lfs setquota -u bob 0 1000000 0 10000 /mnt/lustre -Set quotas of user `bob': 1GB block quota and 10,000 file quota +.B $ lfs setquota -u bob --block-softlimit 2000000 --block-hardlimit 1000000 /mnt/lustre +Set quotas of user `bob': 1GB block quota hardlimit and 2 GB block quota softlimit .TP -.B $ lfs setquota -t -u 1000 1w4d /mnt/lustre +.B $ lfs setquota -t -u --block-grace 1000 --inode-grace 1w4d /mnt/lustre Set grace times for user quotas: 1000 seconds for block quotas, 1 week and 4 days for inode quotas .TP .B $ lfs quota -u bob /mnt/lustre diff --git a/lustre/fid/fid_store.c b/lustre/fid/fid_store.c index 42fda49..7a827da 100644 --- a/lustre/fid/fid_store.c +++ b/lustre/fid/fid_store.c @@ -102,7 +102,7 @@ int seq_store_write(struct lu_server_seq *seq, rc = dt_obj->do_body_ops->dbo_write(env, dt_obj, seq_store_buf(info), - &pos, th, BYPASS_CAPA); + &pos, th, BYPASS_CAPA, 1); if (rc == sizeof(info->sti_space)) { CDEBUG(D_INFO, "%s: Space - "DRANGE"\n", seq->lss_name, PRANGE(&seq->lss_space)); diff --git a/lustre/fld/fld_index.c b/lustre/fld/fld_index.c index a1e88d4..aba0bb0 100644 --- a/lustre/fld/fld_index.c +++ b/lustre/fld/fld_index.c @@ -131,7 +131,7 @@ int fld_index_create(struct lu_server_fld *fld, rc = dt_obj->do_index_ops->dio_insert(env, dt_obj, fld_rec(env, mds), fld_key(env, seq), - th, BYPASS_CAPA); + th, BYPASS_CAPA, 1); dt_dev->dd_ops->dt_trans_stop(env, th); } else rc = PTR_ERR(th); diff --git a/lustre/include/class_hash.h b/lustre/include/class_hash.h index e2b2b11..6210c7f 100644 --- a/lustre/include/class_hash.h +++ b/lustre/include/class_hash.h @@ -170,7 +170,7 @@ __lustre_hash_key_validate(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { if (unlikely(lh->lh_flags & LH_DEBUG)) - LASSERT(lh_compare(lh, key, hnode)); + LASSERT(lh_compare(lh, key, hnode) > 0); } /* Validate hnode is in the correct bucket 
*/ @@ -193,7 +193,7 @@ __lustre_hash_bucket_lookup(lustre_hash_t *lh, struct hlist_node *hnode; hlist_for_each(hnode, &lhb->lhb_head) - if (lh_compare(lh, key, hnode)) + if (lh_compare(lh, key, hnode) > 0) return hnode; return NULL; diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index fbbb9ad..0cd80c4 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -66,6 +66,7 @@ struct txn_param; struct dt_device; struct dt_object; struct dt_index_features; +struct dt_quota_ctxt; struct dt_device_param { unsigned ddp_max_name_len; @@ -82,11 +83,12 @@ enum dt_txn_op { DTO_IDNEX_UPDATE, DTO_OBJECT_CREATE, DTO_OBJECT_DELETE, - DTO_ATTR_SET, + DTO_ATTR_SET_BASE, DTO_XATTR_SET, DTO_LOG_REC, /**< XXX temporary: dt layer knows nothing about llog. */ DTO_WRITE_BASE, DTO_WRITE_BLOCK, + DTO_ATTR_SET_CHOWN, DTO_NR }; @@ -144,6 +146,12 @@ struct dt_device_operations { struct dt_device *dev, int mode, unsigned long timeout, __u32 alg, struct lustre_capa_key *keys); + /** + * Initialize quota context. + */ + void (*dt_init_quota_ctxt)(const struct lu_env *env, + struct dt_device *dev, + struct dt_quota_ctxt *ctxt, void *data); /** * get transaction credits for given \a op. 
@@ -337,7 +345,8 @@ struct dt_body_operations { */ ssize_t (*dbo_write)(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, loff_t *pos, - struct thandle *handle, struct lustre_capa *capa); + struct thandle *handle, struct lustre_capa *capa, + int ignore_quota); }; /** @@ -370,7 +379,8 @@ struct dt_index_operations { */ int (*dio_insert)(const struct lu_env *env, struct dt_object *dt, const struct dt_rec *rec, const struct dt_key *key, - struct thandle *handle, struct lustre_capa *capa); + struct thandle *handle, struct lustre_capa *capa, + int ignore_quota); /** * precondition: dt_object_exists(dt); */ diff --git a/lustre/include/linux/lustre_acl.h b/lustre/include/linux/lustre_acl.h index 713341e..cfdc247 100644 --- a/lustre/include/linux/lustre_acl.h +++ b/lustre/include/linux/lustre_acl.h @@ -43,7 +43,7 @@ #define _LUSTRE_LINUX_ACL_H #ifndef _LUSTRE_ACL_H -#error Shoud not include direectly. use #include instead +#error Shoud not include direectly. use #include instead #endif #ifdef __KERNEL__ diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 7f3f0da..13c0385 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -501,9 +501,32 @@ struct blkcipher_desc { #define ll_crypto_blkcipher_encrypt_iv(desc, dst, src, bytes) \ crypto_cipher_encrypt_iv((desc)->tfm, dst, src, bytes, (desc)->info) -extern struct ll_crypto_cipher *ll_crypto_alloc_blkcipher( - const char * algname, u32 type, u32 mask); static inline +struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname, + u32 type, u32 mask) +{ + char buf[CRYPTO_MAX_ALG_NAME + 1]; + const char *pan = algname; + u32 flag = 0; + + if (strncmp("cbc(", algname, 4) == 0) + flag |= CRYPTO_TFM_MODE_CBC; + else if (strncmp("ecb(", algname, 4) == 0) + flag |= CRYPTO_TFM_MODE_ECB; + if (flag) { + char *vp = strnchr(algname, CRYPTO_MAX_ALG_NAME, ')'); + if (vp) { + memcpy(buf, algname + 4, vp - algname - 
4); + buf[vp - algname - 4] = '\0'; + pan = buf; + } else { + flag = 0; + } + } + return crypto_alloc_tfm(pan, flag); +} + +static inline struct ll_crypto_hash *ll_crypto_alloc_hash(const char *alg, u32 type, u32 mask) { char buf[CRYPTO_MAX_ALG_NAME + 1]; diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 2996e36..b544341 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -113,12 +113,12 @@ struct fsfilt_operations { int (* fs_read_record)(struct file *, void *, int size, loff_t *); int (* fs_setup)(struct super_block *sb); int (* fs_get_op_len)(int, struct fsfilt_objinfo *, int); - int (* fs_quotactl)(struct super_block *sb, - struct obd_quotactl *oqctl); int (* fs_quotacheck)(struct super_block *sb, struct obd_quotactl *oqctl); __u64 (* fs_get_version) (struct inode *inode); __u64 (* fs_set_version) (struct inode *inode, __u64 new_version); + int (* fs_quotactl)(struct super_block *sb, + struct obd_quotactl *oqctl); int (* fs_quotainfo)(struct lustre_quota_info *lqi, int type, int cmd); int (* fs_qids)(struct file *file, struct inode *inode, int type, @@ -167,18 +167,21 @@ static inline lvfs_sbdev_type fsfilt_journal_sbdev(struct obd_device *obd, return (lvfs_sbdev_type)0; } -#define FSFILT_OP_UNLINK 1 -#define FSFILT_OP_RMDIR 2 -#define FSFILT_OP_RENAME 3 -#define FSFILT_OP_CREATE 4 -#define FSFILT_OP_MKDIR 5 -#define FSFILT_OP_SYMLINK 6 -#define FSFILT_OP_MKNOD 7 -#define FSFILT_OP_SETATTR 8 -#define FSFILT_OP_LINK 9 -#define FSFILT_OP_CANCEL_UNLINK 10 -#define FSFILT_OP_JOIN 11 -#define FSFILT_OP_NOOP 15 +#define FSFILT_OP_UNLINK 1 +#define FSFILT_OP_RMDIR 2 +#define FSFILT_OP_RENAME 3 +#define FSFILT_OP_CREATE 4 +#define FSFILT_OP_MKDIR 5 +#define FSFILT_OP_SYMLINK 6 +#define FSFILT_OP_MKNOD 7 +#define FSFILT_OP_SETATTR 8 +#define FSFILT_OP_LINK 9 +#define FSFILT_OP_CANCEL_UNLINK 10 +#define FSFILT_OP_JOIN 11 +#define FSFILT_OP_NOOP 15 +#define 
FSFILT_OP_UNLINK_PARTIAL_CHILD 21 +#define FSFILT_OP_UNLINK_PARTIAL_PARENT 22 +#define FSFILT_OP_CREATE_PARTIAL_CHILD 23 #define __fsfilt_check_slow(obd, start, msg) \ do { \ diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h index b44679e..da302bc 100644 --- a/lustre/include/linux/lustre_user.h +++ b/lustre/include/linux/lustre_user.h @@ -48,6 +48,9 @@ # endif #else # include +# if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) +# define NEED_QUOTA_DEFS +# endif # ifdef HAVE_QUOTA_SUPPORT # include # endif diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 8ef613e..7763498 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -222,7 +222,7 @@ static inline int opcode_offset(__u32 opc) { (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); -} else if (opc < FLD_LAST_OPC) { + } else if (opc < FLD_LAST_OPC) { /* FLD opcode */ return (opc - FLD_FIRST_OPC + (LLOG_LAST_OPC - LLOG_FIRST_OPC) + @@ -252,6 +252,18 @@ static inline int opcode_offset(__u32 opc) { (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC - OST_FIRST_OPC)); + } else if (opc < QUOTA_LAST_OPC) { + /* LQUOTA Opcode */ + return (opc - QUOTA_FIRST_OPC + + (SEC_LAST_OPC - SEC_FIRST_OPC) + + (SEQ_LAST_OPC - SEQ_FIRST_OPC) + + (FLD_LAST_OPC - FLD_FIRST_OPC) + + (LLOG_LAST_OPC - LLOG_FIRST_OPC) + + (OBD_LAST_OPC - OBD_FIRST_OPC) + + (MGS_LAST_OPC - MGS_FIRST_OPC) + + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + + (MDS_LAST_OPC - MDS_FIRST_OPC) + + (OST_LAST_OPC - OST_FIRST_OPC)); } else { /* Unknown Opcode */ return -1; @@ -266,7 +278,8 @@ static inline int opcode_offset(__u32 opc) { (SEQ_LAST_OPC - SEQ_FIRST_OPC) + \ (MGS_LAST_OPC - MGS_FIRST_OPC) + \ (LLOG_LAST_OPC - LLOG_FIRST_OPC) + \ - (SEC_LAST_OPC - SEC_FIRST_OPC)) + (SEC_LAST_OPC - SEC_FIRST_OPC) + \ + (QUOTA_LAST_OPC - QUOTA_FIRST_OPC)) #define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - 
PTLRPC_FIRST_CNTR) + \ (EXTRA_LAST_OPC - EXTRA_FIRST_OPC)) @@ -288,12 +301,13 @@ enum { LDLM_EXTENT_ENQUEUE, LDLM_FLOCK_ENQUEUE, LDLM_IBITS_ENQUEUE, + MDS_REINT_SETATTR, MDS_REINT_CREATE, MDS_REINT_LINK, - MDS_REINT_OPEN, - MDS_REINT_SETATTR, - MDS_REINT_RENAME, MDS_REINT_UNLINK, + MDS_REINT_RENAME, + MDS_REINT_OPEN, + MDS_REINT_SETXATTR, BRW_READ_BYTES, BRW_WRITE_BYTES, EXTRA_LAST_OPC @@ -617,6 +631,56 @@ int lprocfs_obd_rd_recovery_maxtime(char *page, char **start, off_t off, /* lprocfs_status.c: write recovery max time bz13079 */ int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer, unsigned long count, void *data); + +/* all quota proc functions */ +extern int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data); +extern int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void *data); +extern int lprocfs_quota_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data); +extern int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data); +extern int lprocfs_quota_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count, + int *eof, void *data); +extern int lprocfs_quota_wr_type(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer, + unsigned long count, void 
*data); +extern int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer, + unsigned long count, void *data); #else /* LPROCFS is not defined */ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, @@ -651,7 +715,7 @@ static inline void lprocfs_init_ops_stats(int num_private_stats, static inline void lprocfs_init_ldlm_stats(struct lprocfs_stats *ldlm_stats) { return; } static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev, - unsigned int num_private_stats) + unsigned int num_private_stats) { return 0; } static inline int lprocfs_alloc_md_stats(struct obd_device *obddev, unsigned int num_private_stats) @@ -663,7 +727,7 @@ struct obd_export; static inline int lprocfs_add_clear_entry(struct 
obd_export *exp) { return 0; } static inline int lprocfs_exp_setup(struct obd_export *exp, - lnet_nid_t *peer_nid, int *newnid) + lnet_nid_t *peer_nid, int *newnid) { return 0; } static inline int lprocfs_exp_cleanup(struct obd_export *exp) { return 0; } diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index 8717532..3f4fd1e 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -84,7 +84,7 @@ extern int llapi_poollist(char *name); extern int llapi_file_get_stripe(const char *path, struct lov_user_md *lum); #define HAVE_LLAPI_FILE_LOOKUP extern int llapi_file_lookup(int dirfd, const char *name); - + struct find_param { unsigned int maxdepth; time_t atime; @@ -151,8 +151,9 @@ extern int llapi_file_get_lov_uuid(const char *path, struct obd_uuid *lov_uuid); extern int llapi_file_fget_lov_uuid(int fd, struct obd_uuid *lov_uuid); extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count); extern int llapi_is_lustre_mnttype(const char *type); +extern int llapi_get_obd_count(char *mnt, int *count, int is_mdt); extern int parse_size(char *optarg, unsigned long long *size, - unsigned long long *size_units); + unsigned long long *size_units, int bytes_spec); extern int llapi_path2fid(const char *path, unsigned long long *seq, unsigned long *oid, unsigned long *ver); diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index fbb1af2..81fd3b7 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -653,8 +653,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_JOIN 0x00002000ULL /* files can be concatenated */ #define OBD_CONNECT_ATTRFID 0x00004000ULL /* Server supports GetAttr By Fid */ #define OBD_CONNECT_NODEVOH 0x00008000ULL /* No open handle for special nodes */ -#define OBD_CONNECT_LCL_CLIENT 0x00010000ULL /* local 1.8 client */ -#define OBD_CONNECT_RMT_CLIENT 
0x00020000ULL /* Remote 1.8 client */ +#define OBD_CONNECT_RMT_CLIENT 0x00010000ULL /* Remote client */ +#define OBD_CONNECT_RMT_CLIENT_FORCE 0x00020000ULL /* Remote client by force */ #define OBD_CONNECT_BRW_SIZE 0x00040000ULL /* Max bytes per rpc */ #define OBD_CONNECT_QUOTA64 0x00080000ULL /* 64bit qunit_data.qd_count b=10707*/ #define OBD_CONNECT_MDS_CAPA 0x00100000ULL /* MDS capability */ @@ -683,8 +683,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ OBD_CONNECT_NODEVOH |/* OBD_CONNECT_ATTRFID |*/\ - OBD_CONNECT_LCL_CLIENT | \ OBD_CONNECT_RMT_CLIENT | \ + OBD_CONNECT_RMT_CLIENT_FORCE | \ OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \ OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \ OBD_CONNECT_FID | \ @@ -696,7 +696,9 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \ OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET | \ OBD_CONNECT_CKSUM | LRU_RESIZE_CONNECT_FLAG | \ - OBD_CONNECT_AT) + OBD_CONNECT_AT | OBD_CONNECT_CHANGE_QS | \ + OBD_CONNECT_RMT_CLIENT | \ + OBD_CONNECT_RMT_CLIENT_FORCE) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT) @@ -766,6 +768,7 @@ typedef enum { OST_SET_INFO = 17, OST_QUOTACHECK = 18, OST_QUOTACTL = 19, + OST_QUOTA_ADJUST_QUNIT = 20, OST_LAST_OPC } ost_cmd_t; #define OST_FIRST_OPC OST_REPLY @@ -908,6 +911,8 @@ struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ #define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ +#define OBD_FL_TRUNC (0x0000200000000000ULL) /* for filter_truncate */ + #define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */ #define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */ #define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */ @@ 
-1244,13 +1249,26 @@ extern void lustre_swab_mdt_epoch (struct mdt_epoch *b); #define Q_INITQUOTA 0x800101 /* init slave limits */ #define Q_GETOINFO 0x800102 /* get obd quota info */ #define Q_GETOQUOTA 0x800103 /* get obd quotas */ +#define Q_FINVALIDATE 0x800104 /* invalidate operational quotas */ + +#define Q_TYPEMATCH(id, type) \ + ((id) == (type) || (id) == UGQUOTA) -#define Q_TYPESET(oqc, type) \ - ((oqc)->qc_type == type || (oqc)->qc_type == UGQUOTA) +#define Q_TYPESET(oqc, type) Q_TYPEMATCH((oqc)->qc_type, type) #define Q_GETOCMD(oqc) \ ((oqc)->qc_cmd == Q_GETOINFO || (oqc)->qc_cmd == Q_GETOQUOTA) +#define QCTL_COPY(out, in) \ +do { \ + Q_COPY(out, in, qc_cmd); \ + Q_COPY(out, in, qc_type); \ + Q_COPY(out, in, qc_id); \ + Q_COPY(out, in, qc_stat); \ + Q_COPY(out, in, qc_dqinfo); \ + Q_COPY(out, in, qc_dqblk); \ +} while (0) + struct obd_quotactl { __u32 qc_cmd; __u32 qc_type; @@ -1262,6 +1280,34 @@ struct obd_quotactl { extern void lustre_swab_obd_quotactl(struct obd_quotactl *q); +struct quota_adjust_qunit { + __u32 qaq_flags; + __u32 qaq_id; + __u64 qaq_bunit_sz; + __u64 qaq_iunit_sz; + __u64 padding1; +}; +extern void lustre_swab_quota_adjust_qunit(struct quota_adjust_qunit *q); + +/* flags in qunit_data and quota_adjust_qunit will use macroes below */ +#define LQUOTA_FLAGS_GRP 1UL /* 0 is user, 1 is group */ +#define LQUOTA_FLAGS_BLK 2UL /* 0 is inode, 1 is block */ +#define LQUOTA_FLAGS_ADJBLK 4UL /* adjust the block qunit size */ +#define LQUOTA_FLAGS_ADJINO 8UL /* adjust the inode qunit size */ +#define LQUOTA_FLAGS_CHG_QS 16UL /* indicate whether it has capability of + * OBD_CONNECT_CHANGE_QS */ + +/* the status of lqsk_flags in struct lustre_qunit_size_key */ +#define LQUOTA_QUNIT_FLAGS (LQUOTA_FLAGS_GRP | LQUOTA_FLAGS_BLK) + +#define QAQ_IS_GRP(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_GRP) +#define QAQ_IS_ADJBLK(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_ADJBLK) +#define QAQ_IS_ADJINO(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_ADJINO) + +#define 
QAQ_SET_GRP(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_GRP) +#define QAQ_SET_ADJBLK(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_ADJBLK) +#define QAQ_SET_ADJINO(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_ADJINO) + /* inode access permission for remote user, the inode info are omitted, * for client knows them. */ struct mds_remote_perm { @@ -1277,7 +1323,8 @@ enum { CFS_SETUID_PERM = 0x01, CFS_SETGID_PERM = 0x02, CFS_SETGRP_PERM = 0x04, - CFS_RMTACL_PERM = 0x08 + CFS_RMTACL_PERM = 0x08, + CFS_RMTOWN_PERM = 0x10 }; extern void lustre_swab_mds_remote_perm(struct mds_remote_perm *p); @@ -1421,7 +1468,8 @@ enum { MDS_CROSS_REF = 1 << 1, MDS_VTX_BYPASS = 1 << 2, MDS_PERM_BYPASS = 1 << 3, - MDS_SOM = 1 << 4 + MDS_SOM = 1 << 4, + MDS_QUOTA_IGNORE = 1 << 5 }; struct mds_rec_join { @@ -2261,7 +2309,6 @@ struct obdo { extern void lustre_swab_obdo (struct obdo *o); /* request structure for OST's */ - struct ost_body { struct obdo oa; }; @@ -2293,37 +2340,71 @@ extern void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct lustre_cfg; extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); -/* quota. fixed by tianzy for bug10707 */ -#define QUOTA_IS_GRP 0X1UL /* 0 is user, 1 is group. Used by qd_flags*/ -#define QUOTA_IS_BLOCK 0x2UL /* 0 is inode, 1 is block. 
Used by qd_flags*/ - +/* this will be used when OBD_CONNECT_CHANGE_QS is set */ struct qunit_data { - __u32 qd_id; /* ID appiles to (uid, gid) */ - __u32 qd_flags; /* Quota type (USRQUOTA, GRPQUOTA) occupy one bit; - * Block quota or file quota occupy one bit */ - __u64 qd_count; /* acquire/release count (bytes for block quota) */ + /** + * ID appiles to (uid, gid) + */ + __u32 qd_id; + /** + * LQUOTA_FLAGS_* affect the responding bits + */ + __u32 qd_flags; + /** + * acquire/release count (bytes for block quota) + */ + __u64 qd_count; + /** + * when a master returns the reply to a slave, it will + * contain the current corresponding qunit size + */ + __u64 qd_qunit; + __u64 padding; }; -struct qunit_data_old { - __u32 qd_id; /* ID appiles to (uid, gid) */ - __u32 qd_type; /* Quota type (USRQUOTA, GRPQUOTA) */ - __u32 qd_count; /* acquire/release count (bytes for block quota) */ - __u32 qd_isblk; /* Block quota or file quota */ -}; +#define QDATA_IS_GRP(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_GRP) +#define QDATA_IS_BLK(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_BLK) +#define QDATA_IS_ADJBLK(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_ADJBLK) +#define QDATA_IS_ADJINO(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_ADJINO) +#define QDATA_IS_CHANGE_QS(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_CHG_QS) + +#define QDATA_SET_GRP(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_GRP) +#define QDATA_SET_BLK(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_BLK) +#define QDATA_SET_ADJBLK(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_ADJBLK) +#define QDATA_SET_ADJINO(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_ADJINO) +#define QDATA_SET_CHANGE_QS(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_CHG_QS) + +#define QDATA_CLR_GRP(qdata) ((qdata)->qd_flags &= ~LQUOTA_FLAGS_GRP) +#define QDATA_CLR_CHANGE_QS(qdata) ((qdata)->qd_flags &= ~LQUOTA_FLAGS_CHG_QS) extern void lustre_swab_qdata(struct qunit_data *d); -extern void lustre_swab_qdata_old(struct qunit_data_old *d); -extern struct qunit_data 
*lustre_quota_old_to_new(struct qunit_data_old *d); -extern struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d); +extern int quota_get_qdata(void*req, struct qunit_data *qdata, + int is_req, int is_exp); +extern int quota_copy_qdata(void *request, struct qunit_data *qdata, + int is_req, int is_exp); typedef enum { - QUOTA_DQACQ = 601, - QUOTA_DQREL = 602, + QUOTA_DQACQ = 901, + QUOTA_DQREL = 902, + QUOTA_LAST_OPC } quota_cmd_t; +#define QUOTA_FIRST_OPC QUOTA_DQACQ #define JOIN_FILE_ALIGN 4096 -/** security opcodes */ +#define QUOTA_REQUEST 1 +#define QUOTA_REPLY 0 +#define QUOTA_EXPORT 1 +#define QUOTA_IMPORT 0 + +/* quota check function */ +#define QUOTA_RET_OK 0 /**< return successfully */ +#define QUOTA_RET_NOQUOTA 1 /**< not support quota */ +#define QUOTA_RET_NOLIMIT 2 /**< quota limit isn't set */ +#define QUOTA_RET_ACQUOTA 4 /**< need to acquire extra quota */ +#define QUOTA_RET_INC_PENDING 8 /**< pending value is increased */ + +/* security opcodes */ typedef enum { SEC_CTX_INIT = 801, SEC_CTX_INIT_CONT = 802, @@ -2341,15 +2422,15 @@ typedef enum { /* NB take care when changing the sequence of elements this struct, * because the offset info is used in find_capa() */ struct lustre_capa { - struct lu_fid lc_fid; /* fid */ - __u64 lc_opc; /* operations allowed */ - __u32 lc_uid; /* uid, it is obsolete, but maybe used in - * future, reserve it for 64-bits aligned.*/ - __u32 lc_flags; /* HMAC algorithm & flags */ - __u32 lc_keyid; /* key used for the capability */ - __u32 lc_timeout; /* capa timeout value (sec) */ - __u64 lc_expiry; /* expiry time (sec) */ - __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /* HMAC */ + struct lu_fid lc_fid; /** fid */ + __u64 lc_opc; /** operations allowed */ + __u64 lc_uid; /** file owner */ + __u64 lc_gid; /** file group */ + __u32 lc_flags; /** HMAC algorithm & flags */ + __u32 lc_keyid; /** key# used for the capability */ + __u32 lc_timeout; /** capa timeout value (sec) */ + __u32 lc_expiry; /** expiry time (sec) */ + __u8 
lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */ } __attribute__((packed)); extern void lustre_swab_lustre_capa(struct lustre_capa *c); @@ -2364,9 +2445,9 @@ enum { CAPA_OPC_OSS_WRITE = 1<<5, /**< write oss object data */ CAPA_OPC_OSS_READ = 1<<6, /**< read oss object data */ CAPA_OPC_OSS_TRUNC = 1<<7, /**< truncate oss object */ - CAPA_OPC_META_WRITE = 1<<8, /**< write object meta data */ - CAPA_OPC_META_READ = 1<<9, /**< read object meta data */ - + CAPA_OPC_OSS_DESTROY = 1<<8, /**< destroy oss object */ + CAPA_OPC_META_WRITE = 1<<9, /**< write object meta data */ + CAPA_OPC_META_READ = 1<<10, /**< read object meta data */ }; #define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE) @@ -2374,7 +2455,8 @@ enum { (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \ CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE) #define CAPA_OPC_OSS_ONLY \ - (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC) + (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC | \ + CAPA_OPC_OSS_DESTROY) #define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY #define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY) @@ -2411,11 +2493,6 @@ struct lustre_capa_key { extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); -/* quota check function */ -#define QUOTA_RET_OK 0 /**< return successfully */ -#define QUOTA_RET_NOQUOTA 1 /**< not support quota */ -#define QUOTA_RET_NOLIMIT 2 /**< quota limit isn't set */ -#define QUOTA_RET_ACQUOTA 3 /**< need to acquire extra quota */ #endif /** @} lustreidl */ diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index bd76396..12a0f0e 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -98,6 +98,8 @@ struct obd_statfs; #define LL_IOC_FLUSHCTX _IOW ('f', 166, long) #define LL_IOC_RMTACL _IOW ('f', 167, long) +#define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long) + #define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long) #define LL_IOC_LLOOP_DETACH 
_IOWR('f', 170, long) #define LL_IOC_LLOOP_INFO _IOWR('f', 171, long) @@ -228,17 +230,19 @@ static inline char *obd_uuid2str(struct obd_uuid *uuid) return (char *)(uuid->uuid); } -#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */ -#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */ -#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ -#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ -#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ -#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ +/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */ +#define LUSTRE_Q_QUOTAON 0x800002 /* turn quotas on */ +#define LUSTRE_Q_QUOTAOFF 0x800003 /* turn quotas off */ +#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ +#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ +#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ +#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ +/* lustre-specific control commands */ +#define LUSTRE_Q_INVALIDATE 0x80000b /* invalidate quota data */ +#define LUSTRE_Q_FINVALIDATE 0x80000c /* invalidate filter quota data */ #define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */ -#define QFMT_LDISKFS 2 /* QFMT_VFS_V0(2), quota format for ldiskfs */ - struct if_quotacheck { char obd_type[16]; struct obd_uuid obd_uuid; @@ -306,6 +310,10 @@ enum { #endif /* !__KERNEL__ */ +typedef enum lustre_quota_version { + LUSTRE_QUOTA_V2 = 1 +} lustre_quota_version_t; + /* XXX: same as if_dqinfo struct in kernel */ struct obd_dqinfo { __u64 dqi_bgrace; @@ -328,11 +336,20 @@ struct obd_dqblk { __u32 padding; }; +enum { + QC_GENERAL = 0, + QC_MDTIDX = 1, + QC_OSTIDX = 2, + QC_UUID = 3 +}; + struct if_quotactl { __u32 qc_cmd; __u32 qc_type; __u32 qc_id; __u32 qc_stat; + __u32 qc_valid; + __u32 qc_idx; struct obd_dqinfo qc_dqinfo; struct obd_dqblk qc_dqblk; char obd_type[16]; diff --git 
a/lustre/include/lustre_capa.h b/lustre/include/lustre_capa.h index 1fb6a7d..7f65a44 100644 --- a/lustre/include/lustre_capa.h +++ b/lustre/include/lustre_capa.h @@ -95,29 +95,24 @@ enum { CAPA_SITE_MAX }; -static inline __u64 capa_opc(struct lustre_capa *capa) -{ - return capa->lc_opc; -} - -static inline __u32 capa_uid(struct lustre_capa *capa) +static inline struct lu_fid *capa_fid(struct lustre_capa *capa) { - return capa->lc_uid; + return &capa->lc_fid; } -static inline struct lu_fid *capa_fid(struct lustre_capa *capa) +static inline __u64 capa_opc(struct lustre_capa *capa) { - return &capa->lc_fid; + return capa->lc_opc; } -static inline __u32 capa_keyid(struct lustre_capa *capa) +static inline __u64 capa_uid(struct lustre_capa *capa) { - return capa->lc_keyid; + return capa->lc_uid; } -static inline __u64 capa_expiry(struct lustre_capa *capa) +static inline __u64 capa_gid(struct lustre_capa *capa) { - return capa->lc_expiry; + return capa->lc_gid; } static inline __u32 capa_flags(struct lustre_capa *capa) @@ -127,9 +122,12 @@ static inline __u32 capa_flags(struct lustre_capa *capa) static inline __u32 capa_alg(struct lustre_capa *capa) { - __u32 alg = capa->lc_flags; + return (capa->lc_flags >> 24); +} - return alg >> 24; +static inline __u32 capa_keyid(struct lustre_capa *capa) +{ + return capa->lc_keyid; } static inline __u64 capa_key_mdsid(struct lustre_capa_key *key) @@ -142,12 +140,23 @@ static inline __u32 capa_key_keyid(struct lustre_capa_key *key) return key->lk_keyid; } +static inline __u32 capa_timeout(struct lustre_capa *capa) +{ + return capa->lc_timeout; +} + +static inline __u32 capa_expiry(struct lustre_capa *capa) +{ + return capa->lc_expiry; +} + #define DEBUG_CAPA(level, c, fmt, args...) 
\ do { \ -CDEBUG(level, fmt " capability@%p uid %u opc "LPX64" fid "DFID" keyid %u " \ - "expiry "LPU64" flags %u alg %d\n", \ - ##args, c, capa_uid(c), capa_opc(c), PFID(capa_fid(c)), capa_keyid(c), \ - capa_expiry(c), capa_flags(c), capa_alg(c)); \ +CDEBUG(level, fmt " capability@%p fid "DFID" opc "LPX64" uid "LPU64" gid " \ + LPU64" flags %u alg %d keyid %u timeout %u expiry %u\n", \ + ##args, c, PFID(capa_fid(c)), capa_opc(c), capa_uid(c), capa_gid(c), \ + capa_flags(c), capa_alg(c), capa_keyid(c), capa_timeout(c), \ + capa_expiry(c)); \ } while (0) #define DEBUG_CAPA_KEY(level, k, fmt, args...) \ @@ -172,38 +181,33 @@ struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa, int alive); int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key); +int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen); +int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen); void capa_cpy(void *dst, struct obd_capa *ocapa); - -char *dump_capa_content(char *buf, char *key, int len); - static inline struct obd_capa *alloc_capa(int site) { #ifdef __KERNEL__ struct obd_capa *ocapa; + if (unlikely(site != CAPA_SITE_CLIENT && site != CAPA_SITE_SERVER)) + return ERR_PTR(-EINVAL); + OBD_SLAB_ALLOC(ocapa, capa_cachep, GFP_KERNEL, sizeof(*ocapa)); - if (ocapa) { - atomic_set(&ocapa->c_refc, 0); - spin_lock_init(&ocapa->c_lock); - CFS_INIT_LIST_HEAD(&ocapa->c_list); - ocapa->c_site = site; - } - return ocapa; -#else - return NULL; -#endif -} + if (unlikely(!ocapa)) + return ERR_PTR(-ENOMEM); + + CFS_INIT_LIST_HEAD(&ocapa->c_list); + atomic_set(&ocapa->c_refc, 1); + spin_lock_init(&ocapa->c_lock); + ocapa->c_site = site; + if (ocapa->c_site == CAPA_SITE_CLIENT) + CFS_INIT_LIST_HEAD(&ocapa->u.cli.lli_list); + else + CFS_INIT_HLIST_NODE(&ocapa->u.tgt.c_hash); -static inline void free_capa(struct obd_capa *ocapa) -{ -#ifdef __KERNEL__ - if (atomic_read(&ocapa->c_refc)) { - DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc %d for", - 
atomic_read(&ocapa->c_refc)); - LBUG(); - } - OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa)); + return ocapa; #else + return ERR_PTR(-EOPNOTSUPP); #endif } @@ -225,7 +229,19 @@ static inline void capa_put(struct obd_capa *ocapa) DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "refc is 0 for"); LBUG(); } - atomic_dec(&ocapa->c_refc); + + if (atomic_dec_and_test(&ocapa->c_refc)) { + LASSERT(list_empty(&ocapa->c_list)); + if (ocapa->c_site == CAPA_SITE_CLIENT) { + LASSERT(list_empty(&ocapa->u.cli.lli_list)); + } else { + struct hlist_node *hnode; + + hnode = &ocapa->u.tgt.c_hash; + LASSERT(!hnode->next && !hnode->pprev); + } + OBD_SLAB_FREE(ocapa, capa_cachep, sizeof(*ocapa)); + } } static inline int open_flags_to_accmode(int flags) @@ -253,6 +269,11 @@ static inline void set_capa_expiry(struct obd_capa *ocapa) cfs_time_seconds(expiry)); } +static inline int capa_is_expired_sec(struct lustre_capa *capa) +{ + return (capa->lc_expiry - cfs_time_current_sec() <= 0); +} + static inline int capa_is_expired(struct obd_capa *ocapa) { return cfs_time_beforeq(ocapa->c_expiry, cfs_time_current()); @@ -284,5 +305,11 @@ struct filter_capa_key { struct lustre_capa_key k_key; }; +enum { + LC_ID_NONE = 0, + LC_ID_PLAIN = 1, + LC_ID_CONVERT = 2 +}; + #define BYPASS_CAPA (struct lustre_capa *)ERR_PTR(-ENOENT) #endif /* __LINUX_CAPA_H_ */ diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 4851fbc..b2a5c86 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -56,7 +56,6 @@ struct mdt_export_data { __u64 med_ibits_known; loff_t med_lr_off; int med_lr_idx; - unsigned int med_rmtclient:1; /* remote client? 
*/ struct semaphore med_idmap_sem; struct lustre_idmap_table *med_idmap; }; @@ -178,6 +177,20 @@ static inline int exp_connect_lru_resize(struct obd_export *exp) return !!(exp->exp_connect_flags & OBD_CONNECT_LRU_RESIZE); } +static inline int exp_connect_rmtclient(struct obd_export *exp) +{ + LASSERT(exp != NULL); + return !!(exp->exp_connect_flags & OBD_CONNECT_RMT_CLIENT); +} + +static inline int client_is_remote(struct obd_export *exp) +{ + struct obd_import *imp = class_exp2cliimp(exp); + + return !!(imp->imp_connect_data.ocd_connect_flags & + OBD_CONNECT_RMT_CLIENT); +} + static inline int imp_connect_lru_resize(struct obd_import *imp) { struct obd_connect_data *ocd; diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 83697fe..ba36693 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -80,13 +80,12 @@ int target_pack_pool_reply(struct ptlrpc_request *req); int target_handle_ping(struct ptlrpc_request *req); void target_committed_to_req(struct ptlrpc_request *req); -#ifdef HAVE_QUOTA_SUPPORT /* quotacheck callback, dqacq/dqrel callback handler */ int target_handle_qc_callback(struct ptlrpc_request *req); +#ifdef HAVE_QUOTA_SUPPORT int target_handle_dqacq_callback(struct ptlrpc_request *req); #else #define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP) -#define target_handle_qc_callback(req) (0) #endif #define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */ diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 558930d..1bb9ac2 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -212,8 +212,8 @@ union ptlrpc_async_args { * big enough. For _tons_ of context, OBD_ALLOC a struct and store * a pointer to it here. The pointer_arg ensures this struct is at * least big enough for that. 
*/ - void *pointer_arg[9]; - __u64 space[5]; + void *pointer_arg[11]; + __u64 space[6]; }; struct ptlrpc_request_set; @@ -625,13 +625,23 @@ struct ptlrpc_bulk_desc { }; struct ptlrpc_thread { - - struct list_head t_link; /* active threads in svc->srv_threads */ - - void *t_data; /* thread-private data (preallocated memory) */ + /** + * active threads in svc->srv_threads + */ + struct list_head t_link; + /** + * thread-private data (preallocated memory) + */ + void *t_data; __u32 t_flags; - - unsigned int t_id; /* service thread index, from ptlrpc_start_threads */ + /** + * service thread index, from ptlrpc_start_threads + */ + unsigned int t_id; + /** + * put watchdog in the structure per thread b=14840 + */ + struct lc_watchdog *t_watchdog; cfs_waitq_t t_ctl_waitq; struct lu_env *t_env; }; diff --git a/lustre/include/lustre_quota.h b/lustre/include/lustre_quota.h index 2b26e24..b0dc442 100644 --- a/lustre/include/lustre_quota.h +++ b/lustre/include/lustre_quota.h @@ -50,6 +50,8 @@ #include #include #include +#include +#include struct obd_device; struct client_obd; @@ -62,6 +64,64 @@ struct client_obd; #ifdef __KERNEL__ +#ifdef LPROCFS +enum { + LQUOTA_FIRST_STAT = 0, + /** @{ */ + /** + * these four are for measuring quota requests, for both of + * quota master and quota slaves + */ + LQUOTA_SYNC_ACQ = LQUOTA_FIRST_STAT, + LQUOTA_SYNC_REL, + LQUOTA_ASYNC_ACQ, + LQUOTA_ASYNC_REL, + /** @} */ + /** @{ */ + /** + * these four measure how much time I/O threads spend on dealing + * with quota before and after writing data or creating files, + * only for quota slaves(lquota_chkquota and lquota_pending_commit) + */ + LQUOTA_WAIT_FOR_CHK_BLK, + LQUOTA_WAIT_FOR_CHK_INO, + LQUOTA_WAIT_FOR_COMMIT_BLK, + LQUOTA_WAIT_FOR_COMMIT_INO, + /** @} */ + /** @{ */ + /** + * these two are for measuring time waiting return of quota reqs + * (qctxt_wait_pending_dqacq), only for quota slaves + */ + LQUOTA_WAIT_PENDING_BLK_QUOTA, + LQUOTA_WAIT_PENDING_INO_QUOTA, + /** @} */ + /** @{ */ +
/** + * these two are for those when they are calling + * qctxt_wait_pending_dqacq, the quota req has returned already, + * only for quota slaves + */ + LQUOTA_NOWAIT_PENDING_BLK_QUOTA, + LQUOTA_NOWAIT_PENDING_INO_QUOTA, + /** @} */ + /** @{ */ + /** + * these are for quota ctl + */ + LQUOTA_QUOTA_CTL, + /** @} */ + /** @{ */ + /** + * these are for adjust quota qunit, for both of + * quota master and quota slaves + */ + LQUOTA_ADJUST_QUNIT, + LQUOTA_LAST_STAT + /** @} */ +}; +#endif /* LPROCFS */ + /* structures to access admin quotafile */ struct lustre_mem_dqinfo { unsigned int dqi_bgrace; @@ -75,28 +135,45 @@ struct lustre_mem_dqinfo { struct lustre_quota_info { struct file *qi_files[MAXQUOTAS]; struct lustre_mem_dqinfo qi_info[MAXQUOTAS]; + lustre_quota_version_t qi_version; }; #define DQ_STATUS_AVAIL 0x0 /* Available dquot */ #define DQ_STATUS_SET 0x01 /* Sombody is setting dquot */ #define DQ_STATUS_RECOVERY 0x02 /* dquot is in recovery */ +struct lustre_mem_dqblk { + __u64 dqb_bhardlimit; /**< absolute limit on disk blks alloc */ + __u64 dqb_bsoftlimit; /**< preferred limit on disk blks */ + __u64 dqb_curspace; /**< current used space */ + __u64 dqb_ihardlimit; /**< absolute limit on allocated inodes */ + __u64 dqb_isoftlimit; /**< preferred inode limit */ + __u64 dqb_curinodes; /**< current # allocated inodes */ + time_t dqb_btime; /**< time limit for excessive disk use */ + time_t dqb_itime; /**< time limit for excessive inode use */ +}; + struct lustre_dquot { - /* Hash list in memory, protect by dquot_hash_lock */ + /** Hash list in memory, protect by dquot_hash_lock */ struct list_head dq_hash; - /* Protect the data in lustre_dquot */ + /** Protect the data in lustre_dquot */ struct semaphore dq_sem; - /* Use count */ + /** Use count */ int dq_refcnt; - /* Pointer of quota info it belongs to */ + /** Pointer of quota info it belongs to */ struct lustre_quota_info *dq_info; - - loff_t dq_off; /* Offset of dquot on disk */ - unsigned int dq_id; /* ID
this applies to (uid, gid) */ - int dq_type; /* Type fo quota (USRQUOTA, GRPQUOUTA) */ - unsigned short dq_status; /* See DQ_STATUS_ */ - unsigned long dq_flags; /* See DQ_ in quota.h */ - struct mem_dqblk dq_dqb; /* Diskquota usage */ + /** Offset of dquot on disk */ + loff_t dq_off; + /** ID this applies to (uid, gid) */ + unsigned int dq_id; + /** Type fo quota (USRQUOTA, GRPQUOUTA) */ + int dq_type; + /** See DQ_STATUS_ */ + unsigned short dq_status; + /** See DQ_ in quota.h */ + unsigned long dq_flags; + /** Diskquota usage */ + struct lustre_mem_dqblk dq_dqb; }; struct dquot_id { @@ -110,37 +187,234 @@ struct dquot_id { #define QFILE_INIT_INFO 4 #define QFILE_RD_DQUOT 5 #define QFILE_WR_DQUOT 6 +#define QFILE_CONVERT 7 /* admin quotafile operations */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) int lustre_check_quota_file(struct lustre_quota_info *lqi, int type); int lustre_read_quota_info(struct lustre_quota_info *lqi, int type); int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); int lustre_read_dquot(struct lustre_dquot *dquot); int lustre_commit_dquot(struct lustre_dquot *dquot); int lustre_init_quota_info(struct lustre_quota_info *lqi, int type); -int lustre_get_qids(struct file *file, struct inode *inode, int type, +int lustre_get_qids(struct file *file, struct inode *inode, int type, struct list_head *list); +int lustre_quota_convert(struct lustre_quota_info *lqi, int type); +#else + +#ifndef DQ_FAKE_B +#define DQ_FAKE_B 6 +#endif + +static inline int lustre_check_quota_file(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_read_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_write_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_read_dquot(struct lustre_dquot *dquot) +{ + return 0; +} +static inline int lustre_commit_dquot(struct lustre_dquot *dquot) +{ + return 0; +} +static inline int 
lustre_init_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_quota_convert(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +#endif /* KERNEL_VERSION(2,5,0) */ #define LL_DQUOT_OFF(sb) DQUOT_OFF(sb) typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd, int opc); + +/* user quota is turned on on filter */ +#define LQC_USRQUOTA_FLAG (1 << 0) +/* group quota is turned on on filter */ +#define LQC_GRPQUOTA_FLAG (1 << 1) + +#define UGQUOTA2LQC(id) ((Q_TYPEMATCH(id, USRQUOTA) ? LQC_USRQUOTA_FLAG : 0) | \ + (Q_TYPEMATCH(id, GRPQUOTA) ? LQC_GRPQUOTA_FLAG : 0)) + struct lustre_quota_ctxt { - struct super_block *lqc_sb; /* superblock this applies to */ - struct obd_import *lqc_import; /* import used to send dqacq/dqrel RPC */ - dqacq_handler_t lqc_handler; /* dqacq/dqrel RPC handler, only for quota master */ - unsigned long lqc_recovery:1, /* Doing recovery */ - lqc_atype:2, /* Turn on user/group quota at setup automatically, - * 0: none, 1: user quota, 2: group quota, 3: both */ - lqc_status:1; /* Quota status. 
0:Off, 1:On */ - unsigned long lqc_iunit_sz; /* Unit size of file quota */ - unsigned long lqc_itune_sz; /* Trigger dqacq when available file quota less than - * this value, trigger dqrel when available file quota - * more than this value + 1 iunit */ - unsigned long lqc_bunit_sz; /* Unit size of block quota */ - unsigned long lqc_btune_sz; /* See comment of lqc_itune_sz */ + /** superblock this applies to */ + struct super_block *lqc_sb; + /** obd_device_target for obt_rwsem */ + struct obd_device_target *lqc_obt; + /** import used to send dqacq/dqrel RPC */ + struct obd_import *lqc_import; + /** dqacq/dqrel RPC handler, only for quota master */ + dqacq_handler_t lqc_handler; + /** quota flags */ + unsigned long lqc_flags; + /** @{ */ + unsigned long lqc_recovery:1, /** Doing recovery */ + lqc_switch_qs:1, /** + * the function of change qunit size + * 0:Off, 1:On + */ + lqc_valid:1, /** this qctxt is valid or not */ + lqc_setup:1; /** + * tell whether of not quota_type has + * been processed, so that the master + * knows when it can start processing + * incoming acq/rel quota requests + */ + /** }@ */ + /** + * original unit size of file quota and + * upper limitation for adjust file qunit + */ + unsigned long lqc_iunit_sz; + /** + * Trigger dqacq when available file + * quota less than this value, trigger + * dqrel when available file quota + * more than this value + 1 iunit + */ + unsigned long lqc_itune_sz; + /** + * original unit size of block quota and + * upper limitation for adjust block qunit + */ + unsigned long lqc_bunit_sz; + /** See comment of lqc_itune_sz */ + unsigned long lqc_btune_sz; + /** all lustre_qunit_size structures */ + struct lustre_hash *lqc_lqs_hash; + + /** @{ */ + /** + * the values below are relative to how master change its qunit sizes + */ + /** + * this affects the boundary of + * shrinking and enlarging qunit size. 
default=4 + */ + unsigned long lqc_cqs_boundary_factor; + /** the least value of block qunit */ + unsigned long lqc_cqs_least_bunit; + /** the least value of inode qunit */ + unsigned long lqc_cqs_least_iunit; + /** + * when enlarging, qunit size will + * be multiplied by it; when shrinking, + * qunit size will be divided by it + */ + unsigned long lqc_cqs_qs_factor; + /** + * avoid ping-pong effect of + * adjusting qunit size. How many + * seconds must be waited between + * enlarging and shrinking qunit + */ + /** @} */ + int lqc_switch_seconds; + /** + * when blk qunit reaches this value, + * later write reqs from client should be sync b=16642 + */ + int lqc_sync_blk; + /** guard lqc_imp_valid now */ + spinlock_t lqc_lock; + /** + * when mds isn't connected, threads + * on osts who send the quota reqs + * with wait==1 will be put here b=14840 + */ + cfs_waitq_t lqc_wait_for_qmaster; + struct proc_dir_entry *lqc_proc_dir; + /** lquota statistics */ + struct lprocfs_stats *lqc_stats; }; +#define QUOTA_MASTER_READY(qctxt) (qctxt)->lqc_setup = 1 +#define QUOTA_MASTER_UNREADY(qctxt) (qctxt)->lqc_setup = 0 + +struct lustre_qunit_size { + struct hlist_node lqs_hash; /** the hash entry */ + unsigned int lqs_id; /** id of user/group */ + unsigned long lqs_flags; /** is user/group; FULLBUF or LESSBUF */ + unsigned long lqs_iunit_sz; /** Unit size of file quota currently */ + /** + * Trigger dqacq when available file quota + * less than this value, trigger dqrel + * when more than this value + 1 iunit + */ + unsigned long lqs_itune_sz; + unsigned long lqs_bunit_sz; /** Unit size of block quota currently */ + unsigned long lqs_btune_sz; /** See comment of lqs itune sz */ + /** the blocks reached ost and don't finish */ + unsigned long lqs_bwrite_pending; + /** the inodes reached mds and don't finish */ + unsigned long lqs_iwrite_pending; + /** when inodes are allocated/released, this value will record it */ + long long lqs_ino_rec; + /** when blocks are allocated/released, this value will
record it */ + long long lqs_blk_rec; + atomic_t lqs_refcount; + cfs_time_t lqs_last_bshrink; /** time of last block shrink */ + cfs_time_t lqs_last_ishrink; /** time of last inode shrink */ + spinlock_t lqs_lock; + struct quota_adjust_qunit lqs_key; /** hash key */ + struct lustre_quota_ctxt *lqs_ctxt; /** quota ctxt */ +}; + +#define LQS_IS_GRP(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_GRP) +#define LQS_IS_ADJBLK(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_ADJBLK) +#define LQS_IS_ADJINO(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_ADJINO) + +#define LQS_SET_GRP(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_GRP) +#define LQS_SET_ADJBLK(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_ADJBLK) +#define LQS_SET_ADJINO(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_ADJINO) + +static inline void lqs_getref(struct lustre_qunit_size *lqs) +{ + atomic_inc(&lqs->lqs_refcount); + CDEBUG(D_QUOTA, "lqs=%p refcount %d\n", + lqs, atomic_read(&lqs->lqs_refcount)); +} + +static inline void lqs_putref(struct lustre_qunit_size *lqs) +{ + LASSERT(atomic_read(&lqs->lqs_refcount) > 0); + + /* killing last ref, let's let hash table kill it */ + if (atomic_read(&lqs->lqs_refcount) == 1) { + lustre_hash_del(lqs->lqs_ctxt->lqc_lqs_hash, + &lqs->lqs_key, &lqs->lqs_hash); + OBD_FREE_PTR(lqs); + } else { + atomic_dec(&lqs->lqs_refcount); + CDEBUG(D_QUOTA, "lqs=%p refcount %d\n", + lqs, atomic_read(&lqs->lqs_refcount)); + + } +} + +static inline void lqs_initref(struct lustre_qunit_size *lqs) +{ + atomic_set(&lqs->lqs_refcount, 0); +} + #else struct lustre_quota_info { @@ -149,6 +423,9 @@ struct lustre_quota_info { struct lustre_quota_ctxt { }; +#define QUOTA_MASTER_READY(qctxt) +#define QUOTA_MASTER_UNREADY(qctxt) + #endif /* !__KERNEL__ */ #else @@ -168,58 +445,109 @@ struct lustre_quota_ctxt { #define MIN_QLIMIT 1 struct quotacheck_thread_args { - struct obd_export *qta_exp; /* obd export */ - struct obd_quotactl qta_oqctl; /* obd_quotactl args */ - struct super_block *qta_sb; /* obd super block */ - atomic_t *qta_sem; /* 
obt_quotachecking */ + struct obd_export *qta_exp; /** obd export */ + struct obd_device *qta_obd; /** obd device */ + struct obd_quotactl qta_oqctl; /** obd_quotactl args */ + struct super_block *qta_sb; /** obd super block */ + atomic_t *qta_sem; /** obt_quotachecking */ }; +struct obd_trans_info; +typedef int (*quota_acquire)(struct obd_device *obd, unsigned int uid, + unsigned int gid, struct obd_trans_info *oti, + int isblk); + typedef struct { int (*quota_init) (void); int (*quota_exit) (void); int (*quota_setup) (struct obd_device *); int (*quota_cleanup) (struct obd_device *); - /* For quota master, close admin quota files */ + /** + * For quota master, close admin quota files + */ int (*quota_fs_cleanup) (struct obd_device *); - int (*quota_ctl) (struct obd_export *, struct obd_quotactl *); - int (*quota_check) (struct obd_export *, struct obd_quotactl *); + int (*quota_ctl) (struct obd_device *, struct obd_export *, + struct obd_quotactl *); + int (*quota_check) (struct obd_device *, struct obd_export *, + struct obd_quotactl *); int (*quota_recovery) (struct obd_device *); - - /* For quota master/slave, adjust quota limit after fs operation */ - int (*quota_adjust) (struct obd_device *, unsigned int[], - unsigned int[], int, int); - - /* For quota slave, set import, trigger quota recovery */ - int (*quota_setinfo) (struct obd_export *, struct obd_device *); - - /* For quota slave, set proper thread resoure capability */ + + /** + * For quota master/slave, adjust quota limit after fs operation + */ + int (*quota_adjust) (struct obd_device *, unsigned int[], + unsigned int[], int, int); + + /** + * For quota slave, set import, trigger quota recovery, + * For quota master, set lqc_setup + */ + int (*quota_setinfo) (struct obd_device *, void *); + + /** + * For quota slave, clear import when relative import is invalid + */ + int (*quota_clearinfo) (struct obd_export *, struct obd_device *); + + /** + * For quota slave, set proper thread resoure capability + 
*/ int (*quota_enforce) (struct obd_device *, unsigned int); - - /* For quota slave, check whether specified uid/gid is over quota */ + + /** + * For quota slave, check whether specified uid/gid is over quota + */ int (*quota_getflag) (struct obd_device *, struct obdo *); - - /* For quota slave, acquire/release quota from master if needed */ - int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int); - - /* For quota slave, check whether specified uid/gid's remaining quota - * can finish a write rpc */ + + /** + * For quota slave, acquire/release quota from master if needed + */ + int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int, + struct obd_trans_info *, int); + + /** + * For quota slave, check whether specified uid/gid's remaining quota + * can finish a block_write or inode_create rpc. It updates the pending + * record of block and inode, acquires quota if necessary + */ int (*quota_chkquota) (struct obd_device *, unsigned int, unsigned int, - int); + int, int *, quota_acquire, + struct obd_trans_info *, int); - /* For quota client, poll if the quota check done */ + /** + * For quota client, poll if the quota check done + */ int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *); - - /* For quota client, check whether specified uid/gid is over quota */ + + /** + * For quota client, check whether specified uid/gid is over quota + */ int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int); - - /* For quota client, set over quota flag for specifed uid/gid */ + + /** + * For quota client, the actions after the pending write is committed + */ + int (*quota_pending_commit) (struct obd_device *, unsigned int, + unsigned int, int, int); + + /** + * For quota client, set over quota flag for specifed uid/gid + */ int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int, obd_flag, obd_flag); + + /** + * For adjusting qunit size b=10600 + */ + int (*quota_adjust_qunit) (struct obd_export *exp, + 
struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt); + } quota_interface_t; #define Q_COPY(out, in, member) (out)->member = (in)->member -#define QUOTA_OP(interface, op) interface->quota_ ## op +#define QUOTA_OP(interface, op) interface->quota_ ## op #define QUOTA_CHECK_OP(interface, op) \ do { \ @@ -235,39 +563,39 @@ static inline int lquota_init(quota_interface_t *interface) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, init); rc = QUOTA_OP(interface, init)(); RETURN(rc); } -static inline int lquota_exit(quota_interface_t *interface) +static inline int lquota_exit(quota_interface_t *interface) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, exit); rc = QUOTA_OP(interface, exit)(); RETURN(rc); } static inline int lquota_setup(quota_interface_t *interface, - struct obd_device *obd) + struct obd_device *obd) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, setup); rc = QUOTA_OP(interface, setup)(obd); RETURN(rc); } static inline int lquota_cleanup(quota_interface_t *interface, - struct obd_device *obd) + struct obd_device *obd) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, cleanup); rc = QUOTA_OP(interface, cleanup)(obd); RETURN(rc); @@ -278,32 +606,57 @@ static inline int lquota_fs_cleanup(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, fs_cleanup); rc = QUOTA_OP(interface, fs_cleanup)(obd); RETURN(rc); } static inline int lquota_recovery(quota_interface_t *interface, - struct obd_device *obd) -{ + struct obd_device *obd) +{ int rc; ENTRY; - + QUOTA_CHECK_OP(interface, recovery); rc = QUOTA_OP(interface, recovery)(obd); RETURN(rc); } +static inline int lquota_check(quota_interface_t *interface, + struct obd_device *obd, + struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, check); + rc = QUOTA_OP(interface, check)(obd, exp, oqctl); + RETURN(rc); +} + +static inline int lquota_ctl(quota_interface_t *interface, + struct obd_device *obd, + struct obd_quotactl *oqctl) +{ + 
int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, ctl); + rc = QUOTA_OP(interface, ctl)(obd, NULL, oqctl); + RETURN(rc); +} + static inline int lquota_adjust(quota_interface_t *interface, - struct obd_device *obd, - unsigned int qcids[], - unsigned int qpids[], - int rc, int opc) + struct obd_device *obd, + unsigned int qcids[], + unsigned int qpids[], + int rc, int opc) { int ret; ENTRY; - + QUOTA_CHECK_OP(interface, adjust); ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc); RETURN(ret); @@ -315,7 +668,7 @@ static inline int lquota_chkdq(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, chkdq); rc = QUOTA_OP(interface, chkdq)(cli, uid, gid); RETURN(rc); @@ -328,7 +681,7 @@ static inline int lquota_setdq(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, setdq); rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags); RETURN(rc); @@ -340,26 +693,37 @@ static inline int lquota_poll_check(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, poll_check); rc = QUOTA_OP(interface, poll_check)(exp, qchk); RETURN(rc); } - static inline int lquota_setinfo(quota_interface_t *interface, - struct obd_export *exp, - struct obd_device *obd) + struct obd_device *obd, + void *data) { int rc; ENTRY; QUOTA_CHECK_OP(interface, setinfo); - rc = QUOTA_OP(interface, setinfo)(exp, obd); + rc = QUOTA_OP(interface, setinfo)(obd, data); + RETURN(rc); +} + +static inline int lquota_clearinfo(quota_interface_t *interface, + struct obd_export *exp, + struct obd_device *obd) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, clearinfo); + rc = QUOTA_OP(interface, clearinfo)(exp, obd); RETURN(rc); } -static inline int lquota_enforce(quota_interface_t *interface, +static inline int lquota_enforce(quota_interface_t *interface, struct obd_device *obd, unsigned int ignore) { @@ -381,57 +745,60 @@ static inline int lquota_getflag(quota_interface_t *interface, rc = QUOTA_OP(interface, getflag)(obd, oa); 
RETURN(rc); } - -static inline int lquota_acquire(quota_interface_t *interface, - struct obd_device *obd, - unsigned int uid, unsigned int gid) + +static inline int lquota_chkquota(quota_interface_t *interface, + struct obd_device *obd, + unsigned int uid, unsigned int gid, int count, + int *flag, struct obd_trans_info *oti, + int isblk) { int rc; ENTRY; + QUOTA_CHECK_OP(interface, chkquota); QUOTA_CHECK_OP(interface, acquire); - rc = QUOTA_OP(interface, acquire)(obd, uid, gid); + rc = QUOTA_OP(interface, chkquota)(obd, uid, gid, count, flag, + QUOTA_OP(interface, acquire), oti, + isblk); RETURN(rc); } -static inline int lquota_chkquota(quota_interface_t *interface, - struct obd_device *obd, - unsigned int uid, unsigned int gid, - int npage) +static inline int lquota_pending_commit(quota_interface_t *interface, + struct obd_device *obd, + unsigned int uid, unsigned int gid, + int npage, int isblk) { int rc; ENTRY; - - QUOTA_CHECK_OP(interface, chkquota); - rc = QUOTA_OP(interface, chkquota)(obd, uid, gid, npage); + + QUOTA_CHECK_OP(interface, pending_commit); + rc = QUOTA_OP(interface, pending_commit)(obd, uid, gid, npage, isblk); RETURN(rc); } -int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_rd_iunit(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_wr_bunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_wr_iunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_rd_btune(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_rd_itune(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_wr_btune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_wr_itune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_rd_type(char *page, char **start, off_t off, int count, 
- int *eof, void *data); -int lprocfs_wr_type(struct file *file, const char *buffer, - unsigned long count, void *data); - #ifndef __KERNEL__ extern quota_interface_t osc_quota_interface; -extern quota_interface_t mdc_quota_interface; extern quota_interface_t lov_quota_interface; +extern quota_interface_t mdc_quota_interface; +extern quota_interface_t lmv_quota_interface; + +#ifndef MAXQUOTAS +#define MAXQUOTAS 2 +#endif + +#ifndef USRQUOTA +#define USRQUOTA 0 #endif +#ifndef GRPQUOTA +#define GRPQUOTA 1 +#endif + +#endif + +#define LUSTRE_ADMIN_QUOTAFILES_V2 {\ + "admin_quotafile_v2.usr", /** user admin quotafile */\ + "admin_quotafile_v2.grp" /** group admin quotafile */\ +} + #endif /* _LUSTRE_QUOTA_H */ diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h index 1853571..4f0c777 100644 --- a/lustre/include/lustre_req_layout.h +++ b/lustre/include/lustre_req_layout.h @@ -177,6 +177,7 @@ extern const struct req_format RQF_OST_CONNECT; extern const struct req_format RQF_OST_DISCONNECT; extern const struct req_format RQF_OST_QUOTACHECK; extern const struct req_format RQF_OST_QUOTACTL; +extern const struct req_format RQF_OST_QUOTA_ADJUST_QUNIT; extern const struct req_format RQF_OST_GETATTR; extern const struct req_format RQF_OST_SETATTR; extern const struct req_format RQF_OST_CREATE; @@ -244,6 +245,7 @@ extern const struct req_msg_field RMF_CAPA1; extern const struct req_msg_field RMF_CAPA2; extern const struct req_msg_field RMF_OBD_QUOTACHECK; extern const struct req_msg_field RMF_OBD_QUOTACTL; +extern const struct req_msg_field RMF_QUOTA_ADJUST_QUNIT; extern const struct req_msg_field RMF_QUNIT_DATA; extern const struct req_msg_field RMF_STRING; diff --git a/lustre/include/lustre_sec.h b/lustre/include/lustre_sec.h index 00c20d2..24d2a41 100644 --- a/lustre/include/lustre_sec.h +++ b/lustre/include/lustre_sec.h @@ -779,5 +779,14 @@ int bulk_csum_svc(struct ptlrpc_bulk_desc *desc, int read, struct ptlrpc_bulk_sec_desc *bsdv, int 
vsize, struct ptlrpc_bulk_sec_desc *bsdr, int rsize); +#define CFS_CAP_CHOWN_MASK (1 << CFS_CAP_CHOWN) +#define CFS_CAP_SYS_RESOURCE_MASK (1 << CFS_CAP_SYS_RESOURCE) + +enum { + LUSTRE_SEC_NONE = 0, + LUSTRE_SEC_REMOTE = 1, + LUSTRE_SEC_SPECIFY = 2, + LUSTRE_SEC_ALL = 3 +}; #endif /* _LUSTRE_SEC_H_ */ diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index c2fc95c..ecc92dc 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -62,7 +62,7 @@ struct md_device; struct md_device_operations; struct md_object; - +struct obd_export; enum { UCRED_INVALID = -1, @@ -73,18 +73,18 @@ enum { struct md_ucred { __u32 mu_valid; - __u32 mu_o_uid; - __u32 mu_o_gid; - __u32 mu_o_fsuid; - __u32 mu_o_fsgid; - __u32 mu_uid; - __u32 mu_gid; - __u32 mu_fsuid; - __u32 mu_fsgid; - __u32 mu_suppgids[2]; - cfs_cap_t mu_cap; - __u32 mu_umask; - struct group_info *mu_ginfo; + __u32 mu_o_uid; + __u32 mu_o_gid; + __u32 mu_o_fsuid; + __u32 mu_o_fsgid; + __u32 mu_uid; + __u32 mu_gid; + __u32 mu_fsuid; + __u32 mu_fsgid; + __u32 mu_suppgids[2]; + cfs_cap_t mu_cap; + __u32 mu_umask; + struct group_info *mu_ginfo; struct md_identity *mu_identity; }; @@ -95,6 +95,8 @@ enum { /** there are at most 5 fids in one operation, see rename, NOTE the last one * is a temporary one used for is_subdir() */ struct md_capainfo { + __u32 mc_auth; + __u32 mc_padding; const struct lu_fid *mc_fid[MD_CAPAINFO_MAX]; struct lustre_capa *mc_capa[MD_CAPAINFO_MAX]; }; @@ -315,6 +317,82 @@ struct md_device_operations { int (*mdo_update_capa_key)(const struct lu_env *env, struct md_device *m, struct lustre_capa_key *key); + +#ifdef HAVE_QUOTA_SUPPORT + struct md_quota_operations { + int (*mqo_notify)(const struct lu_env *env, + struct md_device *m); + + int (*mqo_setup)(const struct lu_env *env, + struct md_device *m, + void *data); + + int (*mqo_cleanup)(const struct lu_env *env, + struct md_device *m); + + int (*mqo_recovery)(const struct lu_env *env, + struct md_device *m); + + int 
(*mqo_check)(const struct lu_env *env, + struct md_device *m, + struct obd_export *exp, + __u32 type); + + int (*mqo_on)(const struct lu_env *env, + struct md_device *m, + __u32 type, + __u32 id); + + int (*mqo_off)(const struct lu_env *env, + struct md_device *m, + __u32 type, + __u32 id); + + int (*mqo_setinfo)(const struct lu_env *env, + struct md_device *m, + __u32 type, + __u32 id, + struct obd_dqinfo *dqinfo); + + int (*mqo_getinfo)(const struct lu_env *env, + const struct md_device *m, + __u32 type, + __u32 id, + struct obd_dqinfo *dqinfo); + + int (*mqo_setquota)(const struct lu_env *env, + struct md_device *m, + __u32 type, + __u32 id, + struct obd_dqblk *dqblk); + + int (*mqo_getquota)(const struct lu_env *env, + const struct md_device *m, + __u32 type, + __u32 id, + struct obd_dqblk *dqblk); + + int (*mqo_getoinfo)(const struct lu_env *env, + const struct md_device *m, + __u32 type, + __u32 id, + struct obd_dqinfo *dqinfo); + + int (*mqo_getoquota)(const struct lu_env *env, + const struct md_device *m, + __u32 type, + __u32 id, + struct obd_dqblk *dqblk); + + int (*mqo_invalidate)(const struct lu_env *env, + struct md_device *m, + __u32 type); + + int (*mqo_finvalidate)(const struct lu_env *env, + struct md_device *m, + __u32 type); + } mdo_quota; +#endif }; enum md_upcall_event { diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 15d4273..2a5b2d8 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -248,6 +248,8 @@ struct obd_device_target { struct super_block *obt_sb; atomic_t obt_quotachecking; struct lustre_quota_ctxt obt_qctxt; + lustre_quota_version_t obt_qfmt; + struct rw_semaphore obt_rwsem; }; /* llog contexts */ @@ -362,6 +364,7 @@ struct filter_obd { struct list_head fo_capa_keys; struct hlist_head *fo_capa_hash; struct llog_commit_master *fo_lcm; + int fo_sec_level; }; #define OSC_MAX_RIF_DEFAULT 8 @@ -517,9 +520,9 @@ struct mds_obd { __u32 mds_id; /* mark pages dirty for write. 
*/ - bitmap_t *mds_lov_page_dirty; + bitmap_t *mds_lov_page_dirty; /* array for store pages with obd_id */ - void **mds_lov_page_array; + void **mds_lov_page_array; /* file for store objid */ struct file *mds_lov_objid_filp; __u32 mds_lov_objid_count; @@ -537,13 +540,14 @@ struct mds_obd { mds_fl_acl:1, mds_evict_ost_nids:1, mds_fl_cfglog:1, - mds_fl_synced:1; + mds_fl_synced:1, + mds_quota:1; struct upcall_cache *mds_identity_cache; /* for capability keys update */ struct lustre_capa_key *mds_capa_keys; - struct rw_semaphore mds_notify_lock; + struct rw_semaphore mds_notify_lock; }; /* lov objid */ @@ -788,7 +792,7 @@ struct obd_trans_info { int oti_numcookies; /* initial thread handling transaction */ - int oti_thread_id; + struct ptlrpc_thread * oti_thread; __u32 oti_conn_cnt; struct obd_uuid *oti_ost_uuid; @@ -808,7 +812,7 @@ static inline void oti_init(struct obd_trans_info *oti, if (req->rq_repmsg != NULL) oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg); - oti->oti_thread_id = req->rq_svc_thread ? 
req->rq_svc_thread->t_id : -1; + oti->oti_thread = req->rq_svc_thread; if (req->rq_reqmsg != NULL) oti->oti_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg); } @@ -1214,7 +1218,7 @@ struct obd_ops { struct lov_stripe_md **ea, struct obd_trans_info *oti); int (*o_destroy)(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp); + struct obd_export *md_exp, void *capa); int (*o_setattr)(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti); int (*o_setattr_async)(struct obd_export *exp, struct obd_info *oinfo, @@ -1292,8 +1296,14 @@ struct obd_ops { struct obd_uuid *(*o_get_uuid) (struct obd_export *exp); /* quota methods */ - int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *); - int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); + int (*o_quotacheck)(struct obd_device *, struct obd_export *, + struct obd_quotactl *); + int (*o_quotactl)(struct obd_device *, struct obd_export *, + struct obd_quotactl *); + int (*o_quota_adjust_qunit)(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt); + int (*o_ping)(struct obd_export *exp); @@ -1436,6 +1446,8 @@ struct md_ops { void *opaque); int (*m_renew_capa)(struct obd_export *, struct obd_capa *oc, renew_capa_cb_t cb); + int (*m_unpack_capa)(struct obd_export *, struct ptlrpc_request *, + const struct req_msg_field *, struct obd_capa **); int (*m_get_remote_perm)(struct obd_export *, const struct lu_fid *, struct obd_capa *, __u32, @@ -1526,6 +1538,7 @@ static inline void init_obd_quota_ops(quota_interface_t *interface, LASSERT(obd_ops); obd_ops->o_quotacheck = QUOTA_OP(interface, check); obd_ops->o_quotactl = QUOTA_OP(interface, ctl); + obd_ops->o_quota_adjust_qunit = QUOTA_OP(interface, adjust_qunit); } static inline __u64 oinfo_mdsno(struct obd_info *oinfo) diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 3e885b2..8fe3e6c 100644 --- 
a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -86,8 +86,6 @@ void class_obd_list(void); struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, const char * typ_name, struct obd_uuid *grp_uuid); -struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid, - struct obd_uuid *grp_uuid); struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next); struct obd_device * class_num2obd(int num); @@ -669,7 +667,7 @@ static inline int obd_create(struct obd_export *exp, struct obdo *obdo, static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { int rc; ENTRY; @@ -677,7 +675,7 @@ static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, EXP_CHECK_DT_OP(exp, destroy); EXP_COUNTER_INCREMENT(exp, destroy); - rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp); + rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp, capa); RETURN(rc); } @@ -1469,7 +1467,7 @@ static inline int obd_quotacheck(struct obd_export *exp, EXP_CHECK_DT_OP(exp, quotacheck); EXP_COUNTER_INCREMENT(exp, quotacheck); - rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl); + rc = OBP(exp->exp_obd, quotacheck)(exp->exp_obd, exp, oqctl); RETURN(rc); } @@ -1482,7 +1480,39 @@ static inline int obd_quotactl(struct obd_export *exp, EXP_CHECK_DT_OP(exp, quotactl); EXP_COUNTER_INCREMENT(exp, quotactl); - rc = OBP(exp->exp_obd, quotactl)(exp, oqctl); + rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl); + RETURN(rc); +} + +static inline int obd_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt) +{ +#if defined(LPROCFS) && defined(HAVE_QUOTA_SUPPORT) + struct timeval work_start; + struct timeval work_end; + long timediff; +#endif + int rc; + ENTRY; + +#if defined(LPROCFS) && defined(HAVE_QUOTA_SUPPORT) + if (qctxt) + 
do_gettimeofday(&work_start); +#endif + EXP_CHECK_DT_OP(exp, quota_adjust_qunit); + EXP_COUNTER_INCREMENT(exp, quota_adjust_qunit); + + rc = OBP(exp->exp_obd, quota_adjust_qunit)(exp, oqaq, qctxt); + +#if defined(LPROCFS) && defined(HAVE_QUOTA_SUPPORT) + if (qctxt) { + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, LQUOTA_ADJUST_QUNIT, + timediff); + } +#endif RETURN(rc); } @@ -1932,6 +1962,19 @@ static inline int md_renew_capa(struct obd_export *exp, struct obd_capa *ocapa, RETURN(rc); } +static inline int md_unpack_capa(struct obd_export *exp, + struct ptlrpc_request *req, + const struct req_msg_field *field, + struct obd_capa **oc) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, unpack_capa); + EXP_MD_COUNTER_INCREMENT(exp, unpack_capa); + rc = MDP(exp->exp_obd, unpack_capa)(exp, req, field, oc); + RETURN(rc); +} + static inline int md_intent_getattr_async(struct obd_export *exp, struct md_enqueue_info *minfo, struct ldlm_enqueue_info *einfo) diff --git a/lustre/include/obd_ost.h b/lustre/include/obd_ost.h index b849ff0..8ddb969 100644 --- a/lustre/include/obd_ost.h +++ b/lustre/include/obd_ost.h @@ -54,6 +54,7 @@ struct osc_brw_async_args { struct brw_page **aa_ppga; struct client_obd *aa_cli; struct list_head aa_oaps; + struct obd_capa *aa_ocapa; struct cl_req *aa_clerq; }; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 25ab060..50379c5 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -192,6 +192,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13f #define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x140 #define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x141 +#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x142 +#define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x143 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -320,7 +322,7 @@ int obd_alloc_fail(const void *ptr, const char 
*name, const char *type, #define OBD_FAIL_MGS_PAUSE_REQ 0x904 #define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905 -#define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 +#define OBD_FAIL_QUOTA_RET_QDATA 0xA02 #define OBD_FAIL_LPROC_REMOVE 0xB00 diff --git a/lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch b/lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch new file mode 100644 index 0000000..b8c6b0d --- /dev/null +++ b/lustre/kernel_patches/patches/quota-fix-oops-in-invalidate_dquots.patch @@ -0,0 +1,127 @@ +From: Jan Kara +Date: Thu, 23 Mar 2006 11:00:17 +0000 (-0800) +Subject: [PATCH] Fix oops in invalidate_dquots() +X-Git-Tag: v2.6.17-rc1~1059 +X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=6362e4d4eda61efb04ac1cdae32e48ac6d90b701 + +[PATCH] Fix oops in invalidate_dquots() + +When quota is being turned off we assumed that all the references to dquots +were already dropped. That need not be true as inodes being deleted are +not on superblock's inodes list and hence we need not reach it when +removing quota references from inodes. So invalidate_dquots() has to wait +for all the users of dquots (as quota is already marked as turned off, no +new references can be acquired and so this is bound to happen rather +early). When we do this, we can also remove the iprune_sem locking as it +was protecting us against exactly the same problem when freeing inodes +icache memory. + +Signed-off-by: Jan Kara +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +--- + +diff --git a/fs/dquot.c b/fs/dquot.c +index 1966c89..9376a43 100644 +--- a/fs/dquot.c ++++ b/fs/dquot.c +@@ -118,8 +118,7 @@ + * spinlock to internal buffers before writing. 
+ * + * Lock ordering (including related VFS locks) is the following: +- * i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > +- * > dquot->dq_lock > dqio_sem ++ * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem + * i_mutex on quota files is special (it's below dqio_sem) + */ + +@@ -407,23 +406,49 @@ out_dqlock: + + /* Invalidate all dquots on the list. Note that this function is called after + * quota is disabled and pointers from inodes removed so there cannot be new +- * quota users. Also because we hold dqonoff_sem there can be no quota users +- * for this sb+type at all. */ ++ * quota users. There can still be some users of quotas due to inodes being ++ * just deleted or pruned by prune_icache() (those are not attached to any ++ * list). We have to wait for such users. ++ */ + static void invalidate_dquots(struct super_block *sb, int type) + { + struct dquot *dquot, *tmp; + ++restart: + spin_lock(&dq_list_lock); + list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { + if (dquot->dq_sb != sb) + continue; + if (dquot->dq_type != type) + continue; +-#ifdef __DQUOT_PARANOIA +- if (atomic_read(&dquot->dq_count)) +- BUG(); +-#endif +- /* Quota now has no users and it has been written on last dqput() */ ++ /* Wait for dquot users */ ++ if (atomic_read(&dquot->dq_count)) { ++ DEFINE_WAIT(wait); ++ ++ atomic_inc(&dquot->dq_count); ++ prepare_to_wait(&dquot->dq_wait_unused, &wait, ++ TASK_UNINTERRUPTIBLE); ++ spin_unlock(&dq_list_lock); ++ /* Once dqput() wakes us up, we know it's time to free ++ * the dquot. ++ * IMPORTANT: we rely on the fact that there is always ++ * at most one process waiting for dquot to free. ++ * Otherwise dq_count would be > 1 and we would never ++ * wake up. ++ */ ++ if (atomic_read(&dquot->dq_count) > 1) ++ schedule(); ++ finish_wait(&dquot->dq_wait_unused, &wait); ++ dqput(dquot); ++ /* At this moment dquot() need not exist (it could be ++ * reclaimed by prune_dqcache(). 
Hence we must ++ * restart. */ ++ goto restart; ++ } ++ /* ++ * Quota now has no users and it has been written on last ++ * dqput() ++ */ + remove_dquot_hash(dquot); + remove_free_dquot(dquot); + remove_inuse(dquot); +@@ -540,6 +565,10 @@ we_slept: + if (atomic_read(&dquot->dq_count) > 1) { + /* We have more than one user... nothing to do */ + atomic_dec(&dquot->dq_count); ++ /* Releasing dquot during quotaoff phase? */ ++ if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) && ++ atomic_read(&dquot->dq_count) == 1) ++ wake_up(&dquot->dq_wait_unused); + spin_unlock(&dq_list_lock); + return; + } +@@ -581,6 +610,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) + INIT_LIST_HEAD(&dquot->dq_inuse); + INIT_HLIST_NODE(&dquot->dq_hash); + INIT_LIST_HEAD(&dquot->dq_dirty); ++ init_waitqueue_head(&dquot->dq_wait_unused); + dquot->dq_sb = sb; + dquot->dq_type = type; + atomic_set(&dquot->dq_count, 1); +@@ -732,13 +762,9 @@ static void drop_dquot_ref(struct super_block *sb, int type) + { + LIST_HEAD(tofree_head); + +- /* We need to be guarded against prune_icache to reach all the +- * inodes - otherwise some can be on the local list of prune_icache */ +- down(&iprune_sem); + down_write(&sb_dqopt(sb)->dqptr_sem); + remove_dquot_ref(sb, type, &tofree_head); + up_write(&sb_dqopt(sb)->dqptr_sem); +- up(&iprune_sem); + put_dquot_list(&tofree_head); + } + diff --git a/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch b/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch new file mode 100644 index 0000000..4f3a3bc --- /dev/null +++ b/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch @@ -0,0 +1,616 @@ +diff -rNpu linux-2.6.16.54-0.2.5/fs/dquot.c linux-2.6.16.54-0.2.5-quota/fs/dquot.c +--- linux-2.6.16.54-0.2.5/fs/dquot.c 2008-03-18 15:48:26.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/fs/dquot.c 2008-03-17 22:43:11.000000000 +0300 +@@ -1588,10 +1588,19 @@ int vfs_get_dqblk(struct super_block *sb + } + + /* Generic 
routine for setting common part of quota structure */ +-static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) ++static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) + { + struct mem_dqblk *dm = &dquot->dq_dqb; + int check_blim = 0, check_ilim = 0; ++ struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; ++ ++ if ((di->dqb_valid & QIF_BLIMITS && ++ (di->dqb_bhardlimit > dqi->dqi_maxblimit || ++ di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || ++ (di->dqb_valid & QIF_ILIMITS && ++ (di->dqb_ihardlimit > dqi->dqi_maxilimit || ++ di->dqb_isoftlimit > dqi->dqi_maxilimit))) ++ return -ERANGE; + + spin_lock(&dq_data_lock); + if (di->dqb_valid & QIF_SPACE) { +@@ -1623,7 +1632,7 @@ static void do_set_dqblk(struct dquot *d + clear_bit(DQ_BLKS_B, &dquot->dq_flags); + } + else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */ +- dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; ++ dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; + } + if (check_ilim) { + if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { +@@ -1631,7 +1640,7 @@ static void do_set_dqblk(struct dquot *d + clear_bit(DQ_INODES_B, &dquot->dq_flags); + } + else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... 
*/ +- dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; ++ dm->dqb_itime = get_seconds() + dqi->dqi_igrace; + } + if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) + clear_bit(DQ_FAKE_B, &dquot->dq_flags); +@@ -1639,21 +1648,24 @@ static void do_set_dqblk(struct dquot *d + set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dq_data_lock); + mark_dquot_dirty(dquot); ++ ++ return 0; + } + + int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) + { + struct dquot *dquot; ++ int rc; + + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + if (!(dquot = dqget(sb, id, type))) { + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + return -ESRCH; + } +- do_set_dqblk(dquot, di); ++ rc = do_set_dqblk(dquot, di); + dqput(dquot); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); +- return 0; ++ return rc; + } + + /* Generic routine for getting common part of quota file information */ +diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v1.c linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c +--- linux-2.6.16.54-0.2.5/fs/quota_v1.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c 2008-03-17 22:42:47.000000000 +0300 +@@ -139,6 +139,9 @@ static int v1_read_file_info(struct supe + goto out; + } + ret = 0; ++ /* limits are stored as unsigned 32-bit data */ ++ dqopt->info[type].dqi_maxblimit = 0xffffffff; ++ dqopt->info[type].dqi_maxilimit = 0xffffffff; + dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; + dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? 
dqblk.dqb_btime : MAX_DQ_TIME; + out: +diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v2.c linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c +--- linux-2.6.16.54-0.2.5/fs/quota_v2.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c 2008-03-18 11:58:02.000000000 +0300 +@@ -23,26 +23,64 @@ MODULE_LICENSE("GPL"); + typedef char *dqbuf_t; + + #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff) +-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader))) ++#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \ ++ sizeof(struct v2_disk_dqdbheader))) ++#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1) ++ ++static const union v2_disk_dqblk emptydquot; ++static const union v2_disk_dqblk fakedquot[2] = { ++ {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }, ++ {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} } ++}; + +-/* Check whether given file is really vfsv0 quotafile */ +-static int v2_check_quota_file(struct super_block *sb, int type) ++static inline uint v2_dqblksz(uint rev) ++{ ++ uint sz; ++ ++ REV_ASSERT(rev); ++ ++ if (rev == 0) ++ sz = sizeof(struct v2_disk_dqblk_r0); ++ else ++ sz = sizeof(struct v2_disk_dqblk_r1); ++ ++ return sz; ++} ++ ++/* Number of quota entries in a block */ ++static inline int v2_dqstrinblk(uint rev) ++{ ++ return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev); ++} ++ ++/* Get revision of a quota file, -1 if it does not look a quota file */ ++static int v2_quota_file_revision(struct super_block *sb, int type) + { + struct v2_disk_dqheader dqhead; + ssize_t size; + static const uint quota_magics[] = V2_INITQMAGICS; +- static const uint quota_versions[] = V2_INITQVERSIONS; ++ static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0; ++ static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1; + + size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); + if (size != 
sizeof(struct v2_disk_dqheader)) { + printk("quota_v2: failed read expected=%zd got=%zd\n", + sizeof(struct v2_disk_dqheader), size); +- return 0; ++ return -1; + } +- if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || +- le32_to_cpu(dqhead.dqh_version) != quota_versions[type]) +- return 0; +- return 1; ++ if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) { ++ if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type]) ++ return 0; ++ if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type]) ++ return 1; ++ } ++ return -1; ++} ++ ++/* Check whether given file is really vfsv0 quotafile */ ++static inline int v2_check_quota_file(struct super_block *sb, int type) ++{ ++ return v2_quota_file_revision(sb, type) != -1; + } + + /* Read information header from quota file */ +@@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe + struct v2_disk_dqinfo dinfo; + struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + ssize_t size; ++ int rev; ++ ++ rev = v2_quota_file_revision(sb, type); ++ if (rev < 0) { ++ printk(KERN_WARNING "Second quota file check failed.\n"); ++ return -1; ++ } + + size = sb->s_op->quota_read(sb, type, (char *)&dinfo, + sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); +@@ -65,6 +110,16 @@ static int v2_read_file_info(struct supe + info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); + info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); + info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); ++ ++ info->u.v2_i.dqi_revision = rev; ++ if (rev == 0) { ++ info->dqi_maxblimit = 0xffffffffULL; ++ info->dqi_maxilimit = 0xffffffffULL; ++ } else { ++ info->dqi_maxblimit = 0xffffffffffffffffULL; ++ info->dqi_maxilimit = 0xffffffffffffffffULL; ++ } ++ + return 0; + } + +@@ -94,29 +149,61 @@ static int v2_write_file_info(struct sup + return 0; + } + +-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d) ++static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev) + { +- 
m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); +- m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit); +- m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes); +- m->dqb_itime = le64_to_cpu(d->dqb_itime); +- m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit); +- m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit); +- m->dqb_curspace = le64_to_cpu(d->dqb_curspace); +- m->dqb_btime = le64_to_cpu(d->dqb_btime); +-} +- +-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id) +-{ +- d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); +- d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); +- d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes); +- d->dqb_itime = cpu_to_le64(m->dqb_itime); +- d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit); +- d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit); +- d->dqb_curspace = cpu_to_le64(m->dqb_curspace); +- d->dqb_btime = cpu_to_le64(m->dqb_btime); +- d->dqb_id = cpu_to_le32(id); ++ REV_ASSERT(rev); ++ ++ if (rev == 0) { ++ struct v2_disk_dqblk_r0 *ddqblk = &d->r0; ++ m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit); ++ m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit); ++ m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes); ++ m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime); ++ m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit); ++ m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit); ++ m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace); ++ m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime); ++ } else { ++ struct v2_disk_dqblk_r1 *ddqblk = &d->r1; ++ m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit); ++ m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit); ++ m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes); ++ m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime); ++ m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit); ++ m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit); ++ m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace); ++ m->dqb_btime = 
le64_to_cpu(ddqblk->dqb_btime); ++ } ++} ++ ++static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m, ++ qid_t id, uint rev) ++{ ++ REV_ASSERT(rev); ++ ++ if (rev == 0) { ++ struct v2_disk_dqblk_r0 *ddqblk = &d->r0; ++ ddqblk->dqb_id = cpu_to_le32(id); ++ ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit); ++ ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit); ++ ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes); ++ ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime); ++ ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit); ++ ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit); ++ ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace); ++ ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime); ++ } else { ++ struct v2_disk_dqblk_r1 *ddqblk = &d->r1; ++ ddqblk->dqb_id = cpu_to_le32(id); ++ ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); ++ ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); ++ ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes); ++ ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime); ++ ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit); ++ ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit); ++ ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace); ++ ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime); ++ } + } + static dqbuf_t getdqbuf(void) +@@ -268,10 +355,10 @@ static uint find_free_dqentry(struct dqu + { + struct super_block *sb = dquot->dq_sb; + struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type; +- uint blk, i; ++ uint blk, i, rev = info->u.v2_i.dqi_revision; ++ uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev); + struct v2_disk_dqdbheader *dh; +- struct v2_disk_dqblk *ddquot; +- struct v2_disk_dqblk fakedquot; ++ union v2_disk_dqblk *ddquot; + dqbuf_t buf; + + *err = 0; +@@ -298,17 +385,18 @@ static uint find_free_dqentry(struct dqu + info->u.v2_i.dqi_free_entry = blk; + mark_info_dirty(sb, dquot->dq_type); + } +- if 
(le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */ ++ /* Block will be full? */ ++ if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk) + if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) { + printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); + goto out_buf; + } + dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); +- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); + /* Find free structure in block */ +- for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); ++ for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz); ++ i++, ddquot = (char *)ddquot + dqblksz); + #ifdef __QUOTA_V2_PARANOIA +- if (i == V2_DQSTRINBLK) { ++ if (i == dqstrinblk) { + printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n"); + *err = -EIO; + goto out_buf; +@@ -318,7 +406,8 @@ static uint find_free_dqentry(struct dqu + printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk); + goto out_buf; + } +- dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk); ++ dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+ ++ ((char *)ddquot - (char *)buf); + freedqbuf(buf); + return blk; + out_buf: +@@ -392,7 +481,9 @@ static int v2_write_dquot(struct dquot * + { + int type = dquot->dq_type; + ssize_t ret; +- struct v2_disk_dqblk ddquot, empty; ++ union v2_disk_dqblk ddquot; ++ uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision; ++ uint dqblksz = v2_dqblksz(rev); + + /* dq_off is guarded by dqio_sem */ + if (!dquot->dq_off) +@@ -401,18 +492,22 @@ static int v2_write_dquot(struct dquot * + return ret; + } + spin_lock(&dq_data_lock); +- mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); ++ mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev); + /* Argh... We may need to write structure full of zeroes but that would be + * treated as an empty place by the rest of the code. 
Format change would + * be definitely cleaner but the problems probably are not worth it */ +- memset(&empty, 0, sizeof(struct v2_disk_dqblk)); +- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) +- ddquot.dqb_itime = cpu_to_le64(1); ++ if (!memcmp(&emptydquot, &ddquot, dqblksz)) { ++ if (rev == 0) ++ ddquot.r0.dqb_itime = cpu_to_le64(1); ++ else ++ ddquot.r1.dqb_itime = cpu_to_le64(1); ++ } + spin_unlock(&dq_data_lock); + ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, +- (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off); +- if (ret != sizeof(struct v2_disk_dqblk)) { +- printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id); ++ (char *)&ddquot, dqblksz, dquot->dq_off); ++ if (ret != dqblksz) { ++ printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", ++ dquot->dq_sb->s_id); + if (ret >= 0) + ret = -ENOSPC; + } +@@ -431,6 +526,7 @@ static int free_dqentry(struct dquot *dq + struct v2_disk_dqdbheader *dh; + dqbuf_t buf = getdqbuf(); + int ret = 0; ++ uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision; + + if (!buf) + return -ENOMEM; +@@ -456,8 +552,8 @@ static int free_dqentry(struct dquot *dq + } + else { + memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, +- sizeof(struct v2_disk_dqblk)); +- if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { ++ v2_dqblksz(rev)); ++ if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) { + /* Insert will write block itself */ + if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) { + printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); +@@ -529,41 +625,56 @@ static int v2_delete_dquot(struct dquot + return remove_tree(dquot, &tmp, 0); + } + ++static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev) ++{ ++ __u32 dq_id; ++ ++ REV_ASSERT(rev); ++ ++ if (rev == 0) ++ dq_id = le32_to_cpu(ddquot->r0.dqb_id); ++ else ++ dq_id = le32_to_cpu(ddquot->r1.dqb_id); ++ ++ return dq_id; ++} ++ + /* Find entry in 
block */ + static loff_t find_block_dqentry(struct dquot *dquot, uint blk) + { + dqbuf_t buf = getdqbuf(); + loff_t ret = 0; + int i; +- struct v2_disk_dqblk *ddquot = GETENTRIES(buf); ++ union v2_disk_dqblk *ddquot = GETENTRIES(buf); ++ int type = dquot->dq_type; ++ uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision; ++ uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev); + + if (!buf) + return -ENOMEM; +- if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { ++ ++ ret = read_blk(dquot->dq_sb, type, blk, buf); ++ if (ret < 0) { + printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + goto out_buf; + } + if (dquot->dq_id) +- for (i = 0; i < V2_DQSTRINBLK && +- le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++); ++ for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id; ++ i++, ddquot = (char *)ddquot + dqblksz); + else { /* ID 0 as a bit more complicated searching... */ +- struct v2_disk_dqblk fakedquot; +- +- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); +- for (i = 0; i < V2_DQSTRINBLK; i++) +- if (!le32_to_cpu(ddquot[i].dqb_id) && +- memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk))) ++ for (i = 0; i < dqstrinblk; i++, ddquot = (char *)ddquot+dqblksz) ++ if (!dqid(ddquot, rev) && ++ memcmp(&emptydquot, ddquot, dqblksz)) + break; + } +- if (i == V2_DQSTRINBLK) { ++ if (i == dqstrinblk) { + printk(KERN_ERR "VFS: Quota for id %u referenced " + "but not present.\n", dquot->dq_id); + ret = -EIO; + goto out_buf; + } + else +- ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct +- v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk); ++ ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf); + out_buf: + freedqbuf(buf); + return ret; +@@ -605,7 +716,7 @@ static int v2_read_dquot(struct dquot *d + { + int type = dquot->dq_type; + loff_t offset; +- struct v2_disk_dqblk ddquot, empty; ++ union v2_disk_dqblk ddquot; + int ret = 0; + + #ifdef __QUOTA_V2_PARANOIA +@@ -626,25 +737,30 @@ static 
int v2_read_dquot(struct dquot *d + ret = offset; + } + else { ++ uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i. ++ dqi_revision; ++ uint dqblksz = v2_dqblksz(rev); + dquot->dq_off = offset; +- if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, +- (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset)) +- != sizeof(struct v2_disk_dqblk)) { ++ ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, ++ (char *)&ddquot, dqblksz, offset); ++ if (ret != dqblksz) { + if (ret >= 0) + ret = -EIO; + printk(KERN_ERR "VFS: Error while reading quota " + "structure for id %u.\n", dquot->dq_id); +- memset(&ddquot, 0, sizeof(struct v2_disk_dqblk)); ++ memset(&ddquot, 0, dqblksz); + } + else { + ret = 0; + /* We need to escape back all-zero structure */ +- memset(&empty, 0, sizeof(struct v2_disk_dqblk)); +- empty.dqb_itime = cpu_to_le64(1); +- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) +- ddquot.dqb_itime = 0; ++ if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) { ++ if (rev == 0) ++ ddquot.r0.dqb_itime = cpu_to_le64(0); ++ else ++ ddquot.r1.dqb_itime = cpu_to_le64(0); ++ } + } +- disk2memdqb(&dquot->dq_dqb, &ddquot); ++ disk2memdqb(&dquot->dq_dqb, &ddquot, rev); + if (!dquot->dq_dqb.dqb_bhardlimit && + !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && +diff -rNpu linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h +--- linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h 2008-03-17 23:39:54.000000000 +0300 +@@ -21,6 +21,7 @@ struct v2_mem_dqinfo { + unsigned int dqi_blocks; + unsigned int dqi_free_blk; + unsigned int dqi_free_entry; ++ unsigned int dqi_revision; + }; + + #endif /* _LINUX_DQBLK_V2_H */ +diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quota.h linux-2.6.16.54-0.2.5-quota/include/linux/quota.h +--- linux-2.6.16.54-0.2.5/include/linux/quota.h 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16.54-0.2.5-quota/include/linux/quota.h 2008-03-17 23:39:54.000000000 +0300 +@@ -148,12 +148,12 @@ struct if_dqinfo { + * Data for one user/group kept in memory + */ + struct mem_dqblk { +- __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ +- __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ ++ qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ ++ qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */ + qsize_t dqb_curspace; /* current used space */ +- __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ +- __u32 dqb_isoftlimit; /* preferred inode limit */ +- __u32 dqb_curinodes; /* current # allocated inodes */ ++ qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */ ++ qsize_t dqb_isoftlimit; /* preferred inode limit */ ++ qsize_t dqb_curinodes; /* current # allocated inodes */ + time_t dqb_btime; /* time limit for excessive disk use */ + time_t dqb_itime; /* time limit for excessive inode use */ + }; +@@ -169,6 +169,8 @@ struct mem_dqinfo { + unsigned long dqi_flags; + unsigned int dqi_bgrace; + unsigned int dqi_igrace; ++ qsize_t dqi_maxblimit; ++ qsize_t dqi_maxilimit; + union { + struct v1_mem_dqinfo v1_i; + struct v2_mem_dqinfo v2_i; +diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h +--- linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h 2008-03-17 23:39:54.000000000 +0300 +@@ -16,28 +16,51 @@ + 0xd9c01927 /* GRPQUOTA */\ + } + +-#define V2_INITQVERSIONS {\ ++#define V2_INITQVERSIONS_R0 {\ + 0, /* USRQUOTA */\ + 0 /* GRPQUOTA */\ + } + ++#define V2_INITQVERSIONS_R1 {\ ++ 1, /* USRQUOTA */\ ++ 1 /* GRPQUOTA */\ ++} ++ + /* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is a radix tree whose leaves point + * to blocks of these structures. 
+ */ +-struct v2_disk_dqblk { ++struct v2_disk_dqblk_r0 { + __le32 dqb_id; /* id this quota applies to */ + __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */ + __le32 dqb_isoftlimit; /* preferred inode limit */ + __le32 dqb_curinodes; /* current # allocated inodes */ +- __le32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */ +- __le32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */ ++ __le32 dqb_bhardlimit; /* absolute limit on disk space */ ++ __le32 dqb_bsoftlimit; /* preferred limit on disk space */ ++ __le64 dqb_curspace; /* current space occupied (in bytes) */ ++ __le64 dqb_btime; /* time limit for excessive disk use */ ++ __le64 dqb_itime; /* time limit for excessive inode use */ ++}; ++ ++struct v2_disk_dqblk_r1 { ++ __le32 dqb_id; /* id this quota applies to */ ++ __le32 dqb_padding; /* padding field */ ++ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */ ++ __le64 dqb_isoftlimit; /* preferred inode limit */ ++ __le64 dqb_curinodes; /* current # allocated inodes */ ++ __le64 dqb_bhardlimit; /* absolute limit on disk space */ ++ __le64 dqb_bsoftlimit; /* preferred limit on disk space */ + __le64 dqb_curspace; /* current space occupied (in bytes) */ + __le64 dqb_btime; /* time limit for excessive disk use */ + __le64 dqb_itime; /* time limit for excessive inode use */ + }; + ++union v2_disk_dqblk { ++ struct v2_disk_dqblk_r0 r0; ++ struct v2_disk_dqblk_r1 r1; ++}; ++ + /* + * Here are header structures as written on disk and their in-memory copies + */ +@@ -59,7 +82,7 @@ struct v2_disk_dqinfo { + + /* + * Structure of header of block with quota structures. 
It is padded to 16 bytes so +- * there will be space for exactly 21 quota-entries in a block ++ * there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block + */ + struct v2_disk_dqdbheader { + __le32 dqdh_next_free; /* Number of next block with free entry */ +@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader { + #define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */ + #define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */ + #define V2_DQTREEDEPTH 4 /* Depth of quota tree */ +-#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */ + + #endif /* _LINUX_QUOTAIO_V2_H */ diff --git a/lustre/kernel_patches/patches/quota-large-limits-sles10.patch b/lustre/kernel_patches/patches/quota-large-limits-sles10.patch new file mode 100644 index 0000000..fcef1c2 --- /dev/null +++ b/lustre/kernel_patches/patches/quota-large-limits-sles10.patch @@ -0,0 +1,616 @@ +diff -rNpu linux-2.6.16.54-0.2.5/fs/dquot.c linux-2.6.16.54-0.2.5-quota/fs/dquot.c +--- linux-2.6.16.54-0.2.5/fs/dquot.c 2008-03-18 15:48:26.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/fs/dquot.c 2008-03-17 22:43:11.000000000 +0300 +@@ -1588,10 +1588,19 @@ int vfs_get_dqblk(struct super_block *sb + } + + /* Generic routine for setting common part of quota structure */ +-static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) ++static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) + { + struct mem_dqblk *dm = &dquot->dq_dqb; + int check_blim = 0, check_ilim = 0; ++ struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; ++ ++ if ((di->dqb_valid & QIF_BLIMITS && ++ (di->dqb_bhardlimit > dqi->dqi_maxblimit || ++ di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || ++ (di->dqb_valid & QIF_ILIMITS && ++ (di->dqb_ihardlimit > dqi->dqi_maxilimit || ++ di->dqb_isoftlimit > dqi->dqi_maxilimit))) ++ return -ERANGE; + + spin_lock(&dq_data_lock); + if 
(di->dqb_valid & QIF_SPACE) { +@@ -1623,7 +1632,7 @@ static void do_set_dqblk(struct dquot *d + clear_bit(DQ_BLKS_B, &dquot->dq_flags); + } + else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */ +- dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; ++ dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; + } + if (check_ilim) { + if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { +@@ -1631,7 +1640,7 @@ static void do_set_dqblk(struct dquot *d + clear_bit(DQ_INODES_B, &dquot->dq_flags); + } + else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */ +- dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; ++ dm->dqb_itime = get_seconds() + dqi->dqi_igrace; + } + if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) + clear_bit(DQ_FAKE_B, &dquot->dq_flags); +@@ -1639,21 +1648,24 @@ static void do_set_dqblk(struct dquot *d + set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dq_data_lock); + mark_dquot_dirty(dquot); ++ ++ return 0; + } + + int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) + { + struct dquot *dquot; ++ int rc; + + down(&sb_dqopt(sb)->dqonoff_sem); + if (!(dquot = dqget(sb, id, type))) { + up(&sb_dqopt(sb)->dqonoff_sem); + return -ESRCH; + } +- do_set_dqblk(dquot, di); ++ rc = do_set_dqblk(dquot, di); + dqput(dquot); + up(&sb_dqopt(sb)->dqonoff_sem); +- return 0; ++ return rc; + } + + /* Generic routine for getting common part of quota file information */ +diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v1.c linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c +--- linux-2.6.16.54-0.2.5/fs/quota_v1.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v1.c 2008-03-17 22:42:47.000000000 +0300 +@@ -139,6 +139,9 @@ static int v1_read_file_info(struct supe + goto out; + } + ret = 0; ++ /* limits are stored as 
unsigned 32-bit data */ ++ dqopt->info[type].dqi_maxblimit = 0xffffffff; ++ dqopt->info[type].dqi_maxilimit = 0xffffffff; + dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; + dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME; + out: +diff -rNpu linux-2.6.16.54-0.2.5/fs/quota_v2.c linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c +--- linux-2.6.16.54-0.2.5/fs/quota_v2.c 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/fs/quota_v2.c 2008-03-18 11:58:02.000000000 +0300 +@@ -23,26 +23,64 @@ MODULE_LICENSE("GPL"); + typedef char *dqbuf_t; + + #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff) +-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader))) ++#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \ ++ sizeof(struct v2_disk_dqdbheader))) ++#define REV_ASSERT(r) BUG_ON((r) != 0 && (r) != 1) ++ ++static const union v2_disk_dqblk emptydquot; ++static const union v2_disk_dqblk fakedquot[2] = { ++ {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }, ++ {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} } ++}; + +-/* Check whether given file is really vfsv0 quotafile */ +-static int v2_check_quota_file(struct super_block *sb, int type) ++static inline uint v2_dqblksz(uint rev) ++{ ++ uint sz; ++ ++ REV_ASSERT(rev); ++ ++ if (rev == 0) ++ sz = sizeof(struct v2_disk_dqblk_r0); ++ else ++ sz = sizeof(struct v2_disk_dqblk_r1); ++ ++ return sz; ++} ++ ++/* Number of quota entries in a block */ ++static inline int v2_dqstrinblk(uint rev) ++{ ++ return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev); ++} ++ ++/* Get revision of a quota file, -1 if it does not look a quota file */ ++static int v2_quota_file_revision(struct super_block *sb, int type) + { + struct v2_disk_dqheader dqhead; + ssize_t size; + static const uint quota_magics[] = V2_INITQMAGICS; +- static const uint quota_versions[] = 
V2_INITQVERSIONS; ++ static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0; ++ static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1; + + size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); + if (size != sizeof(struct v2_disk_dqheader)) { + printk("quota_v2: failed read expected=%zd got=%zd\n", + sizeof(struct v2_disk_dqheader), size); +- return 0; ++ return -1; + } +- if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || +- le32_to_cpu(dqhead.dqh_version) != quota_versions[type]) +- return 0; +- return 1; ++ if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) { ++ if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type]) ++ return 0; ++ if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type]) ++ return 1; ++ } ++ return -1; ++} ++ ++/* Check whether given file is really vfsv0 quotafile */ ++static inline int v2_check_quota_file(struct super_block *sb, int type) ++{ ++ return v2_quota_file_revision(sb, type) != -1; + } + + /* Read information header from quota file */ +@@ -51,6 +89,13 @@ static int v2_read_file_info(struct supe + struct v2_disk_dqinfo dinfo; + struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + ssize_t size; ++ int rev; ++ ++ rev = v2_quota_file_revision(sb, type); ++ if (rev < 0) { ++ printk(KERN_WARNING "Second quota file check failed.\n"); ++ return -1; ++ } + + size = sb->s_op->quota_read(sb, type, (char *)&dinfo, + sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); +@@ -65,6 +110,16 @@ static int v2_read_file_info(struct supe + info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); + info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); + info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); ++ ++ info->u.v2_i.dqi_revision = rev; ++ if (rev == 0) { ++ info->dqi_maxblimit = 0xffffffffULL; ++ info->dqi_maxilimit = 0xffffffffULL; ++ } else { ++ info->dqi_maxblimit = 0xffffffffffffffffULL; ++ info->dqi_maxilimit = 0xffffffffffffffffULL; ++ } ++ + return 0; + 
} + +@@ -94,29 +149,61 @@ static int v2_write_file_info(struct sup + return 0; + } + +-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d) ++static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev) + { +- m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); +- m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit); +- m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes); +- m->dqb_itime = le64_to_cpu(d->dqb_itime); +- m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit); +- m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit); +- m->dqb_curspace = le64_to_cpu(d->dqb_curspace); +- m->dqb_btime = le64_to_cpu(d->dqb_btime); +-} +- +-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id) +-{ +- d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); +- d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); +- d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes); +- d->dqb_itime = cpu_to_le64(m->dqb_itime); +- d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit); +- d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit); +- d->dqb_curspace = cpu_to_le64(m->dqb_curspace); +- d->dqb_btime = cpu_to_le64(m->dqb_btime); +- d->dqb_id = cpu_to_le32(id); ++ REV_ASSERT(rev); ++ ++ if (rev == 0) { ++ struct v2_disk_dqblk_r0 *ddqblk = &d->r0; ++ m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit); ++ m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit); ++ m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes); ++ m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime); ++ m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit); ++ m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit); ++ m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace); ++ m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime); ++ } else { ++ struct v2_disk_dqblk_r1 *ddqblk = &d->r1; ++ m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit); ++ m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit); ++ m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes); ++ m->dqb_itime = 
le64_to_cpu(ddqblk->dqb_itime); ++ m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit); ++ m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit); ++ m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace); ++ m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime); ++ } ++} ++ ++static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m, ++ qid_t id, uint rev) ++{ ++ REV_ASSERT(rev); ++ ++ if (rev == 0) { ++ struct v2_disk_dqblk_r0 *ddqblk = &d->r0; ++ ddqblk->dqb_id = cpu_to_le32(id); ++ ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit); ++ ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit); ++ ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes); ++ ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime); ++ ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit); ++ ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit); ++ ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace); ++ ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime); ++ } else { ++ struct v2_disk_dqblk_r1 *ddqblk = &d->r1; ++ ddqblk->dqb_id = cpu_to_le32(id); ++ ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); ++ ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); ++ ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes); ++ ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime); ++ ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit); ++ ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit); ++ ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace); ++ ddqblk->dqb_btime = cpu_to_le64(m->dqb_btime); ++ } + } + + static dqbuf_t getdqbuf(void) +@@ -268,10 +355,10 @@ static uint find_free_dqentry(struct dqu + { + struct super_block *sb = dquot->dq_sb; + struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type; +- uint blk, i; ++ uint blk, i, rev = info->u.v2_i.dqi_revision; ++ uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev); + struct v2_disk_dqdbheader *dh; +- struct v2_disk_dqblk *ddquot; +- struct v2_disk_dqblk fakedquot; ++ union
v2_disk_dqblk *ddquot; + dqbuf_t buf; + + *err = 0; +@@ -298,17 +385,18 @@ static uint find_free_dqentry(struct dqu + info->u.v2_i.dqi_free_entry = blk; + mark_info_dirty(sb, dquot->dq_type); + } +- if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */ ++ /* Block will be full? */ ++ if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk) + if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) { + printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); + goto out_buf; + } + dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); +- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); + /* Find free structure in block */ +- for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); ++ for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz); ++ i++, ddquot = (char *)ddquot + dqblksz); + #ifdef __QUOTA_V2_PARANOIA +- if (i == V2_DQSTRINBLK) { ++ if (i == dqstrinblk) { + printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n"); + *err = -EIO; + goto out_buf; +@@ -318,7 +406,8 @@ static uint find_free_dqentry(struct dqu + printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk); + goto out_buf; + } +- dquot->dq_off = (blk<dq_off = (blk<dq_type; + ssize_t ret; +- struct v2_disk_dqblk ddquot, empty; ++ union v2_disk_dqblk ddquot; ++ uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision; ++ uint dqblksz = v2_dqblksz(rev); + + /* dq_off is guarded by dqio_sem */ + if (!dquot->dq_off) +@@ -401,18 +492,22 @@ static int v2_write_dquot(struct dquot * + return ret; + } + spin_lock(&dq_data_lock); +- mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); ++ mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev); + /* Argh... We may need to write structure full of zeroes but that would be + * treated as an empty place by the rest of the code. 
Format change would + * be definitely cleaner but the problems probably are not worth it */ +- memset(&empty, 0, sizeof(struct v2_disk_dqblk)); +- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) +- ddquot.dqb_itime = cpu_to_le64(1); ++ if (!memcmp(&emptydquot, &ddquot, dqblksz)) { ++ if (rev == 0) ++ ddquot.r0.dqb_itime = cpu_to_le64(1); ++ else ++ ddquot.r1.dqb_itime = cpu_to_le64(1); ++ } + spin_unlock(&dq_data_lock); + ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, +- (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off); +- if (ret != sizeof(struct v2_disk_dqblk)) { +- printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id); ++ (char *)&ddquot, dqblksz, dquot->dq_off); ++ if (ret != dqblksz) { ++ printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", ++ dquot->dq_sb->s_id); + if (ret >= 0) + ret = -ENOSPC; + } +@@ -431,6 +526,7 @@ static int free_dqentry(struct dquot *dq + struct v2_disk_dqdbheader *dh; + dqbuf_t buf = getdqbuf(); + int ret = 0; ++ uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision; + + if (!buf) + return -ENOMEM; +@@ -456,8 +552,8 @@ static int free_dqentry(struct dquot *dq + } + else { + memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, +- sizeof(struct v2_disk_dqblk)); +- if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { ++ v2_dqblksz(rev)); ++ if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) { + /* Insert will write block itself */ + if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) { + printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); +@@ -529,41 +625,56 @@ static int v2_delete_dquot(struct dquot + return remove_tree(dquot, &tmp, 0); + } + ++static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev) ++{ ++ __u32 dq_id; ++ ++ REV_ASSERT(rev); ++ ++ if (rev == 0) ++ dq_id = le32_to_cpu(ddquot->r0.dqb_id); ++ else ++ dq_id = le32_to_cpu(ddquot->r1.dqb_id); ++ ++ return dq_id; ++} ++ + /* Find entry in 
block */ + static loff_t find_block_dqentry(struct dquot *dquot, uint blk) + { + dqbuf_t buf = getdqbuf(); + loff_t ret = 0; + int i; +- struct v2_disk_dqblk *ddquot = GETENTRIES(buf); ++ union v2_disk_dqblk *ddquot = GETENTRIES(buf); ++ int type = dquot->dq_type; ++ uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision; ++ uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev); + + if (!buf) + return -ENOMEM; +- if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { ++ ++ ret = read_blk(dquot->dq_sb, type, blk, buf); ++ if (ret < 0) { + printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + goto out_buf; + } + if (dquot->dq_id) +- for (i = 0; i < V2_DQSTRINBLK && +- le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++); ++ for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id; ++ i++, ddquot = (char *)ddquot + dqblksz); + else { /* ID 0 as a bit more complicated searching... */ +- struct v2_disk_dqblk fakedquot; +- +- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); +- for (i = 0; i < V2_DQSTRINBLK; i++) +- if (!le32_to_cpu(ddquot[i].dqb_id) && +- memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk))) ++ for (i = 0; i < dqstrinblk; i++, ddquot = (char *)ddquot+dqblksz) ++ if (!dqid(ddquot, rev) && ++ memcmp(&emptydquot, ddquot, dqblksz)) + break; + } +- if (i == V2_DQSTRINBLK) { ++ if (i == dqstrinblk) { + printk(KERN_ERR "VFS: Quota for id %u referenced " + "but not present.\n", dquot->dq_id); + ret = -EIO; + goto out_buf; + } + else +- ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct +- v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk); ++ ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf); + out_buf: + freedqbuf(buf); + return ret; +@@ -605,7 +716,7 @@ static int v2_read_dquot(struct dquot *d + { + int type = dquot->dq_type; + loff_t offset; +- struct v2_disk_dqblk ddquot, empty; ++ union v2_disk_dqblk ddquot; + int ret = 0; + + #ifdef __QUOTA_V2_PARANOIA +@@ -626,25 +737,30 @@ static 
int v2_read_dquot(struct dquot *d + ret = offset; + } + else { ++ uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i. ++ dqi_revision; ++ uint dqblksz = v2_dqblksz(rev); + dquot->dq_off = offset; +- if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, +- (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset)) +- != sizeof(struct v2_disk_dqblk)) { ++ ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, ++ (char *)&ddquot, dqblksz, offset); ++ if (ret != dqblksz) { + if (ret >= 0) + ret = -EIO; + printk(KERN_ERR "VFS: Error while reading quota " + "structure for id %u.\n", dquot->dq_id); +- memset(&ddquot, 0, sizeof(struct v2_disk_dqblk)); ++ memset(&ddquot, 0, dqblksz); + } + else { + ret = 0; + /* We need to escape back all-zero structure */ +- memset(&empty, 0, sizeof(struct v2_disk_dqblk)); +- empty.dqb_itime = cpu_to_le64(1); +- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) +- ddquot.dqb_itime = 0; ++ if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) { ++ if (rev == 0) ++ ddquot.r0.dqb_itime = cpu_to_le64(0); ++ else ++ ddquot.r1.dqb_itime = cpu_to_le64(0); ++ } + } +- disk2memdqb(&dquot->dq_dqb, &ddquot); ++ disk2memdqb(&dquot->dq_dqb, &ddquot, rev); + if (!dquot->dq_dqb.dqb_bhardlimit && + !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && +diff -rNpu linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h +--- linux-2.6.16.54-0.2.5/include/linux/dqblk_v2.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/include/linux/dqblk_v2.h 2008-03-17 23:39:54.000000000 +0300 +@@ -21,6 +21,7 @@ struct v2_mem_dqinfo { + unsigned int dqi_blocks; + unsigned int dqi_free_blk; + unsigned int dqi_free_entry; ++ unsigned int dqi_revision; + }; + + #endif /* _LINUX_DQBLK_V2_H */ +diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quota.h linux-2.6.16.54-0.2.5-quota/include/linux/quota.h +--- linux-2.6.16.54-0.2.5/include/linux/quota.h 2006-03-20 08:53:29.000000000 +0300 ++++ 
linux-2.6.16.54-0.2.5-quota/include/linux/quota.h 2008-03-17 23:39:54.000000000 +0300 +@@ -148,12 +148,12 @@ struct if_dqinfo { + * Data for one user/group kept in memory + */ + struct mem_dqblk { +- __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ +- __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ ++ qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */ ++ qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */ + qsize_t dqb_curspace; /* current used space */ +- __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ +- __u32 dqb_isoftlimit; /* preferred inode limit */ +- __u32 dqb_curinodes; /* current # allocated inodes */ ++ qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */ ++ qsize_t dqb_isoftlimit; /* preferred inode limit */ ++ qsize_t dqb_curinodes; /* current # allocated inodes */ + time_t dqb_btime; /* time limit for excessive disk use */ + time_t dqb_itime; /* time limit for excessive inode use */ + }; +@@ -169,6 +169,8 @@ struct mem_dqinfo { + unsigned long dqi_flags; + unsigned int dqi_bgrace; + unsigned int dqi_igrace; ++ qsize_t dqi_maxblimit; ++ qsize_t dqi_maxilimit; + union { + struct v1_mem_dqinfo v1_i; + struct v2_mem_dqinfo v2_i; +diff -rNpu linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h +--- linux-2.6.16.54-0.2.5/include/linux/quotaio_v2.h 2006-03-20 08:53:29.000000000 +0300 ++++ linux-2.6.16.54-0.2.5-quota/include/linux/quotaio_v2.h 2008-03-17 23:39:54.000000000 +0300 +@@ -16,28 +16,51 @@ + 0xd9c01927 /* GRPQUOTA */\ + } + +-#define V2_INITQVERSIONS {\ ++#define V2_INITQVERSIONS_R0 {\ + 0, /* USRQUOTA */\ + 0 /* GRPQUOTA */\ + } + ++#define V2_INITQVERSIONS_R1 {\ ++ 1, /* USRQUOTA */\ ++ 1 /* GRPQUOTA */\ ++} ++ + /* + * The following structure defines the format of the disk quota file + * (as it appears on disk) - the file is a radix tree whose leaves point + * to blocks of these structures. 
+ */ +-struct v2_disk_dqblk { ++struct v2_disk_dqblk_r0 { + __le32 dqb_id; /* id this quota applies to */ + __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */ + __le32 dqb_isoftlimit; /* preferred inode limit */ + __le32 dqb_curinodes; /* current # allocated inodes */ +- __le32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */ +- __le32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */ ++ __le32 dqb_bhardlimit; /* absolute limit on disk space */ ++ __le32 dqb_bsoftlimit; /* preferred limit on disk space */ ++ __le64 dqb_curspace; /* current space occupied (in bytes) */ ++ __le64 dqb_btime; /* time limit for excessive disk use */ ++ __le64 dqb_itime; /* time limit for excessive inode use */ ++}; ++ ++struct v2_disk_dqblk_r1 { ++ __le32 dqb_id; /* id this quota applies to */ ++ __le32 dqb_padding; /* padding field */ ++ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */ ++ __le64 dqb_isoftlimit; /* preferred inode limit */ ++ __le64 dqb_curinodes; /* current # allocated inodes */ ++ __le64 dqb_bhardlimit; /* absolute limit on disk space */ ++ __le64 dqb_bsoftlimit; /* preferred limit on disk space */ + __le64 dqb_curspace; /* current space occupied (in bytes) */ + __le64 dqb_btime; /* time limit for excessive disk use */ + __le64 dqb_itime; /* time limit for excessive inode use */ + }; + ++union v2_disk_dqblk { ++ struct v2_disk_dqblk_r0 r0; ++ struct v2_disk_dqblk_r1 r1; ++}; ++ + /* + * Here are header structures as written on disk and their in-memory copies + */ +@@ -59,7 +82,7 @@ struct v2_disk_dqinfo { + + /* + * Structure of header of block with quota structures. 
It is padded to 16 bytes so +- * there will be space for exactly 21 quota-entries in a block ++ * there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block + */ + struct v2_disk_dqdbheader { + __le32 dqdh_next_free; /* Number of next block with free entry */ +@@ -74,6 +97,5 @@ struct v2_disk_dqdbheader { + #define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */ + #define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */ + #define V2_DQTREEDEPTH 4 /* Depth of quota tree */ +-#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */ + + #endif /* _LINUX_QUOTAIO_V2_H */ diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series index 9e7a8ed..8495289 100644 --- a/lustre/kernel_patches/series/2.6-rhel5.series +++ b/lustre/kernel_patches/series/2.6-rhel5.series @@ -19,3 +19,4 @@ raid5-merge-ios-rhel5.patch raid5-zerocopy-rhel5.patch md-rebuild-policy.patch jbd-journal-chksum-2.6.18-vanilla.patch +quota-large-limits-rhel5.patch diff --git a/lustre/kernel_patches/series/2.6-sles10.series b/lustre/kernel_patches/series/2.6-sles10.series index 72adc21..070f943 100644 --- a/lustre/kernel_patches/series/2.6-sles10.series +++ b/lustre/kernel_patches/series/2.6-sles10.series @@ -10,7 +10,9 @@ export_symbol_numa-2.6-fc5.patch blkdev_tunables-2.6-sles10.patch jbd-stats-2.6-sles10.patch i_filter_data.patch +quota-fix-oops-in-invalidate_dquots.patch jbd-journal-chksum-2.6-sles10.patch proc-sleep-2.6.16-sles10.patch export-nr_free_buffer_pages.patch fmode-exec-2.6-sles10.patch +quota-large-limits-sles10.patch diff --git a/lustre/kernel_patches/series/2.6.22-vanilla.series b/lustre/kernel_patches/series/2.6.22-vanilla.series index eba2991..fe32803 100644 --- a/lustre/kernel_patches/series/2.6.22-vanilla.series +++ b/lustre/kernel_patches/series/2.6.22-vanilla.series @@ -11,3 +11,4 @@ 
export-2.6.18-vanilla.patch 8kstack-2.6.12.patch export-show_task-2.6.18-vanilla.patch sd_iostats-2.6.22-vanilla.patch +quota-large-limits-rhel5.patch diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 81d357b..49fd0bf 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -51,7 +51,6 @@ #include #include "ldlm_internal.h" - /* @priority: if non-zero, move the selected to the list head * @create: if zero, only search in existed connections */ @@ -2166,33 +2165,37 @@ void target_committed_to_req(struct ptlrpc_request *req) EXPORT_SYMBOL(target_committed_to_req); -#ifdef HAVE_QUOTA_SUPPORT int target_handle_qc_callback(struct ptlrpc_request *req) { struct obd_quotactl *oqctl; struct client_obd *cli = &req->rq_export->exp_obd->u.cli; oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - if (oqctl == NULL) + if (oqctl == NULL) { + CERROR("Can't unpack obd_quotactl\n"); RETURN(-EPROTO); + } cli->cl_qchk_stat = oqctl->qc_stat; return 0; } +#ifdef HAVE_QUOTA_SUPPORT int target_handle_dqacq_callback(struct ptlrpc_request *req) { #ifdef __KERNEL__ struct obd_device *obd = req->rq_export->exp_obd; struct obd_device *master_obd; + struct obd_device_target *obt; struct lustre_quota_ctxt *qctxt; - struct qunit_data *qdata; - void* rep; - struct qunit_data_old *qdata_old; + struct qunit_data *qdata = NULL; int rc = 0; ENTRY; + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_DROP_QUOTA_REQ)) + RETURN(rc); + rc = req_capsule_server_pack(&req->rq_pill); if (rc) { CERROR("packing reply failed!: rc = %d\n", rc); @@ -2201,52 +2204,73 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) LASSERT(req->rq_export); - /* fixed for bug10707 */ - if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { - CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); - rep = req_capsule_server_get(&req->rq_pill, - &RMF_QUNIT_DATA); - LASSERT(rep); - qdata = req_capsule_client_swab_get(&req->rq_pill, - &RMF_QUNIT_DATA, 
- (void*)lustre_swab_qdata); - } else { - CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); - rep = req_capsule_server_get(&req->rq_pill, &RMF_QUNIT_DATA); - LASSERT(rep); - qdata_old = req_capsule_client_swab_get(&req->rq_pill, - &RMF_QUNIT_DATA, - (void*)lustre_swab_qdata_old); - qdata = lustre_quota_old_to_new(qdata_old); + OBD_ALLOC(qdata, sizeof(struct qunit_data)); + if (!qdata) + RETURN(-ENOMEM); + rc = quota_get_qdata(req, qdata, QUOTA_REQUEST, QUOTA_EXPORT); + if (rc < 0) { + CDEBUG(D_ERROR, "Can't unpack qunit_data(rc: %d)\n", rc); + GOTO(out, rc); } - if (qdata == NULL) - RETURN(-EPROTO); - /* we use the observer */ - LASSERT(obd->obd_observer && obd->obd_observer->obd_observer); + if (!obd->obd_observer || !obd->obd_observer->obd_observer) { + CERROR("Can't find the observer, it is recovering\n"); + req->rq_status = -EIO; + GOTO(send_reply, rc = -EIO); + } + master_obd = obd->obd_observer->obd_observer; - qctxt = &master_obd->u.obt.obt_qctxt; + obt = &master_obd->u.obt; + qctxt = &obt->obt_qctxt; + + if (!qctxt->lqc_setup || !qctxt->lqc_valid) { + /* quota_type has not been processed yet, return EAGAIN + * until we know whether or not quotas are supposed to + * be enabled */ + CDEBUG(D_QUOTA, "quota_type not processed yet, return " + "-EAGAIN\n"); + req->rq_status = -EAGAIN; + rc = ptlrpc_reply(req); + GOTO(out, rc); + } + + down_read(&obt->obt_rwsem); + if (qctxt->lqc_lqs_hash == NULL) { + up_read(&obt->obt_rwsem); + /* quota_type has not been processed yet, return EAGAIN + * until we know whether or not quotas are supposed to + * be enabled */ + CDEBUG(D_QUOTA, "quota_ctxt is not ready yet, return " + "-EAGAIN\n"); + req->rq_status = -EAGAIN; + rc = ptlrpc_reply(req); + GOTO(out, rc); + } LASSERT(qctxt->lqc_handler); rc = qctxt->lqc_handler(master_obd, qdata, lustre_msg_get_opc(req->rq_reqmsg)); + up_read(&obt->obt_rwsem); if (rc && rc != -EDQUOT) CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, "dqacq failed! 
(rc:%d)\n", rc); + req->rq_status = rc; - /* the qd_count might be changed in lqc_handler */ - if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { - memcpy(rep, qdata, sizeof(*qdata)); - } else { - qdata_old = lustre_quota_new_to_old(qdata); - memcpy(rep, qdata_old, sizeof(*qdata_old)); + /* there are three forms of qunit(historic causes), so we need to + * adjust the same form to different forms slaves needed */ + rc = quota_copy_qdata(req, qdata, QUOTA_REPLY, QUOTA_EXPORT); + if (rc < 0) { + CDEBUG(D_ERROR, "Can't pack qunit_data(rc: %d)\n", rc); + GOTO(out, rc); } - req->rq_status = rc; - rc = ptlrpc_reply(req); + /* Block the quota req. b=14840 */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_BLOCK_QUOTA_REQ, obd_timeout); +send_reply: + rc = ptlrpc_reply(req); +out: + OBD_FREE(qdata, sizeof(struct qunit_data)); RETURN(rc); #else return 0; diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 27fa1af..50d4504 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -268,7 +268,8 @@ int ldlm_lock_destroy_internal(struct ldlm_lock *lock) } lock->l_destroyed = 1; - if (lock->l_export && lock->l_export->exp_lock_hash) + if (lock->l_export && lock->l_export->exp_lock_hash && + !hlist_unhashed(&lock->l_exp_hash)) lustre_hash_del(lock->l_export->exp_lock_hash, &lock->l_remote_handle, &lock->l_exp_hash); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 35f3bcf..b809cba 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1882,7 +1882,8 @@ void ldlm_revoke_lock_cb(void *obj, void *data) LASSERT(!lock->l_blocking_lock); lock->l_flags |= LDLM_FL_AST_SENT; - if (lock->l_export && lock->l_export->exp_lock_hash) + if (lock->l_export && lock->l_export->exp_lock_hash && + !hlist_unhashed(&lock->l_exp_hash)) lustre_hash_del(lock->l_export->exp_lock_hash, &lock->l_remote_handle, &lock->l_exp_hash); list_add_tail(&lock->l_rk_ast, rpc_list); diff --git 
a/lustre/liblustre/Makefile.am b/lustre/liblustre/Makefile.am index 116d0c9..326a8c0 100644 --- a/lustre/liblustre/Makefile.am +++ b/lustre/liblustre/Makefile.am @@ -22,7 +22,7 @@ LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/obdclass/liblustreclass.a \ $(top_builddir)/lustre/lvfs/liblvfs.a -if QUOTA +if LIBLUSTRE QUOTA_LIBS = $(top_builddir)/lustre/quota/libquota.a endif diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 07b7ddd..38fb136 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -320,7 +320,7 @@ int llu_objects_destroy(struct ptlrpc_request *req, struct inode *dir) } } - rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL); + rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL); OBDO_FREE(oa); if (rc) CERROR("obd destroy objid 0x"LPX64" error %d\n", diff --git a/lustre/liblustre/lutil.c b/lustre/liblustre/lutil.c index 0de50cf..fc9bc5d 100644 --- a/lustre/liblustre/lutil.c +++ b/lustre/liblustre/lutil.c @@ -77,14 +77,14 @@ void *inter_module_get(char *arg) return ldlm_namespace_cleanup; else if (!strcmp(arg, "ldlm_replay_locks")) return ldlm_replay_locks; -#ifdef HAVE_QUOTA_SUPPORT - else if (!strcmp(arg, "osc_quota_interface")) - return &osc_quota_interface; else if (!strcmp(arg, "mdc_quota_interface")) return &mdc_quota_interface; + else if (!strcmp(arg, "lmv_quota_interface")) + return &lmv_quota_interface; + else if (!strcmp(arg, "osc_quota_interface")) + return &osc_quota_interface; else if (!strcmp(arg, "lov_quota_interface")) return &lov_quota_interface; -#endif else return NULL; } diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 061f82e..4366c86 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -500,16 +500,6 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) RETURN(rc); } -#define QCTL_COPY(out, in) \ -do { \ - Q_COPY(out, in, qc_cmd); \ - Q_COPY(out, in, qc_type); \ - Q_COPY(out, in, qc_id); \ - Q_COPY(out, in, qc_stat); \ - Q_COPY(out, in, qc_dqinfo); 
\ - Q_COPY(out, in, qc_dqblk); \ -} while (0) - int ll_send_mgc_param(struct obd_export *mgc, char *string) { struct mgs_send_param *msp; @@ -1011,7 +1001,8 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, struct obd_quotactl *oqctl; int rc, error = 0; - if (!cfs_capable(CFS_CAP_SYS_ADMIN)) + if (!cfs_capable(CFS_CAP_SYS_ADMIN) || + sbi->ll_flags & LL_SBI_RMT_CLIENT) RETURN(-EPERM); OBD_ALLOC_PTR(oqctl); @@ -1035,7 +1026,8 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, struct if_quotacheck *check; int rc; - if (!cfs_capable(CFS_CAP_SYS_ADMIN)) + if (!cfs_capable(CFS_CAP_SYS_ADMIN) || + sbi->ll_flags & LL_SBI_RMT_CLIENT) RETURN(-EPERM); OBD_ALLOC_PTR(check); @@ -1063,47 +1055,39 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, OBD_FREE_PTR(check); RETURN(rc); } -#ifdef HAVE_QUOTA_SUPPORT case OBD_IOC_QUOTACTL: { struct if_quotactl *qctl; - struct obd_quotactl *oqctl; - - int cmd, type, id, rc = 0; + int cmd, type, id, valid, rc = 0; OBD_ALLOC_PTR(qctl); if (!qctl) RETURN(-ENOMEM); - OBD_ALLOC_PTR(oqctl); - if (!oqctl) { - OBD_FREE_PTR(qctl); - RETURN(-ENOMEM); - } if (copy_from_user(qctl, (void *)arg, sizeof(*qctl))) GOTO(out_quotactl, rc = -EFAULT); cmd = qctl->qc_cmd; type = qctl->qc_type; id = qctl->qc_id; + valid = qctl->qc_valid; + switch (cmd) { + case LUSTRE_Q_INVALIDATE: + case LUSTRE_Q_FINVALIDATE: case Q_QUOTAON: case Q_QUOTAOFF: case Q_SETQUOTA: case Q_SETINFO: - if (!cfs_capable(CFS_CAP_SYS_ADMIN)) + if (!cfs_capable(CFS_CAP_SYS_ADMIN) || + sbi->ll_flags & LL_SBI_RMT_CLIENT) GOTO(out_quotactl, rc = -EPERM); break; case Q_GETQUOTA: if (((type == USRQUOTA && current->euid != id) || (type == GRPQUOTA && !in_egroup_p(id))) && - !cfs_capable(CFS_CAP_SYS_ADMIN)) + (!cfs_capable(CFS_CAP_SYS_ADMIN) || + sbi->ll_flags & LL_SBI_RMT_CLIENT)) GOTO(out_quotactl, rc = -EPERM); - - /* XXX: dqb_valid is borrowed as a flag to mark that - * only mds quota is wanted */ - if (qctl->qc_dqblk.dqb_valid) - qctl->obd_uuid 
= sbi->ll_md_exp->exp_obd-> - u.cli.cl_target_uuid; break; case Q_GETINFO: break; @@ -1112,69 +1096,76 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, GOTO(out_quotactl, rc = -ENOTTY); } - QCTL_COPY(oqctl, qctl); - - if (qctl->obd_uuid.uuid[0]) { - struct obd_device *obd; - struct obd_uuid *uuid = &qctl->obd_uuid; - - obd = class_find_client_notype(uuid, - &sbi->ll_dt_exp->exp_obd->obd_uuid); - if (!obd) - GOTO(out_quotactl, rc = -ENOENT); + if (valid != QC_GENERAL) { + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + GOTO(out_quotactl, rc = -EOPNOTSUPP); if (cmd == Q_GETINFO) - oqctl->qc_cmd = Q_GETOINFO; + qctl->qc_cmd = Q_GETOINFO; else if (cmd == Q_GETQUOTA) - oqctl->qc_cmd = Q_GETOQUOTA; + qctl->qc_cmd = Q_GETOQUOTA; else GOTO(out_quotactl, rc = -EINVAL); - if (sbi->ll_md_exp->exp_obd == obd) { - rc = obd_quotactl(sbi->ll_md_exp, oqctl); - } else { - int i; - struct obd_export *exp; - struct lov_obd *lov = &sbi->ll_dt_exp-> - exp_obd->u.lov; - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i] || - !lov->lov_tgts[i]->ltd_active) - continue; - exp = lov->lov_tgts[i]->ltd_exp; - if (exp->exp_obd == obd) { - rc = obd_quotactl(exp, oqctl); - break; - } - } + switch (valid) { + case QC_MDTIDX: + rc = obd_iocontrol(OBD_IOC_QUOTACTL, + sbi->ll_md_exp, + sizeof(*qctl), qctl, NULL); + break; + case QC_OSTIDX: + rc = obd_iocontrol(OBD_IOC_QUOTACTL, + sbi->ll_dt_exp, + sizeof(*qctl), qctl, NULL); + break; + case QC_UUID: + rc = obd_iocontrol(OBD_IOC_QUOTACTL, + sbi->ll_md_exp, + sizeof(*qctl), qctl, NULL); + if (rc == -EAGAIN) + rc = obd_iocontrol(OBD_IOC_QUOTACTL, + sbi->ll_dt_exp, + sizeof(*qctl), qctl, + NULL); + break; + default: + rc = -EINVAL; + break; } - oqctl->qc_cmd = cmd; - QCTL_COPY(qctl, oqctl); - - if (copy_to_user((void *)arg, qctl, sizeof(*qctl))) - rc = -EFAULT; - - GOTO(out_quotactl, rc); - } - - rc = obd_quotactl(sbi->ll_md_exp, oqctl); - if (rc && rc != -EBUSY && cmd == Q_QUOTAON) { - oqctl->qc_cmd = Q_QUOTAOFF; - 
obd_quotactl(sbi->ll_md_exp, oqctl); + if (rc) + GOTO(out_quotactl, rc); + else + qctl->qc_cmd = cmd; + } else { + struct obd_quotactl *oqctl; + + OBD_ALLOC_PTR(oqctl); + if (!oqctl) + GOTO(out_quotactl, rc = -ENOMEM); + + QCTL_COPY(oqctl, qctl); + rc = obd_quotactl(sbi->ll_md_exp, oqctl); + if (rc) { + if (rc != -EBUSY && cmd == Q_QUOTAON) { + oqctl->qc_cmd = Q_QUOTAOFF; + obd_quotactl(sbi->ll_md_exp, oqctl); + } + OBD_FREE_PTR(oqctl); + GOTO(out_quotactl, rc); + } else { + QCTL_COPY(qctl, oqctl); + OBD_FREE_PTR(oqctl); + } } - QCTL_COPY(qctl, oqctl); - if (copy_to_user((void *)arg, qctl, sizeof(*qctl))) rc = -EFAULT; + out_quotactl: OBD_FREE_PTR(qctl); - OBD_FREE_PTR(oqctl); RETURN(rc); } -#endif /* HAVE_QUOTA_SUPPORT */ case OBD_IOC_GETNAME: { struct obd_device *obd = class_exp2obd(sbi->ll_dt_exp); if (!obd) @@ -1202,6 +1193,27 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, RETURN(0); } #endif + case LL_IOC_GETOBDCOUNT: { + int count; + + if (copy_from_user(&count, (int *)arg, sizeof(int))) + RETURN(-EFAULT); + + if (!count) { + /* get ost count */ + struct lov_obd *lov = &sbi->ll_dt_exp->exp_obd->u.lov; + count = lov->desc.ld_tgt_count; + } else { + /* get mdt count */ + struct lmv_obd *lmv = &sbi->ll_md_exp->exp_obd->u.lmv; + count = lmv->desc.ld_tgt_count; + } + + if (copy_to_user((int *)arg, &count, sizeof(int))) + RETURN(-EFAULT); + + RETURN(0); + } default: RETURN(obd_iocontrol(cmd, sbi->ll_dt_exp,0,NULL,(void *)arg)); } diff --git a/lustre/llite/llite_capa.c b/lustre/llite/llite_capa.c index 818008a..1a7bd1f 100644 --- a/lustre/llite/llite_capa.c +++ b/lustre/llite/llite_capa.c @@ -157,9 +157,10 @@ static void ll_delete_capa(struct obd_capa *ocapa) } DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client"); - list_del(&ocapa->c_list); + list_del_init(&ocapa->c_list); capa_count[CAPA_SITE_CLIENT]--; - free_capa(ocapa); + /* release the ref when alloc */ + capa_put(ocapa); } /* three places where client capa is deleted: @@ -238,7 +239,6 @@ 
static int capa_thread_main(void *unused) capa_get(ocapa); ll_capa_renewed++; spin_unlock(&capa_lock); - rc = md_renew_capa(ll_i2mdexp(inode), ocapa, ll_update_capa); spin_lock(&capa_lock); @@ -259,7 +259,7 @@ static int capa_thread_main(void *unused) break; } - if (atomic_read(&ocapa->c_refc)) { + if (atomic_read(&ocapa->c_refc) > 1) { DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired(c_refc %d), don't release", atomic_read(&ocapa->c_refc)); @@ -312,27 +312,6 @@ void ll_capa_thread_stop(void) ll_capa_thread.t_flags & SVC_STOPPED); } -static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct obd_capa *ocapa; - - /* inside capa_lock */ - list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { - if ((capa_opc(&ocapa->c_capa) & opc) != opc) - continue; - - LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), - ll_inode2fid(inode))); - LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); - - DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); - return ocapa; - } - - return NULL; -} - struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc) { struct ll_inode_info *lli = ll_i2info(inode); @@ -353,14 +332,17 @@ struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc) continue; if ((opc & CAPA_OPC_OSS_WRITE) && capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) { - found = 1; break; + found = 1; + break; } else if ((opc & CAPA_OPC_OSS_READ) && capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_READ)) { - found = 1; break; + found = 1; + break; } else if ((opc & CAPA_OPC_OSS_TRUNC) && capa_opc_supported(&ocapa->c_capa, opc)) { - found = 1; break; + found = 1; + break; } } @@ -429,12 +411,33 @@ static struct obd_capa *do_add_mds_capa(struct inode *inode, DEBUG_CAPA(D_SEC, capa, "update MDS"); - free_capa(ocapa); + capa_put(ocapa); ocapa = old; } return ocapa; } +static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_capa 
*ocapa; + + /* inside capa_lock */ + list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) { + if ((capa_opc(&ocapa->c_capa) & opc) != opc) + continue; + + LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa), + ll_inode2fid(inode))); + LASSERT(ocapa->c_site == CAPA_SITE_CLIENT); + + DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client"); + return ocapa; + } + + return NULL; +} + static inline void inode_add_oss_capa(struct inode *inode, struct obd_capa *ocapa) { @@ -479,7 +482,7 @@ static struct obd_capa *do_add_oss_capa(struct inode *inode, DEBUG_CAPA(D_SEC, capa, "update OSS"); - free_capa(ocapa); + capa_put(ocapa); ocapa = old; } @@ -496,7 +499,7 @@ struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa) /* truncate capa won't renew */ if (ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) { set_capa_expiry(ocapa); - list_del(&ocapa->c_list); + list_del_init(&ocapa->c_list); sort_add_capa(ocapa, ll_capa_list); update_capa_timer(ocapa, capa_renewal_time(ocapa)); @@ -547,18 +550,18 @@ int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa) } } - list_del(&ocapa->c_list); + list_del_init(&ocapa->c_list); sort_add_capa(ocapa, &ll_idle_capas); spin_unlock(&capa_lock); capa_put(ocapa); iput(inode); - return rc; + RETURN(rc); } spin_lock(&ocapa->c_lock); LASSERT(!memcmp(&ocapa->c_capa, capa, - offsetof(struct lustre_capa, lc_flags))); + offsetof(struct lustre_capa, lc_opc))); ocapa->c_capa = *capa; set_capa_expiry(ocapa); spin_unlock(&ocapa->c_lock); @@ -616,10 +619,13 @@ void ll_truncate_free_capa(struct obd_capa *ocapa) LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC); DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free truncate"); + /* release ref when find */ capa_put(ocapa); - spin_lock(&capa_lock); - ll_delete_capa(ocapa); - spin_unlock(&capa_lock); + if (likely(ocapa->c_capa.lc_opc == CAPA_OPC_OSS_TRUNC)) { + spin_lock(&capa_lock); + ll_delete_capa(ocapa); + spin_unlock(&capa_lock); + } } void ll_clear_inode_capas(struct inode *inode) diff --git 
a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 3ed9c85..4542588 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -232,7 +232,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT_VERSION | OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET| OBD_CONNECT_FID | OBD_CONNECT_AT | - OBD_CONNECT_LOV_V3; + OBD_CONNECT_LOV_V3 | OBD_CONNECT_RMT_CLIENT; #ifdef HAVE_LRU_RESIZE_SUPPORT if (sbi->ll_flags & LL_SBI_LRU_RESIZE) @@ -263,13 +263,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) /* real client */ data->ocd_connect_flags |= OBD_CONNECT_REAL; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - data->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT; - data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT; - } else { - data->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; - data->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; - } + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data, NULL); if (err == -EBUSY) { @@ -347,21 +342,16 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) if (data->ocd_connect_flags & OBD_CONNECT_JOIN) sbi->ll_flags |= LL_SBI_JOIN; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - if (!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT)) { - /* sometimes local client claims to be remote, but mdt - * will disagree when client gss not applied. 
*/ - LCONSOLE_INFO("client claims to be remote, but server " - "rejected, forced to be local.\n"); - sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; + if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) { + if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { + sbi->ll_flags |= LL_SBI_RMT_CLIENT; + LCONSOLE_INFO("client is set as remote by default.\n"); } } else { - if (!(data->ocd_connect_flags & OBD_CONNECT_LCL_CLIENT)) { - /* with gss applied, remote client can not claim to be - * local, so mdt maybe force client to be remote. */ - LCONSOLE_INFO("client claims to be local, but server " - "rejected, forced to be remote.\n"); - sbi->ll_flags |= LL_SBI_RMT_CLIENT; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; + LCONSOLE_INFO("client claims to be remote, but server " + "rejected, forced to be local.\n"); } } @@ -385,9 +375,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE | OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| - OBD_CONNECT_AT; - if (sbi->ll_flags & LL_SBI_OSS_CAPA) - data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA; + OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | + OBD_CONNECT_OSS_CAPA; if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { /* OBD_CONNECT_CKSUM should always be set, even if checksums are @@ -406,6 +395,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) #ifdef HAVE_LRU_RESIZE_SUPPORT data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; #endif + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; + CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " "ocd_grant: %d\n", data->ocd_connect_flags, data->ocd_version, data->ocd_grant); @@ -471,7 +463,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) err = md_getattr(sbi->ll_md_exp, &sbi->ll_root_fid, oc, valid, 0, &request); if (oc) - 
free_capa(oc); + capa_put(oc); if (err) { CERROR("md_getattr failed for root: rc = %d\n", err); GOTO(out_lock_cn_cb, err); @@ -2114,6 +2106,8 @@ int ll_process_config(struct lustre_cfg *lcfg) proc fns must be able to handle that! */ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars, lcfg, sb); + if (rc > 0) + rc = 0; return(rc); } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 0933e2f..91c81c3 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1047,6 +1047,7 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) struct lov_stripe_md *lsm = NULL; struct obd_trans_info oti = { 0 }; struct obdo *oa; + struct obd_capa *oc = NULL; int rc; ENTRY; @@ -1101,7 +1102,14 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } } - rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir)); + if (body->valid & OBD_MD_FLOSSCAPA) { + rc = md_unpack_capa(ll_i2mdexp(dir), request, &RMF_CAPA2, &oc); + if (rc) + GOTO(out_free_memmd, rc); + } + + rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir), oc); + capa_put(oc); OBDO_FREE(oa); if (rc) CERROR("obd destroy objid "LPX64" error %d\n", diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index ad6c65f..004218e 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -725,12 +725,13 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, { struct obd_device *obddev = class_exp2obd(exp); struct lmv_obd *lmv = &obddev->u.lmv; - int i; + int i = 0; int rc = 0; int set = 0; + int count = lmv->desc.ld_tgt_count; ENTRY; - if (lmv->desc.ld_tgt_count == 0) + if (count == 0) RETURN(-ENOTTY); switch (cmd) { @@ -743,7 +744,7 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); LASSERT(data->ioc_plen1 == sizeof(struct obd_statfs)); - if ((index >= lmv->desc.ld_tgt_count)) + if ((index >= count)) RETURN(-ENODEV); if (!lmv->tgts[index].ltd_active) @@ 
-764,8 +765,54 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, RETURN(-EFAULT); break; } + case OBD_IOC_QUOTACTL: { + struct if_quotactl *qctl = karg; + struct lmv_tgt_desc *tgt = NULL; + struct obd_quotactl *oqctl; + + if (qctl->qc_valid == QC_MDTIDX) { + if (qctl->qc_idx < 0 || count <= qctl->qc_idx) + RETURN(-EINVAL); + + tgt = &lmv->tgts[qctl->qc_idx]; + if (!tgt->ltd_exp) + RETURN(-EINVAL); + } else if (qctl->qc_valid == QC_UUID) { + for (i = 0; i < count; i++) { + tgt = &lmv->tgts[i]; + if (!obd_uuid_equals(&tgt->ltd_uuid, + &qctl->obd_uuid)) + continue; + + if (tgt->ltd_exp == NULL) + RETURN(-EINVAL); + + break; + } + } else { + RETURN(-EINVAL); + } + + if (i >= count) + RETURN(-EAGAIN); + + LASSERT(tgt && tgt->ltd_exp); + OBD_ALLOC_PTR(oqctl); + if (!oqctl) + RETURN(-ENOMEM); + + QCTL_COPY(oqctl, qctl); + rc = obd_quotactl(tgt->ltd_exp, oqctl); + if (rc == 0) { + QCTL_COPY(qctl, oqctl); + qctl->qc_valid = QC_MDTIDX; + qctl->obd_uuid = tgt->ltd_uuid; + } + OBD_FREE_PTR(oqctl); + break; + } default : { - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + for (i = 0; i < count; i++) { int err; if (lmv->tgts[i].ltd_exp == NULL) @@ -773,7 +820,9 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, err = obd_iocontrol(cmd, lmv->tgts[i].ltd_exp, len, karg, uarg); - if (err) { + if (err == -ENODATA && cmd == OBD_IOC_POLL_QUOTACHECK) { + RETURN(err); + } else if (err) { if (lmv->tgts[i].ltd_active) { CERROR("error: iocontrol MDC %s on MDT" "idx %d cmd %x: err = %d\n", @@ -2837,6 +2886,18 @@ static int lmv_renew_capa(struct obd_export *exp, struct obd_capa *oc, RETURN(rc); } +int lmv_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req, + const struct req_msg_field *field, struct obd_capa **oc) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + int rc; + + ENTRY; + rc = md_unpack_capa(lmv->tgts[0].ltd_exp, req, field, oc); + RETURN(rc); +} + int lmv_intent_getattr_async(struct obd_export 
*exp, struct md_enqueue_info *minfo, struct ldlm_enqueue_info *einfo) @@ -2960,11 +3021,15 @@ struct md_ops lmv_md_ops = { .m_set_open_replay_data = lmv_set_open_replay_data, .m_clear_open_replay_data = lmv_clear_open_replay_data, .m_renew_capa = lmv_renew_capa, + .m_unpack_capa = lmv_unpack_capa, .m_get_remote_perm = lmv_get_remote_perm, .m_intent_getattr_async = lmv_intent_getattr_async, .m_revalidate_lock = lmv_revalidate_lock }; +static quota_interface_t *quota_interface; +extern quota_interface_t lmv_quota_interface; + int __init lmv_init(void) { struct lprocfs_static_vars lvars; @@ -2979,10 +3044,18 @@ int __init lmv_init(void) } lprocfs_lmv_init_vars(&lvars); + + request_module("lquota"); + quota_interface = PORTAL_SYMBOL_GET(lmv_quota_interface); + init_obd_quota_ops(quota_interface, &lmv_obd_ops); + rc = class_register_type(&lmv_obd_ops, &lmv_md_ops, lvars.module_vars, LUSTRE_LMV_NAME, NULL); - if (rc) + if (rc) { + if (quota_interface) + PORTAL_SYMBOL_PUT(lmv_quota_interface); cfs_mem_cache_destroy(lmv_object_cache); + } return rc; } @@ -2990,6 +3063,9 @@ int __init lmv_init(void) #ifdef __KERNEL__ static void lmv_exit(void) { + if (quota_interface) + PORTAL_SYMBOL_PUT(lmv_quota_interface); + class_unregister_type(LUSTRE_LMV_NAME); LASSERTF(atomic_read(&lmv_object_count) == 0, diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index e876da0..ea90841 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -931,6 +931,8 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, lcfg, obd); + if (rc > 0) + rc = 0; GOTO(out, rc); } case LCFG_POOL_NEW: @@ -1130,7 +1132,7 @@ do { static int lov_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { struct lov_request_set *set; struct obd_info oinfo; @@ -1163,7 +1165,7 @@ static int lov_destroy(struct 
obd_export *exp, struct obdo *oa, oti->oti_logcookies = set->set_cookies + req->rq_stripe; err = obd_destroy(lov->lov_tgts[req->rq_idx]->ltd_exp, - req->rq_oi.oi_oa, NULL, oti, NULL); + req->rq_oi.oi_oa, NULL, oti, NULL, capa); err = lov_update_common_set(set, req, err); if (err) { CERROR("error: destroying objid "LPX64" subobj " @@ -1901,7 +1903,7 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, { struct obd_device *obddev = class_exp2obd(exp); struct lov_obd *lov = &obddev->u.lov; - int i, rc = 0, count = lov->desc.ld_tgt_count; + int i = 0, rc = 0, count = lov->desc.ld_tgt_count; struct obd_uuid *uuidp; ENTRY; @@ -1995,6 +1997,53 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case LL_IOC_LOV_SETEA: rc = lov_setea(exp, karg, uarg); break; + case OBD_IOC_QUOTACTL: { + struct if_quotactl *qctl = karg; + struct lov_tgt_desc *tgt = NULL; + struct obd_quotactl *oqctl; + + if (qctl->qc_valid == QC_OSTIDX) { + if (qctl->qc_idx < 0 || count <= qctl->qc_idx) + RETURN(-EINVAL); + + tgt = lov->lov_tgts[qctl->qc_idx]; + if (!tgt || !tgt->ltd_exp) + RETURN(-EINVAL); + } else if (qctl->qc_valid == QC_UUID) { + for (i = 0; i < count; i++) { + tgt = lov->lov_tgts[i]; + if (!tgt || + !obd_uuid_equals(&tgt->ltd_uuid, + &qctl->obd_uuid)) + continue; + + if (tgt->ltd_exp == NULL) + RETURN(-EINVAL); + + break; + } + } else { + RETURN(-EINVAL); + } + + if (i >= count) + RETURN(-EAGAIN); + + LASSERT(tgt && tgt->ltd_exp); + OBD_ALLOC_PTR(oqctl); + if (!oqctl) + RETURN(-ENOMEM); + + QCTL_COPY(oqctl, qctl); + rc = obd_quotactl(tgt->ltd_exp, oqctl); + if (rc == 0) { + QCTL_COPY(qctl, oqctl); + qctl->qc_valid = QC_OSTIDX; + qctl->obd_uuid = tgt->ltd_uuid; + } + OBD_FREE_PTR(oqctl); + break; + } default: { int set = 0; diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index ba95f06..176968f 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -620,7 +620,8 @@ cleanup: continue; sub_exp = 
lov->lov_tgts[req->rq_idx]->ltd_exp; - err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL); + err = obd_destroy(sub_exp, req->rq_oi.oi_oa, NULL, oti, NULL, + NULL); if (err) CERROR("Failed to uncreate objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am index 1b5311b..b80a28d 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -60,7 +60,7 @@ sources: fsfilt_$(BACKINGFS).c else #SERVER sources: -endif +endif #SERVER ldiskfs_sed_flags = \ -e "s/dx_hash_info/ext3_dx_hash_info/g" \ @@ -104,8 +104,7 @@ install-data-hook: $(install_data_hook) DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \ lvfs_internal.h lvfs_linux.c lvfs_userfs.c \ upcall_cache.c prng.c lvfs_lib.c \ - lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c \ - # quotacheck_test.c quotactl_test.c fsfilt_ext3_quota.h + lustre_quota_fmt.c lustre_quota_fmt.h quotafmt_test.c MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ CLEANFILES = fsfilt-*.c fsfilt_ldiskfs*.c fsfilt_extN.c sources diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 25ed99c..26ed65b 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -67,6 +67,8 @@ #include #endif +#include "lustre_quota_fmt.h" + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) #define FSFILT_DATA_TRANS_BLOCKS(sb) EXT3_DATA_TRANS_BLOCKS #define FSFILT_DELETE_TRANS_BLOCKS(sb) EXT3_DELETE_TRANS_BLOCKS @@ -723,9 +725,7 @@ static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs) int rc; memset(&sfs, 0, sizeof(sfs)); - rc = ll_do_statfs(sb, &sfs); - if (!rc && sfs.f_bfree < sfs.f_ffree) { sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; sfs.f_ffree = sfs.f_bfree; @@ -883,7 +883,6 @@ static unsigned long new_blocks(handle_t *handle, struct ext3_ext_base *base, pblock = ext3_mb_new_blocks(handle, &ar, err); *count = ar.len; return pblock; - } #endif @@ -1315,19 +1314,37 @@ static int 
fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, static int fsfilt_ext3_setup(struct super_block *sb) { +#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,6)) && \ + defined(HAVE_QUOTA_SUPPORT)) || defined(S_PDIROPS) + struct ext3_sb_info *sbi = EXT3_SB(sb); #if 0 - EXT3_SB(sb)->dx_lock = fsfilt_ext3_dx_lock; - EXT3_SB(sb)->dx_unlock = fsfilt_ext3_dx_unlock; + sbi->dx_lock = fsfilt_ext3_dx_lock; + sbi->dx_unlock = fsfilt_ext3_dx_unlock; +#endif #endif #ifdef S_PDIROPS CWARN("Enabling PDIROPS\n"); - set_opt(EXT3_SB(sb)->s_mount_opt, PDIROPS); + set_opt(sbi->s_mount_opt, PDIROPS); sb->s_flags |= S_PDIROPS; #endif if (!EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) CWARN("filesystem doesn't have dir_index feature enabled\n"); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)) && defined(HAVE_QUOTA_SUPPORT) - set_opt(EXT3_SB(sb)->s_mount_opt, QUOTA); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,6)) && defined(HAVE_QUOTA_SUPPORT) + /* enable journaled quota support */ + /* kfreed in ext3_put_super() */ + sbi->s_qf_names[USRQUOTA] = kstrdup("lquota.user.reserved", GFP_KERNEL); + if (!sbi->s_qf_names[USRQUOTA]) + return -ENOMEM; + sbi->s_qf_names[GRPQUOTA] = kstrdup("lquota.group.reserved", GFP_KERNEL); + if (!sbi->s_qf_names[GRPQUOTA]) { + kfree(sbi->s_qf_names[USRQUOTA]); + sbi->s_qf_names[USRQUOTA] = NULL; + return -ENOMEM; + } + sbi->s_jquota_fmt = QFMT_VFS_V0; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)) + set_opt(sbi->s_mount_opt, QUOTA); +#endif #endif return 0; } @@ -1363,8 +1380,7 @@ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs) return 0; } -static const char *op_quotafile[] = { "lquota.user", "lquota.group" }; - +#ifdef HAVE_QUOTA_SUPPORT #define DQINFO_COPY(out, in) \ do { \ Q_COPY(out, in, dqi_bgrace); \ @@ -1386,8 +1402,6 @@ do { \ Q_COPY(out, in, dqb_valid); \ } while (0) - - static int fsfilt_ext3_quotactl(struct super_block *sb, struct obd_quotactl *oqc) { @@ -1419,10 +1433,15 @@ 
static int fsfilt_ext3_quotactl(struct super_block *sb, continue; if (oqc->qc_cmd == Q_QUOTAON) { + char *name[MAXQUOTAS] = LUSTRE_OPQFILES_NAMES_V2; + + LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2); + if (!qcop->quota_on) GOTO(out, rc = -ENOSYS); - rc = qcop->quota_on(sb, i, oqc->qc_id, - (char *)op_quotafile[i]); + + rc = qcop->quota_on(sb, i, QFMT_VFS_V0, + name[i]); } else if (oqc->qc_cmd == Q_QUOTAOFF) { if (!qcop->quota_off) GOTO(out, rc = -ENOSYS); @@ -1455,14 +1474,38 @@ static int fsfilt_ext3_quotactl(struct super_block *sb, if (!qcop->get_dqblk) GOTO(out, rc = -ENOSYS); rc = qcop->get_dqblk(sb, oqc->qc_type, oqc->qc_id, dqblk); + if (!rc) + dqblk->dqb_valid = QIF_LIMITS | QIF_USAGE; break; case Q_SYNC: if (!sb->s_qcop->quota_sync) GOTO(out, rc = -ENOSYS); qcop->quota_sync(sb, oqc->qc_type); break; + case Q_FINVALIDATE: + CDEBUG(D_WARNING, "invalidating operational quota files\n"); + for (i = 0; i < MAXQUOTAS; i++) { + struct file *fp; + char *name[MAXQUOTAS] = LUSTRE_OPQFILES_NAMES_V2; + + LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2); + + if (!Q_TYPESET(oqc, i)) + continue; + + fp = filp_open(name[i], O_CREAT | O_TRUNC | O_RDWR, 0644); + if (IS_ERR(fp)) { + rc = PTR_ERR(fp); + CERROR("error invalidating operational quota file" + " %s (rc:%d)\n", name[i], rc); + } else { + filp_close(fp, 0); + } + + } + break; default: - CERROR("unsupported quotactl command: %d", oqc->qc_cmd); + CERROR("unsupported quotactl command: %d\n", oqc->qc_cmd); LBUG(); } out: @@ -1473,26 +1516,26 @@ out: OBD_FREE_PTR(dqblk); if (rc) - CDEBUG(D_QUOTA, "quotactl command %#x, id %u, type %d " + CDEBUG(D_QUOTA, "quotactl command %#x, id %u, type %u " "failed: %d\n", oqc->qc_cmd, oqc->qc_id, oqc->qc_type, rc); RETURN(rc); } struct chk_dqblk{ - struct hlist_node dqb_hash; /* quotacheck hash */ - struct list_head dqb_list; /* in list also */ - qid_t dqb_id; /* uid/gid */ - short dqb_type; /* USRQUOTA/GRPQUOTA */ - __u32 dqb_bhardlimit; /* block hard limit */ - __u32 dqb_bsoftlimit; /* block soft 
limit */ - qsize_t dqb_curspace; /* current space */ - __u32 dqb_ihardlimit; /* inode hard limit */ - __u32 dqb_isoftlimit; /* inode soft limit */ - __u32 dqb_curinodes; /* current inodes */ - __u64 dqb_btime; /* block grace time */ - __u64 dqb_itime; /* inode grace time */ - __u32 dqb_valid; /* flag for above fields */ + struct hlist_node dqb_hash; /** quotacheck hash */ + struct list_head dqb_list; /** in list also */ + qid_t dqb_id; /** uid/gid */ + short dqb_type; /** USRQUOTA/GRPQUOTA */ + qsize_t dqb_bhardlimit; /** block hard limit */ + qsize_t dqb_bsoftlimit; /** block soft limit */ + qsize_t dqb_curspace; /** current space */ + qsize_t dqb_ihardlimit; /** inode hard limit */ + qsize_t dqb_isoftlimit; /** inode soft limit */ + qsize_t dqb_curinodes; /** current inodes */ + __u64 dqb_btime; /** block grace time */ + __u64 dqb_itime; /** inode grace time */ + __u32 dqb_valid; /** flag for above fields */ }; static inline unsigned int chkquot_hash(qid_t id, int type) @@ -1568,7 +1611,7 @@ cqget(struct super_block *sb, struct hlist_head *hash, struct list_head *list, return cdqb; } -static inline int quota_onoff(struct super_block *sb, int cmd, int type) +static inline int quota_onoff(struct super_block *sb, int cmd, int type, int qfmt) { struct obd_quotactl *oqctl; int rc; @@ -1578,7 +1621,7 @@ static inline int quota_onoff(struct super_block *sb, int cmd, int type) RETURN(-ENOMEM); oqctl->qc_cmd = cmd; - oqctl->qc_id = QFMT_LDISKFS; + oqctl->qc_id = qfmt; oqctl->qc_type = type; rc = fsfilt_ext3_quotactl(sb, oqctl); @@ -1700,24 +1743,8 @@ static int add_inode_quota(struct inode *inode, struct qchk_ctxt *qctxt, return rc; } -static int v2_write_dqheader(struct file *f, int type) -{ - static const __u32 quota_magics[] = V2_INITQMAGICS; - static const __u32 quota_versions[] = V2_INITQVERSIONS; - struct v2_disk_dqheader dqhead; - loff_t offset = 0; - - CLASSERT(ARRAY_SIZE(quota_magics) == ARRAY_SIZE(quota_versions)); - LASSERT(0 <= type && type < 
ARRAY_SIZE(quota_magics)); - - dqhead.dqh_magic = cpu_to_le32(quota_magics[type]); - dqhead.dqh_version = cpu_to_le32(quota_versions[type]); - - return cfs_user_write(f, (char *)&dqhead, sizeof(dqhead), &offset); -} - /* write dqinfo struct in a new quota file */ -static int v2_write_dqinfo(struct file *f, int type, struct if_dqinfo *info) +static int v3_write_dqinfo(struct file *f, int type, struct if_dqinfo *info) { struct v2_disk_dqinfo dqinfo; __u32 blocks = V2_DQTREEOFF + 1; @@ -1741,6 +1768,22 @@ static int v2_write_dqinfo(struct file *f, int type, struct if_dqinfo *info) return cfs_user_write(f, (char *)&dqinfo, sizeof(dqinfo), &offset); } +static int v3_write_dqheader(struct file *f, int type) +{ + static const __u32 quota_magics[] = V2_INITQMAGICS; + static const __u32 quota_versions[] = V2_INITQVERSIONS_R1; + struct v2_disk_dqheader dqhead; + loff_t offset = 0; + + CLASSERT(ARRAY_SIZE(quota_magics) == ARRAY_SIZE(quota_versions)); + LASSERT(0 <= type && type < ARRAY_SIZE(quota_magics)); + + dqhead.dqh_magic = cpu_to_le32(quota_magics[type]); + dqhead.dqh_version = cpu_to_le32(quota_versions[type]); + + return cfs_user_write(f, (char *)&dqhead, sizeof(dqhead), &offset); +} + static int create_new_quota_files(struct qchk_ctxt *qctxt, struct obd_quotactl *oqc) { @@ -1751,32 +1794,36 @@ static int create_new_quota_files(struct qchk_ctxt *qctxt, struct if_dqinfo *info = qctxt->qckt_first_check[i]? 
NULL : &qctxt->qckt_dqinfo[i]; struct file *file; + const char *name[MAXQUOTAS] = LUSTRE_OPQFILES_NAMES_V2; if (!Q_TYPESET(oqc, i)) continue; - file = filp_open(op_quotafile[i], O_RDWR | O_CREAT | O_TRUNC, - 0644); + LASSERT(oqc->qc_id == LUSTRE_QUOTA_V2); + + file = filp_open(name[i], O_RDWR | O_CREAT | O_TRUNC, 0644); if (IS_ERR(file)) { rc = PTR_ERR(file); CERROR("can't create %s file: rc = %d\n", - op_quotafile[i], rc); + name[i], rc); GOTO(out, rc); } if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - CERROR("file %s is not regular", op_quotafile[i]); + CERROR("file %s is not regular", name[i]); filp_close(file, 0); GOTO(out, rc = -EINVAL); } - rc = v2_write_dqheader(file, i); + DQUOT_DROP(file->f_dentry->d_inode); + + rc = v3_write_dqheader(file, i); if (rc) { filp_close(file, 0); GOTO(out, rc); } - rc = v2_write_dqinfo(file, i, info); + rc = v3_write_dqinfo(file, i, info); filp_close(file, 0); if (rc) GOTO(out, rc); @@ -1872,12 +1919,12 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb, if (!Q_TYPESET(oqc, i)) continue; - rc = quota_onoff(sb, Q_QUOTAON, i); + rc = quota_onoff(sb, Q_QUOTAON, i, oqc->qc_id); if (!rc || rc == -EBUSY) { rc = read_old_dqinfo(sb, i, qctxt->qckt_dqinfo); if (rc) GOTO(out, rc); - } else if (rc == -ENOENT) { + } else if (rc == -ENOENT || rc == -EINVAL || rc == -EEXIST) { qctxt->qckt_first_check[i] = 1; } else if (rc) { GOTO(out, rc); @@ -1945,14 +1992,14 @@ static int fsfilt_ext3_quotacheck(struct super_block *sb, } #endif /* turn off quota cause we are to dump chk_dqblk to files */ - quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type); + quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type, oqc->qc_id); rc = create_new_quota_files(qctxt, oqc); if (rc) GOTO(out, rc); /* we use vfs functions to set dqblk, so turn quota on */ - rc = quota_onoff(sb, Q_QUOTAON, oqc->qc_type); + rc = quota_onoff(sb, Q_QUOTAON, oqc->qc_type, oqc->qc_id); out: /* dump and free chk_dqblk */ rc = prune_chkquots(sb, qctxt, rc); @@ -1960,7 +2007,7 @@ out: /* turn off 
quota, `lfs quotacheck` will turn on when all * nodes quotacheck finish. */ - quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type); + quota_onoff(sb, Q_QUOTAOFF, oqc->qc_type, oqc->qc_id); oqc->qc_stat = rc; if (rc) @@ -1969,7 +2016,6 @@ out: RETURN(rc); } -#ifdef HAVE_QUOTA_SUPPORT static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, int cmd) { @@ -1994,9 +2040,15 @@ static int fsfilt_ext3_quotainfo(struct lustre_quota_info *lqi, int type, case QFILE_INIT_INFO: rc = lustre_init_quota_info(lqi, type); break; + case QFILE_CONVERT: + rc = -ENOTSUPP; + CERROR("quota CONVERT command is not supported\n"); + break; default: - CERROR("Unsupported admin quota file cmd %d\n", cmd); - LBUG(); + rc = -ENOTSUPP; + CERROR("Unsupported admin quota file cmd %d\n" + "Are lquota.ko and fsfilt_ldiskfs.ko modules in sync?\n", + cmd); break; } RETURN(rc); @@ -2076,13 +2128,13 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_setup = fsfilt_ext3_setup, .fs_send_bio = fsfilt_ext3_send_bio, .fs_get_op_len = fsfilt_ext3_get_op_len, - .fs_quotactl = fsfilt_ext3_quotactl, - .fs_quotacheck = fsfilt_ext3_quotacheck, #ifdef HAVE_DISK_INODE_VERSION .fs_get_version = fsfilt_ext3_get_version, .fs_set_version = fsfilt_ext3_set_version, #endif #ifdef HAVE_QUOTA_SUPPORT + .fs_quotactl = fsfilt_ext3_quotactl, + .fs_quotacheck = fsfilt_ext3_quotacheck, .fs_quotainfo = fsfilt_ext3_quotainfo, .fs_qids = fsfilt_ext3_qids, .fs_dquot = fsfilt_ext3_dquot, diff --git a/lustre/lvfs/fsfilt_reiserfs.c b/lustre/lvfs/fsfilt_reiserfs.c index 2f58e2a..83db369 100644 --- a/lustre/lvfs/fsfilt_reiserfs.c +++ b/lustre/lvfs/fsfilt_reiserfs.c @@ -184,9 +184,7 @@ static int fsfilt_reiserfs_statfs(struct super_block *sb, int rc; memset(&sfs, 0, sizeof(sfs)); - rc = ll_do_statfs(sb, &sfs); - statfs_pack(osfs, &sfs); return rc; } diff --git a/lustre/lvfs/lustre_quota_fmt.c b/lustre/lvfs/lustre_quota_fmt.c index b0ddb5c..ee713e4 100644 --- a/lustre/lvfs/lustre_quota_fmt.c +++ 
b/lustre/lvfs/lustre_quota_fmt.c @@ -39,7 +39,6 @@ * from linux/fs/quota_v2.c */ - #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif @@ -57,21 +56,32 @@ #include #include +#include #include "lustre_quota_fmt.h" -typedef char *dqbuf_t; +#ifdef HAVE_QUOTA_SUPPORT + +static const uint lustre_initqversions[][MAXQUOTAS] = { + [LUSTRE_QUOTA_V2] = LUSTRE_INITQVERSIONS_V2 +}; + +static const int lustre_dqstrinblk[] = { + [LUSTRE_QUOTA_V2] = LUSTRE_DQSTRINBLK_V2 +}; -#define GETIDINDEX(id, depth) (((id) >> ((LUSTRE_DQTREEDEPTH-(depth)-1)*8)) & 0xff) -#define GETENTRIES(buf) ((struct lustre_disk_dqblk *)(((char *)buf)+sizeof(struct lustre_disk_dqdbheader))) +static const int lustre_disk_dqblk_sz[] = { + [LUSTRE_QUOTA_V2] = sizeof(struct lustre_disk_dqblk_v2) +}; -static int check_quota_file(struct file *f, struct inode *inode, int type) +int check_quota_file(struct file *f, struct inode *inode, int type, + lustre_quota_version_t version) { struct lustre_disk_dqheader dqhead; mm_segment_t fs; ssize_t size; loff_t offset = 0; static const uint quota_magics[] = LUSTRE_INITQMAGICS; - static const uint quota_versions[] = LUSTRE_INITQVERSIONS; + const uint *quota_versions = lustre_initqversions[version]; if (f) { fs = get_fs(); @@ -90,27 +100,26 @@ static int check_quota_file(struct file *f, struct inode *inode, int type) #endif } if (size != sizeof(struct lustre_disk_dqheader)) - return 0; + return -EINVAL; if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || le32_to_cpu(dqhead.dqh_version) != quota_versions[type]) - return 0; - return 1; + return -EINVAL; + return 0; } -/* Check whether given file is really lustre admin quotafile */ +/** + * Check whether given file is really lustre admin quotafile + */ int lustre_check_quota_file(struct lustre_quota_info *lqi, int type) { struct file *f = lqi->qi_files[type]; - return check_quota_file(f, NULL, type); + return check_quota_file(f, NULL, type, lqi->qi_version); } -/* Read information header from quota file */ -int 
lustre_read_quota_info(struct lustre_quota_info *lqi, int type) +int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info) { mm_segment_t fs; struct lustre_disk_dqinfo dinfo; - struct lustre_mem_dqinfo *info = &lqi->qi_info[type]; - struct file *f = lqi->qi_files[type]; ssize_t size; loff_t offset = LUSTRE_DQINFOOFF; @@ -120,9 +129,9 @@ int lustre_read_quota_info(struct lustre_quota_info *lqi, int type) sizeof(struct lustre_disk_dqinfo), &offset); set_fs(fs); if (size != sizeof(struct lustre_disk_dqinfo)) { - CDEBUG(D_WARNING, "Can't read info structure on device %s.\n", + CDEBUG(D_ERROR, "Can't read info structure on device %s.\n", f->f_vfsmnt->mnt_sb->s_id); - return -1; + return -EINVAL; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); @@ -133,7 +142,17 @@ int lustre_read_quota_info(struct lustre_quota_info *lqi, int type) return 0; } -/* Write information header to quota file */ +/** + * Read information header from quota file + */ +int lustre_read_quota_info(struct lustre_quota_info *lqi, int type) +{ + return lustre_read_quota_file_info(lqi->qi_files[type], &lqi->qi_info[type]); +} + +/** + * Write information header to quota file + */ int lustre_write_quota_info(struct lustre_quota_info *lqi, int type) { mm_segment_t fs; @@ -164,33 +183,44 @@ int lustre_write_quota_info(struct lustre_quota_info *lqi, int type) return 0; } -static void disk2memdqb(struct mem_dqblk *m, struct lustre_disk_dqblk *d) +void disk2memdqb(struct lustre_mem_dqblk *m, void *d, + lustre_quota_version_t version) { - m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); - m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit); - m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes); - m->dqb_itime = le64_to_cpu(d->dqb_itime); - m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit); - m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit); - m->dqb_curspace = le64_to_cpu(d->dqb_curspace); - m->dqb_btime = le64_to_cpu(d->dqb_btime); + 
struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d; + + LASSERT(version == LUSTRE_QUOTA_V2); + + m->dqb_ihardlimit = le64_to_cpu(dqblk->dqb_ihardlimit); + m->dqb_isoftlimit = le64_to_cpu(dqblk->dqb_isoftlimit); + m->dqb_curinodes = le64_to_cpu(dqblk->dqb_curinodes); + m->dqb_itime = le64_to_cpu(dqblk->dqb_itime); + m->dqb_bhardlimit = le64_to_cpu(dqblk->dqb_bhardlimit); + m->dqb_bsoftlimit = le64_to_cpu(dqblk->dqb_bsoftlimit); + m->dqb_curspace = le64_to_cpu(dqblk->dqb_curspace); + m->dqb_btime = le64_to_cpu(dqblk->dqb_btime); } -static void mem2diskdqb(struct lustre_disk_dqblk *d, struct mem_dqblk *m, - qid_t id) +static int mem2diskdqb(void *d, struct lustre_mem_dqblk *m, + qid_t id, lustre_quota_version_t version) { - d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); - d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); - d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes); - d->dqb_itime = cpu_to_le64(m->dqb_itime); - d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit); - d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit); - d->dqb_curspace = cpu_to_le64(m->dqb_curspace); - d->dqb_btime = cpu_to_le64(m->dqb_btime); - d->dqb_id = cpu_to_le32(id); + struct lustre_disk_dqblk_v2 *dqblk = (struct lustre_disk_dqblk_v2 *)d; + + LASSERT(version == LUSTRE_QUOTA_V2); + + dqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); + dqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); + dqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes); + dqblk->dqb_itime = cpu_to_le64(m->dqb_itime); + dqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit); + dqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit); + dqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace); + dqblk->dqb_btime = cpu_to_le64(m->dqb_btime); + dqblk->dqb_id = cpu_to_le32(id); + + return 0; } -static dqbuf_t getdqbuf(void) +dqbuf_t getdqbuf(void) { dqbuf_t buf = kmalloc(LUSTRE_DQBLKSIZE, GFP_NOFS); if (!buf) @@ -199,12 +229,12 @@ static dqbuf_t getdqbuf(void) return buf; } -static inline void 
freedqbuf(dqbuf_t buf) +void freedqbuf(dqbuf_t buf) { kfree(buf); } -static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf) +ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf) { mm_segment_t fs; ssize_t ret; @@ -218,7 +248,7 @@ static ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf) return ret; } -static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf) +ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf) { mm_segment_t fs; ssize_t ret; @@ -229,18 +259,17 @@ static ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf) ret = filp->f_op->write(filp, (char *)buf, LUSTRE_DQBLKSIZE, &offset); set_fs(fs); return ret; - } -static void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info) +void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info) { set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); } -#define lustre_info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) - -/* Remove empty block from list and return it */ -static int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info) +/** + * Remove empty block from list and return it + */ +int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info) { dqbuf_t buf = getdqbuf(); struct lustre_disk_dqdbheader *dh = @@ -256,7 +285,8 @@ static int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info) info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); } else { memset(buf, 0, LUSTRE_DQBLKSIZE); - if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0) /* Assure block allocation... */ + /* Assure block allocation... 
*/ + if ((ret = write_blk(filp, info->dqi_blocks, buf)) < 0) goto out_buf; blk = info->dqi_blocks++; } @@ -267,9 +297,11 @@ out_buf: return ret; } -/* Insert empty block to the list */ -static int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info, - dqbuf_t buf, uint blk) +/** + * Insert empty block to the list + */ +int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info, + dqbuf_t buf, uint blk) { struct lustre_disk_dqdbheader *dh = (struct lustre_disk_dqdbheader *)buf; @@ -286,10 +318,12 @@ static int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info, return 0; } -/* Remove given block from the list of blocks with free entries */ -static int remove_free_dqentry(struct file *filp, - struct lustre_mem_dqinfo *info, dqbuf_t buf, - uint blk) +/** + * Remove given block from the list of blocks with free entries + */ +int remove_free_dqentry(struct file *filp, + struct lustre_mem_dqinfo *info, dqbuf_t buf, + uint blk) { dqbuf_t tmpbuf = getdqbuf(); struct lustre_disk_dqdbheader *dh = @@ -321,7 +355,8 @@ static int remove_free_dqentry(struct file *filp, } freedqbuf(tmpbuf); dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); - if (write_blk(filp, blk, buf) < 0) /* No matter whether write succeeds block is out of list */ + if (write_blk(filp, blk, buf) < 0) + /* No matter whether write succeeds block is out of list */ CDEBUG(D_ERROR, "VFS: Can't write block (%u) with free entries.\n", blk); return 0; @@ -330,10 +365,12 @@ out_buf: return err; } -/* Insert given block to the beginning of list with free entries */ -static int insert_free_dqentry(struct file *filp, - struct lustre_mem_dqinfo *info, dqbuf_t buf, - uint blk) +/** + * Insert given block to the beginning of list with free entries + */ +int insert_free_dqentry(struct file *filp, + struct lustre_mem_dqinfo *info, dqbuf_t buf, + uint blk) { dqbuf_t tmpbuf = getdqbuf(); struct lustre_disk_dqdbheader *dh = @@ -363,16 +400,23 @@ out_buf: return err; } -/* Find space 
for dquot */ -static uint find_free_dqentry(struct lustre_dquot *dquot, int *err) + + +/** + * Find space for dquot + */ +static uint find_free_dqentry(struct lustre_dquot *dquot, int *err, + lustre_quota_version_t version) { struct lustre_quota_info *lqi = dquot->dq_info; struct file *filp = lqi->qi_files[dquot->dq_type]; struct lustre_mem_dqinfo *info = &lqi->qi_info[dquot->dq_type]; uint blk, i; struct lustre_disk_dqdbheader *dh; - struct lustre_disk_dqblk *ddquot; - struct lustre_disk_dqblk fakedquot; + void *ddquot; + int dqblk_sz = lustre_disk_dqblk_sz[version]; + int dqstrinblk = lustre_dqstrinblk[version]; + char fakedquot[dqblk_sz]; dqbuf_t buf; *err = 0; @@ -381,7 +425,7 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err) return 0; } dh = (struct lustre_disk_dqdbheader *)buf; - ddquot = GETENTRIES(buf); + ddquot = GETENTRIES(buf, version); if (info->dqi_free_entry) { blk = info->dqi_free_entry; if ((*err = read_blk(filp, blk, buf)) < 0) @@ -394,10 +438,14 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err) return 0; } memset(buf, 0, LUSTRE_DQBLKSIZE); - info->dqi_free_entry = blk; /* This is enough as block is already zeroed and entry list is empty... */ + info->dqi_free_entry = blk; /* This is enough as block is + already zeroed and entry list + is empty... */ lustre_mark_info_dirty(info); } - if (le16_to_cpu(dh->dqdh_entries) + 1 >= LUSTRE_DQSTRINBLK) /* Block will be full? 
*/ + + /* Will block be full */ + if (le16_to_cpu(dh->dqdh_entries) + 1 >= dqstrinblk) if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) { CDEBUG(D_ERROR, "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", @@ -405,12 +453,13 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err) goto out_buf; } dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries) + 1); - memset(&fakedquot, 0, sizeof(struct lustre_disk_dqblk)); + memset(fakedquot, 0, dqblk_sz); /* Find free structure in block */ - for (i = 0; i < LUSTRE_DQSTRINBLK && - memcmp(&fakedquot, ddquot + i, sizeof(fakedquot)); i++) ; + for (i = 0; i < dqstrinblk && + memcmp(fakedquot, (char*)ddquot + i * dqblk_sz, + sizeof(fakedquot)); i++); - if (i == LUSTRE_DQSTRINBLK) { + if (i == dqstrinblk) { CDEBUG(D_ERROR, "VFS: find_free_dqentry(): Data block full but it shouldn't.\n"); *err = -EIO; @@ -426,7 +475,7 @@ static uint find_free_dqentry(struct lustre_dquot *dquot, int *err) dquot->dq_off = (blk << LUSTRE_DQBLKSIZE_BITS) + sizeof(struct lustre_disk_dqdbheader) + - i * sizeof(struct lustre_disk_dqblk); + i * dqblk_sz; freedqbuf(buf); return blk; out_buf: @@ -434,8 +483,11 @@ out_buf: return 0; } -/* Insert reference to structure into the trie */ -static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth) +/** + * Insert reference to structure into the trie + */ +static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth, + lustre_quota_version_t version) { struct lustre_quota_info *lqi = dquot->dq_info; struct file *filp = lqi->qi_files[dquot->dq_type]; @@ -476,9 +528,9 @@ static int do_insert_tree(struct lustre_dquot *dquot, uint * treeblk, int depth) goto out_buf; } - newblk = find_free_dqentry(dquot, &ret); + newblk = find_free_dqentry(dquot, &ret, version); } else - ret = do_insert_tree(dquot, &newblk, depth + 1); + ret = do_insert_tree(dquot, &newblk, depth + 1, version); if (newson && ret >= 0) { 
ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk); ret = write_blk(filp, *treeblk, buf); @@ -489,27 +541,37 @@ out_buf: return ret; } -/* Wrapper for inserting quota structure into tree */ -static inline int dq_insert_tree(struct lustre_dquot *dquot) +/** + * Wrapper for inserting quota structure into tree + */ +static inline int dq_insert_tree(struct lustre_dquot *dquot, + lustre_quota_version_t version) { int tmp = LUSTRE_DQTREEOFF; - return do_insert_tree(dquot, &tmp, 0); + return do_insert_tree(dquot, &tmp, 0, version); } -/* - * We don't have to be afraid of deadlocks as we never have quotas on quota files... +/** + * We don't have to be afraid of deadlocks as we never have quotas on + * quota files... */ -static int lustre_write_dquot(struct lustre_dquot *dquot) +static int lustre_write_dquot(struct lustre_dquot *dquot, + lustre_quota_version_t version) { int type = dquot->dq_type; struct file *filp; mm_segment_t fs; loff_t offset; ssize_t ret; - struct lustre_disk_dqblk ddquot, empty; + int dqblk_sz = lustre_disk_dqblk_sz[version]; + char ddquot[dqblk_sz], empty[dqblk_sz]; + + ret = mem2diskdqb(ddquot, &dquot->dq_dqb, dquot->dq_id, version); + if (ret < 0) + return ret; if (!dquot->dq_off) - if ((ret = dq_insert_tree(dquot)) < 0) { + if ((ret = dq_insert_tree(dquot, version)) < 0) { CDEBUG(D_ERROR, "VFS: Error %Zd occurred while creating quota.\n", ret); @@ -517,19 +579,18 @@ static int lustre_write_dquot(struct lustre_dquot *dquot) } filp = dquot->dq_info->qi_files[type]; offset = dquot->dq_off; - mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); /* Argh... We may need to write structure full of zeroes but that would be * treated as an empty place by the rest of the code. 
Format change would * be definitely cleaner but the problems probably are not worth it */ - memset(&empty, 0, sizeof(struct lustre_disk_dqblk)); - if (!memcmp(&empty, &ddquot, sizeof(struct lustre_disk_dqblk))) - ddquot.dqb_itime = cpu_to_le64(1); + memset(empty, 0, dqblk_sz); + if (!memcmp(empty, ddquot, dqblk_sz)) + ((struct lustre_disk_dqblk_v2 *)ddquot)->dqb_itime = cpu_to_le64(1); fs = get_fs(); set_fs(KERNEL_DS); - ret = filp->f_op->write(filp, (char *)&ddquot, - sizeof(struct lustre_disk_dqblk), &offset); + ret = filp->f_op->write(filp, ddquot, + dqblk_sz, &offset); set_fs(fs); - if (ret != sizeof(struct lustre_disk_dqblk)) { + if (ret != dqblk_sz) { CDEBUG(D_WARNING, "VFS: dquota write failed on dev %s\n", filp->f_dentry->d_sb->s_id); if (ret >= 0) @@ -540,14 +601,18 @@ static int lustre_write_dquot(struct lustre_dquot *dquot) return ret; } -/* Free dquot entry in data block */ -static int free_dqentry(struct lustre_dquot *dquot, uint blk) +/** + * Free dquot entry in data block + */ +static int free_dqentry(struct lustre_dquot *dquot, uint blk, + lustre_quota_version_t version) { struct file *filp = dquot->dq_info->qi_files[dquot->dq_type]; struct lustre_mem_dqinfo *info = &dquot->dq_info->qi_info[dquot->dq_type]; struct lustre_disk_dqdbheader *dh; dqbuf_t buf = getdqbuf(); + int dqstrinblk = lustre_dqstrinblk[version]; int ret = 0; if (!buf) @@ -573,10 +638,9 @@ static int free_dqentry(struct lustre_dquot *dquot, uint blk) goto out_buf; } } else { - memset(buf + - (dquot->dq_off & ((1 << LUSTRE_DQBLKSIZE_BITS) - 1)), 0, - sizeof(struct lustre_disk_dqblk)); - if (le16_to_cpu(dh->dqdh_entries) == LUSTRE_DQSTRINBLK - 1) { + memset(buf + (dquot->dq_off & ((1<dqdh_entries) == dqstrinblk - 1) { /* Insert will write block itself */ if ((ret = insert_free_dqentry(filp, info, buf, blk)) < 0) { @@ -597,8 +661,11 @@ out_buf: return ret; } -/* Remove reference to dquot from tree */ -static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth) +/** + * 
Remove reference to dquot from tree + */ +static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth, + lustre_quota_version_t version) { struct file *filp = dquot->dq_info->qi_files[dquot->dq_type]; struct lustre_mem_dqinfo *info = @@ -616,14 +683,15 @@ static int remove_tree(struct lustre_dquot *dquot, uint * blk, int depth) } newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); if (depth == LUSTRE_DQTREEDEPTH - 1) { - ret = free_dqentry(dquot, newblk); + ret = free_dqentry(dquot, newblk, version); newblk = 0; } else - ret = remove_tree(dquot, &newblk, depth + 1); + ret = remove_tree(dquot, &newblk, depth + 1, version); if (ret >= 0 && !newblk) { int i; ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0); - for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++) ; /* Block got empty? */ + for (i = 0; i < LUSTRE_DQBLKSIZE && !buf[i]; i++) + /* Block got empty? */ ; /* don't put the root block into free blk list! */ if (i == LUSTRE_DQBLKSIZE && *blk != LUSTRE_DQTREEOFF) { put_free_dqblk(filp, info, buf, *blk); @@ -637,24 +705,34 @@ out_buf: return ret; } -/* Delete dquot from tree */ -static int lustre_delete_dquot(struct lustre_dquot *dquot) +/** + * Delete dquot from tree + */ +static int lustre_delete_dquot(struct lustre_dquot *dquot, + lustre_quota_version_t version) { uint tmp = LUSTRE_DQTREEOFF; if (!dquot->dq_off) /* Even not allocated? 
*/ return 0; - return remove_tree(dquot, &tmp, 0); + return remove_tree(dquot, &tmp, 0, version); } -/* Find entry in block */ -static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk) +/** + * Find entry in block + */ +static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk, + lustre_quota_version_t version) { struct file *filp = dquot->dq_info->qi_files[dquot->dq_type]; dqbuf_t buf = getdqbuf(); loff_t ret = 0; int i; - struct lustre_disk_dqblk *ddquot = GETENTRIES(buf); + struct lustre_disk_dqblk_v2 *ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version); + int dqblk_sz = lustre_disk_dqblk_sz[version]; + int dqstrinblk = lustre_dqstrinblk[version]; + + LASSERT(version == LUSTRE_QUOTA_V2); if (!buf) return -ENOMEM; @@ -663,20 +741,20 @@ static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk) goto out_buf; } if (dquot->dq_id) - for (i = 0; - i < LUSTRE_DQSTRINBLK - && le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++) ; + for (i = 0; i < dqstrinblk && + le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; + i++) ; else { /* ID 0 as a bit more complicated searching... 
*/ - struct lustre_disk_dqblk fakedquot; + char fakedquot[dqblk_sz]; - memset(&fakedquot, 0, sizeof(struct lustre_disk_dqblk)); - for (i = 0; i < LUSTRE_DQSTRINBLK; i++) + memset(fakedquot, 0, sizeof(fakedquot)); + for (i = 0; i < dqstrinblk; i++) if (!le32_to_cpu(ddquot[i].dqb_id) - && memcmp(&fakedquot, ddquot + i, - sizeof(struct lustre_disk_dqblk))) + && memcmp(fakedquot, ddquot + i, + dqblk_sz)) break; } - if (i == LUSTRE_DQSTRINBLK) { + if (i == dqstrinblk) { CDEBUG(D_ERROR, "VFS: Quota for id %u referenced but not present.\n", dquot->dq_id); @@ -686,14 +764,17 @@ static loff_t find_block_dqentry(struct lustre_dquot *dquot, uint blk) ret = (blk << LUSTRE_DQBLKSIZE_BITS) + sizeof(struct lustre_disk_dqdbheader) + - i * sizeof(struct lustre_disk_dqblk); + i * dqblk_sz; out_buf: freedqbuf(buf); return ret; } -/* Find entry for given id in the tree */ -static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth) +/** + * Find entry for given id in the tree + */ +static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth, + lustre_quota_version_t version) { struct file *filp = dquot->dq_info->qi_files[dquot->dq_type]; dqbuf_t buf = getdqbuf(); @@ -711,18 +792,21 @@ static loff_t find_tree_dqentry(struct lustre_dquot *dquot, uint blk, int depth) if (!blk) /* No reference? 
*/ goto out_buf; if (depth < LUSTRE_DQTREEDEPTH - 1) - ret = find_tree_dqentry(dquot, blk, depth + 1); + ret = find_tree_dqentry(dquot, blk, depth + 1, version); else - ret = find_block_dqentry(dquot, blk); + ret = find_block_dqentry(dquot, blk, version); out_buf: freedqbuf(buf); return ret; } -/* Find entry for given id in the tree - wrapper function */ -static inline loff_t find_dqentry(struct lustre_dquot *dquot) +/** + * Find entry for given id in the tree - wrapper function + */ +static inline loff_t find_dqentry(struct lustre_dquot *dquot, + lustre_quota_version_t version) { - return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0); + return find_tree_dqentry(dquot, LUSTRE_DQTREEOFF, 0, version); } int lustre_read_dquot(struct lustre_dquot *dquot) @@ -731,8 +815,8 @@ int lustre_read_dquot(struct lustre_dquot *dquot) struct file *filp; mm_segment_t fs; loff_t offset; - struct lustre_disk_dqblk ddquot, empty; - int ret = 0; + int ret = 0, dqblk_sz; + lustre_quota_version_t version; /* Invalidated quota? */ if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) { @@ -740,7 +824,11 @@ int lustre_read_dquot(struct lustre_dquot *dquot) return -EIO; } - offset = find_dqentry(dquot); + version = dquot->dq_info->qi_version; + LASSERT(version == LUSTRE_QUOTA_V2); + dqblk_sz = lustre_disk_dqblk_sz[version]; + + offset = find_dqentry(dquot, version); if (offset <= 0) { /* Entry not present? 
*/ if (offset < 0) CDEBUG(D_ERROR, @@ -748,42 +836,46 @@ int lustre_read_dquot(struct lustre_dquot *dquot) dquot->dq_id); dquot->dq_off = 0; set_bit(DQ_FAKE_B, &dquot->dq_flags); - memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); + memset(&dquot->dq_dqb, 0, sizeof(struct lustre_mem_dqblk)); ret = offset; } else { + char ddquot[dqblk_sz], empty[dqblk_sz]; + dquot->dq_off = offset; fs = get_fs(); set_fs(KERNEL_DS); - if ((ret = filp->f_op->read(filp, (char *)&ddquot, - sizeof(struct lustre_disk_dqblk), - &offset)) != - sizeof(struct lustre_disk_dqblk)) { + if ((ret = filp->f_op->read(filp, ddquot, dqblk_sz, &offset)) != + dqblk_sz) { if (ret >= 0) ret = -EIO; CDEBUG(D_ERROR, "VFS: Error while reading quota structure for id %u.\n", dquot->dq_id); - memset(&ddquot, 0, sizeof(struct lustre_disk_dqblk)); + memset(ddquot, 0, dqblk_sz); } else { ret = 0; /* We need to escape back all-zero structure */ - memset(&empty, 0, sizeof(struct lustre_disk_dqblk)); - empty.dqb_itime = cpu_to_le64(1); - if (!memcmp(&empty, &ddquot, - sizeof(struct lustre_disk_dqblk))) - ddquot.dqb_itime = 0; + memset(empty, 0, dqblk_sz); + ((struct lustre_disk_dqblk_v2 *)empty)->dqb_itime = cpu_to_le64(1); + if (!memcmp(empty, ddquot, dqblk_sz)) + ((struct lustre_disk_dqblk_v2 *)empty)->dqb_itime = cpu_to_le64(0); } set_fs(fs); - disk2memdqb(&dquot->dq_dqb, &ddquot); + disk2memdqb(&dquot->dq_dqb, ddquot, version); } return ret; } -/* Commit changes of dquot to disk - it might also mean deleting it when quota became fake */ +/** + * Commit changes of dquot to disk - it might also mean deleting + * it when quota became fake. + */ int lustre_commit_dquot(struct lustre_dquot *dquot) { int rc = 0; + lustre_quota_version_t version = dquot->dq_info->qi_version; + /* always clear the flag so we don't loop on an IO error... 
*/ clear_bit(DQ_MOD_B, &dquot->dq_flags); @@ -791,9 +883,9 @@ int lustre_commit_dquot(struct lustre_dquot *dquot) * over all cluster, so keep the fake dquot entry on disk is * meaningless, just remove it */ if (test_bit(DQ_FAKE_B, &dquot->dq_flags)) - rc = lustre_delete_dquot(dquot); + rc = lustre_delete_dquot(dquot, version); else - rc = lustre_write_dquot(dquot); + rc = lustre_write_dquot(dquot, version); if (rc < 0) return rc; @@ -804,21 +896,20 @@ int lustre_commit_dquot(struct lustre_dquot *dquot) return rc; } -/* We need to export this function to initialize quotafile, because we haven't - * user level check utility */ -int lustre_init_quota_info(struct lustre_quota_info *lqi, int type) +int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, int fakemagics) { - struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type]; + static const uint quota_magics[] = LUSTRE_INITQMAGICS; + static const uint fake_magics[] = LUSTRE_BADQMAGICS; + const uint* quota_versions = lustre_initqversions[lqi->qi_version]; struct lustre_disk_dqheader dqhead; - struct file *fp = lqi->qi_files[type]; ssize_t size; loff_t offset = 0; + struct file *fp = lqi->qi_files[type]; int rc = 0; - static const uint quota_magics[] = LUSTRE_INITQMAGICS; - static const uint quota_versions[] = LUSTRE_INITQVERSIONS; /* write quotafile header */ - dqhead.dqh_magic = cpu_to_le32(quota_magics[type]); + dqhead.dqh_magic = cpu_to_le32(fakemagics ? 
+ fake_magics[type] : quota_magics[type]); dqhead.dqh_version = cpu_to_le32(quota_versions[type]); size = fp->f_op->write(fp, (char *)&dqhead, sizeof(struct lustre_disk_dqheader), &offset); @@ -827,6 +918,21 @@ int lustre_init_quota_info(struct lustre_quota_info *lqi, int type) CDEBUG(D_ERROR, "error writing quoafile header (rc:%d)\n", rc); rc = size; } + + return rc; +} + +/** + * We need to export this function to initialize quotafile, because we haven't + * user level check utility + */ +int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type, + int fakemagics) +{ + struct lustre_mem_dqinfo *dqinfo = &lqi->qi_info[type]; + int rc; + + rc = lustre_init_quota_header(lqi, type, fakemagics); if (rc) return rc; @@ -839,13 +945,13 @@ int lustre_init_quota_info(struct lustre_quota_info *lqi, int type) return lustre_write_quota_info(lqi, type); } -struct dqblk { - struct list_head link; - uint blk; -}; +int lustre_init_quota_info(struct lustre_quota_info *lqi, int type) +{ + return lustre_init_quota_info_generic(lqi, type, 0); +} -static ssize_t quota_read(struct file *file, struct inode *inode, int type, - uint blk, dqbuf_t buf) +ssize_t quota_read(struct file *file, struct inode *inode, int type, + uint blk, dqbuf_t buf) { if (file) { return read_blk(file, blk, buf); @@ -913,8 +1019,8 @@ out_buf: return ret; } -static int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, - uint blk, int depth, struct list_head *list) +int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, + uint blk, int depth, struct list_head *list) { dqbuf_t buf = getdqbuf(); loff_t ret = 0; @@ -935,7 +1041,7 @@ static int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, continue; if (depth < LUSTRE_DQTREEDEPTH - 1) - ret = walk_tree_dqentry(filp, inode, type, blk, + ret = walk_tree_dqentry(filp, inode, type, blk, depth + 1, list); else ret = walk_block_dqentry(filp, inode, type, blk, list); @@ -945,67 +1051,71 @@ out_buf: 
return ret; } -/* Walk through the quota file (v2 format) to get all ids with quota limit */ +/** + * Walk through the quota file (v2 format) to get all ids with quota limit + */ int lustre_get_qids(struct file *fp, struct inode *inode, int type, struct list_head *list) { struct list_head blk_list; struct dqblk *blk_item, *tmp; dqbuf_t buf = NULL; - struct lustre_disk_dqblk *ddquot; + struct lustre_disk_dqblk_v2 *ddquot; int rc; + lustre_quota_version_t version; + + ENTRY; - if (!check_quota_file(fp, inode, type)) { + if (check_quota_file(fp, inode, type, LUSTRE_QUOTA_V2) == 0) + version = LUSTRE_QUOTA_V2; + else { CDEBUG(D_ERROR, "unknown quota file format!\n"); - return -EINVAL; + RETURN(-EINVAL); } + if (!list_empty(list)) { CDEBUG(D_ERROR, "not empty list\n"); - return -EINVAL; + RETURN(-EINVAL); } INIT_LIST_HEAD(&blk_list); rc = walk_tree_dqentry(fp, inode, type, LUSTRE_DQTREEOFF, 0, &blk_list); if (rc) { CDEBUG(D_ERROR, "walk through quota file failed!(%d)\n", rc); - goto out_free; + GOTO(out_free, rc); } if (list_empty(&blk_list)) - return 0; + RETURN(0); buf = getdqbuf(); if (!buf) - return -ENOMEM; - ddquot = GETENTRIES(buf); + RETURN(-ENOMEM); + ddquot = (struct lustre_disk_dqblk_v2 *)GETENTRIES(buf, version); list_for_each_entry(blk_item, &blk_list, link) { loff_t ret = 0; - int i; - struct lustre_disk_dqblk fakedquot; + int i, dqblk_sz = lustre_disk_dqblk_sz[version]; + char fakedquot[dqblk_sz]; memset(buf, 0, LUSTRE_DQBLKSIZE); if ((ret = quota_read(fp, inode, type, blk_item->blk, buf))<0) { CDEBUG(D_ERROR, "VFS: Can't read quota tree block %u.\n", blk_item->blk); - rc = ret; - goto out_free; + GOTO(out_free, rc = ret); } - memset(&fakedquot, 0, sizeof(struct lustre_disk_dqblk)); - for (i = 0; i < LUSTRE_DQSTRINBLK; i++) { + memset(fakedquot, 0, dqblk_sz); + for (i = 0; i < lustre_dqstrinblk[version]; i++) { struct dquot_id *dqid; /* skip empty entry */ - if (!memcmp - (&fakedquot, ddquot + i, - sizeof(struct lustre_disk_dqblk))) + if 
(!memcmp(fakedquot, ddquot + i, dqblk_sz)) continue; dqid = kmalloc(sizeof(*dqid), GFP_NOFS); - if (!dqid) { - rc = -ENOMEM; - goto out_free; - } + if (!dqid) + GOTO(out_free, rc = -ENOMEM); + dqid->di_id = le32_to_cpu(ddquot[i].dqb_id); INIT_LIST_HEAD(&dqid->di_link); list_add(&dqid->di_link, list); @@ -1019,13 +1129,16 @@ out_free: } if (buf) freedqbuf(buf); - return rc; + + RETURN(rc); } -EXPORT_SYMBOL(lustre_check_quota_file); + EXPORT_SYMBOL(lustre_read_quota_info); EXPORT_SYMBOL(lustre_write_quota_info); +EXPORT_SYMBOL(lustre_check_quota_file); EXPORT_SYMBOL(lustre_read_dquot); EXPORT_SYMBOL(lustre_commit_dquot); EXPORT_SYMBOL(lustre_init_quota_info); EXPORT_SYMBOL(lustre_get_qids); +#endif diff --git a/lustre/lvfs/lustre_quota_fmt.h b/lustre/lvfs/lustre_quota_fmt.h index ffdac51..4072509 100644 --- a/lustre/lvfs/lustre_quota_fmt.h +++ b/lustre/lvfs/lustre_quota_fmt.h @@ -41,6 +41,8 @@ #ifndef _LUSTRE_QUOTA_FMT_H #define _LUSTRE_QUOTA_FMT_H +#ifdef HAVE_QUOTA_SUPPORT + #include #include @@ -49,32 +51,49 @@ * Same with quota v2's magic */ #define LUSTRE_INITQMAGICS {\ - 0xd9c01f11, /* USRQUOTA */\ - 0xd9c01927 /* GRPQUOTA */\ + 0xd9c01f11, /** USRQUOTA */\ + 0xd9c01927 /** GRPQUOTA */\ +} + +/* Invalid magics that mark quota file as inconsistent */ +#define LUSTRE_BADQMAGICS {\ + 0xbadbadba, /** USRQUOTA */\ + 0xbadbadba /** GRPQUOTA */\ } -#define LUSTRE_INITQVERSIONS {\ - 0, /* USRQUOTA */\ - 0 /* GRPQUOTA */\ +/* for the verson 2 of lustre_disk_dqblk*/ +#define LUSTRE_INITQVERSIONS_V2 {\ + 1, /* USRQUOTA */\ + 1 /* GRPQUOTA */\ } /* * The following structure defines the format of the disk quota file * (as it appears on disk) - the file is a radix tree whose leaves point - * to blocks of these structures. + * to blocks of these structures. for the version 2. 
*/ -struct lustre_disk_dqblk { - __u32 dqb_id; /* id this quota applies to */ - __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ - __u32 dqb_isoftlimit; /* preferred inode limit */ - __u32 dqb_curinodes; /* current # allocated inodes */ - __u32 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */ - __u32 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */ - __u64 dqb_curspace; /* current space occupied (in bytes) */ - __u64 dqb_btime; /* time limit for excessive disk use */ - __u64 dqb_itime; /* time limit for excessive inode use */ +struct lustre_disk_dqblk_v2 { + __u32 dqb_id; /**< id this quota applies to */ + __u32 padding; + __u64 dqb_ihardlimit; /**< absolute limit on allocated inodes */ + __u64 dqb_isoftlimit; /**< preferred inode limit */ + __u64 dqb_curinodes; /**< current # allocated inodes */ + __u64 dqb_bhardlimit; /**< absolute limit on disk space (in QUOTABLOCK_SIZE) */ + __u64 dqb_bsoftlimit; /**< preferred limit on disk space (in QUOTABLOCK_SIZE) */ + __u64 dqb_curspace; /**< current space occupied (in bytes) */ + __u64 dqb_btime; /**< time limit for excessive disk use */ + __u64 dqb_itime; /**< time limit for excessive inode use */ }; +/* Number of entries in one blocks(14 entries) */ +#define LUSTRE_DQSTRINBLK_V2 \ + ((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) \ + / sizeof(struct lustre_disk_dqblk_v2)) +#define GETENTRIES_V2(buf) (((char *)buf)+sizeof(struct lustre_disk_dqdbheader)) + +#define GETENTRIES(buf,version) ((version == LUSTRE_QUOTA_V2) ? 
\ + GETENTRIES_V2(buf) : 0) + /* * Here are header structures as written on disk and their in-memory copies */ @@ -117,6 +136,62 @@ static void lprocfs_quotfmt_test_init_vars(struct lprocfs_static_vars *lvars) {} #define LUSTRE_DQBLKSIZE (1 << LUSTRE_DQBLKSIZE_BITS) /* Size of block with quota structures */ #define LUSTRE_DQTREEOFF 1 /* Offset of tree in file in blocks */ #define LUSTRE_DQTREEDEPTH 4 /* Depth of quota tree */ -#define LUSTRE_DQSTRINBLK ((LUSTRE_DQBLKSIZE - sizeof(struct lustre_disk_dqdbheader)) / sizeof(struct lustre_disk_dqblk)) /* Number of entries in one blocks */ +typedef char *dqbuf_t; + +#define GETIDINDEX(id, depth) (((id) >> ((LUSTRE_DQTREEDEPTH-(depth)-1)*8)) & 0xff) + +#define MAX_UL (0xffffffffUL) + +#define lustre_info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) + +struct dqblk { + struct list_head link; + uint blk; +}; + +/* come from lustre_fmt_common.c */ +dqbuf_t getdqbuf(void); +void freedqbuf(dqbuf_t buf); +void disk2memdqb(struct lustre_mem_dqblk *m, void *d, + enum lustre_quota_version version); +void lustre_mark_info_dirty(struct lustre_mem_dqinfo *info); +int lustre_init_quota_header(struct lustre_quota_info *lqi, int type, + int fakemagics); +int lustre_init_quota_info_generic(struct lustre_quota_info *lqi, int type, + int fakemagics); +int lustre_read_quota_info(struct lustre_quota_info *lqi, int type); +int lustre_read_quota_file_info(struct file* f, struct lustre_mem_dqinfo* info); +int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); +ssize_t read_blk(struct file *filp, uint blk, dqbuf_t buf); +ssize_t write_blk(struct file *filp, uint blk, dqbuf_t buf); +int get_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info); +int put_free_dqblk(struct file *filp, struct lustre_mem_dqinfo *info, + dqbuf_t buf, uint blk); +int remove_free_dqentry(struct file *filp, + struct lustre_mem_dqinfo *info, dqbuf_t buf, + uint blk); +int insert_free_dqentry(struct file *filp, + struct 
lustre_mem_dqinfo *info, dqbuf_t buf, + uint blk); +ssize_t quota_read(struct file *file, struct inode *inode, int type, + uint blk, dqbuf_t buf); +int walk_tree_dqentry(struct file *filp, struct inode *inode, int type, + uint blk, int depth, struct list_head *list); +int check_quota_file(struct file *f, struct inode *inode, int type, + lustre_quota_version_t version); +int lustre_check_quota_file(struct lustre_quota_info *lqi, int type); +int lustre_read_dquot(struct lustre_dquot *dquot); +int lustre_commit_dquot(struct lustre_dquot *dquot); +int lustre_init_quota_info(struct lustre_quota_info *lqi, int type); +int lustre_get_qids(struct file *fp, struct inode *inode, int type, + struct list_head *list); + +#define LUSTRE_ADMIN_QUOTAFILES_V2 {\ + "admin_quotafile_v2.usr", /* user admin quotafile */\ + "admin_quotafile_v2.grp" /* group admin quotafile */\ +} + +#define LUSTRE_OPQFILES_NAMES_V2 { "lquota_v2.user", "lquota_v2.group" } #endif /* lustre_quota_fmt.h */ +#endif diff --git a/lustre/lvfs/quotafmt_test.c b/lustre/lvfs/quotafmt_test.c index de6c32e..b360570 100644 --- a/lustre/lvfs/quotafmt_test.c +++ b/lustre/lvfs/quotafmt_test.c @@ -57,6 +57,8 @@ #include "lustre_quota_fmt.h" +#ifdef HAVE_QUOTA_SUPPORT + char *test_quotafile[2] = { "usrquota_test", "grpquota_test" }; static int quotfmt_initialize(struct lustre_quota_info *lqi, @@ -65,7 +67,7 @@ static int quotfmt_initialize(struct lustre_quota_info *lqi, { struct lustre_disk_dqheader dqhead; static const uint quota_magics[] = LUSTRE_INITQMAGICS; - static const uint quota_versions[] = LUSTRE_INITQVERSIONS; + static const uint quota_versions[] = LUSTRE_INITQVERSIONS_V2; struct file *fp; struct inode *parent_inode = tgt->obd_lvfs_ctxt.pwd->d_inode; size_t size; @@ -107,7 +109,7 @@ static int quotfmt_initialize(struct lustre_quota_info *lqi, sizeof(struct lustre_disk_dqheader), &offset); if (size != sizeof(struct lustre_disk_dqheader)) { - CERROR("error writing quoafile header %s (rc = %d)\n", + CERROR("error 
writing quotafile header %s (rc = %d)\n", name, rc); rc = size; break; @@ -166,7 +168,7 @@ static int quotfmt_test_1(struct lustre_quota_info *lqi) ENTRY; for (i = 0; i < MAXQUOTAS; i++) { - if (!lustre_check_quota_file(lqi, i)) + if (lustre_check_quota_file(lqi, i)) RETURN(-EINVAL); } RETURN(0); @@ -256,7 +258,7 @@ static void put_rand_dquot(struct lustre_dquot *dquot) static int write_check_dquot(struct lustre_quota_info *lqi) { struct lustre_dquot *dquot; - struct mem_dqblk dqblk; + struct lustre_mem_dqblk dqblk; int rc = 0; ENTRY; @@ -541,3 +543,5 @@ MODULE_LICENSE("GPL"); module_init(quotfmt_test_init); module_exit(quotfmt_test_exit); + +#endif /* HAVE_QUOTA_SUPPORT */ diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 62b85bf..c259f2b 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -78,25 +78,6 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data); void mdc_enter_request(struct client_obd *cli); void mdc_exit_request(struct client_obd *cli); -static inline int client_is_remote(struct obd_export *exp) -{ - struct obd_import *imp = class_exp2cliimp(exp); - - if (imp->imp_connect_flags_orig & OBD_CONNECT_RMT_CLIENT) { - if (!(imp->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_RMT_CLIENT)) - return 0; - else - return 1; - } else { - if (!(imp->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_LCL_CLIENT)) - return 1; - else - return 0; - } -} - /* mdc/mdc_locks.c */ int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 4789295..0ca79b9 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -57,33 +57,34 @@ #include #include "mdc_internal.h" -quota_interface_t *quota_interface; - #define REQUEST_MINOR 244 +static quota_interface_t *quota_interface; extern quota_interface_t mdc_quota_interface; static int mdc_cleanup(struct obd_device *obd); -static struct obd_capa 
*mdc_unpack_capa(struct ptlrpc_request *req, - const struct req_msg_field *field) +int mdc_unpack_capa(struct obd_export *exp, struct ptlrpc_request *req, + const struct req_msg_field *field, struct obd_capa **oc) { struct lustre_capa *capa; - struct obd_capa *oc; + struct obd_capa *c; + ENTRY; /* swabbed already in mdc_enqueue */ capa = req_capsule_server_get(&req->rq_pill, field); if (capa == NULL) - return ERR_PTR(-EPROTO); + RETURN(-EPROTO); - oc = alloc_capa(CAPA_SITE_CLIENT); - if (!oc) { + c = alloc_capa(CAPA_SITE_CLIENT); + if (IS_ERR(c)) { CDEBUG(D_INFO, "alloc capa failed!\n"); - return ERR_PTR(-ENOMEM); + RETURN(PTR_ERR(c)); + } else { + c->c_capa = *capa; + *oc = c; + RETURN(0); } - oc->c_capa = *capa; - - return oc; } /* Helper that implements most of mdc_getstatus and signal_completed_replay. */ @@ -116,12 +117,9 @@ static int send_getstatus(struct obd_import *imp, struct lu_fid *rootfid, GOTO(out, rc = -EPROTO); if (body->valid & OBD_MD_FLMDSCAPA) { - struct obd_capa *oc; - - oc = mdc_unpack_capa(req, &RMF_CAPA1); - if (IS_ERR(oc)) - GOTO(out, rc = PTR_ERR(oc)); - *pc = oc; + rc = mdc_unpack_capa(NULL, req, &RMF_CAPA1, pc); + if (rc) + GOTO(out, rc); } *rootfid = body->fid1; @@ -584,28 +582,34 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, } } if (md->body->valid & OBD_MD_FLMDSCAPA) { - struct obd_capa *oc = mdc_unpack_capa(req, &RMF_CAPA1); + struct obd_capa *oc = NULL; - if (IS_ERR(oc)) - GOTO(out, rc = PTR_ERR(oc)); + rc = mdc_unpack_capa(NULL, req, &RMF_CAPA1, &oc); + if (rc) + GOTO(out, rc); md->mds_capa = oc; } if (md->body->valid & OBD_MD_FLOSSCAPA) { - struct obd_capa *oc = mdc_unpack_capa(req, &RMF_CAPA2); + struct obd_capa *oc = NULL; - if (IS_ERR(oc)) - GOTO(out, rc = PTR_ERR(oc)); + rc = mdc_unpack_capa(NULL, req, &RMF_CAPA2, &oc); + if (rc) + GOTO(out, rc); md->oss_capa = oc; } EXIT; out: if (rc) { - if (md->oss_capa) - free_capa(md->oss_capa); - if (md->mds_capa) - free_capa(md->mds_capa); + if 
(md->oss_capa) { + capa_put(md->oss_capa); + md->oss_capa = NULL; + } + if (md->mds_capa) { + capa_put(md->mds_capa); + md->mds_capa = NULL; + } #ifdef CONFIG_FS_POSIX_ACL posix_acl_release(md->posix_acl); #endif @@ -1689,6 +1693,8 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf) default: rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars, lcfg, obd); + if (rc > 0) + rc = 0; break; } return(rc); @@ -1862,13 +1868,12 @@ struct md_ops mdc_md_ops = { .m_set_open_replay_data = mdc_set_open_replay_data, .m_clear_open_replay_data = mdc_clear_open_replay_data, .m_renew_capa = mdc_renew_capa, + .m_unpack_capa = mdc_unpack_capa, .m_get_remote_perm = mdc_get_remote_perm, .m_intent_getattr_async = mdc_intent_getattr_async, .m_revalidate_lock = mdc_revalidate_lock }; -extern quota_interface_t mdc_quota_interface; - int __init mdc_init(void) { int rc; diff --git a/lustre/mdd/Makefile.in b/lustre/mdd/Makefile.in index f1568ea..bfecc0c 100644 --- a/lustre/mdd/Makefile.in +++ b/lustre/mdd/Makefile.in @@ -1,6 +1,6 @@ MODULES := mdd mdd-objs := mdd_object.o mdd_lov.o mdd_orphans.o mdd_lproc.o mdd_dir.o -mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o +mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o mdd_quota.o EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index b84f3b4..26a905f 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -146,7 +146,7 @@ static int mdd_process_config(const struct lu_env *env, lprocfs_mdd_init_vars(&lvars); rc = class_process_proc_param(PARAM_MDD, lvars.obd_vars, cfg,m); - if (rc == -ENOSYS) + if (rc > 0 || rc == -ENOSYS) /* we don't understand; pass it on */ rc = next->ld_ops->ldo_process_config(env, next, cfg); break; @@ -406,6 +406,25 @@ const struct md_device_operations mdd_ops = { .mdo_maxsize_get = mdd_maxsize_get, .mdo_init_capa_ctxt = mdd_init_capa_ctxt, 
.mdo_update_capa_key= mdd_update_capa_key, +#ifdef HAVE_QUOTA_SUPPORT + .mdo_quota = { + .mqo_notify = mdd_quota_notify, + .mqo_setup = mdd_quota_setup, + .mqo_cleanup = mdd_quota_cleanup, + .mqo_recovery = mdd_quota_recovery, + .mqo_check = mdd_quota_check, + .mqo_on = mdd_quota_on, + .mqo_off = mdd_quota_off, + .mqo_setinfo = mdd_quota_setinfo, + .mqo_getinfo = mdd_quota_getinfo, + .mqo_setquota = mdd_quota_setquota, + .mqo_getquota = mdd_quota_getquota, + .mqo_getoinfo = mdd_quota_getoinfo, + .mqo_getoquota = mdd_quota_getoquota, + .mqo_invalidate = mdd_quota_invalidate, + .mqo_finvalidate = mdd_quota_finvalidate + } +#endif }; static struct lu_device_type_operations mdd_device_type_ops = { diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 3d96f45..7450c1e 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -514,10 +514,13 @@ static int __mdd_index_insert(const struct lu_env *env, struct mdd_object *pobj, ENTRY; if (dt_try_as_dir(env, next)) { + struct md_ucred *uc = md_ucred(env); + rc = next->do_index_ops->dio_insert(env, next, __mdd_fid_rec(env, lf), (const struct dt_key *)name, - handle, capa); + handle, capa, uc->mu_cap & + CFS_CAP_SYS_RESOURCE_MASK); } else { rc = -ENOTDIR; } @@ -570,10 +573,13 @@ __mdd_index_insert_only(const struct lu_env *env, struct mdd_object *pobj, ENTRY; if (dt_try_as_dir(env, next)) { + struct md_ucred *uc = md_ucred(env); + rc = next->do_index_ops->dio_insert(env, next, __mdd_fid_rec(env, lf), (const struct dt_key *)name, - handle, capa); + handle, capa, uc->mu_cap & + CFS_CAP_SYS_RESOURCE_MASK); } else { rc = -ENOTDIR; } @@ -591,13 +597,35 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, struct mdd_device *mdd = mdo2mdd(src_obj); struct dynlock_handle *dlh; struct thandle *handle; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, rec_pending = 0; +#endif int 
rc; ENTRY; +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_tobj, la_tmp, BYPASS_CAPA); + if (!rc) { + quota_opc = FSFILT_OP_LINK; + mdd_quota_wrapper(la_tmp, qids); + /* get block quota for parent */ + lquota_chkquota(mds_quota_interface_ref, obd, + qids[USRQUOTA], qids[GRPQUOTA], 1, + &rec_pending, NULL, LQUOTA_FLAGS_BLK); + } + } +#endif + mdd_txn_param_build(env, mdd, MDD_TXN_LINK_OP); handle = mdd_trans_start(env, mdd); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out_pending, rc = PTR_ERR(handle)); dlh = mdd_pdo_write_lock(env, mdd_tobj, name, MOR_TGT_CHILD); if (dlh == NULL) @@ -632,6 +660,19 @@ out_unlock: mdd_pdo_write_unlock(env, mdd_tobj, dlh); out_trans: mdd_trans_stop(env, mdd, rc, handle); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) { + if (rec_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qids[USRQUOTA], qids[GRPQUOTA], + 1, 1); + /* Trigger dqacq for the parent owner. If failed, + * the next call for lquota_chkquota will process it. 
*/ + lquota_adjust(mds_quota_interface_ref, obd, 0, qids, rc, + quota_opc); + } +#endif return rc; } @@ -689,6 +730,13 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, struct mdd_device *mdd = mdo2mdd(pobj); struct dynlock_handle *dlh; struct thandle *handle; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qcids[MAXQUOTAS] = { 0, 0 }; + unsigned int qpids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0; +#endif int rc, is_dir; ENTRY; @@ -738,6 +786,23 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, GOTO(cleanup, rc); rc = mdd_finish_unlink(env, mdd_cobj, ma, handle); +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota && ma->ma_valid & MA_INODE && + ma->ma_attr.la_nlink == 0) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_pobj, la_tmp, BYPASS_CAPA); + if (!rc) { + mdd_quota_wrapper(la_tmp, qpids); + if (mdd_cobj->mod_count == 0) { + quota_opc = FSFILT_OP_UNLINK; + mdd_quota_wrapper(&ma->ma_attr, qcids); + } else { + quota_opc = FSFILT_OP_UNLINK_PARTIAL_PARENT; + } + } + } +#endif if (rc == 0) obd_set_info_async(mdd2obd_dev(mdd)->u.mds.mds_osc_exp, @@ -749,6 +814,13 @@ cleanup: mdd_pdo_write_unlock(env, mdd_pobj, dlh); out_trans: mdd_trans_stop(env, mdd, rc, handle); +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) + /* Trigger dqrel on the owner of child and parent. If failed, + * the next call for lquota_chkquota will process it. 
*/ + lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, + quota_opc); +#endif return rc; } @@ -785,13 +857,41 @@ static int mdd_name_insert(const struct lu_env *env, struct dynlock_handle *dlh; struct thandle *handle; int is_dir = S_ISDIR(ma->ma_attr.la_mode); +#ifdef HAVE_QUOTA_SUPPORT + struct md_ucred *uc = md_ucred(env); + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, rec_pending = 0; + cfs_cap_t save = uc->mu_cap; +#endif int rc; ENTRY; +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + if (!(ma->ma_attr_flags & MDS_QUOTA_IGNORE)) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA); + if (!rc) { + quota_opc = FSFILT_OP_LINK; + mdd_quota_wrapper(la_tmp, qids); + /* get block quota for parent */ + lquota_chkquota(mds_quota_interface_ref, obd, + qids[USRQUOTA], qids[GRPQUOTA], + 1, &rec_pending, NULL, + LQUOTA_FLAGS_BLK); + } + } else { + uc->mu_cap |= CFS_CAP_SYS_RESOURCE_MASK; + } + } +#endif mdd_txn_param_build(env, mdd, MDD_TXN_INDEX_INSERT_OP); handle = mdd_trans_start(env, mdo2mdd(pobj)); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out_pending, rc = PTR_ERR(handle)); dlh = mdd_pdo_write_lock(env, mdd_obj, name, MOR_TGT_PARENT); if (dlh == NULL) @@ -823,6 +923,23 @@ out_unlock: mdd_pdo_write_unlock(env, mdd_obj, dlh); out_trans: mdd_trans_stop(env, mdo2mdd(pobj), rc, handle); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + if (quota_opc) { + if (rec_pending) + lquota_pending_commit(mds_quota_interface_ref, + obd, qids[USRQUOTA], + qids[GRPQUOTA], 1, 1); + /* Trigger dqacq for the parent owner. 
If failed, + * the next call for lquota_chkquota will process it*/ + lquota_adjust(mds_quota_interface_ref, obd, 0, qids, + rc, quota_opc); + } else { + uc->mu_cap = save; + } + } +#endif return rc; } @@ -858,13 +975,30 @@ static int mdd_name_remove(const struct lu_env *env, struct dynlock_handle *dlh; struct thandle *handle; int is_dir = S_ISDIR(ma->ma_attr.la_mode); +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0; +#endif int rc; ENTRY; +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA); + if (!rc) { + quota_opc = FSFILT_OP_UNLINK_PARTIAL_PARENT; + mdd_quota_wrapper(la_tmp, qids); + } + } +#endif mdd_txn_param_build(env, mdd, MDD_TXN_INDEX_DELETE_OP); handle = mdd_trans_start(env, mdd); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out_pending, rc = PTR_ERR(handle)); dlh = mdd_pdo_write_lock(env, mdd_obj, name, MOR_TGT_PARENT); if (dlh == NULL) @@ -896,6 +1030,14 @@ out_unlock: mdd_pdo_write_unlock(env, mdd_obj, dlh); out_trans: mdd_trans_stop(env, mdd, rc, handle); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + /* Trigger dqrel for the parent owner. + * If failed, the next call for lquota_chkquota will process it. 
*/ + if (quota_opc) + lquota_adjust(mds_quota_interface_ref, obd, 0, qids, rc, + quota_opc); +#endif return rc; } @@ -939,13 +1081,35 @@ static int mdd_rename_tgt(const struct lu_env *env, struct mdd_device *mdd = mdo2mdd(pobj); struct dynlock_handle *dlh; struct thandle *handle; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qcids[MAXQUOTAS] = { 0, 0 }; + unsigned int qpids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, rec_pending = 0; +#endif int rc; ENTRY; +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota && !tobj) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_tpobj, la_tmp, BYPASS_CAPA); + if (!rc) { + quota_opc = FSFILT_OP_LINK; + mdd_quota_wrapper(la_tmp, qpids); + /* get block quota for target parent */ + lquota_chkquota(mds_quota_interface_ref, obd, + qpids[USRQUOTA], qpids[GRPQUOTA], 1, + &rec_pending, NULL, LQUOTA_FLAGS_BLK); + } + } +#endif mdd_txn_param_build(env, mdd, MDD_TXN_RENAME_TGT_OP); handle = mdd_trans_start(env, mdd); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out_pending, rc = PTR_ERR(handle)); dlh = mdd_pdo_write_lock(env, mdd_tpobj, name, MOR_TGT_PARENT); if (dlh == NULL) @@ -998,6 +1162,14 @@ static int mdd_rename_tgt(const struct lu_env *env, rc = mdd_finish_unlink(env, mdd_tobj, ma, handle); if (rc) GOTO(cleanup, rc); + +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota && ma->ma_valid & MA_INODE && + ma->ma_attr.la_nlink == 0 && mdd_tobj->mod_count == 0) { + quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD; + mdd_quota_wrapper(&ma->ma_attr, qcids); + } +#endif } EXIT; cleanup: @@ -1006,6 +1178,22 @@ cleanup: mdd_pdo_write_unlock(env, mdd_tpobj, dlh); out_trans: mdd_trans_stop(env, mdd, rc, handle); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + if (rec_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qpids[USRQUOTA], + qpids[GRPQUOTA], + 1, 1); + if (quota_opc) + /* Trigger dqrel/dqacq on 
the target owner of child and + * parent. If failed, the next call for lquota_chkquota + * will process it. */ + lquota_adjust(mds_quota_interface_ref, obd, qcids, + qpids, rc, quota_opc); + } +#endif return rc; } @@ -1284,6 +1472,14 @@ static int mdd_create(const struct lu_env *env, char *name = lname->ln_name; int rc, created = 0, initialized = 0, inserted = 0, lmm_size = 0; int got_def_acl = 0; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qcids[MAXQUOTAS] = { 0, 0 }; + unsigned int qpids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, block_count = 0; + int inode_pending = 0, block_pending = 0, parent_pending = 0; +#endif ENTRY; /* @@ -1327,6 +1523,51 @@ static int mdd_create(const struct lu_env *env, if (rc) RETURN(rc); +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_pobj, la_tmp, BYPASS_CAPA); + if (!rc) { + int same = 0; + + quota_opc = FSFILT_OP_CREATE; + mdd_quota_wrapper(&ma->ma_attr, qcids); + mdd_quota_wrapper(la_tmp, qpids); + /* get file quota for child */ + lquota_chkquota(mds_quota_interface_ref, obd, + qcids[USRQUOTA], qcids[GRPQUOTA], 1, + &inode_pending, NULL, 0); + switch (ma->ma_attr.la_mode & S_IFMT) { + case S_IFLNK: + case S_IFDIR: + block_count = 2; + break; + case S_IFREG: + block_count = 1; + break; + } + if (qcids[USRQUOTA] == qpids[USRQUOTA] && + qcids[GRPQUOTA] == qpids[GRPQUOTA]) { + block_count += 1; + same = 1; + } + /* get block quota for child and parent */ + if (block_count) + lquota_chkquota(mds_quota_interface_ref, obd, + qcids[USRQUOTA], qcids[GRPQUOTA], + block_count, + &block_pending, NULL, + LQUOTA_FLAGS_BLK); + if (!same) + lquota_chkquota(mds_quota_interface_ref, obd, + qpids[USRQUOTA], qpids[GRPQUOTA], 1, + &parent_pending, NULL, + LQUOTA_FLAGS_BLK); + } + } +#endif + /* * No RPC inside the transaction, so OST objects should be created at * first. 
@@ -1335,7 +1576,7 @@ static int mdd_create(const struct lu_env *env, rc = mdd_lov_create(env, mdd, mdd_pobj, son, &lmm, &lmm_size, spec, attr); if (rc) - RETURN(rc); + GOTO(out_pending, rc); } if (!S_ISLNK(attr->la_mode)) { @@ -1422,6 +1663,7 @@ static int mdd_create(const struct lu_env *env, } if (S_ISLNK(attr->la_mode)) { + struct md_ucred *uc = md_ucred(env); struct dt_object *dt = mdd_object_child(son); const char *target_name = spec->u.sp_symname; int sym_len = strlen(target_name); @@ -1430,7 +1672,9 @@ static int mdd_create(const struct lu_env *env, buf = mdd_buf_get_const(env, target_name, sym_len); rc = dt->do_body_ops->dbo_write(env, dt, buf, &pos, handle, - mdd_object_capa(env, son)); + mdd_object_capa(env, son), + uc->mu_cap & + CFS_CAP_SYS_RESOURCE_MASK); if (rc == sym_len) rc = 0; @@ -1479,6 +1723,27 @@ out_trans: out_free: /* finis lov_create stuff, free all temporary data */ mdd_lov_create_finish(env, mdd, lmm, lmm_size, spec); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) { + if (inode_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qcids[USRQUOTA], qcids[GRPQUOTA], + 1, 0); + if (block_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qcids[USRQUOTA], qcids[GRPQUOTA], + block_count, 1); + if (parent_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qpids[USRQUOTA], qpids[GRPQUOTA], + 1, 1); + /* Trigger dqacq on the owner of child and parent. If failed, + * the next call for lquota_chkquota will process it. 
*/ + lquota_adjust(mds_quota_interface_ref, obd, qcids, qpids, rc, + quota_opc); + } +#endif return rc; } @@ -1581,8 +1846,15 @@ static int mdd_rename(const struct lu_env *env, struct mdd_object *mdd_tobj = NULL; struct dynlock_handle *sdlh, *tdlh; struct thandle *handle; - int is_dir; - int rc; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qspids[MAXQUOTAS] = { 0, 0 }; + unsigned int qtcids[MAXQUOTAS] = { 0, 0 }; + unsigned int qtpids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, rec_pending = 0; +#endif + int rc, is_dir; ENTRY; LASSERT(ma->ma_attr.la_mode & S_IFMT); @@ -1591,10 +1863,34 @@ static int mdd_rename(const struct lu_env *env, if (tobj) mdd_tobj = md2mdd_obj(tobj); +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_spobj, la_tmp, BYPASS_CAPA); + if (!rc) { + mdd_quota_wrapper(la_tmp, qspids); + if (!tobj) { + rc = mdd_la_get(env, mdd_tpobj, la_tmp, + BYPASS_CAPA); + if (!rc) { + quota_opc = FSFILT_OP_LINK; + mdd_quota_wrapper(la_tmp, qtpids); + /* get block quota for target parent */ + lquota_chkquota(mds_quota_interface_ref, + obd, qtpids[USRQUOTA], + qtpids[GRPQUOTA], 1, + &rec_pending, NULL, + LQUOTA_FLAGS_BLK); + } + } + } + } +#endif mdd_txn_param_build(env, mdd, MDD_TXN_RENAME_OP); handle = mdd_trans_start(env, mdd); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out_pending, rc = PTR_ERR(handle)); /* FIXME: Should consider tobj and sobj too in rename_lock. 
*/ rc = mdd_rename_order(env, mdd, mdd_spobj, mdd_tpobj); @@ -1680,6 +1976,14 @@ static int mdd_rename(const struct lu_env *env, mdd_write_unlock(env, mdd_tobj); if (rc) GOTO(cleanup, rc); + +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota && ma->ma_valid & MA_INODE && + ma->ma_attr.la_nlink == 0 && mdd_tobj->mod_count == 0) { + quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD; + mdd_quota_wrapper(&ma->ma_attr, qtcids); + } +#endif } la->la_valid = LA_CTIME | LA_MTIME; @@ -1703,6 +2007,27 @@ cleanup_unlocked: mdd_trans_stop(env, mdd, rc, handle); if (mdd_sobj) mdd_object_put(env, mdd_sobj); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + if (rec_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qtpids[USRQUOTA], + qtpids[GRPQUOTA], + 1, 1); + /* Trigger dqrel on the source owner of parent. + * If failed, the next call for lquota_chkquota will + * process it. */ + lquota_adjust(mds_quota_interface_ref, obd, 0, qspids, rc, + FSFILT_OP_UNLINK_PARTIAL_PARENT); + if (quota_opc) + /* Trigger dqrel/dqacq on the target owner of child and + * parent. If failed, the next call for lquota_chkquota + * will process it. 
*/ + lquota_adjust(mds_quota_interface_ref, obd, qtcids, + qtpids, rc, quota_opc); + } +#endif return rc; } diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index a2cdc61..16de10a 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -51,6 +51,21 @@ #include #include #include +#ifdef HAVE_QUOTA_SUPPORT +# include +#endif +#include + +#ifdef HAVE_QUOTA_SUPPORT +/* quota stuff */ +extern quota_interface_t *mds_quota_interface_ref; + +static inline void mdd_quota_wrapper(struct lu_attr *la, unsigned int *qids) +{ + qids[0] = la->la_uid; + qids[1] = la->la_gid; +} +#endif enum mdd_txn_op { MDD_TXN_OBJECT_DESTROY_OP = 0, @@ -146,6 +161,7 @@ struct mdd_thread_info { int mti_max_lmm_size; struct llog_cookie *mti_max_cookie; int mti_max_cookie_size; + struct obd_quotactl mti_oqctl; }; struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env, @@ -293,11 +309,44 @@ int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj); extern const struct md_dir_operations mdd_dir_ops; extern const struct md_object_operations mdd_obj_ops; +/* mdd_quota.c*/ +#ifdef HAVE_QUOTA_SUPPORT +int mdd_quota_notify(const struct lu_env *env, struct md_device *m); +int mdd_quota_setup(const struct lu_env *env, struct md_device *m, + void *data); +int mdd_quota_cleanup(const struct lu_env *env, struct md_device *m); +int mdd_quota_recovery(const struct lu_env *env, struct md_device *m); +int mdd_quota_check(const struct lu_env *env, struct md_device *m, + struct obd_export *exp, __u32 type); +int mdd_quota_on(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id); +int mdd_quota_off(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id); +int mdd_quota_setinfo(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo); +int mdd_quota_getinfo(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo); +int mdd_quota_setquota(const struct 
lu_env *env, struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk); +int mdd_quota_getquota(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk); +int mdd_quota_getoinfo(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo); +int mdd_quota_getoquota(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk); +int mdd_quota_invalidate(const struct lu_env *env, struct md_device *m, + __u32 type); +int mdd_quota_finvalidate(const struct lu_env *env, struct md_device *m, + __u32 type); +#endif + /* mdd_trans.c */ void mdd_txn_param_build(const struct lu_env *env, struct mdd_device *mdd, enum mdd_txn_op); int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj, struct md_attr *ma, enum mdd_txn_op); +int mdd_setattr_txn_param_build(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma, enum mdd_txn_op); static inline void mdd_object_put(const struct lu_env *env, struct mdd_object *o) diff --git a/lustre/mdd/mdd_lov.c b/lustre/mdd/mdd_lov.c index 38bc569..5e5bd18 100644 --- a/lustre/mdd/mdd_lov.c +++ b/lustre/mdd/mdd_lov.c @@ -145,7 +145,7 @@ int mdd_init_obd(const struct lu_env *env, struct mdd_device *mdd, /* * Add here for obd notify mechanism, when adding a new ost, the mds - * will notify this mdd. + * will notify this mdd. The mds will be used for quota also. 
*/ obd->obd_upcall.onu_upcall = mdd_notify; obd->obd_upcall.onu_owner = mdd; @@ -520,16 +520,11 @@ int mdd_lov_create(const struct lu_env *env, struct mdd_device *mdd, oa->o_valid |= OBD_MD_FLFID | OBD_MD_FLGENER; oinfo->oi_oa = oa; oinfo->oi_md = lsm; - oinfo->oi_capa = mdo_capa_get(env, child, NULL, - CAPA_OPC_MDS_DEFAULT); + oinfo->oi_capa = NULL; oinfo->oi_policy.l_extent.start = la->la_size; oinfo->oi_policy.l_extent.end = OBD_OBJECT_EOF; - if (IS_ERR(oinfo->oi_capa)) - oinfo->oi_capa = NULL; - rc = obd_punch_rqset(lov_exp, oinfo, oti); - capa_put(oinfo->oi_capa); if (rc) { CERROR("Error setting attrs for "DFID": rc %d\n", PFID(mdo2fid(child)), rc); @@ -752,7 +747,6 @@ int mdd_lov_setattr_async(const struct lu_env *env, struct mdd_object *obj, struct obd_device *obd = mdd2obd_dev(mdd); struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la; const struct lu_fid *fid = mdd_object_fid(obj); - struct obd_capa *oc; int rc = 0; ENTRY; @@ -762,15 +756,8 @@ int mdd_lov_setattr_async(const struct lu_env *env, struct mdd_object *obj, if (rc) RETURN(rc); - oc = mdo_capa_get(env, obj, NULL, CAPA_OPC_MDS_DEFAULT); - if (IS_ERR(oc)) - oc = NULL; - rc = mdd_osc_setattr_async(obd, tmp_la->la_uid, tmp_la->la_gid, lmm, lmm_size, logcookies, fid_seq(fid), - fid_oid(fid), oc); - - capa_put(oc); - + fid_oid(fid), NULL); RETURN(rc); } diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index 2d8bc67..9178114 100644 --- a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -150,8 +150,29 @@ static int lprocfs_rd_atime_diff(char *page, char **start, off_t off, return snprintf(page, count, "%lu\n", mdd->mdd_atime_diff); } +#ifdef HAVE_QUOTA_SUPPORT +static int mdd_lprocfs_quota_rd_type(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct mdd_device *mdd = data; + return lprocfs_quota_rd_type(page, start, off, count, eof, + mdd->mdd_obd_dev); +} + +static int mdd_lprocfs_quota_wr_type(struct file *file, const char *buffer, + unsigned long count, 
void *data) +{ + struct mdd_device *mdd = data; + return lprocfs_quota_wr_type(file, buffer, count, mdd->mdd_obd_dev); +} +#endif + static struct lprocfs_vars lprocfs_mdd_obd_vars[] = { { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, +#ifdef HAVE_QUOTA_SUPPORT + { "quota_type", mdd_lprocfs_quota_rd_type, + mdd_lprocfs_quota_wr_type, 0 }, +#endif { 0 } }; diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 5cf0a15..c13cdd6 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -840,9 +840,18 @@ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj, struct llog_cookie *logcookies = NULL; int rc, lmm_size = 0, cookie_size = 0; struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qnids[MAXQUOTAS] = { 0, 0 }; + unsigned int qoids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, block_count = 0; + int inode_pending = 0, block_pending = 0; +#endif ENTRY; - mdd_txn_param_build(env, mdd, MDD_TXN_ATTR_SET_OP); + mdd_setattr_txn_param_build(env, obj, (struct md_attr *)ma, + MDD_TXN_ATTR_SET_OP); handle = mdd_trans_start(env, mdd); if (IS_ERR(handle)) RETURN(PTR_ERR(handle)); @@ -871,6 +880,31 @@ static int mdd_attr_set(const struct lu_env *env, struct md_object *obj, if (rc) GOTO(cleanup, rc); +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota && la_copy->la_valid & (LA_UID | LA_GID)) { + struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la; + + rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA); + if (!rc) { + quota_opc = FSFILT_OP_SETATTR; + mdd_quota_wrapper(la_copy, qnids); + mdd_quota_wrapper(la_tmp, qoids); + /* get file quota for new owner */ + lquota_chkquota(mds_quota_interface_ref, obd, + qnids[USRQUOTA], qnids[GRPQUOTA], 1, + &inode_pending, NULL, 0); + block_count = (la_tmp->la_blocks + 7) >> 3; + if (block_count) + /* get block quota for new owner */ + 
lquota_chkquota(mds_quota_interface_ref, obd, + qnids[USRQUOTA], + qnids[GRPQUOTA], + block_count, &block_pending, + NULL, LQUOTA_FLAGS_BLK); + } + } +#endif + if (la_copy->la_valid & LA_FLAGS) { rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy, handle, 1); @@ -913,6 +947,23 @@ cleanup: rc = mdd_lov_setattr_async(env, mdd_obj, lmm, lmm_size, logcookies); } +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) { + if (inode_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qnids[USRQUOTA], qnids[GRPQUOTA], + 1, 0); + if (block_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qnids[USRQUOTA], qnids[GRPQUOTA], + block_count, 1); + /* Trigger dqrel/dqacq for original owner and new owner. + * If failed, the next call for lquota_chkquota will + * process it. */ + lquota_adjust(mds_quota_interface_ref, obd, qnids, qoids, rc, + quota_opc); + } +#endif RETURN(rc); } @@ -1020,6 +1071,12 @@ static int mdd_ref_del(const struct lu_env *env, struct md_object *obj, struct mdd_object *mdd_obj = md2mdd_obj(obj); struct mdd_device *mdd = mdo2mdd(obj); struct thandle *handle; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0; +#endif int rc; ENTRY; @@ -1062,11 +1119,26 @@ static int mdd_ref_del(const struct lu_env *env, struct md_object *obj, GOTO(cleanup, rc); rc = mdd_finish_unlink(env, mdd_obj, ma, handle); +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota && ma->ma_valid & MA_INODE && + ma->ma_attr.la_nlink == 0 && mdd_obj->mod_count == 0) { + quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD; + mdd_quota_wrapper(&ma->ma_attr, qids); + } +#endif + EXIT; cleanup: mdd_write_unlock(env, mdd_obj); mdd_trans_stop(env, mdd, rc, handle); +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) + /* Trigger dqrel on the owner of child. 
If failed, + * the next call for lquota_chkquota will process it */ + lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc, + quota_opc); +#endif return rc; } @@ -1105,13 +1177,45 @@ static int mdd_object_create(const struct lu_env *env, struct mdd_object *mdd_obj = md2mdd_obj(obj); const struct lu_fid *pfid = spec->u.sp_pfid; struct thandle *handle; - int rc; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdd->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0, block_count = 0; + int inode_pending = 0, block_pending = 0; +#endif + int rc = 0; ENTRY; +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + quota_opc = FSFILT_OP_CREATE_PARTIAL_CHILD; + mdd_quota_wrapper(&ma->ma_attr, qids); + /* get file quota for child */ + lquota_chkquota(mds_quota_interface_ref, obd, qids[USRQUOTA], + qids[GRPQUOTA], 1, &inode_pending, NULL, 0); + switch (ma->ma_attr.la_mode & S_IFMT) { + case S_IFLNK: + case S_IFDIR: + block_count = 2; + break; + case S_IFREG: + block_count = 1; + break; + } + /* get block quota for child */ + if (block_count) + lquota_chkquota(mds_quota_interface_ref, obd, + qids[USRQUOTA], qids[GRPQUOTA], + block_count, &block_pending, NULL, + LQUOTA_FLAGS_BLK); + } +#endif + mdd_txn_param_build(env, mdd, MDD_TXN_OBJECT_CREATE_OP); handle = mdd_trans_start(env, mdd); if (IS_ERR(handle)) - RETURN(PTR_ERR(handle)); + GOTO(out_pending, rc = PTR_ERR(handle)); mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD); rc = mdd_oc_sanity_check(env, mdd_obj, ma); @@ -1167,6 +1271,23 @@ unlock: mdd_write_unlock(env, mdd_obj); mdd_trans_stop(env, mdd, rc, handle); +out_pending: +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) { + if (inode_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qids[USRQUOTA], qids[GRPQUOTA], + 1, 0); + if (block_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + qids[USRQUOTA], qids[GRPQUOTA], + block_count, 1); + /* Trigger dqacq on the owner of child. 
If failed, + * the next call for lquota_chkquota will process it. */ + lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc, + FSFILT_OP_CREATE_PARTIAL_CHILD); + } +#endif return rc; } @@ -1336,6 +1457,12 @@ static int mdd_close(const struct lu_env *env, struct md_object *obj, int rc; struct mdd_object *mdd_obj = md2mdd_obj(obj); struct thandle *handle; +#ifdef HAVE_QUOTA_SUPPORT + struct obd_device *obd = mdo2mdd(obj)->mdd_obd_dev; + struct mds_obd *mds = &obd->u.mds; + unsigned int qids[MAXQUOTAS] = { 0, 0 }; + int quota_opc = 0; +#endif ENTRY; rc = mdd_log_txn_param_build(env, obj, ma, MDD_TXN_UNLINK_OP); @@ -1350,13 +1477,27 @@ static int mdd_close(const struct lu_env *env, struct md_object *obj, mdd_obj->mod_count --; rc = mdd_iattr_get(env, mdd_obj, ma); - if (rc == 0 && mdd_obj->mod_count == 0 && ma->ma_attr.la_nlink == 0) + if (rc == 0 && mdd_obj->mod_count == 0 && ma->ma_attr.la_nlink == 0) { rc = mdd_object_kill(env, mdd_obj, ma); - else +#ifdef HAVE_QUOTA_SUPPORT + if (mds->mds_quota) { + quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD; + mdd_quota_wrapper(&ma->ma_attr, qids); + } +#endif + } else { ma->ma_valid &= ~(MA_LOV | MA_COOKIE); + } mdd_write_unlock(env, mdd_obj); mdd_trans_stop(env, mdo2mdd(obj), rc, handle); +#ifdef HAVE_QUOTA_SUPPORT + if (quota_opc) + /* Trigger dqrel on the owner of child. 
If failed, + * the next call for lquota_chkquota will process it */ + lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc, + quota_opc); +#endif RETURN(rc); } diff --git a/lustre/mdd/mdd_orphans.c b/lustre/mdd/mdd_orphans.c index 24a134e..940a4df 100644 --- a/lustre/mdd/mdd_orphans.c +++ b/lustre/mdd/mdd_orphans.c @@ -89,7 +89,7 @@ static int orph_index_insert(const struct lu_env *env, rc = dor->do_index_ops->dio_insert(env, dor, (struct dt_rec *)offset, (struct dt_key *)key, th, - BYPASS_CAPA); + BYPASS_CAPA, 1); RETURN(rc); } diff --git a/lustre/mdd/mdd_permission.c b/lustre/mdd/mdd_permission.c index 80e5e83..efbc52a 100644 --- a/lustre/mdd/mdd_permission.c +++ b/lustre/mdd/mdd_permission.c @@ -386,7 +386,7 @@ int mdd_capa_get(const struct lu_env *env, struct md_object *obj, capa->lc_opc); if (IS_ERR(oc)) { rc = PTR_ERR(oc); - } else { + } else if (likely(oc != NULL)) { capa_cpy(capa, oc); capa_put(oc); } diff --git a/lustre/mdd/mdd_quota.c b/lustre/mdd/mdd_quota.c new file mode 100644 index 0000000..7bc92cc --- /dev/null +++ b/lustre/mdd/mdd_quota.c @@ -0,0 +1,276 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/mdd/mdd_quota.c + * + * Lustre Metadata Server (mdd) routines + * + * Author: Fan Yong + */ + +#ifdef HAVE_QUOTA_SUPPORT + +#include "mdd_internal.h" + +int mdd_quota_notify(const struct lu_env *env, struct md_device *m) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + ENTRY; + + lquota_setinfo(mds_quota_interface_ref, obd, (void *)1); + RETURN(0); +} + +int mdd_quota_setup(const struct lu_env *env, struct md_device *m, + void *data) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct dt_device *dt = mdd->mdd_child; + int rc; + ENTRY; + + LASSERT(obd->obd_fsops != NULL); + dt->dd_ops->dt_init_quota_ctxt(env, dt, (void *)obd, data); + rc = lquota_setup(mds_quota_interface_ref, obd); + RETURN(rc); +} + +int mdd_quota_cleanup(const struct lu_env *env, struct md_device *m) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + int rc1, rc2; + ENTRY; + + rc1 = lquota_cleanup(mds_quota_interface_ref, obd); + rc2 = lquota_fs_cleanup(mds_quota_interface_ref, obd); + RETURN(rc1 ? 
: rc2); +} + +int mdd_quota_recovery(const struct lu_env *env, struct md_device *m) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + int rc; + ENTRY; + + rc = lquota_recovery(mds_quota_interface_ref, obd); + RETURN(rc); +} + +int mdd_quota_check(const struct lu_env *env, struct md_device *m, + struct obd_export *exp, __u32 type) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_type = type; + rc = lquota_check(mds_quota_interface_ref, obd, exp, oqctl); + RETURN(rc); +} + +int mdd_quota_on(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_QUOTAON; + oqctl->qc_type = type; + oqctl->qc_id = id; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + RETURN(rc); +} + +int mdd_quota_off(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_QUOTAOFF; + oqctl->qc_type = type; + oqctl->qc_id = id; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + RETURN(rc); +} + +int mdd_quota_setinfo(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_SETINFO; + oqctl->qc_type = type; + oqctl->qc_id = id; + oqctl->qc_dqinfo = *dqinfo; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + 
RETURN(rc); +} + +int mdd_quota_getinfo(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo) +{ + struct mdd_device *mdd = lu2mdd_dev( + &((struct md_device *)m)->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_GETINFO; + oqctl->qc_type = type; + oqctl->qc_id = id; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + *dqinfo = oqctl->qc_dqinfo; + RETURN(rc); +} + +int mdd_quota_setquota(const struct lu_env *env, struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_SETQUOTA; + oqctl->qc_type = type; + oqctl->qc_id = id; + oqctl->qc_dqblk = *dqblk; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + RETURN(rc); +} + +int mdd_quota_getquota(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk) +{ + struct mdd_device *mdd = lu2mdd_dev( + &((struct md_device *)m)->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_GETQUOTA; + oqctl->qc_type = type; + oqctl->qc_id = id; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + *dqblk = oqctl->qc_dqblk; + RETURN(rc); +} + +int mdd_quota_getoinfo(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqinfo *dqinfo) +{ + struct mdd_device *mdd = lu2mdd_dev( + &((struct md_device *)m)->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_GETOINFO; + oqctl->qc_type = type; + oqctl->qc_id = id; + rc = lquota_ctl(mds_quota_interface_ref, obd, 
oqctl); + *dqinfo = oqctl->qc_dqinfo; + RETURN(rc); +} + +int mdd_quota_getoquota(const struct lu_env *env, const struct md_device *m, + __u32 type, __u32 id, struct obd_dqblk *dqblk) +{ + struct mdd_device *mdd = lu2mdd_dev( + &((struct md_device *)m)->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = Q_GETOQUOTA; + oqctl->qc_type = type; + oqctl->qc_id = id; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + *dqblk = oqctl->qc_dqblk; + RETURN(rc); +} + +int mdd_quota_invalidate(const struct lu_env *env, struct md_device *m, + __u32 type) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = LUSTRE_Q_INVALIDATE; + oqctl->qc_type = type; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + RETURN(rc); +} + +int mdd_quota_finvalidate(const struct lu_env *env, struct md_device *m, + __u32 type) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + struct obd_device *obd = mdd->mdd_obd_dev; + struct obd_quotactl *oqctl = &mdd_env_info(env)->mti_oqctl; + int rc; + ENTRY; + + oqctl->qc_cmd = LUSTRE_Q_FINVALIDATE; + oqctl->qc_type = type; + rc = lquota_ctl(mds_quota_interface_ref, obd, oqctl); + RETURN(rc); +} +#endif diff --git a/lustre/mdd/mdd_trans.c b/lustre/mdd/mdd_trans.c index 01ab561..2c0a827 100644 --- a/lustre/mdd/mdd_trans.c +++ b/lustre/mdd/mdd_trans.c @@ -135,6 +135,20 @@ int mdd_log_txn_param_build(const struct lu_env *env, struct md_object *obj, RETURN(rc); } +int mdd_setattr_txn_param_build(const struct lu_env *env, struct md_object *obj, + struct md_attr *ma, enum mdd_txn_op op) +{ + struct mdd_device *mdd = mdo2mdd(&md2mdd_obj(obj)->mod_obj); + ENTRY; + + mdd_txn_param_build(env, mdd, op); + if (ma->ma_attr.la_valid & (LA_UID | LA_GID)) + mdd_env_info(env)->mti_param.tp_credits = + 
dto_txn_credits[DTO_ATTR_SET_CHOWN]; + + RETURN(0); +} + static void mdd_txn_init_dto_credits(const struct lu_env *env, struct mdd_device *mdd, int *dto_credits) { @@ -161,16 +175,18 @@ int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd) mdd->mdd_tod[op].mod_op = op; switch(op) { case MDD_TXN_OBJECT_DESTROY_OP: + /* Unused now */ *c = dt[DTO_OBJECT_DELETE]; break; case MDD_TXN_OBJECT_CREATE_OP: - /* OI_INSERT + CREATE OBJECT */ + /* OI INSERT + CREATE OBJECT */ *c = dt[DTO_INDEX_INSERT] + - dt[DTO_OBJECT_CREATE]; + dt[DTO_OBJECT_CREATE]; break; case MDD_TXN_ATTR_SET_OP: /* ATTR set + XATTR(lsm, lmv) set */ - *c = dt[DTO_ATTR_SET] + dt[DTO_XATTR_SET]; + *c = dt[DTO_ATTR_SET_BASE] + + dt[DTO_XATTR_SET]; break; case MDD_TXN_XATTR_SET_OP: *c = dt[DTO_XATTR_SET]; @@ -191,7 +207,7 @@ int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd) case MDD_TXN_RENAME_OP: /* 2 delete index + 1 insert + Unlink log */ *c = 2 * dt[DTO_INDEX_DELETE] + - dt[DTO_INDEX_INSERT]; + dt[DTO_INDEX_INSERT]; break; case MDD_TXN_RENAME_TGT_OP: /* index insert + index delete */ @@ -209,7 +225,7 @@ int mdd_txn_init_credits(const struct lu_env *env, struct mdd_device *mdd) * CREATE_OBJECT CREDITS */ *c = 2 * dt[DTO_INDEX_INSERT] + - dt[DTO_OBJECT_CREATE]; + dt[DTO_OBJECT_CREATE]; break; default: CERROR("Invalid op %d init its credit\n", op); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 5bf89a2..c888039 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #include @@ -87,9 +86,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, snprintf(fid_name, sizeof(fid_name), "0x%lx", ino); - CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino/gen %lu/%u, sb %p\n", - ino, generation, mds->mds_obt.obt_sb); - /* under ext3 this is neither supposed to return bad inodes nor NULL inodes. 
*/ result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name)); @@ -253,9 +249,6 @@ int mds_postrecov(struct obd_device *obd) obd->obd_async_recov ? OBD_NOTIFY_SYNC_NONBLOCK : OBD_NOTIFY_SYNC, NULL); - /* quota recovery */ - lquota_recovery(mds_quota_interface_ref, obd); - RETURN(rc); } @@ -311,9 +304,6 @@ struct lvfs_callback_ops mds_lvfs_ops = { l_fid2dentry: mds_lvfs_fid2dentry, }; -quota_interface_t *mds_quota_interface_ref; -extern quota_interface_t mds_quota_interface; - static void mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt) { struct mds_obd *mds = &obd->u.mds; @@ -480,9 +470,23 @@ static struct obd_ops mds_cmd_obd_ops = { // .o_health_check = mds_cmd_health_check, }; +quota_interface_t *mds_quota_interface_ref; +extern quota_interface_t mds_quota_interface; + static int __init mds_cmd_init(void) { struct lprocfs_static_vars lvars; + int rc; + + request_module("lquota"); + mds_quota_interface_ref = PORTAL_SYMBOL_GET(mds_quota_interface); + rc = lquota_init(mds_quota_interface_ref); + if (rc) { + if (mds_quota_interface_ref) + PORTAL_SYMBOL_PUT(mds_quota_interface); + return rc; + } + init_obd_quota_ops(mds_quota_interface_ref, &mds_cmd_obd_ops); lprocfs_mds_init_vars(&lvars); class_register_type(&mds_cmd_obd_ops, NULL, lvars.module_vars, @@ -493,9 +497,14 @@ static int __init mds_cmd_init(void) static void /*__exit*/ mds_cmd_exit(void) { + lquota_exit(mds_quota_interface_ref); + if (mds_quota_interface_ref) + PORTAL_SYMBOL_PUT(mds_quota_interface); + class_unregister_type(LUSTRE_MDS_NAME); } +EXPORT_SYMBOL(mds_quota_interface_ref); MODULE_AUTHOR("Sun Microsystems, Inc. 
"); MODULE_DESCRIPTION("Lustre Metadata Server (MDS)"); MODULE_LICENSE("GPL"); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index f11796d..58aac97 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -130,162 +130,6 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, return count; } -#if 0 -static int lprocfs_wr_group_info(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - struct mds_obd *mds = &obd->u.mds; - struct mds_grp_downcall_data sparam, *param = &sparam; - int size = 0, rc = count; - - if (count < sizeof(param)) { - CERROR("%s: invalid data size %lu\n", obd->obd_name, count); - return count; - } - - if (copy_from_user(param, buffer, sizeof(*param)) || - param->mgd_magic != MDS_GRP_DOWNCALL_MAGIC) { - CERROR("%s: MDS group downcall bad params\n", obd->obd_name); - return count; - } - - if (param->mgd_ngroups > NGROUPS_MAX) { - CWARN("%s: uid %u groups %d more than maximum %d\n", - obd->obd_name, param->mgd_uid, param->mgd_ngroups, - NGROUPS_MAX); - param->mgd_ngroups = NGROUPS_MAX; - } - - if (param->mgd_ngroups > 0) { - size = offsetof(struct mds_grp_downcall_data, - mgd_groups[param->mgd_ngroups]); - OBD_ALLOC(param, size); - if (!param) { - CERROR("%s: fail to alloc %d bytes for uid %u" - " with %d groups\n", obd->obd_name, size, - sparam.mgd_uid, sparam.mgd_ngroups); - param = &sparam; - param->mgd_ngroups = 0; - } else if (copy_from_user(param, buffer, size)) { - CERROR("%s: uid %u bad supplementary group data\n", - obd->obd_name, sparam.mgd_uid); - OBD_FREE(param, size); - param = &sparam; - param->mgd_ngroups = 0; - } - } - rc = upcall_cache_downcall(mds->mds_group_hash, param->mgd_err, - param->mgd_uid, param->mgd_gid, - param->mgd_ngroups, param->mgd_groups); - - if (param && param != &sparam) - OBD_FREE(param, size); - - return rc; -} - -static int lprocfs_rd_group_expire(char *page, char **start, off_t off, - int count, int 
*eof, void *data) -{ - struct obd_device *obd = data; - - *eof = 1; - return snprintf(page, count, "%lu\n", - obd->u.mds.mds_group_hash->uc_entry_expire / HZ); -} - -static int lprocfs_wr_group_expire(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - int val, rc; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val > 5) - obd->u.mds.mds_group_hash->uc_entry_expire = val * HZ; - else - CERROR("invalid expire time %u for group cache\n", val); - - return count; -} - -static int lprocfs_rd_group_acquire_expire(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct obd_device *obd = data; - - *eof = 1; - return snprintf(page, count, "%lu\n", - obd->u.mds.mds_group_hash->uc_acquire_expire / HZ); -} - -static int lprocfs_wr_group_acquire_expire(struct file *file,const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - int val, rc = 0; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val > 2) - obd->u.mds.mds_group_hash->uc_acquire_expire = val * HZ; - - return count; -} - -static int lprocfs_rd_group_upcall(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct obd_device *obd = data; - - *eof = 1; - return snprintf(page, count, "%s\n", - obd->u.mds.mds_group_hash->uc_upcall); -} - -static int lprocfs_wr_group_upcall(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - struct upcall_cache *hash = obd->u.mds.mds_group_hash; - char kernbuf[UC_CACHE_UPCALL_MAXPATH] = { '\0' }; - - if (count >= UC_CACHE_UPCALL_MAXPATH) { - CERROR("%s: group upcall too long\n", obd->obd_name); - return -EINVAL; - } - - if (copy_from_user(kernbuf, buffer, - min(count, UC_CACHE_UPCALL_MAXPATH - 1))) - return -EFAULT; - - /* Remove any extraneous bits from the upcall (e.g. 
linefeeds) */ - sscanf(kernbuf, "%s", hash->uc_upcall); - - if (strcmp(hash->uc_name, obd->obd_name) != 0) - CWARN("%s: write to upcall name %s for MDS %s\n", - obd->obd_name, hash->uc_upcall, obd->obd_name); - CWARN("%s: group upcall set to %s\n", obd->obd_name, hash->uc_upcall); - - return count; -} - -static int lprocfs_wr_group_flush(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - - upcall_cache_flush_idle(obd->u.mds.mds_group_hash); - return count; -} -#endif - static int lprocfs_wr_atime_diff(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -336,23 +180,6 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "evict_ost_nids", lprocfs_mds_rd_evictostnids, lprocfs_mds_wr_evictostnids, 0 }, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, -#ifdef HAVE_QUOTA_SUPPORT - { "quota_bunit_sz", lprocfs_rd_bunit, lprocfs_wr_bunit, 0 }, - { "quota_btune_sz", lprocfs_rd_btune, lprocfs_wr_btune, 0 }, - { "quota_iunit_sz", lprocfs_rd_iunit, lprocfs_wr_iunit, 0 }, - { "quota_itune_sz", lprocfs_rd_itune, lprocfs_wr_itune, 0 }, - { "quota_type", lprocfs_rd_type, lprocfs_wr_type, 0 }, -#endif -#if 0 - { "group_expire_interval", lprocfs_rd_group_expire, - lprocfs_wr_group_expire, 0}, - { "group_acquire_expire", lprocfs_rd_group_acquire_expire, - lprocfs_wr_group_acquire_expire, 0}, - { "group_upcall", lprocfs_rd_group_upcall, - lprocfs_wr_group_upcall, 0}, - { "group_flush", 0, lprocfs_wr_group_flush, 0}, - { "group_info", 0, lprocfs_wr_group_info, 0 }, -#endif { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, { 0 } }; diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index cecf56d..a39e495 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -157,7 +156,7 @@ out_pop: int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, 
- struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { struct mds_obd *mds = &exp->exp_obd->u.mds; struct inode *parent_inode = mds->mds_objects_dir->d_inode; diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 9cf0e71..c98aefa 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -72,12 +72,10 @@ int mds_obd_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp); + struct obd_export *md_exp, void *capa); /* mds/handler.c */ extern struct lvfs_callback_ops mds_lvfs_ops; -/* quota stuff */ -extern quota_interface_t *mds_quota_interface_ref; /* mds/lproc_mds.c */ enum { diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 7d0238e..1968b9c 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -478,7 +478,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX | OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64 | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_FID | - OBD_CONNECT_AT; + OBD_CONNECT_AT | OBD_CONNECT_CHANGE_QS; #ifdef HAVE_LRU_RESIZE_SUPPORT data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; #endif @@ -799,7 +799,5 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, rc = mds_lov_start_synchronize(obd, watched, data, !(ev == OBD_NOTIFY_SYNC)); - lquota_recovery(mds_quota_interface_ref, obd); - RETURN(rc); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index fd12681..50869dc 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -67,6 +67,9 @@ #include #include #include "mdt_internal.h" +#ifdef HAVE_QUOTA_SUPPORT +# include +#endif #include #include @@ -309,7 +312,8 @@ static int mdt_getstatus(struct mdt_thread_info *info) repbody->valid |= OBD_MD_FLID; - if 
(mdt->mdt_opts.mo_mds_capa) { + if (mdt->mdt_opts.mo_mds_capa && + info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) { struct mdt_object *root; struct lustre_capa *capa; @@ -320,7 +324,6 @@ static int mdt_getstatus(struct mdt_thread_info *info) capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA1); LASSERT(capa); capa->lc_opc = CAPA_OPC_MDS_DEFAULT; - rc = mo_capa_get(info->mti_env, mdt_object_child(root), capa, 0); mdt_object_put(info->mti_env, root); @@ -432,7 +435,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, struct md_object *next = mdt_object_child(o); const struct mdt_body *reqbody = info->mti_body; struct ptlrpc_request *req = mdt_info_req(info); - struct mdt_export_data *med = &req->rq_export->exp_mdt_data; struct md_attr *ma = &info->mti_attr; struct lu_attr *la = &ma->ma_attr; struct req_capsule *pill = info->mti_pill; @@ -537,7 +539,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, repbody->max_cookiesize); } - if (med->med_rmtclient && (reqbody->valid & OBD_MD_FLRMTPERM)) { + if (exp_connect_rmtclient(info->mti_exp) && + reqbody->valid & OBD_MD_FLRMTPERM) { void *buf = req_capsule_server_get(pill, &RMF_ACL); /* mdt_getattr_lock only */ @@ -579,8 +582,9 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, } #endif - if ((reqbody->valid & OBD_MD_FLMDSCAPA) && - info->mti_mdt->mdt_opts.mo_mds_capa) { + if (reqbody->valid & OBD_MD_FLMDSCAPA && + info->mti_mdt->mdt_opts.mo_mds_capa && + info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) { struct lustre_capa *capa; capa = req_capsule_server_get(pill, &RMF_CAPA1); @@ -596,7 +600,6 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, static int mdt_renew_capa(struct mdt_thread_info *info) { - struct mdt_device *mdt = info->mti_mdt; struct mdt_object *obj = info->mti_object; struct mdt_body *body; struct lustre_capa *capa, *c; @@ -607,7 +610,8 @@ static int mdt_renew_capa(struct mdt_thread_info *info) * return directly, client will 
find body->valid OBD_MD_FLOSSCAPA * flag not set. */ - if (!obj || !mdt->mdt_opts.mo_mds_capa) + if (!obj || !info->mti_mdt->mdt_opts.mo_oss_capa || + !(info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA)) RETURN(0); body = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); @@ -1116,16 +1120,14 @@ static int mdt_connect(struct mdt_thread_info *info) if (rc == 0) { LASSERT(req->rq_export != NULL); info->mti_mdt = mdt_dev(req->rq_export->exp_obd->obd_lu_dev); - rc = mdt_init_idmap(info); - if (rc != 0) { - struct obd_export *exp; - - exp = req->rq_export; - /* if mdt_init_idmap failed, revocation for connect */ - obd_disconnect(class_export_get(exp)); - } - } else + rc = mdt_init_sec_level(info); + if (rc == 0) + rc = mdt_init_idmap(info); + if (rc != 0) + obd_disconnect(class_export_get(req->rq_export)); + } else { rc = err_serious(rc); + } return rc; } @@ -1262,7 +1264,7 @@ static int mdt_write_dir_page(struct mdt_thread_info *info, struct page *page, memcpy(name, ent->lde_name, le16_to_cpu(ent->lde_namelen)); lname = mdt_name(info->mti_env, name, le16_to_cpu(ent->lde_namelen)); - ma->ma_attr_flags |= MDS_PERM_BYPASS; + ma->ma_attr_flags |= (MDS_PERM_BYPASS | MDS_QUOTA_IGNORE); rc = mdo_name_insert(info->mti_env, md_object_next(&object->mot_obj), lname, lf, ma); @@ -1633,15 +1635,134 @@ static int mdt_sync(struct mdt_thread_info *info) RETURN(rc); } +#ifdef HAVE_QUOTA_SUPPORT static int mdt_quotacheck_handle(struct mdt_thread_info *info) { - return err_serious(-EOPNOTSUPP); + struct obd_quotactl *oqctl; + struct req_capsule *pill = info->mti_pill; + struct obd_export *exp = info->mti_exp; + struct md_device *next = info->mti_mdt->mdt_child; + int rc; + ENTRY; + + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_QUOTACHECK_NET)) + RETURN(0); + + oqctl = req_capsule_client_get(pill, &RMF_OBD_QUOTACTL); + if (oqctl == NULL) + RETURN(-EPROTO); + + /* remote client has no permission for quotacheck */ + if (unlikely(exp_connect_rmtclient(exp))) + RETURN(-EPERM); + + rc = 
req_capsule_server_pack(pill); + if (rc) + RETURN(rc); + + rc = next->md_ops->mdo_quota.mqo_check(info->mti_env, next, exp, + oqctl->qc_type); + RETURN(rc); } static int mdt_quotactl_handle(struct mdt_thread_info *info) { - return err_serious(-EOPNOTSUPP); + struct obd_quotactl *oqctl, *repoqc; + struct req_capsule *pill = info->mti_pill; + struct obd_export *exp = info->mti_exp; + struct md_device *next = info->mti_mdt->mdt_child; + const struct md_quota_operations *mqo = &next->md_ops->mdo_quota; + int id, rc; + ENTRY; + + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_QUOTACTL_NET)) + RETURN(0); + + oqctl = req_capsule_client_get(pill, &RMF_OBD_QUOTACTL); + if (oqctl == NULL) + RETURN(-EPROTO); + + id = oqctl->qc_id; + if (exp_connect_rmtclient(exp)) { + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = mdt_req2med(req); + struct lustre_idmap_table *idmap = med->med_idmap; + + if (unlikely(oqctl->qc_cmd != Q_GETQUOTA && + oqctl->qc_cmd != Q_GETINFO)) + RETURN(-EPERM); + + + if (oqctl->qc_type == USRQUOTA) + id = lustre_idmap_lookup_uid(NULL, idmap, 0, + oqctl->qc_id); + else if (oqctl->qc_type == GRPQUOTA) + id = lustre_idmap_lookup_gid(NULL, idmap, 0, + oqctl->qc_id); + else + RETURN(-EINVAL); + + if (id == CFS_IDMAP_NOTFOUND) { + CDEBUG(D_QUOTA, "no mapping for id %u\n", + oqctl->qc_id); + RETURN(-EACCES); + } + } + + rc = req_capsule_server_pack(pill); + if (rc) + RETURN(rc); + + repoqc = req_capsule_server_get(pill, &RMF_OBD_QUOTACTL); + LASSERT(repoqc != NULL); + + switch (oqctl->qc_cmd) { + case Q_QUOTAON: + rc = mqo->mqo_on(info->mti_env, next, oqctl->qc_type, id); + break; + case Q_QUOTAOFF: + rc = mqo->mqo_off(info->mti_env, next, oqctl->qc_type, id); + break; + case Q_SETINFO: + rc = mqo->mqo_setinfo(info->mti_env, next, oqctl->qc_type, id, + &oqctl->qc_dqinfo); + break; + case Q_GETINFO: + rc = mqo->mqo_getinfo(info->mti_env, next, oqctl->qc_type, id, + &oqctl->qc_dqinfo); + break; + case Q_SETQUOTA: + rc = 
mqo->mqo_setquota(info->mti_env, next, oqctl->qc_type, id, + &oqctl->qc_dqblk); + break; + case Q_GETQUOTA: + rc = mqo->mqo_getquota(info->mti_env, next, oqctl->qc_type, id, + &oqctl->qc_dqblk); + break; + case Q_GETOINFO: + rc = mqo->mqo_getoinfo(info->mti_env, next, oqctl->qc_type, id, + &oqctl->qc_dqinfo); + break; + case Q_GETOQUOTA: + rc = mqo->mqo_getoquota(info->mti_env, next, oqctl->qc_type, id, + &oqctl->qc_dqblk); + break; + case LUSTRE_Q_INVALIDATE: + rc = mqo->mqo_invalidate(info->mti_env, next, oqctl->qc_type); + break; + case LUSTRE_Q_FINVALIDATE: + rc = mqo->mqo_finvalidate(info->mti_env, next, oqctl->qc_type); + break; + default: + CERROR("unsupported mdt_quotactl command: %d\n", + oqctl->qc_cmd); + RETURN(-EFAULT); + } + + *repoqc = *oqctl; + RETURN(rc); } +#endif /* * OBD PING and other handlers. @@ -2381,6 +2502,15 @@ static void mdt_thread_info_init(struct ptlrpc_request *req, info->mti_env = req->rq_svc_thread->t_env; ci = md_capainfo(info->mti_env); memset(ci, 0, sizeof *ci); + if (req->rq_export) { + if (exp_connect_rmtclient(req->rq_export)) + ci->mc_auth = LC_ID_CONVERT; + else if (req->rq_export->exp_connect_flags & + OBD_CONNECT_MDS_CAPA) + ci->mc_auth = LC_ID_PLAIN; + else + ci->mc_auth = LC_ID_NONE; + } info->mti_fail_id = OBD_FAIL_MDS_ALL_REPLY_NET; info->mti_transno = lustre_msg_get_transno(req->rq_reqmsg); @@ -3803,7 +3933,7 @@ err_mdt_svc: static void mdt_stack_fini(const struct lu_env *env, struct mdt_device *m, struct lu_device *top) { - struct obd_device *obd = m->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(m); struct lustre_cfg_bufs *bufs; struct lustre_cfg *lcfg; struct mdt_thread_info *info; @@ -3951,7 +4081,7 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m) struct md_device *next = m->mdt_child; struct lu_device *d = &m->mdt_md_dev.md_lu_dev; struct lu_site *ls = d->ld_site; - struct obd_device *obd = m->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(m); 
ENTRY; /* At this point, obd exports might still be on the "obd_zombie_exports" @@ -3972,8 +4102,10 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m) target_recovery_fini(obd); mdt_stop_ptlrpc_service(m); obd_zombie_barrier(); +#ifdef HAVE_QUOTA_SUPPORT + next->md_ops->mdo_quota.mqo_cleanup(env, next); +#endif mdt_fs_cleanup(env, m); - upcall_cache_cleanup(m->mdt_identity_cache); m->mdt_identity_cache = NULL; @@ -4018,6 +4150,8 @@ static void fsoptions_to_mdt_flags(struct mdt_device *m, char *options) { char *p = options; + m->mdt_opts.mo_mds_capa = 1; + m->mdt_opts.mo_oss_capa = 1; #ifdef CONFIG_FS_POSIX_ACL /* ACLs should be enabled by default (b=13829) */ m->mdt_opts.mo_acl = 1; @@ -4065,11 +4199,14 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, struct obd_device *obd; const char *dev = lustre_cfg_string(cfg, 0); const char *num = lustre_cfg_string(cfg, 2); - struct lustre_mount_info *lmi; + struct lustre_mount_info *lmi = NULL; struct lustre_sb_info *lsi; struct lu_site *s; struct md_site *mite; const char *identity_upcall = "NONE"; +#ifdef HAVE_QUOTA_SUPPORT + struct md_device *next; +#endif int rc; ENTRY; @@ -4107,7 +4244,6 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, } else { lsi = s2lsi(lmi->lmi_sb); fsoptions_to_mdt_flags(m, lsi->lsi_lmd->lmd_opts); - server_put_mount_2(dev, lmi->lmi_mnt); } rwlock_init(&m->mdt_sptlrpc_lock); @@ -4123,7 +4259,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, OBD_ALLOC_PTR(mite); if (mite == NULL) - RETURN(-ENOMEM); + GOTO(err_lmi, rc = -ENOMEM); s = &mite->ms_lu; @@ -4229,11 +4365,21 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, if (rc) GOTO(err_capa, rc); +#ifdef HAVE_QUOTA_SUPPORT + next = m->mdt_child; + rc = next->md_ops->mdo_quota.mqo_setup(env, next, lmi->lmi_mnt); + if (rc) + GOTO(err_fs_cleanup, rc); +#endif + + server_put_mount_2(dev, lmi->lmi_mnt); + lmi = NULL; + target_recovery_init(obd, 
mdt_recovery_handle); rc = mdt_start_ptlrpc_service(m); if (rc) - GOTO(err_fs_cleanup, rc); + GOTO(err_recovery, rc); ping_evictor_start(); @@ -4257,8 +4403,12 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, err_stop_service: ping_evictor_stop(); mdt_stop_ptlrpc_service(m); -err_fs_cleanup: +err_recovery: target_recovery_fini(obd); +#ifdef HAVE_QUOTA_SUPPORT + next->md_ops->mdo_quota.mqo_cleanup(env, next); +err_fs_cleanup: +#endif mdt_fs_cleanup(env, m); err_capa: cfs_timer_disarm(&m->mdt_ck_timer); @@ -4284,6 +4434,9 @@ err_fini_site: lu_site_fini(s); err_free_site: OBD_FREE_PTR(mite); +err_lmi: + if (lmi) + server_put_mount_2(dev, lmi->lmi_mnt); return (rc); } @@ -4333,7 +4486,7 @@ static int mdt_process_config(const struct lu_env *env, lprocfs_mdt_init_vars(&lvars); rc = class_process_proc_param(PARAM_MDT, lvars.obd_vars, cfg, obd); - if (rc == -ENOSYS) + if (rc > 0 || rc == -ENOSYS) /* we don't understand; pass it on */ rc = next->ld_ops->ldo_process_config(env, next, cfg); break; @@ -4434,8 +4587,6 @@ static int mdt_connect_internal(struct obd_export *exp, struct mdt_device *mdt, struct obd_connect_data *data) { - __u64 flags; - if (data != NULL) { data->ocd_connect_flags &= MDT_CONNECT_SUPPORTED; data->ocd_ibits_known &= MDS_INODELOCK_FULL; @@ -4453,12 +4604,6 @@ static int mdt_connect_internal(struct obd_export *exp, if (!mdt->mdt_opts.mo_user_xattr) data->ocd_connect_flags &= ~OBD_CONNECT_XATTR; - if (!mdt->mdt_opts.mo_mds_capa) - data->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA; - - if (!mdt->mdt_opts.mo_oss_capa) - data->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA; - spin_lock(&exp->exp_lock); exp->exp_connect_flags = data->ocd_connect_flags; spin_unlock(&exp->exp_lock); @@ -4475,28 +4620,6 @@ static int mdt_connect_internal(struct obd_export *exp, } #endif - flags = OBD_CONNECT_LCL_CLIENT | OBD_CONNECT_RMT_CLIENT; - if ((exp->exp_connect_flags & flags) == flags) { - CWARN("%s: both local and remote client flags are set\n", - 
mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); - return -EBADE; - } - - if (mdt->mdt_opts.mo_mds_capa && - ((exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) == 0)) { - CWARN("%s: MDS requires capability support, but client not\n", - mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); - return -EBADE; - } - - if (mdt->mdt_opts.mo_oss_capa && - ((exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA) == 0)) { - CWARN("%s: MDS requires OSS capability support, " - "but client not\n", - mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); - return -EBADE; - } - if ((exp->exp_connect_flags & OBD_CONNECT_FID) == 0) { CWARN("%s: MDS requires FID support, but client not\n", mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name); @@ -4707,7 +4830,7 @@ static int mdt_destroy_export(struct obd_export *export) ENTRY; med = &export->exp_mdt_data; - if (med->med_rmtclient) + if (exp_connect_rmtclient(export)) mdt_cleanup_idmap(med); target_destroy_export(export); @@ -4814,6 +4937,10 @@ static int mdt_upcall(const struct lu_env *env, struct md_device *md, CDEBUG(D_INFO, "get max mdsize %d max cookiesize %d\n", m->mdt_max_mdsize, m->mdt_max_cookiesize); mdt_allow_cli(m, CONFIG_SYNC); +#ifdef HAVE_QUOTA_SUPPORT + if (md->md_lu_dev.ld_obd->obd_recovering == 0) + next->md_ops->mdo_quota.mqo_recovery(env, next); +#endif break; case MD_NO_TRANS: mti = lu_context_key_get(&env->le_ctx, &mdt_thread_key); @@ -4836,11 +4963,21 @@ static int mdt_obd_notify(struct obd_device *host, struct obd_device *watched, enum obd_notify_event ev, void *data) { + struct mdt_device *mdt = mdt_dev(host->obd_lu_dev); +#ifdef HAVE_QUOTA_SUPPORT + struct md_device *next = mdt->mdt_child; +#endif ENTRY; switch (ev) { case OBD_NOTIFY_CONFIG: - mdt_allow_cli(mdt_dev(host->obd_lu_dev), (unsigned long)data); + mdt_allow_cli(mdt, (unsigned long)data); + +#ifdef HAVE_QUOTA_SUPPORT + /* quota_type has been processed, we can now handle + * incoming quota requests */ + next->md_ops->mdo_quota.mqo_notify(NULL, next); +#endif break; default: CDEBUG(D_INFO, 
"Unhandled notification %#x\n", ev); @@ -4888,7 +5025,10 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) { struct lu_device *ld = md2lu_dev(mdt->mdt_child); - struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); +#ifdef HAVE_QUOTA_SUPPORT + struct md_device *next = mdt->mdt_child; +#endif int rc, lost; ENTRY; /* if some clients didn't participate in recovery then we can possibly @@ -4897,6 +5037,9 @@ int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) mdt_seq_adjust(env, mdt, lost); rc = ld->ld_ops->ldo_recovery_complete(env, ld); +#ifdef HAVE_QUOTA_SUPPORT + next->md_ops->mdo_quota.mqo_recovery(env, next); +#endif RETURN(rc); } @@ -5118,8 +5261,10 @@ DEF_MDT_HNDL_F(HABEO_CORPUS, DONE_WRITING, mdt_done_writing), DEF_MDT_HNDL_F(0 |HABEO_REFERO, PIN, mdt_pin), DEF_MDT_HNDL_0(0, SYNC, mdt_sync), DEF_MDT_HNDL_F(HABEO_CORPUS|HABEO_REFERO, IS_SUBDIR, mdt_is_subdir), +#ifdef HAVE_QUOTA_SUPPORT DEF_MDT_HNDL_F(0, QUOTACHECK, mdt_quotacheck_handle), DEF_MDT_HNDL_F(0, QUOTACTL, mdt_quotactl_handle) +#endif }; #define DEF_OBD_HNDL(flags, name, fn) \ diff --git a/lustre/mdt/mdt_identity.c b/lustre/mdt/mdt_identity.c index 3243e65..21cd41b 100644 --- a/lustre/mdt/mdt_identity.c +++ b/lustre/mdt/mdt_identity.c @@ -285,10 +285,8 @@ __u32 mdt_identity_get_perm(struct md_identity *identity, int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o, void *buf) { - struct ptlrpc_request *req = mdt_info_req(info); struct md_ucred *uc = mdt_ucred(info); struct md_object *next = mdt_object_child(o); - struct mdt_export_data *med = mdt_req2med(req); struct mdt_remote_perm *perm = buf; ENTRY; @@ -296,7 +294,7 @@ int mdt_pack_remote_perm(struct mdt_thread_info *info, struct mdt_object *o, /* remote client request always pack ptlrpc_user_desc! 
*/ LASSERT(perm); - if (!med->med_rmtclient) + if (!exp_connect_rmtclient(info->mti_exp)) RETURN(-EBADE); if ((uc->mu_valid != UCRED_OLD) && (uc->mu_valid != UCRED_NEW)) diff --git a/lustre/mdt/mdt_idmap.c b/lustre/mdt/mdt_idmap.c index 08f38c0..162a02f 100644 --- a/lustre/mdt/mdt_idmap.c +++ b/lustre/mdt/mdt_idmap.c @@ -76,12 +76,24 @@ #include "mdt_internal.h" -int mdt_init_idmap(struct mdt_thread_info *info) +#define mdt_init_sec_none(reply, exp) \ +do { \ + reply->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT | \ + OBD_CONNECT_RMT_CLIENT_FORCE | \ + OBD_CONNECT_MDS_CAPA | \ + OBD_CONNECT_OSS_CAPA); \ + spin_lock(&exp->exp_lock); \ + exp->exp_connect_flags = reply->ocd_connect_flags; \ + spin_unlock(&exp->exp_lock); \ +} while (0) + +int mdt_init_sec_level(struct mdt_thread_info *info) { + struct mdt_device *mdt = info->mti_mdt; struct ptlrpc_request *req = mdt_info_req(info); char *client = libcfs_nid2str(req->rq_peer.nid); - struct mdt_export_data *med = mdt_req2med(req); - struct obd_device *obd = req->rq_export->exp_obd; + struct obd_export *exp = req->rq_export; + struct obd_device *obd = exp->exp_obd; struct obd_connect_data *data, *reply; int rc = 0, remote; ENTRY; @@ -91,26 +103,116 @@ int mdt_init_idmap(struct mdt_thread_info *info) if (data == NULL || reply == NULL) RETURN(-EFAULT); - if (!req->rq_auth_gss || req->rq_auth_usr_mdt) { - med->med_rmtclient = 0; - reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; + /* connection from MDT is always trusted */ + if (req->rq_auth_usr_mdt) { + mdt_init_sec_none(reply, exp); RETURN(0); } - remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT; + /* no GSS support case */ + if (!req->rq_auth_gss) { + if (mdt->mdt_sec_level > LUSTRE_SEC_NONE) { + CWARN("client %s -> target %s does not user GSS, " + "can not run under security level %d.\n", + client, obd->obd_name, mdt->mdt_sec_level); + RETURN(-EACCES); + } else { + mdt_init_sec_none(reply, exp); + RETURN(0); + } + } + + /* old version case */ + if 
(unlikely(!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) || + !(data->ocd_connect_flags & OBD_CONNECT_MDS_CAPA) || + !(data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA))) { + if (mdt->mdt_sec_level > LUSTRE_SEC_NONE) { + CWARN("client %s -> target %s uses old version, " + "can not run under security level %d.\n", + client, obd->obd_name, mdt->mdt_sec_level); + RETURN(-EACCES); + } else { + CWARN("client %s -> target %s uses old version, " + "run under security level %d.\n", + client, obd->obd_name, mdt->mdt_sec_level); + mdt_init_sec_none(reply, exp); + RETURN(0); + } + } + remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT_FORCE; if (remote) { - med->med_rmtclient = 1; if (!req->rq_auth_remote) CDEBUG(D_SEC, "client (local realm) %s -> target %s " - "asked to be remote!\n", client, obd->obd_name); + "asked to be remote.\n", client, obd->obd_name); } else if (req->rq_auth_remote) { - med->med_rmtclient = 1; - CDEBUG(D_SEC, "client (remote realm) %s -> target %s forced " - "to be remote!\n", client, obd->obd_name); + remote = 1; + CDEBUG(D_SEC, "client (remote realm) %s -> target %s is set " + "as remote by default.\n", client, obd->obd_name); + } + + if (remote) { + if (!mdt->mdt_opts.mo_oss_capa) { + CDEBUG(D_SEC, "client %s -> target %s is set as remote," + " but OSS capabilities are not enabled: %d.\n", + client, obd->obd_name, mdt->mdt_opts.mo_oss_capa); + RETURN(-EACCES); + } + } else { + if (req->rq_auth_uid == INVALID_UID) { + CDEBUG(D_SEC, "client %s -> target %s: user is not " + "authenticated!\n", client, obd->obd_name); + RETURN(-EACCES); + } } - if (med->med_rmtclient) { + switch (mdt->mdt_sec_level) { + case LUSTRE_SEC_NONE: + if (!remote) { + mdt_init_sec_none(reply, exp); + break; + } else { + CDEBUG(D_SEC, "client %s -> target %s is set as remote, " + "can not run under security level %d.\n", + client, obd->obd_name, mdt->mdt_sec_level); + RETURN(-EACCES); + } + case LUSTRE_SEC_REMOTE: + if (!remote) + mdt_init_sec_none(reply, exp); + 
break; + case LUSTRE_SEC_ALL: + if (!remote) { + reply->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT | + OBD_CONNECT_RMT_CLIENT_FORCE); + if (!mdt->mdt_opts.mo_mds_capa) + reply->ocd_connect_flags &= ~OBD_CONNECT_MDS_CAPA; + if (!mdt->mdt_opts.mo_oss_capa) + reply->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA; + + spin_lock(&exp->exp_lock); + exp->exp_connect_flags = reply->ocd_connect_flags; + spin_unlock(&exp->exp_lock); + } + break; + default: + RETURN(-EINVAL); + } + + RETURN(rc); +} + +int mdt_init_idmap(struct mdt_thread_info *info) +{ + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = mdt_req2med(req); + struct obd_export *exp = req->rq_export; + char *client = libcfs_nid2str(req->rq_peer.nid); + struct obd_device *obd = exp->exp_obd; + int rc = 0; + ENTRY; + + if (exp_connect_rmtclient(exp)) { down(&med->med_idmap_sem); if (!med->med_idmap) med->med_idmap = lustre_idmap_init(); @@ -131,28 +233,16 @@ int mdt_init_idmap(struct mdt_thread_info *info) RETURN(-ENOMEM); } - reply->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT; CDEBUG(D_SEC, "client %s -> target %s is remote.\n", client, obd->obd_name); - /* NB, MDS_CONNECT establish root idmap too! 
*/ rc = mdt_handle_idmap(info); - } else { - if (req->rq_auth_uid == INVALID_UID) { - CDEBUG(D_SEC, "client %s -> target %s: user is not " - "authenticated!\n", client, obd->obd_name); - RETURN(-EACCES); - } - reply->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; } - RETURN(rc); } void mdt_cleanup_idmap(struct mdt_export_data *med) { - LASSERT(med->med_rmtclient); - down(&med->med_idmap_sem); if (med->med_idmap != NULL) { lustre_idmap_fini(med->med_idmap); @@ -185,7 +275,7 @@ int mdt_handle_idmap(struct mdt_thread_info *info) RETURN(0); med = mdt_req2med(req); - if (!med->med_rmtclient) + if (!exp_connect_rmtclient(info->mti_exp)) RETURN(0); opc = lustre_msg_get_opc(req->rq_reqmsg); @@ -262,7 +352,7 @@ int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *req, gid_t gid, fsgid; /* Only remote client need desc_to_idmap. */ - if (!med->med_rmtclient) + if (!exp_connect_rmtclient(req->rq_export)) return 0; uid = lustre_idmap_lookup_uid(NULL, idmap, 0, pud->pud_uid); @@ -317,7 +407,7 @@ void mdt_body_reverse_idmap(struct mdt_thread_info *info, struct mdt_body *body) struct mdt_export_data *med = mdt_req2med(req); struct lustre_idmap_table *idmap = med->med_idmap; - if (!med->med_rmtclient) + if (!exp_connect_rmtclient(info->mti_exp)) return; if (body->valid & OBD_MD_FLUID) { @@ -366,7 +456,7 @@ int mdt_fix_attr_ucred(struct mdt_thread_info *info, __u32 op) * done in cmm/mdd layer, here set all cases as uc->mu_fsgid. */ if ((attr->la_valid & LA_GID) && (attr->la_gid != -1)) attr->la_gid = uc->mu_fsgid; - } else if (med->med_rmtclient) { + } else if (exp_connect_rmtclient(info->mti_exp)) { /* NB: -1 case will be handled by mdt_fix_attr() later. 
*/ if ((attr->la_valid & LA_UID) && (attr->la_uid != -1)) { uid_t uid = lustre_idmap_lookup_uid(uc, idmap, 0, diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 4251858..b5aaecb 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -71,6 +71,7 @@ #include #include #include +#include static inline __u64 lcd_last_transno(struct lsd_client_data *lcd) { @@ -176,6 +177,7 @@ struct mdt_device { cfs_proc_dir_entry_t *mdt_proc_entry; struct lprocfs_stats *mdt_stats; + int mdt_sec_level; }; #define MDT_SERVICE_WATCHDOG_FACTOR (2000) @@ -312,7 +314,7 @@ struct mdt_thread_info { /* * XXX: Part Three: - * The following members will be filled explictly + * The following members will be filled explicitly * with zero in mdt_reint_unpack(), because they are only used * by reint requests (including mdt_reint_open()). */ @@ -369,6 +371,7 @@ struct mdt_thread_info { /* Ops object filename */ struct lu_name mti_name; + struct md_attr mti_tmp_attr; }; typedef void (*mdt_cb_t)(const struct mdt_device *mdt, __u64 transno, @@ -599,21 +602,16 @@ int mdt_init_ucred_reint(struct mdt_thread_info *); void mdt_exit_ucred(struct mdt_thread_info *); /* mdt_idmap.c */ +int mdt_init_sec_level(struct mdt_thread_info *); int mdt_init_idmap(struct mdt_thread_info *); - void mdt_cleanup_idmap(struct mdt_export_data *); - int mdt_handle_idmap(struct mdt_thread_info *); - int ptlrpc_user_desc_do_idmap(struct ptlrpc_request *, struct ptlrpc_user_desc *); - void mdt_body_reverse_idmap(struct mdt_thread_info *, struct mdt_body *); - int mdt_remote_perm_reverse_idmap(struct ptlrpc_request *, struct mdt_remote_perm *); - int mdt_fix_attr_ucred(struct mdt_thread_info *, __u32); static inline struct mdt_device *mdt_dev(struct lu_device *d) @@ -778,11 +776,11 @@ static inline void mdt_set_capainfo(struct mdt_thread_info *info, int offset, const struct lu_fid *fid, struct lustre_capa *capa) { - struct mdt_device *dev = info->mti_mdt; struct md_capainfo *ci; LASSERT(offset 
>= 0 && offset <= MD_CAPAINFO_MAX); - if (!dev->mdt_opts.mo_mds_capa) + if (!info->mti_mdt->mdt_opts.mo_mds_capa || + !(info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA)) return; ci = md_capainfo(info->mti_env); @@ -815,5 +813,9 @@ static inline void mdt_dump_capainfo(struct mdt_thread_info *info) } } +static inline struct obd_device *mdt2obd_dev(const struct mdt_device *mdt) +{ + return mdt->mdt_md_dev.md_lu_dev.ld_obd; +} #endif /* __KERNEL__ */ #endif /* _MDT_H */ diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index c2abdf1..d3bbed9 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -92,12 +92,12 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, void *buf) { struct ptlrpc_request *req = mdt_info_req(info); - struct mdt_export_data *med = mdt_req2med(req); struct mdt_device *mdt = info->mti_mdt; struct ptlrpc_user_desc *pud = req->rq_user_desc; struct md_ucred *ucred = mdt_ucred(info); lnet_nid_t peernid = req->rq_peer.nid; __u32 perm = 0; + __u32 remote = exp_connect_rmtclient(info->mti_exp); int setuid; int setgid; int rc = 0; @@ -123,7 +123,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, } /* sanity check: we expect the uid which client claimed is true */ - if (med->med_rmtclient) { + if (remote) { if (req->rq_auth_mapped_uid == INVALID_UID) { CDEBUG(D_SEC, "remote user not mapped, deny access!\n"); RETURN(-EACCES); @@ -153,7 +153,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, } if (is_identity_get_disabled(mdt->mdt_identity_cache)) { - if (med->med_rmtclient) { + if (remote) { CDEBUG(D_SEC, "remote client must run with identity_get " "enabled!\n"); RETURN(-EACCES); @@ -169,7 +169,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, pud->pud_uid); if (IS_ERR(identity)) { if (unlikely(PTR_ERR(identity) == -EREMCHG && - !med->med_rmtclient)) { + !remote)) { ucred->mu_identity = NULL; perm = 
CFS_SETUID_PERM | CFS_SETGID_PERM | CFS_SETGRP_PERM; @@ -181,8 +181,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, } else { ucred->mu_identity = identity; perm = mdt_identity_get_perm(ucred->mu_identity, - med->med_rmtclient, - peernid); + remote, peernid); } } @@ -211,7 +210,7 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, /* * NB: remote client not allowed to setgroups anyway. */ - if (!med->med_rmtclient && perm & CFS_SETGRP_PERM) { + if (!remote && perm & CFS_SETGRP_PERM) { if (pud->pud_ngroups) { /* setgroups for local client */ ucred->mu_ginfo = groups_alloc(pud->pud_ngroups); @@ -241,11 +240,14 @@ static int new_init_ucred(struct mdt_thread_info *info, ucred_init_type_t type, /* XXX: need to process root_squash here. */ mdt_root_squash(info); - /* remove fs privilege for non-root user */ + /* remove fs privilege for non-root user. */ if (ucred->mu_fsuid) ucred->mu_cap = pud->pud_cap & ~CFS_CAP_FS_MASK; else ucred->mu_cap = pud->pud_cap; + if (remote && !(perm & CFS_RMTOWN_PERM)) + ucred->mu_cap &= ~(CFS_CAP_SYS_RESOURCE_MASK | + CFS_CAP_CHOWN_MASK); ucred->mu_valid = UCRED_NEW; EXIT; @@ -269,13 +271,13 @@ out: int mdt_check_ucred(struct mdt_thread_info *info) { struct ptlrpc_request *req = mdt_info_req(info); - struct mdt_export_data *med = mdt_req2med(req); struct mdt_device *mdt = info->mti_mdt; struct ptlrpc_user_desc *pud = req->rq_user_desc; struct md_ucred *ucred = mdt_ucred(info); struct md_identity *identity = NULL; lnet_nid_t peernid = req->rq_peer.nid; __u32 perm = 0; + __u32 remote = exp_connect_rmtclient(info->mti_exp); int setuid; int setgid; int rc = 0; @@ -290,7 +292,7 @@ int mdt_check_ucred(struct mdt_thread_info *info) /* sanity check: if we use strong authentication, we expect the * uid which client claimed is true */ - if (med->med_rmtclient) { + if (remote) { if (req->rq_auth_mapped_uid == INVALID_UID) { CDEBUG(D_SEC, "remote user not mapped, deny access!\n"); 
RETURN(-EACCES); @@ -320,7 +322,7 @@ int mdt_check_ucred(struct mdt_thread_info *info) } if (is_identity_get_disabled(mdt->mdt_identity_cache)) { - if (med->med_rmtclient) { + if (remote) { CDEBUG(D_SEC, "remote client must run with identity_get " "enabled!\n"); RETURN(-EACCES); @@ -331,7 +333,7 @@ int mdt_check_ucred(struct mdt_thread_info *info) identity = mdt_identity_get(mdt->mdt_identity_cache, pud->pud_uid); if (IS_ERR(identity)) { if (unlikely(PTR_ERR(identity) == -EREMCHG && - !med->med_rmtclient)) { + !remote)) { RETURN(0); } else { CDEBUG(D_SEC, "Deny access without identity: uid %u\n", @@ -340,7 +342,7 @@ int mdt_check_ucred(struct mdt_thread_info *info) } } - perm = mdt_identity_get_perm(identity, med->med_rmtclient, peernid); + perm = mdt_identity_get_perm(identity, remote, peernid); /* find out the setuid/setgid attempt */ setuid = (pud->pud_uid != pud->pud_fsuid); setgid = (pud->pud_gid != pud->pud_fsgid || @@ -404,7 +406,7 @@ static int old_init_ucred(struct mdt_thread_info *info, /* XXX: need to process root_squash here. */ mdt_root_squash(info); - /* remove fs privilege for non-root user */ + /* remove fs privilege for non-root user. */ if (uc->mu_fsuid) uc->mu_cap = body->capability & ~CFS_CAP_FS_MASK; else @@ -444,7 +446,7 @@ static int old_init_ucred_reint(struct mdt_thread_info *info) /* XXX: need to process root_squash here. */ mdt_root_squash(info); - /* remove fs privilege for non-root user */ + /* remove fs privilege for non-root user. 
*/ if (uc->mu_fsuid) uc->mu_cap &= ~CFS_CAP_FS_MASK; uc->mu_valid = UCRED_OLD; @@ -571,6 +573,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo, { struct mdt_body *repbody; const struct lu_attr *la = &ma->ma_attr; + int rc; ENTRY; repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); @@ -605,6 +608,21 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo, repbody->valid |= OBD_MD_FLCOOKIE; } + if (info->mti_mdt->mdt_opts.mo_oss_capa && + info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA && + repbody->valid & OBD_MD_FLEASIZE) { + struct lustre_capa *capa; + + capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2); + LASSERT(capa); + capa->lc_opc = CAPA_OPC_OSS_DESTROY; + rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0); + if (rc) + RETURN(rc); + + repbody->valid |= OBD_MD_FLOSSCAPA; + } + RETURN(0); } diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 0e95718..37c1375 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -425,6 +425,39 @@ static int lprocfs_mdt_wr_evict_client(struct file *file, const char *buffer, return count; } +static int lprocfs_rd_sec_level(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + + return snprintf(page, count, "%d\n", mdt->mdt_sec_level); +} + +static int lprocfs_wr_sec_level(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val > LUSTRE_SEC_ALL || val < LUSTRE_SEC_NONE) + return -EINVAL; + + if (val == LUSTRE_SEC_SPECIFY) { + CWARN("security level %d will be supported in future.\n", + LUSTRE_SEC_SPECIFY); + return -EINVAL; + } + + mdt->mdt_sec_level = val; + return count; +} + static int 
lprocfs_rd_cos(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -470,6 +503,8 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = { { "site_stats", lprocfs_rd_site_stats, 0, 0 }, { "evict_client", 0, lprocfs_mdt_wr_evict_client, 0 }, { "hash_stats", lprocfs_obd_rd_hash, 0, 0 }, + { "sec_level", lprocfs_rd_sec_level, + lprocfs_wr_sec_level, 0 }, { "commit_on_sharing", lprocfs_rd_cos, lprocfs_wr_cos, 0 }, { 0 } }; diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 4c4690d..916e3e0 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -501,8 +501,8 @@ static int mdt_finish_open(struct mdt_thread_info *info, int flags, int created, struct ldlm_reply *rep) { struct ptlrpc_request *req = mdt_info_req(info); + struct obd_export *exp = req->rq_export; struct mdt_export_data *med = &req->rq_export->exp_mdt_data; - struct mdt_device *mdt = info->mti_mdt; struct md_attr *ma = &info->mti_attr; struct lu_attr *la = &ma->ma_attr; struct mdt_file_data *mfd; @@ -521,7 +521,7 @@ static int mdt_finish_open(struct mdt_thread_info *info, islnk = S_ISLNK(la->la_mode); mdt_pack_attr2body(info, repbody, la, mdt_object_fid(o)); - if (med->med_rmtclient) { + if (exp_connect_rmtclient(exp)) { void *buf = req_capsule_server_get(info->mti_pill, &RMF_ACL); rc = mdt_pack_remote_perm(info, o, buf); @@ -534,7 +534,7 @@ static int mdt_finish_open(struct mdt_thread_info *info, } } #ifdef CONFIG_FS_POSIX_ACL - else if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL) { + else if (exp->exp_connect_flags & OBD_CONNECT_ACL) { const struct lu_env *env = info->mti_env; struct md_object *next = mdt_object_child(o); struct lu_buf *buf = &info->mti_buf; @@ -564,26 +564,26 @@ static int mdt_finish_open(struct mdt_thread_info *info, } #endif - if (mdt->mdt_opts.mo_mds_capa) { + if (info->mti_mdt->mdt_opts.mo_mds_capa && + exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) { struct lustre_capa *capa; capa = req_capsule_server_get(info->mti_pill, 
&RMF_CAPA1); LASSERT(capa); capa->lc_opc = CAPA_OPC_MDS_DEFAULT; - capa->lc_uid = 0; rc = mo_capa_get(info->mti_env, mdt_object_child(o), capa, 0); if (rc) RETURN(rc); repbody->valid |= OBD_MD_FLMDSCAPA; } - if (mdt->mdt_opts.mo_oss_capa && + if (info->mti_mdt->mdt_opts.mo_oss_capa && + exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA && S_ISREG(lu_object_attr(&o->mot_obj.mo_lu))) { struct lustre_capa *capa; capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2); LASSERT(capa); capa->lc_opc = CAPA_OPC_OSS_DEFAULT | capa_open_opc(flags); - capa->lc_uid = 0; rc = mo_capa_get(info->mti_env, mdt_object_child(o), capa, 0); if (rc) RETURN(rc); diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c index 4853c3e..1286919 100644 --- a/lustre/mdt/mdt_recovery.c +++ b/lustre/mdt/mdt_recovery.c @@ -101,7 +101,7 @@ int mdt_record_write(const struct lu_env *env, LASSERTF(dt != NULL, "dt is NULL when we want to write record\n"); LASSERT(th != NULL); - rc = dt->do_body_ops->dbo_write(env, dt, buf, pos, th, BYPASS_CAPA); + rc = dt->do_body_ops->dbo_write(env, dt, buf, pos, th, BYPASS_CAPA, 1); if (rc == buf->lb_len) rc = 0; else if (rc >= 0) @@ -329,7 +329,7 @@ static int mdt_clients_data_init(const struct lu_env *env, { struct lr_server_data *lsd = &mdt->mdt_lsd; struct lsd_client_data *lcd = NULL; - struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); loff_t off; int cl_idx; int rc = 0; @@ -423,7 +423,7 @@ static int mdt_server_data_init(const struct lu_env *env, { struct lr_server_data *lsd = &mdt->mdt_lsd; struct lsd_client_data *lcd = NULL; - struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); struct mdt_thread_info *mti; struct dt_object *obj; struct lu_attr *la; @@ -561,7 +561,7 @@ static int mdt_server_data_update(const struct lu_env *env, void mdt_cb_new_client(const struct mdt_device *mdt, __u64 transno, void *data, int err) { - struct obd_device *obd = 
mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); target_client_add_cb(obd, transno, data, err); } @@ -573,7 +573,7 @@ int mdt_client_new(const struct lu_env *env, struct mdt_device *mdt) struct mdt_export_data *med; struct lsd_client_data *lcd; struct lr_server_data *lsd = &mdt->mdt_lsd; - struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); struct thandle *th; loff_t off; int rc; @@ -649,7 +649,7 @@ int mdt_client_add(const struct lu_env *env, struct mdt_thread_info *mti; struct mdt_export_data *med; unsigned long *bitmap = mdt->mdt_client_bitmap; - struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); struct lr_server_data *lsd = &mdt->mdt_lsd; int rc = 0; ENTRY; @@ -691,7 +691,7 @@ int mdt_client_del(const struct lu_env *env, struct mdt_device *mdt) struct mdt_thread_info *mti; struct mdt_export_data *med; struct lsd_client_data *lcd; - struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); struct thandle *th; loff_t off; int rc = 0; @@ -918,7 +918,7 @@ static int mdt_txn_commit_cb(const struct lu_env *env, struct thandle *txn, void *cookie) { struct mdt_device *mdt = cookie; - struct obd_device *obd = md2lu_dev(&mdt->mdt_md_dev)->ld_obd; + struct obd_device *obd = mdt2obd_dev(mdt); struct mdt_txn_info *txi; int i; diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index d42e20f..2fb2fde 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -80,7 +80,8 @@ static int mdt_create_pack_capa(struct mdt_thread_info *info, int rc, if (repbody->valid & OBD_MD_FLMDSCAPA) RETURN(rc); - if (rc == 0 && info->mti_mdt->mdt_opts.mo_mds_capa) { + if (rc == 0 && info->mti_mdt->mdt_opts.mo_mds_capa && + info->mti_exp->exp_connect_flags & OBD_CONNECT_MDS_CAPA) { struct lustre_capa *capa; capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA1); @@ -291,7 +292,6 @@ out_unlock: 
static int mdt_reint_setattr(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { - struct mdt_device *mdt = info->mti_mdt; struct md_attr *ma = &info->mti_attr; struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); @@ -387,7 +387,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo)); - if (mdt->mdt_opts.mo_oss_capa && + if (info->mti_mdt->mdt_opts.mo_oss_capa && + info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA && S_ISREG(lu_object_attr(&mo->mot_obj.mo_lu)) && (ma->ma_attr.la_valid & LA_SIZE)) { struct lustre_capa *capa; diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index 47cce00..5a93bfe 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -128,6 +128,8 @@ int mdt_getxattr(struct mdt_thread_info *info) struct mdt_body *repbody = NULL; struct md_object *next; struct lu_buf *buf; + __u32 remote = exp_connect_rmtclient(info->mti_exp); + __u32 perm; int easize, rc; ENTRY; @@ -147,11 +149,11 @@ int mdt_getxattr(struct mdt_thread_info *info) next = mdt_object_child(info->mti_object); if (info->mti_body->valid & OBD_MD_FLRMTRGETFACL) { - __u32 perm = mdt_identity_get_perm(uc->mu_identity, - med->med_rmtclient, - req->rq_peer.nid); + if (unlikely(!remote)) + GOTO(out, rc = err_serious(-EINVAL)); - LASSERT(med->med_rmtclient); + perm = mdt_identity_get_perm(uc->mu_identity, remote, + req->rq_peer.nid); if (!(perm & CFS_RMTACL_PERM)) GOTO(out, rc = err_serious(-EPERM)); @@ -197,7 +199,9 @@ int mdt_getxattr(struct mdt_thread_info *info) if (rc > 0 && flags != CFS_IC_NOTHING) { int rc1; - LASSERT(med->med_rmtclient); + if (unlikely(!remote)) + GOTO(out, rc = -EINVAL); + rc1 = lustre_posix_acl_xattr_id2client(uc, med->med_idmap, (posix_acl_xattr_header *)(buf->lb_buf), @@ -275,7 +279,6 @@ int mdt_reint_setxattr(struct mdt_thread_info *info, struct mdt_lock_handle *unused) { struct ptlrpc_request *req = 
mdt_info_req(info); - struct mdt_export_data *med = mdt_req2med(req); struct md_ucred *uc = mdt_ucred(info); const char user_string[] = "user."; const char trust_string[] = "trusted."; @@ -294,6 +297,8 @@ int mdt_reint_setxattr(struct mdt_thread_info *info, __u64 lockpart; int rc; posix_acl_xattr_header *new_xattr = NULL; + __u32 remote = exp_connect_rmtclient(info->mti_exp); + __u32 perm; ENTRY; CDEBUG(D_INODE, "setxattr for "DFID"\n", PFID(rr->rr_fid1)); @@ -311,11 +316,11 @@ int mdt_reint_setxattr(struct mdt_thread_info *info, RETURN(rc); if (valid & OBD_MD_FLRMTRSETFACL) { - __u32 perm = mdt_identity_get_perm(uc->mu_identity, - med->med_rmtclient, - req->rq_peer.nid); + if (unlikely(!remote)) + GOTO(out, rc = err_serious(-EINVAL)); - LASSERT(med->med_rmtclient); + perm = mdt_identity_get_perm(uc->mu_identity, remote, + req->rq_peer.nid); if (!(perm & CFS_RMTACL_PERM)) GOTO(out, rc = err_serious(-EPERM)); } @@ -368,7 +373,9 @@ int mdt_reint_setxattr(struct mdt_thread_info *info, xattr = req_capsule_client_get(pill, &RMF_EADATA); if (valid & OBD_MD_FLRMTLSETFACL) { - LASSERT(med->med_rmtclient); + if (unlikely(!remote)) + GOTO(out_unlock, rc = -EINVAL); + xattr_len = mdt_rmtlsetfacl(info, child, xattr_name, (ext_acl_xattr_header *)xattr, diff --git a/lustre/obdclass/capa.c b/lustre/obdclass/capa.c index 421df58..b73386f 100644 --- a/lustre/obdclass/capa.c +++ b/lustre/obdclass/capa.c @@ -113,10 +113,11 @@ static inline int capa_on_server(struct obd_capa *ocapa) static inline void capa_delete(struct obd_capa *ocapa) { LASSERT(capa_on_server(ocapa)); - hlist_del(&ocapa->u.tgt.c_hash); - list_del(&ocapa->c_list); + hlist_del_init(&ocapa->u.tgt.c_hash); + list_del_init(&ocapa->c_list); capa_count[ocapa->c_site]--; - free_capa(ocapa); + /* release the ref when alloc */ + capa_put(ocapa); } void cleanup_capa_hash(struct hlist_head *hash) @@ -200,7 +201,7 @@ struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa) struct list_head *list = 
&capa_list[CAPA_SITE_SERVER]; ocapa = alloc_capa(CAPA_SITE_SERVER); - if (!ocapa) + if (IS_ERR(ocapa)) return NULL; spin_lock(&capa_lock); @@ -210,25 +211,18 @@ struct obd_capa *capa_add(struct hlist_head *hash, struct lustre_capa *capa) set_capa_expiry(ocapa); hlist_add_head(&ocapa->u.tgt.c_hash, head); list_add_tail(&ocapa->c_list, list); - capa_count[CAPA_SITE_SERVER]++; capa_get(ocapa); - + capa_count[CAPA_SITE_SERVER]++; if (capa_count[CAPA_SITE_SERVER] > CAPA_HASH_SIZE) capa_delete_lru(list); - - DEBUG_CAPA(D_SEC, &ocapa->c_capa, "new"); - spin_unlock(&capa_lock); return ocapa; + } else { + capa_get(old); + spin_unlock(&capa_lock); + capa_put(ocapa); + return old; } - - capa_get(old); - spin_unlock(&capa_lock); - - DEBUG_CAPA(D_SEC, &old->c_capa, "update"); - - free_capa(ocapa); - return old; } struct obd_capa *capa_lookup(struct hlist_head *hash, struct lustre_capa *capa, @@ -278,6 +272,110 @@ int capa_hmac(__u8 *hmac, struct lustre_capa *capa, __u8 *key) return 0; } + +int capa_encrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen) +{ + struct ll_crypto_cipher *tfm; + struct scatterlist sd = { + .page = virt_to_page(d), + .offset = (unsigned long)(d) % CFS_PAGE_SIZE, + .length = 16, + }; + struct scatterlist ss = { + .page = virt_to_page(s), + .offset = (unsigned long)(s) % CFS_PAGE_SIZE, + .length = 16, + }; + struct blkcipher_desc desc; + unsigned int min; + int rc; + ENTRY; + + tfm = ll_crypto_alloc_blkcipher("aes", 0, 0 ); + if (tfm == NULL) { + CERROR("failed to load transform for aes\n"); + RETURN(-EFAULT); + } + + min = crypto_tfm_alg_min_keysize(tfm); + if (keylen < min) { + CERROR("keylen at least %d bits for aes\n", min * 8); + GOTO(out, rc = -EINVAL); + } + + rc = ll_crypto_blkcipher_setkey(tfm, key, min); + if (rc) { + CERROR("failed to setting key for aes\n"); + GOTO(out, rc); + } + + desc.tfm = tfm; + desc.info = NULL; + desc.flags = 0; + rc = ll_crypto_blkcipher_encrypt(&desc, &sd, &ss, 16); + if (rc) { + CERROR("failed to encrypt for aes\n"); 
+ GOTO(out, rc); + } + + EXIT; + +out: + ll_crypto_free_blkcipher(tfm); + return rc; +} + +int capa_decrypt_id(__u32 *d, __u32 *s, __u8 *key, int keylen) +{ + struct ll_crypto_cipher *tfm; + struct scatterlist sd = { + .page = virt_to_page(d), + .offset = (unsigned long)(d) % CFS_PAGE_SIZE, + .length = 16, + }; + struct scatterlist ss = { + .page = virt_to_page(s), + .offset = (unsigned long)(s) % CFS_PAGE_SIZE, + .length = 16, + }; + struct blkcipher_desc desc; + unsigned int min; + int rc; + ENTRY; + + tfm = ll_crypto_alloc_blkcipher("aes", 0, 0 ); + if (tfm == NULL) { + CERROR("failed to load transform for aes\n"); + RETURN(-EFAULT); + } + + min = crypto_tfm_alg_min_keysize(tfm); + if (keylen < min) { + CERROR("keylen at least %d bits for aes\n", min * 8); + GOTO(out, rc = -EINVAL); + } + + rc = ll_crypto_blkcipher_setkey(tfm, key, min); + if (rc) { + CERROR("failed to setting key for aes\n"); + GOTO(out, rc); + } + + desc.tfm = tfm; + desc.info = NULL; + desc.flags = 0; + rc = ll_crypto_blkcipher_decrypt(&desc, &sd, &ss, 16); + if (rc) { + CERROR("failed to decrypt for aes\n"); + GOTO(out, rc); + } + + EXIT; + +out: + ll_crypto_free_blkcipher(tfm); + return rc; +} #endif void capa_cpy(void *capa, struct obd_capa *ocapa) @@ -287,22 +385,11 @@ void capa_cpy(void *capa, struct obd_capa *ocapa) spin_unlock(&ocapa->c_lock); } -char *dump_capa_content(char *buf, char *key, int len) -{ - int i, n = 0; - - for (i = 0; i < len; i++) - n += sprintf(buf + n, "%02x", (unsigned char) key[i]); - return buf; -} - EXPORT_SYMBOL(init_capa_hash); EXPORT_SYMBOL(cleanup_capa_hash); - EXPORT_SYMBOL(capa_add); EXPORT_SYMBOL(capa_lookup); - EXPORT_SYMBOL(capa_hmac); +EXPORT_SYMBOL(capa_encrypt_id); +EXPORT_SYMBOL(capa_decrypt_id); EXPORT_SYMBOL(capa_cpy); - -EXPORT_SYMBOL(dump_capa_content); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 0803a15..d5bbe44 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -405,7 +405,6 @@ 
EXPORT_SYMBOL(class_name2obd); EXPORT_SYMBOL(class_uuid2dev); EXPORT_SYMBOL(class_uuid2obd); EXPORT_SYMBOL(class_find_client_obd); -EXPORT_SYMBOL(class_find_client_notype); EXPORT_SYMBOL(class_devices_in_group); EXPORT_SYMBOL(class_conn2export); EXPORT_SYMBOL(class_exp2obd); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index f317b95..854b5c6 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -506,18 +506,6 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, return NULL; } -struct obd_device *class_find_client_notype(struct obd_uuid *tgt_uuid, - struct obd_uuid *grp_uuid) -{ - struct obd_device *obd; - - obd = class_find_client_obd(tgt_uuid, LUSTRE_MDC_NAME, NULL); - if (!obd) - obd = class_find_client_obd(tgt_uuid, LUSTRE_OSC_NAME, - grp_uuid); - return obd; -} - /* Iterate the obd_device list looking devices have grp_uuid. Start searching at *next, and if a device is found, the next index to look at is saved in *next. If next is NULL, then the first matching device diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index abaa9df..60ee61c 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -760,7 +760,7 @@ static int llog_lvfs_destroy(struct llog_handle *handle) if (rc) GOTO(out, rc); - rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL); + rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL, NULL); out: OBDO_FREE(oa); RETURN(rc); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index b9de3be..a347c0d 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1275,6 +1275,7 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_uuid); LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck); LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, 
quota_adjust_qunit); LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping); LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_new); LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_rem); @@ -1377,6 +1378,7 @@ int lprocfs_alloc_md_stats(struct obd_device *obd, LPROCFS_MD_OP_INIT(num_private_stats, stats, lock_match); LPROCFS_MD_OP_INIT(num_private_stats, stats, cancel_unused); LPROCFS_MD_OP_INIT(num_private_stats, stats, renew_capa); + LPROCFS_MD_OP_INIT(num_private_stats, stats, unpack_capa); LPROCFS_MD_OP_INIT(num_private_stats, stats, get_remote_perm); LPROCFS_MD_OP_INIT(num_private_stats, stats, intent_getattr_async); LPROCFS_MD_OP_INIT(num_private_stats, stats, revalidate_lock); @@ -1856,7 +1858,7 @@ int lprocfs_write_frac_u64_helper(const char *buffer, unsigned long count, __u64 whole, frac = 0, units; unsigned frac_d = 1; - if (count > (sizeof(kernbuf) - 1) ) + if (count > (sizeof(kernbuf) - 1)) return -EINVAL; if (copy_from_user(kernbuf, buffer, count)) diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 42d8cf1..42798fb 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -1063,7 +1063,7 @@ EXPORT_SYMBOL(lu_context_key_register); static void key_fini(struct lu_context *ctx, int index) { - if (ctx->lc_value[index] != NULL) { + if (ctx->lc_value != NULL && ctx->lc_value[index] != NULL) { struct lu_context_key *key; key = lu_keys[index]; diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index c4021ba..128343e 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -888,6 +888,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, int i, keylen, vallen; int matched = 0, j = 0; int rc = 0; + int skip = 0; ENTRY; if (lcfg->lcfg_command != LCFG_PARAM) { @@ -943,6 +944,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, CERROR("%s: unknown param %s\n", (char *)lustre_cfg_string(lcfg, 0), key); /* rc = -EINVAL; continue 
parsing other params */ + skip++; } else { LCONSOLE_INFO("%s.%.*s: set parameter %.*s=%s\n", lustre_cfg_string(lcfg, 0), @@ -953,6 +955,8 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, if (rc > 0) rc = 0; + if (!rc && skip) + rc = skip; RETURN(rc); #else CDEBUG(D_CONFIG, "liblustre can't process params.\n"); diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 52609b0..ebc3bba3 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -153,7 +153,7 @@ int echo_create(struct obd_export *exp, struct obdo *oa, int echo_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { struct obd_device *obd = class_exp2obd(exp); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 225ec5a..01b9572 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -1301,7 +1301,7 @@ static int echo_create_object(struct echo_device *ed, int on_target, failed: if (created && rc) - obd_destroy(ec->ec_exp, oa, lsm, oti, NULL); + obd_destroy(ec->ec_exp, oa, lsm, oti, NULL, NULL); if (lsm) obd_free_memmd(ec->ec_exp, &lsm); if (rc) @@ -1637,7 +1637,7 @@ static int echo_client_brw_ioctl(int rw, struct obd_export *exp, struct obd_device *obd = class_exp2obd(exp); struct echo_device *ed = obd2echo_dev(obd); struct echo_client_obd *ec = ed->ed_ec; - struct obd_trans_info dummy_oti = { .oti_thread_id = -1 }; + struct obd_trans_info dummy_oti = { .oti_thread = NULL }; struct echo_object *eco; int rc; int async = 1; @@ -1762,7 +1762,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, oa->o_gr = FILTER_GROUP_ECHO; oa->o_valid |= OBD_MD_FLGROUP; rc = obd_destroy(ec->ec_exp, oa, eco->eo_lsm, - &dummy_oti, NULL); + &dummy_oti, NULL, NULL); if (rc == 0) eco->eo_deleted = 1; echo_put_object(eco); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 
de162b1..593f37b 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1915,7 +1915,8 @@ static int filter_iobuf_pool_init(struct filter_obd *filter) * If we haven't allocated a pool entry for this thread before, do so now. */ void *filter_iobuf_get(struct filter_obd *filter, struct obd_trans_info *oti) { - int thread_id = oti ? oti->oti_thread_id : -1; + int thread_id = (oti && oti->oti_thread) ? + oti->oti_thread->t_id : -1; struct filter_iobuf *pool = NULL; struct filter_iobuf **pool_place = NULL; @@ -2042,7 +2043,7 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, rwlock_init(&filter->fo_sptlrpc_lock); sptlrpc_rule_set_init(&filter->fo_sptlrpc_rset); - filter->fo_fl_oss_capa = 0; + filter->fo_fl_oss_capa = 1; CFS_INIT_LIST_HEAD(&filter->fo_capa_keys); filter->fo_capa_hash = init_capa_hash(); if (filter->fo_capa_hash == NULL) @@ -2920,9 +2921,7 @@ static int filter_destroy_export(struct obd_export *exp) exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, exp->exp_filter_data.fed_pending); - /* Not ported yet the b1_6 quota functionality - * lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd); - */ + lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd); target_destroy_export(exp); ldlm_destroy_export(exp); @@ -3299,43 +3298,52 @@ out_unlock: int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti) { + struct obdo *oa = oinfo->oi_oa; + struct lustre_capa *capa = oinfo_capa(oinfo); struct ldlm_res_id res_id; struct filter_mod_data *fmd; struct lvfs_run_ctxt saved; struct filter_obd *filter; struct ldlm_resource *res; struct dentry *dentry; + __u64 opc = CAPA_OPC_META_WRITE; int rc; ENTRY; - osc_build_res_name(oinfo->oi_oa->o_id, oinfo->oi_oa->o_gr, &res_id); - rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo), - oinfo_capa(oinfo), CAPA_OPC_META_WRITE); + if (oa->o_valid & OBD_FL_TRUNC) + opc |= CAPA_OPC_OSS_TRUNC; + rc = filter_auth_capa(exp, NULL, 
obdo_mdsno(oa), capa, opc); if (rc) RETURN(rc); + if (oa->o_valid & (OBD_MD_FLUID | OBD_MD_FLGID)) { + rc = filter_capa_fixoa(exp, oa, obdo_mdsno(oa), capa); + if (rc) + RETURN(rc); + } + + osc_build_res_name(oa->o_id, oa->o_gr, &res_id); /* This would be very bad - accidentally truncating a file when * changing the time or similar - bug 12203. */ - if (oinfo->oi_oa->o_valid & OBD_MD_FLSIZE && + if (oa->o_valid & OBD_MD_FLSIZE && oinfo->oi_policy.l_extent.end != OBD_OBJECT_EOF) { static char mdsinum[48]; - if (oinfo->oi_oa->o_valid & OBD_MD_FLFID) + if (oa->o_valid & OBD_MD_FLFID) snprintf(mdsinum, sizeof(mdsinum) - 1, - " of inode "LPU64"/%u", oinfo->oi_oa->o_fid, - oinfo->oi_oa->o_generation); + " of inode "LPU64"/%u", oa->o_fid, + oa->o_generation); else mdsinum[0] = '\0'; CERROR("%s: setattr from %s trying to truncate objid "LPU64 " %s\n", exp->exp_obd->obd_name, obd_export_nid2str(exp), - oinfo->oi_oa->o_id, mdsinum); + oa->o_id, mdsinum); RETURN(-EPERM); } - dentry = __filter_oa2dentry(exp->exp_obd, oinfo->oi_oa, - __FUNCTION__, 1); + dentry = __filter_oa2dentry(exp->exp_obd, oa, __FUNCTION__, 1); if (IS_ERR(dentry)) RETURN(PTR_ERR(dentry)); @@ -3343,16 +3351,16 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); lock_kernel(); - if (oinfo->oi_oa->o_valid & + if (oa->o_valid & (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) { - fmd = filter_fmd_get(exp,oinfo->oi_oa->o_id,oinfo->oi_oa->o_gr); + fmd = filter_fmd_get(exp, oa->o_id, oa->o_gr); if (fmd && fmd->fmd_mactime_xid < oti->oti_xid) fmd->fmd_mactime_xid = oti->oti_xid; filter_fmd_put(exp, fmd); } /* setting objects attributes (including owner/group) */ - rc = filter_setattr_internal(exp, dentry, oinfo->oi_oa, oti); + rc = filter_setattr_internal(exp, dentry, oa, oti); if (rc) GOTO(out_unlock, rc); @@ -3366,10 +3374,10 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, ldlm_resource_putref(res); } - 
oinfo->oi_oa->o_valid = OBD_MD_FLID; + oa->o_valid = OBD_MD_FLID; /* Quota release need uid/gid info */ - obdo_from_inode(oinfo->oi_oa, dentry->d_inode, + obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID); EXIT; @@ -3469,7 +3477,7 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, for (id = last; id > oa->o_id; id--) { doa.o_id = id; - rc = filter_destroy(exp, &doa, NULL, NULL, NULL); + rc = filter_destroy(exp, &doa, NULL, NULL, NULL, NULL); if (rc && rc != -ENOENT) /* this is pretty fatal... */ CEMERG("error destroying precreate objid "LPU64": %d\n", id, rc); @@ -3888,7 +3896,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *oti, - struct obd_export *md_exp) + struct obd_export *md_exp, void *capa) { unsigned int qcids[MAXQUOTAS] = {0, 0}; struct obd_device *obd; @@ -3903,6 +3911,11 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, LASSERT(oa->o_valid & OBD_MD_FLGROUP); + rc = filter_auth_capa(exp, NULL, obdo_mdsno(oa), + (struct lustre_capa *)capa, CAPA_OPC_OSS_DESTROY); + if (rc) + RETURN(rc); + obd = exp->exp_obd; filter = &obd->u.filter; @@ -4047,9 +4060,8 @@ cleanup: qcids[GRPQUOTA] = oa->o_gid; rc2 = lquota_adjust(filter_quota_interface_ref, obd, qcids, NULL, rc, FSFILT_OP_UNLINK); - if (rc2) - CDEBUG(D_QUOTA, "filter adjust qunit! (rc:%d)\n", rc2); + CERROR("filter adjust qunit! 
(rc:%d)\n", rc2); return rc; } @@ -4071,13 +4083,10 @@ static int filter_truncate(struct obd_export *exp, struct obd_info *oinfo, ", o_size = "LPD64"\n", oinfo->oi_oa->o_id, oinfo->oi_oa->o_valid, oinfo->oi_policy.l_extent.start); - rc = filter_auth_capa(exp, NULL, oinfo_mdsno(oinfo), - oinfo_capa(oinfo), CAPA_OPC_OSS_TRUNC); - if (rc) - RETURN(rc); - oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start; + oinfo->oi_oa->o_valid |= OBD_FL_TRUNC; rc = filter_setattr(exp, oinfo, oti); + oinfo->oi_oa->o_valid &= ~OBD_FL_TRUNC; RETURN(rc); } @@ -4246,6 +4255,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, if (KEY_IS(KEY_REVIMP_UPD)) { filter_revimp_update(exp); + lquota_clearinfo(filter_quota_interface_ref, exp, exp->exp_obd); RETURN(0); } @@ -4273,7 +4283,7 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, rc = llog_receptor_accept(ctxt, exp->exp_imp_reverse); llog_ctxt_put(ctxt); - lquota_setinfo(filter_quota_interface_ref, exp, obd); + lquota_setinfo(filter_quota_interface_ref, obd, exp); RETURN(rc); } @@ -4414,6 +4424,8 @@ static int filter_process_config(struct obd_device *obd, obd_count len, rc = class_process_proc_param(PARAM_OST, lvars.obd_vars, lcfg, obd); + if (rc > 0) + rc = 0; break; } diff --git a/lustre/obdfilter/filter_capa.c b/lustre/obdfilter/filter_capa.c index 1fbddb6..bc43a6a 100644 --- a/lustre/obdfilter/filter_capa.c +++ b/lustre/obdfilter/filter_capa.c @@ -128,6 +128,9 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, if (!filter->fo_fl_oss_capa) RETURN(0); + if (!(exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA)) + RETURN(0); + if (capa == NULL) { if (fid) CERROR("mdsno/fid/opc "LPU64"/"DFID"/"LPX64 @@ -164,8 +167,13 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, RETURN(rc); } + if (capa_is_expired_sec(capa)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + RETURN(-ESTALE); + } + spin_lock(&capa_lock); - list_for_each_entry(k, 
&filter->fo_capa_keys, k_list) + list_for_each_entry(k, &filter->fo_capa_keys, k_list) { if (k->k_key.lk_mdsid == mdsid) { keys_ready = 1; if (k->k_key.lk_keyid == capa_keyid(capa)) { @@ -174,6 +182,7 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, break; } } + } spin_unlock(&capa_lock); if (!keys_ready) { @@ -212,6 +221,64 @@ int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, RETURN(0); } +int filter_capa_fixoa(struct obd_export *exp, struct obdo *oa, __u64 mdsid, + struct lustre_capa *capa) +{ + int rc = 0; + ENTRY; + + if (!(exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA)) + RETURN(0); + + if (unlikely(!capa)) + RETURN(-EACCES); + + if (capa_flags(capa) == LC_ID_CONVERT) { + struct obd_device *obd = exp->exp_obd; + struct filter_obd *filter = &obd->u.filter; + struct filter_capa_key *k; + int found = 0; + + spin_lock(&capa_lock); + list_for_each_entry(k, &filter->fo_capa_keys, k_list) { + if (k->k_key.lk_mdsid == mdsid && + k->k_key.lk_keyid == capa_keyid(capa)) { + found = 1; + break; + } + } + spin_unlock(&capa_lock); + + if (found) { + union { + __u64 id64; + __u32 id32[2]; + } uid, gid; + __u32 d[4], s[4]; + + uid.id64 = capa_uid(capa); + gid.id64 = capa_gid(capa); + s[0] = uid.id32[0]; + s[1] = uid.id32[1]; + s[2] = gid.id32[0]; + s[3] = gid.id32[1]; + + rc = capa_decrypt_id(d, s, k->k_key.lk_key, + CAPA_HMAC_KEY_MAX_LEN); + if (unlikely(rc)) + RETURN(rc); + + oa->o_uid = d[0]; + oa->o_gid = d[2]; + } else { + DEBUG_CAPA(D_ERROR, capa, "no matched capability key for"); + rc = -ESTALE; + } + } + + RETURN(rc); +} + void filter_free_capa_keys(struct filter_obd *filter) { struct filter_capa_key *key, *n; diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index fb82e44..e5db720 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -150,7 +150,7 @@ int filter_common_setup(struct obd_device *, struct lustre_cfg *lcfg, void *option); 
int filter_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *, - struct obd_export *); + struct obd_export *, void *); int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, struct obdo *oa, struct obd_trans_info *oti); int filter_setattr(struct obd_export *exp, struct obd_info *oinfo, @@ -244,6 +244,8 @@ static inline __u64 obdo_mdsno(struct obdo *oa) int filter_update_capa_key(struct obd_device *obd, struct lustre_capa_key *key); int filter_auth_capa(struct obd_export *exp, struct lu_fid *fid, __u64 mdsid, struct lustre_capa *capa, __u64 opc); +int filter_capa_fixoa(struct obd_export *exp, struct obdo *oa, __u64 mdsid, + struct lustre_capa *capa); void filter_free_capa_keys(struct filter_obd *filter); void blacklist_add(uid_t uid); diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 5bab0b6..07b2f9c 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -328,8 +328,11 @@ void filter_invalidate_cache(struct obd_device *obd, struct obd_ioobj *obj, LASSERT(inode != NULL); for (i = 0, rnb = nb; i < obj->ioo_bufcnt; i++, rnb++) { - obd_off start = rnb->offset >> CFS_PAGE_SHIFT; - obd_off end = (rnb->offset + rnb->len) >> CFS_PAGE_SHIFT; + obd_off start; + obd_off end; + + start = rnb->offset >> CFS_PAGE_SHIFT; + end = (rnb->offset + rnb->len) >> CFS_PAGE_SHIFT; invalidate_mapping_pages(inode->i_mapping, start, end); /* just to avoid warnings */ start = 0; @@ -650,6 +653,13 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, GOTO(cleanup, rc = -ENOENT); } + if (oa->o_valid & (OBD_MD_FLUID | OBD_MD_FLGID) && + dentry->d_inode->i_mode & (S_ISUID | S_ISGID)) { + rc = filter_capa_fixoa(exp, oa, obdo_mdsno(oa), capa); + if (rc) + GOTO(cleanup, rc); + } + rc = filter_map_remote_to_local(objcount, obj, nb, npages, res); if (rc) GOTO(cleanup, rc); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c 
index 8940804..aa49c15 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -246,7 +246,8 @@ void filter_free_iobuf(struct filter_iobuf *iobuf) void filter_iobuf_put(struct filter_obd *filter, struct filter_iobuf *iobuf, struct obd_trans_info *oti) { - int thread_id = oti ? oti->oti_thread_id : -1; + int thread_id = (oti && oti->oti_thread) ? + oti->oti_thread->t_id : -1; if (unlikely(thread_id < 0)) { filter_free_iobuf(iobuf); @@ -556,7 +557,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, struct obd_device *obd = exp->exp_obd; struct filter_obd *fo = &obd->u.filter; void *wait_handle; - int total_size = 0, rc2; + int total_size = 0; + int rec_pending = 0; unsigned int qcids[MAXQUOTAS] = {0, 0}; ENTRY; @@ -567,21 +569,11 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (rc != 0) GOTO(cleanup, rc); - /* Unfortunately, if quota master is too busy to handle the - * pre-dqacq in time and quota hash on ost is used up, we - * have to wait for the completion of in flight dqacq/dqrel, - * then try again */ - if ((rc2 = lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid, - oa->o_gid, niocount)) == QUOTA_RET_ACQUOTA) { - OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90); - lquota_acquire(filter_quota_interface_ref, obd, oa->o_uid, - oa->o_gid); - } - - if (rc2 < 0) { - rc = rc2; - GOTO(cleanup, rc); - } + /* we try to get enough quota to write here, and let ldiskfs + * decide if it is out of quota or not b=14783 */ + lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid, + oa->o_gid, niocount, &rec_pending, oti, + LQUOTA_FLAGS_BLK); iobuf = filter_iobuf_get(&obd->u.filter, oti); if (IS_ERR(iobuf)) @@ -595,9 +587,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, iobuf->dr_ignore_quota = 0; for (i = 0, lnb = res; i < niocount; i++, lnb++) { loff_t this_size; + __u32 flags = lnb->flags; /* If overwriting an existing block, we don't need a grant */ - if 
(!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && + if (!(flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && filter_range_is_mapped(inode, lnb->offset, lnb->len)) lnb->rc = 0; @@ -627,10 +620,15 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (this_size > iattr.ia_size) iattr.ia_size = this_size; - /* if one page is a write-back page from client cache, or it's - * written by root, then mark the whole io request as ignore - * quota request */ - if (lnb->flags & (OBD_BRW_FROM_GRANT | OBD_BRW_NOQUOTA)) + /* if one page is a write-back page from client cache and + * not from direct_io, or it's written by root, then mark + * the whole io request as ignore quota request, remote + * client can not break through quota. */ + if (exp_connect_rmtclient(exp)) + flags &= ~OBD_BRW_NOQUOTA; + if ((flags & OBD_BRW_NOQUOTA) || + (flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) == + OBD_BRW_FROM_GRANT) iobuf->dr_ignore_quota = 1; } @@ -721,6 +719,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, fsfilt_check_slow(obd, now, "commitrw commit"); cleanup: + if (rec_pending) + lquota_pending_commit(filter_quota_interface_ref, obd, oa->o_uid, + oa->o_gid, niocount, 1); + filter_grant_commit(exp, niocount, res); switch (cleanup_phase) { diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index 0e769ad..2462a07 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -173,7 +173,7 @@ static int filter_recov_log_unlink_cb(struct llog_ctxt *ctxt, oa->o_lcookie = *cookie; oid = oa->o_id; - rc = filter_destroy(exp, oa, NULL, NULL, NULL); + rc = filter_destroy(exp, oa, NULL, NULL, NULL, NULL); OBDO_FREE(oa); if (rc == -ENOENT) { CDEBUG(D_RPCTRACE, "object already removed, send cookie\n"); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index cbe0753..7810acc 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -143,7 
+143,6 @@ int lprocfs_filter_wr_readcache(struct file *file, const char *buffer, return count; } - int lprocfs_filter_rd_fmd_max_num(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -242,6 +241,37 @@ static int lprocfs_filter_rd_capa_count(char *page, char **start, off_t off, capa_count[CAPA_SITE_SERVER]); } +static int lprocfs_rd_sec_level(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + + return snprintf(page, count, "%d\n", obd->u.filter.fo_sec_level); +} + +static int lprocfs_wr_sec_level(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val > LUSTRE_SEC_ALL || val < LUSTRE_SEC_NONE) + return -EINVAL; + + if (val == LUSTRE_SEC_SPECIFY) { + CWARN("security level %d will be supported in future.\n", + LUSTRE_SEC_SPECIFY); + return -EINVAL; + } + + obd->u.filter.fo_sec_level = val; + return count; +} + static int lprocfs_filter_rd_cache(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -318,11 +348,8 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = { lprocfs_filter_rd_readcache, lprocfs_filter_wr_readcache, 0 }, #ifdef HAVE_QUOTA_SUPPORT - { "quota_bunit_sz", lprocfs_rd_bunit, lprocfs_wr_bunit, 0}, - { "quota_btune_sz", lprocfs_rd_btune, lprocfs_wr_btune, 0}, - { "quota_iunit_sz", lprocfs_rd_iunit, lprocfs_wr_iunit, 0}, - { "quota_itune_sz", lprocfs_rd_itune, lprocfs_wr_itune, 0}, - { "quota_type", lprocfs_rd_type, lprocfs_wr_type, 0}, + { "quota_type", lprocfs_quota_rd_type, + lprocfs_quota_wr_type, 0}, #endif { "client_cache_count", lprocfs_filter_rd_fmd_max_num, lprocfs_filter_wr_fmd_max_num, 0 }, @@ -331,6 +358,8 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = { { "capa", lprocfs_filter_rd_capa, lprocfs_filter_wr_capa, 0 }, { "capa_count", lprocfs_filter_rd_capa_count, 0, 0 }, 
+ { "sec_level", lprocfs_rd_sec_level, + lprocfs_wr_sec_level, 0 }, { "read_cache_enable", lprocfs_filter_rd_cache, lprocfs_filter_wr_cache, 0}, { "writethrough_cache_enable", lprocfs_filter_rd_wcache, lprocfs_filter_wr_wcache, 0}, diff --git a/lustre/osc/osc_cl_internal.h b/lustre/osc/osc_cl_internal.h index be6badb..6085101 100644 --- a/lustre/osc/osc_cl_internal.h +++ b/lustre/osc/osc_cl_internal.h @@ -280,11 +280,7 @@ struct osc_page { * True for a `temporary page' created by read-ahead code, probably * outside of any DLM lock. */ - ops_temp:1, - /** - * True iff page was created by a user with `appropriate privileges'. - */ - ops_ignore_quota:1; + ops_temp:1; /** * Linkage into a per-osc_object list of pages in flight. For * debugging. diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 86fe589..4b4ae0a 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -302,9 +302,12 @@ static int osc_io_commit_write(const struct lu_env *env, const struct cl_page_slice *slice, unsigned from, unsigned to) { - LASSERT(to > 0); - + struct osc_page *opg = cl2osc_page(slice); + struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); + struct osc_async_page *oap = &opg->ops_oap; ENTRY; + + LASSERT(to > 0); /* * XXX instead of calling osc_page_touch() here and in * osc_io_fault_start() it might be more logical to introduce @@ -312,6 +315,10 @@ static int osc_io_commit_write(const struct lu_env *env, * fault code calls. 
*/ osc_page_touch(env, cl2osc_page(slice), to); + if (!client_is_remote(osc_export(obj)) && + cfs_capable(CFS_CAP_SYS_RESOURCE)) + oap->oap_brw_flags |= OBD_BRW_NOQUOTA; + RETURN(0); } diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index d42e4f9..a583eef 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -180,16 +180,22 @@ static int osc_page_cache_add(const struct lu_env *env, struct osc_io *oio = osc_env_io(env); int result; int brw_flags; + int noquota = 0; LINVRNT(osc_page_protected(env, opg, CLM_WRITE, 0)); ENTRY; /* Set the OBD_BRW_SRVLOCK before the page is queued. */ brw_flags = oio->oi_lockless ? OBD_BRW_SRVLOCK : 0; + if (!client_is_remote(osc_export(obj)) && + cfs_capable(CFS_CAP_SYS_RESOURCE)) { + brw_flags |= OBD_BRW_NOQUOTA; + noquota = OBD_BRW_NOQUOTA; + } osc_page_transfer_get(opg, "transfer\0cache"); result = osc_queue_async_io(env, osc_export(obj), NULL, obj->oo_oinfo, - &opg->ops_oap, OBD_BRW_WRITE, + &opg->ops_oap, OBD_BRW_WRITE | noquota, 0, 0, brw_flags, 0); if (result != 0) osc_page_transfer_put(env, opg); @@ -467,7 +473,6 @@ struct cl_page *osc_page_init(const struct lu_env *env, opg->ops_from = 0; opg->ops_to = CFS_PAGE_SIZE; - opg->ops_ignore_quota = !!cfs_capable(CFS_CAP_SYS_RESOURCE); result = osc_prep_async_page(osc_export(osc), NULL, osc->oo_oinfo, vmpage, @@ -500,9 +505,18 @@ void osc_io_submit_page(const struct lu_env *env, LINVRNT(osc_page_protected(env, opg, crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1)); + oap->oap_page_off = opg->ops_from; + oap->oap_count = opg->ops_to - opg->ops_from; + oap->oap_brw_flags |= OBD_BRW_SYNC; + if (oio->oi_lockless) + oap->oap_brw_flags |= OBD_BRW_SRVLOCK; + oap->oap_cmd = crt == CRT_WRITE ? 
OBD_BRW_WRITE : OBD_BRW_READ; - if (opg->ops_ignore_quota) + if (!client_is_remote(osc_export(cl2osc(opg->ops_cl.cpl_obj))) && + cfs_capable(CFS_CAP_SYS_RESOURCE)) { + oap->oap_brw_flags |= OBD_BRW_NOQUOTA; oap->oap_cmd |= OBD_BRW_NOQUOTA; + } oap->oap_async_flags |= OSC_FLAGS; if (oap->oap_cmd & OBD_BRW_READ) @@ -510,10 +524,6 @@ void osc_io_submit_page(const struct lu_env *env, else if (!(oap->oap_brw_page.flag & OBD_BRW_FROM_GRANT)) osc_enter_cache_try(env, cli, oap->oap_loi, oap, 1); - oap->oap_page_off = opg->ops_from; - oap->oap_count = opg->ops_to - opg->ops_from; - oap->oap_brw_flags |= oio->oi_lockless ? OBD_BRW_SRVLOCK : 0; - osc_oap_to_pending(oap); osc_page_transfer_get(opg, "transfer\0imm"); osc_page_transfer_add(env, opg, crt); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 3a27a42..329200b 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -694,7 +694,7 @@ static int osc_can_send_destroy(struct client_obd *cli) * cookies to the MDS after committing destroy transactions. 
*/ static int osc_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_export) + struct obd_export *md_export, void *capa) { struct client_obd *cli = &exp->exp_obd->u.cli; struct ptlrpc_request *req; @@ -717,6 +717,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-ENOMEM); } + osc_set_capa_size(req, &RMF_CAPA1, (struct obd_capa *)capa); rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY, 0, &cancels, count); if (rc) { @@ -734,6 +735,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, LASSERT(body); body->oa = *oa; + osc_pack_capa(req, body, (struct obd_capa *)capa); ptlrpc_request_set_replen(req); if (!osc_can_send_destroy(cli)) { @@ -1048,7 +1050,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, struct brw_page **pga, struct ptlrpc_request **reqp, - struct obd_capa *ocapa) + struct obd_capa *ocapa, int reserve) { struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; @@ -1075,7 +1077,6 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, opc = OST_READ; req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW); } - if (req == NULL) RETURN(-ENOMEM); @@ -1219,6 +1220,8 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, aa->aa_ppga = pga; aa->aa_cli = cli; CFS_INIT_LIST_HEAD(&aa->aa_oaps); + if (ocapa && reserve) + aa->aa_ocapa = capa_get(ocapa); *reqp = req; RETURN(0); @@ -1448,7 +1451,7 @@ static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa, restart_bulk: rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm, - page_count, pga, &req, ocapa); + page_count, pga, &req, ocapa, 0); if (rc != 0) return (rc); @@ -1495,18 +1498,13 @@ int osc_brw_redo_request(struct ptlrpc_request *request, } DEBUG_REQ(D_ERROR, request, "redo for recoverable error"); -/* - body = 
lustre_msg_buf(request->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - if (body->oa.o_valid & OBD_MD_FLOSSCAPA) - ocapa = lustre_unpack_capa(request->rq_reqmsg, - REQ_REC_OFF + 3); -*/ + rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ, aa->aa_cli, aa->aa_oa, NULL /* lsm unused by osc currently */, aa->aa_page_count, aa->aa_ppga, - &new_req, NULL /* ocapa */); + &new_req, aa->aa_ocapa, 0); if (rc) RETURN(rc); @@ -1544,6 +1542,9 @@ int osc_brw_redo_request(struct ptlrpc_request *request, } } + new_aa->aa_ocapa = aa->aa_ocapa; + aa->aa_ocapa = NULL; + /* use ptlrpc_set_add_req is safe because interpret functions work * in check_set context. only one way exist with access to request * from different thread got -EINTR - this way protected with @@ -1944,6 +1945,11 @@ static int brw_interpret(const struct lu_env *env, RETURN(0); } + if (aa->aa_ocapa) { + capa_put(aa->aa_ocapa); + aa->aa_ocapa = NULL; + } + cli = aa->aa_cli; client_obd_list_lock(&cli->cl_loi_list_lock); @@ -2052,7 +2058,7 @@ static struct ptlrpc_request *osc_build_req(const struct lu_env *env, sort_brw_pages(pga, page_count); rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, - pga, &req, crattr.cra_capa); + pga, &req, crattr.cra_capa, 1); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); GOTO(out, req = ERR_PTR(rc)); @@ -2560,6 +2566,9 @@ int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, oap->oap_page = page; oap->oap_obj_off = offset; + if (!client_is_remote(exp) && + cfs_capable(CFS_CAP_SYS_RESOURCE)) + oap->oap_brw_flags = OBD_BRW_NOQUOTA; LASSERT(!(offset & ~CFS_PAGE_MASK)); @@ -2605,7 +2614,6 @@ int osc_queue_async_io(const struct lu_env *env, RETURN(-EBUSY); /* check if the file's owner/group is over quota */ -#ifdef HAVE_QUOTA_SUPPORT if ((cmd & OBD_BRW_WRITE) && !(cmd & OBD_BRW_NOQUOTA)) { struct cl_object *obj; struct cl_attr attr; /* XXX put attr into thread info */ @@ -2622,7 +2630,6 @@ int 
osc_queue_async_io(const struct lu_env *env, if (rc) RETURN(rc); } -#endif if (loi == NULL) loi = lsm->lsm_oinfo[0]; @@ -3964,6 +3971,8 @@ int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg) default: rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars, lcfg, obd); + if (rc > 0) + rc = 0; break; } diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 422d255..57e2f71 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -141,7 +141,7 @@ static int osd_fid_lookup (const struct lu_env *env, const struct lu_fid *fid); static void osd_inode_getattr (const struct lu_env *env, struct inode *inode, struct lu_attr *attr); -static void osd_inode_setattr (const struct lu_env *env, +static int osd_inode_setattr (const struct lu_env *env, struct inode *inode, const struct lu_attr *attr); static int osd_param_is_sane (const struct osd_device *dev, const struct txn_param *param); @@ -154,7 +154,8 @@ static int osd_index_insert (const struct lu_env *env, const struct dt_rec *rec, const struct dt_key *key, struct thandle *handle, - struct lustre_capa *capa); + struct lustre_capa *capa, + int ingore_quota); static int osd_index_delete (const struct lu_env *env, struct dt_object *dt, const struct dt_key *key, struct thandle *handle, @@ -236,6 +237,31 @@ struct osd_thandle { }; +#ifdef HAVE_QUOTA_SUPPORT +static inline void +osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save) +{ + struct md_ucred *uc = md_ucred(env); + + LASSERT(uc != NULL); + + save->oc_uid = current->fsuid; + save->oc_gid = current->fsgid; + save->oc_cap = current->cap_effective; + current->fsuid = uc->mu_fsuid; + current->fsgid = uc->mu_fsgid; + current->cap_effective = uc->mu_cap; +} + +static inline void +osd_pop_ctxt(struct osd_ctxt *save) +{ + current->fsuid = save->oc_uid; + current->fsgid = save->oc_gid; + current->cap_effective = save->oc_cap; +} +#endif + /* * Invariants, assertions. 
*/ @@ -758,46 +784,161 @@ static int osd_init_capa_ctxt(const struct lu_env *env, struct dt_device *d, RETURN(0); } -/* Note: we did not count into QUOTA here, If we mount with --data_journal - * we may need more*/ -static const int osd_dto_credits[DTO_NR] = { - /* - * Insert/Delete. IAM EXT3_INDEX_EXTRA_TRANS_BLOCKS(8) + - * EXT3_SINGLEDATA_TRANS_BLOCKS 8 XXX Note: maybe iam need more,since - * iam have more level than Ext3 htree +/** + * Concurrency: serialization provided by callers. + */ +static void osd_init_quota_ctxt(const struct lu_env *env, struct dt_device *d, + struct dt_quota_ctxt *ctxt, void *data) +{ + struct obd_device *obd = (void *)ctxt; + struct vfsmount *mnt = (struct vfsmount *)data; + ENTRY; + + obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb; + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); + obd->obd_lvfs_ctxt.pwdmnt = mnt; + obd->obd_lvfs_ctxt.pwd = mnt->mnt_root; + obd->obd_lvfs_ctxt.fs = get_ds(); + + EXIT; +} + +/** + * Note: we do not count into QUOTA here. + * If we mount with --data_journal we may need more. + */ +static const int osd_dto_credits_noquota[DTO_NR] = { + /** + * Insert/Delete. + * INDEX_EXTRA_TRANS_BLOCKS(8) + + * SINGLEDATA_TRANS_BLOCKS(8) + * XXX Note: maybe iam need more, since iam have more level than + * EXT3 htree. */ [DTO_INDEX_INSERT] = 16, [DTO_INDEX_DELETE] = 16, + /** + * Unused now + */ [DTO_IDNEX_UPDATE] = 16, - /* - * Create a object. Same as create object in Ext3 filesystem, but did - * not count QUOTA i EXT3_DATA_TRANS_BLOCKS(12) + - * INDEX_EXTRA_BLOCKS(8) + 3(inode bits,groups, GDT) + /** + * Create a object. The same as create object in EXT3. + * DATA_TRANS_BLOCKS(14) + + * INDEX_EXTRA_BLOCKS(8) + + * 3(inode bits, groups, GDT) + */ + [DTO_OBJECT_CREATE] = 25, + /** + * Unused now + */ + [DTO_OBJECT_DELETE] = 25, + /** + * Attr set credits. + * 3(inode bits, group, GDT) */ - [DTO_OBJECT_CREATE] = 23, - [DTO_OBJECT_DELETE] = 23, + [DTO_ATTR_SET_BASE] = 3, + /** + * Xattr set. The same as xattr of EXT3. 
+ * DATA_TRANS_BLOCKS(14) + * XXX Note: in original MDS implementation INDEX_EXTRA_TRANS_BLOCKS are + * also counted in. Do not know why? + */ + [DTO_XATTR_SET] = 14, + [DTO_LOG_REC] = 14, + /** + * credits for inode change during write. + */ + [DTO_WRITE_BASE] = 3, + /** + * credits for single block write. + */ + [DTO_WRITE_BLOCK] = 14, + /** + * Attr set credits for chown. + * 3 (inode bit, group, GDT) + */ + [DTO_ATTR_SET_CHOWN]= 3 +}; + +/** + * Note: we count into QUOTA here. + * If we mount with --data_journal we may need more. + */ +static const int osd_dto_credits_quota[DTO_NR] = { + /** + * INDEX_EXTRA_TRANS_BLOCKS(8) + + * SINGLEDATA_TRANS_BLOCKS(8) + + * 2 * QUOTA_TRANS_BLOCKS(2) + */ + [DTO_INDEX_INSERT] = 20, + /** + * INDEX_EXTRA_TRANS_BLOCKS(8) + + * SINGLEDATA_TRANS_BLOCKS(8) + + * 2 * QUOTA_TRANS_BLOCKS(2) + */ + [DTO_INDEX_DELETE] = 20, + /** + * Unused now. + */ + [DTO_IDNEX_UPDATE] = 16, /* - * Attr set credits 3 inode, group, GDT + * Create an object. Same as create object in EXT3 filesystem. + * DATA_TRANS_BLOCKS(16) + + * INDEX_EXTRA_BLOCKS(8) + + * 3(inode bits, groups, GDT) + + * 2 * QUOTA_INIT_BLOCKS(25) */ - [DTO_ATTR_SET] = 3, + [DTO_OBJECT_CREATE] = 77, /* - * XATTR_SET. SAME AS XATTR of EXT3 EXT3_DATA_TRANS_BLOCKS XXX Note: - * in original MDS implmentation EXT3_INDEX_EXTRA_TRANS_BLOCKS are - * also counted in. Do not know why? + * Unused now. + * DATA_TRANS_BLOCKS(16) + + * INDEX_EXTRA_BLOCKS(8) + + * 3(inode bits, groups, GDT) + + * QUOTA(?) + */ + [DTO_OBJECT_DELETE] = 27, + /** + * Attr set credits. + * 3 (inode bit, group, GDT) + + */ + [DTO_ATTR_SET_BASE] = 3, + /** + * Xattr set. The same as xattr of EXT3. + * DATA_TRANS_BLOCKS(16) + * XXX Note: in original MDS implementation INDEX_EXTRA_TRANS_BLOCKS are + * also counted in. Do not know why? */ [DTO_XATTR_SET] = 16, [DTO_LOG_REC] = 16, - /* creadits for inode change during write */ + /** + * credits for inode change during write.
+ */ [DTO_WRITE_BASE] = 3, - /* credits for single block write */ - [DTO_WRITE_BLOCK] = 12 + /** + * credits for single block write. + */ + [DTO_WRITE_BLOCK] = 16, + /** + * Attr set credits for chown. + * 3 (inode bit, group, GDT) + + * 2 * QUOTA_INIT_BLOCKS(25) + + * 2 * QUOTA_DEL_BLOCKS(9) + */ + [DTO_ATTR_SET_CHOWN]= 71 }; static int osd_credit_get(const struct lu_env *env, struct dt_device *d, enum dt_txn_op op) { - LASSERT(0 <= op && op < ARRAY_SIZE(osd_dto_credits)); - return osd_dto_credits[op]; + LASSERT(ARRAY_SIZE(osd_dto_credits_noquota) == + ARRAY_SIZE(osd_dto_credits_quota)); + LASSERT(0 <= op && op < ARRAY_SIZE(osd_dto_credits_noquota)); +#ifdef HAVE_QUOTA_SUPPORT + if (test_opt(osd_sb(osd_dt_dev(d)), QUOTA)) + return osd_dto_credits_quota[op]; + else +#endif + return osd_dto_credits_noquota[op]; } static const struct dt_device_operations osd_dt_ops = { @@ -811,6 +952,7 @@ static const struct dt_device_operations osd_dt_ops = { .dt_commit_async = osd_commit_async, .dt_credit_get = osd_credit_get, .dt_init_capa_ctxt = osd_init_capa_ctxt, + .dt_init_quota_ctxt= osd_init_quota_ctxt, }; static void osd_object_read_lock(const struct lu_env *env, @@ -878,6 +1020,7 @@ static int capa_is_sane(const struct lu_env *env, struct lustre_capa_key *keys) { struct osd_thread_info *oti = osd_oti_get(env); + struct lustre_capa *tcapa = &oti->oti_capa; struct obd_capa *oc; int i, rc = 0; ENTRY; @@ -892,6 +1035,11 @@ static int capa_is_sane(const struct lu_env *env, RETURN(rc); } + if (capa_is_expired_sec(capa)) { + DEBUG_CAPA(D_ERROR, capa, "expired"); + RETURN(-ESTALE); + } + spin_lock(&capa_lock); for (i = 0; i < 2; i++) { if (keys[i].lk_keyid == capa->lc_keyid) { @@ -906,11 +1054,11 @@ static int capa_is_sane(const struct lu_env *env, RETURN(-ESTALE); } - rc = capa_hmac(oti->oti_capa.lc_hmac, capa, oti->oti_capa_key.lk_key); + rc = capa_hmac(tcapa->lc_hmac, capa, oti->oti_capa_key.lk_key); if (rc) RETURN(rc); - if (memcmp(oti->oti_capa.lc_hmac, capa->lc_hmac, 
sizeof(capa->lc_hmac))) - { + + if (memcmp(tcapa->lc_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) { DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); RETURN(-EACCES); } @@ -926,6 +1074,7 @@ static int osd_object_auth(const struct lu_env *env, struct dt_object *dt, { const struct lu_fid *fid = lu_object_fid(&dt->do_lu); struct osd_device *dev = osd_dev(dt->do_lu.lo_dev); + struct md_capainfo *ci; int rc; if (!dev->od_fl_capa) @@ -934,6 +1083,13 @@ static int osd_object_auth(const struct lu_env *env, struct dt_object *dt, if (capa == BYPASS_CAPA) return 0; + ci = md_capainfo(env); + if (unlikely(!ci)) + return 0; + + if (ci->mc_auth == LC_ID_NONE) + return 0; + if (!capa) { CERROR("no capability is provided for fid "DFID"\n", PFID(fid)); return -EACCES; @@ -984,6 +1140,7 @@ static int osd_attr_set(const struct lu_env *env, struct lustre_capa *capa) { struct osd_object *obj = osd_dt_obj(dt); + int rc; LASSERT(handle != NULL); LASSERT(dt_object_exists(dt)); @@ -993,11 +1150,12 @@ static int osd_attr_set(const struct lu_env *env, return -EACCES; spin_lock(&obj->oo_guard); - osd_inode_setattr(env, obj->oo_inode, attr); + rc = osd_inode_setattr(env, obj->oo_inode, attr); spin_unlock(&obj->oo_guard); - mark_inode_dirty(obj->oo_inode); - return 0; + if (!rc) + mark_inode_dirty(obj->oo_inode); + return rc; } static struct timespec *osd_inode_time(const struct lu_env *env, @@ -1012,8 +1170,8 @@ static struct timespec *osd_inode_time(const struct lu_env *env, return t; } -static void osd_inode_setattr(const struct lu_env *env, - struct inode *inode, const struct lu_attr *attr) +static int osd_inode_setattr(const struct lu_env *env, + struct inode *inode, const struct lu_attr *attr) { __u64 bits; @@ -1021,6 +1179,24 @@ static void osd_inode_setattr(const struct lu_env *env, LASSERT(!(bits & LA_TYPE)); /* Huh? You want too much. 
*/ +#ifdef HAVE_QUOTA_SUPPORT + if ((bits & LA_UID && attr->la_uid != inode->i_uid) || + (bits & LA_GID && attr->la_gid != inode->i_gid)) { + struct osd_ctxt *save = &osd_oti_get(env)->oti_ctxt; + struct iattr iattr; + int rc; + + iattr.ia_valid = bits & (LA_UID | LA_GID); + iattr.ia_uid = attr->la_uid; + iattr.ia_gid = attr->la_gid; + osd_push_ctxt(env, save); + rc = DQUOT_TRANSFER(inode, &iattr) ? -EDQUOT : 0; + osd_pop_ctxt(save); + if (rc != 0) + return rc; + } +#endif + if (bits & LA_ATIME) inode->i_atime = *osd_inode_time(env, inode, attr->la_atime); if (bits & LA_CTIME) @@ -1031,8 +1207,14 @@ static void osd_inode_setattr(const struct lu_env *env, LDISKFS_I(inode)->i_disksize = attr->la_size; i_size_write(inode, attr->la_size); } +# if 0 + /* + * OSD should not change "i_blocks" which is used by quota. + * "i_blocks" should be changed by ldiskfs only. + * Disable this assignment until SOM to fix some EA field. */ if (bits & LA_BLOCKS) inode->i_blocks = attr->la_blocks; +#endif if (bits & LA_MODE) inode->i_mode = (inode->i_mode & S_IFMT) | (attr->la_mode & ~S_IFMT); @@ -1051,6 +1233,7 @@ static void osd_inode_setattr(const struct lu_env *env, li->i_flags = (li->i_flags & ~LDISKFS_FL_USER_MODIFIABLE) | (attr->la_flags & LDISKFS_FL_USER_MODIFIABLE); } + return 0; } /* @@ -1087,6 +1270,9 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, struct osd_thandle *oth; struct inode *parent; struct inode *inode; +#ifdef HAVE_QUOTA_SUPPORT + struct osd_ctxt *save = &info->oti_ctxt; +#endif LINVRNT(osd_invariant(obj)); LASSERT(obj->oo_inode == NULL); @@ -1101,7 +1287,13 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, parent = osd->od_obj_area->d_inode; LASSERT(parent->i_op != NULL); +#ifdef HAVE_QUOTA_SUPPORT + osd_push_ctxt(info->oti_env, save); +#endif inode = ldiskfs_create_inode(oth->ot_handle, parent, mode); +#ifdef HAVE_QUOTA_SUPPORT + osd_pop_ctxt(save); +#endif if (!IS_ERR(inode)) { obj->oo_inode = inode; 
result = 0; @@ -1271,13 +1463,16 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, } if (result == 0) { struct osd_inode_id *id = &info->oti_id; + struct md_ucred *uc = md_ucred(env); LASSERT(obj->oo_inode != NULL); + LASSERT(uc != NULL); id->oii_ino = obj->oo_inode->i_ino; id->oii_gen = obj->oo_inode->i_generation; - result = osd_oi_insert(info, &osd->od_oi, fid, id, th); + result = osd_oi_insert(info, &osd->od_oi, fid, id, th, + uc->mu_cap & CFS_CAP_SYS_RESOURCE_MASK); } LASSERT(ergo(result == 0, dt_object_exists(dt))); @@ -1471,6 +1666,7 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env, struct lustre_capa_key *key = &info->oti_capa_key; struct lustre_capa *capa = &info->oti_capa; struct obd_capa *oc; + struct md_capainfo *ci; int rc; ENTRY; @@ -1484,10 +1680,41 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env, if (old && osd_object_auth(env, dt, old, opc)) RETURN(ERR_PTR(-EACCES)); + ci = md_capainfo(env); + if (unlikely(!ci)) + RETURN(ERR_PTR(-ENOENT)); + + switch (ci->mc_auth) { + case LC_ID_NONE: + RETURN(NULL); + case LC_ID_PLAIN: + capa->lc_uid = obj->oo_inode->i_uid; + capa->lc_gid = obj->oo_inode->i_gid; + capa->lc_flags = LC_ID_PLAIN; + break; + case LC_ID_CONVERT: { + __u32 d[4], s[4]; + + s[0] = obj->oo_inode->i_uid; + get_random_bytes(&(s[1]), sizeof(__u32)); + s[2] = obj->oo_inode->i_gid; + get_random_bytes(&(s[3]), sizeof(__u32)); + rc = capa_encrypt_id(d, s, key->lk_key, CAPA_HMAC_KEY_MAX_LEN); + if (unlikely(rc)) + RETURN(ERR_PTR(rc)); + + capa->lc_uid = ((__u64)d[1] << 32) | d[0]; + capa->lc_gid = ((__u64)d[3] << 32) | d[2]; + capa->lc_flags = LC_ID_CONVERT; + break; + } + default: + RETURN(ERR_PTR(-EINVAL)); + } + capa->lc_fid = *fid; capa->lc_opc = opc; - capa->lc_uid = 0; - capa->lc_flags = dev->od_capa_alg << 24; + capa->lc_flags |= dev->od_capa_alg << 24; capa->lc_timeout = dev->od_capa_timeout; capa->lc_expiry = 0; @@ -1587,11 +1814,15 @@ static ssize_t osd_read(const struct 
lu_env *env, struct dt_object *dt, static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, loff_t *pos, - struct thandle *handle, struct lustre_capa *capa) + struct thandle *handle, struct lustre_capa *capa, + int ignore_quota) { struct inode *inode = osd_dt_obj(dt)->oo_inode; struct osd_thandle *oh; ssize_t result; +#ifdef HAVE_QUOTA_SUPPORT + cfs_cap_t save = current->cap_effective; +#endif LASSERT(handle != NULL); @@ -1600,8 +1831,17 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle->h_transaction != NULL); +#ifdef HAVE_QUOTA_SUPPORT + if (ignore_quota) + current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK; + else + current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK; +#endif result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len, pos, oh->ot_handle); +#ifdef HAVE_QUOTA_SUPPORT + current->cap_effective = save; +#endif if (result == 0) result = buf->lb_len; return result; @@ -1795,12 +2035,16 @@ static int osd_index_lookup(const struct lu_env *env, struct dt_object *dt, static int osd_index_insert(const struct lu_env *env, struct dt_object *dt, const struct dt_rec *rec, const struct dt_key *key, - struct thandle *th, struct lustre_capa *capa) + struct thandle *th, struct lustre_capa *capa, + int ignore_quota) { struct osd_object *obj = osd_dt_obj(dt); struct iam_path_descr *ipd; struct osd_thandle *oh; struct iam_container *bag = &obj->oo_dir->od_container; +#ifdef HAVE_QUOTA_SUPPORT + cfs_cap_t save = current->cap_effective; +#endif int rc; ENTRY; @@ -1820,8 +2064,17 @@ static int osd_index_insert(const struct lu_env *env, struct dt_object *dt, oh = container_of0(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); LASSERT(oh->ot_handle->h_transaction != NULL); +#ifdef HAVE_QUOTA_SUPPORT + if (ignore_quota) + current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK; + else + 
current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK; +#endif rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key, (struct iam_rec *)rec, ipd); +#ifdef HAVE_QUOTA_SUPPORT + current->cap_effective = save; +#endif osd_ipd_put(env, bag, ipd); LINVRNT(osd_invariant(obj)); RETURN(rc); @@ -2135,7 +2388,8 @@ static int osd_index_compat_insert(const struct lu_env *env, struct dt_object *dt, const struct dt_rec *rec, const struct dt_key *key, struct thandle *th, - struct lustre_capa *capa) + struct lustre_capa *capa, + int ignore_quota) { struct osd_object *obj = osd_dt_obj(dt); @@ -2392,7 +2646,7 @@ static int osd_process_config(const struct lu_env *env, RETURN(err); } extern void ldiskfs_orphan_cleanup (struct super_block * sb, - struct ldiskfs_super_block * es); + struct ldiskfs_super_block * es); static int osd_recovery_complete(const struct lu_env *env, struct lu_device *d) diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h index bcfcd91..e187323 100644 --- a/lustre/osd/osd_internal.h +++ b/lustre/osd/osd_internal.h @@ -66,6 +66,16 @@ struct inode; +#define OSD_COUNTERS (0) + +#ifdef HAVE_QUOTA_SUPPORT +struct osd_ctxt { + __u32 oc_uid; + __u32 oc_gid; + __u32 oc_cap; +}; +#endif + /* * osd device. 
*/ @@ -145,6 +155,9 @@ struct osd_thread_info { int oti_r_locks; int oti_w_locks; int oti_txns; +#ifdef HAVE_QUOTA_SUPPORT + struct osd_ctxt oti_ctxt; +#endif }; #ifdef LPROCFS diff --git a/lustre/osd/osd_oi.c b/lustre/osd/osd_oi.c index a2e086c..79d4082 100644 --- a/lustre/osd/osd_oi.c +++ b/lustre/osd/osd_oi.c @@ -195,7 +195,7 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, const struct lu_fid *fid, const struct osd_inode_id *id0, - struct thandle *th) + struct thandle *th, int ignore_quota) { struct dt_object *idx; struct osd_inode_id *id; @@ -210,7 +210,8 @@ int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, id->oii_gen = cpu_to_be32(id0->oii_gen); return idx->do_index_ops->dio_insert(info->oti_env, idx, (const struct dt_rec *)id, - key, th, BYPASS_CAPA); + key, th, BYPASS_CAPA, + ignore_quota); } int osd_oi_delete(struct osd_thread_info *info, diff --git a/lustre/osd/osd_oi.h b/lustre/osd/osd_oi.h index b1d2dc8..8e02eb2 100644 --- a/lustre/osd/osd_oi.h +++ b/lustre/osd/osd_oi.h @@ -98,7 +98,7 @@ int osd_oi_lookup(struct osd_thread_info *info, struct osd_oi *oi, const struct lu_fid *fid, struct osd_inode_id *id); int osd_oi_insert(struct osd_thread_info *info, struct osd_oi *oi, const struct lu_fid *fid, const struct osd_inode_id *id, - struct thandle *th); + struct thandle *th, int ingore_quota); int osd_oi_delete(struct osd_thread_info *info, struct osd_oi *oi, const struct lu_fid *fid, struct thandle *th); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index abb4f66..8b2b7fb 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -69,6 +69,18 @@ static int oss_num_create_threads; CFS_MODULE_PARM(oss_num_create_threads, "i", int, 0444, "number of OSS create threads to start"); +/** + * Do not return server-side uid/gid to remote client + */ +static void ost_drop_id(struct obd_export *exp, struct obdo *oa) +{ + if 
(exp_connect_rmtclient(exp)) { + oa->o_uid = -1; + oa->o_gid = -1; + oa->o_valid &= ~(OBD_MD_FLUID | OBD_MD_FLGID); + } +} + void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req) { struct oti_req_ack_lock *ack_lock; @@ -95,6 +107,7 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req, { struct ost_body *body, *repbody; __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; + struct lustre_capa *capa = NULL; int rc; ENTRY; @@ -115,6 +128,9 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req, ldlm_request_cancel(req, dlm, 0); } + if (body->oa.o_valid & OBD_MD_FLOSSCAPA) + capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 2); + rc = lustre_pack_reply(req, 2, size, NULL); if (rc) RETURN(rc); @@ -124,7 +140,7 @@ static int ost_destroy(struct obd_export *exp, struct ptlrpc_request *req, repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL); + req->rq_status = obd_destroy(exp, &body->oa, NULL, oti, NULL, capa); RETURN(0); } @@ -154,6 +170,7 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req) oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1); req->rq_status = obd_getattr(exp, &oinfo); + ost_drop_id(exp, &repbody->oa); RETURN(0); } @@ -320,6 +337,7 @@ static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req, ost_punch_lock_put(exp, oinfo.oi_oa, &lh); } repbody->oa = *oinfo.oi_oa; + ost_drop_id(exp, &repbody->oa); RETURN(rc); } @@ -348,6 +366,7 @@ static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req) memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size, repbody->oa.o_blocks, capa); + ost_drop_id(exp, &repbody->oa); RETURN(0); } @@ -378,6 +397,7 @@ static int ost_setattr(struct obd_export *exp, struct ptlrpc_request 
*req, oinfo.oi_capa = lustre_unpack_capa(req->rq_reqmsg, REQ_REC_OFF + 1); req->rq_status = obd_setattr(exp, &oinfo, oti); + ost_drop_id(exp, &repbody->oa); RETURN(0); } @@ -792,6 +812,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) repbody = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa)); + ost_drop_id(exp, &repbody->oa); } out_lock: @@ -843,6 +864,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) obd_count client_cksum = 0, server_cksum = 0; cksum_type_t cksum_type = OBD_CKSUM_CRC32; int no_reply = 0; + __u32 o_uid = 0, o_gid = 0; ENTRY; req->rq_bulk_write = 1; @@ -970,6 +992,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) body->oa.o_valid &= ~OBD_MD_FLGRANT; } + if (exp_connect_rmtclient(exp)) { + o_uid = body->oa.o_uid; + o_gid = body->oa.o_gid; + } npages = OST_THREAD_POOL_SIZE; rc = obd_preprw(OBD_BRW_WRITE, exp, &body->oa, objcount, ioo, remote_nb, &npages, local_nb, oti, capa); @@ -1065,6 +1091,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) /* Must commit after prep above in all cases */ rc = obd_commitrw(OBD_BRW_WRITE, exp, &repbody->oa, objcount, ioo, remote_nb, npages, local_nb, oti, rc); + if (exp_connect_rmtclient(exp)) { + repbody->oa.o_uid = o_uid; + repbody->oa.o_gid = o_gid; + } if (unlikely(client_cksum != server_cksum && rc == 0)) { int new_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type); @@ -1230,26 +1260,25 @@ static int ost_get_info(struct obd_export *exp, struct ptlrpc_request *req) RETURN(rc); } +#ifdef HAVE_QUOTA_SUPPORT static int ost_handle_quotactl(struct ptlrpc_request *req) { struct obd_quotactl *oqctl, *repoqc; - __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*repoqc) }; int rc; ENTRY; - oqctl = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqctl), - lustre_swab_obd_quotactl); + oqctl = 
req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); if (oqctl == NULL) GOTO(out, rc = -EPROTO); - rc = lustre_pack_reply(req, 2, size, NULL); + rc = req_capsule_server_pack(&req->rq_pill); if (rc) GOTO(out, rc); - repoqc = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repoqc)); - + repoqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL); req->rq_status = obd_quotactl(req->rq_export, oqctl); *repoqc = *oqctl; + out: RETURN(rc); } @@ -1265,15 +1294,38 @@ static int ost_handle_quotacheck(struct ptlrpc_request *req) RETURN(-EPROTO); rc = req_capsule_server_pack(&req->rq_pill); - if (rc) { - CERROR("ost: out of memory while packing quotacheck reply\n"); + if (rc) RETURN(-ENOMEM); - } req->rq_status = obd_quotacheck(req->rq_export, oqctl); RETURN(0); } +static int ost_handle_quota_adjust_qunit(struct ptlrpc_request *req) +{ + struct quota_adjust_qunit *oqaq, *repoqa; + struct lustre_quota_ctxt *qctxt; + int rc; + ENTRY; + + qctxt = &req->rq_export->exp_obd->u.obt.obt_qctxt; + oqaq = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT); + if (oqaq == NULL) + GOTO(out, rc = -EPROTO); + + rc = req_capsule_server_pack(&req->rq_pill); + if (rc) + GOTO(out, rc); + + repoqa = req_capsule_server_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT); + req->rq_status = obd_quota_adjust_qunit(req->rq_export, oqaq, qctxt); + *repoqa = *oqaq; + + out: + RETURN(rc); +} +#endif + static int ost_llog_handle_connect(struct obd_export *exp, struct ptlrpc_request *req) { @@ -1286,6 +1338,122 @@ static int ost_llog_handle_connect(struct obd_export *exp, RETURN(rc); } +#define ost_init_sec_none(reply, exp) \ +do { \ + reply->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT | \ + OBD_CONNECT_RMT_CLIENT_FORCE | \ + OBD_CONNECT_OSS_CAPA); \ + spin_lock(&exp->exp_lock); \ + exp->exp_connect_flags = reply->ocd_connect_flags; \ + spin_unlock(&exp->exp_lock); \ +} while (0) + +static int ost_init_sec_level(struct ptlrpc_request *req) +{ + struct obd_export *exp = 
req->rq_export; + struct req_capsule *pill = &req->rq_pill; + struct obd_device *obd = exp->exp_obd; + struct filter_obd *filter = &obd->u.filter; + char *client = libcfs_nid2str(req->rq_peer.nid); + struct obd_connect_data *data, *reply; + int rc = 0, remote; + ENTRY; + + data = req_capsule_client_get(pill, &RMF_CONNECT_DATA); + reply = req_capsule_server_get(pill, &RMF_CONNECT_DATA); + if (data == NULL || reply == NULL) + RETURN(-EFAULT); + + /* connection from MDT is always trusted */ + if (req->rq_auth_usr_mdt) { + ost_init_sec_none(reply, exp); + RETURN(0); + } + + /* no GSS support case */ + if (!req->rq_auth_gss) { + if (filter->fo_sec_level > LUSTRE_SEC_NONE) { + CWARN("client %s -> target %s does not user GSS, " + "can not run under security level %d.\n", + client, obd->obd_name, filter->fo_sec_level); + RETURN(-EACCES); + } else { + ost_init_sec_none(reply, exp); + RETURN(0); + } + } + + /* old version case */ + if (unlikely(!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) || + !(data->ocd_connect_flags & OBD_CONNECT_OSS_CAPA))) { + if (filter->fo_sec_level > LUSTRE_SEC_NONE) { + CWARN("client %s -> target %s uses old version, " + "can not run under security level %d.\n", + client, obd->obd_name, filter->fo_sec_level); + RETURN(-EACCES); + } else { + CWARN("client %s -> target %s uses old version, " + "run under security level %d.\n", + client, obd->obd_name, filter->fo_sec_level); + ost_init_sec_none(reply, exp); + RETURN(0); + } + } + + remote = data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT_FORCE; + if (remote) { + if (!req->rq_auth_remote) + CDEBUG(D_SEC, "client (local realm) %s -> target %s " + "asked to be remote.\n", client, obd->obd_name); + } else if (req->rq_auth_remote) { + remote = 1; + CDEBUG(D_SEC, "client (remote realm) %s -> target %s is set " + "as remote by default.\n", client, obd->obd_name); + } + + if (remote) { + if (!filter->fo_fl_oss_capa) { + CDEBUG(D_SEC, "client %s -> target %s is set as remote," + " but OSS capabilities 
are not enabled: %d.\n", + client, obd->obd_name, filter->fo_fl_oss_capa); + RETURN(-EACCES); + } + } + + switch (filter->fo_sec_level) { + case LUSTRE_SEC_NONE: + if (!remote) { + ost_init_sec_none(reply, exp); + break; + } else { + CDEBUG(D_SEC, "client %s -> target %s is set as remote, " + "can not run under security level %d.\n", + client, obd->obd_name, filter->fo_sec_level); + RETURN(-EACCES); + } + case LUSTRE_SEC_REMOTE: + if (!remote) + ost_init_sec_none(reply, exp); + break; + case LUSTRE_SEC_ALL: + if (!remote) { + reply->ocd_connect_flags &= ~(OBD_CONNECT_RMT_CLIENT | + OBD_CONNECT_RMT_CLIENT_FORCE); + if (!filter->fo_fl_oss_capa) + reply->ocd_connect_flags &= ~OBD_CONNECT_OSS_CAPA; + + spin_lock(&exp->exp_lock); + exp->exp_connect_flags = reply->ocd_connect_flags; + spin_unlock(&exp->exp_lock); + } + break; + default: + RETURN(-EINVAL); + } + + RETURN(rc); +} + static int filter_export_check_flavor(struct filter_obd *filter, struct obd_export *exp, struct ptlrpc_request *req) @@ -1382,8 +1550,11 @@ int ost_msg_check_version(struct lustre_msg *msg) case OST_SYNC: case OST_SET_INFO: case OST_GET_INFO: +#ifdef HAVE_QUOTA_SUPPORT case OST_QUOTACHECK: case OST_QUOTACTL: + case OST_QUOTA_ADJUST_QUNIT: +#endif rc = lustre_msg_check_version(msg, LUSTRE_OST_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", @@ -1487,12 +1658,14 @@ int ost_handle(struct ptlrpc_request *req) if (OBD_FAIL_CHECK(OBD_FAIL_OST_CONNECT_NET2)) RETURN(0); if (!rc) { - struct obd_export *exp = req->rq_export; + rc = ost_init_sec_level(req); + if (!rc) { + struct obd_export *exp = req->rq_export; - obd = exp->exp_obd; - - rc = filter_export_check_flavor(&obd->u.filter, - exp, req); + obd = exp->exp_obd; + rc = filter_export_check_flavor(&obd->u.filter, + exp, req); + } } break; } @@ -1598,6 +1771,7 @@ int ost_handle(struct ptlrpc_request *req) DEBUG_REQ(D_INODE, req, "get_info"); rc = ost_get_info(req->rq_export, req); break; +#ifdef HAVE_QUOTA_SUPPORT case 
OST_QUOTACHECK: CDEBUG(D_INODE, "quotacheck\n"); req_capsule_set(&req->rq_pill, &RQF_OST_QUOTACHECK); @@ -1612,6 +1786,12 @@ int ost_handle(struct ptlrpc_request *req) RETURN(0); rc = ost_handle_quotactl(req); break; + case OST_QUOTA_ADJUST_QUNIT: + CDEBUG(D_INODE, "quota_adjust_qunit\n"); + req_capsule_set(&req->rq_pill, &RQF_OST_QUOTA_ADJUST_QUNIT); + rc = ost_handle_quota_adjust_qunit(req); + break; +#endif case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); req_capsule_set(&req->rq_pill, &RQF_OBD_PING); diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index f975d74..e663cea 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -105,6 +105,10 @@ static const struct req_msg_field *quotactl_only[] = { &RMF_PTLRPC_BODY, &RMF_OBD_QUOTACTL }; +static const struct req_msg_field *quota_adjust_qunit_only[] = { + &RMF_PTLRPC_BODY, + &RMF_QUOTA_ADJUST_QUNIT +}; static const struct req_msg_field *qunit_data_only[] = { &RMF_PTLRPC_BODY, @@ -240,7 +244,9 @@ static const struct req_msg_field *mds_last_unlink_server[] = { &RMF_PTLRPC_BODY, &RMF_MDT_BODY, &RMF_MDT_MD, - &RMF_LOGCOOKIES + &RMF_LOGCOOKIES, + &RMF_CAPA1, + &RMF_CAPA2 }; static const struct req_msg_field *mds_reint_setattr_client[] = { @@ -465,7 +471,8 @@ static const struct req_msg_field *ost_body_capa[] = { static const struct req_msg_field *ost_destroy_client[] = { &RMF_PTLRPC_BODY, &RMF_OST_BODY, - &RMF_DLM_REQ + &RMF_DLM_REQ, + &RMF_CAPA1 }; @@ -518,10 +525,10 @@ static const struct req_msg_field *ost_get_fiemap_server[] = { static const struct req_format *req_formats[] = { &RQF_OBD_PING, &RQF_SEC_CTX, - &RQF_SEQ_QUERY, - &RQF_FLD_QUERY, &RQF_MGS_TARGET_REG, &RQF_MGS_SET_INFO, + &RQF_SEQ_QUERY, + &RQF_FLD_QUERY, &RQF_MDS_CONNECT, &RQF_MDS_DISCONNECT, &RQF_MDS_SET_INFO, @@ -552,10 +559,12 @@ static const struct req_format *req_formats[] = { &RQF_MDS_QUOTACHECK, &RQF_MDS_QUOTACTL, &RQF_MDS_QUOTA_DQACQ, + &RQF_QC_CALLBACK, &RQF_OST_CONNECT, &RQF_OST_DISCONNECT, &RQF_OST_QUOTACHECK, 
&RQF_OST_QUOTACTL, + &RQF_OST_QUOTA_ADJUST_QUNIT, &RQF_OST_GETATTR, &RQF_OST_SETATTR, &RQF_OST_CREATE, @@ -670,6 +679,12 @@ const struct req_msg_field RMF_OBD_QUOTACTL = sizeof(struct obd_quotactl), lustre_swab_obd_quotactl); EXPORT_SYMBOL(RMF_OBD_QUOTACTL); +const struct req_msg_field RMF_QUOTA_ADJUST_QUNIT = + DEFINE_MSGF("quota_adjust_qunit", 0, + sizeof(struct quota_adjust_qunit), + lustre_swab_quota_adjust_qunit); +EXPORT_SYMBOL(RMF_QUOTA_ADJUST_QUNIT); + const struct req_msg_field RMF_QUNIT_DATA = DEFINE_MSGF("qunit_data", 0, sizeof(struct qunit_data), NULL); @@ -878,6 +893,14 @@ const struct req_format RQF_MGS_SET_INFO = mgs_set_info); EXPORT_SYMBOL(RQF_MGS_SET_INFO); +const struct req_format RQF_SEQ_QUERY = + DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server); +EXPORT_SYMBOL(RQF_SEQ_QUERY); + +const struct req_format RQF_FLD_QUERY = + DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server); +EXPORT_SYMBOL(RQF_FLD_QUERY); + const struct req_format RQF_LOG_CANCEL = DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty); EXPORT_SYMBOL(RQF_LOG_CANCEL); @@ -898,6 +921,11 @@ const struct req_format RQF_OST_QUOTACTL = DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only); EXPORT_SYMBOL(RQF_OST_QUOTACTL); +const struct req_format RQF_OST_QUOTA_ADJUST_QUNIT = + DEFINE_REQ_FMT0("OST_QUOTA_ADJUST_QUNIT", quota_adjust_qunit_only, + quota_adjust_qunit_only); +EXPORT_SYMBOL(RQF_OST_QUOTA_ADJUST_QUNIT); + const struct req_format RQF_QC_CALLBACK = DEFINE_REQ_FMT0("QC_CALLBACK", quotactl_only, empty); EXPORT_SYMBOL(RQF_QC_CALLBACK); @@ -906,14 +934,6 @@ const struct req_format RQF_MDS_QUOTA_DQACQ = DEFINE_REQ_FMT0("MDS_QUOTA_DQACQ", qunit_data_only, qunit_data_only); EXPORT_SYMBOL(RQF_MDS_QUOTA_DQACQ); -const struct req_format RQF_SEQ_QUERY = - DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server); -EXPORT_SYMBOL(RQF_SEQ_QUERY); - -const struct req_format RQF_FLD_QUERY = - DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, 
fld_query_server); -EXPORT_SYMBOL(RQF_FLD_QUERY); - const struct req_format RQF_MDS_GETSTATUS = DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa); EXPORT_SYMBOL(RQF_MDS_GETSTATUS); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 519593c..29b805c 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -72,6 +72,7 @@ struct ll_rpc_opcode { { OST_SET_INFO, "ost_set_info" }, { OST_QUOTACHECK, "ost_quotacheck" }, { OST_QUOTACTL, "ost_quotactl" }, + { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" }, { MDS_GETATTR, "mds_getattr" }, { MDS_GETATTR_NAME, "mds_getattr_lock" }, { MDS_CLOSE, "mds_close" }, @@ -106,7 +107,7 @@ struct ll_rpc_opcode { { MGS_SET_INFO, "mgs_set_info" }, { OBD_PING, "obd_ping" }, { OBD_LOG_CANCEL, "llog_origin_handle_cancel" }, - { OBD_QC_CALLBACK, "obd_qc_callback" }, + { OBD_QC_CALLBACK, "obd_quota_callback" }, { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_create" }, { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" }, { LLOG_ORIGIN_HANDLE_READ_HEADER,"llog_origin_handle_read_header" }, @@ -120,7 +121,9 @@ struct ll_rpc_opcode { { SEQ_QUERY, "seq_query" }, { SEC_CTX_INIT, "sec_ctx_init" }, { SEC_CTX_INIT_CONT,"sec_ctx_init_cont" }, - { SEC_CTX_FINI, "sec_ctx_fini" } + { SEC_CTX_FINI, "sec_ctx_fini" }, + { QUOTA_DQACQ, "quota_acquire" }, + { QUOTA_DQREL, "quota_release" } }; struct ll_eopcode { @@ -132,12 +135,13 @@ struct ll_eopcode { { LDLM_EXTENT_ENQUEUE, "ldlm_extent_enqueue" }, { LDLM_FLOCK_ENQUEUE, "ldlm_flock_enqueue" }, { LDLM_IBITS_ENQUEUE, "ldlm_ibits_enqueue" }, + { MDS_REINT_SETATTR, "mds_reint_setattr" }, { MDS_REINT_CREATE, "mds_reint_create" }, { MDS_REINT_LINK, "mds_reint_link" }, - { MDS_REINT_OPEN, "mds_reint_open" }, - { MDS_REINT_SETATTR, "mds_reint_setattr" }, - { MDS_REINT_RENAME, "mds_reint_rename" }, { MDS_REINT_UNLINK, "mds_reint_unlink" }, + { MDS_REINT_RENAME, "mds_reint_rename" }, + { MDS_REINT_OPEN, "mds_reint_open" }, + { 
MDS_REINT_SETXATTR, "mds_reint_setxattr" }, { BRW_READ_BYTES, "read_bytes" }, { BRW_WRITE_BYTES, "write_bytes" }, }; @@ -145,15 +149,19 @@ struct ll_eopcode { const char *ll_opcode2str(__u32 opcode) { /* When one of the assertions below fail, chances are that: - * 1) A new opcode was added in lustre_idl.h, but was - * is missing from the table above. + * 1) A new opcode was added in include/lustre/lustre_idl.h, + * but is missing from the table above. * or 2) The opcode space was renumbered or rearranged, * and the opcode_offset() function in * ptlrpc_internal.h needs to be modified. */ __u32 offset = opcode_offset(opcode); - LASSERT(offset < LUSTRE_MAX_OPCODES); - LASSERT(ll_rpc_opcode_table[offset].opcode == opcode); + LASSERTF(offset < LUSTRE_MAX_OPCODES, + "offset %u >= LUSTRE_MAX_OPCODES %u\n", + offset, LUSTRE_MAX_OPCODES); + LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode, + "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n", + offset, ll_rpc_opcode_table[offset].opcode, opcode); return ll_rpc_opcode_table[offset].opname; } diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 5cf4caa..61fbe25 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -733,6 +733,7 @@ void *lustre_swab_buf(struct lustre_msg *msg, int index, int min_size, { void *ptr = NULL; + LASSERT(msg != NULL); switch (msg->lm_magic) { case LUSTRE_MSG_MAGIC_V2: case LUSTRE_MSG_MAGIC_V2_SWABBED: @@ -1744,6 +1745,15 @@ void lustre_swab_obd_quotactl (struct obd_quotactl *q) lustre_swab_obd_dqblk (&q->qc_dqblk); } +void lustre_swab_quota_adjust_qunit (struct quota_adjust_qunit *q) +{ + __swab32s (&q->qaq_flags); + __swab32s (&q->qaq_id); + __swab64s (&q->qaq_bunit_sz); + __swab64s (&q->qaq_iunit_sz); + __swab64s (&q->padding1); +} + void lustre_swab_mds_remote_perm (struct mds_remote_perm *p) { __swab32s (&p->rp_uid); @@ -2104,54 +2114,92 @@ void lustre_swab_qdata(struct qunit_data *d) __swab32s (&d->qd_id); __swab32s (&d->qd_flags); 
__swab64s (&d->qd_count); -} - -void lustre_swab_qdata_old(struct qunit_data_old *d) -{ - __swab32s (&d->qd_id); - __swab32s (&d->qd_type); - __swab32s (&d->qd_count); - __swab32s (&d->qd_isblk); + __swab64s (&d->qd_qunit); + __swab64s (&d->padding); } #ifdef __KERNEL__ -struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d) + +/** + * got qdata from request(req/rep) + */ +int quota_get_qdata(void *request, struct qunit_data *qdata, + int is_req, int is_exp) { - struct qunit_data_old tmp; - struct qunit_data *ret; - ENTRY; + struct ptlrpc_request *req = (struct ptlrpc_request *)request; + struct qunit_data *new; + __u64 flags = is_exp ? req->rq_export->exp_connect_flags : + req->rq_import->imp_connect_data.ocd_connect_flags; + int rc = 0; - if (!d) - return NULL; + LASSERT(req); + LASSERT(qdata); - tmp = *d; - ret = (struct qunit_data *)d; - ret->qd_id = tmp.qd_id; - ret->qd_flags = (tmp.qd_type ? QUOTA_IS_GRP : 0) | (tmp.qd_isblk ? QUOTA_IS_BLOCK : 0); - ret->qd_count = tmp.qd_count; - RETURN(ret); + /* support for quota64 and change_qs */ + if (flags & OBD_CONNECT_CHANGE_QS) { + if (!(flags & OBD_CONNECT_QUOTA64)) { + CDEBUG(D_ERROR, "Wire protocol for qunit is broken!\n"); + return -EINVAL; + } + if (is_req == QUOTA_REQUEST) + new = lustre_swab_reqbuf(req, REQ_REC_OFF, + sizeof(struct qunit_data), + lustre_swab_qdata); + else + new = lustre_swab_repbuf(req, REPLY_REC_OFF, + sizeof(struct qunit_data), + lustre_swab_qdata); + if (new == NULL) + GOTO(out, rc = -EPROTO); + *qdata = *new; + QDATA_SET_CHANGE_QS(qdata); + return 0; + } else { + QDATA_CLR_CHANGE_QS(qdata); + } +out: + return rc; } -EXPORT_SYMBOL(lustre_quota_old_to_new); +EXPORT_SYMBOL(quota_get_qdata); -struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d) +/** + * copy qdata to request(req/rep) + */ +int quota_copy_qdata(void *request, struct qunit_data *qdata, + int is_req, int is_exp) { - struct qunit_data tmp; - struct qunit_data_old *ret; - ENTRY; + struct 
ptlrpc_request *req = (struct ptlrpc_request *)request; + void *target; + __u64 flags = is_exp ? req->rq_export->exp_connect_flags : + req->rq_import->imp_connect_data.ocd_connect_flags; + int rc = 0; - if (!d) - return NULL; + LASSERT(req); + LASSERT(qdata); - tmp = *d; - ret = (struct qunit_data_old *)d; - ret->qd_id = tmp.qd_id; - ret->qd_type = ((tmp.qd_flags & QUOTA_IS_GRP) ? GRPQUOTA : USRQUOTA); - ret->qd_count = (__u32)tmp.qd_count; - ret->qd_isblk = ((tmp.qd_flags & QUOTA_IS_BLOCK) ? 1 : 0); - RETURN(ret); + /* support for quota64 and change_qs */ + if (flags & OBD_CONNECT_CHANGE_QS) { + if (!(flags & OBD_CONNECT_QUOTA64)) { + CERROR("Wire protocol for qunit is broken!\n"); + return -EINVAL; + } + if (is_req == QUOTA_REQUEST) + target = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, + sizeof(struct qunit_data)); + else + target = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(struct qunit_data)); + if (!target) + GOTO(out, rc = -EPROTO); + memcpy(target, qdata, sizeof(*qdata)); + return 0; + } + +out: + return rc; } -EXPORT_SYMBOL(lustre_quota_new_to_old); +EXPORT_SYMBOL(quota_copy_qdata); #endif /* __KERNEL__ */ static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req) @@ -2222,11 +2270,12 @@ void lustre_swab_lustre_capa(struct lustre_capa *c) { lustre_swab_lu_fid(&c->lc_fid); __swab64s (&c->lc_opc); - __swab32s (&c->lc_uid); + __swab64s (&c->lc_uid); + __swab64s (&c->lc_gid); __swab32s (&c->lc_flags); __swab32s (&c->lc_keyid); __swab32s (&c->lc_timeout); - __swab64s (&c->lc_expiry); + __swab32s (&c->lc_expiry); } void lustre_swab_lustre_capa_key (struct lustre_capa_key *k) diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 76e0727..d8e7c86 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -264,7 +264,7 @@ EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc); EXPORT_SYMBOL(lustre_swab_ldlm_request); EXPORT_SYMBOL(lustre_swab_ldlm_reply); EXPORT_SYMBOL(lustre_swab_qdata); 
-EXPORT_SYMBOL(lustre_swab_qdata_old); +EXPORT_SYMBOL(lustre_swab_quota_adjust_qunit); EXPORT_SYMBOL(lustre_msg_get_flags); EXPORT_SYMBOL(lustre_msg_add_flags); EXPORT_SYMBOL(lustre_msg_set_flags); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 3f2458d..0045ddd 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -110,7 +110,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) */ list_for_each_safe(tmp, pos, &imp->imp_replay_list) { req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); - + /* If need to resend the last sent transno (because a reconnect has occurred), then stop on the matching req and send it again. If, however, the last sent @@ -257,7 +257,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) spin_lock(&imp->imp_lock); imp->imp_deactive = 0; spin_unlock(&imp->imp_lock); - + CDEBUG(D_HA, "setting import %s VALID\n", obd2cli_tgt(imp->imp_obd)); rc = ptlrpc_recover_import(imp, NULL); diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index 9db1780..4f72364 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -2271,35 +2271,6 @@ EXPORT_SYMBOL(sec2target_str); * crypto API helper/alloc blkciper * ****************************************/ -#ifdef __KERNEL__ -#ifndef HAVE_ASYNC_BLOCK_CIPHER -struct ll_crypto_cipher *ll_crypto_alloc_blkcipher(const char * algname, - u32 type, u32 mask) -{ - char buf[CRYPTO_MAX_ALG_NAME + 1]; - const char *pan = algname; - u32 flag = 0; - - if (strncmp("cbc(", algname, 4) == 0) - flag |= CRYPTO_TFM_MODE_CBC; - else if (strncmp("ecb(", algname, 4) == 0) - flag |= CRYPTO_TFM_MODE_ECB; - if (flag) { - char *vp = strnchr(algname, CRYPTO_MAX_ALG_NAME, ')'); - if (vp) { - memcpy(buf, algname + 4, vp - algname - 4); - buf[vp - algname - 4] = '\0'; - pan = buf; - } else { - flag = 0; - } - } - return crypto_alloc_tfm(pan, flag); -} -EXPORT_SYMBOL(ll_crypto_alloc_blkcipher); -#endif -#endif - /**************************************** * 
initialize/finalize * ****************************************/ diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 2381e72..d8d737d 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1415,7 +1415,6 @@ static int ptlrpc_main(void *arg) struct ptlrpc_thread *thread = data->thread; struct obd_device *dev = data->dev; struct ptlrpc_reply_state *rs; - struct lc_watchdog *watchdog; #ifdef WITH_GROUP_INFO struct group_info *ginfo = NULL; #endif @@ -1483,9 +1482,10 @@ static int ptlrpc_main(void *arg) */ cfs_waitq_signal(&thread->t_ctl_waitq); - watchdog = lc_watchdog_add(max_t(int, obd_timeout, AT_OFF ? 0 : - at_get(&svc->srv_at_estimate)) * - svc->srv_watchdog_factor, NULL, NULL); + thread->t_watchdog = lc_watchdog_add(max_t(int, obd_timeout, AT_OFF ? 0 : + at_get(&svc->srv_at_estimate)) + * svc->srv_watchdog_factor, + NULL, NULL); spin_lock(&svc->srv_lock); svc->srv_threads_running++; @@ -1493,8 +1493,8 @@ static int ptlrpc_main(void *arg) spin_unlock(&svc->srv_lock); cfs_waitq_signal(&svc->srv_free_rs_waitq); - CDEBUG(D_NET, "service thread %d (#%d)started\n", thread->t_id, - svc->srv_threads_running); + CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id, + svc->srv_threads_running); /* XXX maintain a list of all managed devices: insert here */ @@ -1504,7 +1504,7 @@ static int ptlrpc_main(void *arg) struct l_wait_info lwi = LWI_TIMEOUT(svc->srv_rqbd_timeout, ptlrpc_retry_rqbds, svc); - lc_watchdog_disable(watchdog); + lc_watchdog_disable(thread->t_watchdog); cond_resched(); @@ -1521,7 +1521,7 @@ static int ptlrpc_main(void *arg) svc->srv_at_check, &lwi); - lc_watchdog_touch_ms(watchdog, max_t(int, obd_timeout, + lc_watchdog_touch_ms(thread->t_watchdog, max_t(int, obd_timeout, AT_OFF ? 
0 : at_get(&svc->srv_at_estimate)) * svc->srv_watchdog_factor); @@ -1568,7 +1568,8 @@ static int ptlrpc_main(void *arg) } } - lc_watchdog_delete(watchdog); + lc_watchdog_delete(thread->t_watchdog); + thread->t_watchdog = NULL; out_srv_fini: /* diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index b1308fb..d35ec0d 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -129,7 +129,9 @@ void lustre_assert_wire_constants(void) (long long)OST_QUOTACHECK); LASSERTF(OST_QUOTACTL == 19, " found %lld\n", (long long)OST_QUOTACTL); - LASSERTF(OST_LAST_OPC == 20, " found %lld\n", + LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, " found %lld\n", + (long long)OST_QUOTA_ADJUST_QUNIT); + LASSERTF(OST_LAST_OPC == 21, " found %lld\n", (long long)OST_LAST_OPC); LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n", (long long)OBD_OBJECT_EOF); @@ -253,9 +255,9 @@ void lustre_assert_wire_constants(void) (long long)OBD_QC_CALLBACK); LASSERTF(OBD_LAST_OPC == 403, " found %lld\n", (long long)OBD_LAST_OPC); - LASSERTF(QUOTA_DQACQ == 601, " found %lld\n", + LASSERTF(QUOTA_DQACQ == 901, " found %lld\n", (long long)QUOTA_DQACQ); - LASSERTF(QUOTA_DQREL == 602, " found %lld\n", + LASSERTF(QUOTA_DQREL == 902, " found %lld\n", (long long)QUOTA_DQREL); LASSERTF(MGS_CONNECT == 250, " found %lld\n", (long long)MGS_CONNECT); @@ -461,8 +463,8 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL); CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL); CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL); - CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x00010000ULL); - CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00020000ULL); + CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00010000ULL); + CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x00020000ULL); CLASSERT(OBD_CONNECT_BRW_SIZE == 0x00040000ULL); CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL); CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL); @@ -2145,7 +2147,7 @@ void lustre_assert_wire_constants(void) (long 
long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm)); /* Checks for struct qunit_data */ - LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", + LASSERTF((int)sizeof(struct qunit_data) == 32, " found %lld\n", (long long)(int)sizeof(struct qunit_data)); LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n", (long long)(int)offsetof(struct qunit_data, qd_id)); @@ -2159,26 +2161,38 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct qunit_data, qd_count)); LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 8, " found %lld\n", (long long)(int)sizeof(((struct qunit_data *)0)->qd_count)); - - /* Checks for struct qunit_data_old */ - LASSERTF((int)sizeof(struct qunit_data_old) == 16, " found %lld\n", - (long long)(int)sizeof(struct qunit_data_old)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_id) == 0, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_id)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_id) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_id)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_type) == 4, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_type)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_type) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_type)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_count) == 8, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_count)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_count) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_count)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_isblk) == 12, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_isblk)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_isblk) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_isblk)); + 
LASSERTF((int)offsetof(struct qunit_data, qd_qunit) == 16, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_qunit)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_qunit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_qunit)); + LASSERTF((int)offsetof(struct qunit_data, padding) == 24, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, padding)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->padding) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->padding)); + + /* Checks for struct quota_adjust_qunit */ + LASSERTF((int)sizeof(struct quota_adjust_qunit) == 32, " found %lld\n", + (long long)(int)sizeof(struct quota_adjust_qunit)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_flags) == 0, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_flags)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_id) == 4, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_id)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz) == 16, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit 
*)0)->qaq_iunit_sz)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, padding1) == 24, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, padding1)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->padding1) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->padding1)); /* Checks for struct mgs_target_info */ LASSERTF((int)sizeof(struct mgs_target_info) == 4544, " found %lld\n", diff --git a/lustre/quota/Makefile.in b/lustre/quota/Makefile.in index e42dff1..f052b42 100644 --- a/lustre/quota/Makefile.in +++ b/lustre/quota/Makefile.in @@ -1,7 +1,7 @@ MODULES := lquota lquota-objs := quota_check.o quota_context.o quota_ctl.o quota_interface.o -lquota-objs += quota_master.o +lquota-objs += quota_master.o quota_adjust_qunit.o lproc_quota.o @INCLUDE_RULES@ diff --git a/lustre/quota/autoMakefile.am b/lustre/quota/autoMakefile.am index e070f4c..9a20d28 100644 --- a/lustre/quota/autoMakefile.am +++ b/lustre/quota/autoMakefile.am @@ -36,12 +36,12 @@ if LIBLUSTRE noinst_LIBRARIES = libquota.a -libquota_a_SOURCES = quota_check.c quota_ctl.c quota_interface.c +libquota_a_SOURCES = quota_check.c quota_ctl.c quota_interface.c quota_adjust_qunit.c libquota_a_CPPFLAGS = $(LLCPPFLAGS) libquota_a_CFLAGS = $(LLCFLAGS) endif -if MODULES +if QUOTA modulefs_DATA = lquota$(KMODEXT) endif diff --git a/lustre/quota/lproc_quota.c b/lustre/quota/lproc_quota.c new file mode 100644 index 0000000..a76807a --- /dev/null +++ b/lustre/quota/lproc_quota.c @@ -0,0 +1,667 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + */ + +#define DEBUG_SUBSYSTEM S_LQUOTA + +#include +#include +#include +#include +#include + +#include "quota_internal.h" + +#ifdef HAVE_QUOTA_SUPPORT + +#ifdef LPROCFS +int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_bunit_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_bunit); + +int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val % QUOTABLOCK_SIZE || + val <= obd->u.obt.obt_qctxt.lqc_btune_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_bunit_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_bunit); + +int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void 
*data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_btune_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_btune); + +int lprocfs_quota_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || + val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_btune_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_btune); + +int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_iunit_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_iunit); + +int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= obd->u.obt.obt_qctxt.lqc_itune_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_iunit_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_iunit); + +int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_itune_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_itune); + +int lprocfs_quota_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = 
lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= MIN_QLIMIT || + val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_itune_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_itune); + +#define USER_QUOTA 1 +#define GROUP_QUOTA 2 + +#define MAX_STYPE_SIZE 5 + +int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + char stype[MAX_STYPE_SIZE + 1] = ""; + int oq_type; + struct obd_device_target *obt; + + LASSERT(obd != NULL); + + obt = &obd->u.obt; + + /* Collect the needed information */ + oq_type = obd->u.obt.obt_qctxt.lqc_flags; + + /* Transform the collected data into a user-readable string */ + if (oq_type & LQC_USRQUOTA_FLAG) + strcat(stype, "u"); + if (oq_type & LQC_GRPQUOTA_FLAG) + strcat(stype, "g"); + + strcat(stype, "3"); + + return snprintf(page, count, "%s\n", stype); +} +EXPORT_SYMBOL(lprocfs_quota_rd_type); + +static int auto_quota_on(struct obd_device *obd, int type, + struct super_block *sb, int is_master) +{ + struct obd_quotactl *oqctl; + struct lvfs_run_ctxt saved; + int rc = 0, id; + struct obd_device_target *obt; + ENTRY; + + LASSERT(type == USRQUOTA || type == GRPQUOTA || type == UGQUOTA); + + obt = &obd->u.obt; + + OBD_ALLOC_PTR(oqctl); + if (!oqctl) + RETURN(-ENOMEM); + + if (!atomic_dec_and_test(&obt->obt_quotachecking)) { + CDEBUG(D_INFO, "other people are doing quotacheck\n"); + atomic_inc(&obt->obt_quotachecking); + RETURN(-EBUSY); + } + + id = UGQUOTA2LQC(type); + /* quota already turned on */ + if ((obt->obt_qctxt.lqc_flags & id) == id) { + rc = 0; + goto out; + } + + oqctl->qc_type = type; + oqctl->qc_cmd = Q_QUOTAON; + oqctl->qc_id = obt->obt_qfmt; + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (is_master) { + struct mds_obd *mds = &obd->u.mds; + + down(&mds->mds_qonoff_sem); + /* turn on cluster wide quota */ + rc = mds_admin_quota_on(obd, oqctl); + 
if (rc) + CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, + "auto-enable admin quota failed. rc=%d\n", rc); + up(&mds->mds_qonoff_sem); + + } + if (!rc) { + /* turn on local quota */ + rc = fsfilt_quotactl(obd, sb, oqctl); + if (rc) + CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, + "auto-enable local quota failed. rc=%d\n", rc); + else + obt->obt_qctxt.lqc_flags |= UGQUOTA2LQC(type); + } + + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + +out: + atomic_inc(&obt->obt_quotachecking); + + OBD_FREE_PTR(oqctl); + RETURN(rc); +} + +int lprocfs_quota_wr_type(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + struct obd_device_target *obt; + int type = 0, is_mds; + unsigned long i; + char stype[MAX_STYPE_SIZE + 1] = ""; + + LASSERT(obd != NULL); + + obt = &obd->u.obt; + + is_mds = !strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME); + + if (count > MAX_STYPE_SIZE) + return -EINVAL; + + if (copy_from_user(stype, buffer, count)) + return -EFAULT; + + for (i = 0 ; i < count ; i++) { + switch (stype[i]) { + case 'u' : + type |= USER_QUOTA; + break; + case 'g' : + type |= GROUP_QUOTA; + break; + case '1' : + case '2' : + CWARN("quota_type options 1 and 2 are obsolete, " + "they will be ignored\n"); + break; + case '3' : /* the only valid version spec, do nothing */ + default : /* just skip stray symbols like \n */ + break; + } + } + + if (type != 0) + auto_quota_on(obd, type - 1, obt->obt_sb, is_mds); + + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_type); + +int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%d\n", + obd->u.obt.obt_qctxt.lqc_switch_seconds); +} +EXPORT_SYMBOL(lprocfs_quota_rd_switch_seconds); + +int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct 
obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= 10) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_switch_seconds = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_switch_seconds); + +int lprocfs_quota_rd_sync_blk(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%d\n", + obd->u.obt.obt_qctxt.lqc_sync_blk); +} +EXPORT_SYMBOL(lprocfs_quota_rd_sync_blk); + +int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 0) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_sync_blk = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_sync_blk); + +int lprocfs_quota_rd_switch_qs(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "changing qunit size is %s\n", + obd->u.obt.obt_qctxt.lqc_switch_qs ? 
+ "enabled" : "disabled"); +} +EXPORT_SYMBOL(lprocfs_quota_rd_switch_qs); + +int lprocfs_quota_wr_switch_qs(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val) + obd->u.obt.obt_qctxt.lqc_switch_qs = 1; + else + obd->u.obt.obt_qctxt.lqc_switch_qs = 0; + + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_switch_qs); + +int lprocfs_quota_rd_boundary_factor(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_boundary_factor); +} +EXPORT_SYMBOL(lprocfs_quota_rd_boundary_factor); + +int lprocfs_quota_wr_boundary_factor(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 2) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_boundary_factor = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_boundary_factor); + +int lprocfs_quota_rd_least_bunit(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_least_bunit); +} +EXPORT_SYMBOL(lprocfs_quota_rd_least_bunit); + +int lprocfs_quota_wr_least_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < PTLRPC_MAX_BRW_SIZE || + val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) + 
return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_least_bunit = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_least_bunit); + +int lprocfs_quota_rd_least_iunit(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_least_iunit); +} +EXPORT_SYMBOL(lprocfs_quota_rd_least_iunit); + +int lprocfs_quota_wr_least_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 1 || val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_least_iunit = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_least_iunit); + +int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_qs_factor); +} +EXPORT_SYMBOL(lprocfs_quota_rd_qs_factor); + +int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 2) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_qs_factor = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_qs_factor); + +struct lprocfs_vars lprocfs_quota_common_vars[] = { + { "quota_bunit_sz", lprocfs_quota_rd_bunit, + lprocfs_quota_wr_bunit, 0}, + { "quota_btune_sz", lprocfs_quota_rd_btune, + lprocfs_quota_wr_btune, 0}, + { "quota_iunit_sz", lprocfs_quota_rd_iunit, + lprocfs_quota_wr_iunit, 0}, + { "quota_itune_sz", 
lprocfs_quota_rd_itune, + lprocfs_quota_wr_itune, 0}, + { "quota_type", lprocfs_quota_rd_type, + lprocfs_quota_wr_type, 0}, + { "quota_switch_seconds", lprocfs_quota_rd_switch_seconds, + lprocfs_quota_wr_switch_seconds, 0 }, + { "quota_sync_blk", lprocfs_quota_rd_sync_blk, + lprocfs_quota_wr_sync_blk, 0}, +}; + +struct lprocfs_vars lprocfs_quota_master_vars[] = { + { "quota_switch_qs", lprocfs_quota_rd_switch_qs, + lprocfs_quota_wr_switch_qs, 0 }, + { "quota_boundary_factor", lprocfs_quota_rd_boundary_factor, + lprocfs_quota_wr_boundary_factor, 0 }, + { "quota_least_bunit", lprocfs_quota_rd_least_bunit, + lprocfs_quota_wr_least_bunit, 0 }, + { "quota_least_iunit", lprocfs_quota_rd_least_iunit, + lprocfs_quota_wr_least_iunit, 0 }, + { "quota_qs_factor", lprocfs_quota_rd_qs_factor, + lprocfs_quota_wr_qs_factor, 0 }, +}; + +int lquota_proc_setup(struct obd_device *obd, int is_master) +{ + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + int rc = 0; + ENTRY; + + LASSERT(lquota_type_proc_dir && obd); + qctxt->lqc_proc_dir = lprocfs_register(obd->obd_name, + lquota_type_proc_dir, + lprocfs_quota_common_vars, obd); + if (IS_ERR(qctxt->lqc_proc_dir)) { + rc = PTR_ERR(qctxt->lqc_proc_dir); + CERROR("error %d setting up lprocfs for %s\n", rc, + obd->obd_name); + qctxt->lqc_proc_dir = NULL; + GOTO(out, rc); + } + + if (is_master) { + rc = lprocfs_add_vars(qctxt->lqc_proc_dir, + lprocfs_quota_master_vars, obd); + if (rc) { + CERROR("error %d setting up lprocfs for %s" + "(quota master)\n", rc, obd->obd_name); + GOTO(out_free_proc, rc); + } + } + + qctxt->lqc_stats = lprocfs_alloc_stats(LQUOTA_LAST_STAT - + LQUOTA_FIRST_STAT, 0); + if (!qctxt->lqc_stats) + GOTO(out_free_proc, rc = -ENOMEM); + + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_SYNC_ACQ, + LPROCFS_CNTR_AVGMINMAX, "sync_acq_req", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_SYNC_REL, + LPROCFS_CNTR_AVGMINMAX, "sync_rel_req", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_ASYNC_ACQ, + 
LPROCFS_CNTR_AVGMINMAX, "async_acq_req", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_ASYNC_REL, + LPROCFS_CNTR_AVGMINMAX, "async_rel_req", "us"); + + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_CHK_BLK, + LPROCFS_CNTR_AVGMINMAX, + "wait_for_blk_quota(lquota_chkquota)", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_CHK_INO, + LPROCFS_CNTR_AVGMINMAX, + "wait_for_ino_quota(lquota_chkquota)", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_COMMIT_BLK, + LPROCFS_CNTR_AVGMINMAX, + "wait_for_blk_quota(lquota_pending_commit)", + "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_FOR_COMMIT_INO, + LPROCFS_CNTR_AVGMINMAX, + "wait_for_ino_quota(lquota_pending_commit)", + "us"); + + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_PENDING_BLK_QUOTA, + LPROCFS_CNTR_AVGMINMAX, + "wait_for_pending_blk_quota_req" + "(qctxt_wait_pending_dqacq)", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_WAIT_PENDING_INO_QUOTA, + LPROCFS_CNTR_AVGMINMAX, + "wait_for_pending_ino_quota_req" + "(qctxt_wait_pending_dqacq)", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_NOWAIT_PENDING_BLK_QUOTA, + LPROCFS_CNTR_AVGMINMAX, + "nowait_for_pending_blk_quota_req" + "(qctxt_wait_pending_dqacq)", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_NOWAIT_PENDING_INO_QUOTA, + LPROCFS_CNTR_AVGMINMAX, + "nowait_for_pending_ino_quota_req" + "(qctxt_wait_pending_dqacq)", "us"); + + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_QUOTA_CTL, + LPROCFS_CNTR_AVGMINMAX, "quota_ctl", "us"); + lprocfs_counter_init(qctxt->lqc_stats, LQUOTA_ADJUST_QUNIT, + LPROCFS_CNTR_AVGMINMAX, "adjust_qunit", "us"); + + lprocfs_register_stats(qctxt->lqc_proc_dir, "stats", qctxt->lqc_stats); + + RETURN(rc); + +out_free_proc: + lprocfs_remove(&qctxt->lqc_proc_dir); +out: + RETURN(rc); +} + +int lquota_proc_cleanup(struct lustre_quota_ctxt *qctxt) +{ + if (!qctxt || !qctxt->lqc_proc_dir) + return -EINVAL; + + if (qctxt->lqc_stats != NULL) + 
lprocfs_free_stats(&qctxt->lqc_stats); + + lprocfs_remove(&qctxt->lqc_proc_dir); + return 0; +} + +#endif /* LPROCFS */ +#endif diff --git a/lustre/quota/quota_adjust_qunit.c b/lustre/quota/quota_adjust_qunit.c new file mode 100644 index 0000000..20ee26b --- /dev/null +++ b/lustre/quota/quota_adjust_qunit.c @@ -0,0 +1,419 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LQUOTA + +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# include +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# include +# include +# include +# else +# include +# endif +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include "quota_internal.h" + +#ifdef HAVE_QUOTA_SUPPORT + +#ifdef __KERNEL__ +/** + * This function is charge of recording lqs_ino_rec and + * lqs_blk_rec. when a lquota slave checks a quota + * request(check_cur_qunit) and finishes a quota + * request(dqacq_completion), it will be called. + * is_chk: whether it is checking quota; otherwise, it is finishing + * is_acq: whether it is acquiring; otherwise, it is releasing + */ +void quota_compute_lqs(struct qunit_data *qdata, struct lustre_qunit_size *lqs, + int is_chk, int is_acq) +{ + int is_blk; + + LASSERT(qdata && lqs); + LASSERT_SPIN_LOCKED(&lqs->lqs_lock); + is_blk = QDATA_IS_BLK(qdata); + + if (is_chk) { + if (is_acq) { + if (is_blk) + lqs->lqs_blk_rec += qdata->qd_count; + else + lqs->lqs_ino_rec += qdata->qd_count; + } else { + if (is_blk) + lqs->lqs_blk_rec -= qdata->qd_count; + else + lqs->lqs_ino_rec -= qdata->qd_count; + } + } else { + if (is_acq) { + if (is_blk) + lqs->lqs_blk_rec -= qdata->qd_count; + else + lqs->lqs_ino_rec -= qdata->qd_count; + } else { + if (is_blk) + lqs->lqs_blk_rec += qdata->qd_count; + else + lqs->lqs_ino_rec += qdata->qd_count; + } + } +} + +void qdata_to_oqaq(struct qunit_data *qdata, struct quota_adjust_qunit *oqaq) +{ + LASSERT(qdata); + LASSERT(oqaq); + + oqaq->qaq_flags = qdata->qd_flags; + oqaq->qaq_id = qdata->qd_id; + if (QDATA_IS_ADJBLK(qdata)) + oqaq->qaq_bunit_sz = qdata->qd_qunit; + if (QDATA_IS_ADJINO(qdata)) + oqaq->qaq_iunit_sz = qdata->qd_qunit; +} + +int quota_search_lqs(struct qunit_data *qdata, struct quota_adjust_qunit *oqaq, + 
struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return) +{ + struct quota_adjust_qunit *oqaq_tmp = NULL; + ENTRY; + + LASSERT(*lqs_return == NULL); + LASSERT(oqaq || qdata); + + if (!oqaq) { + OBD_ALLOC_PTR(oqaq_tmp); + if (!oqaq_tmp) + RETURN(-ENOMEM); + qdata_to_oqaq(qdata, oqaq_tmp); + } else { + oqaq_tmp = oqaq; + } + + *lqs_return = lustre_hash_lookup(qctxt->lqc_lqs_hash, oqaq_tmp); + if (*lqs_return) + LQS_DEBUG((*lqs_return), "show lqs\n"); + + if (!oqaq) + OBD_FREE_PTR(oqaq_tmp); + RETURN(0); +} + +int quota_create_lqs(struct qunit_data *qdata, struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return) +{ + struct lustre_qunit_size *lqs = NULL; + int rc = 0; + ENTRY; + + LASSERT(*lqs_return == NULL); + LASSERT(oqaq || qdata); + + OBD_ALLOC_PTR(lqs); + if (!lqs) + GOTO(out, rc = -ENOMEM); + + if (!oqaq) + qdata_to_oqaq(qdata, &lqs->lqs_key); + else + lqs->lqs_key = *oqaq; + + spin_lock_init(&lqs->lqs_lock); + lqs->lqs_bwrite_pending = 0; + lqs->lqs_iwrite_pending = 0; + lqs->lqs_ino_rec = 0; + lqs->lqs_blk_rec = 0; + lqs->lqs_id = lqs->lqs_key.qaq_id; + lqs->lqs_flags = QAQ_IS_GRP(&lqs->lqs_key); + lqs->lqs_bunit_sz = qctxt->lqc_bunit_sz; + lqs->lqs_iunit_sz = qctxt->lqc_iunit_sz; + lqs->lqs_btune_sz = qctxt->lqc_btune_sz; + lqs->lqs_itune_sz = qctxt->lqc_itune_sz; + lqs->lqs_ctxt = qctxt; + if (qctxt->lqc_handler) { + lqs->lqs_last_bshrink = 0; + lqs->lqs_last_ishrink = 0; + } + lqs_initref(lqs); + rc = lustre_hash_add_unique(qctxt->lqc_lqs_hash, + &lqs->lqs_key, &lqs->lqs_hash); + LQS_DEBUG(lqs, "create lqs\n"); + if (!rc) { + lqs_getref(lqs); + *lqs_return = lqs; + } +out: + if (rc && lqs) + OBD_FREE_PTR(lqs); + RETURN(rc); +} + +int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt) +{ + struct lustre_qunit_size *lqs = NULL; + unsigned long *lbunit, *liunit, *lbtune, *litune; + signed long b_tmp = 0, i_tmp = 0; + cfs_time_t time_limit = 0; + int rc 
= 0; + ENTRY; + + LASSERT(qctxt); +search_lqs: + rc = quota_search_lqs(NULL, oqaq, qctxt, &lqs); + + /* deleting the lqs, because a user sets lfs quota 0 0 0 0 */ + if (!oqaq->qaq_bunit_sz && !oqaq->qaq_iunit_sz && QAQ_IS_ADJBLK(oqaq) && + QAQ_IS_ADJINO(oqaq)) { + if (lqs) { + LQS_DEBUG(lqs, "release lqs\n"); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + /* kill lqs */ + lqs_putref(lqs); + } + RETURN(rc); + } + + if (!lqs) { + rc = quota_create_lqs(NULL, oqaq, qctxt, &lqs); + if (rc == -EALREADY) + goto search_lqs; + if (rc < 0) + RETURN(rc); + } + + lbunit = &lqs->lqs_bunit_sz; + liunit = &lqs->lqs_iunit_sz; + lbtune = &lqs->lqs_btune_sz; + litune = &lqs->lqs_itune_sz; + + CDEBUG(D_QUOTA, "before: bunit: %lu, iunit: %lu.\n", *lbunit, *liunit); + spin_lock(&lqs->lqs_lock); + /* adjust the slave's block qunit size */ + if (QAQ_IS_ADJBLK(oqaq)) { + cfs_duration_t sec = cfs_time_seconds(qctxt->lqc_switch_seconds); + + b_tmp = *lbunit - oqaq->qaq_bunit_sz; + + if (qctxt->lqc_handler && b_tmp > 0) + lqs->lqs_last_bshrink = cfs_time_current(); + + if (qctxt->lqc_handler && b_tmp < 0) { + time_limit = cfs_time_add(lqs->lqs_last_bshrink, sec); + if (!lqs->lqs_last_bshrink || + cfs_time_after(cfs_time_current(), time_limit)) { + *lbunit = oqaq->qaq_bunit_sz; + *lbtune = (*lbunit) / 2; + } else { + b_tmp = 0; + } + } else { + *lbunit = oqaq->qaq_bunit_sz; + *lbtune = (*lbunit) / 2; + } + } + + /* adjust the slave's file qunit size */ + if (QAQ_IS_ADJINO(oqaq)) { + i_tmp = *liunit - oqaq->qaq_iunit_sz; + + if (qctxt->lqc_handler && i_tmp > 0) + lqs->lqs_last_ishrink = cfs_time_current(); + + if (qctxt->lqc_handler && i_tmp < 0) { + time_limit = cfs_time_add(lqs->lqs_last_ishrink, + cfs_time_seconds(qctxt-> + lqc_switch_seconds)); + if (!lqs->lqs_last_ishrink || + cfs_time_after(cfs_time_current(), time_limit)) { + *liunit = oqaq->qaq_iunit_sz; + *litune = (*liunit) / 2; + } else { + i_tmp = 0; + } + } else { + *liunit = oqaq->qaq_iunit_sz; + *litune = (*liunit) / 
2; + } + } + spin_unlock(&lqs->lqs_lock); + CDEBUG(D_QUOTA, "after: bunit: %lu, iunit: %lu.\n", *lbunit, *liunit); + + lqs_putref(lqs); + + if (b_tmp > 0) + rc |= LQS_BLK_DECREASE; + else if (b_tmp < 0) + rc |= LQS_BLK_INCREASE; + + if (i_tmp > 0) + rc |= LQS_INO_DECREASE; + else if (i_tmp < 0) + rc |= LQS_INO_INCREASE; + + RETURN(rc); +} + +int filter_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt) +{ + struct obd_device *obd = exp->exp_obd; + unsigned int uid = 0, gid = 0; + int rc = 0; + ENTRY; + + LASSERT(oqaq); + LASSERT(QAQ_IS_ADJBLK(oqaq)); + rc = quota_adjust_slave_lqs(oqaq, qctxt); + if (rc < 0) { + CERROR("adjust mds slave's qunit size failed!(rc:%d)\n", rc); + RETURN(rc); + } + if (QAQ_IS_GRP(oqaq)) + gid = oqaq->qaq_id; + else + uid = oqaq->qaq_id; + + if (rc > 0) { + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 0, NULL); + if (rc == -EDQUOT || rc == -EBUSY || rc == -EAGAIN) { + CDEBUG(D_QUOTA, "rc: %d.\n", rc); + rc = 0; + } + if (rc) + CERROR("slave adjust block quota failed!(rc:%d)\n", rc); + } + RETURN(rc); +} +#endif /* __KERNEL__ */ +#endif + +int client_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt) +{ + struct ptlrpc_request *req; + struct quota_adjust_qunit *oqa; + int rc = 0; + ENTRY; + + /* client don't support this kind of operation, abort it */ + if (!(exp->exp_connect_flags & OBD_CONNECT_CHANGE_QS)) { + CDEBUG(D_QUOTA, "osc: %s don't support change qunit size\n", + exp->exp_obd->obd_name); + RETURN(rc); + } + if (strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME)) + RETURN(-EINVAL); + + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), + &RQF_OST_QUOTA_ADJUST_QUNIT, + LUSTRE_OST_VERSION, + OST_QUOTA_ADJUST_QUNIT); + if (req == NULL) + RETURN(-ENOMEM); + + oqa = req_capsule_client_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT); + *oqa = *oqaq; + + ptlrpc_request_set_replen(req); + + rc = 
ptlrpc_queue_wait(req); + if (rc) + CERROR("%s: %s failed: rc = %d\n", exp->exp_obd->obd_name, + __FUNCTION__, rc); + ptlrpc_req_finished(req); + RETURN (rc); +} + +int lov_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lov_obd *lov = &obd->u.lov; + int i, rc = 0; + ENTRY; + + if (!QAQ_IS_ADJBLK(oqaq)) { + CERROR("bad qaq_flags %x for lov obd.\n", oqaq->qaq_flags); + RETURN(-EFAULT); + } + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + int err; + + if (!lov->lov_tgts[i]->ltd_active) { + CDEBUG(D_HA, "ost %d is inactive\n", i); + continue; + } + + err = obd_quota_adjust_qunit(lov->lov_tgts[i]->ltd_exp, oqaq, + NULL); + if (err) { + if (lov->lov_tgts[i]->ltd_active && !rc) + rc = err; + continue; + } + } + RETURN(rc); +} diff --git a/lustre/quota/quota_check.c b/lustre/quota/quota_check.c index 20ffb9b..97061cb 100644 --- a/lustre/quota/quota_check.c +++ b/lustre/quota/quota_check.c @@ -33,10 +33,12 @@ * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. 
*/ + + #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_LQUOTA #ifdef __KERNEL__ # include @@ -62,6 +64,7 @@ #include #include "quota_internal.h" +#ifdef HAVE_QUOTA_SUPPORT #ifdef __KERNEL__ static int target_quotacheck_callback(struct obd_export *exp, struct obd_quotactl *oqctl) @@ -71,7 +74,7 @@ static int target_quotacheck_callback(struct obd_export *exp, int rc; ENTRY; - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_QC_CALLBACK, + req = ptlrpc_request_alloc_pack(exp->exp_imp_reverse, &RQF_QC_CALLBACK, LUSTRE_OBD_VERSION, OBD_QC_CALLBACK); if (req == NULL) RETURN(-ENOMEM); @@ -99,7 +102,7 @@ static int target_quotacheck_thread(void *data) ptlrpc_daemonize("quotacheck"); exp = qta->qta_exp; - obd = exp->exp_obd; + obd = qta->qta_obd; oqctl = &qta->qta_oqctl; push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); @@ -118,9 +121,9 @@ static int target_quotacheck_thread(void *data) return rc; } -int target_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl) +int target_quota_check(struct obd_device *obd, struct obd_export *exp, + struct obd_quotactl *oqctl) { - struct obd_device *obd = exp->exp_obd; struct obd_device_target *obt = &obd->u.obt; struct quotacheck_thread_args *qta; int rc = 0; @@ -136,7 +139,9 @@ int target_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl) GOTO(out, rc = -ENOMEM); qta->qta_exp = exp; + qta->qta_obd = obd; qta->qta_oqctl = *oqctl; + qta->qta_oqctl.qc_id = obt->obt_qfmt; /* override qfmt version */ qta->qta_sb = obt->obt_sb; qta->qta_sem = &obt->obt_quotachecking; @@ -166,27 +171,31 @@ out: } #endif /* __KERNEL__ */ +#endif /* HAVE_QUOTA_SUPPORT */ -int client_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl) +int client_quota_check(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) { - struct client_obd *cli = &exp->exp_obd->u.cli; - struct ptlrpc_request *req; - struct obd_quotactl *body; - int ver, 
opc, rc; + struct client_obd *cli = &exp->exp_obd->u.cli; + struct ptlrpc_request *req; + struct obd_quotactl *body; + const struct req_format *rf; + int ver, opc, rc; ENTRY; if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) { + rf = &RQF_MDS_QUOTACHECK; ver = LUSTRE_MDS_VERSION; opc = MDS_QUOTACHECK; } else if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME)) { + rf = &RQF_OST_QUOTACHECK; ver = LUSTRE_OST_VERSION; opc = OST_QUOTACHECK; } else { RETURN(-EINVAL); } - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MDS_QUOTACHECK, ver, opc); + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), rf, ver, opc); if (req == NULL) RETURN(-ENOMEM); @@ -220,18 +229,44 @@ int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk) qchk->obd_uuid = cli->cl_target_uuid; /* FIXME change strncmp to strcmp and save the strlen op */ if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME, - strlen(LUSTRE_OSC_NAME))) + strlen(LUSTRE_OSC_NAME)) == 0) memcpy(qchk->obd_type, LUSTRE_OST_NAME, strlen(LUSTRE_OST_NAME)); else if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME, - strlen(LUSTRE_MDC_NAME))) + strlen(LUSTRE_MDC_NAME)) == 0) memcpy(qchk->obd_type, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME)); RETURN(rc); } -int lov_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl) +int lmv_quota_check(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + int i, rc = 0; + ENTRY; + + for (i = 0, tgt = lmv->tgts; i < lmv->desc.ld_tgt_count; i++, tgt++) { + int err; + + if (!tgt->ltd_active) { + CERROR("lmv idx %d inactive\n", i); + RETURN(-EIO); + } + + err = obd_quotacheck(tgt->ltd_exp, oqctl); + if (err && tgt->ltd_active && !rc) + rc = err; + } + + RETURN(rc); +} + +int lov_quota_check(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl 
*oqctl) { struct obd_device *obd = class_exp2obd(exp); struct lov_obd *lov = &obd->u.lov; diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index 92313d2..4aac3ae 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -44,7 +44,7 @@ # define EXPORT_SYMTAB #endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_LQUOTA #include #include @@ -57,33 +57,94 @@ #include #include #include +#include +#include #include "quota_internal.h" -unsigned long default_bunit_sz = 100 * 1024 * 1024; /* 100M bytes */ -unsigned long default_btune_ratio = 50; /* 50 percentage */ -unsigned long default_iunit_sz = 5000; /* 5000 inodes */ -unsigned long default_itune_ratio = 50; /* 50 percentage */ +#ifdef HAVE_QUOTA_SUPPORT + +static lustre_hash_ops_t lqs_hash_ops; + +unsigned long default_bunit_sz = 128 * 1024 * 1024; /* 128M bytes */ +unsigned long default_btune_ratio = 50; /* 50 percentage */ +unsigned long default_iunit_sz = 5120; /* 5120 inodes */ +unsigned long default_itune_ratio = 50; /* 50 percentage */ cfs_mem_cache_t *qunit_cachep = NULL; struct list_head qunit_hash[NR_DQHASH]; spinlock_t qunit_hash_lock = SPIN_LOCK_UNLOCKED; +/* please sync qunit_state with qunit_state_names */ +enum qunit_state { + /** + * a qunit is created + */ + QUNIT_CREATED = 0, + /** + * a qunit is added into qunit hash, that means + * a quota req will be sent or is flying + */ + QUNIT_IN_HASH = 1, + /** + * a qunit is removed from qunit hash, that + * means a quota req is handled and comes back + */ + QUNIT_RM_FROM_HASH = 2, + /** + * qunit can wake up all threads waiting for it + */ + QUNIT_FINISHED = 3, +}; + +static const char *qunit_state_names[] = { + [QUNIT_CREATED] = "CREATED", + [QUNIT_IN_HASH] = "IN_HASH", + [QUNIT_RM_FROM_HASH] = "RM_FROM_HASH", + [QUNIT_FINISHED] = "FINISHED", +}; + struct lustre_qunit { - struct list_head lq_hash; /* Hash list in memory */ - atomic_t lq_refcnt; /* Use count */ - struct lustre_quota_ctxt *lq_ctxt; /* 
Quota context this applies to */ - struct qunit_data lq_data; /* See qunit_data */ - unsigned int lq_opc; /* QUOTA_DQACQ, QUOTA_DQREL */ - struct list_head lq_waiters; /* All write threads waiting for this qunit */ + struct list_head lq_hash; /** Hash list in memory */ + atomic_t lq_refcnt; /** Use count */ + struct lustre_quota_ctxt *lq_ctxt; /** Quota context this applies to */ + struct qunit_data lq_data; /** See qunit_data */ + unsigned int lq_opc; /** QUOTA_DQACQ, QUOTA_DQREL */ + cfs_waitq_t lq_waitq; /** Threads waiting for this qunit */ + spinlock_t lq_lock; /** Protect the whole structure */ + enum qunit_state lq_state; /** Present the status of qunit */ + int lq_rc; /** The rc of lq_data */ }; +#define QUNIT_SET_STATE(qunit, state) \ +do { \ + spin_lock(&qunit->lq_lock); \ + QDATA_DEBUG((&qunit->lq_data), "qunit(%p) lq_state(%s->%s), " \ + "lq_rc(%d)\n", \ + qunit, qunit_state_names[qunit->lq_state], \ + qunit_state_names[state], qunit->lq_rc); \ + qunit->lq_state = state; \ + spin_unlock(&qunit->lq_lock); \ +} while(0) + +#define QUNIT_SET_STATE_AND_RC(qunit, state, rc) \ +do { \ + spin_lock(&qunit->lq_lock); \ + qunit->lq_rc = rc; \ + QDATA_DEBUG((&qunit->lq_data), "qunit(%p) lq_state(%s->%s), " \ + "lq_rc(%d)\n", \ + qunit, qunit_state_names[qunit->lq_state], \ + qunit_state_names[state], qunit->lq_rc); \ + qunit->lq_state = state; \ + spin_unlock(&qunit->lq_lock); \ +} while(0) + + int should_translate_quota (struct obd_import *imp) { ENTRY; LASSERT(imp); - if ((imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) + if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) RETURN(0); else RETURN(1); @@ -135,66 +196,13 @@ static inline int qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) { unsigned int id = qdata->qd_id; - unsigned int type = qdata->qd_flags & QUOTA_IS_GRP; + unsigned int type = QDATA_IS_GRP(qdata); unsigned long tmp = ((unsigned long)qctxt 
>> L1_CACHE_SHIFT) ^ id; tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; return tmp; } -/* compute the remaining quota for certain gid or uid b=11693 */ -int compute_remquota(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) -{ - struct super_block *sb = qctxt->lqc_sb; - __u64 usage, limit; - struct obd_quotactl *qctl; - int ret = QUOTA_RET_OK; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - ENTRY; - - if (!sb_any_quota_enabled(sb)) - RETURN(QUOTA_RET_NOQUOTA); - - /* ignore root user */ - if (qdata->qd_id == 0 && qdata_type == USRQUOTA) - RETURN(QUOTA_RET_NOLIMIT); - - OBD_ALLOC_PTR(qctl); - if (qctl == NULL) - RETURN(-ENOMEM); - - /* get fs quota usage & limit */ - qctl->qc_cmd = Q_GETQUOTA; - qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; - ret = fsfilt_quotactl(obd, sb, qctl); - if (ret) { - if (ret == -ESRCH) /* no limit */ - ret = QUOTA_RET_NOLIMIT; - else - CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", - ret); - GOTO(out, ret); - } - - usage = qctl->qc_dqblk.dqb_curspace; - limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; - if (!limit){ /* no limit */ - ret = QUOTA_RET_NOLIMIT; - GOTO(out, ret); - } - - if (limit >= usage) - qdata->qd_count = limit - usage; - else - qdata->qd_count = 0; - EXIT; -out: - OBD_FREE_PTR(qctl); - return ret; -} - /* caller must hold qunit_hash_lock */ static inline struct lustre_qunit *find_qunit(unsigned int hashent, struct lustre_quota_ctxt *qctxt, @@ -207,7 +215,9 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent, list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) { tmp = &qunit->lq_data; if (qunit->lq_ctxt == qctxt && - qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags) + qdata->qd_id == tmp->qd_id && + (qdata->qd_flags & LQUOTA_QUNIT_FLAGS) == + (tmp->qd_flags & LQUOTA_QUNIT_FLAGS)) return qunit; } return NULL; @@ -218,9 +228,9 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent, * @qdata: the type of 
quota unit to be checked * * return: 1 - need acquire qunit; - * 2 - need release qunit; - * 0 - need do nothing. - * < 0 - error. + * 2 - need release qunit; + * 0 - need do nothing. + * < 0 - error. */ static int check_cur_qunit(struct obd_device *obd, @@ -228,16 +238,23 @@ check_cur_qunit(struct obd_device *obd, { struct super_block *sb = qctxt->lqc_sb; unsigned long qunit_sz, tune_sz; - __u64 usage, limit; + __u64 usage, limit, limit_org, pending_write = 0; + long long record = 0; struct obd_quotactl *qctl; + struct lustre_qunit_size *lqs = NULL; int ret = 0; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; ENTRY; if (!sb_any_quota_enabled(sb)) RETURN(0); + spin_lock(&qctxt->lqc_lock); + if (!qctxt->lqc_valid){ + spin_unlock(&qctxt->lqc_lock); + RETURN(0); + } + spin_unlock(&qctxt->lqc_lock); + OBD_ALLOC_PTR(qctl); if (qctl == NULL) RETURN(-ENOMEM); @@ -245,7 +262,7 @@ check_cur_qunit(struct obd_device *obd, /* get fs quota usage & limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; + qctl->qc_type = QDATA_IS_GRP(qdata); ret = fsfilt_quotactl(obd, sb, qctl); if (ret) { if (ret == -ESRCH) /* no limit */ @@ -255,40 +272,145 @@ check_cur_qunit(struct obd_device *obd, GOTO(out, ret); } - if (is_blk) { + if (QDATA_IS_BLK(qdata)) { usage = qctl->qc_dqblk.dqb_curspace; limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; - qunit_sz = qctxt->lqc_bunit_sz; - tune_sz = qctxt->lqc_btune_sz; - - LASSERT(!(qunit_sz % QUOTABLOCK_SIZE)); } else { usage = qctl->qc_dqblk.dqb_curinodes; limit = qctl->qc_dqblk.dqb_ihardlimit; - qunit_sz = qctxt->lqc_iunit_sz; - tune_sz = qctxt->lqc_itune_sz; } - /* ignore the no quota limit case */ + /* ignore the no quota limit case; and it can avoid creating + * unnecessary lqs for uid/gid */ if (!limit) GOTO(out, ret = 0); + search_lqs: + quota_search_lqs(qdata, NULL, qctxt, &lqs); + if (!lqs) { + CDEBUG(D_QUOTA, "Can't find the lustre 
qunit size!\n"); + ret = quota_create_lqs(qdata, NULL, qctxt, &lqs); + if (ret == -EALREADY) { + ret = 0; + goto search_lqs; + } + if (ret < 0) + GOTO (out, ret); + } + spin_lock(&lqs->lqs_lock); + + if (QDATA_IS_BLK(qdata)) { + qunit_sz = lqs->lqs_bunit_sz; + tune_sz = lqs->lqs_btune_sz; + pending_write = lqs->lqs_bwrite_pending * CFS_PAGE_SIZE; + record = lqs->lqs_blk_rec; + LASSERT(!(qunit_sz % QUOTABLOCK_SIZE)); + } else { + /* we didn't need change inode qunit size now */ + qunit_sz = lqs->lqs_iunit_sz; + tune_sz = lqs->lqs_itune_sz; + pending_write = lqs->lqs_iwrite_pending; + record = lqs->lqs_ino_rec; + } + /* we don't count the MIN_QLIMIT */ - if ((limit == MIN_QLIMIT && !is_blk) || - (toqb(limit) == MIN_QLIMIT && is_blk)) + if ((limit == MIN_QLIMIT && !QDATA_IS_BLK(qdata)) || + (toqb(limit) == MIN_QLIMIT && QDATA_IS_BLK(qdata))) limit = 0; + usage += pending_write; + limit_org = limit; + /* when a releasing quota req is sent, before it returned + limit is assigned a small value. 
limit will overflow */ + if (limit + record < 0) + usage -= record; + else + limit += record; + LASSERT(qdata->qd_count == 0); if (limit <= usage + tune_sz) { - while (qdata->qd_count + limit <= usage + tune_sz) + while (qdata->qd_count + limit <= + usage + tune_sz) qdata->qd_count += qunit_sz; ret = 1; - } else if (limit > usage + qunit_sz + tune_sz) { - while (limit - qdata->qd_count > usage + qunit_sz + tune_sz) + } else if (limit > usage + qunit_sz + tune_sz && + limit_org > qdata->qd_count + qunit_sz) { + while (limit - qdata->qd_count > usage + qunit_sz + tune_sz && + limit_org > qdata->qd_count + qunit_sz) qdata->qd_count += qunit_sz; ret = 2; + /* if there are other pending writes for this uid/gid, releasing + * quota is put off until the last pending write b=16645 */ + if (ret == 2 && pending_write) { + CDEBUG(D_QUOTA, "delay quota release\n"); + ret = 0; + } } + CDEBUG(D_QUOTA, "type: %c, limit: "LPU64", usage: "LPU64 + ", pending_write: "LPU64", record: "LPD64 + ", qunit_sz: %lu, tune_sz: %lu, ret: %d.\n", + QDATA_IS_BLK(qdata) ? 
'b' : 'i', limit, usage, pending_write, + record, qunit_sz, tune_sz, ret); LASSERT(ret == 0 || qdata->qd_count); + + spin_unlock(&lqs->lqs_lock); + lqs_putref(lqs); + EXIT; + out: + OBD_FREE_PTR(qctl); + return ret; +} + +/** + * Compute the remaining quota for certain gid or uid b=11693 + */ +int compute_remquota(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int isblk) +{ + struct super_block *sb = qctxt->lqc_sb; + __u64 usage, limit; + struct obd_quotactl *qctl; + int ret = QUOTA_RET_OK; + ENTRY; + + if (!sb_any_quota_enabled(sb)) + RETURN(QUOTA_RET_NOQUOTA); + + /* ignore root user */ + if (qdata->qd_id == 0 && QDATA_IS_GRP(qdata) == USRQUOTA) + RETURN(QUOTA_RET_NOLIMIT); + + OBD_ALLOC_PTR(qctl); + if (qctl == NULL) + RETURN(-ENOMEM); + + /* get fs quota usage & limit */ + qctl->qc_cmd = Q_GETQUOTA; + qctl->qc_id = qdata->qd_id; + qctl->qc_type = QDATA_IS_GRP(qdata); + ret = fsfilt_quotactl(obd, sb, qctl); + if (ret) { + if (ret == -ESRCH) /* no limit */ + ret = QUOTA_RET_NOLIMIT; + else + CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", + ret); + GOTO(out, ret); + } + + usage = isblk ? qctl->qc_dqblk.dqb_curspace : + qctl->qc_dqblk.dqb_curinodes; + limit = isblk ? 
qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS : + qctl->qc_dqblk.dqb_ihardlimit; + if (!limit){ /* no limit */ + ret = QUOTA_RET_NOLIMIT; + GOTO(out, ret); + } + + if (limit >= usage) + qdata->qd_count = limit - usage; + else + qdata->qd_count = 0; EXIT; out: OBD_FREE_PTR(qctl); @@ -319,12 +441,13 @@ static struct lustre_qunit *alloc_qunit(struct lustre_quota_ctxt *qctxt, RETURN(NULL); CFS_INIT_LIST_HEAD(&qunit->lq_hash); - CFS_INIT_LIST_HEAD(&qunit->lq_waiters); + init_waitqueue_head(&qunit->lq_waitq); atomic_set(&qunit->lq_refcnt, 1); qunit->lq_ctxt = qctxt; memcpy(&qunit->lq_data, qdata, sizeof(*qdata)); qunit->lq_opc = opc; - + qunit->lq_lock = SPIN_LOCK_UNLOCKED; + QUNIT_SET_STATE_AND_RC(qunit, QUNIT_CREATED, 0); RETURN(qunit); } @@ -351,96 +474,74 @@ insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit) struct list_head *head; LASSERT(list_empty(&qunit->lq_hash)); + qunit_get(qunit); head = qunit_hash + qunit_hashfn(qctxt, &qunit->lq_data); list_add(&qunit->lq_hash, head); + QUNIT_SET_STATE(qunit, QUNIT_IN_HASH); +} + +static void compute_lqs_after_removing_qunit(struct lustre_qunit *qunit) +{ + struct lustre_qunit_size *lqs = NULL; + + quota_search_lqs(&qunit->lq_data, NULL, qunit->lq_ctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + if (qunit->lq_opc == QUOTA_DQACQ) + quota_compute_lqs(&qunit->lq_data, lqs, 0, 1); + if (qunit->lq_opc == QUOTA_DQREL) + quota_compute_lqs(&qunit->lq_data, lqs, 0, 0); + spin_unlock(&lqs->lqs_lock); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + /* this is for schedule_dqacq */ + lqs_putref(lqs); + } + } static void remove_qunit_nolock(struct lustre_qunit *qunit) { LASSERT(!list_empty(&qunit->lq_hash)); + LASSERT_SPIN_LOCKED(&qunit_hash_lock); + list_del_init(&qunit->lq_hash); + QUNIT_SET_STATE(qunit, QUNIT_RM_FROM_HASH); + qunit_put(qunit); } -struct qunit_waiter { - struct list_head qw_entry; - cfs_waitq_t qw_waitq; - int qw_rc; -}; - #define INC_QLIMIT(limit, count) (limit == 
MIN_QLIMIT) ? \ (limit = count) : (limit += count) -/* FIXME check if this mds is the master of specified id */ -static int -is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, - unsigned int id, int type) +static inline int is_master(struct lustre_quota_ctxt *qctxt) { return qctxt->lqc_handler ? 1 : 0; } static int schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, - struct qunit_data *qdata, int opc, int wait); - -static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, - struct qunit_data *qdata, int opc, int wait) -{ - int rc = 0; - unsigned long factor; - struct qunit_data tmp_qdata; - ENTRY; - - LASSERT(qdata && qdata->qd_count); - QDATA_DEBUG(qdata, "%s quota split.\n", - (qdata->qd_flags & QUOTA_IS_BLOCK) ? "block" : "inode"); - if (qdata->qd_flags & QUOTA_IS_BLOCK) - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * - qctxt->lqc_bunit_sz; - else - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * - qctxt->lqc_iunit_sz; - - if (qctxt->lqc_import && should_translate_quota(qctxt->lqc_import) && - qdata->qd_count > factor) { - tmp_qdata = *qdata; - tmp_qdata.qd_count = factor; - qdata->qd_count -= tmp_qdata.qd_count; - QDATA_DEBUG((&tmp_qdata), "be split.\n"); - rc = schedule_dqacq(obd, qctxt, &tmp_qdata, opc, wait); - } else{ - QDATA_DEBUG(qdata, "don't be split.\n"); - rc = schedule_dqacq(obd, qctxt, qdata, opc, wait); - } - - RETURN(rc); -} + struct qunit_data *qdata, int opc, int wait, + struct obd_trans_info *oti); static int -dqacq_completion(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, +dqacq_completion(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, int rc, int opc) { struct lustre_qunit *qunit = NULL; struct super_block *sb = qctxt->lqc_sb; - unsigned long qunit_sz; - struct qunit_waiter *qw, *tmp; int err = 0; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; - __u64 
qd_tmp = qdata->qd_count; - unsigned long div_r; + struct quota_adjust_qunit *oqaq = NULL; + int rc1 = 0; ENTRY; LASSERT(qdata); - qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz; - div_r = do_div(qd_tmp, qunit_sz); - LASSERTF(!div_r, "qunit_sz: %lu, return qunit_sz: "LPU64"\n", - qunit_sz, qd_tmp); + QDATA_DEBUG(qdata, "obd(%s): complete %s quota req\n", + obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel"); /* update local operational quota file */ if (rc == 0) { - __u32 count = QUSG(qdata->qd_count, is_blk); + __u64 count = QUSG(qdata->qd_count, QDATA_IS_BLK(qdata)); struct obd_quotactl *qctl; __u64 *hardlimit; @@ -453,14 +554,14 @@ dqacq_completion(struct obd_device *obd, * set fs quota limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; + qctl->qc_type = QDATA_IS_GRP(qdata); err = fsfilt_quotactl(obd, sb, qctl); if (err) { CERROR("error get quota fs limit! (rc:%d)\n", err); GOTO(out_mem, err); } - if (is_blk) { + if (QDATA_IS_BLK(qdata)) { qctl->qc_dqblk.dqb_valid = QIF_BLIMITS; hardlimit = &qctl->qc_dqblk.dqb_bhardlimit; } else { @@ -468,20 +569,24 @@ dqacq_completion(struct obd_device *obd, hardlimit = &qctl->qc_dqblk.dqb_ihardlimit; } + CDEBUG(D_QUOTA, "hardlimt: "LPU64"\n", *hardlimit); + + if (*hardlimit == 0) + goto out_mem; + switch (opc) { case QUOTA_DQACQ: - CDEBUG(D_QUOTA, "%s(acq):count: %d, hardlimt: "LPU64 - ",type: %s.\n", obd->obd_name, count, *hardlimit, - qdata_type ? "grp": "usr"); INC_QLIMIT(*hardlimit, count); break; case QUOTA_DQREL: - CDEBUG(D_QUOTA, "%s(rel):count: %d, hardlimt: "LPU64 - ",type: %s.\n", obd->obd_name, count, *hardlimit, - qdata_type ? "grp": "usr"); LASSERTF(count < *hardlimit, - "count: %d, hardlimit: "LPU64".\n", - count, *hardlimit); + "id(%u) flag(%u) type(%c) isblk(%c) " + "count("LPU64") qd_qunit("LPU64") " + "hardlimit("LPU64").\n", + qdata->qd_id, qdata->qd_flags, + QDATA_IS_GRP(qdata) ? 'g' : 'u', + QDATA_IS_BLK(qdata) ? 
'b': 'i', + qdata->qd_count, qdata->qd_qunit, *hardlimit); *hardlimit -= count; break; default: @@ -516,40 +621,56 @@ out: /* this qunit has been removed by qctxt_cleanup() */ if (!qunit) { spin_unlock(&qunit_hash_lock); + QDATA_DEBUG(qdata, "%s is discarded because qunit isn't found\n", + opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); RETURN(err); } LASSERT(opc == qunit->lq_opc); + /* remove this qunit from lq_hash so that new processes cannot be added + * to qunit->lq_waiters */ remove_qunit_nolock(qunit); + spin_unlock(&qunit_hash_lock); - /* wake up all waiters */ - list_for_each_entry_safe(qw, tmp, &qunit->lq_waiters, qw_entry) { - list_del_init(&qw->qw_entry); - qw->qw_rc = rc; - wake_up(&qw->qw_waitq); - } + compute_lqs_after_removing_qunit(qunit); - spin_unlock(&qunit_hash_lock); + /* wake up all waiters */ + QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, rc); + wake_up_all(&qunit->lq_waitq); qunit_put(qunit); + if (rc < 0 && rc != -EDQUOT) + RETURN(err); /* don't reschedule in such cases: - * - acq/rel failure, but not for quota recovery. + * - acq/rel failure and qunit isn't changed, + * but not for quota recovery. * - local dqacq/dqrel. * - local disk io failure. */ - if (err || (rc && rc != -EBUSY) || - is_master(obd, qctxt, qdata->qd_id, qdata_type)) + OBD_ALLOC_PTR(oqaq); + if (!oqaq) + RETURN(-ENOMEM); + qdata_to_oqaq(qdata, oqaq); + /* adjust the qunit size in slaves */ + rc1 = quota_adjust_slave_lqs(oqaq, qctxt); + OBD_FREE_PTR(oqaq); + if (rc1 < 0) { + CERROR("adjust slave's qunit size failed!(rc:%d)\n", rc1); + RETURN(rc1); + } + if (err || (rc && rc != -EBUSY && rc1 == 0) || is_master(qctxt)) RETURN(err); /* reschedule another dqacq/dqrel if needed */ qdata->qd_count = 0; - rc = check_cur_qunit(obd, qctxt, qdata); - if (rc > 0) { + qdata->qd_flags &= LQUOTA_QUNIT_FLAGS; + rc1 = check_cur_qunit(obd, qctxt, qdata); + if (rc1 > 0) { int opc; - opc = rc == 1 ? 
QUOTA_DQACQ : QUOTA_DQREL; - rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0); - QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc); + opc = rc1 == 1 ? QUOTA_DQACQ : QUOTA_DQREL; + rc1 = schedule_dqacq(obd, qctxt, qdata, opc, 0, NULL); + QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc1); } RETURN(err); } @@ -564,158 +685,250 @@ static int dqacq_interpret(const struct lu_env *env, { struct dqacq_async_args *aa = (struct dqacq_async_args *)data; struct lustre_quota_ctxt *qctxt = aa->aa_ctxt; + struct obd_device_target *obt = qctxt->lqc_obt; struct lustre_qunit *qunit = aa->aa_qunit; struct obd_device *obd = req->rq_import->imp_obd; struct qunit_data *qdata = NULL; - struct qunit_data_old *qdata_old = NULL; + int rc1 = 0; ENTRY; LASSERT(req); LASSERT(req->rq_import); - if ((req->rq_import->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { - CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); - - qdata = req_capsule_server_swab_get(&req->rq_pill, - &RMF_QUNIT_DATA, - (void*)lustre_swab_qdata); - } else { - CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); + /* there are several forms of qunit(historic causes), so we need to + * adjust qunit from slaves to the same form here */ + OBD_ALLOC(qdata, sizeof(struct qunit_data)); + if (!qdata) + RETURN(-ENOMEM); - qdata = req_capsule_server_swab_get(&req->rq_pill, - &RMF_QUNIT_DATA, - (void*)lustre_swab_qdata_old); - qdata = lustre_quota_old_to_new(qdata_old); - } - if (qdata == NULL) { - DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data"); - RETURN(-EPROTO); + down_read(&obt->obt_rwsem); + /* if a quota req timeouts or is dropped, we should update quota + * statistics which will be handled in dqacq_completion. And in + * this situation we should get qdata from request instead of + * reply */ + rc1 = quota_get_qdata(req, qdata, + (rc != 0) ? 
QUOTA_REQUEST : QUOTA_REPLY, + QUOTA_IMPORT); + if (rc1 < 0) { + DEBUG_REQ(D_ERROR, req, + "error unpacking qunit_data(rc: %d)\n", rc1); + GOTO(exit, rc = rc1); } - LASSERT(qdata->qd_id == qunit->lq_data.qd_id && - (qdata->qd_flags & QUOTA_IS_GRP) == - (qunit->lq_data.qd_flags & QUOTA_IS_GRP) && - (qdata->qd_count == qunit->lq_data.qd_count || - qdata->qd_count == 0)); + QDATA_DEBUG(qdata, "qdata: interpret rc(%d).\n", rc); + QDATA_DEBUG((&qunit->lq_data), "lq_data: \n"); - QDATA_DEBUG(qdata, "%s interpret rc(%d).\n", - lustre_msg_get_opc(req->rq_reqmsg) == QUOTA_DQACQ ? - "DQACQ" : "DQREL", rc); + if (qdata->qd_id != qunit->lq_data.qd_id || + OBD_FAIL_CHECK(OBD_FAIL_QUOTA_RET_QDATA)) { + CDEBUG(D_ERROR, "the returned qd_id isn't expected!" + "(qdata: %u, lq_data: %u)\n", qdata->qd_id, + qunit->lq_data.qd_id); + qdata->qd_id = qunit->lq_data.qd_id; + rc = -EPROTO; + } + if (QDATA_IS_GRP(qdata) != QDATA_IS_GRP(&qunit->lq_data)) { + CDEBUG(D_ERROR, "the returned grp/usr isn't expected!" + "(qdata: %u, lq_data: %u)\n", qdata->qd_flags, + qunit->lq_data.qd_flags); + if (QDATA_IS_GRP(&qunit->lq_data)) + QDATA_SET_GRP(qdata); + else + QDATA_CLR_GRP(qdata); + rc = -EPROTO; + } + if (qdata->qd_count > qunit->lq_data.qd_count) { + CDEBUG(D_ERROR, "the returned qd_count isn't expected!" 
+ "(qdata: "LPU64", lq_data: "LPU64")\n", qdata->qd_count, + qunit->lq_data.qd_count); + rc = -EPROTO; + } rc = dqacq_completion(obd, qctxt, qdata, rc, lustre_msg_get_opc(req->rq_reqmsg)); +exit: + up_read(&obt->obt_rwsem); + OBD_FREE(qdata, sizeof(struct qunit_data)); + RETURN(rc); } -static int got_qunit(struct qunit_waiter *waiter) +/** + * check if quota master is online + */ +int check_qm(struct lustre_quota_ctxt *qctxt) { - int rc = 0; + int rc; ENTRY; - spin_lock(&qunit_hash_lock); - rc = list_empty(&waiter->qw_entry); - spin_unlock(&qunit_hash_lock); + + spin_lock(&qctxt->lqc_lock); + /* quit waiting when mds is back or qctxt is cleaned up */ + rc = qctxt->lqc_import || !qctxt->lqc_valid; + spin_unlock(&qctxt->lqc_lock); + + RETURN(rc); +} + +static int got_qunit(struct lustre_qunit *qunit) +{ + int rc; + ENTRY; + + spin_lock(&qunit->lq_lock); + switch (qunit->lq_state) { + case QUNIT_IN_HASH: + case QUNIT_RM_FROM_HASH: + rc = 0; + break; + case QUNIT_FINISHED: + rc = 1; + break; + default: + rc = 0; + CERROR("invalid qunit state %d\n", qunit->lq_state); + } + spin_unlock(&qunit->lq_lock); RETURN(rc); } static int -schedule_dqacq(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, - struct qunit_data *qdata, int opc, int wait) +schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, + struct qunit_data *qdata, int opc, int wait, + struct obd_trans_info *oti) { struct lustre_qunit *qunit, *empty; - struct qunit_waiter qw; struct l_wait_info lwi = { 0 }; struct ptlrpc_request *req; - struct qunit_data *reqdata; struct dqacq_async_args *aa; - unsigned long factor; + struct obd_import *imp = NULL; + struct lustre_qunit_size *lqs = NULL; + struct timeval work_start; + struct timeval work_end; + long timediff; int rc = 0; ENTRY; - CFS_INIT_LIST_HEAD(&qw.qw_entry); - init_waitqueue_head(&qw.qw_waitq); - qw.qw_rc = 0; - + LASSERT(opc == QUOTA_DQACQ || opc == QUOTA_DQREL); + do_gettimeofday(&work_start); if ((empty = alloc_qunit(qctxt, qdata, 
opc)) == NULL) RETURN(-ENOMEM); spin_lock(&qunit_hash_lock); - qunit = dqacq_in_flight(qctxt, qdata); if (qunit) { if (wait) - list_add_tail(&qw.qw_entry, &qunit->lq_waiters); + qunit_get(qunit); spin_unlock(&qunit_hash_lock); + qunit_put(empty); - free_qunit(empty); goto wait_completion; } qunit = empty; insert_qunit_nolock(qctxt, qunit); - if (wait) - list_add_tail(&qw.qw_entry, &qunit->lq_waiters); spin_unlock(&qunit_hash_lock); LASSERT(qunit); + quota_search_lqs(qdata, NULL, qctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + quota_compute_lqs(qdata, lqs, 1, (opc == QUOTA_DQACQ) ? 1 : 0); + /* when this qdata returned from mds, it will call lqs_putref */ + lqs_getref(lqs); + spin_unlock(&lqs->lqs_lock); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + } else { + CDEBUG(D_ERROR, "Can't find the lustre qunit size!\n"); + } + + QDATA_DEBUG(qdata, "obd(%s): send %s quota req\n", + obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel"); /* master is going to dqacq/dqrel from itself */ - if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP)) - { + if (is_master(qctxt)) { int rc2; QDATA_DEBUG(qdata, "local %s.\n", opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); + QDATA_SET_CHANGE_QS(qdata); rc = qctxt->lqc_handler(obd, qdata, opc); rc2 = dqacq_completion(obd, qctxt, qdata, rc, opc); - RETURN((rc && rc != -EDQUOT) ? rc : rc2); + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + if (opc == QUOTA_DQACQ) + lprocfs_counter_add(qctxt->lqc_stats, + wait ? LQUOTA_SYNC_ACQ : LQUOTA_ASYNC_ACQ, + timediff); + else + lprocfs_counter_add(qctxt->lqc_stats, + wait ? LQUOTA_SYNC_REL : LQUOTA_ASYNC_REL, + timediff); + RETURN(rc ? 
rc : rc2); + } + + spin_lock(&qctxt->lqc_lock); + if (!qctxt->lqc_import) { + spin_unlock(&qctxt->lqc_lock); + QDATA_DEBUG(qdata, "lqc_import is invalid.\n"); + + spin_lock(&qunit_hash_lock); + remove_qunit_nolock(qunit); + spin_unlock(&qunit_hash_lock); + + compute_lqs_after_removing_qunit(qunit); + + QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, -EAGAIN); + wake_up_all(&qunit->lq_waitq); + + qunit_put(qunit); + spin_lock(&qctxt->lqc_lock); + if (wait && !qctxt->lqc_import) { + spin_unlock(&qctxt->lqc_lock); + + LASSERT(oti && oti->oti_thread && + oti->oti_thread->t_watchdog); + + lc_watchdog_disable(oti->oti_thread->t_watchdog); + CDEBUG(D_QUOTA, "sleep for quota master\n"); + l_wait_event(qctxt->lqc_wait_for_qmaster, + check_qm(qctxt), &lwi); + CDEBUG(D_QUOTA, "wake up when quota master is back\n"); + lc_watchdog_touch(oti->oti_thread->t_watchdog); + } else { + spin_unlock(&qctxt->lqc_lock); + } + + RETURN(-EAGAIN); } + imp = class_import_get(qctxt->lqc_import); + spin_unlock(&qctxt->lqc_lock); /* build dqacq/dqrel request */ - LASSERT(qctxt->lqc_import); + LASSERT(imp); - req = ptlrpc_request_alloc_pack(qctxt->lqc_import, &RQF_MDS_QUOTA_DQACQ, + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_QUOTA_DQACQ, LUSTRE_MDS_VERSION, opc); + class_import_put(imp); if (req == NULL) { + CDEBUG(D_ERROR, "Can't alloc request\n"); dqacq_completion(obd, qctxt, qdata, -ENOMEM, opc); RETURN(-ENOMEM); } - if (qdata->qd_flags & QUOTA_IS_BLOCK) - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * - qctxt->lqc_bunit_sz; - else - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * - qctxt->lqc_iunit_sz; - - LASSERT(!should_translate_quota(qctxt->lqc_import) || - qdata->qd_count <= factor); - if (should_translate_quota(qctxt->lqc_import)) - { - struct qunit_data_old *reqdata_old, *tmp; - - reqdata_old = req_capsule_client_get(&req->rq_pill, - &RMF_QUNIT_DATA); - - tmp = lustre_quota_new_to_old(qdata); - *reqdata_old = *tmp; - req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, 
RCL_SERVER, - sizeof(*reqdata_old)); - CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); - } else { - reqdata = req_capsule_client_get(&req->rq_pill, - &RMF_QUNIT_DATA); - - *reqdata = *qdata; - req_capsule_set_size(&req->rq_pill, &RMF_QUNIT_DATA, RCL_SERVER, - sizeof(*reqdata)); - CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); - } ptlrpc_request_set_replen(req); + req->rq_no_resend = req->rq_no_delay = 1; + rc = quota_copy_qdata(req, qdata, QUOTA_REQUEST, QUOTA_IMPORT); + if (rc < 0) { + CDEBUG(D_ERROR, "Can't pack qunit_data(rc: %d)\n", rc); + ptlrpc_req_finished(req); + dqacq_completion(obd, qctxt, qdata, -EPROTO, opc); + RETURN(rc); + } + + if (wait && qunit) + qunit_get(qunit); CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); - aa = (struct dqacq_async_args *)&req->rq_async_args; + aa = ptlrpc_req_async_args(req); aa->aa_ctxt = qctxt; aa->aa_qunit = qunit; @@ -727,22 +940,45 @@ schedule_dqacq(struct obd_device *obd, wait_completion: if (wait && qunit) { struct qunit_data *p = &qunit->lq_data; - QDATA_DEBUG(p, "wait for dqacq.\n"); - l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi); - if (qw.qw_rc == 0) + QDATA_DEBUG(p, "qunit(%p) is waiting for dqacq.\n", qunit); + l_wait_event(qunit->lq_waitq, got_qunit(qunit), &lwi); + /* rc = -EAGAIN, it means a quota req is finished; + * rc = -EDQUOT, it means out of quota + * rc = -EBUSY, it means recovery is happening + * other rc < 0, it means real errors, functions who call + * schedule_dqacq should take care of this */ + spin_lock(&qunit->lq_lock); + if (qunit->lq_rc == 0) rc = -EAGAIN; - - CDEBUG(D_QUOTA, "wait dqacq done. (rc:%d)\n", qw.qw_rc); + else + rc = qunit->lq_rc; + spin_unlock(&qunit->lq_lock); + CDEBUG(D_QUOTA, "qunit(%p) finishes waiting. (rc:%d)\n", + qunit, rc); + qunit_put(qunit); } + + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + if (opc == QUOTA_DQACQ) + lprocfs_counter_add(qctxt->lqc_stats, + wait ? 
LQUOTA_SYNC_ACQ : LQUOTA_ASYNC_ACQ, + timediff); + else + lprocfs_counter_add(qctxt->lqc_stats, + wait ? LQUOTA_SYNC_REL : LQUOTA_ASYNC_REL, + timediff); + RETURN(rc); } int qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, - uid_t uid, gid_t gid, __u32 isblk, int wait) + uid_t uid, gid_t gid, __u32 isblk, int wait, + struct obd_trans_info *oti) { - int ret, rc = 0, i = USRQUOTA; + int rc = 0, i = USRQUOTA; __u32 id[MAXQUOTAS] = { uid, gid }; struct qunit_data qdata[MAXQUOTAS]; ENTRY; @@ -753,20 +989,26 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, for (i = 0; i < MAXQUOTAS; i++) { qdata[i].qd_id = id[i]; - qdata[i].qd_flags = 0; - qdata[i].qd_flags |= i; - qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0; + qdata[i].qd_flags = i; + if (isblk) + QDATA_SET_BLK(&qdata[i]); qdata[i].qd_count = 0; - ret = check_cur_qunit(obd, qctxt, &qdata[i]); - if (ret > 0) { + rc = check_cur_qunit(obd, qctxt, &qdata[i]); + if (rc > 0) { int opc; /* need acquire or release */ - opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL; - ret = split_before_schedule_dqacq(obd, qctxt, &qdata[i], - opc, wait); - if (!rc) - rc = ret; + opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL; + rc = schedule_dqacq(obd, qctxt, &qdata[i], opc, + wait,oti); + if (rc < 0) + RETURN(rc); + } else if (wait == 1) { + /* when wait equates 1, that means mds_quota_acquire + * or filter_quota_acquire is calling it. 
*/ + rc = qctxt_wait_pending_dqacq(qctxt, id[i], i, isblk); + if (rc < 0) + RETURN(rc); } } @@ -778,93 +1020,174 @@ qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, unsigned short type, int isblk) { struct lustre_qunit *qunit = NULL; - struct qunit_waiter qw; struct qunit_data qdata; + struct timeval work_start; + struct timeval work_end; + long timediff; struct l_wait_info lwi = { 0 }; + int rc = 0; ENTRY; - CFS_INIT_LIST_HEAD(&qw.qw_entry); - init_waitqueue_head(&qw.qw_waitq); - qw.qw_rc = 0; - + do_gettimeofday(&work_start); qdata.qd_id = id; - qdata.qd_flags = 0; - qdata.qd_flags |= type; - qdata.qd_flags |= isblk ? QUOTA_IS_BLOCK : 0; + qdata.qd_flags = type; + if (isblk) + QDATA_SET_BLK(&qdata); qdata.qd_count = 0; spin_lock(&qunit_hash_lock); - qunit = dqacq_in_flight(qctxt, &qdata); if (qunit) - list_add_tail(&qw.qw_entry, &qunit->lq_waiters); - + /* grab reference on this qunit to handle races with + * dqacq_completion(). Otherwise, this qunit could be freed just + * after we release the qunit_hash_lock */ + qunit_get(qunit); spin_unlock(&qunit_hash_lock); if (qunit) { - struct qunit_data *p = &qdata; - QDATA_DEBUG(p, "wait for dqacq completion.\n"); - l_wait_event(qw.qw_waitq, got_qunit(&qw), &lwi); - QDATA_DEBUG(p, "wait dqacq done. (rc:%d)\n", qw.qw_rc); + struct qunit_data *p = &qunit->lq_data; + + QDATA_DEBUG(p, "qunit(%p) is waiting for dqacq.\n", qunit); + l_wait_event(qunit->lq_waitq, got_qunit(qunit), &lwi); + CDEBUG(D_QUOTA, "qunit(%p) finishes waiting. (rc:%d)\n", + qunit, qunit->lq_rc); + qunit_put(qunit); + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, + isblk ? 
LQUOTA_WAIT_PENDING_BLK_QUOTA : + LQUOTA_WAIT_PENDING_INO_QUOTA, + timediff); + /* keep same as schedule_dqacq() b=17030 */ + spin_lock(&qunit->lq_lock); + if (qunit->lq_rc == 0) + rc = -EAGAIN; + else + rc = qunit->lq_rc; + spin_unlock(&qunit->lq_lock); + } else { + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, + isblk ? LQUOTA_NOWAIT_PENDING_BLK_QUOTA : + LQUOTA_NOWAIT_PENDING_INO_QUOTA, + timediff); } - RETURN(0); + + RETURN(rc); } int -qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, - dqacq_handler_t handler) +qctxt_init(struct obd_device *obd, dqacq_handler_t handler) { + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + struct obd_device_target *obt = &obd->u.obt; + struct super_block *sb = obt->obt_sb; int rc = 0; ENTRY; + LASSERT(qctxt); + rc = ptlrpcd_addref(); if (rc) RETURN(rc); + cfs_waitq_init(&qctxt->lqc_wait_for_qmaster); + spin_lock_init(&qctxt->lqc_lock); + spin_lock(&qctxt->lqc_lock); qctxt->lqc_handler = handler; qctxt->lqc_sb = sb; + qctxt->lqc_obt = obt; qctxt->lqc_import = NULL; qctxt->lqc_recovery = 0; - qctxt->lqc_atype = 0; - qctxt->lqc_status= 0; + qctxt->lqc_switch_qs = 1; /* Change qunit size in default setting */ + qctxt->lqc_valid = 1; + qctxt->lqc_cqs_boundary_factor = 4; + qctxt->lqc_cqs_least_bunit = PTLRPC_MAX_BRW_SIZE; + qctxt->lqc_cqs_least_iunit = 2; + qctxt->lqc_cqs_qs_factor = 2; + qctxt->lqc_flags = 0; + QUOTA_MASTER_UNREADY(qctxt); qctxt->lqc_bunit_sz = default_bunit_sz; qctxt->lqc_btune_sz = default_bunit_sz / 100 * default_btune_ratio; qctxt->lqc_iunit_sz = default_iunit_sz; qctxt->lqc_itune_sz = default_iunit_sz * default_itune_ratio / 100; + qctxt->lqc_switch_seconds = 300; /* enlarging will wait 5 minutes + * after the last shrinking */ + qctxt->lqc_sync_blk = 0; + spin_unlock(&qctxt->lqc_lock); + + qctxt->lqc_lqs_hash = lustre_hash_init("LQS_HASH", 7, 7, + &lqs_hash_ops, 0); + if (!qctxt->lqc_lqs_hash) { + 
CERROR("initialize hash lqs for %s error!\n", obd->obd_name); + RETURN(-ENOMEM); + } - RETURN(0); +#ifdef LPROCFS + rc = lquota_proc_setup(obd, is_master(qctxt)); + if (rc) + CERROR("initialize proc for %s error!\n", obd->obd_name); +#endif + + RETURN(rc); } void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force) { struct lustre_qunit *qunit, *tmp; - struct qunit_waiter *qw, *tmp2; + struct list_head tmp_list; + struct obd_device_target *obt = qctxt->lqc_obt; int i; ENTRY; - spin_lock(&qunit_hash_lock); + CFS_INIT_LIST_HEAD(&tmp_list); + spin_lock(&qctxt->lqc_lock); + qctxt->lqc_valid = 0; + spin_unlock(&qctxt->lqc_lock); + + spin_lock(&qunit_hash_lock); for (i = 0; i < NR_DQHASH; i++) { list_for_each_entry_safe(qunit, tmp, &qunit_hash[i], lq_hash) { if (qunit->lq_ctxt != qctxt) continue; - remove_qunit_nolock(qunit); - /* wake up all waiters */ - list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters, - qw_entry) { - list_del_init(&qw->qw_entry); - qw->qw_rc = 0; - wake_up(&qw->qw_waitq); - } - qunit_put(qunit); + list_add(&qunit->lq_hash, &tmp_list); } } - spin_unlock(&qunit_hash_lock); + list_for_each_entry_safe(qunit, tmp, &tmp_list, lq_hash) { + list_del_init(&qunit->lq_hash); + compute_lqs_after_removing_qunit(qunit); + + /* wake up all waiters */ + QUNIT_SET_STATE_AND_RC(qunit, QUNIT_FINISHED, 0); + wake_up_all(&qunit->lq_waitq); + qunit_put(qunit); + } + + down_write(&obt->obt_rwsem); + lustre_hash_exit(qctxt->lqc_lqs_hash); + qctxt->lqc_lqs_hash = NULL; + up_write(&obt->obt_rwsem); + + /* after qctxt_cleanup, qctxt might be freed, then check_qm() is + * unpredicted. 
So we must wait until lqc_wait_for_qmaster is empty */ + while (cfs_waitq_active(&qctxt->lqc_wait_for_qmaster)) { + cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster); + cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, + cfs_time_seconds(1)); + } + ptlrpcd_decref(); +#ifdef LPROCFS + if (lquota_proc_cleanup(qctxt)) + CERROR("cleanup proc error!\n"); +#endif + EXIT; } @@ -919,24 +1242,27 @@ static int qslave_recovery_main(void *arg) list_for_each_entry_safe(dqid, tmp, &id_list, di_link) { list_del_init(&dqid->di_link); /* skip slave recovery on itself */ - if (is_master(obd, qctxt, dqid->di_id, type)) + if (is_master(qctxt)) goto free; if (rc && rc != -EBUSY) goto free; qdata.qd_id = dqid->di_id; - qdata.qd_flags = 0; - qdata.qd_flags |= type; - qdata.qd_flags |= QUOTA_IS_BLOCK; + qdata.qd_flags = type; + QDATA_SET_BLK(&qdata); qdata.qd_count = 0; ret = check_cur_qunit(obd, qctxt, &qdata); if (ret > 0) { int opc; opc = ret == 1 ? QUOTA_DQACQ : QUOTA_DQREL; - rc = split_before_schedule_dqacq(obd, qctxt, &qdata, opc, 0); - } else + rc = schedule_dqacq(obd, qctxt, &qdata, opc, + 0, NULL); + if (rc == -EDQUOT) + rc = 0; + } else { rc = 0; + } if (rc) CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, @@ -974,3 +1300,102 @@ qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt) exit: EXIT; } + + +/** + * lqs<->qctxt hash operations + */ + +/** + * string hashing using djb2 hash algorithm + */ +static unsigned +lqs_hash(lustre_hash_t *lh, void *key, unsigned mask) +{ + struct quota_adjust_qunit *lqs_key; + unsigned hash; + ENTRY; + + LASSERT(key); + lqs_key = (struct quota_adjust_qunit *)key; + hash = (QAQ_IS_GRP(lqs_key) ? 
5381 : 5387) * lqs_key->qaq_id; + + RETURN(hash & mask); +} + +static int +lqs_compare(void *key, struct hlist_node *hnode) +{ + struct quota_adjust_qunit *lqs_key; + struct lustre_qunit_size *q; + int rc; + ENTRY; + + LASSERT(key); + lqs_key = (struct quota_adjust_qunit *)key; + q = hlist_entry(hnode, struct lustre_qunit_size, lqs_hash); + + spin_lock(&q->lqs_lock); + rc = ((lqs_key->qaq_id == q->lqs_id) && + (QAQ_IS_GRP(lqs_key) == LQS_IS_GRP(q))); + spin_unlock(&q->lqs_lock); + + RETURN(rc); +} + +static void * +lqs_get(struct hlist_node *hnode) +{ + struct lustre_qunit_size *q = + hlist_entry(hnode, struct lustre_qunit_size, lqs_hash); + ENTRY; + + atomic_inc(&q->lqs_refcount); + CDEBUG(D_QUOTA, "lqs=%p refcount %d\n", + q, atomic_read(&q->lqs_refcount)); + + RETURN(q); +} + +static void * +lqs_put(struct hlist_node *hnode) +{ + struct lustre_qunit_size *q = + hlist_entry(hnode, struct lustre_qunit_size, lqs_hash); + ENTRY; + + LASSERT(atomic_read(&q->lqs_refcount) > 0); + atomic_dec(&q->lqs_refcount); + CDEBUG(D_QUOTA, "lqs=%p refcount %d\n", + q, atomic_read(&q->lqs_refcount)); + + RETURN(q); +} + +static void +lqs_exit(struct hlist_node *hnode) +{ + struct lustre_qunit_size *q; + ENTRY; + + q = hlist_entry(hnode, struct lustre_qunit_size, lqs_hash); + /* + * Nothing should be left. User of lqs put it and + * lqs also was deleted from table by this time + * so we should have 0 refs. 
+ */ + LASSERTF(atomic_read(&q->lqs_refcount) == 0, + "Busy lqs %p with %d refs\n", q, + atomic_read(&q->lqs_refcount)); + OBD_FREE_PTR(q); + EXIT; +} + +static lustre_hash_ops_t lqs_hash_ops = { + .lh_hash = lqs_hash, + .lh_compare = lqs_compare, + .lh_get = lqs_get, + .lh_put = lqs_put, + .lh_exit = lqs_exit +}; +#endif /* HAVE_QUOTA_SUPPORT */ diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index 2cb9c9d..826e9e5 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -36,7 +36,7 @@ #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_LQUOTA #ifdef __KERNEL__ # include @@ -44,7 +44,6 @@ # include # include # include -# include # include # include # include @@ -63,19 +62,25 @@ #include #include "quota_internal.h" +#ifdef HAVE_QUOTA_SUPPORT #ifdef __KERNEL__ -int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) +int mds_quota_ctl(struct obd_device *obd, struct obd_export *unused, + struct obd_quotactl *oqctl) { - struct obd_device *obd = exp->exp_obd; + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + struct timeval work_start; + struct timeval work_end; + long timediff; int rc = 0; ENTRY; + do_gettimeofday(&work_start); switch (oqctl->qc_cmd) { case Q_QUOTAON: rc = mds_quota_on(obd, oqctl); break; case Q_QUOTAOFF: - mds_quota_off(obd, oqctl); + rc = mds_quota_off(obd, oqctl); break; case Q_SETINFO: rc = mds_set_dqinfo(obd, oqctl); @@ -93,6 +98,12 @@ int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) case Q_GETOQUOTA: rc = mds_get_obd_quota(obd, oqctl); break; + case LUSTRE_Q_INVALIDATE: + rc = mds_quota_invalidate(obd, oqctl); + break; + case LUSTRE_Q_FINVALIDATE: + rc = mds_quota_finvalidate(obd, oqctl); + break; default: CERROR("%s: unsupported mds_quotactl command: %d\n", obd->obd_name, oqctl->qc_cmd); @@ -103,19 +114,29 @@ int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) CDEBUG(D_INFO, "mds_quotactl admin 
quota command %d, id %u, " "type %d, failed: rc = %d\n", oqctl->qc_cmd, oqctl->qc_id, oqctl->qc_type, rc); + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, LQUOTA_QUOTA_CTL, timediff); RETURN(rc); } -int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) +int filter_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) { struct obd_device *obd = exp->exp_obd; struct obd_device_target *obt = &obd->u.obt; struct lvfs_run_ctxt saved; + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + struct timeval work_start; + struct timeval work_end; + long timediff; int rc = 0; ENTRY; + do_gettimeofday(&work_start); switch (oqctl->qc_cmd) { + case Q_FINVALIDATE: case Q_QUOTAON: case Q_QUOTAOFF: if (!atomic_dec_and_test(&obt->obt_quotachecking)) { @@ -124,6 +145,12 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) rc = -EBUSY; break; } + if (oqctl->qc_cmd == Q_FINVALIDATE && + (obt->obt_qctxt.lqc_flags & UGQUOTA2LQC(oqctl->qc_type))) { + rc = -EBUSY; + break; + } + oqctl->qc_id = obt->obt_qfmt; /* override qfmt version */ case Q_GETOINFO: case Q_GETOQUOTA: case Q_GETQUOTA: @@ -137,18 +164,21 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) 1); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); + rc = fsfilt_quotactl(obd, obt->obt_sb, oqctl); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF) { - if (!rc) - obt->obt_qctxt.lqc_status = - (oqctl->qc_cmd == Q_QUOTAON) ? 
1 : 0; + if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF || + oqctl->qc_cmd == Q_FINVALIDATE) { + if (!rc && oqctl->qc_cmd == Q_QUOTAON) + obt->obt_qctxt.lqc_flags |= UGQUOTA2LQC(oqctl->qc_type); + if (!rc && oqctl->qc_cmd == Q_QUOTAOFF) + obt->obt_qctxt.lqc_flags &= ~UGQUOTA2LQC(oqctl->qc_type); atomic_inc(&obt->obt_quotachecking); } break; case Q_SETQUOTA: - qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, + /* currently, it is only used for nullifying the quota */ + qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, oqctl->qc_id, oqctl->qc_type, 1); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); @@ -170,14 +200,14 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0); /* There might be a pending dqacq/dqrel (which is going to - * clear stale limits on slave). we should wait for it's + * clear stale limits on slave). we should wait for it's * completion then initialize limits */ - qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, + qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, oqctl->qc_id, oqctl->qc_type, 1); if (!oqctl->qc_dqblk.dqb_bhardlimit) goto adjust; - + LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); @@ -200,8 +230,13 @@ adjust: else gid = oqctl->qc_id; - rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, - uid, gid, 1, 0); + rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, + uid, gid, 1, 0, NULL); + if (rc == -EDQUOT || rc == -EBUSY) { + CDEBUG(D_QUOTA, "rc: %d.\n", rc); + rc = 0; + } + break; } default: @@ -209,30 +244,37 @@ adjust: obd->obd_name, oqctl->qc_cmd); RETURN(-EFAULT); } + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, LQUOTA_QUOTA_CTL, timediff); RETURN(rc); } #endif /* __KERNEL__ */ +#endif -int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) +int 
client_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) { - struct ptlrpc_request *req; - struct obd_quotactl *oqc; - int ver, opc, rc; + struct ptlrpc_request *req; + struct obd_quotactl *oqc; + const struct req_format *rf; + int ver, opc, rc; ENTRY; if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME)) { + rf = &RQF_MDS_QUOTACTL; ver = LUSTRE_MDS_VERSION, opc = MDS_QUOTACTL; } else if (!strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME)) { + rf = &RQF_OST_QUOTACTL; ver = LUSTRE_OST_VERSION, opc = OST_QUOTACTL; } else { RETURN(-EINVAL); } - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MDS_QUOTACTL, ver, opc); + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), rf, ver, opc); if (req == NULL) RETURN(-ENOMEM); @@ -242,30 +284,65 @@ int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); - if (!rc) { + if (rc) { + CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc); + GOTO(out, rc); + } + + oqc = NULL; + if (req->rq_repmsg) oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - if (oqc == NULL) - GOTO(out, rc = -EPROTO); - *oqctl = *oqc; + if (oqc == NULL) { + CERROR ("Can't unpack obd_quotactl\n"); + GOTO(out, rc = -EPROTO); } + + *oqctl = *oqc; + EXIT; out: ptlrpc_req_finished(req); - RETURN (rc); + return rc; +} + +/** + * For lmv, only need to send request to master MDT, and the master MDT will + * process with other slave MDTs. 
+ */ +int lmv_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt = &lmv->tgts[0]; + int rc; + ENTRY; + + if (!lmv->desc.ld_tgt_count || !tgt->ltd_active) { + CERROR("master lmv inactive\n"); + RETURN(-EIO); + } + + rc = obd_quotactl(tgt->ltd_exp, oqctl); + RETURN(rc); } -int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) +int lov_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl) { struct obd_device *obd = class_exp2obd(exp); struct lov_obd *lov = &obd->u.lov; __u64 curspace = 0; - __u32 bhardlimit = 0; + __u64 bhardlimit = 0; int i, rc = 0; ENTRY; - if (oqctl->qc_cmd != Q_QUOTAON && oqctl->qc_cmd != Q_QUOTAOFF && - oqctl->qc_cmd != Q_GETOQUOTA && oqctl->qc_cmd != Q_INITQUOTA && - oqctl->qc_cmd != Q_SETQUOTA) { + if (oqctl->qc_cmd != LUSTRE_Q_QUOTAON && + oqctl->qc_cmd != LUSTRE_Q_QUOTAOFF && + oqctl->qc_cmd != Q_GETOQUOTA && + oqctl->qc_cmd != Q_INITQUOTA && + oqctl->qc_cmd != LUSTRE_Q_SETQUOTA && + oqctl->qc_cmd != Q_FINVALIDATE) { CERROR("bad quota opc %x for lov obd", oqctl->qc_cmd); RETURN(-EFAULT); } @@ -277,11 +354,10 @@ int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) if (oqctl->qc_cmd == Q_GETOQUOTA) { CERROR("ost %d is inactive\n", i); rc = -EIO; - break; } else { CDEBUG(D_HA, "ost %d is inactive\n", i); - continue; } + continue; } err = obd_quotactl(lov->lov_tgts[i]->ltd_exp, oqctl); diff --git a/lustre/quota/quota_interface.c b/lustre/quota/quota_interface.c index e035ceb..f4374b7 100644 --- a/lustre/quota/quota_interface.c +++ b/lustre/quota/quota_interface.c @@ -37,7 +37,7 @@ #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_LQUOTA #ifdef __KERNEL__ # include @@ -45,11 +45,14 @@ # include # include # include -# include -# include -# include -# include -# include 
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# include +# include +# include +# else +# include +# endif #else /* __KERNEL__ */ # include #endif @@ -64,245 +67,12 @@ #include #include "quota_internal.h" - #ifdef __KERNEL__ -/* quota proc file handling functions */ -#ifdef LPROCFS -int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - LASSERT(obd != NULL); - - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_bunit_sz); -} -EXPORT_SYMBOL(lprocfs_rd_bunit); - -int lprocfs_rd_iunit(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - LASSERT(obd != NULL); - - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_iunit_sz); -} -EXPORT_SYMBOL(lprocfs_rd_iunit); - -int lprocfs_wr_bunit(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - int val, rc; - LASSERT(obd != NULL); - - rc = lprocfs_write_helper(buffer, count, &val); - - if (rc) - return rc; - - if (val % QUOTABLOCK_SIZE || - val <= obd->u.obt.obt_qctxt.lqc_btune_sz) - return -EINVAL; - - obd->u.obt.obt_qctxt.lqc_bunit_sz = val; - return count; -} -EXPORT_SYMBOL(lprocfs_wr_bunit); - -int lprocfs_wr_iunit(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - int val, rc; - LASSERT(obd != NULL); - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val <= obd->u.obt.obt_qctxt.lqc_itune_sz) - return -EINVAL; - - obd->u.obt.obt_qctxt.lqc_iunit_sz = val; - return count; -} -EXPORT_SYMBOL(lprocfs_wr_iunit); - -int lprocfs_rd_btune(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - LASSERT(obd != NULL); - - return snprintf(page, count, 
"%lu\n", - obd->u.obt.obt_qctxt.lqc_btune_sz); -} -EXPORT_SYMBOL(lprocfs_rd_btune); - -int lprocfs_rd_itune(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - LASSERT(obd != NULL); - - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_itune_sz); -} -EXPORT_SYMBOL(lprocfs_rd_itune); - -int lprocfs_wr_btune(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - int val, rc; - LASSERT(obd != NULL); - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || - val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) - return -EINVAL; - - obd->u.obt.obt_qctxt.lqc_btune_sz = val; - return count; -} -EXPORT_SYMBOL(lprocfs_wr_btune); - -int lprocfs_wr_itune(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - int val, rc; - LASSERT(obd != NULL); - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val <= MIN_QLIMIT || - val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) - return -EINVAL; - obd->u.obt.obt_qctxt.lqc_itune_sz = val; - return count; -} -EXPORT_SYMBOL(lprocfs_wr_itune); - -#define USER_QUOTA 1 -#define GROUP_QUOTA 2 - -#define MAX_STYPE_SIZE 4 -int lprocfs_rd_type(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - char stype[MAX_STYPE_SIZE + 1] = ""; - int type = obd->u.obt.obt_qctxt.lqc_atype; - LASSERT(obd != NULL); - - if (type == 0) { - strcpy(stype, "off"); - } else { - if (type & USER_QUOTA) - strcat(stype, "u"); - if (type & GROUP_QUOTA) - strcat(stype, "g"); - } - - return snprintf(page, count, "%s\n", stype); -} -EXPORT_SYMBOL(lprocfs_rd_type); +#ifdef HAVE_QUOTA_SUPPORT -static int auto_quota_on(struct obd_device *obd, int type, - 
struct super_block *sb, int is_master) -{ - struct obd_quotactl *oqctl; - struct lvfs_run_ctxt saved; - int rc; - ENTRY; - - LASSERT(type == USRQUOTA || type == GRPQUOTA || type == UGQUOTA); - - /* quota already turned on */ - if (obd->u.obt.obt_qctxt.lqc_status) - RETURN(0); - - OBD_ALLOC_PTR(oqctl); - if (!oqctl) - RETURN(-ENOMEM); - - oqctl->qc_type = type; - oqctl->qc_cmd = Q_QUOTAON; - oqctl->qc_id = QFMT_LDISKFS; - - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - if (!is_master) - goto local_quota; - - /* turn on cluster wide quota */ - rc = mds_admin_quota_on(obd, oqctl); - if (rc) { - CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, - "auto-enable admin quota failed. rc=%d\n", rc); - GOTO(out_pop, rc); - } -local_quota: - /* turn on local quota */ - rc = fsfilt_quotactl(obd, sb, oqctl); - if (rc) { - CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, - "auto-enable local quota failed. rc=%d\n", rc); - if (is_master) - mds_quota_off(obd, oqctl); - } else { - obd->u.obt.obt_qctxt.lqc_status = 1; - } -out_pop: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - - OBD_FREE_PTR(oqctl); - RETURN(rc); -} - - -int lprocfs_wr_type(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - struct obd_device_target *obt = &obd->u.obt; - int type = 0; - char stype[MAX_STYPE_SIZE + 1] = ""; - LASSERT(obd != NULL); - - if (copy_from_user(stype, buffer, MAX_STYPE_SIZE)) - return -EFAULT; - - if (strchr(stype, 'u')) - type |= USER_QUOTA; - if (strchr(stype, 'g')) - type |= GROUP_QUOTA; - - obt->obt_qctxt.lqc_atype = type; - - if (type == 0) - return count; - - if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)) - auto_quota_on(obd, type - 1, obt->obt_sb, 1); - else if (!strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) - auto_quota_on(obd, type - 1, obt->obt_sb, 0); - else - return -EFAULT; - - return count; -} -EXPORT_SYMBOL(lprocfs_wr_type); -#endif /* LPROCFS */ +static cfs_time_t last_print = 0; +static 
spinlock_t last_print_lock = SPIN_LOCK_UNLOCKED; static int filter_quota_setup(struct obd_device *obd) { @@ -310,41 +80,73 @@ static int filter_quota_setup(struct obd_device *obd) struct obd_device_target *obt = &obd->u.obt; ENTRY; + init_rwsem(&obt->obt_rwsem); + obt->obt_qfmt = LUSTRE_QUOTA_V2; atomic_set(&obt->obt_quotachecking, 1); - rc = qctxt_init(&obt->obt_qctxt, obt->obt_sb, NULL); - if (rc) { + rc = qctxt_init(obd, NULL); + if (rc) CERROR("initialize quota context failed! (rc:%d)\n", rc); - RETURN(rc); - } RETURN(rc); } static int filter_quota_cleanup(struct obd_device *obd) { + ENTRY; qctxt_cleanup(&obd->u.obt.obt_qctxt, 0); - return 0; + RETURN(0); } -static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd) +static int filter_quota_setinfo(struct obd_device *obd, void *data) { + struct obd_export *exp = data; + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; struct obd_import *imp; + ENTRY; /* setup the quota context import */ - obd->u.obt.obt_qctxt.lqc_import = exp->exp_imp_reverse; + spin_lock(&qctxt->lqc_lock); + qctxt->lqc_import = exp->exp_imp_reverse; + spin_unlock(&qctxt->lqc_lock); + CDEBUG(D_QUOTA, "%s: lqc_import(%p) of obd(%p) is reactivated now, \n", + obd->obd_name,exp->exp_imp_reverse, obd); - /* make imp's connect flags equal relative exp's connect flags + /* make imp's connect flags equal relative exp's connect flags * adding it to avoid the scan export list */ - imp = exp->exp_imp_reverse; - if (imp) - imp->imp_connect_data.ocd_connect_flags |= - (exp->exp_connect_flags & OBD_CONNECT_QUOTA64); + imp = qctxt->lqc_import; + if (likely(imp)) + imp->imp_connect_data.ocd_connect_flags |= + (exp->exp_connect_flags & + (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS)); + cfs_waitq_signal(&qctxt->lqc_wait_for_qmaster); /* start quota slave recovery thread. 
(release high limits) */ - qslave_start_recovery(obd, &obd->u.obt.obt_qctxt); - return 0; + qslave_start_recovery(obd, qctxt); + RETURN(0); } + +static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd) +{ + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + ENTRY; + + /* lquota may be not set up before destroying export, b=14896 */ + if (!obd->obd_set_up) + RETURN(0); + + /* when exp->exp_imp_reverse is destroyed, the corresponding lqc_import + * should be invalid b=12374 */ + if (qctxt->lqc_import && qctxt->lqc_import == exp->exp_imp_reverse) { + spin_lock(&qctxt->lqc_lock); + qctxt->lqc_import = NULL; + spin_unlock(&qctxt->lqc_lock); + CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n", + obd->obd_name, obd); + } + RETURN(0); +} + static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore) { ENTRY; @@ -352,10 +154,12 @@ static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore) if (!sb_any_quota_enabled(obd->u.obt.obt_sb)) RETURN(0); - if (ignore) + if (ignore) { + CDEBUG(D_QUOTA, "blocks will be written with ignoring quota.\n"); cfs_cap_raise(CFS_CAP_SYS_RESOURCE); - else + } else { cfs_cap_lower(CFS_CAP_SYS_RESOURCE); + } RETURN(0); } @@ -363,6 +167,7 @@ static int filter_quota_enforce(struct obd_device *obd, unsigned int ignore) static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa) { struct obd_device_target *obt = &obd->u.obt; + struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt; int err, cnt, rc = 0; struct obd_quotactl *oqctl; ENTRY; @@ -370,15 +175,42 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa) if (!sb_any_quota_enabled(obt->obt_sb)) RETURN(0); - oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA); - OBD_ALLOC_PTR(oqctl); if (!oqctl) { CERROR("Not enough memory!"); RETURN(-ENOMEM); } + /* set over quota flags for a uid/gid */ + oa->o_valid |= OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA; + oa->o_flags &= ~(OBD_FL_NO_USRQUOTA | 
OBD_FL_NO_GRPQUOTA); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct quota_adjust_qunit oqaq_tmp; + struct lustre_qunit_size *lqs = NULL; + + oqaq_tmp.qaq_flags = cnt; + oqaq_tmp.qaq_id = (cnt == USRQUOTA) ? oa->o_uid : oa->o_gid; + + quota_search_lqs(NULL, &oqaq_tmp, qctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + if (lqs->lqs_bunit_sz <= qctxt->lqc_sync_blk) { + oa->o_flags |= (cnt == USRQUOTA) ? + OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA; + spin_unlock(&lqs->lqs_lock); + CDEBUG(D_QUOTA, "set sync flag: bunit(%lu), " + "sync_blk(%d)\n", lqs->lqs_bunit_sz, + qctxt->lqc_sync_blk); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + continue; + } + spin_unlock(&lqs->lqs_lock); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + } + memset(oqctl, 0, sizeof(*oqctl)); oqctl->qc_cmd = Q_GETQUOTA; @@ -388,14 +220,13 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa) if (err) { if (!rc) rc = err; + oa->o_valid &= ~((cnt == USRQUOTA) ? OBD_MD_FLUSRQUOTA : + OBD_MD_FLGRPQUOTA); continue; } - /* set over quota flags for a uid/gid */ - oa->o_valid |= (cnt == USRQUOTA) ? - OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA; if (oqctl->qc_dqblk.dqb_bhardlimit && - (toqb(oqctl->qc_dqblk.dqb_curspace) > + (toqb(oqctl->qc_dqblk.dqb_curspace) >= oqctl->qc_dqblk.dqb_bhardlimit)) oa->o_flags |= (cnt == USRQUOTA) ? OBD_FL_NO_USRQUOTA : OBD_FL_NO_GRPQUOTA; @@ -404,58 +235,288 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa) RETURN(rc); } -static int filter_quota_acquire(struct obd_device *obd, unsigned int uid, - unsigned int gid) +/** + * check whether the left quota of certain uid and gid can satisfy a block_write + * or inode_create rpc. 
When need to acquire quota, return QUOTA_RET_ACQUOTA + */ +static int quota_check_common(struct obd_device *obd, unsigned int uid, + unsigned int gid, int count, int cycle, int isblk) { struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; - int rc; + int i; + __u32 id[MAXQUOTAS] = { uid, gid }; + struct qunit_data qdata[MAXQUOTAS]; + int rc = 0, rc2[2] = { 0, 0 }; ENTRY; - rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 1); - RETURN(rc == -EAGAIN); -} + CLASSERT(MAXQUOTAS < 4); + if (!sb_any_quota_enabled(qctxt->lqc_sb)) + RETURN(rc); -static int mds_quota_init(void) -{ - return lustre_dquot_init(); + spin_lock(&qctxt->lqc_lock); + if (!qctxt->lqc_valid){ + spin_unlock(&qctxt->lqc_lock); + RETURN(rc); + } + spin_unlock(&qctxt->lqc_lock); + + for (i = 0; i < MAXQUOTAS; i++) { + struct lustre_qunit_size *lqs = NULL; + + qdata[i].qd_id = id[i]; + qdata[i].qd_flags = i; + if (isblk) + QDATA_SET_BLK(&qdata[i]); + qdata[i].qd_count = 0; + + /* ignore root user */ + if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i])) + continue; + + quota_search_lqs(&qdata[i], NULL, qctxt, &lqs); + if (!lqs) + continue; + + rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk); + spin_lock(&lqs->lqs_lock); + if (!cycle) { + rc = QUOTA_RET_INC_PENDING; + if (isblk) + lqs->lqs_bwrite_pending += count; + else + lqs->lqs_iwrite_pending += count; + } + if (rc2[i] == QUOTA_RET_OK) { + if (isblk && qdata[i].qd_count < + lqs->lqs_bwrite_pending * CFS_PAGE_SIZE) + rc2[i] = QUOTA_RET_ACQUOTA; + if (!isblk && qdata[i].qd_count < + lqs->lqs_iwrite_pending) + rc2[i] = QUOTA_RET_ACQUOTA; + } + spin_unlock(&lqs->lqs_lock); + CDEBUG(D_QUOTA, "count: %d, write pending: %lu, qd_count: "LPU64 + ".\n", count, + isblk ? lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending, + qdata[i].qd_count); + + /* When cycle is zero, lqs_*_pending will be changed. 
We will + * get reference of the lqs here and put reference of lqs in + * quota_pending_commit b=14784 */ + if (!cycle) + lqs_getref(lqs); + + /* this is for quota_search_lqs */ + lqs_putref(lqs); + } + + if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA) + RETURN(rc | QUOTA_RET_ACQUOTA); + else + RETURN(rc); } -static int mds_quota_exit(void) +static int quota_chk_acq_common(struct obd_device *obd, unsigned int uid, + unsigned int gid, int count, int *pending, + quota_acquire acquire, + struct obd_trans_info *oti, int isblk) { - lustre_dquot_exit(); - return 0; + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + struct timeval work_start; + struct timeval work_end; + long timediff; + struct l_wait_info lwi = { 0 }; + int rc = 0, cycle = 0, count_err = 1; + ENTRY; + + CDEBUG(D_QUOTA, "check quota for %s\n", obd->obd_name); + *pending = 0; + /* Unfortunately, if quota master is too busy to handle the + * pre-dqacq in time and quota hash on ost is used up, we + * have to wait for the completion of in flight dqacq/dqrel, + * in order to get enough quota for write b=12588 */ + do_gettimeofday(&work_start); + while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk)) & + QUOTA_RET_ACQUOTA) { + + spin_lock(&qctxt->lqc_lock); + if (!qctxt->lqc_import && oti) { + spin_unlock(&qctxt->lqc_lock); + + LASSERT(oti && oti->oti_thread && + oti->oti_thread->t_watchdog); + + lc_watchdog_disable(oti->oti_thread->t_watchdog); + CDEBUG(D_QUOTA, "sleep for quota master\n"); + l_wait_event(qctxt->lqc_wait_for_qmaster, check_qm(qctxt), + &lwi); + CDEBUG(D_QUOTA, "wake up when quota master is back\n"); + lc_watchdog_touch(oti->oti_thread->t_watchdog); + } else { + spin_unlock(&qctxt->lqc_lock); + } + + if (rc & QUOTA_RET_INC_PENDING) + *pending = 1; + + cycle++; + if (isblk) + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90); + /* after acquire(), we should run quota_check_common again + * so that we confirm there are enough quota to finish write */ + rc = 
acquire(obd, uid, gid, oti, isblk); + + /* please reference to dqacq_completion for the below */ + /* a new request is finished, try again */ + if (rc == -EAGAIN) { + CDEBUG(D_QUOTA, "finish a quota req, try again\n"); + continue; + } + + /* it is out of quota already */ + if (rc == -EDQUOT) { + CDEBUG(D_QUOTA, "out of quota, return -EDQUOT\n"); + break; + } + + /* -EBUSY and others, wait a second and try again */ + if (rc < 0) { + cfs_waitq_t waitq; + struct l_wait_info lwi; + + if (oti && oti->oti_thread && oti->oti_thread->t_watchdog) + lc_watchdog_touch(oti->oti_thread->t_watchdog); + CDEBUG(D_QUOTA, "rc: %d, count_err: %d\n", rc, + count_err++); + + init_waitqueue_head(&waitq); + lwi = LWI_TIMEOUT(cfs_time_seconds(min(cycle, 10)), NULL, + NULL); + l_wait_event(waitq, 0, &lwi); + } + + if (rc < 0 || cycle % 10 == 2) { + spin_lock(&last_print_lock); + if (last_print == 0 || + cfs_time_before((last_print + cfs_time_seconds(30)), + cfs_time_current())) { + last_print = cfs_time_current(); + spin_unlock(&last_print_lock); + CWARN("still haven't managed to acquire quota " + "space from the quota master after %d " + "retries (err=%d, rc=%d)\n", + cycle, count_err - 1, rc); + } else { + spin_unlock(&last_print_lock); + } + } + + CDEBUG(D_QUOTA, "recheck quota with rc: %d, cycle: %d\n", rc, + cycle); + } + + if (!cycle && rc & QUOTA_RET_INC_PENDING) + *pending = 1; + + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, + isblk ? 
LQUOTA_WAIT_FOR_CHK_BLK : + LQUOTA_WAIT_FOR_CHK_INO, + timediff); + + RETURN(rc); } -/* check whether the left quota of certain uid and uid can satisfy a write rpc - * when need to acquire quota, return QUOTA_RET_ACQUOTA */ -static int filter_quota_check(struct obd_device *obd, unsigned int uid, - unsigned int gid, int npage) +/** + * when a block_write or inode_create rpc is finished, adjust the record for + * pending blocks and inodes + */ +static int quota_pending_commit(struct obd_device *obd, unsigned int uid, + unsigned int gid, int count, int isblk) { struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + struct timeval work_start; + struct timeval work_end; + long timediff; int i; __u32 id[MAXQUOTAS] = { uid, gid }; struct qunit_data qdata[MAXQUOTAS]; - int rc; ENTRY; + CDEBUG(D_QUOTA, "commit pending quota for %s\n", obd->obd_name); CLASSERT(MAXQUOTAS < 4); if (!sb_any_quota_enabled(qctxt->lqc_sb)) RETURN(0); + do_gettimeofday(&work_start); for (i = 0; i < MAXQUOTAS; i++) { + struct lustre_qunit_size *lqs = NULL; + qdata[i].qd_id = id[i]; qdata[i].qd_flags = i; - qdata[i].qd_flags |= QUOTA_IS_BLOCK; + if (isblk) + QDATA_SET_BLK(&qdata[i]); qdata[i].qd_count = 0; - qctxt_wait_pending_dqacq(qctxt, id[i], i, 1); - rc = compute_remquota(obd, qctxt, &qdata[i]); - if (rc == QUOTA_RET_OK && - qdata[i].qd_count < npage * CFS_PAGE_SIZE) - RETURN(QUOTA_RET_ACQUOTA); + if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i])) + continue; + + quota_search_lqs(&qdata[i], NULL, qctxt, &lqs); + if (lqs) { + int flag = 0; + CDEBUG(D_QUOTA, "pending: %lu, count: %d.\n", + isblk ? 
lqs->lqs_bwrite_pending : + lqs->lqs_iwrite_pending, count); + spin_lock(&lqs->lqs_lock); + if (isblk) { + if (lqs->lqs_bwrite_pending >= count) { + lqs->lqs_bwrite_pending -= count; + spin_unlock(&lqs->lqs_lock); + flag = 1; + } else { + spin_unlock(&lqs->lqs_lock); + CDEBUG(D_ERROR, + "there are too many blocks!\n"); + } + } else { + if (lqs->lqs_iwrite_pending >= count) { + lqs->lqs_iwrite_pending -= count; + spin_unlock(&lqs->lqs_lock); + flag = 1; + } else { + spin_unlock(&lqs->lqs_lock); + CDEBUG(D_ERROR, + "there are too many files!\n"); + } + } + + lqs_putref(lqs); + /* When lqs_*_pening is changed back, we'll putref lqs + * here b=14784 */ + if (flag) + lqs_putref(lqs); + } } + do_gettimeofday(&work_end); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); + lprocfs_counter_add(qctxt->lqc_stats, + isblk ? LQUOTA_WAIT_FOR_COMMIT_BLK : + LQUOTA_WAIT_FOR_COMMIT_INO, + timediff); - RETURN(rc); + RETURN(0); +} + +static int mds_quota_init(void) +{ + return lustre_dquot_init(); +} + +static int mds_quota_exit(void) +{ + lustre_dquot_exit(); + return 0; } static int mds_quota_setup(struct obd_device *obd) @@ -465,41 +526,69 @@ static int mds_quota_setup(struct obd_device *obd) int rc; ENTRY; + init_rwsem(&obt->obt_rwsem); + obt->obt_qfmt = LUSTRE_QUOTA_V2; + mds->mds_quota_info.qi_version = LUSTRE_QUOTA_V2; atomic_set(&obt->obt_quotachecking, 1); /* initialize quota master and quota context */ sema_init(&mds->mds_qonoff_sem, 1); - rc = qctxt_init(&obt->obt_qctxt, obt->obt_sb, dqacq_handler); + rc = qctxt_init(obd, dqacq_handler); if (rc) { CERROR("initialize quota context failed! 
(rc:%d)\n", rc); RETURN(rc); } - + mds->mds_quota = 1; RETURN(rc); } static int mds_quota_cleanup(struct obd_device *obd) { + ENTRY; + obd->u.mds.mds_quota = 0; qctxt_cleanup(&obd->u.obt.obt_qctxt, 0); RETURN(0); } +static int mds_quota_setinfo(struct obd_device *obd, void *data) +{ + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + ENTRY; + + if (data != NULL) + QUOTA_MASTER_READY(qctxt); + else + QUOTA_MASTER_UNREADY(qctxt); + RETURN(0); +} + static int mds_quota_fs_cleanup(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; - int i; + struct obd_quotactl oqctl; ENTRY; - /* close admin quota files */ + memset(&oqctl, 0, sizeof(oqctl)); + oqctl.qc_type = UGQUOTA; + down(&mds->mds_qonoff_sem); - for (i = 0; i < MAXQUOTAS; i++) { - if (mds->mds_quota_info.qi_files[i]) { - filp_close(mds->mds_quota_info.qi_files[i], 0); - mds->mds_quota_info.qi_files[i] = NULL; - } - } + mds_admin_quota_off(obd, &oqctl); up(&mds->mds_qonoff_sem); RETURN(0); } + +static int quota_acquire_common(struct obd_device *obd, unsigned int uid, + unsigned int gid, struct obd_trans_info *oti, + int isblk) +{ + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + int rc; + ENTRY; + + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, isblk, 1, oti); + RETURN(rc); +} + +#endif /* HAVE_QUOTA_SUPPORT */ #endif /* __KERNEL__ */ struct osc_quota_info { @@ -548,6 +637,7 @@ static inline struct osc_quota_info *find_qinfo(struct client_obd *cli, { unsigned int hashent = hashfn(cli, id, type); struct osc_quota_info *oqi; + ENTRY; LASSERT_SPIN_LOCKED(&qinfo_list_lock); list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) { @@ -555,7 +645,7 @@ static inline struct osc_quota_info *find_qinfo(struct client_obd *cli, oqi->oqi_id == id && oqi->oqi_type == type) return oqi; } - return NULL; + RETURN(NULL); } static struct osc_quota_info *alloc_qinfo(struct client_obd *cli, @@ -581,8 +671,7 @@ static void free_qinfo(struct osc_quota_info *oqi) OBD_SLAB_FREE(oqi, qinfo_cachep, 
sizeof(*oqi)); } -int osc_quota_chkdq(struct client_obd *cli, - unsigned int uid, unsigned int gid) +int osc_quota_chkdq(struct client_obd *cli, unsigned int uid, unsigned int gid) { unsigned int id; int cnt, rc = QUOTA_OK; @@ -604,8 +693,7 @@ int osc_quota_chkdq(struct client_obd *cli, RETURN(rc); } -int osc_quota_setdq(struct client_obd *cli, - unsigned int uid, unsigned int gid, +int osc_quota_setdq(struct client_obd *cli, unsigned int uid, unsigned int gid, obd_flag valid, obd_flag flags) { unsigned int id; @@ -713,6 +801,7 @@ int osc_quota_exit(void) } #ifdef __KERNEL__ +#ifdef HAVE_QUOTA_SUPPORT quota_interface_t mds_quota_interface = { .quota_init = mds_quota_init, .quota_exit = mds_quota_exit, @@ -720,9 +809,13 @@ quota_interface_t mds_quota_interface = { .quota_cleanup = mds_quota_cleanup, .quota_check = target_quota_check, .quota_ctl = mds_quota_ctl, - .quota_fs_cleanup =mds_quota_fs_cleanup, + .quota_setinfo = mds_quota_setinfo, + .quota_fs_cleanup = mds_quota_fs_cleanup, .quota_recovery = mds_quota_recovery, .quota_adjust = mds_quota_adjust, + .quota_chkquota = quota_chk_acq_common, + .quota_acquire = quota_acquire_common, + .quota_pending_commit = quota_pending_commit, }; quota_interface_t filter_quota_interface = { @@ -731,12 +824,16 @@ quota_interface_t filter_quota_interface = { .quota_check = target_quota_check, .quota_ctl = filter_quota_ctl, .quota_setinfo = filter_quota_setinfo, + .quota_clearinfo = filter_quota_clearinfo, .quota_enforce = filter_quota_enforce, .quota_getflag = filter_quota_getflag, - .quota_acquire = filter_quota_acquire, + .quota_acquire = quota_acquire_common, .quota_adjust = filter_quota_adjust, - .quota_chkquota = filter_quota_check, + .quota_chkquota = quota_chk_acq_common, + .quota_adjust_qunit = filter_quota_adjust_qunit, + .quota_pending_commit = quota_pending_commit, }; +#endif #endif /* __KERNEL__ */ quota_interface_t mdc_quota_interface = { @@ -745,6 +842,11 @@ quota_interface_t mdc_quota_interface = { 
.quota_poll_check = client_quota_poll_check, }; +quota_interface_t lmv_quota_interface = { + .quota_ctl = lmv_quota_ctl, + .quota_check = lmv_quota_check, +}; + quota_interface_t osc_quota_interface = { .quota_ctl = client_quota_ctl, .quota_check = client_quota_check, @@ -754,22 +856,42 @@ quota_interface_t osc_quota_interface = { .quota_chkdq = osc_quota_chkdq, .quota_setdq = osc_quota_setdq, .quota_cleanup = osc_quota_cleanup, + .quota_adjust_qunit = client_quota_adjust_qunit, }; quota_interface_t lov_quota_interface = { - .quota_check = lov_quota_check, .quota_ctl = lov_quota_ctl, + .quota_check = lov_quota_check, + .quota_adjust_qunit = lov_quota_adjust_qunit, }; #ifdef __KERNEL__ + +cfs_proc_dir_entry_t *lquota_type_proc_dir = NULL; + static int __init init_lustre_quota(void) { - int rc = qunit_cache_init(); +#ifdef HAVE_QUOTA_SUPPORT + int rc = 0; + + lquota_type_proc_dir = lprocfs_register(OBD_LQUOTA_DEVICENAME, + proc_lustre_root, + NULL, NULL); + if (IS_ERR(lquota_type_proc_dir)) { + CERROR("LProcFS failed in lquota-init\n"); + rc = PTR_ERR(lquota_type_proc_dir); + return rc; + } + + rc = qunit_cache_init(); if (rc) return rc; + PORTAL_SYMBOL_REGISTER(filter_quota_interface); PORTAL_SYMBOL_REGISTER(mds_quota_interface); +#endif PORTAL_SYMBOL_REGISTER(mdc_quota_interface); + PORTAL_SYMBOL_REGISTER(lmv_quota_interface); PORTAL_SYMBOL_REGISTER(osc_quota_interface); PORTAL_SYMBOL_REGISTER(lov_quota_interface); return 0; @@ -777,13 +899,19 @@ static int __init init_lustre_quota(void) static void /*__exit*/ exit_lustre_quota(void) { - PORTAL_SYMBOL_UNREGISTER(filter_quota_interface); - PORTAL_SYMBOL_UNREGISTER(mds_quota_interface); PORTAL_SYMBOL_UNREGISTER(mdc_quota_interface); + PORTAL_SYMBOL_UNREGISTER(lmv_quota_interface); PORTAL_SYMBOL_UNREGISTER(osc_quota_interface); PORTAL_SYMBOL_UNREGISTER(lov_quota_interface); +#ifdef HAVE_QUOTA_SUPPORT + PORTAL_SYMBOL_UNREGISTER(filter_quota_interface); + PORTAL_SYMBOL_UNREGISTER(mds_quota_interface); 
qunit_cache_cleanup(); + + if (lquota_type_proc_dir) + lprocfs_remove(&lquota_type_proc_dir); +#endif } MODULE_AUTHOR("Sun Microsystems, Inc. "); @@ -792,9 +920,12 @@ MODULE_LICENSE("GPL"); cfs_module(lquota, "1.0.0", init_lustre_quota, exit_lustre_quota); +#ifdef HAVE_QUOTA_SUPPORT EXPORT_SYMBOL(mds_quota_interface); EXPORT_SYMBOL(filter_quota_interface); +#endif EXPORT_SYMBOL(mdc_quota_interface); +EXPORT_SYMBOL(lmv_quota_interface); EXPORT_SYMBOL(osc_quota_interface); EXPORT_SYMBOL(lov_quota_interface); #endif /* __KERNEL */ diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index d896fa7..e9073be 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -39,19 +39,22 @@ #include +#ifdef HAVE_QUOTA_SUPPORT + /* QUSG covnert bytes to blocks when counting block quota */ #define QUSG(count, isblk) (isblk ? toqb(count) : count) -/* This flag is set in qc_stat to distinguish if the current getquota +/* This flag is set in qc_stat to distinguish if the current getquota * operation is for quota recovery */ #define QUOTA_RECOVERING 0x01 +#define OBD_LQUOTA_DEVICENAME "lquota" #ifdef __KERNEL__ #define DQUOT_DEBUG(dquot, fmt, arg...) \ CDEBUG(D_QUOTA, "refcnt(%u) id(%u) type(%u) off(%llu) flags(%lu) " \ - "bhardlimit(%u) curspace("LPX64") ihardlimit(%u) " \ - "curinodes(%u): " fmt, dquot->dq_refcnt, \ + "bhardlimit("LPU64") curspace("LPU64") ihardlimit("LPU64") " \ + "curinodes("LPU64"): " fmt, dquot->dq_refcnt, \ dquot->dq_id, dquot->dq_type, dquot->dq_off, dquot->dq_flags, \ dquot->dq_dqb.dqb_bhardlimit, dquot->dq_dqb.dqb_curspace, \ dquot->dq_dqb.dqb_ihardlimit, dquot->dq_dqb.dqb_curinodes, \ @@ -68,26 +71,48 @@ qinfo->qi_info[1].dqi_free_entry, ## arg); #define QDATA_DEBUG(qd, fmt, arg...) 
\ - CDEBUG(D_QUOTA, "id(%u) type(%lu) count("LPU64") isblk(%lu):" \ - fmt, qd->qd_id, qd->qd_flags & QUOTA_IS_GRP, qd->qd_count, \ - (qd->qd_flags & QUOTA_IS_BLOCK) >> 1, \ + CDEBUG(D_QUOTA, "id(%u) flag(%u) type(%c) isblk(%c) count("LPU64") " \ + "qd_qunit("LPU64"): " fmt, qd->qd_id, qd->qd_flags, \ + QDATA_IS_GRP(qd) ? 'g' : 'u', QDATA_IS_BLK(qd) ? 'b': 'i', \ + qd->qd_count, \ + (QDATA_IS_ADJBLK(qd) | QDATA_IS_ADJINO(qd)) ? qd->qd_qunit : 0,\ ## arg); +#define QAQ_DEBUG(qaq, fmt, arg...) \ + CDEBUG(D_QUOTA, "id(%u) flag(%u) type(%c) bunit("LPU64") " \ + "iunit("LPU64"): " fmt, qaq->qaq_id, qaq->qaq_flags, \ + QAQ_IS_GRP(qaq) ? 'g': 'u', qaq->qaq_bunit_sz, \ + qaq->qaq_iunit_sz, ## arg); + +#define LQS_DEBUG(lqs, fmt, arg...) \ + CDEBUG(D_QUOTA, "lqs(%p) id(%u) flag(%lu) type(%c) bunit(%lu) " \ + "btune(%lu) iunit(%lu) itune(%lu) lqs_bwrite_pending(%lu) " \ + "lqs_iwrite_pending(%lu) ino_rec("LPD64") blk_rec("LPD64" ) " \ + "refcount(%d): " \ + fmt, lqs, lqs->lqs_id, lqs->lqs_flags, \ + LQS_IS_GRP(lqs) ? 
'g' : 'u', \ + lqs->lqs_bunit_sz, lqs->lqs_btune_sz, lqs->lqs_iunit_sz, \ + lqs->lqs_itune_sz, lqs->lqs_bwrite_pending, \ + lqs->lqs_iwrite_pending, lqs->lqs_ino_rec, \ + lqs->lqs_blk_rec, atomic_read(&lqs->lqs_refcount), ## arg); + /* quota_context.c */ void qunit_cache_cleanup(void); int qunit_cache_init(void); int qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, - uid_t uid, gid_t gid, __u32 isblk, int wait); + uid_t uid, gid_t gid, __u32 isblk, int wait, + struct obd_trans_info *oti); int qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, unsigned short type, int isblk); -int qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, - dqacq_handler_t handler); +int qctxt_init(struct obd_device *obd, dqacq_handler_t handler); void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force); -void qslave_start_recovery(struct obd_device *obd, +void qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt); int compute_remquota(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata); + struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, + int isblk); +int check_qm(struct lustre_quota_ctxt *qctxt); /* quota_master.c */ int lustre_dquot_init(void); void lustre_dquot_exit(void); @@ -97,27 +122,89 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[], int filter_quota_adjust(struct obd_device *obd, unsigned int qcids[], unsigned int qpids[], int rc, int opc); int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_quota_get_version(struct obd_device *obd, lustre_quota_version_t *ver); +int mds_quota_invalidate(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_quota_finvalidate(struct obd_device *obd, struct obd_quotactl *oqctl); + int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl); int 
mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl); +int mds_admin_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_set_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_get_dqinfo(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_quota_recovery(struct obd_device *obd); int mds_get_obd_quota(struct obd_device *obd, struct obd_quotactl *oqctl); +int dquot_create_oqaq(struct lustre_quota_ctxt *qctxt, struct lustre_dquot + *dquot, __u32 ost_num, __u32 mdt_num, int type, + struct quota_adjust_qunit *oqaq); #endif /* quota_ctl.c */ -int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl); -int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl); -int client_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl); -int lov_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl); +int mds_quota_ctl(struct obd_device *obd, struct obd_export *exp, + struct obd_quotactl *oqctl); +int filter_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); /* quota_chk.c */ -int target_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); -int client_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); -int lov_quota_check(struct obd_export *exp, struct obd_quotactl *oqctl); -int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); +int target_quota_check(struct obd_device *obd, struct obd_export *exp, + struct obd_quotactl *oqctl); + +int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq, struct + lustre_quota_ctxt *qctxt); +void qdata_to_oqaq(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq); +#ifdef __KERNEL__ +int quota_search_lqs(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct 
lustre_qunit_size **lqs_return); +int quota_create_lqs(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return); +void quota_compute_lqs(struct qunit_data *qdata, struct lustre_qunit_size *lqs, + int is_chk, int is_acq); + +extern int quote_get_qdata(struct ptlrpc_request *req, struct qunit_data *qdata, + int is_req, int is_exp); +extern int quote_copy_qdata(struct ptlrpc_request *req, struct qunit_data *qdata, + int is_req, int is_exp); +int filter_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt); +int lquota_proc_setup(struct obd_device *obd, int is_master); +int lquota_proc_cleanup(struct lustre_quota_ctxt *qctxt); + +extern cfs_proc_dir_entry_t *lquota_type_proc_dir; +#endif + +#define LQS_BLK_DECREASE 1 +#define LQS_BLK_INCREASE 2 +#define LQS_INO_DECREASE 4 +#define LQS_INO_INCREASE 8 + + +#endif +int client_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt); +int lov_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt); +int client_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +int lmv_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +int lov_quota_ctl(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +int client_quota_check(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +int lmv_quota_check(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +int lov_quota_check(struct obd_device *unused, struct obd_export *exp, + struct obd_quotactl *oqctl); +int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk); #endif diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index 
5ffdf8d..512fc49 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -44,7 +44,7 @@ # define EXPORT_SYMTAB #endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_LQUOTA #include #include @@ -62,8 +62,9 @@ #include "quota_internal.h" -/* lock ordering: - * mds->mds_qonoff_sem > dquot->dq_sem */ +#ifdef HAVE_QUOTA_SUPPORT + +/* lock ordering: mds->mds_qonoff_sem > dquot->dq_sem */ static struct list_head lustre_dquot_hash[NR_DQHASH]; static spinlock_t dquot_hash_lock = SPIN_LOCK_UNLOCKED; @@ -198,7 +199,7 @@ static struct lustre_dquot *lustre_dqget(struct obd_device *obd, if ((empty = alloc_dquot(lqi, id, type)) == NULL) RETURN(ERR_PTR(-ENOMEM)); - + spin_lock(&dquot_hash_lock); if ((dquot = find_dquot(hashent, lqi, id, type)) != NULL) { dquot->dq_refcnt++; @@ -226,24 +227,134 @@ static struct lustre_dquot *lustre_dqget(struct obd_device *obd, RETURN(dquot); } +static void init_oqaq(struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + qid_t id, int type) +{ + struct lustre_qunit_size *lqs = NULL; + + oqaq->qaq_id = id; + oqaq->qaq_flags = type; + quota_search_lqs(NULL, oqaq, qctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + oqaq->qaq_bunit_sz = lqs->lqs_bunit_sz; + oqaq->qaq_iunit_sz = lqs->lqs_iunit_sz; + oqaq->qaq_flags = lqs->lqs_flags; + spin_unlock(&lqs->lqs_lock); + lqs_putref(lqs); + } else { + CDEBUG(D_QUOTA, "Can't find the lustre qunit size!\n"); + oqaq->qaq_bunit_sz = qctxt->lqc_bunit_sz; + oqaq->qaq_iunit_sz = qctxt->lqc_iunit_sz; + } +} + +int dqacq_adjust_qunit_sz(struct obd_device *obd, qid_t id, int type, + __u32 is_blk) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt; + struct obd_device *lov_mds_obd = class_exp2obd(mds->mds_osc_exp); + struct lov_obd *lov = &lov_mds_obd->u.lov; + __u32 ost_num = lov->desc.ld_tgt_count, mdt_num = 1; + struct quota_adjust_qunit *oqaq = NULL; + unsigned int uid = 0, gid = 0; + struct lustre_quota_info *info 
= &mds->mds_quota_info; + struct lustre_dquot *dquot = NULL; + int adjust_res = 0; + int rc = 0; + ENTRY; + + LASSERT(mds); + dquot = lustre_dqget(obd, info, id, type); + if (IS_ERR(dquot)) + RETURN(PTR_ERR(dquot)); + + OBD_ALLOC_PTR(oqaq); + if (!oqaq) + GOTO(out, rc = -ENOMEM); + + down(&dquot->dq_sem); + init_oqaq(oqaq, qctxt, id, type); + + rc = dquot_create_oqaq(qctxt, dquot, ost_num, mdt_num, + is_blk ? LQUOTA_FLAGS_ADJBLK : + LQUOTA_FLAGS_ADJINO, oqaq); + + if (rc < 0) { + CDEBUG(D_ERROR, "create oqaq failed! (rc:%d)\n", rc); + GOTO(out_sem, rc); + } + QAQ_DEBUG(oqaq, "show oqaq.\n") + + if (!QAQ_IS_ADJBLK(oqaq) && !QAQ_IS_ADJINO(oqaq)) + GOTO(out_sem, rc); + + /* adjust the mds slave qunit size */ + adjust_res = quota_adjust_slave_lqs(oqaq, qctxt); + if (adjust_res <= 0) { + if (adjust_res < 0) { + rc = adjust_res; + CDEBUG(D_ERROR, "adjust mds slave's qunit size failed! \ + (rc:%d)\n", rc); + } else { + CDEBUG(D_QUOTA, "qunit doesn't need to be adjusted.\n"); + } + GOTO(out_sem, rc); + } + + if (type) + gid = dquot->dq_id; + else + uid = dquot->dq_id; + + up(&dquot->dq_sem); + + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, is_blk, 0, NULL); + if (rc == -EDQUOT || rc == -EBUSY) { + CDEBUG(D_QUOTA, "rc: %d.\n", rc); + rc = 0; + } + if (rc) { + CDEBUG(D_ERROR, "mds fail to adjust file quota! 
\ + (rc:%d)\n", rc); + GOTO(out, rc); + } + + /* only when block qunit is reduced, boardcast to osts */ + if ((adjust_res & LQS_BLK_DECREASE) && QAQ_IS_ADJBLK(oqaq)) + rc = obd_quota_adjust_qunit(mds->mds_osc_exp, oqaq, qctxt); + +out: + lustre_dqput(dquot); + if (oqaq) + OBD_FREE_PTR(oqaq); + + RETURN(rc); +out_sem: + up(&dquot->dq_sem); + goto out; +} + int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) { struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt; struct lustre_quota_info *info = &mds->mds_quota_info; struct lustre_dquot *dquot = NULL; __u64 *usage = NULL; - __u32 hlimit = 0, slimit = 0; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; + __u64 hlimit = 0, slimit = 0; time_t *time = NULL; unsigned int grace = 0; + struct lustre_qunit_size *lqs = NULL; int rc = 0; ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OBD_DQACQ)) RETURN(-EIO); - dquot = lustre_dqget(obd, info, qdata->qd_id, qdata_type); + dquot = lustre_dqget(obd, info, qdata->qd_id, QDATA_IS_GRP(qdata)); if (IS_ERR(dquot)) RETURN(PTR_ERR(dquot)); @@ -258,14 +369,14 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) GOTO(out, rc = -EBUSY); } - if (is_blk) { - grace = info->qi_info[qdata_type].dqi_bgrace; + if (QDATA_IS_BLK(qdata)) { + grace = info->qi_info[QDATA_IS_GRP(qdata)].dqi_bgrace; usage = &dquot->dq_dqb.dqb_curspace; hlimit = dquot->dq_dqb.dqb_bhardlimit; slimit = dquot->dq_dqb.dqb_bsoftlimit; time = &dquot->dq_dqb.dqb_btime; } else { - grace = info->qi_info[qdata_type].dqi_igrace; + grace = info->qi_info[QDATA_IS_GRP(qdata)].dqi_igrace; usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes; hlimit = dquot->dq_dqb.dqb_ihardlimit; slimit = dquot->dq_dqb.dqb_isoftlimit; @@ -281,12 +392,21 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) switch (opc) { case QUOTA_DQACQ: - if (hlimit && - QUSG(*usage + qdata->qd_count, 
is_blk) > hlimit) - GOTO(out, rc = -EDQUOT); + if (hlimit && + QUSG(*usage + qdata->qd_count, QDATA_IS_BLK(qdata)) > hlimit) + { + if (QDATA_IS_CHANGE_QS(qdata) && + QUSG(*usage, QDATA_IS_BLK(qdata)) < hlimit) + qdata->qd_count = (hlimit - + QUSG(*usage, QDATA_IS_BLK(qdata))) + * (QDATA_IS_BLK(qdata) ? + QUOTABLOCK_SIZE : 1); + else + GOTO(out, rc = -EDQUOT); + } if (slimit && - QUSG(*usage + qdata->qd_count, is_blk) > slimit) { + QUSG(*usage + qdata->qd_count, QDATA_IS_BLK(qdata)) > slimit) { if (*time && cfs_time_current_sec() >= *time) GOTO(out, rc = -EDQUOT); else if (!*time) @@ -304,7 +424,7 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) *usage -= qdata->qd_count; /* (usage <= soft limit) but not (usage < soft limit) */ - if (!slimit || QUSG(*usage, is_blk) <= slimit) + if (!slimit || QUSG(*usage, QDATA_IS_BLK(qdata)) <= slimit) *time = 0; break; default: @@ -317,6 +437,37 @@ out: up(&dquot->dq_sem); up(&mds->mds_qonoff_sem); lustre_dqput(dquot); + if (rc != -EDQUOT) + dqacq_adjust_qunit_sz(obd, qdata->qd_id, QDATA_IS_GRP(qdata), + QDATA_IS_BLK(qdata)); + + quota_search_lqs(qdata, NULL, qctxt, &lqs); + if (QDATA_IS_BLK(qdata)) { + if (!lqs) { + CDEBUG(D_INFO, "Can't find the lustre qunit size!\n"); + qdata->qd_qunit = qctxt->lqc_bunit_sz; + } else { + spin_lock(&lqs->lqs_lock); + qdata->qd_qunit = lqs->lqs_bunit_sz; + spin_unlock(&lqs->lqs_lock); + } + QDATA_SET_ADJBLK(qdata); + } else { + if (!lqs) { + CDEBUG(D_INFO, "Can't find the lustre qunit size!\n"); + qdata->qd_qunit = qctxt->lqc_iunit_sz; + } else { + spin_lock(&lqs->lqs_lock); + qdata->qd_qunit = lqs->lqs_iunit_sz; + spin_unlock(&lqs->lqs_lock); + } + QDATA_SET_ADJINO(qdata); + } + + QDATA_DEBUG(qdata, "alloc/release qunit in dqacq_handler\n"); + if (lqs) + lqs_putref(lqs); + return rc; } @@ -327,25 +478,73 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[], int rc2 = 0; ENTRY; - if (rc && rc != -EDQUOT) + if (rc && rc != -EDQUOT && rc != ENOLCK) 
RETURN(0); switch (opc) { - case FSFILT_OP_RENAME: - /* acquire/release block quota on owner of original parent */ - rc2 = qctxt_adjust_qunit(obd, qctxt, qpids[2], qpids[3], 1, 0); - /* fall-through */ case FSFILT_OP_SETATTR: - /* acquire/release file quota on original owner */ - rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 0, 0); - /* fall-through */ - case FSFILT_OP_CREATE: + /* release file quota on original owner */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 0, 0, + NULL); + /* release block quota on original owner */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0, + NULL); + /* acquire file quota on current owner */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0, + NULL); + /* acquire block quota on current owner */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0, + NULL); + break; + case FSFILT_OP_UNLINK_PARTIAL_CHILD: + /* release file quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0, + NULL); + /* rlease block quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0, + NULL); + break; + case FSFILT_OP_CREATE_PARTIAL_CHILD: + /* acquire file quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0, + NULL); + /* acquire block quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0, + NULL); + break; + case FSFILT_OP_LINK: + /* acquire block quota on parent */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0, + NULL); + break; case FSFILT_OP_UNLINK: - /* acquire/release file/block quota on owner of child (or current owner) */ - rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0); - rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0); - /* acquire/release block quota on owner of parent (or original owner) */ - rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0); + /* release block quota on parent */ + rc2 |= 
qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0, + NULL); + /* release file quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0, + NULL); + if (qpids[0] != qcids[0] || qpids[1] != qcids[1]) + /* release block quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], + qcids[1], 1, 0, NULL); + break; + case FSFILT_OP_UNLINK_PARTIAL_PARENT: + /* release block quota on parent */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0, + NULL); + break; + case FSFILT_OP_CREATE: + /* acquire block quota on parent */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0, + NULL); + /* acquire file quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 0, 0, + NULL); + if (qpids[0] != qcids[0] || qpids[1] != qcids[1]) + /* acquire block quota on child */ + rc2 |= qctxt_adjust_qunit(obd, qctxt, qcids[0], + qcids[1], 1, 0, NULL); break; default: LBUG(); @@ -353,7 +552,8 @@ int mds_quota_adjust(struct obd_device *obd, unsigned int qcids[], } if (rc2) - CERROR("mds adjust qunit failed! (opc:%d rc:%d)\n", opc, rc2); + CDEBUG(rc2 == -EAGAIN ? D_QUOTA: D_ERROR, + "mds adjust qunit failed! 
(opc:%d rc:%d)\n", opc, rc2); RETURN(0); } @@ -370,50 +570,122 @@ int filter_quota_adjust(struct obd_device *obd, unsigned int qcids[], switch (opc) { case FSFILT_OP_SETATTR: /* acquire/release block quota on original & current owner */ - rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0); - rc2 = qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0); + rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0, + NULL); + rc2 = qctxt_adjust_qunit(obd, qctxt, qpids[0], qpids[1], 1, 0, + NULL); break; case FSFILT_OP_UNLINK: /* release block quota on this owner */ case FSFILT_OP_CREATE: /* XXX for write operation on obdfilter */ /* acquire block quota on this owner */ - rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0); + rc = qctxt_adjust_qunit(obd, qctxt, qcids[0], qcids[1], 1, 0, + NULL); break; default: LBUG(); break; } - if (rc || rc2) - CERROR("filter adjust qunit failed! (opc:%d rc%d)\n", - opc, rc ?: rc2); + if (rc || rc2) { + if (!rc) + rc = rc2; + CDEBUG(rc == -EAGAIN ? D_QUOTA: D_ERROR, + "filter adjust qunit failed! (opc:%d rc%d)\n", + opc, rc); + } + RETURN(0); } -#define LUSTRE_ADMIN_QUOTAFILES {\ - "admin_quotafile.usr", /* user admin quotafile */\ - "admin_quotafile.grp" /* group admin quotafile */\ -} static const char prefix[] = "OBJECTS/"; +int mds_quota_invalidate(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_info *qinfo = &mds->mds_quota_info; + int rc = 0, i; + char *quotafile[] = LUSTRE_ADMIN_QUOTAFILES_V2; + char name[64]; + struct lvfs_run_ctxt saved; + + LASSERT(qinfo->qi_version == LUSTRE_QUOTA_V2); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + down(&mds->mds_qonoff_sem); + + for (i = 0; i < MAXQUOTAS; i++) { + struct file *fp; + + if (!Q_TYPESET(oqctl, i)) + continue; + + /* quota file has been opened ? 
*/ + if (qinfo->qi_files[i]) { + rc = -EBUSY; + goto out; + } + + LASSERT(strlen(quotafile[i]) + sizeof(prefix) <= sizeof(name)); + sprintf(name, "%s%s", prefix, quotafile[i]); + + fp = filp_open(name, O_CREAT | O_TRUNC | O_RDWR, 0644); + if (IS_ERR(fp)) { + rc = PTR_ERR(fp); + CERROR("error invalidating admin quotafile %s (rc:%d)\n", + name, rc); + } + else + filp_close(fp, 0); + } + +out: + up(&mds->mds_qonoff_sem); + + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + return rc; +} + +int mds_quota_finvalidate(struct obd_device *obd, struct obd_quotactl *oqctl) +{ + struct mds_obd *mds = &obd->u.mds; + int rc; + struct lvfs_run_ctxt saved; + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + down(&mds->mds_qonoff_sem); + + oqctl->qc_cmd = Q_FINVALIDATE; + oqctl->qc_id = obd->u.obt.obt_qfmt; + rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); + if (!rc) + rc = obd_quotactl(mds->mds_osc_exp, oqctl); + + up(&mds->mds_qonoff_sem); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + return rc; +} + int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; struct lustre_quota_info *qinfo = &mds->mds_quota_info; - const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES; + const char *quotafile[] = LUSTRE_ADMIN_QUOTAFILES_V2; struct lvfs_run_ctxt saved; char name[64]; int i, rc = 0; - struct dentry *dparent = mds->mds_objects_dir; - struct inode *iparent = dparent->d_inode; ENTRY; - LASSERT(iparent); + LASSERT(qinfo->qi_version == LUSTRE_QUOTA_V2); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); down(&mds->mds_qonoff_sem); - for (i = 0; i < MAXQUOTAS; i++) { - struct dentry *de; + + for (i = 0; i < MAXQUOTAS && !rc; i++) { struct file *fp; if (!Q_TYPESET(oqctl, i)) @@ -426,33 +698,44 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl) continue; } - /* lookup quota file */ - rc = 0; - LOCK_INODE_MUTEX(iparent); - de = lookup_one_len(quotafiles[i], dparent, - strlen(quotafiles[i])); - 
UNLOCK_INODE_MUTEX(iparent); - if (IS_ERR(de) || de->d_inode == NULL || - !S_ISREG(de->d_inode->i_mode)) - rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT; - if (!IS_ERR(de)) - dput(de); - - if (rc && rc != -ENOENT) { - CERROR("error lookup quotafile %s! (rc:%d)\n", + LASSERT(strlen(quotafile[i]) + sizeof(prefix) <= sizeof(name)); + sprintf(name, "%s%s", prefix, quotafile[i]); + + /* check if quota file exists and is correct */ + fp = filp_open(name, O_RDONLY, 0); + if (!IS_ERR(fp)) { + /* irregular file is not the right place for quota */ + if (!S_ISREG(fp->f_dentry->d_inode->i_mode)) { + CERROR("admin quota file %s is not " + "regular!", name); + filp_close(fp, 0); + rc = -EINVAL; + break; + } + qinfo->qi_files[i] = fp; + rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_CHK); + qinfo->qi_files[i] = 0; + filp_close(fp, 0); + } + else + rc = PTR_ERR(fp); + + if (!rc) + continue; + + /* -EINVAL may be returned by quotainfo for bad quota file */ + if (rc != -ENOENT && rc != -EINVAL) { + CERROR("error opening old quota file %s (%d)\n", name, rc); break; - } else if (!rc) { - continue; } - LASSERT(strlen(quotafiles[i]) + sizeof(prefix) <= sizeof(name)); - sprintf(name, "%s%s", prefix, quotafiles[i]); + CDEBUG(D_INFO, "%s new quota file %s\n", name, + rc == -ENOENT ? 
"creating" : "overwriting"); - LASSERT(rc == -ENOENT); - /* create quota file */ - fp = filp_open(name, O_CREAT | O_EXCL, 0644); - if (IS_ERR(fp) || !S_ISREG(fp->f_dentry->d_inode->i_mode)) { + /* create quota file overwriting old if needed */ + fp = filp_open(name, O_CREAT | O_TRUNC | O_RDWR, 0644); + if (IS_ERR(fp)) { rc = PTR_ERR(fp); CERROR("error creating admin quotafile %s (rc:%d)\n", name, rc); @@ -460,15 +743,14 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl) } qinfo->qi_files[i] = fp; - rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_INIT_INFO); - filp_close(fp, 0); - qinfo->qi_files[i] = NULL; - if (rc) { + rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_INIT_INFO); + if (rc) CERROR("error init %s admin quotafile! (rc:%d)\n", i == USRQUOTA ? "user" : "group", rc); - break; - } + + filp_close(fp, 0); + qinfo->qi_files[i] = NULL; } up(&mds->mds_qonoff_sem); @@ -476,7 +758,7 @@ int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl) RETURN(rc); } -static int close_quota_files(struct obd_quotactl *oqctl, +static int close_quota_files(struct obd_quotactl *oqctl, struct lustre_quota_info *qinfo) { int i, rc = 0; @@ -499,13 +781,12 @@ int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; struct lustre_quota_info *qinfo = &mds->mds_quota_info; - const char *quotafiles[] = LUSTRE_ADMIN_QUOTAFILES; + const char *quotafile[] = LUSTRE_ADMIN_QUOTAFILES_V2; char name[64]; int i, rc = 0; - struct inode *iparent = mds->mds_objects_dir->d_inode; ENTRY; - LASSERT(iparent); + LASSERT(qinfo->qi_version == LUSTRE_QUOTA_V2); /* open admin quota files and read quotafile info */ for (i = 0; i < MAXQUOTAS; i++) { @@ -514,27 +795,33 @@ int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) if (!Q_TYPESET(oqctl, i)) continue; - LASSERT(strlen(quotafiles[i]) + sizeof(prefix) <= sizeof(name)); - sprintf(name, "%s%s", prefix, quotafiles[i]); + 
LASSERT(strlen(quotafile[i]) + + sizeof(prefix) <= sizeof(name)); + sprintf(name, "%s%s", prefix, quotafile[i]); if (qinfo->qi_files[i] != NULL) { rc = -EBUSY; break; } - fp = filp_open(name, O_RDWR | O_EXCL, 0644); + fp = filp_open(name, O_RDWR, 0); if (IS_ERR(fp) || !S_ISREG(fp->f_dentry->d_inode->i_mode)) { - rc = PTR_ERR(fp); - CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, - "open %s failed! (rc:%d)\n", name, rc); + rc = IS_ERR(fp) ? PTR_ERR(fp) : -EINVAL; + CERROR("error open/create %s! (rc:%d)\n", name, rc); break; } qinfo->qi_files[i] = fp; + rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_CHK); + if (rc) { + CERROR("invalid quota file %s! (rc:%d)\n", name, rc); + break; + } + rc = fsfilt_quotainfo(obd, qinfo, i, QFILE_RD_INFO); if (rc) { - CERROR("error read quotainfo of %s! (rc:%d)\n", - name, rc); + CERROR("error read quotainfo of %s! (rc:%d)\n", name, + rc); break; } } @@ -545,8 +832,8 @@ int mds_admin_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) RETURN(rc); } -static int mds_admin_quota_off(struct obd_device *obd, - struct obd_quotactl *oqctl) +int mds_admin_quota_off(struct obd_device *obd, + struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; struct lustre_quota_info *qinfo = &mds->mds_quota_info; @@ -584,7 +871,7 @@ int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); if (!rc) - obt->obt_qctxt.lqc_status = 1; + obt->obt_qctxt.lqc_flags |= UGQUOTA2LQC(oqctl->qc_type); out: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); up(&mds->mds_qonoff_sem); @@ -614,7 +901,7 @@ int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl) rc = obd_quotactl(mds->mds_osc_exp, oqctl); rc2 = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); if (!rc2) - obt->obt_qctxt.lqc_status = 0; + obt->obt_qctxt.lqc_flags &= ~UGQUOTA2LQC(oqctl->qc_type); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); up(&mds->mds_qonoff_sem); @@ -671,10 +958,124 @@ out: RETURN(rc); } +int 
dquot_create_oqaq(struct lustre_quota_ctxt *qctxt, + struct lustre_dquot *dquot, __u32 ost_num, __u32 mdt_num, + int type, struct quota_adjust_qunit *oqaq) +{ + __u64 bunit_curr_o, iunit_curr_o; + unsigned long shrink_qunit_limit = qctxt->lqc_cqs_boundary_factor; + unsigned long cqs_factor = qctxt->lqc_cqs_qs_factor; + __u64 blimit = dquot->dq_dqb.dqb_bhardlimit ? + dquot->dq_dqb.dqb_bhardlimit : dquot->dq_dqb.dqb_bsoftlimit; + __u64 ilimit = dquot->dq_dqb.dqb_ihardlimit ? + dquot->dq_dqb.dqb_ihardlimit : dquot->dq_dqb.dqb_isoftlimit; + int rc = 0; + ENTRY; + + if (!dquot || !oqaq) + RETURN(-EINVAL); + LASSERT_SEM_LOCKED(&dquot->dq_sem); + LASSERT(oqaq->qaq_iunit_sz); + LASSERT(oqaq->qaq_bunit_sz); + + /* don't change qunit size */ + if (!qctxt->lqc_switch_qs) + RETURN(rc); + + bunit_curr_o = oqaq->qaq_bunit_sz; + iunit_curr_o = oqaq->qaq_iunit_sz; + + if (dquot->dq_type == GRPQUOTA) + QAQ_SET_GRP(oqaq); + + if ((type & LQUOTA_FLAGS_ADJBLK) && blimit) { + __u64 b_limitation = + oqaq->qaq_bunit_sz * ost_num * shrink_qunit_limit; + /* enlarge block qunit size */ + while (blimit > + QUSG(dquot->dq_dqb.dqb_curspace + 2 * b_limitation, 1)) { + oqaq->qaq_bunit_sz = + QUSG(oqaq->qaq_bunit_sz * cqs_factor, 1) + << QUOTABLOCK_BITS; + b_limitation = oqaq->qaq_bunit_sz * ost_num * + shrink_qunit_limit; + } + + if (oqaq->qaq_bunit_sz > qctxt->lqc_bunit_sz) + oqaq->qaq_bunit_sz = qctxt->lqc_bunit_sz; + + /* shrink block qunit size */ + while (blimit < + QUSG(dquot->dq_dqb.dqb_curspace + b_limitation, 1)) { + do_div(oqaq->qaq_bunit_sz , cqs_factor); + oqaq->qaq_bunit_sz = QUSG(oqaq->qaq_bunit_sz, 1) << + QUOTABLOCK_BITS; + b_limitation = oqaq->qaq_bunit_sz * ost_num * + shrink_qunit_limit; + if (oqaq->qaq_bunit_sz < qctxt->lqc_cqs_least_bunit) + break; + } + + if (oqaq->qaq_bunit_sz < qctxt->lqc_cqs_least_bunit) + oqaq->qaq_bunit_sz = qctxt->lqc_cqs_least_bunit; + + if (bunit_curr_o != oqaq->qaq_bunit_sz) + QAQ_SET_ADJBLK(oqaq); + + } + + if ((type & LQUOTA_FLAGS_ADJINO) && 
ilimit) { + __u64 i_limitation = + oqaq->qaq_iunit_sz * mdt_num * shrink_qunit_limit; + /* enlarge file qunit size */ + while (ilimit > dquot->dq_dqb.dqb_curinodes + + 2 * i_limitation) { + oqaq->qaq_iunit_sz = oqaq->qaq_iunit_sz * cqs_factor; + i_limitation = oqaq->qaq_iunit_sz * mdt_num * + shrink_qunit_limit; + } + + if (oqaq->qaq_iunit_sz > qctxt->lqc_iunit_sz) + oqaq->qaq_iunit_sz = qctxt->lqc_iunit_sz; + + /* shrink file qunit size */ + while (ilimit < dquot->dq_dqb.dqb_curinodes + + i_limitation) { + do_div(oqaq->qaq_iunit_sz, cqs_factor); + i_limitation = oqaq->qaq_iunit_sz * mdt_num * + shrink_qunit_limit; + if (oqaq->qaq_iunit_sz < qctxt->lqc_cqs_least_iunit) + break; + } + + if (oqaq->qaq_iunit_sz < qctxt->lqc_cqs_least_iunit) + oqaq->qaq_iunit_sz = qctxt->lqc_cqs_least_iunit; + + if (iunit_curr_o != oqaq->qaq_iunit_sz) + QAQ_SET_ADJINO(oqaq); + + } + + if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) { + oqaq->qaq_bunit_sz = 0; + oqaq->qaq_iunit_sz = 0; + QAQ_SET_ADJBLK(oqaq); + QAQ_SET_ADJINO(oqaq); + } + + QAQ_DEBUG(oqaq, "the oqaq computed\n"); + + RETURN(rc); +} + static int mds_init_slave_ilimits(struct obd_device *obd, - struct obd_quotactl *oqctl, int set) + struct obd_quotactl *oqctl, int set, + struct quota_adjust_qunit *oqaq) { /* XXX: for file limits only adjust local now */ + struct obd_device_target *obt = &obd->u.obt; + struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt; unsigned int uid = 0, gid = 0; struct obd_quotactl *ioqc = NULL; int flag; @@ -683,21 +1084,29 @@ static int mds_init_slave_ilimits(struct obd_device *obd, /* if we are going to set zero limit, needn't init slaves */ if (!oqctl->qc_dqblk.dqb_ihardlimit && !oqctl->qc_dqblk.dqb_isoftlimit && - set) + !set) RETURN(0); OBD_ALLOC_PTR(ioqc); if (!ioqc) RETURN(-ENOMEM); - - flag = oqctl->qc_dqblk.dqb_ihardlimit || - oqctl->qc_dqblk.dqb_isoftlimit || set; + + flag = oqctl->qc_dqblk.dqb_ihardlimit 
|| + oqctl->qc_dqblk.dqb_isoftlimit || !set; ioqc->qc_cmd = flag ? Q_INITQUOTA : Q_SETQUOTA; ioqc->qc_id = oqctl->qc_id; ioqc->qc_type = oqctl->qc_type; ioqc->qc_dqblk.dqb_valid = QIF_ILIMITS; ioqc->qc_dqblk.dqb_ihardlimit = flag ? MIN_QLIMIT : 0; + if (QAQ_IS_ADJINO(oqaq)) { + /* adjust the mds slave's inode qunit size */ + rc = quota_adjust_slave_lqs(oqaq, qctxt); + if (rc < 0) + CDEBUG(D_ERROR, "adjust mds slave's inode qunit size \ + failed! (rc:%d)\n", rc); + } + /* set local limit to MIN_QLIMIT */ rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc); if (rc) @@ -709,9 +1118,15 @@ static int mds_init_slave_ilimits(struct obd_device *obd, else gid = oqctl->qc_id; - rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 0, 0); + rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 0, 0, + NULL); + if (rc == -EDQUOT || rc == -EBUSY) { + CDEBUG(D_QUOTA, "rc: %d.\n", rc); + rc = 0; + } if (rc) { - CERROR("error mds adjust local file quota! (rc:%d)\n", rc); + CDEBUG(D_QUOTA,"error mds adjust local file quota! (rc:%d)\n", + rc); GOTO(out, rc); } /* FIXME initialize all slaves in CMD */ @@ -723,31 +1138,41 @@ out: } static int mds_init_slave_blimits(struct obd_device *obd, - struct obd_quotactl *oqctl, int set) + struct obd_quotactl *oqctl, int set, + struct quota_adjust_qunit *oqaq) { + struct obd_device_target *obt = &obd->u.obt; + struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt; struct mds_obd *mds = &obd->u.mds; struct obd_quotactl *ioqc; unsigned int uid = 0, gid = 0; + int rc, rc1 = 0; int flag; - int rc; ENTRY; /* if we are going to set zero limit, needn't init slaves */ if (!oqctl->qc_dqblk.dqb_bhardlimit && !oqctl->qc_dqblk.dqb_bsoftlimit && - set) + !set) RETURN(0); OBD_ALLOC_PTR(ioqc); if (!ioqc) RETURN(-ENOMEM); - flag = oqctl->qc_dqblk.dqb_bhardlimit || - oqctl->qc_dqblk.dqb_bsoftlimit || set; + flag = oqctl->qc_dqblk.dqb_bhardlimit || + oqctl->qc_dqblk.dqb_bsoftlimit || !set; ioqc->qc_cmd = flag ? 
Q_INITQUOTA : Q_SETQUOTA; ioqc->qc_id = oqctl->qc_id; ioqc->qc_type = oqctl->qc_type; ioqc->qc_dqblk.dqb_valid = QIF_BLIMITS; ioqc->qc_dqblk.dqb_bhardlimit = flag ? MIN_QLIMIT : 0; + if (QAQ_IS_ADJBLK(oqaq)) { + /* adjust the mds slave's block qunit size */ + rc1 = quota_adjust_slave_lqs(oqaq, qctxt); + if (rc1 < 0) + CERROR("adjust mds slave's block qunit size failed!" + "(rc:%d)\n", rc1); + } rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc); if (rc) @@ -759,14 +1184,26 @@ static int mds_init_slave_blimits(struct obd_device *obd, else gid = oqctl->qc_id; - rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 1, 0); + /* initialize all slave's limit */ + rc = obd_quotactl(mds->mds_osc_exp, ioqc); + + rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 1, 0, + NULL); + if (rc == -EDQUOT || rc == -EBUSY) { + CDEBUG(D_QUOTA, "rc: %d.\n", rc); + rc = 0; + } if (rc) { CERROR("error mds adjust local block quota! (rc:%d)\n", rc); GOTO(out, rc); } - /* initialize all slave's limit */ - rc = obd_quotactl(mds->mds_osc_exp, ioqc); + /* adjust all slave's qunit size when setting quota + * this is will create a lqs for every ost, which will present + * certain uid/gid is set quota or not */ + QAQ_SET_ADJBLK(oqaq); + rc = obd_quota_adjust_qunit(mds->mds_osc_exp, oqaq, qctxt); + EXIT; out: OBD_FREE_PTR(ioqc); @@ -776,15 +1213,27 @@ out: int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt; + struct obd_device *lov_obd = class_exp2obd(mds->mds_osc_exp); + struct lov_obd *lov = &lov_obd->u.lov; + struct quota_adjust_qunit *oqaq = NULL; struct lustre_quota_info *qinfo = &mds->mds_quota_info; - __u32 ihardlimit, isoftlimit, bhardlimit, bsoftlimit; + __u64 ihardlimit, isoftlimit, bhardlimit, bsoftlimit; time_t btime, itime; struct lustre_dquot *dquot; struct obd_dqblk *dqblk = &oqctl->qc_dqblk; - int set, rc; + /* orig_set means if quota was set 
before; now_set means we are + * setting/cancelling quota */ + int orig_set, now_set; + int rc, rc2 = 0, flag = 0; ENTRY; + OBD_ALLOC_PTR(oqaq); + if (!oqaq) + RETURN(-ENOMEM); down(&mds->mds_qonoff_sem); + init_oqaq(oqaq, qctxt, oqctl->qc_id, oqctl->qc_type); + if (qinfo->qi_files[oqctl->qc_type] == NULL) GOTO(out_sem, rc = -ESRCH); @@ -819,18 +1268,20 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) dquot->dq_dqb.dqb_bhardlimit = dqblk->dqb_bhardlimit; dquot->dq_dqb.dqb_bsoftlimit = dqblk->dqb_bsoftlimit; /* clear usage (limit pool) */ - if (!dquot->dq_dqb.dqb_bhardlimit && + if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_curspace = 0; /* clear grace time */ - if (!dqblk->dqb_bsoftlimit || + if (!dqblk->dqb_bsoftlimit || toqb(dquot->dq_dqb.dqb_curspace) <= dqblk->dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = 0; /* set grace only if user hasn't provided his own */ else if (!(dqblk->dqb_valid & QIF_BTIME)) - dquot->dq_dqb.dqb_btime = cfs_time_current_sec() + + dquot->dq_dqb.dqb_btime = cfs_time_current_sec() + qinfo->qi_info[dquot->dq_type].dqi_bgrace; + + flag |= LQUOTA_FLAGS_ADJBLK; } if (dqblk->dqb_valid & QIF_ILIMITS) { @@ -847,7 +1298,16 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) else if (!(dqblk->dqb_valid & QIF_ITIME)) dquot->dq_dqb.dqb_itime = cfs_time_current_sec() + qinfo->qi_info[dquot->dq_type].dqi_igrace; + + flag |= LQUOTA_FLAGS_ADJINO; } + QAQ_DEBUG(oqaq, "before dquot_create_oqaq\n"); + rc = dquot_create_oqaq(qctxt, dquot, lov->desc.ld_tgt_count, 1, + flag, oqaq); + QAQ_DEBUG(oqaq, "after dquot_create_oqaq\n"); + if (rc < 0) + CDEBUG(D_QUOTA, "adjust qunit size failed! 
(rc:%d)\n", rc); + rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); @@ -859,38 +1319,47 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) } up(&mds->mds_qonoff_sem); - if (dqblk->dqb_valid & QIF_ILIMITS) { - set = !(ihardlimit || isoftlimit); - rc = mds_init_slave_ilimits(obd, oqctl, set); + orig_set = ihardlimit || isoftlimit; + now_set = dqblk->dqb_ihardlimit || dqblk->dqb_isoftlimit; + if (dqblk->dqb_valid & QIF_ILIMITS && orig_set != now_set) { + down(&dquot->dq_sem); + dquot->dq_dqb.dqb_curinodes = 0; + up(&dquot->dq_sem); + rc = mds_init_slave_ilimits(obd, oqctl, orig_set, oqaq); if (rc) { CERROR("init slave ilimits failed! (rc:%d)\n", rc); goto revoke_out; } } - if (dqblk->dqb_valid & QIF_BLIMITS) { - set = !(bhardlimit || bsoftlimit); - rc = mds_init_slave_blimits(obd, oqctl, set); + orig_set = bhardlimit || bsoftlimit; + now_set = dqblk->dqb_bhardlimit || dqblk->dqb_bsoftlimit; + if (dqblk->dqb_valid & QIF_BLIMITS && orig_set != now_set) { + down(&dquot->dq_sem); + dquot->dq_dqb.dqb_curspace = 0; + up(&dquot->dq_sem); + rc = mds_init_slave_blimits(obd, oqctl, orig_set, oqaq); if (rc) { CERROR("init slave blimits failed! (rc:%d)\n", rc); goto revoke_out; } } - down(&mds->mds_qonoff_sem); revoke_out: + down(&mds->mds_qonoff_sem); + down(&dquot->dq_sem); if (rc) { /* cancel previous setting */ - down(&dquot->dq_sem); dquot->dq_dqb.dqb_ihardlimit = ihardlimit; dquot->dq_dqb.dqb_isoftlimit = isoftlimit; dquot->dq_dqb.dqb_bhardlimit = bhardlimit; dquot->dq_dqb.dqb_bsoftlimit = bsoftlimit; dquot->dq_dqb.dqb_btime = btime; dquot->dq_dqb.dqb_itime = itime; - fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); - up(&dquot->dq_sem); } + rc2 = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); + up(&dquot->dq_sem); + out: down(&dquot->dq_sem); dquot->dq_status &= ~DQ_STATUS_SET; @@ -899,14 +1368,18 @@ out: EXIT; out_sem: up(&mds->mds_qonoff_sem); - return rc; + + if (oqaq) + OBD_FREE_PTR(oqaq); + + return rc ? 
rc : rc2; } static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl) { struct obd_quotactl *soqc; struct lvfs_run_ctxt saved; - int rc; + int rc, rc1; ENTRY; OBD_ALLOC_PTR(soqc); @@ -917,26 +1390,29 @@ static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl) soqc->qc_id = oqctl->qc_id; soqc->qc_type = oqctl->qc_type; + /* get block usage from OSS */ + soqc->qc_dqblk.dqb_curspace = 0; rc = obd_quotactl(obd->u.mds.mds_osc_exp, soqc); - if (rc) - GOTO(out, rc); - - oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace; + if (!rc) { + oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace; + oqctl->qc_dqblk.dqb_valid |= QIF_SPACE; + } - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + /* get block/inode usage from MDS */ soqc->qc_dqblk.dqb_curspace = 0; - rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, soqc); + soqc->qc_dqblk.dqb_curinodes = 0; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc1 = fsfilt_quotactl(obd, obd->u.obt.obt_sb, soqc); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (!rc1) { + oqctl->qc_dqblk.dqb_curspace += soqc->qc_dqblk.dqb_curspace; + oqctl->qc_dqblk.dqb_curinodes = soqc->qc_dqblk.dqb_curinodes; + oqctl->qc_dqblk.dqb_valid |= QIF_INODES; + } - if (rc) - GOTO(out, rc); - - oqctl->qc_dqblk.dqb_curinodes += soqc->qc_dqblk.dqb_curinodes; - oqctl->qc_dqblk.dqb_curspace += soqc->qc_dqblk.dqb_curspace; - EXIT; -out: OBD_FREE_PTR(soqc); - return rc; + + RETURN(rc ? 
: rc1); } int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) @@ -949,6 +1425,7 @@ int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) ENTRY; down(&mds->mds_qonoff_sem); + dqblk->dqb_valid = 0; if (qinfo->qi_files[oqctl->qc_type] == NULL) GOTO(out, rc = -ESRCH); @@ -963,6 +1440,7 @@ int mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) dqblk->dqb_bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit; dqblk->dqb_btime = dquot->dq_dqb.dqb_btime; dqblk->dqb_itime = dquot->dq_dqb.dqb_itime; + dqblk->dqb_valid |= QIF_LIMITS | QIF_TIMES; up(&dquot->dq_sem); lustre_dqput(dquot); @@ -997,7 +1475,7 @@ static int dquot_recovery(struct obd_device *obd, unsigned int id, unsigned short type) { struct mds_obd *mds = &obd->u.mds; - struct lustre_quota_info *qinfo= &obd->u.mds.mds_quota_info; + struct lustre_quota_info *qinfo= &mds->mds_quota_info; struct lustre_dquot *dquot; struct obd_quotactl *qctl; __u64 total_limits = 0; @@ -1030,7 +1508,7 @@ dquot_recovery(struct obd_device *obd, unsigned int id, unsigned short type) qctl->qc_type = type; qctl->qc_id = id; qctl->qc_stat = QUOTA_RECOVERING; - rc = obd_quotactl(obd->u.mds.mds_osc_exp, qctl); + rc = obd_quotactl(mds->mds_osc_exp, qctl); if (rc) GOTO(out, rc); total_limits = qctl->qc_dqblk.dqb_bhardlimit; @@ -1094,7 +1572,7 @@ static int qmaster_recovery_main(void *arg) continue; } CFS_INIT_LIST_HEAD(&id_list); - rc = fsfilt_qids(obd, qinfo->qi_files[type], NULL, type, + rc = fsfilt_qids(obd, qinfo->qi_files[type], NULL, type, &id_list); up(&mds->mds_qonoff_sem); @@ -1142,3 +1620,5 @@ int mds_quota_recovery(struct obd_device *obd) wait_for_completion(&data.comp); RETURN(rc); } + +#endif /* HAVE_QUOTA_SUPPORT */ diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 90cca7c..09fbb63 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -36,7 +36,6 @@ if [ "$ACC_SM_ONLY" ]; then done fi LFSCK="no" # bug 13698 
-SANITY_QUOTA="no" # bug 13058 LIBLUSTRETESTS=${LIBLUSTRETESTS:-../liblustre/tests} diff --git a/lustre/tests/cfg/insanity-lmv.sh b/lustre/tests/cfg/insanity-lmv.sh index 99a3ccb..9ef06ad 100644 --- a/lustre/tests/cfg/insanity-lmv.sh +++ b/lustre/tests/cfg/insanity-lmv.sh @@ -59,6 +59,10 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -i $MDSISIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$SECLEVEL" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param mdt.sec_level=$SECLEVEL" +[ "x$MDSCAPA" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param mdt.capa=$MDSCAPA" [ "x$mdsfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" [ "x$STRIPE_BYTES" != "x" ] && @@ -76,6 +80,10 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$SECLEVEL" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param ost.sec_level=$SECLEVEL" +[ "x$OSSCAPA" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param ost.capa=$OSSCAPA" [ "x$ostfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" diff --git a/lustre/tests/cfg/lmv.sh b/lustre/tests/cfg/lmv.sh index 10ba95f..6358789 100644 --- a/lustre/tests/cfg/lmv.sh +++ b/lustre/tests/cfg/lmv.sh @@ -69,8 +69,10 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -i $MDSISIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$SECLEVEL" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param mdt.sec_level=$SECLEVEL" [ "x$MDSCAPA" != "x" ] && - MKFSOPT="--param mdt.capa=$MDSCAPA" + MOUNTOPT=$MOUNTOPT" --param mdt.capa=$MDSCAPA" [ "x$mdsfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" [ "x$STRIPE_BYTES" != "x" ] && @@ -88,8 +90,10 @@ MOUNTOPT="" MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" +[ "x$SECLEVEL" != "x" ] && + 
MOUNTOPT=$MOUNTOPT" --param ost.sec_level=$SECLEVEL" [ "x$OSSCAPA" != "x" ] && - MKFSOPT="--param ost.capa=$OSSCAPA" + MOUNTOPT=$MOUNTOPT" --param ost.capa=$OSSCAPA" [ "x$ostfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh index f958d58..6422b79 100644 --- a/lustre/tests/cfg/local.sh +++ b/lustre/tests/cfg/local.sh @@ -50,8 +50,10 @@ MKFSOPT="" MKFSOPT=$MKFSOPT" -i $MDSISIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\\\"$MKFSOPT\\\"" +[ "x$SECLEVEL" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param mdt.sec_level=$SECLEVEL" [ "x$MDSCAPA" != "x" ] && - MKFSOPT="--param mdt.capa=$MDSCAPA" + MOUNTOPT=$MOUNTOPT" --param mdt.capa=$MDSCAPA" [ "x$mdsfailover_HOST" != "x" ] && MDSOPT=$MDSOPT" --failnode=`h2$NETTYPE $mdsfailover_HOST`" [ "x$STRIPE_BYTES" != "x" ] && @@ -69,8 +71,10 @@ MKFSOPT="" MKFSOPT=$MKFSOPT" -J size=$OSTJOURNALSIZE" [ "x$MKFSOPT" != "x" ] && MKFSOPT="--mkfsoptions=\\\"$MKFSOPT\\\"" +[ "x$SECLEVEL" != "x" ] && + MOUNTOPT=$MOUNTOPT" --param ost.sec_level=$SECLEVEL" [ "x$OSSCAPA" != "x" ] && - MKFSOPT="--param ost.capa=$OSSCAPA" + MOUNTOPT=$MOUNTOPT" --param ost.capa=$OSSCAPA" [ "x$ostfailover_HOST" != "x" ] && OSTOPT=$OSTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID --param sys.timeout=$TIMEOUT $MKFSOPT $OSTOPT $OST_MKFS_OPTS" @@ -79,6 +83,7 @@ MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop,user_xattr,acl"} OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"} #client +MOUNTOPT="" MOUNT=${MOUNT:-/mnt/${FSNAME}} MOUNT1=${MOUNT1:-$MOUNT} MOUNT2=${MOUNT2:-${MOUNT}2} diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index cb3dc82..740ce41 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -17,7 +17,8 @@ 
SRCDIR=`dirname $0` export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/../utils:$PATH:/sbin ONLY=${ONLY:-"$*"} -ALWAYS_EXCEPT="$SANITY_QUOTA_EXCEPT" +# enable test_23 after bug 16542 fixed. +ALWAYS_EXCEPT="10 23 $SANITY_QUOTA_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! case `uname -r` in @@ -36,10 +37,8 @@ TSTID2=${TSTID2:-60001} TSTUSR=${TSTUSR:-"quota_usr"} TSTUSR2=${TSTUSR2:-"quota_2usr"} BLK_SZ=1024 -BUNIT_SZ=${BUNIT_SZ:-1000} # default 1000 quota blocks -BTUNE_SZ=${BTUNE_SZ:-500} # default 50% of BUNIT_SZ -IUNIT_SZ=${IUNIT_SZ:-10} # default 10 files -ITUNE_SZ=${ITUNE_SZ:-5} # default 50% of IUNIT_SZ +BUNIT_SZ=${BUNIT_SZ:-1024} # min block quota unit(kB) +IUNIT_SZ=${IUNIT_SZ:-10} # min inode quota unit MAX_DQ_TIME=604800 MAX_IQ_TIME=604800 @@ -48,11 +47,12 @@ LUSTRE=${LUSTRE:-`dirname $0`/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +DIRECTIO=${DIRECTIO:-$LUSTRE/tests/directio} remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0 -[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21" QUOTALOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} @@ -63,11 +63,11 @@ DIR2=${DIR2:-$MOUNT2} check_and_setup_lustre -LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` -OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` +LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` +OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` -SHOW_QUOTA_USER="$LFS quota -u $TSTUSR $DIR" -SHOW_QUOTA_GROUP="$LFS quota -g $TSTUSR $DIR" +SHOW_QUOTA_USER="$LFS quota -v -u $TSTUSR $DIR" +SHOW_QUOTA_GROUP="$LFS quota -v -g $TSTUSR $DIR" SHOW_QUOTA_INFO="$LFS quota -t $DIR" # control the time of tests @@ -81,74 +81,59 @@ eval ONLY_99=true # set_blk_tunables(btune_sz) set_blk_tunesz() { + local btune=$(($1 * BLK_SZ)) # set btune size on all obdfilters - do_facet ost1 "set -x; for i in 
/proc/fs/lustre/obdfilter/*/quota_btune_sz; do - echo $(($1 * BLK_SZ)) >> \\\$i; - done" + do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_btune_sz=$btune" # set btune size on mds - do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_btune_sz; do - echo $(($1 * BLK_SZ)) >> \\\$i; - done" + do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_btune_sz=$btune" } # set_blk_unitsz(bunit_sz) set_blk_unitsz() { - do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_bunit_sz; do - echo $(($1 * BLK_SZ)) >> \\\$i; - done" - do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_bunit_sz; do - echo $(($1 * BLK_SZ)) >> \\\$i; - done" + local bunit=$(($1 * BLK_SZ)) + # set bunit size on all obdfilters + do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_bunit_sz=$bunit" + # set bunit size on mds + do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_bunit_sz=$bunit" } # set_file_tunesz(itune_sz) set_file_tunesz() { - # set iunit and itune size on all obdfilters - do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_itune_sz; do - echo $1 >> \\\$i; - done" - # set iunit and itune size on mds - do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_itune_sz; do - echo $1 >> \\\$i; - done" + local itune=$1 + # set itune size on all obdfilters + do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_itune_sz=$itune" + # set itune size on mds + do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_itune_sz=$itune" } # set_file_unitsz(iunit_sz) set_file_unitsz() { - do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_iunit_sz; do - echo $1 >> \\\$i; - done" - do_facet $SINGLEMDS "for i in /proc/fs/lustre/mds/${FSNAME}-MDT*/quota_iunit_sz; do - echo $1 >> \\\$i; - done" + local iunit=$1 + # set iunit size on all obdfilters + do_facet ost1 "lctl set_param lquota.${FSNAME}-OST*.quota_iunit_sz=$iunit" + # set iunit size on mds + do_facet 
$SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_iunit_sz=$iunit" } -# These are for test on local machine,if run sanity-quota.sh on -# real cluster, ltest should have setup the test environment: -# -# - create test user/group on all servers with same id. -# - set unit size/tune on all servers size to reasonable value. -pre_test() { - if [ -z "$NOSETUP" ]; then - # set block tunables - set_blk_tunesz $BTUNE_SZ - set_blk_unitsz $BUNIT_SZ - # set file tunables - set_file_tunesz $ITUNE_SZ - set_file_unitsz $IUNIT_SZ - fi -} -pre_test - -post_test() { - if [ -z "$NOSETUP" ]; then - # restore block tunables to default size - set_blk_unitsz $((1024 * 100)) - set_blk_tunesz $((1024 * 50)) - # restore file tunables to default size - set_file_unitsz 5000 - set_file_tunesz 2500 - fi +lustre_fail() { + local fail_node=$1 + local fail_loc=$2 + + case $fail_node in + "mds" ) + do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc" ;; + "ost" ) + for num in `seq $OSTCOUNT`; do + do_facet ost$num "lctl set_param fail_loc=$fail_loc" + done ;; + "mds_ost" ) + do_facet $SINGLEMDS "lctl set_param fail_loc=$fail_loc" ; + for num in `seq $OSTCOUNT`; do + do_facet ost$num "lctl set_param fail_loc=$fail_loc" + done ;; + * ) echo "usage: lustre_fail fail_node fail_loc" ; + return 1 ;; + esac } RUNAS="runas -u $TSTID" @@ -158,126 +143,232 @@ FAIL_ON_ERROR=true check_runas_id $TSTID2 $RUNAS2 FAIL_ON_ERROR=false +run_test_with_stat() { + (($# != 2)) && error "the number of arguments is wrong" + + do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.stats=0" > /dev/null + for j in `seq $OSTCOUNT`; do + do_facet ost$j "lctl set_param lquota.${FSNAME}-OST*.stats=0" > /dev/null + done + run_test "$@" + if [ ${STAT:-"yes"} != "no" -a -z "$LAST_SKIPPED" ]; then + echo "statistics info begin ***************************************" + do_facet $SINGLEMDS "lctl get_param lquota.mdd_obd-${FSNAME}-MDT*.stats" + for j in `seq $OSTCOUNT`; do + do_facet ost$j "lctl get_param 
lquota.${FSNAME}-OST*.stats" + done + echo "statistics info end ***************************************" + fi +} + # set quota test_0() { $LFS quotaoff -ug $DIR $LFS quotacheck -ug $DIR - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + + lctl set_param debug="+quota" + do_facet $SINGLEMDS "lctl set_param debug=+quota" + for num in `seq $OSTCOUNT`; do + do_facet ost$num "lctl set_param debug=+quota" + done } -run_test 0 "Set quota =============================" +run_test_with_stat 0 "Set quota =============================" -# block hard limit (normal use and out of quota) -test_1() { +# test for specific quota limitation, qunit, qtune $1=block_quota_limit +test_1_sub() { + LIMIT=$1 mkdir -p $DIR/$tdir chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" - LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 5)) # 5 bunits each sever - TESTFILE=$DIR/$tdir/$tfile-0 - - echo " User quota (limit: $LIMIT kbytes)" - $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR + wait_delete_completed + + # test for user + log " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + sleep 3 $SHOW_QUOTA_USER - + $LFS setstripe $TESTFILE -c 1 chown $TSTUSR.$TSTUSR $TESTFILE - echo " Write ..." + log " Write ..." $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(usr) write failure, but expect success" - echo " Done" - echo " Write out of block quota ..." + log " Done" + log " Write out of block quota ..." 
# this time maybe cache write, ignore it's failure $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true # flush cache, ensure noquota flag is setted on client - sync; sleep 1; sync; + cancel_lru_locks osc $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(usr) write success, but expect EDQUOT" rm -f $TESTFILE - - echo " Group quota (limit: $LIMIT kbytes)" - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit - $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR + sync; sleep 1; sync; + OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'` + OST0_QUOTA_USED=`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'` + echo $OST0_QUOTA_USED + [ $OST0_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_USER + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + + # test for group + log "--------------------------------------" + log " Group quota (limit: $LIMIT kbytes)" + $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + sleep 3 $SHOW_QUOTA_GROUP - TESTFILE=$DIR/$tdir/$tfile-1 + TESTFILE="$DIR/$tdir/$tfile-1" $LFS setstripe $TESTFILE -c 1 chown $TSTUSR.$TSTUSR $TESTFILE - echo " Write ..." + log " Write ..." $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(grp) write failure, but expect success" - echo " Done" - echo " Write out of block quota ..." + log " Done" + log " Write out of block quota ..." 
# this time maybe cache write, ignore it's failure $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true - sync; sleep 1; sync; + cancel_lru_locks osc $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(grp) write success, but expect EDQUOT" # cleanup rm -f $TESTFILE - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + sync; sleep 1; sync; + OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'` + OST0_QUOTA_USED=`$LFS quota -o $OST0_UUID -g $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'` + echo $OST0_QUOTA_USED + [ $OST0_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_GROUP + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR } -run_test 1 "Block hard limit (normal use and out of quota) ===" -# file hard limit (normal use and out of quota) -test_2() { +# block hard limit (normal use and out of quota) +test_1() { + for i in `seq 1 $cycle`; do + # define blk_qunit is between 1M and 4M + blk_qunit=$(( $RANDOM % 3072 + 1024 )) + blk_qtune=$(( $RANDOM % $blk_qunit )) + # other osts and mds will occupy at 1M blk quota + b_limit=$(( ($RANDOM - 16384) / 8 + $OSTCOUNT * $blk_qunit * 4 )) + set_blk_tunesz $blk_qtune + set_blk_unitsz $blk_qunit + echo "cycle: $i(total $cycle) bunit:$blk_qunit, btune:$blk_qtune, blimit:$b_limit" + test_1_sub $b_limit + echo "==================================================" + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + done +} +run_test_with_stat 1 "Block hard limit (normal use and out of quota) ===" + +# test for specific quota limitation, qunit, qtune $1=block_quota_limit +test_2_sub() { + LIMIT=$1 mkdir -p $DIR/$tdir chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" - LIMIT=$(($IUNIT_SZ * 10)) # 10 iunits on mds - TESTFILE=$DIR/$tdir/$tfile-0 + wait_delete_completed - echo " User quota (limit: $LIMIT files)" - $LFS setquota -u $TSTUSR 0 0 0 $LIMIT $DIR + 
# test for user + log " User quota (limit: $LIMIT files)" + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I $LIMIT $DIR + sleep 3 $SHOW_QUOTA_USER - echo " Create $LIMIT files ..." + log " Create $LIMIT files ..." $RUNAS createmany -m ${TESTFILE} $LIMIT || \ - error "(usr) create failure, but expect success" - echo " Done" - echo " Create out of file quota ..." + error "(usr) create failure, but expect success" + log " Done" + log " Create out of file quota ..." $RUNAS touch ${TESTFILE}_xxx && \ - error "(usr) touch success, but expect EDQUOT" + error "(usr) touch success, but expect EDQUOT" unlinkmany ${TESTFILE} $LIMIT - rm ${TESTFILE}_xxx + rm -f ${TESTFILE}_xxx + sync; sleep 1; sync; + + MDS_UUID=`do_facet $SINGLEMDS $LCTL dl | grep -m1 " mdt " | awk '{print $((NF-1))}'` + MDS_QUOTA_USED=`$LFS quota -o $MDS_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'` + echo $MDS_QUOTA_USED + [ $MDS_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_USER + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR - echo " Group quota (limit: $LIMIT files)" - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit - $LFS setquota -g $TSTUSR 0 0 0 $LIMIT $DIR + # test for group + log "--------------------------------------" + log " Group quota (limit: $LIMIT FILE)" + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I $LIMIT $DIR + sleep 3 $SHOW_QUOTA_GROUP TESTFILE=$DIR/$tdir/$tfile-1 - echo " Create $LIMIT files ..." + log " Create $LIMIT files ..." $RUNAS createmany -m ${TESTFILE} $LIMIT || \ - error "(grp) create failure, but expect success" + error "(usr) create failure, but expect success" + log " Done" + log " Create out of file quota ..." + $RUNAS touch ${TESTFILE}_xxx && \ + error "(usr) touch success, but expect EDQUOT" - echo " Done" - echo " Create out of file quota ..." 
- $RUNAS touch ${TESTFILE}_xxx && \ - error "(grp) touch success, but expect EDQUOT" + unlinkmany ${TESTFILE} $LIMIT + rm -f ${TESTFILE}_xxx + sync; sleep 1; sync; - $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(grp) touch success, but expect EDQUOT" + MDS_UUID=`do_facet $SINGLEMDS $LCTL dl | grep -m1 " mdt " | awk '{print $((NF-1))}'` + MDS_QUOTA_USED=`$LFS quota -o $MDS_UUID -g $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'` + echo $MDS_QUOTA_USED + [ $MDS_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_GROUP + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR +} - # cleanup - unlinkmany ${TESTFILE} $LIMIT - rm ${TESTFILE}_xxx +# file hard limit (normal use and out of quota) +test_2() { + for i in `seq 1 $cycle`; do + if [ $i -eq 1 ]; then + ino_qunit=52 + ino_qtune=41 + i_limit=11 + else + # define ino_qunit is between 10 and 100 + ino_qunit=$(( $RANDOM % 90 + 10 )) + ino_qtune=$(( $RANDOM % $ino_qunit )) + # RANDOM's maxium is 32767 + i_limit=$(( $RANDOM % 990 + 10 )) + fi - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + set_file_tunesz $ino_qtune + set_file_unitsz $ino_qunit + echo "cycle: $i(total $cycle) iunit:$ino_qunit, itune:$ino_qtune, ilimit:$i_limit" + test_2_sub $i_limit + echo "==================================================" + set_file_unitsz 5120 + set_file_tunesz 2560 + done } -run_test 2 "File hard limit (normal use and out of quota) ===" +run_test_with_stat 2 "File hard limit (normal use and out of quota) ===" test_block_soft() { TESTFILE=$1 TIMER=$(($2 * 3 / 2)) OFFSET=0 + wait_delete_completed + echo " Write to exceed soft limit" RUNDD="$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ" $RUNDD count=$((BUNIT_SZ+1)) || \ - error "write failure, but expect success" + error "write failure, but expect success" OFFSET=$((OFFSET + BUNIT_SZ + 1)) - sync; sleep 1; sync; + cancel_lru_locks osc $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP @@ -285,38 +376,38 @@ test_block_soft() { echo " Write 
before timer goes off" $RUNDD count=$BUNIT_SZ seek=$OFFSET || \ - error "write failure, but expect success" + error "write failure, but expect success" OFFSET=$((OFFSET + BUNIT_SZ)) - sync; sleep 1; sync; + cancel_lru_locks osc echo " Done" - - echo " Sleep $TIMER seconds ..." - sleep $TIMER - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP - $SHOW_QUOTA_INFO + echo " Sleep $TIMER seconds ..." + sleep $TIMER + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + $SHOW_QUOTA_INFO echo " Write after timer goes off" # maybe cache write, ignore. - sync; sleep 1; sync; $RUNDD count=$BUNIT_SZ seek=$OFFSET || true OFFSET=$((OFFSET + BUNIT_SZ)) - sync; sleep 1; sync; + cancel_lru_locks osc $RUNDD count=$BUNIT_SZ seek=$OFFSET && \ - error "write success, but expect EDQUOT" + error "write success, but expect EDQUOT" - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP - $SHOW_QUOTA_INFO + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + $SHOW_QUOTA_INFO echo " Unlink file to stop timer" rm -f $TESTFILE + sync; sleep 1; sync echo " Done" - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP - $SHOW_QUOTA_INFO + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + $SHOW_QUOTA_INFO echo " Write ..." 
$RUNDD count=$BUNIT_SZ || error "write failure, but expect success" @@ -324,6 +415,7 @@ test_block_soft() { # cleanup rm -f $TESTFILE + sync; sleep 3; sync; } # block soft limit (start timer, timer goes off, stop timer) @@ -331,7 +423,8 @@ test_3() { mkdir -p $DIR/$tdir chmod 0777 $DIR/$tdir - LIMIT=$(( $BUNIT_SZ * 2 )) # 1 bunit on mds and 1 bunit on the ost + # 1 bunit on mds and 1 bunit on every ost + LIMIT=$(( $BUNIT_SZ * ($OSTCOUNT + 1) )) GRACE=10 echo " User quota (soft limit: $LIMIT kbytes grace: $GRACE seconds)" @@ -340,11 +433,11 @@ test_3() { $LFS setstripe $TESTFILE -c 1 chown $TSTUSR.$TSTUSR $TESTFILE - $LFS setquota -t -u $GRACE $MAX_IQ_TIME $DIR - $LFS setquota -u $TSTUSR $LIMIT 0 0 0 $DIR + $LFS setquota -t -u --block-grace $GRACE --inode-grace $MAX_IQ_TIME $DIR + $LFS setquota -u $TSTUSR -b $LIMIT -B 0 -i 0 -I 0 $DIR test_block_soft $TESTFILE $GRACE - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR echo " Group quota (soft limit: $LIMIT kbytes grace: $GRACE seconds)" TESTFILE=$DIR/$tdir/$tfile-1 @@ -352,19 +445,21 @@ test_3() { $LFS setstripe $TESTFILE -c 1 chown $TSTUSR.$TSTUSR $TESTFILE - $LFS setquota -t -g $GRACE $MAX_IQ_TIME $DIR - $LFS setquota -g $TSTUSR $LIMIT 0 0 0 $DIR + $LFS setquota -t -g --block-grace $GRACE --inode-grace $MAX_IQ_TIME $DIR + $LFS setquota -g $TSTUSR -b $LIMIT -B 0 -i 0 -I 0 $DIR test_block_soft $TESTFILE $GRACE - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR } -run_test 3 "Block soft limit (start timer, timer goes off, stop timer) ===" +run_test_with_stat 3 "Block soft limit (start timer, timer goes off, stop timer) ===" test_file_soft() { TESTFILE=$1 LIMIT=$2 TIMER=$(($3 * 3 / 2)) + wait_delete_completed + echo " Create files to exceed soft limit" $RUNAS createmany -m ${TESTFILE}_ $((LIMIT + 1)) || \ error "create failure, but expect success" @@ -379,23 +474,24 @@ test_file_soft() { echo " Sleep $TIMER seconds ..." 
sleep $TIMER - + $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP $SHOW_QUOTA_INFO - + echo " Create file after timer goes off" - $RUNAS createmany -m ${TESTFILE}_after_ $((IUNIT_SZ - 2)) || \ - error "create ${TESTFILE}_after failure, but expect success" + # the least of inode qunit is 2, so there are at most 3(qunit:2+qtune:1) + # inode quota left here + $RUNAS touch ${TESTFILE}_after ${TESTFILE}_after1 ${TESTFILE}_after2 || true sync; sleep 1; sync - $RUNAS touch ${TESTFILE}_after && \ + $RUNAS touch ${TESTFILE}_after3 && \ error "create after timer expired, but expect EDQUOT" sync; sleep 1; sync $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP $SHOW_QUOTA_INFO - + echo " Unlink files to stop timer" find `dirname $TESTFILE` -name "`basename ${TESTFILE}`*" | xargs rm -f echo " Done" @@ -408,6 +504,7 @@ test_file_soft() { # cleanup rm -f ${TESTFILE}_xxx + sync; sleep 3; sync; } # file soft limit (start timer, timer goes off, stop timer) @@ -420,66 +517,70 @@ test_4a() { # was test_4 GRACE=5 echo " User quota (soft limit: $LIMIT files grace: $GRACE seconds)" - $LFS setquota -t -u $MAX_DQ_TIME $GRACE $DIR - $LFS setquota -u $TSTUSR 0 0 $LIMIT 0 $DIR + $LFS setquota -t -u --block-grace $MAX_DQ_TIME --inode-grace $GRACE $DIR + $LFS setquota -u $TSTUSR -b 0 -B 0 -i $LIMIT -I 0 $DIR $SHOW_QUOTA_USER test_file_soft $TESTFILE $LIMIT $GRACE - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR echo " Group quota (soft limit: $LIMIT files grace: $GRACE seconds)" - $LFS setquota -t -g $MAX_DQ_TIME $GRACE $DIR - $LFS setquota -g $TSTUSR 0 0 $LIMIT 0 $DIR + $LFS setquota -t -g --block-grace $MAX_DQ_TIME --inode-grace $GRACE $DIR + $LFS setquota -g $TSTUSR -b 0 -B 0 -i $LIMIT -I 0 $DIR $SHOW_QUOTA_GROUP TESTFILE=$DIR/$tdir/$tfile-1 test_file_soft $TESTFILE $LIMIT $GRACE - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR # cleanup - $LFS setquota -t -u $MAX_DQ_TIME $MAX_IQ_TIME $DIR - $LFS setquota -t -g $MAX_DQ_TIME 
$MAX_IQ_TIME $DIR + $LFS setquota -t -u --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR + $LFS setquota -t -g --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR } -run_test 4a "File soft limit (start timer, timer goes off, stop timer) ===" +run_test_with_stat 4a "File soft limit (start timer, timer goes off, stop timer) ===" test_4b() { # was test_4a - GR_STR1="1w3d" - GR_STR2="1000s" - GR_STR3="5s" - GR_STR4="1w2d3h4m5s" - GR_STR5="5c" - GR_STR6="1111111111111111" - - # test of valid grace strings handling - echo " Valid grace strings test" - $LFS setquota -t -u $GR_STR1 $GR_STR2 $DIR - $LFS quota -u -t $DIR | grep "Block grace time: $GR_STR1" - $LFS setquota -t -g $GR_STR3 $GR_STR4 $DIR - $LFS quota -g -t $DIR | grep "Inode grace time: $GR_STR4" - - # test of invalid grace strings handling - echo " Invalid grace strings test" - ! $LFS setquota -t -u $GR_STR4 $GR_STR5 $DIR - ! $LFS setquota -t -g $GR_STR4 $GR_STR6 $DIR - - # cleanup - $LFS setquota -t -u $MAX_DQ_TIME $MAX_IQ_TIME $DIR - $LFS setquota -t -g $MAX_DQ_TIME $MAX_IQ_TIME $DIR + GR_STR1="1w3d" + GR_STR2="1000s" + GR_STR3="5s" + GR_STR4="1w2d3h4m5s" + GR_STR5="5c" + GR_STR6="1111111111111111" + + wait_delete_completed + + # test of valid grace strings handling + echo " Valid grace strings test" + $LFS setquota -t -u --block-grace $GR_STR1 --inode-grace $GR_STR2 $DIR + $LFS quota -u -t $DIR | grep "Block grace time: $GR_STR1" + $LFS setquota -t -g --block-grace $GR_STR3 --inode-grace $GR_STR4 $DIR + $LFS quota -g -t $DIR | grep "Inode grace time: $GR_STR4" + + # test of invalid grace strings handling + echo " Invalid grace strings test" + ! $LFS setquota -t -u --block-grace $GR_STR4 --inode-grace $GR_STR5 $DIR + ! 
$LFS setquota -t -g --block-grace $GR_STR4 --inode-grace $GR_STR6 $DIR + + # cleanup + $LFS setquota -t -u --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR + $LFS setquota -t -g --block-grace $MAX_DQ_TIME --inode-grace $MAX_IQ_TIME $DIR } -run_test 4b "Grace time strings handling ===" +run_test_with_stat 4b "Grace time strings handling ===" # chown & chgrp (chown & chgrp successfully even out of block/file quota) test_5() { mkdir -p $DIR/$tdir BLIMIT=$(( $BUNIT_SZ * $((OSTCOUNT + 1)) * 10)) # 10 bunits on each server ILIMIT=$(( $IUNIT_SZ * 10 )) # 10 iunits on mds - + + wait_delete_completed + echo " Set quota limit (0 $BLIMIT 0 $ILIMIT) for $TSTUSR.$TSTUSR" - $LFS setquota -u $TSTUSR 0 $BLIMIT 0 $ILIMIT $DIR - $LFS setquota -g $TSTUSR 0 $BLIMIT 0 $ILIMIT $DIR + $LFS setquota -u $TSTUSR -b 0 -B $BLIMIT -i 0 -I $ILIMIT $DIR + $LFS setquota -g $TSTUSR -b 0 -B $BLIMIT -i 0 -I $ILIMIT $DIR $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - + echo " Create more than $ILIMIT files and more than $BLIMIT kbytes ..." 
createmany -m $DIR/$tdir/$tfile-0_ $((ILIMIT + 1)) || \ error "touch failure, expect success" @@ -493,11 +594,12 @@ test_5() { # cleanup unlinkmany $DIR/$tdir/$tfile-0_ $((ILIMIT + 1)) + sync; sleep 3; sync; - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR } -run_test 5 "Chown & chgrp successfully even out of block/file quota ===" +run_test_with_stat 5 "Chown & chgrp successfully even out of block/file quota ===" # block quota acquire & release test_6() { @@ -506,16 +608,18 @@ test_6() { return 0; fi + wait_delete_completed + mkdir -p $DIR/$tdir chmod 0777 $DIR/$tdir LIMIT=$((BUNIT_SZ * (OSTCOUNT + 1) * 5)) # 5 bunits per server FILEA="$DIR/$tdir/$tfile-0_a" FILEB="$DIR/$tdir/$tfile-0_b" - + echo " Set block limit $LIMIT kbytes to $TSTUSR.$TSTUSR" - $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR - $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP @@ -526,42 +630,44 @@ test_6() { chown $TSTUSR.$TSTUSR $FILEB echo " Exceed quota limit ..." 
- RUNDD="$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ" - $RUNDD count=$((LIMIT - BUNIT_SZ * OSTCOUNT)) || \ - error "write fileb failure, but expect success" + RUNDD="$RUNAS dd if=/dev/zero of=$FILEB bs=$BLK_SZ" + $RUNDD count=$((LIMIT - BUNIT_SZ * OSTCOUNT)) || \ + error "write fileb failure, but expect success" - sync; sleep 1; sync; - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP - $RUNDD seek=$LIMIT count=$((BUNIT_SZ * OSTCOUNT)) && \ - error "write fileb success, but expect EDQUOT" - sync; sleep 1; sync; + cancel_lru_locks osc + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + $RUNDD seek=$LIMIT count=$((BUNIT_SZ * OSTCOUNT)) && \ + error "write fileb success, but expect EDQUOT" + cancel_lru_locks osc echo " Write to OST0 return EDQUOT" # this write maybe cache write, ignore it's failure - RUNDD="$RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ" - $RUNDD count=$(($BUNIT_SZ * 2)) || true - sync; sleep 1; sync; - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP - $RUNDD count=$((BUNIT_SZ * 2)) seek=$((BUNIT_SZ *2)) && \ - error "write filea success, but expect EDQUOT" + RUNDD="$RUNAS dd if=/dev/zero of=$FILEA bs=$BLK_SZ" + $RUNDD count=$(($BUNIT_SZ * 2)) || true + cancel_lru_locks osc + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + $RUNDD count=$((BUNIT_SZ * 2)) seek=$((BUNIT_SZ *2)) && \ + error "write filea success, but expect EDQUOT" echo " Remove fileb to let OST1 release quota" rm -f $FILEB - sync; sleep 10; sync; # need to allow journal commit for small fs + sync; sleep 10; sync; # need to allow journal commit for small fs echo " Write to OST0" $RUNDD count=$((LIMIT - BUNIT_SZ * OSTCOUNT)) || \ - error "write filea failure, expect success" + error "write filea failure, expect success" echo " Done" # cleanup rm -f $FILEA - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + sync; sleep 3; sync; + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR return 0 } -run_test 6 "Block quota acquire & release =========" 
+run_test_with_stat 6 "Block quota acquire & release =========" # quota recovery (block quota only by now) test_7() @@ -569,23 +675,25 @@ test_7() mkdir -p $DIR/$tdir chmod 0777 $DIR/$tdir - LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever + wait_delete_completed + + LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) )) TESTFILE="$DIR/$tdir/$tfile-0" - - $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR - + + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + $LFS setstripe $TESTFILE -c 1 chown $TSTUSR.$TSTUSR $TESTFILE echo " Write to OST0..." $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ || \ error "write failure, but expect success" - - #define OBD_FAIL_OBD_DQACQ 0x604 - echo 0x604 > /proc/sys/lustre/fail_loc + + #define OBD_FAIL_OBD_DQACQ 0x604 + lustre_fail mds 0x604 echo " Remove files on OST0" rm -f $TESTFILE - echo 0 > /proc/sys/lustre/fail_loc + lustre_fail mds 0 echo " Trigger recovery..." OSC0_UUID="`$LCTL dl | awk '$3 ~ /osc/ { print $1 }'`" @@ -598,20 +706,20 @@ test_7() # check limits PATTERN="`echo $DIR | sed 's/\//\\\\\//g'`" - TOTAL_LIMIT="`$LFS quota -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`" + TOTAL_LIMIT="`$LFS quota -v -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`" [ $TOTAL_LIMIT -eq $LIMIT ] || error "total limits not recovery!" 
echo " total limits = $TOTAL_LIMIT" - - OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'` - [ -z "$OST0_UUID" ] && OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'` - OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $3 }'`" + + OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'` + [ -z "$OST0_UUID" ] && OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'` + OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $2 }'`" [ $OST0_LIMIT -eq $BUNIT_SZ ] || error "high limits not released!" echo " limits on $OST0_UUID = $OST0_LIMIT" # cleanup - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR } -run_test 7 "Quota recovery (only block limit) ======" +run_test_with_stat 7 "Quota recovery (only block limit) ======" # run dbench with quota enabled test_8() { @@ -622,9 +730,9 @@ test_8() { wait_delete_completed echo " Set enough high limit for user: $TSTUSR" - $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR + $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR echo " Set enough high limit for group: $TSTUSR" - $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR + $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR chmod 0777 $DIR/$tdir local duration="" @@ -633,162 +741,140 @@ test_8() { sync; sleep 3; sync; - return 0 + return 0 } -run_test 8 "Run dbench with quota enabled ===========" +run_test_with_stat 8 "Run dbench with quota enabled ===========" # run for fixing bug10707, it needs a big room. 
test for 64bit KB=1024 GB=$((KB * 1024 * 1024)) -FSIZE=$((OSTCOUNT * 9 / 2)) # Use this as dd bs to decrease time # inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS); blksize=$((1 << 21)) # 2Mb +size_file=$((GB * 9 / 2)) +# this check is just for test9 and test10 +OST0_MIN=4900000 #4.67G +check_whether_skip () { + OST0_SIZE=`$LFS df $DIR | awk '/\[OST:0\]/ {print $4}'` + log "OST0_SIZE: $OST0_SIZE required: $OST0_MIN" + if [ $OST0_SIZE -lt $OST0_MIN ]; then + echo "WARN: OST0 has less than $OST0_MIN free, skip this test." + return 0 + else + return 1 + fi +} test_9() { - chmod 0777 $DIR/$tdir - lustrefs_size=`(echo 0; df -t lustre -P | awk '{print $4}') | tail -n 1` - size_file=$((FSIZE * GB)) - echo "lustrefs_size:$lustrefs_size size_file:$((size_file / KB))" - if [ $((lustrefs_size * KB)) -lt $size_file ]; then - skip "less than $size_file bytes free" - return 0; - fi + check_whether_skip && return 0 - set_blk_unitsz $((1024 * 100)) - set_blk_tunesz $((1024 * 50)) - - # set the D_QUOTA flag - debugsave - sysctl -w lnet.debug="+quota" - - TESTFILE="$DIR/$tdir/$tfile-0" + wait_delete_completed - BLK_LIMIT=$((100 * KB * KB)) # 100G - FILE_LIMIT=1000000 + set_blk_tunesz 512 + set_blk_unitsz 1024 - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" - $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" - $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR + mkdir -p $DIR/$tdir + chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" - echo " Set stripe" - [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE -c $OSTCOUNT - touch $TESTFILE - chown $TSTUSR.$TSTUSR $TESTFILE + BLK_LIMIT=$((100 * KB * KB)) # 100G + FILE_LIMIT=1000000 + echo " Set block limit $BLK_LIMIT kbytes to $TSTUSR.$TSTUSR" - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" + $LFS 
setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" + $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR - echo " Write the big file of $FSIZE G ..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((size_file / blksize)) || \ - error "(usr) write $FSIZE G file failure, but expect success" + echo " Set stripe" + $LFS setstripe $TESTFILE -c 1 + touch $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP - echo " delete the big file of $FSIZE G..." - $RUNAS rm -f $TESTFILE + log " Write the big file of 4.5G ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((size_file / blksize)) || \ + error "(usr) write 4.5G file failure, but expect success" - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP - echo " write the big file of 2 G..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((2 * GB / blksize)) || \ - error "(usr) write 2 G file failure, but expect seccess" + log " delete the big file of 4.5G..." + $RUNAS rm -f $TESTFILE + sync; sleep 3; sync; - echo " delete the big file of 2 G..." - $RUNAS rm -f $TESTFILE - RC=$? + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP - set_blk_tunesz $BTUNE_SZ - set_blk_unitsz $BUNIT_SZ + RC=$? - debugrestore - wait_delete_completed + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) - return $RC + return $RC } -run_test 9 "run for fixing bug10707(64bit) ===========" +run_test_with_stat 9 "run for fixing bug10707(64bit) ===========" # run for fixing bug10707, it need a big room. test for 32bit +# 2.0 version does not support 32 bit qd_count, so such test is obsolete. 
test_10() { mkdir -p $DIR/$tdir chmod 0777 $DIR/$tdir - lustrefs_size=`(echo 0; df -t lustre -P | awk '{print $4}') | tail -n 1` - size_file=$((FSIZE * GB)) - echo "lustrefs_size:$lustrefs_size size_file:$((size_file / KB))" - if [ $((lustrefs_size * KB)) -lt $size_file ]; then - skip "less than $size_file bytes free" - return 0; - fi + check_whether_skip && return 0 - sync; sleep 10; sync; + wait_delete_completed - set_blk_unitsz $((1024 * 100)) - set_blk_tunesz $((1024 * 50)) + set_blk_tunesz 512 + set_blk_unitsz 1024 - # set the D_QUOTA flag - debugsave - sysctl -w lnet.debug="+quota" - # make qd_count 32 bit - sysctl -w lustre.fail_loc=0xA00 + lustre_fail mds_ost 0xA00 TESTFILE="$DIR/$tdir/$tfile-0" BLK_LIMIT=$((100 * KB * KB)) # 100G FILE_LIMIT=1000000 - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" - $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" - $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" + $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" + $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $DIR + echo " Set stripe" - [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE -c $OSTCOUNT + $LFS setstripe $TESTFILE -c 1 touch $TESTFILE chown $TSTUSR.$TSTUSR $TESTFILE - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP - - echo " Write the big file of $FSIZE G ..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((size_file / blksize)) || \ - error "(usr) write $FSIZE G file failure, but expect success" - - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP - echo " delete the big file of $FSIZE G..." - $RUNAS rm -f $TESTFILE + log " Write the big file of 4.5 G ..." 
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((size_file / blksize)) || \ + error "(usr) write 4.5 G file failure, but expect success" - $SHOW_QUOTA_USER - $SHOW_QUOTA_GROUP + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP - echo " write the big file of 2 G..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((2 * GB / blkzise)) || \ - error "(usr) write 2 G file failure, but expect success" + log " delete the big file of 4.5 G..." + $RUNAS rm -f $TESTFILE + sync; sleep 3; sync; - echo " delete the big file of 2 G..." - $RUNAS rm -f $TESTFILE + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP RC=$? - # clear the flage - debugrestore - # make qd_count 64 bit - sysctl -w lustre.fail_loc=0 + lustre_fail mds_ost 0 - set_blk_tunesz $BTUNE_SZ - set_blk_unitsz $BUNIT_SZ - - wait_delete_completed + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) return $RC } -run_test 10 "run for fixing bug10707(32bit) ===========" +#run_test_with_stat 10 "run for fixing bug10707(32bit) ===========" test_11() { wait_delete_completed @@ -796,14 +882,14 @@ test_11() { #prepare the test block_limit=`(echo 0; df -t lustre -P | awk '{print $(NF - 4)}') | tail -n 1` echo $block_limit - orig_dbr=`cat /proc/sys/vm/dirty_background_ratio` - orig_dec=`cat /proc/sys/vm/dirty_expire_centisecs` - orig_dr=`cat /proc/sys/vm/dirty_ratio` - orig_dwc=`cat /proc/sys/vm/dirty_writeback_centisecs` - echo 1 > /proc/sys/vm/dirty_background_ratio - echo 30 > /proc/sys/vm/dirty_expire_centisecs - echo 1 > /proc/sys/vm/dirty_ratio - echo 50 > /proc/sys/vm/dirty_writeback_centisecs + orig_dbr=`sysctl -n vm.dirty_background_ratio` + orig_dec=`sysctl -n vm.dirty_expire_centisecs` + orig_dr=`sysctl -n vm.dirty_ratio` + orig_dwc=`sysctl -n vm.dirty_writeback_centisecs` + sysctl -w vm.dirty_background_ratio=1 + sysctl -w vm.dirty_expire_centisecs=30 + sysctl -w vm.dirty_ratio=1 + sysctl -w vm.dirty_writeback_centisecs=50 TESTDIR="$DIR/$tdir" local RV=0 @@ -819,7 +905,7 @@ test_11() { echo -n " create a 
file for uid " for j in `seq 1 30`; do echo -n "$j " - # 30MB per dd for a total of 900MB (if space even permits) + # 30MB per dd for a total of 900MB (if space even permits) runas -u $j dd if=/dev/zero of=$TESTDIR/$tfile bs=$blksize count=15 > /dev/null 2>&1 & done echo "" @@ -838,7 +924,7 @@ test_11() { RV=2 break fi - LAST_USED=$USED + LAST_USED=$USED done echo " removing the test files..." rm -f $TESTDIR/$tfile @@ -848,16 +934,16 @@ test_11() { echo "Test took $SECS sec" #clean - echo $orig_dbr > /proc/sys/vm/dirty_background_ratio - echo $orig_dec > /proc/sys/vm/dirty_expire_centisecs - echo $orig_dr > /proc/sys/vm/dirty_ratio - echo $orig_dwc > /proc/sys/vm/dirty_writeback_centisecs + sysctl -w vm.dirty_background_ratio=$orig_dbr + sysctl -w vm.dirty_expire_centisecs=$orig_dec + sysctl -w vm.dirty_ratio=$orig_dr + sysctl -w vm.dirty_writeback_centisecs=$orig_dwc if [ $RV -ne 0 ]; then - error "Nothing was written for $SECS sec ... aborting" + error "Nothing was written for $SECS sec ... 
aborting" fi return $RV } -run_test 11 "run for fixing bug10912 ===========" +run_test_with_stat 11 "run for fixing bug10912 ===========" # test a deadlock between quota and journal b=11693 @@ -868,106 +954,115 @@ test_12() { [ "$(grep $DIR2 /proc/mounts)" ] || mount_client $DIR2 || \ { skip "Need lustre mounted on $MOUNT2 " && retutn 0; } + if [ $OSTCOUNT -lt 2 ]; then + skip "$OSTCOUNT < 2, too few osts" + return 0; + fi + LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever TESTFILE="$DIR/$tdir/$tfile-0" TESTFILE2="$DIR2/$tdir/$tfile-1" - + + wait_delete_completed + echo " User quota (limit: $LIMIT kbytes)" - $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR - $LFS setstripe $TESTFILE -i 0 -c 1 + $LFS setstripe $TESTFILE -i 0 -c 1 chown $TSTUSR.$TSTUSR $TESTFILE - $LFS setstripe $TESTFILE2 -i 0 -c 1 - chown $TSTUSR2.$TSTUSR2 $TESTFILE2 + $LFS setstripe $TESTFILE2 -i 1 -c 1 + chown $TSTUSR2.$TSTUSR2 $TESTFILE2 #define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f - sysctl -w lustre.fail_loc=0x0000021f + lustre_fail ost 0x0000021f echo " step1: write out of block quota ..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT*2)) & - DDPID=$! - sleep 5 - $RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 & + $RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 & DDPID1=$! + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT*2)) & + DDPID=$! echo " step2: testing ......" count=0 while [ true ]; do - if [ -z `ps -ef | awk '$2 == '${DDPID1}' { print $8 }'` ]; then break; fi + if ! ps -p ${DDPID1} > /dev/null 2>&1; then break; fi count=$[count+1] if [ $count -gt 64 ]; then - sysctl -w lustre.fail_loc=0 + lustre_fail ost 0 error "dd should be finished!" fi sleep 1 - done + done echo "(dd_pid=$DDPID1, time=$count)successful" #Recover fail_loc and dd will finish soon - sysctl -w lustre.fail_loc=0 + lustre_fail ost 0 echo " step3: testing ......" 
count=0 while [ true ]; do - if [ -z `ps -ef | awk '$2 == '${DDPID}' { print $8 }'` ]; then break; fi + if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi count=$[count+1] - if [ $count -gt 100 ]; then + if [ $count -gt 150 ]; then error "dd should be finished!" fi sleep 1 - done + done echo "(dd_pid=$DDPID, time=$count)successful" rm -f $TESTFILE $TESTFILE2 - - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit + sync; sleep 3; sync; + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR } -run_test 12 "test a deadlock between quota and journal ===" +run_test_with_stat 12 "test a deadlock between quota and journal ===" # test multiple clients write block quota b=11693 test_13() { + mkdir -p $DIR/$tdir + wait_delete_completed + # one OST * 10 + (mds + other OSTs) LIMIT=$((BUNIT_SZ * 10 + (BUNIT_SZ * OSTCOUNT))) TESTFILE="$DIR/$tdir/$tfile" - mkdir -p $DIR/$tdir echo " User quota (limit: $LIMIT kbytes)" - $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR $SHOW_QUOTA_USER - + $LFS setstripe $TESTFILE -i 0 -c 1 chown $TSTUSR.$TSTUSR $TESTFILE $LFS setstripe $TESTFILE.2 -i 0 -c 1 - chown $TSTUSR.$TSTUSR $TESTFILE.2 + chown $TSTUSR.$TSTUSR $TESTFILE.2 echo " step1: write out of block quota ..." # one bunit will give mds - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & DDPID=$! - $RUNAS dd if=/dev/zero of=$TESTFILE.2 bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & + $RUNAS dd if=/dev/zero of=$TESTFILE.2 bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & DDPID1=$! echo " step2: testing ......" count=0 while [ true ]; do - if [ -z `ps -ef | awk '$2 == '${DDPID}' { print $8 }'` ]; then break; fi + if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi count=$[count+1] if [ $count -gt 64 ]; then error "dd should be finished!" 
fi sleep 1 - done + done echo "(dd_pid=$DDPID, time=$count)successful" count=0 while [ true ]; do - if [ -z `ps -ef | awk '$2 == '${DDPID1}' { print $8 }'` ]; then break; fi + if ! ps -p ${DDPID1} > /dev/null 2>&1 ; then break; fi count=$[count+1] if [ $count -gt 64 ]; then error "dd should be finished!" fi sleep 1 - done + done echo "(dd_pid=$DDPID1, time=$count)successful" sync; sleep 5; sync; @@ -980,13 +1075,14 @@ test_13() { error "files too small $fz + $fz2 < $((BUNIT_SZ * BLK_SZ * 10))" rm -f $TESTFILE $TESTFILE.2 - - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit + sync; sleep 3; sync; + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR } -run_test 13 "test multiple clients write block quota ===" +run_test_with_stat 13 "test multiple clients write block quota ===" check_if_quota_zero(){ - line=`$LFS quota -$1 $2 $DIR | wc -l` + line=`$LFS quota -v -$1 $2 $DIR | wc -l` for i in `seq 3 $line`; do if [ $i -eq 3 ]; then field="3 4 6 7" @@ -994,67 +1090,684 @@ check_if_quota_zero(){ field="3 5" fi for j in $field; do - tmp=`$LFS quota -$1 $2 $DIR | sed -n ${i}p | - awk '{print $'"$j"'}'` - [ -n "$tmp" ] && [ $tmp -ne 0 ] && $LFS quota -$1 $2 $DIR && \ + tmp=`$LFS quota -v -$1 $2 $DIR | sed -n ${i}p | + awk '{print $'"$j"'}'` + [ -n "$tmp" ] && [ $tmp -ne 0 ] && $LFS quota -v -$1 $2 $DIR && \ error "quota on $2 isn't clean" done done echo "pass check_if_quota_zero" } -pre_test_14 () { - # reboot the lustre - cd $T_PWD; sh llmountcleanup.sh || error "llmountcleanup failed" - sh llmount.sh - pre_test - run_test 0 "reboot lustre" -} - -pre_test_14 - test_14a() { # was test_14 b=12223 -- setting quota on root TESTFILE="$DIR/$tdir/$tfile" + + # reboot the lustre + sync; sleep 5; sync + cleanup_and_setup_lustre + test_0 + mkdir -p $DIR/$tdir # out of root's file and block quota - $LFS setquota -u root 10 10 10 10 $DIR + $LFS setquota -u root -b 10 -B 10 -i 10 -I 10 $DIR createmany -m ${TESTFILE} 20 || \ error "unexpected: user(root) create files failly!" 
dd if=/dev/zero of=$TESTFILE bs=4k count=4096 || \ error "unexpected: user(root) write files failly!" chmod 666 $TESTFILE $RUNAS dd if=/dev/zero of=${TESTFILE} seek=4096 bs=4k count=4096 && \ - error "unexpected: user(quota_usr) write a file successfully!" + error "unexpected: user(quota_usr) write a file successfully!" # trigger the llog chmod 777 $DIR - for i in `seq 1 10`; do $RUNAS touch ${TESTFILE}a_$i; done - for i in `seq 1 10`; do $RUNAS rm -f ${TESTFILE}a_$i; done + for i in `seq 1 10`; do $RUNAS touch ${TESTFILE}a_$i; done + for i in `seq 1 10`; do $RUNAS rm -f ${TESTFILE}a_$i; done # do the check - dmesg | tail | grep "\-122" |grep llog_obd_origin_add && error "err -122 not found in dmesg" - $LFS setquota -u root 0 0 0 0 $DIR + dmesg | tail | grep "\-122" |grep llog_obd_origin_add && error "err -122 not found in dmesg" + $LFS setquota -u root -b 0 -B 0 -i 0 -I 0 $DIR #check_if_quota_zero u root - # clean + # clean unlinkmany ${TESTFILE} 15 rm -f $TESTFILE + sync; sleep 3; sync; +} +run_test_with_stat 14a "test setting quota on root ===" + +# save quota version (both administrative and operational quotas) +quota_save_version() { + do_facet mgs "lctl conf_param ${FSNAME}-MDT*.mdd.quota_type=$1" + do_facet mgs "lctl conf_param ${FSNAME}-OST*.ost.quota_type=$1" + sleep 5 +} + +test_15(){ + LIMIT=$((24 * 1024 * 1024 * 1024 * 1024)) # 24 TB + PATTERN="`echo $DIR | sed 's/\//\\\\\//g'`" + + wait_delete_completed + + # test for user + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + TOTAL_LIMIT="`$LFS quota -v -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`" + [ $TOTAL_LIMIT -eq $LIMIT ] || error " (user)total limits = $TOTAL_LIMIT; limit = $LIMIT, failed!" + echo " (user)total limits = $TOTAL_LIMIT; limit = $LIMIT, successful!" 
+ $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + + # test for group + $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + TOTAL_LIMIT="`$LFS quota -v -g $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`" + [ $TOTAL_LIMIT -eq $LIMIT ] || error " (group)total limits = $TOTAL_LIMIT; limit = $LIMIT, failed!" + echo " (group)total limits = $TOTAL_LIMIT; limit = $LIMIT, successful!" + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR + $LFS quotaoff -ug $DIR + do_facet $SINGLEMDS "lctl set_param lquota.mdd_obd-${FSNAME}-MDT*.quota_type=ug" | grep "error writing" && \ + error "fail to set version for $SINGLEMDS" + for j in `seq $OSTCOUNT`; do + do_facet ost$j "lctl set_param lquota.${FSNAME}-OST*.quota_type=ug" | grep "error writing" && \ + error "fail to set version for ost$j" + done + + echo "invalidating quota files" + $LFS quotainv -ug $DIR + $LFS quotainv -ugf $DIR + $LFS quotacheck -ug $DIR +} +run_test_with_stat 15 "set block quota more than 4T ===" + +# $1=u/g $2=with qunit adjust or not +test_16_tub() { + LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 4)) + TESTFILE="$DIR/$tdir/$tfile" + mkdir -p $DIR/$tdir + + wait_delete_completed + + echo " User quota (limit: $LIMIT kbytes)" + if [ $1 == "u" ]; then + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + $SHOW_QUOTA_USER + else + $LFS setquota -g $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + $SHOW_QUOTA_GROUP + fi + + $LFS setstripe $TESTFILE -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + echo " Write ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((BUNIT_SZ * 4)) || \ + error "(usr) write failure, but expect success" + echo " Done" + echo " Write out of block quota ..." 
+ # this time maybe cache write, ignore it's failure + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) || true + # flush cache, ensure noquota flag is setted on client + cancel_lru_locks osc + if [ $2 -eq 1 ]; then + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) || \ + error "(write failure, but expect success" + else + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) && \ + error "(write success, but expect EDQUOT" + fi + + rm -f $TESTFILE + sync; sleep 3; sync; + $LFS setquota -$1 $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR +} + +# test without adjusting qunit +# 2.0 version does not support WITHOUT_CHANGE_QS, so such test is obsolete +test_16 () { + set_blk_tunesz $((BUNIT_SZ * 2)) + set_blk_unitsz $((BUNIT_SZ * 4)) + for i in u g; do + for j in 0 1; do + # define OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS 0xA01 + echo " grp/usr: $i, adjust qunit: $j" + echo "-------------------------------" + [ $j -eq 1 ] && lustre_fail mds_ost 0 + [ $j -eq 0 ] && lustre_fail mds_ost 0xA01 + test_16_tub $i $j + done + done + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) +} +#run_test_with_stat 16 "test without adjusting qunit" + +# run for fixing bug14526, failed returned quota reqs shouldn't ruin lustre. 
+test_17() { + set_blk_tunesz 512 + set_blk_unitsz 1024 + + wait_delete_completed + + #define OBD_FAIL_QUOTA_RET_QDATA | OBD_FAIL_ONCE + lustre_fail ost 0x80000A02 + + TESTFILE="$DIR/$tdir/$tfile-a" + TESTFILE2="$DIR/$tdir/$tfile-b" + mkdir -p $DIR/$tdir + + BLK_LIMIT=$((100 * 1024)) # 100M + + log " Set enough high limit(block:$BLK_LIMIT) for user: $TSTUSR" + $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR + log " Set enough high limit(block:$BLK_LIMIT) for group: $TSTUSR" + $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I 0 $DIR + + touch $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE + touch $TESTFILE2 + chown $TSTUSR.$TSTUSR $TESTFILE2 + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + + log " Write the test file1 ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(( 10 * 1024 )) \ + || echo "write 10M file failure" + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + + log " write the test file2 ..." + $RUNAS dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=$(( 10 * 1024 )) \ + || error "write 10M file failure" + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + + rm -f $TESTFILE $TESTFILE2 + RC=$? 
+ sync; sleep 3; sync; + + # make qd_count 64 bit + lustre_fail ost 0 + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + + return $RC +} +run_test_with_stat 17 "run for fixing bug14526 ===========" + +# test when mds takes a long time to handle a quota req so that +# the ost has dropped it, the ost still could work well b=14840 +test_18() { + LIMIT=$((100 * 1024 * 1024)) # 100G + TESTFILE="$DIR/$tdir/$tfile" + mkdir -p $DIR/$tdir + + wait_delete_completed + + set_blk_tunesz 512 + set_blk_unitsz 1024 + + log " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT + $SHOW_QUOTA_USER + + $LFS setstripe $TESTFILE -i 0 -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + #define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x142 + lustre_fail mds 0x142 + + log " step1: write 100M block ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 100)) & + DDPID=$! + + sleep 5 + lustre_fail mds 0 + + echo " step2: testing ......" + count=0 + timeout=$(lctl get_param -n timeout) + while [ true ]; do + if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi + count=$[count+1] + if [ $count -gt $((4 * $timeout)) ]; then + error "count=$count dd should be finished!" + fi + sleep 1 + done + log "(dd_pid=$DDPID, time=$count, timeout=$timeout)" + if [ $count -lt $(($timeout - 10)) ]; then + error " should take longer!" + else + echo " successful" + fi + + testfile_size=$(stat -c %s $TESTFILE) + [ $testfile_size -ne $((BLK_SZ * 1024 * 100)) ] && \ + error "verifying file failed!" 
+ rm -f $TESTFILE + sync; sleep 3; sync; + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) +} +run_test_with_stat 18 "run for fixing bug14840 ===========" + +# test when mds drops a quota req, the ost still could work well b=14840 +test_18a() { + LIMIT=$((100 * 1024 * 1024)) # 100G + TESTFILE="$DIR/$tdir/$tfile-a" + mkdir -p $DIR/$tdir + + wait_delete_completed + + set_blk_tunesz 512 + set_blk_unitsz 1024 + + log " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT + $SHOW_QUOTA_USER + + $LFS setstripe $TESTFILE -i 0 -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + #define OBD_FAIL_MDS_DROP_QUOTA_REQ | OBD_FAIL_ONCE 0x80000143 + lustre_fail mds 0x80000143 + + log " step1: write 100M block ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((1024 * 100)) & + DDPID=$! + + echo " step2: testing ......" + count=0 + timeout=$(lctl get_param -n timeout) + while [ true ]; do + if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi + count=$[count+1] + if [ $count -gt $((6 * $timeout)) ]; then + lustre_fail mds 0 + error "count=$count dd should be finished!" + fi + sleep 1 + done + log "(dd_pid=$DDPID, time=$count, timeout=$timeout)" + if [ $count -lt $(($timeout - 10)) ]; then + lustre_fail mds 0 + error " should take longer!" 
+ else + echo " successful" + fi + + lustre_fail mds 0 + + rm -f $TESTFILE + sync; sleep 3; sync; + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) +} +run_test_with_stat 18a "run for fixing bug14840 ===========" + +# test when mds does failover, the ost still could work well without triggering +# the watchdog b=14840 +test_18bc_sub() { + type=$1 + + LIMIT=$((110 * 1024 )) # 110M + TESTFILE="$DIR/$tdir/$tfile" + mkdir -p $DIR/$tdir + + wait_delete_completed + + set_blk_tunesz 512 + set_blk_unitsz 1024 + + log " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT + $SHOW_QUOTA_USER + + $LFS setstripe $TESTFILE -i 0 -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + timeout=$(sysctl -n lustre.timeout) + + if [ $type = "directio" ]; then + log " write 100M block(directio) ..." + $RUNAS $DIRECTIO write $TESTFILE 0 100 $((BLK_SZ * 1024)) & + else + log " write 100M block(normal) ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$((BLK_SZ * 1024)) count=100 & + fi + + DDPID=$! + do_facet $SINGLEMDS "$LCTL conf_param ${FSNAME}-MDT*.mdd.quota_type=ug" + + log "failing mds for $((2 * timeout)) seconds" + fail $SINGLEMDS $((2 * timeout)) + + # check if quotaon successful + $LFS quota -u $TSTUSR $MOUNT 2>&1 | grep -q "quotas are not enabled" + if [ $? -eq 0 ]; then + error "quotaon failed!" + rm -rf $TESTFILE + return + fi + + count=0 + while [ true ]; do + if ! ps -p ${DDPID} > /dev/null 2>&1; then break; fi + if [ $((++count % (2 * timeout) )) -eq 0 ]; then + log "it took $count second" + fi + sleep 1 + done + log "(dd_pid=$DDPID, time=$count, timeout=$timeout)" + sync; sleep 1; sync + + testfile_size=$(stat -c %s $TESTFILE) + [ $testfile_size -ne $((BLK_SZ * 1024 * 100)) ] && \ + error "verifying file failed!" 
+ $SHOW_QUOTA_USER + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + rm -rf $TESTFILE + sync; sleep 1; sync +} + +# test when mds does failover, the ost still could work well +# this test shouldn't trigger watchdog b=14840 +test_18b() { + test_18bc_sub normal + test_18bc_sub directio + # check if watchdog is triggered + MSG="test 18b: run for fixing bug14840" + do_facet ost1 "dmesg > $TMP/lustre-log-${TESTNAME}.log" + do_facet client cat > $TMP/lustre-log-${TESTNAME}.awk <<-EOF + /$MSG/ { + start = 1; + } + /Watchdog triggered/ { + if (start) { + print \$0; + } + } + EOF + watchdog=`do_facet ost1 awk -f $TMP/lustre-log-${TESTNAME}.awk $TMP/lustre-log-${TESTNAME}.log` + if [ -n "$watchdog" ]; then error "$watchdog"; fi +} +run_test_with_stat 18b "run for fixing bug14840(mds failover, no watchdog) ===========" + +# test when mds does failover, the ost still could work well +# this test will prevent OST_DISCONNECT from happening b=14840 +test_18c() { + # define OBD_FAIL_OST_DISCONNECT_NET 0x202(disable ost_disconnect for osts) + lustre_fail ost 0x202 + test_18bc_sub normal + test_18bc_sub directio + lustre_fail ost 0 +} +run_test_with_stat 18c "run for fixing bug14840(mds failover, OST_DISCONNECT is disabled) ===========" + +run_to_block_limit() { + local LIMIT=$((($OSTCOUNT + 1) * $BUNIT_SZ)) + local TESTFILE=$1 + wait_delete_completed + + # set 1 Mb quota unit size + set_blk_tunesz 512 + set_blk_unitsz 1024 + + # bind file to a single OST + $LFS setstripe -c 1 $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE + + echo " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT + $SHOW_QUOTA_USER + echo " Updating quota limits" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $MOUNT + $SHOW_QUOTA_USER + + RUNDD="$RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ" + $RUNDD count=$BUNIT_SZ || error "(usr) write failure, but expect success" + # for now page cache of TESTFILE may still be dirty, + # let's push it to the corresponding 
OST, this will also + # cache NOQUOTA on the client from OST's reply + cancel_lru_locks osc + $RUNDD seek=$BUNIT_SZ && error "(usr) write success, should be EDQUOT" +} + +test_19() { + # 1 Mb bunit per each MDS/OSS + local TESTFILE="$DIR/$tdir/$tfile" + mkdir -p $DIR/$tdir + + run_to_block_limit $TESTFILE + $SHOW_QUOTA_USER + + # cleanup + rm -f $TESTFILE + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + +} +run_test_with_stat 19 "test if administrative limits updates do not zero operational limits (14790) ===" + +test_20() +{ + LSTR=(1t 2g 3m 4k) # limits strings + LVAL=($[1*1024*1024*1024] $[2*1024*1024] $[3*1024*1024] $[4*1024]) # limits values + + $LFS setquota -u $TSTUSR --block-softlimit ${LSTR[0]} \ + $MOUNT || error "could not set quota limits" + + $LFS setquota -u $TSTUSR --block-hardlimit ${LSTR[1]} \ + --inode-softlimit ${LSTR[2]} \ + --inode-hardlimit ${LSTR[3]} \ + $MOUNT || error "could not set quota limits" + + ($LFS quota -v -u $TSTUSR $MOUNT | \ + grep -E '^ *'$MOUNT' *[0-9]+\** *'${LVAL[0]}' *'${LVAL[1]}' *[0-9]+\** *'${LVAL[2]}' *'${LVAL[3]}) \ + || error "lfs quota output is unexpected" + + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 \ + $MOUNT || error "could not reset quota limits" + +} +run_test_with_stat 20 "test if setquota specifiers work properly (15754)" + +test_21_sub() { + local testfile=$1 + local blk_number=$2 + local seconds=$3 + + time=$(($(date +%s) + seconds)) + while [ $(date +%s) -lt $time ]; do + $RUNAS dd if=/dev/zero of=$testfile bs=$BLK_SZ count=$blk_number > /dev/null 2>&1 + rm -f $testfile + done } -run_test 14a "test setting quota on root ===" + +# run for fixing bug16053, setquota shouldn't fail when writing and +# deleting are happening +test_21() { + set_blk_tunesz 512 + set_blk_unitsz 1024 + + wait_delete_completed + + TESTFILE="$DIR/$tdir/$tfile" + + BLK_LIMIT=$((10 * 1024 * 1024)) # 10G + FILE_LIMIT=1000000 + + log " Set enough high 
limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" + $LFS setquota -u $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $MOUNT + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" + $LFS setquota -g $TSTUSR -b 0 -B $BLK_LIMIT -i 0 -I $FILE_LIMIT $MOUNT + + # repeat writing on a 1M file + test_21_sub ${TESTFILE}_1 1024 30 & + DDPID1=$! + # repeat writing on a 128M file + test_21_sub ${TESTFILE}_2 $((1024 * 128)) 30 & + DDPID2=$! + + time=$(($(date +%s) + 30)) + i=1 + while [ $(date +%s) -lt $time ]; do + log " Set quota for $i times" + $LFS setquota -u $TSTUSR -b 0 -B $((BLK_LIMIT + 1024 * i)) -i 0 -I $((FILE_LIMIT + i)) $MOUNT + $LFS setquota -g $TSTUSR -b 0 -B $((BLK_LIMIT + 1024 * i)) -i 0 -I $((FILE_LIMIT + i)) $MOUNT + i=$((i+1)) + sleep 1 + done + + count=0 + while [ true ]; do + if ! ps -p ${DDPID1} > /dev/null 2>&1; then break; fi + count=$[count+1] + if [ $count -gt 60 ]; then + error "dd should be finished!" + fi + sleep 1 + done + echo "(dd_pid=$DDPID1, time=$count)successful" + + count=0 + while [ true ]; do + if ! ps -p ${DDPID2} > /dev/null 2>&1; then break; fi + count=$[count+1] + if [ $count -gt 60 ]; then + error "dd should be finished!" 
+ fi + sleep 1 + done + echo "(dd_pid=$DDPID2, time=$count)successful" + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + $LFS setquota -g $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + + return $RC +} +run_test_with_stat 21 "run for fixing bug16053 ===========" + +test_22() { + local SAVEREFORMAT + + SAVEREFORMAT=$REFORMAT + $LFS quotaoff -ug $DIR || error "could not turn quotas off" + + quota_save_version "ug" + + REFORMAT="reformat" + stopall + mount + setupall + REFORMAT=$SAVEREFORMAT + + echo "checking parameters" + + do_facet $SINGLEMDS "lctl get_param mdd.${FSNAME}-MDT*.quota_type" | grep "ug" || error "admin failure" + do_facet ost1 "lctl get_param obdfilter.*.quota_type" | grep "ug" || error "op failure" + + run_test 0 "reboot lustre" +} +run_test_with_stat 22 "test if quota_type saved as permanent parameter ====" + +test_23_sub() { + mkdir -p $DIR/$tdir + chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" + rm -f $TESTFILE + local bs_unit=$((1024*1024)) + LIMIT=$1 + + wait_delete_completed + + # test for user + log " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR -b 0 -B $LIMIT -i 0 -I 0 $DIR + sleep 3 + $SHOW_QUOTA_USER + + $LFS setstripe $TESTFILE -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + log " Step1: trigger quota with 0_DIRECT" + log " Write half of file" + $RUNAS $DIRECTIO write $TESTFILE 0 $(($LIMIT/1024/2)) $bs_unit || error "(1) write failure, but expect success: $LIMIT" + log " Write out of block quota ..." 
+ $RUNAS $DIRECTIO write $TESTFILE $(($LIMIT/1024/2)) $(($LIMIT/1024/2)) $bs_unit && error "(2) write success, but expect EDQUOT: $LIMIT" + log " Step1: done" + + log " Step2: rewrite should succeed" + $RUNAS $DIRECTIO write $TESTFILE $(($LIMIT/1024/2)) 1 $bs_unit || error "(3) write failure, but expect success: $LIMIT" + log " Step2: done" + + rm -f $TESTFILE + wait_delete_completed + OST0_UUID=`do_facet ost1 $LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'` + OST0_QUOTA_USED=`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'` + echo $OST0_QUOTA_USED + [ $OST0_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_USER + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $DIR +} + +test_23() { + log "run for $((OSTCOUNT * 4))MB test file" + test_23_sub $((OSTCOUNT * 4 * 1024)) + + OST0_MIN=120000 + check_whether_skip && return 0 + log "run for $((OSTCOUNT * 40))MB test file" + test_23_sub $((OSTCOUNT * 40 * 1024)) +} +run_test_with_stat 23 "run for fixing bug16125 ===========" + +test_24() { + local TESTFILE="$DIR/$tdir/$tfile" + mkdir -p $DIR/$tdir + + run_to_block_limit $TESTFILE + $SHOW_QUOTA_USER | grep '*' || error "no matching *" + + # cleanup + rm -f $TESTFILE + $LFS setquota -u $TSTUSR -b 0 -B 0 -i 0 -I 0 $MOUNT + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + +} +run_test_with_stat 24 "test if lfs draws an asterix when limit is reached (16646) ===========" # turn off quota test_99() { $LFS quotaoff $DIR + lctl set_param debug="-quota" + return 0 } -run_test 99 "Quota off ===============================" +run_test_with_stat 99 "Quota off ===============================" log "cleanup: ======================================================" cd $ORIG_PWD -post_test check_and_cleanup_lustre echo '=========================== finished ===============================' [ -f "$QUOTALOG" ] && cat $QUOTALOG && grep -q FAIL $QUOTALOG && exit 1 || true diff 
--git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh index 0bfb2f9..b65f722 100644 --- a/lustre/tests/sanity-sec.sh +++ b/lustre/tests/sanity-sec.sh @@ -64,10 +64,14 @@ fi MDT="`do_facet $SINGLEMDS "lctl get_param -N mdt.\*MDT\*/stats 2>/dev/null | cut -d"." -f2" || true`" if [ ! -z "$MDT" ]; then - do_facet $SINGLEMDS "mkdir -p $CONFDIR" + do_facet $SINGLEMDS "mkdir -p $CONFDIR" IDENTITY_FLUSH=mdt.$MDT.identity_flush MDSCAPA=mdt.$MDT.capa CAPA_TIMEOUT=mdt.$MDT.capa_timeout + MDSSECLEVEL=mdt.$MDT.sec_level + LOCALMDT=$MDT +else + LOCALMDT="" fi # for CLIENT_TYPE @@ -121,25 +125,41 @@ sec_setup # run as different user test_0() { - umask 0022 + umask 0022 - chmod 0755 $DIR || error "chmod (1)" - rm -rf $DIR/$tdir || error "rm (1)" + chmod 0755 $DIR || error "chmod (1)" + rm -rf $DIR/* || error "rm (1)" mkdir -p $DIR/$tdir || error "mkdir (1)" - chown $USER0 $DIR/$tdir || error "chown (1)" + + if [ "$CLIENT_TYPE" = "remote" ]; then + [ -z "$MDT" ] && skip "do not support do_facet operations." 
&& return + do_facet $SINGLEMDS "echo '* 0 normtown' > $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + chown $USER0 $DIR/$tdir && error "chown (1)" + do_facet $SINGLEMDS "echo '* 0 rmtown' > $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + else + chown $USER0 $DIR/$tdir || error "chown (2)" + fi + $RUNAS -u $ID0 ls $DIR || error "ls (1)" rm -f $DIR/f0 || error "rm (2)" $RUNAS -u $ID0 touch $DIR/f0 && error "touch (1)" $RUNAS -u $ID0 touch $DIR/$tdir/f1 || error "touch (2)" $RUNAS -u $ID1 touch $DIR/$tdir/f2 && error "touch (3)" touch $DIR/$tdir/f3 || error "touch (4)" - chown root $DIR/$tdir || error "chown (2)" + chown root $DIR/$tdir || error "chown (3)" chgrp $USER0 $DIR/$tdir || error "chgrp (1)" chmod 0775 $DIR/$tdir || error "chmod (2)" $RUNAS -u $ID0 touch $DIR/$tdir/f4 || error "touch (5)" $RUNAS -u $ID1 touch $DIR/$tdir/f5 && error "touch (6)" touch $DIR/$tdir/f6 || error "touch (7)" - rm -rf $DIR/$tdir || error "rm (3)" + rm -rf $DIR/* || error "rm (3)" + + if [ "$CLIENT_TYPE" = "remote" ]; then + do_facet $SINGLEMDS "rm -f $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + fi } run_test 0 "uid permission =============================" @@ -147,11 +167,11 @@ run_test 0 "uid permission =============================" test_1() { [ $GSS_SUP = 0 ] && skip "without GSS support." && return [ -z "$MDT" ] && skip "do not support do_facet operations." 
&& return - [ "$CLIENT_TYPE" = "remote" ] && \ - skip "test_1 for local client only" && return - do_facet $SINGLEMDS "rm -f $PERM_CONF" - do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + if [ "$CLIENT_TYPE" = "remote" ]; then + do_facet $SINGLEMDS "echo '* 0 rmtown' > $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + fi rm -rf $DIR/$tdir mkdir -p $DIR/$tdir @@ -159,7 +179,7 @@ test_1() { chown $USER0 $DIR/$tdir || error "chown (1)" $RUNAS -u $ID1 -v $ID0 touch $DIR/$tdir/f0 && error "touch (2)" echo "enable uid $ID1 setuid" - do_facet $SINGLEMDS "echo '* $ID1 setuid' > $PERM_CONF" + do_facet $SINGLEMDS "echo '* $ID1 setuid' >> $PERM_CONF" do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" $RUNAS -u $ID1 -v $ID0 touch $DIR/$tdir/f1 || error "touch (3)" @@ -196,6 +216,10 @@ test_2 () { [ -z "$(which setfacl 2>/dev/null)" ] && \ skip "could not find setfacl" && return [ "$UID" != 0 ] && skip "must run as root" && return + [ -z "$MDT" ] && skip "do not support do_facet operations." && return + + do_facet $SINGLEMDS "echo '* 0 rmtacl,rmtown' > $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" sec_login root root sec_login bin bin @@ -206,17 +230,8 @@ test_2 () { umask 0022 cd $DIR - if [ ! -z "$MDT" ]; then - do_facet $SINGLEMDS "echo '* 0 rmtacl' > $PERM_CONF" - do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" - fi - - if lfs rgetfacl $DIR; then - echo "performing cp ..." - run_rmtacl_subtest cp || error "cp" - else - echo "server doesn't permit current user 'lfs r{s,g}etfacl', skip cp test." - fi + echo "performing cp ..." + run_rmtacl_subtest cp || error "cp" echo "performing getfacl-noacl..." run_rmtacl_subtest getfacl-noacl || error "getfacl-noacl" echo "performing misc..." @@ -233,13 +248,11 @@ test_2 () { run_rmtacl_subtest inheritance || error "inheritance" rm -f make-tree - if [ ! 
-z "$MDT" ]; then - do_facet $SINGLEMDS "rm -f $PERM_CONF" - do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" - fi - cd $SAVE_PWD umask $SAVE_UMASK + + do_facet $SINGLEMDS "rm -f $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" } run_test 2 "rmtacl =============================" @@ -255,22 +268,31 @@ run_test 3 "rootsquash =============================" # as for remote client, the groups of the specified uid on MDT # will be obtained by upcall /sbin/l_getidentity and used. test_4() { + if [ "$CLIENT_TYPE" = "remote" ]; then + [ -z "$MDT" ] && skip "do not support do_facet operations." && return + do_facet $SINGLEMDS "echo '* 0 rmtown' > $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + fi + rm -rf $DIR/$tdir mkdir -p $DIR/$tdir chmod 0771 $DIR/$tdir chgrp $ID0 $DIR/$tdir $RUNAS -u $ID0 ls $DIR/$tdir || error "setgroups (1)" - if [ "$CLIENT_TYPE" != "remote" ]; then + if [ "$CLIENT_TYPE" = "local" ]; then if [ ! -z "$MDT" ]; then do_facet $SINGLEMDS "echo '* $ID1 setgrp' > $PERM_CONF" do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" - $RUNAS -u $ID1 -G1,2,$ID0 ls $DIR/$tdir || error "setgroups (2)" - do_facet $SINGLEMDS "rm -f $PERM_CONF" - do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + $RUNAS -u $ID1 -G1,2,$ID0 ls $DIR/$tdir || error "setgroups (2)" fi fi $RUNAS -u $ID1 -G1,2 ls $DIR/$tdir && error "setgroups (3)" rm -rf $DIR/$tdir + + if [ ! 
-z "$MDT" ]; then + do_facet $SINGLEMDS "rm -f $PERM_CONF" + do_facet $SINGLEMDS "lctl set_param -n $IDENTITY_FLUSH=-1" + fi } run_test 4 "set supplementary group ===============" @@ -282,6 +304,39 @@ mds_capability_timeout() { return 0 } +mds_sec_level_switch() { + [ $# -lt 1 ] && echo "Miss mds sec level switch value" && return 1 + + case $1 in + 0) echo "Disable capa for all clients";; + 1) echo "Enable capa for remote client";; + 3) echo "Enable capa for all clients";; + *) echo "Invalid mds sec level switch value" && return 2;; + esac + + do_facet $SINGLEMDS "lctl set_param -n $MDSSECLEVEL=$1" + return 0 +} + +oss_sec_level_switch() { + [ $# -lt 1 ] && echo "Miss oss sec level switch value" && return 1 + + case $1 in + 0) echo "Disable capa for all clients";; + 1) echo "Enable capa for remote client";; + 3) echo "Enable capa for all clients";; + *) echo "Invalid oss sec level switch value" && return 2;; + esac + + for i in `seq $OSTCOUNT`; do + local j=`expr $i - 1` + local OST="`do_facet ost$i "lctl get_param -N obdfilter.\*OST\*$j/stats 2>/dev/null | cut -d"." -f2" || true`" + [ -z "$OST" ] && return 3 + do_facet ost$i "lctl set_param -n obdfilter.$OST.sec_level=$1" + done + return 0 +} + mds_capability_switch() { [ $# -lt 1 ] && echo "Miss mds capability switch value" && return 1 @@ -306,12 +361,25 @@ oss_capability_switch() { for i in `seq $OSTCOUNT`; do local j=`expr $i - 1` - local OST="`do_facet ost$i "lctl get_param -N obdfilter.\*OST\*$j/stats | cut -d"." -f2" || true`" + local OST="`do_facet ost$i "lctl get_param -N obdfilter.\*OST\*$j/stats 2>/dev/null | cut -d"." 
-f2" || true`" + [ -z "$OST" ] && return 3 do_facet ost$i "lctl set_param -n obdfilter.$OST.capa=$1" done return 0 } +turn_mds_capa_on() { + mds_capability_switch 3 || return 1 + mds_sec_level_switch 3 || return 2 + return 0 +} + +turn_oss_capa_on() { + oss_capability_switch 1 || return 1 + oss_sec_level_switch 3 || return 2 + return 0 +} + turn_capability_on() { local capa_timeout=${1:-"1800"} @@ -320,13 +388,22 @@ turn_capability_on() { # is turned on on all MDS/OSS servers before # client mount. - umount $MOUNT || return 1 + turn_mds_capa_on || return 1 + turn_oss_capa_on || return 2 + mds_capability_timeout $capa_timeout || return 3 + remount_client $MOUNT || return 4 + return 0 +} - mds_capability_switch 3 || return 2 - oss_capability_switch 1 || return 3 - mds_capability_timeout $capa_timeout || return 4 +turn_mds_capa_off() { + mds_sec_level_switch 0 || return 1 + mds_capability_switch 0 || return 2 + return 0 +} - mount_client $MOUNT || return 5 +turn_oss_capa_off() { + oss_sec_level_switch 0 || return 1 + oss_capability_switch 0 || return 2 return 0 } @@ -335,8 +412,8 @@ turn_capability_off() { # it in a live system. But, please turn off # capability of all OSS servers before MDS servers. - oss_capability_switch 0 || return 1 - mds_capability_switch 0 || return 2 + turn_oss_capa_off || return 1 + turn_mds_capa_off || return 2 return 0 } @@ -347,24 +424,29 @@ turn_capability_off() { test_5() { local file=$DIR/f5 + [ $GSS_SUP = 0 ] && skip "without GSS support." && return [ -z "$MDT" ] && skip "do not support do_facet operations." && return + [ ! -z "$LOCALMDT" ] && skip "client should be separated from server." && return + rm -f $file + turn_capability_off if [ $? != 0 ]; then error "turn_capability_off" return 1 fi - rm -f $file - # Disable proc variable - mds_capability_switch 0 + turn_oss_capa_on if [ $? != 0 ]; then - error "mds_capability_switch 0" + error "turn_oss_capa_on" return 2 fi - oss_capability_switch 1 - if [ $? 
!= 0 ]; then - error "oss_capability_switch 1" - return 3 + + if [ "$CLIENT_TYPE" = "remote" ]; then + remount_client $MOUNT && return 3 + turn_oss_capa_off + return 0 + else + remount_client $MOUNT || return 4 fi # proc variable disabled -- access to the objects in the filesystem @@ -374,14 +456,15 @@ test_5() { $WTL $file 30 if [ $? == 0 ]; then error "Write worked well even though secrets not supplied." - return 4 + return 5 fi turn_capability_on if [ $? != 0 ]; then error "turn_capability_on" - return 4 + return 6 fi + sleep 5 # proc variable enabled, secrets supplied -- write should work now @@ -390,13 +473,13 @@ test_5() { $WTL $file 30 if [ $? != 0 ]; then error "Write failed even though secrets supplied." - return 5 + return 7 fi turn_capability_off if [ $? != 0 ]; then error "turn_capability_off" - return 7 + return 8 fi rm -f $file } @@ -409,12 +492,16 @@ run_test 5 "capa secrets =========================" test_6() { local file=$DIR/f6 + [ $GSS_SUP = 0 ] && skip "without GSS support." && return [ -z "$MDT" ] && skip "do not support do_facet operations." && return + [ ! -z "$LOCALMDT" ] && skip "client should be separated from server." && return + turn_capability_off if [ $? != 0 ]; then error "turn_capability_off" return 1 fi + rm -f $file turn_capability_on 30 @@ -422,6 +509,7 @@ test_6() { error "turn_capability_on 30" return 2 fi + # Token expiry $WTL $file 60 if [ $? != 0 ]; then @@ -435,14 +523,15 @@ test_6() { error "mds_capability_timeout 30" return 4 fi + $WTL $file 60 & local PID=$! sleep 5 # To disable automatic renew, only need turn capa off on MDS. - mds_capability_switch 0 + turn_mds_capa_off if [ $? 
!= 0 ]; then - error "mds_capability_switch 0" + error "turn_mds_capa_off" return 5 fi diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 0cff0fb..875513f 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -3623,7 +3623,7 @@ test_80() { # bug 10718 dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1M sync; sleep 1; sync BEFORE=`date +%s` - cancel_lru_locks OSC + cancel_lru_locks osc AFTER=`date +%s` DIFF=$((AFTER-BEFORE)) if [ $DIFF -gt 1 ] ; then @@ -5111,8 +5111,10 @@ test_123a() { # was test 123, statahead(bug 11401) SLOWOK=1 fi - remount_client $MOUNT mkdir -p $DIR/$tdir + rm -rf $DIR/$tdir/* + cancel_lru_locks mdc + cancel_lru_locks osc error=0 NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` [ $NUMFREE -gt 100000 ] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000)) diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index b713dd7..f4b7a48 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -179,6 +179,11 @@ init_test_env() { } +case `uname -r` in +2.4.*) EXT=".o"; USE_QUOTA=no; [ ! "$CLIENTONLY" ] && FSTYPE=ext3;; + *) EXT=".ko"; USE_QUOTA=yes;; +esac + load_module() { EXT=".ko" module=$1 @@ -227,10 +232,7 @@ load_modules() { load_module obdclass/obdclass load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss - # Now, some modules depend on lquota without USE_QUOTA check, - # will fix later. Disable check "$USE_QUOTA" = "yes" temporary. - #[ "$USE_QUOTA" = "yes" ] && load_module quota/lquota - load_module quota/lquota + [ "$USE_QUOTA" = "yes" -a "$LQUOTA" != "no" ] && load_module quota/lquota load_module fid/fid load_module fld/fld load_module lmv/lmv @@ -747,8 +749,10 @@ client_reconnect() { facet_failover() { facet=$1 + sleep_time=$2 echo "Failing $facet on node `facet_active_host $facet`" shutdown_facet $facet + [ -n "$sleep_time" ] && sleep $sleep_time reboot_facet $facet client_df & DFPID=$! 
@@ -1124,7 +1128,7 @@ switch_identity() { local num=$1 local switch=$2 local j=`expr $num - 1` - local MDT="`do_facet mds$num lctl get_param -N mdt.*MDT*$j | cut -d"." -f2 2>/dev/null || true`" + local MDT="`(do_facet mds$num lctl get_param -N mdt.*MDT*$j 2>/dev/null | cut -d"." -f2 2>/dev/null) || true`" if [ -z "$MDT" ]; then return 2 @@ -1635,6 +1639,8 @@ basetest() { IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 } +# print a newline if the last test was skipped +export LAST_SKIPPED= run_test() { assert_DIR @@ -1642,38 +1648,46 @@ run_test() { if [ ! -z "$ONLY" ]; then testname=ONLY_$1 if [ ${!testname}x != x ]; then + [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED= run_one $1 "$2" return $? fi testname=ONLY_$base if [ ${!testname}x != x ]; then + [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED= run_one $1 "$2" return $? fi + LAST_SKIPPED="y" echo -n "." return 0 fi testname=EXCEPT_$1 if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping excluded test $1" return 0 fi testname=EXCEPT_$base if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping excluded test $1 (base $base)" return 0 fi testname=EXCEPT_SLOW_$1 if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping SLOW test $1" return 0 fi testname=EXCEPT_SLOW_$base if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping SLOW test $1 (base $base)" return 0 fi + LAST_SKIPPED= run_one $1 "$2" return $? 
diff --git a/lustre/utils/l_getidentity.c b/lustre/utils/l_getidentity.c index f45a8ae..ae4c437 100644 --- a/lustre/utils/l_getidentity.c +++ b/lustre/utils/l_getidentity.c @@ -194,6 +194,7 @@ static perm_type_t perm_types[] = { { "setgid", CFS_SETGID_PERM }, { "setgrp", CFS_SETGRP_PERM }, { "rmtacl", CFS_RMTACL_PERM }, + { "rmtown", CFS_RMTOWN_PERM }, { 0 } }; @@ -202,6 +203,7 @@ static perm_type_t noperm_types[] = { { "nosetgid", CFS_SETGID_PERM }, { "nosetgrp", CFS_SETGRP_PERM }, { "normtacl", CFS_RMTACL_PERM }, + { "normtown", CFS_RMTOWN_PERM }, { 0 } }; diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index f0d07ff..130f41c 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -59,6 +59,10 @@ #include #include #include +#ifdef HAVE_SYS_QUOTA_H +# include +#endif + /* For dirname() */ #include @@ -83,13 +87,14 @@ static int lfs_osts(int argc, char **argv); static int lfs_df(int argc, char **argv); static int lfs_check(int argc, char **argv); static int lfs_catinfo(int argc, char **argv); -#ifdef HAVE_QUOTA_SUPPORT +#ifdef HAVE_SYS_QUOTA_H static int lfs_quotachown(int argc, char **argv); static int lfs_quotacheck(int argc, char **argv); static int lfs_quotaon(int argc, char **argv); static int lfs_quotaoff(int argc, char **argv); static int lfs_setquota(int argc, char **argv); static int lfs_quota(int argc, char **argv); +static int lfs_quotainv(int argc, char **argv); #endif static int lfs_flushctx(int argc, char **argv); static int lfs_join(int argc, char **argv); @@ -156,7 +161,7 @@ command_t cmdlist[] = { "report filesystem disk space usage or inodes usage" "of each MDS/OSD.\n" "Usage: df [-i] [-h] [path]"}, -#ifdef HAVE_QUOTA_SUPPORT +#ifdef HAVE_SYS_QUOTA_H {"quotachown",lfs_quotachown, 0, "Change files' owner or group on the specified filesystem.\n" "usage: quotachown [-i] \n" @@ -170,10 +175,24 @@ command_t cmdlist[] = { {"quotaoff", lfs_quotaoff, 0, "Turn filesystem quotas off.\n" "usage: quotaoff [ -ug ] "}, {"setquota", lfs_setquota, 0, 
"Set filesystem quotas.\n" - "usage: setquota [ -u | -g ] \n" - " setquota -t [ -u | -g ] "}, + "usage: setquota [ -u | -g ] -b -B -i -I \n" + " setquota -t [ -u | -g ] \n" + " setquota [ -u | --user | -g | --group ] \n" + " [--block-softlimit ]\n" + " [--block-hardlimit ]\n" + " [--inode-softlimit ]\n" + " [--inode-hardlimit ] \n" + " setquota [-t] [ -u | --user | -g | --group ]\n" + " [--block-grace ]\n" + " [--inode-grace ] \n" + " -b can be used instead of --block-softlimit/--block-grace\n" + " -B can be used instead of --block-hardlimit\n" + " -i can be used instead of --inode-softlimit/--inode-grace\n" + " -I can be used instead of --inode-hardlimit"}, {"quota", lfs_quota, 0, "Display disk usage and limits.\n" - "usage: quota [ -o obd_uuid ] [{-u|-g }|-t] "}, + "usage: quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [{-u|-g }|-t] "}, + {"quotainv", lfs_quotainv, 0, "Invalidate quota data.\n" + "usage: quotainv [-u|-g] "}, #endif {"flushctx", lfs_flushctx, 0, "Flush security context for current user.\n" "usage: flushctx [-k] [mountpoint...]"}, @@ -267,7 +286,7 @@ static int lfs_setstripe(int argc, char **argv) { optind = 0; while ((c = getopt_long(argc, argv, "c:di:o:s:p:", - long_opts, NULL)) >= 0) { + long_opts, NULL)) >= 0) { switch (c) { case 0: /* Long options. 
*/ @@ -313,13 +332,13 @@ static int lfs_setstripe(int argc, char **argv) if (optind == argc) { fprintf(stderr, "error: %s: missing filename|dirname\n", - argv[0]); + argv[0]); return CMD_HELP; } /* get the stripe size */ if (stripe_size_arg != NULL) { - result = parse_size(stripe_size_arg, &st_size, &size_units); + result = parse_size(stripe_size_arg, &st_size, &size_units, 0); if (result) { fprintf(stderr, "error: %s: bad size '%s'\n", argv[0], stripe_size_arg); @@ -392,9 +411,12 @@ static int set_time(time_t *time, time_t *set, char *str) return res; } +#define USER 0 +#define GROUP 1 + static int name2id(unsigned int *id, char *name, int type) { - if (type == USRQUOTA) { + if (type == USER) { struct passwd *entry; if (!(entry = getpwnam(name))) { @@ -421,7 +443,7 @@ static int name2id(unsigned int *id, char *name, int type) static int id2name(char **name, unsigned int id, int type) { - if (type == USRQUOTA) { + if (type == USER) { struct passwd *entry; if (!(entry = getpwuid(id))) { @@ -582,7 +604,7 @@ static int lfs_find(int argc, char **argv) new_fashion = 1; param.gid = strtol(optarg, &endptr, 10); if (optarg == endptr) { - ret = name2id(¶m.gid, optarg, GRPQUOTA); + ret = name2id(¶m.gid, optarg, GROUP); if (ret != 0) { fprintf(stderr, "Group/GID: %s cannot " "be found.\n", optarg); @@ -606,7 +628,7 @@ static int lfs_find(int argc, char **argv) new_fashion = 1; param.uid = strtol(optarg, &endptr, 10); if (optarg == endptr) { - ret = name2id(¶m.uid, optarg, USRQUOTA); + ret = name2id(¶m.uid, optarg, USER); if (ret != 0) { fprintf(stderr, "User/UID: %s cannot " "be found.\n", optarg); @@ -723,7 +745,8 @@ static int lfs_find(int argc, char **argv) if (param.size_sign) optarg++; - ret = parse_size(optarg, ¶m.size,¶m.size_units); + ret = parse_size(optarg, ¶m.size, + ¶m.size_units, 0); if (ret) { fprintf(stderr,"error: bad size '%s'\n", optarg); @@ -1066,12 +1089,12 @@ static int mntdf(char *mntdir, int ishow, int cooked) if (rc == -ENOTCONN || rc == -ETIMEDOUT || 
rc == -EIO || rc == -ENODATA || rc == 0) { - showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked, - "MDT", index, rc); + showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf), + ishow, cooked, "MDT", index, rc); } else { fprintf(stderr, "error: llapi_obd_statfs(%s): %s (%d)\n", - uuid_buf.uuid, strerror(-rc), rc); + obd_uuid2str(&uuid_buf), strerror(-rc), rc); return rc; } if (rc == 0) { @@ -1093,8 +1116,8 @@ static int mntdf(char *mntdir, int ishow, int cooked) if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO || rc == -ENODATA || rc == 0) { - showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked, - "OST", index, rc); + showdf(mntdir, &stat_buf, obd_uuid2str(&uuid_buf), + ishow, cooked, "OST", index, rc); } else { fprintf(stderr, "error: llapi_obd_statfs failed: %s (%d)\n", @@ -1314,7 +1337,7 @@ out: return rc; } -#ifdef HAVE_QUOTA_SUPPORT +#ifdef HAVE_SYS_QUOTA_H static int lfs_quotachown(int argc, char **argv) { @@ -1341,15 +1364,13 @@ static int lfs_quotachown(int argc, char **argv) return rc; } - static int lfs_quotacheck(int argc, char **argv) { int c, check_type = 0; char *mnt; struct if_quotacheck qchk; struct if_quotactl qctl; - char *obd_type = qchk.obd_type; - char *obd_uuid = qchk.obd_uuid.uuid; + char *obd_type = (char *)qchk.obd_type; int rc; memset(&qchk, 0, sizeof(qchk)); @@ -1382,7 +1403,6 @@ static int lfs_quotacheck(int argc, char **argv) memset(&qctl, 0, sizeof(qctl)); qctl.qc_cmd = LUSTRE_Q_QUOTAOFF; - qctl.qc_id = QFMT_LDISKFS; qctl.qc_type = check_type; rc = llapi_quotactl(mnt, &qctl); if (rc) { @@ -1399,20 +1419,20 @@ static int lfs_quotacheck(int argc, char **argv) rc = llapi_poll_quotacheck(mnt, &qchk); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, obd_uuid); + fprintf(stderr, "%s %s ", obd_type, + obd_uuid2str(&qchk.obd_uuid)); fprintf(stderr, "quota check failed: %s\n", strerror(errno)); return rc; } memset(&qctl, 0, sizeof(qctl)); qctl.qc_cmd = LUSTRE_Q_QUOTAON; - qctl.qc_id = QFMT_LDISKFS; qctl.qc_type = 
check_type; rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", - qctl.obd_type, qctl.obd_uuid.uuid); + fprintf(stderr, "%s %s ", (char *)qctl.obd_type, + obd_uuid2str(&qctl.obd_uuid)); fprintf(stderr, "%s turn on quota failed: %s\n", argv[0], strerror(errno)); return rc; @@ -1426,13 +1446,11 @@ static int lfs_quotaon(int argc, char **argv) int c; char *mnt; struct if_quotactl qctl; - char *obd_type = qctl.obd_type; - char *obd_uuid = qctl.obd_uuid.uuid; + char *obd_type = (char *)qctl.obd_type; int rc; memset(&qctl, 0, sizeof(qctl)); qctl.qc_cmd = LUSTRE_Q_QUOTAON; - qctl.qc_id = QFMT_LDISKFS; optind = 0; while ((c = getopt(argc, argv, "ugf")) != -1) { @@ -1466,7 +1484,8 @@ static int lfs_quotaon(int argc, char **argv) rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, obd_uuid); + fprintf(stderr, "%s %s ", obd_type, + obd_uuid2str(&qctl.obd_uuid)); fprintf(stderr, "%s failed: %s\n", argv[0], strerror(errno)); return rc; } @@ -1479,8 +1498,7 @@ static int lfs_quotaoff(int argc, char **argv) int c; char *mnt; struct if_quotactl qctl; - char *obd_type = qctl.obd_type; - char *obd_uuid = qctl.obd_uuid.uuid; + char *obd_type = (char *)qctl.obd_type; int rc; memset(&qctl, 0, sizeof(qctl)); @@ -1513,9 +1531,15 @@ static int lfs_quotaoff(int argc, char **argv) mnt = argv[optind]; rc = llapi_quotactl(mnt, &qctl); + if (rc == -1 && errno == ESRCH) { + fprintf(stderr, "\n%s quotas are not enabled.\n", + qctl.qc_type == 0x00 ? 
"user" : "group"); + return 0; + } if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, obd_uuid); + fprintf(stderr, "%s %s ", obd_type, + obd_uuid2str(&qctl.obd_uuid)); fprintf(stderr, "quotaoff failed: %s\n", strerror(errno)); return rc; } @@ -1523,6 +1547,54 @@ static int lfs_quotaoff(int argc, char **argv) return 0; } +static int lfs_quotainv(int argc, char **argv) +{ + int c; + char *mnt; + struct if_quotactl qctl; + int rc; + + memset(&qctl, 0, sizeof(qctl)); + qctl.qc_cmd = LUSTRE_Q_INVALIDATE; + + optind = 0; + while ((c = getopt(argc, argv, "ugf")) != -1) { + switch (c) { + case 'u': + qctl.qc_type |= 0x01; + break; + case 'g': + qctl.qc_type |= 0x02; + break; + case 'f': + qctl.qc_cmd = LUSTRE_Q_FINVALIDATE; + break; + default: + fprintf(stderr, "error: %s: option '-%c' " + "unrecognized\n", argv[0], c); + return CMD_HELP; + } + } + + if (qctl.qc_type) + qctl.qc_type--; + else /* by default, invalidate quota for both user & group */ + qctl.qc_type = 0x02; + + if (argc == optind) + return CMD_HELP; + + mnt = argv[optind]; + + rc = llapi_quotactl(mnt, &qctl); + if (rc) { + fprintf(stderr, "quotainv failed: %s\n", strerror(errno)); + return rc; + } + + return 0; +} + #define ARG2INT(nr, str, msg) \ do { \ char *endp; \ @@ -1592,87 +1664,224 @@ error: return ULONG_MAX; } -int lfs_setquota(int argc, char **argv) +#define ARG2ULL(nr, str, defscale) \ +do { \ + unsigned long long limit, units = 0; \ + int rc; \ + \ + rc = parse_size(str, &limit, &units, 1); \ + if (rc < 0) { \ + fprintf(stderr, "error: bad limit value %s\n", str); \ + return CMD_HELP; \ + } \ + nr = ((units == 0) ? 
(defscale) : 1) * limit; \ +} while (0) + +static inline int has_times_option(int argc, char **argv) { - int c; - char *mnt; + int i; + + for (i = 1; i < argc; i++) + if (!strcmp(argv[i], "-t")) + return 1; + + return 0; +} + +int lfs_setquota_times(int argc, char **argv) +{ + int c, rc; struct if_quotactl qctl; - char *obd_type = qctl.obd_type; - char *obd_uuid = qctl.obd_uuid.uuid; - int rc; + char *mnt, *obd_type = (char *)qctl.obd_type; + struct obd_dqblk *dqb = &qctl.qc_dqblk; + struct obd_dqinfo *dqi = &qctl.qc_dqinfo; + struct option long_opts[] = { + {"user", no_argument, 0, 'u'}, + {"group", no_argument, 0, 'g'}, + {"block-grace", required_argument, 0, 'b'}, + {"inode-grace", required_argument, 0, 'i'}, + {"times", no_argument, 0, 't'}, + {0, 0, 0, 0} + }; memset(&qctl, 0, sizeof(qctl)); - qctl.qc_cmd = LUSTRE_Q_SETQUOTA; + qctl.qc_cmd = LUSTRE_Q_SETINFO; + qctl.qc_type = UGQUOTA; optind = 0; - while ((c = getopt(argc, argv, "ugt")) != -1) { + while ((c = getopt_long(argc, argv, "ugb:i:t", long_opts, NULL)) != -1) { switch (c) { case 'u': - qctl.qc_type |= 0x01; - break; case 'g': - qctl.qc_type |= 0x02; + if (qctl.qc_type != UGQUOTA) { + fprintf(stderr, "error: -u and -g can't be used " + "more than once\n"); + return CMD_HELP; + } + qctl.qc_type = (c == 'u') ? USRQUOTA : GRPQUOTA; break; - case 't': - qctl.qc_cmd = LUSTRE_Q_SETINFO; + case 'b': + if ((dqi->dqi_bgrace = str2sec(optarg)) == ULONG_MAX) { + fprintf(stderr, "error: bad block-grace: %s\n", + optarg); + return CMD_HELP; + } + dqb->dqb_valid |= QIF_BTIME; break; - default: - fprintf(stderr, "error: %s: option '-%c' " - "unrecognized\n", argv[0], c); + case 'i': + if ((dqi->dqi_igrace = str2sec(optarg)) == ULONG_MAX) { + fprintf(stderr, "error: bad inode-grace: %s\n", + optarg); + return CMD_HELP; + } + dqb->dqb_valid |= QIF_ITIME; + break; + case 't': /* Yes, of course! 
*/ + break; + default: /* getopt prints error message for us when opterr != 0 */ return CMD_HELP; } } - if (qctl.qc_type) - qctl.qc_type--; - if (qctl.qc_type == UGQUOTA) { - fprintf(stderr, "error: user and group quotas can't be set " - "both\n"); + fprintf(stderr, "error: neither -u nor -g specified\n"); return CMD_HELP; } - if (qctl.qc_cmd == LUSTRE_Q_SETQUOTA) { - struct obd_dqblk *dqb = &qctl.qc_dqblk; + if (optind != argc - 1) { + fprintf(stderr, "error: unexpected parameters encountered\n"); + return CMD_HELP; + } - if (optind + 6 != argc) - return CMD_HELP; + mnt = argv[optind]; + rc = llapi_quotactl(mnt, &qctl); + if (rc) { + if (*obd_type) + fprintf(stderr, "%s %s ", obd_type, + obd_uuid2str(&qctl.obd_uuid)); + fprintf(stderr, "setquota failed: %s\n", strerror(errno)); + return rc; + } - rc = name2id(&qctl.qc_id, argv[optind++], qctl.qc_type); - if (rc) { - fprintf(stderr, "error: find id for name %s failed: %s\n", - argv[optind - 1], strerror(errno)); - return CMD_HELP; - } + return 0; +} - ARG2INT(dqb->dqb_bsoftlimit, argv[optind++], "block-softlimit"); - ARG2INT(dqb->dqb_bhardlimit, argv[optind++], "block-hardlimit"); - ARG2INT(dqb->dqb_isoftlimit, argv[optind++], "inode-softlimit"); - ARG2INT(dqb->dqb_ihardlimit, argv[optind++], "inode-hardlimit"); +#define BSLIMIT (1 << 0) +#define BHLIMIT (1 << 1) +#define ISLIMIT (1 << 2) +#define IHLIMIT (1 << 3) - dqb->dqb_valid = QIF_LIMITS; - } else { - struct obd_dqinfo *dqi = &qctl.qc_dqinfo; +int lfs_setquota(int argc, char **argv) +{ + int c, rc; + struct if_quotactl qctl; + char *mnt, *obd_type = (char *)qctl.obd_type; + struct obd_dqblk *dqb = &qctl.qc_dqblk; + struct option long_opts[] = { + {"user", required_argument, 0, 'u'}, + {"group", required_argument, 0, 'g'}, + {"block-softlimit", required_argument, 0, 'b'}, + {"block-hardlimit", required_argument, 0, 'B'}, + {"inode-softlimit", required_argument, 0, 'i'}, + {"inode-hardlimit", required_argument, 0, 'I'}, + {0, 0, 0, 0} + }; + unsigned limit_mask 
= 0; - if (optind + 3 != argc) - return CMD_HELP; + if (has_times_option(argc, argv)) + return lfs_setquota_times(argc, argv); + + memset(&qctl, 0, sizeof(qctl)); + qctl.qc_cmd = LUSTRE_Q_SETQUOTA; + qctl.qc_type = UGQUOTA; /* UGQUOTA makes no sense for setquota, + * so it can be used as a marker that qc_type + * isn't reinitialized from command line */ - if ((dqi->dqi_bgrace = str2sec(argv[optind++])) == ULONG_MAX) { - fprintf(stderr, "error: bad %s: %s\n", "block-grace", argv[optind - 1]); + optind = 0; + while ((c = getopt_long(argc, argv, "u:g:b:B:i:I:", long_opts, NULL)) != -1) { + switch (c) { + case 'u': + case 'g': + if (qctl.qc_type != UGQUOTA) { + fprintf(stderr, "error: -u and -g can't be used" + " more than once\n"); + return CMD_HELP; + } + qctl.qc_type = (c == 'u') ? USRQUOTA : GRPQUOTA; + rc = name2id(&qctl.qc_id, optarg, + (qctl.qc_type == USRQUOTA) ? USER : GROUP); + if (rc) { + fprintf(stderr, "error: unknown id %s\n", + optarg); + return CMD_HELP; + } + break; + case 'b': + ARG2ULL(dqb->dqb_bsoftlimit, optarg, 1024); + dqb->dqb_bsoftlimit >>= 10; + limit_mask |= BSLIMIT; + break; + case 'B': + ARG2ULL(dqb->dqb_bhardlimit, optarg, 1024); + dqb->dqb_bhardlimit >>= 10; + limit_mask |= BHLIMIT; + break; + case 'i': + ARG2ULL(dqb->dqb_isoftlimit, optarg, 1); + limit_mask |= ISLIMIT; + break; + case 'I': + ARG2ULL(dqb->dqb_ihardlimit, optarg, 1); + limit_mask |= IHLIMIT; + break; + default: /* getopt prints error message for us when opterr != 0 */ return CMD_HELP; } - if ((dqi->dqi_igrace = str2sec(argv[optind++])) == ULONG_MAX) { - fprintf(stderr, "error: bad %s: %s\n", "inode-grace", argv[optind - 1]); + } + + if (qctl.qc_type == UGQUOTA) { + fprintf(stderr, "error: neither -u nor -g are specified\n"); + return CMD_HELP; + } + + if (optind != argc - 1) { + fprintf(stderr, "error: unexpected parameters encountered\n"); + return CMD_HELP; + } + + mnt = argv[optind]; + + if ((!(limit_mask & BHLIMIT) ^ !(limit_mask & BSLIMIT)) || + (!(limit_mask & 
IHLIMIT) ^ !(limit_mask & ISLIMIT))) { + /* sigh, we can't just set blimits/ilimits */ + struct if_quotactl tmp_qctl = {.qc_cmd = LUSTRE_Q_GETQUOTA, + .qc_type = qctl.qc_type, + .qc_id = qctl.qc_id}; + + rc = llapi_quotactl(mnt, &tmp_qctl); + if (rc < 0) { + fprintf(stderr, "error: getquota failed\n"); return CMD_HELP; } + + if (!(limit_mask & BHLIMIT)) + dqb->dqb_bhardlimit = tmp_qctl.qc_dqblk.dqb_bhardlimit; + if (!(limit_mask & BSLIMIT)) + dqb->dqb_bsoftlimit = tmp_qctl.qc_dqblk.dqb_bsoftlimit; + if (!(limit_mask & IHLIMIT)) + dqb->dqb_ihardlimit = tmp_qctl.qc_dqblk.dqb_ihardlimit; + if (!(limit_mask & ISLIMIT)) + dqb->dqb_isoftlimit = tmp_qctl.qc_dqblk.dqb_isoftlimit; } - mnt = argv[optind]; + dqb->dqb_valid |= (limit_mask & (BHLIMIT | BSLIMIT)) ? QIF_BLIMITS : 0; + dqb->dqb_valid |= (limit_mask & (IHLIMIT | ISLIMIT)) ? QIF_ILIMITS : 0; rc = llapi_quotactl(mnt, &qctl); if (rc) { if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, obd_uuid); + fprintf(stderr, "%s %s ", obd_type, + obd_uuid2str(&qctl.obd_uuid)); fprintf(stderr, "setquota failed: %s\n", strerror(errno)); return rc; } @@ -1741,7 +1950,7 @@ static void print_quota_title(char *name, struct if_quotactl *qctl) "files", "quota", "limit", "grace"); } -static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) +static void print_quota(char *mnt, struct if_quotactl *qctl) { time_t now; @@ -1752,10 +1961,10 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) struct obd_dqblk *dqb = &qctl->qc_dqblk; if (dqb->dqb_bhardlimit && - toqb(dqb->dqb_curspace) > dqb->dqb_bhardlimit) { + toqb(dqb->dqb_curspace) >= dqb->dqb_bhardlimit) { bover = 1; } else if (dqb->dqb_bsoftlimit && - toqb(dqb->dqb_curspace) > dqb->dqb_bsoftlimit) { + toqb(dqb->dqb_curspace) >= dqb->dqb_bsoftlimit) { if (dqb->dqb_btime > now) { bover = 2; } else { @@ -1764,10 +1973,10 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) } if (dqb->dqb_ihardlimit && - dqb->dqb_curinodes 
> dqb->dqb_ihardlimit) { + dqb->dqb_curinodes >= dqb->dqb_ihardlimit) { iover = 1; } else if (dqb->dqb_isoftlimit && - dqb->dqb_curinodes > dqb->dqb_isoftlimit) { + dqb->dqb_curinodes >= dqb->dqb_isoftlimit) { if (dqb->dqb_btime > now) { iover = 2; } else { @@ -1789,10 +1998,16 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) if (bover) diff2str(dqb->dqb_btime, timebuf, now); - - sprintf(numbuf[0], LPU64, toqb(dqb->dqb_curspace)); - sprintf(numbuf[1], LPU64, dqb->dqb_bsoftlimit); - sprintf(numbuf[2], LPU64, dqb->dqb_bhardlimit); + sprintf(numbuf[0], (dqb->dqb_valid & QIF_SPACE) ? + LPU64 : "["LPU64"]", toqb(dqb->dqb_curspace)); + if (qctl->qc_valid == QC_GENERAL) + sprintf(numbuf[1], (dqb->dqb_valid & QIF_BLIMITS) + ? LPU64 : "["LPU64"]", + dqb->dqb_bsoftlimit); + else + sprintf(numbuf[1], "%s", ""); + sprintf(numbuf[2], (dqb->dqb_valid & QIF_BLIMITS) + ? LPU64 : "["LPU64"]", dqb->dqb_bhardlimit); printf(" %7s%c %6s %7s %7s", numbuf[0], bover ? '*' : ' ', numbuf[1], numbuf[2], bover > 1 ? timebuf : ""); @@ -1800,10 +2015,17 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) if (iover) diff2str(dqb->dqb_itime, timebuf, now); - sprintf(numbuf[0], LPU64, dqb->dqb_curinodes); - sprintf(numbuf[1], LPU64, dqb->dqb_isoftlimit); - sprintf(numbuf[2], LPU64, dqb->dqb_ihardlimit); - if (!ost_only) + sprintf(numbuf[0], (dqb->dqb_valid & QIF_INODES) ? + LPU64 : "["LPU64"]", dqb->dqb_curinodes); + if (qctl->qc_valid == QC_GENERAL) + sprintf(numbuf[1], (dqb->dqb_valid & QIF_ILIMITS) + ? LPU64 : "["LPU64"]", + dqb->dqb_isoftlimit); + else + sprintf(numbuf[1], "%s", ""); + sprintf(numbuf[2], (dqb->dqb_valid & QIF_ILIMITS) ? + LPU64 : "["LPU64"]", dqb->dqb_ihardlimit); + if (qctl->qc_valid != QC_OSTIDX) printf(" %7s%c %6s %7s %7s", numbuf[0], iover ? '*' : ' ', numbuf[1], numbuf[2], iover > 1 ? 
timebuf : ""); @@ -1821,103 +2043,89 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only) } } -static void print_mds_quota(char *mnt, struct if_quotactl *qctl) +static int print_obd_quota(char *mnt, struct if_quotactl *qctl, int is_mdt) { - int rc; + int rc = 0, rc1 = 0, count = 0; + __u32 valid = qctl->qc_valid; - /* XXX: this is a flag to mark that only mds quota is wanted */ - qctl->qc_dqblk.dqb_valid = 1; - rc = llapi_quotactl(mnt, qctl); + rc = llapi_get_obd_count(mnt, &count, is_mdt); if (rc) { - fprintf(stderr, "quotactl failed: %s\n", strerror(errno)); - return; - } - qctl->qc_dqblk.dqb_valid = 0; - - print_quota(qctl->obd_uuid.uuid, qctl, 0); -} - -static void print_lov_quota(char *mnt, struct if_quotactl *qctl) -{ - DIR *dir; - struct obd_uuid *uuids = NULL, *uuidp; - int obdcount = 1024; - int i, rc; - - dir = opendir(mnt); - if (!dir) { - fprintf(stderr, "open %s failed: %s\n", mnt, strerror(errno)); - return; - } - - uuids = (struct obd_uuid *)malloc(INIT_ALLOC_NUM_OSTS * - sizeof(struct obd_uuid)); - if (uuids == NULL) - goto out; - -retry_get_uuids: - rc = llapi_lov_get_uuids(dirfd(dir), uuids, &obdcount); - if (rc != 0) { - struct obd_uuid *uuids_temp; - - if (rc == -EOVERFLOW) { - uuids_temp = realloc(uuids, obdcount * - sizeof(struct obd_uuid)); - if (uuids_temp != NULL) - goto retry_get_uuids; - else - rc = -ENOMEM; - } - - fprintf(stderr, "get ost uuid failed: %s\n", strerror(rc)); - goto out; + fprintf(stderr, "can not get %s count: %s\n", + is_mdt ? "mdt": "ost", strerror(errno)); + return rc; } - for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) { - memcpy(&qctl->obd_uuid, uuidp, sizeof(*uuidp)); - - /* XXX clear this flag to get quota from osts */ - qctl->qc_dqblk.dqb_valid = 0; + for (qctl->qc_idx = 0; qctl->qc_idx < count; qctl->qc_idx++) { + qctl->qc_valid = is_mdt ? 
QC_MDTIDX : QC_OSTIDX; rc = llapi_quotactl(mnt, qctl); if (rc) { - fprintf(stderr, "%s quotactl failed: %s\n", - uuidp->uuid, strerror(errno)); + /* It is remote client case. */ + if (errno == EOPNOTSUPP) { + rc = 0; + goto out; + } + + if (!rc1) + rc1 = rc; + fprintf(stderr, "quotactl %s%d failed.\n", + is_mdt ? "mdt": "ost", qctl->qc_idx); continue; } - print_quota(uuidp->uuid, qctl, 1); + print_quota(obd_uuid2str(&qctl->obd_uuid), qctl); } out: - closedir(dir); - return; + qctl->qc_valid = valid; + return rc ? : rc1; } static int lfs_quota(int argc, char **argv) { int c; - char *name = NULL, *mnt; + char *mnt, *name = NULL; struct if_quotactl qctl = { .qc_cmd = LUSTRE_Q_GETQUOTA, - .qc_type = 0x01 }; - char *obd_type = qctl.obd_type; - char *obd_uuid = qctl.obd_uuid.uuid; - int rc; + .qc_type = UGQUOTA }; + char *obd_type = (char *)qctl.obd_type; + char *obd_uuid = (char *)qctl.obd_uuid.uuid; + int rc, rc1 = 0, rc2 = 0, rc3 = 0, verbose = 0, pass = 0; + __u32 valid = QC_GENERAL, idx = 0; optind = 0; - while ((c = getopt(argc, argv, "ugto:")) != -1) { + while ((c = getopt(argc, argv, "ugto:i:I:v")) != -1) { switch (c) { case 'u': - qctl.qc_type = 0x01; + if (qctl.qc_type != UGQUOTA) { + fprintf(stderr, "error: use either -u or -g\n"); + return CMD_HELP; + } + qctl.qc_type = USRQUOTA; break; case 'g': - qctl.qc_type = 0x02; + if (qctl.qc_type != UGQUOTA) { + fprintf(stderr, "error: use either -u or -g\n"); + return CMD_HELP; + } + qctl.qc_type = GRPQUOTA; break; case 't': qctl.qc_cmd = LUSTRE_Q_GETINFO; break; case 'o': + valid = qctl.qc_valid = QC_UUID; strncpy(obd_uuid, optarg, sizeof(qctl.obd_uuid)); break; + case 'i': + valid = qctl.qc_valid = QC_MDTIDX; + idx = qctl.qc_idx = atoi(optarg); + break; + case 'I': + valid = qctl.qc_valid = QC_OSTIDX; + idx = qctl.qc_idx = atoi(optarg); + break; + case 'v': + verbose = 1; + break; default: fprintf(stderr, "error: %s: option '-%c' " "unrecognized\n", argv[0], c); @@ -1925,57 +2133,80 @@ static int lfs_quota(int 
argc, char **argv) } } - if (qctl.qc_type) - qctl.qc_type--; - - - if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) { + /* current uid/gid info for "lfs quota /path/to/lustre/mount" */ + if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA && qctl.qc_type == UGQUOTA && + optind == argc - 1) { +ug_output: + memset(&qctl, 0, sizeof(qctl)); /* spoiled by print_*_quota */ + qctl.qc_cmd = LUSTRE_Q_GETQUOTA; + qctl.qc_valid = valid; + qctl.qc_idx = idx; + if (pass++ == 0) { + qctl.qc_type = USRQUOTA; + qctl.qc_id = geteuid(); + } else { + qctl.qc_type = GRPQUOTA; + qctl.qc_id = getegid(); + } + rc = id2name(&name, qctl.qc_id, + (qctl.qc_type == USRQUOTA) ? USER : GROUP); + if (rc) + name = ""; + } else if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) { if (optind + 2 != argc) { fprintf(stderr, "error: missing quota argument(s)\n"); return CMD_HELP; } name = argv[optind++]; - rc = name2id(&qctl.qc_id, name, qctl.qc_type); + rc = name2id(&qctl.qc_id, name, + (qctl.qc_type == USRQUOTA) ? USER : GROUP); if (rc) { fprintf(stderr,"error: can't find id for name %s: %s\n", name, strerror(errno)); return CMD_HELP; } - print_quota_title(name, &qctl); } else if (optind + 1 != argc) { fprintf(stderr, "error: missing quota info argument(s)\n"); return CMD_HELP; } + if (qctl.qc_cmd == LUSTRE_Q_GETQUOTA) + print_quota_title(name, &qctl); + mnt = argv[optind]; - rc = llapi_quotactl(mnt, &qctl); - if (rc) { - if (*obd_type) - fprintf(stderr, "%s %s ", obd_type, obd_uuid); - fprintf(stderr, "quota failed: %s\n", strerror(errno)); - return rc; + rc1 = llapi_quotactl(mnt, &qctl); + if (rc1 == -1 && errno == ESRCH) { + fprintf(stderr, "\n%s quotas are not enabled.\n", + qctl.qc_type == USRQUOTA ? 
"user" : "group"); + goto out; } + if (rc1 && *obd_type) + fprintf(stderr, "%s %s ", obd_type, obd_uuid); - if (!name) - rc = id2name(&name, getuid(), qctl.qc_type); + if (qctl.qc_valid != QC_GENERAL) + mnt = obd_uuid2str(&qctl.obd_uuid); - if (*obd_uuid) { - mnt = ""; - name = obd_uuid; + print_quota(mnt, &qctl); + + if (qctl.qc_valid == QC_GENERAL && qctl.qc_cmd != LUSTRE_Q_GETINFO && verbose) { + rc2 = print_obd_quota(mnt, &qctl, 1); + rc3 = print_obd_quota(mnt, &qctl, 0); } - print_quota(mnt, &qctl, 0); + if (rc1 || rc2 || rc3) + printf("Some errors happened when getting quota info. " + "Some devices may be not working or deactivated. " + "The data in \"[]\" is inaccurate.\n"); - if (!*obd_uuid && qctl.qc_cmd != LUSTRE_Q_GETINFO) { - print_mds_quota(mnt, &qctl); - print_lov_quota(mnt, &qctl); - } +out: + if (pass == 1) + goto ug_output; return 0; } -#endif /* HAVE_QUOTA_SUPPORT */ +#endif /* HAVE_SYS_QUOTA_H! */ static int flushctx_ioctl(char *mp) { diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 9aad868..3856947 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -160,18 +160,25 @@ void llapi_printf(int level, char *fmt, ...) 
va_end(args); } +/** + * size_units is unchanged if no specifier used + */ int parse_size(char *optarg, unsigned long long *size, - unsigned long long *size_units) + unsigned long long *size_units, int bytes_spec) { char *end; - *size = strtoul(optarg, &end, 0); + *size = strtoull(optarg, &end, 0); if (*end != '\0') { if ((*end == 'b') && *(end+1) == '\0' && - (*size & (~0ULL << (64 - 9))) == 0) { + (*size & (~0ULL << (64 - 9))) == 0 && + !bytes_spec) { *size <<= 9; *size_units = 1 << 9; + } else if ((*end == 'b') && *(end+1) == '\0' && + bytes_spec) { + *size_units = 1; } else if ((*end == 'k' || *end == 'K') && *(end+1) == '\0' && (*size & (~0ULL << (64 - 10))) == 0) { @@ -651,6 +658,24 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count) return rc; } +int llapi_get_obd_count(char *mnt, int *count, int is_mdt) +{ + DIR *root; + int rc; + + root = opendir(mnt); + if (!root) { + llapi_err(LLAPI_MSG_ERROR, "open %s failed", mnt); + return -1; + } + + *count = is_mdt; + rc = ioctl(dirfd(root), LL_IOC_GETOBDCOUNT, count); + + closedir(root); + return rc; +} + /* Here, param->obduuid points to a single obduuid, the index of which is * returned in param->obdindex */ static int setup_obd_uuid(DIR *dir, char *dname, struct find_param *param) @@ -1201,7 +1226,7 @@ err: * @mds indicates if this is MDS timestamps and there are attributes on OSTs. * * The result is -1 if it does not match, 0 if not yet clear, 1 if matches. 
- * The table bolow gives the answers for the specified parameters (value and + * The table below gives the answers for the specified parameters (value and * sign), 1st column is the answer for the MDS value, the 2nd is for the OST: * -------------------------------------- * 1 | file > limit; sign > 0 | -1 / -1 | diff --git a/lustre/utils/lmc b/lustre/utils/lmc index d106c64..861e318 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -222,9 +222,9 @@ lmc_options = [ ('quota', """ quotaon: enable quota, only u|g|ug is supported now. iunit: the unit for slave to acquire/release inode quota from/to master. - Int type (>0), default value in Lustre is 5000 inodes. + Int type (>0), default value in Lustre is 5120 inodes. bunit: the unit for slave to acquire/release block quota from/to master. - Mbytes (>0), default value in Lustre is 100(Mbytes). + Mbytes (>0), default value in Lustre is 128(Mbytes). itune: used to tune the threthold. When inode quota usage reach the threthold, slave should acquire/release inode quota from/to master. Int type (100 > btune > 0), default value in Lustre is 50 (percentge). 
diff --git a/lustre/utils/req-layout.c b/lustre/utils/req-layout.c index dc366bd..2611093 100644 --- a/lustre/utils/req-layout.c +++ b/lustre/utils/req-layout.c @@ -69,6 +69,7 @@ #define lustre_swab_llog_hdr NULL #define lustre_swab_llogd_body NULL #define lustre_swab_obd_quotactl NULL +#define lustre_swab_quota_adjust_qunit NULL #define lustre_swab_mgs_target_info NULL #define lustre_swab_niobuf_remote NULL #define lustre_swab_obd_ioobj NULL diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 4ca8ec5..55d026b 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -206,8 +206,8 @@ static void check_obd_connect_data(void) CHECK_CDEFINE(OBD_CONNECT_JOIN); CHECK_CDEFINE(OBD_CONNECT_ATTRFID); CHECK_CDEFINE(OBD_CONNECT_NODEVOH); - CHECK_CDEFINE(OBD_CONNECT_LCL_CLIENT); CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT); + CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT_FORCE); CHECK_CDEFINE(OBD_CONNECT_BRW_SIZE); CHECK_CDEFINE(OBD_CONNECT_QUOTA64); CHECK_CDEFINE(OBD_CONNECT_MDS_CAPA); @@ -1051,17 +1051,8 @@ check_qunit_data(void) CHECK_MEMBER(qunit_data, qd_id); CHECK_MEMBER(qunit_data, qd_flags); CHECK_MEMBER(qunit_data, qd_count); -} - -static void -check_qunit_data_old(void) -{ - BLANK_LINE(); - CHECK_STRUCT(qunit_data_old); - CHECK_MEMBER(qunit_data_old, qd_id); - CHECK_MEMBER(qunit_data_old, qd_type); - CHECK_MEMBER(qunit_data_old, qd_count); - CHECK_MEMBER(qunit_data_old, qd_isblk); + CHECK_MEMBER(qunit_data, qd_qunit); + CHECK_MEMBER(qunit_data, padding); } static void @@ -1122,6 +1113,18 @@ check_posix_acl_xattr_header(void) } static void +check_quota_adjust_qunit(void) +{ + BLANK_LINE(); + CHECK_STRUCT(quota_adjust_qunit); + CHECK_MEMBER(quota_adjust_qunit, qaq_flags); + CHECK_MEMBER(quota_adjust_qunit, qaq_id); + CHECK_MEMBER(quota_adjust_qunit, qaq_bunit_sz); + CHECK_MEMBER(quota_adjust_qunit, qaq_iunit_sz); + CHECK_MEMBER(quota_adjust_qunit, padding1); +} + +static void check_ll_user_fiemap(void) { BLANK_LINE(); @@ -1269,6 +1272,7 @@ main(int 
argc, char **argv) CHECK_VALUE(OST_SYNC); CHECK_VALUE(OST_QUOTACHECK); CHECK_VALUE(OST_QUOTACTL); + CHECK_VALUE(OST_QUOTA_ADJUST_QUNIT); CHECK_VALUE(OST_LAST_OPC); CHECK_DEFINE(OBD_OBJECT_EOF); @@ -1411,7 +1415,7 @@ main(int argc, char **argv) check_llog_array_rec(); check_mds_extent_desc(); check_qunit_data(); - check_qunit_data_old(); + check_quota_adjust_qunit(); check_mgs_target_info(); check_lustre_disk_data(); check_ll_user_fiemap(); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index e56dd28..02e5746 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -126,7 +126,9 @@ void lustre_assert_wire_constants(void) (long long)OST_QUOTACHECK); LASSERTF(OST_QUOTACTL == 19, " found %lld\n", (long long)OST_QUOTACTL); - LASSERTF(OST_LAST_OPC == 20, " found %lld\n", + LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, " found %lld\n", + (long long)OST_QUOTA_ADJUST_QUNIT); + LASSERTF(OST_LAST_OPC == 21, " found %lld\n", (long long)OST_LAST_OPC); LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n", (long long)OBD_OBJECT_EOF); @@ -250,9 +252,9 @@ void lustre_assert_wire_constants(void) (long long)OBD_QC_CALLBACK); LASSERTF(OBD_LAST_OPC == 403, " found %lld\n", (long long)OBD_LAST_OPC); - LASSERTF(QUOTA_DQACQ == 601, " found %lld\n", + LASSERTF(QUOTA_DQACQ == 901, " found %lld\n", (long long)QUOTA_DQACQ); - LASSERTF(QUOTA_DQREL == 602, " found %lld\n", + LASSERTF(QUOTA_DQREL == 902, " found %lld\n", (long long)QUOTA_DQREL); LASSERTF(MGS_CONNECT == 250, " found %lld\n", (long long)MGS_CONNECT); @@ -458,8 +460,8 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_JOIN == 0x00002000ULL); CLASSERT(OBD_CONNECT_ATTRFID == 0x00004000ULL); CLASSERT(OBD_CONNECT_NODEVOH == 0x00008000ULL); - CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x00010000ULL); - CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00020000ULL); + CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x00010000ULL); + CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x00020000ULL); CLASSERT(OBD_CONNECT_BRW_SIZE == 
0x00040000ULL); CLASSERT(OBD_CONNECT_QUOTA64 == 0x00080000ULL); CLASSERT(OBD_CONNECT_MDS_CAPA == 0x00100000ULL); @@ -2142,7 +2144,7 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm)); /* Checks for struct qunit_data */ - LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", + LASSERTF((int)sizeof(struct qunit_data) == 32, " found %lld\n", (long long)(int)sizeof(struct qunit_data)); LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n", (long long)(int)offsetof(struct qunit_data, qd_id)); @@ -2156,26 +2158,38 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct qunit_data, qd_count)); LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 8, " found %lld\n", (long long)(int)sizeof(((struct qunit_data *)0)->qd_count)); - - /* Checks for struct qunit_data_old */ - LASSERTF((int)sizeof(struct qunit_data_old) == 16, " found %lld\n", - (long long)(int)sizeof(struct qunit_data_old)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_id) == 0, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_id)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_id) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_id)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_type) == 4, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_type)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_type) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_type)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_count) == 8, " found %lld\n", - (long long)(int)offsetof(struct qunit_data_old, qd_count)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_count) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_count)); - LASSERTF((int)offsetof(struct qunit_data_old, qd_isblk) == 12, " found %lld\n", - (long long)(int)offsetof(struct 
qunit_data_old, qd_isblk)); - LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_isblk) == 4, " found %lld\n", - (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_isblk)); + LASSERTF((int)offsetof(struct qunit_data, qd_qunit) == 16, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_qunit)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_qunit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_qunit)); + LASSERTF((int)offsetof(struct qunit_data, padding) == 24, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, padding)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->padding) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->padding)); + + /* Checks for struct quota_adjust_qunit */ + LASSERTF((int)sizeof(struct quota_adjust_qunit) == 32, " found %lld\n", + (long long)(int)sizeof(struct quota_adjust_qunit)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_flags) == 0, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_flags)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_id) == 4, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_id)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz) == 16, " found %lld\n", + (long long)(int)offsetof(struct 
quota_adjust_qunit, qaq_iunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, padding1) == 24, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, padding1)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->padding1) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->padding1)); /* Checks for struct mgs_target_info */ LASSERTF((int)sizeof(struct mgs_target_info) == 4544, " found %lld\n", -- 1.8.3.1