From c18904035ebd5dfaa86b192a22fa8d5942a5bf4b Mon Sep 17 00:00:00 2001 From: tianzy Date: Fri, 18 Jan 2008 05:59:09 +0000 Subject: [PATCH] b=10600 r=adilger r=johann Branch b1_6 qunit size will be changed when quota limitation is too low/high; record the pending quota write in order to get more accurate quota; delete the patch for bug12588, which is unnecessary when this patch is landed. This bug also contains 14526, 14299, 14601 and 13794, which are found and landed during v1_4_12. --- lustre/ChangeLog | 11 + lustre/include/class_hash.h | 8 + lustre/include/lustre/lustre_idl.h | 96 ++++++- lustre/include/lustre_quota.h | 261 +++++++++++------ lustre/include/obd.h | 4 + lustre/include/obd_class.h | 13 + lustre/include/obd_support.h | 12 + lustre/ldlm/ldlm_lib.c | 70 ++--- lustre/mds/lproc_mds.c | 173 +++++++++++- lustre/mds/mds_lov.c | 3 +- lustre/mds/mds_open.c | 20 +- lustre/mds/mds_reint.c | 21 +- lustre/obdclass/class_hash.c | 111 +++++++- lustre/obdclass/lprocfs_status.c | 158 +++++++++++ lustre/obdfilter/filter_io_26.c | 23 +- lustre/obdfilter/lproc_obdfilter.c | 20 +- lustre/osc/osc_request.c | 3 +- lustre/ost/ost_handler.c | 27 ++ lustre/ptlrpc/lproc_ptlrpc.c | 1 + lustre/ptlrpc/pack_generic.c | 276 +++++++++++++++++- lustre/ptlrpc/ptlrpc_module.c | 10 + lustre/ptlrpc/wiretest.c | 50 +++- lustre/quota/Makefile.in | 2 +- lustre/quota/autoMakefile.am | 2 +- lustre/quota/quota_adjust_qunit.c | 402 ++++++++++++++++++++++++++ lustre/quota/quota_check.c | 4 +- lustre/quota/quota_context.c | 426 ++++++++++++++++++---------- lustre/quota/quota_ctl.c | 9 +- lustre/quota/quota_interface.c | 401 ++++++++++++++++++-------- lustre/quota/quota_internal.h | 72 ++++- lustre/quota/quota_master.c | 365 ++++++++++++++++++++++-- lustre/tests/sanity-quota.sh | 557 +++++++++++++++++++++++-------------- lustre/utils/wirecheck.c | 26 ++ lustre/utils/wiretest.c | 50 +++- 34 files changed, 2984 insertions(+), 703 deletions(-) create mode 100644 lustre/quota/quota_adjust_qunit.c 
diff --git a/lustre/ChangeLog b/lustre/ChangeLog index bd061ea..9e3f552 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -226,6 +226,17 @@ Details : Before packing join_file req, all the related reference should be checked carefully in case some malformed flags cause fake join_file req on client. +Severity : normal +Frequency : always +Bugzilla : 10600 +Description: shrink/enlarge qunit size when needed; fix the problem of coarse + grain of quota doing harm to quota's accuracy +Details : qunit size will be changed when quota limitation is too low/high; + record the pending quota write in order to get more accureate + quota; delete the patch for bug12588, which is unnecessary when + this patch is landed. This bug also contains 14526, 14299, 14601 + and 13794, which are found and landed during v1_4_12. + -------------------------------------------------------------------------------- 2007-12-07 Cluster File Systems, Inc. diff --git a/lustre/include/class_hash.h b/lustre/include/class_hash.h index 1be8c52..f27a046 100644 --- a/lustre/include/class_hash.h +++ b/lustre/include/class_hash.h @@ -135,4 +135,12 @@ void* nidstats_refcount_get(struct hlist_node * actual_hnode); void nidstats_refcount_put(struct hlist_node * actual_hnode); extern struct lustre_hash_operations nid_stat_hash_operations; +#ifdef __KERNEL__ +/* ( lqs <-> qctxt ) hash operations define b=10600 */ +__u32 lqs_hashfn(struct lustre_class_hash_body *hash_body, void * key); +int lqs_hash_key_compare(void *key, struct hlist_node * compared_hnode); +void * lqs_refcount_get(struct hlist_node * actual_hnode); +void lqs_refcount_put(struct hlist_node * actual_hnode); +#endif + #endif /* __CLASS_HASH_H */ diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index ffa440f..6447e68 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -64,6 +64,7 @@ /* Defn's shared with user-space. 
*/ #include +#include /* * this file contains all data structures used in Lustre interfaces: @@ -292,6 +293,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_LRU_RESIZE 0x02000000ULL /*Lru resize feature. */ #define OBD_CONNECT_MDS_MDS 0x04000000ULL /*MDS-MDS connection */ #define OBD_CONNECT_REAL 0x08000000ULL /*real connection */ +#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*shrink/enlarge qunit size + *b=10600 */ #define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos */ /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ @@ -313,7 +316,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \ OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \ - LRU_RESIZE_CONNECT_FLAG) + LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CHANGE_QS) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT) @@ -373,6 +376,7 @@ typedef enum { OST_SET_INFO = 17, OST_QUOTACHECK = 18, OST_QUOTACTL = 19, + OST_QUOTA_ADJUST_QUNIT = 20, OST_LAST_OPC } ost_cmd_t; #define OST_FIRST_OPC OST_REPLY @@ -819,6 +823,34 @@ struct obd_quotactl { extern void lustre_swab_obd_quotactl(struct obd_quotactl *q); +struct quota_adjust_qunit { + __u32 qaq_flags; + __u32 qaq_id; + __u64 qaq_bunit_sz; + __u64 qaq_iunit_sz; + __u64 padding1; +}; +extern void lustre_swab_quota_adjust_qunit(struct quota_adjust_qunit *q); + +/* flags in qunit_data and quota_adjust_qunit will use macroes below */ +#define LQUOTA_FLAGS_GRP 1UL /* 0 is user, 1 is group */ +#define LQUOTA_FLAGS_BLK 2UL /* 0 is inode, 1 is block */ +#define LQUOTA_FLAGS_ADJBLK 4UL /* adjust the block qunit size */ +#define LQUOTA_FLAGS_ADJINO 8UL /* adjust the inode qunit size */ +#define LQUOTA_FLAGS_CHG_QS 16UL /* indicate whether it has capability of + * OBD_CONNECT_CHANGE_QS */ + +/* the status of lqs_flags in struct 
lustre_qunit_size */ +#define LQUOTA_QUNIT_FLAGS (LQUOTA_FLAGS_GRP | LQUOTA_FLAGS_BLK) + +#define QAQ_IS_GRP(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_GRP) +#define QAQ_IS_ADJBLK(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_ADJBLK) +#define QAQ_IS_ADJINO(qaq) ((qaq)->qaq_flags & LQUOTA_FLAGS_ADJINO) + +#define QAQ_SET_GRP(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_GRP) +#define QAQ_SET_ADJBLK(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_ADJBLK) +#define QAQ_SET_ADJINO(qaq) ((qaq)->qaq_flags |= LQUOTA_FLAGS_ADJINO) + struct mds_rec_setattr { __u32 sa_opcode; __u32 sa_fsuid; @@ -1462,28 +1494,69 @@ extern void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct lustre_cfg; extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); -/* quota. fixed by tianzy for bug10707 */ -#define QUOTA_IS_GRP 0X1UL /* 0 is user, 1 is group. Used by qd_flags*/ -#define QUOTA_IS_BLOCK 0x2UL /* 0 is inode, 1 is block. Used by qd_flags*/ - +/* this will be used when OBD_CONNECT_CHANGE_QS is set */ struct qunit_data { + __u32 qd_id; /* ID appiles to (uid, gid) */ + __u32 qd_flags; /* LQUOTA_FLAGS_* affect the responding bits */ + __u64 qd_count; /* acquire/release count (bytes for block quota) */ + __u64 qd_qunit; /* when a master returns the reply to a slave, it will + * contain the current corresponding qunit size */ + __u64 padding; +}; + +#define QDATA_IS_GRP(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_GRP) +#define QDATA_IS_BLK(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_BLK) +#define QDATA_IS_ADJBLK(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_ADJBLK) +#define QDATA_IS_ADJINO(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_ADJINO) +#define QDATA_IS_CHANGE_QS(qdata) ((qdata)->qd_flags & LQUOTA_FLAGS_CHG_QS) + +#define QDATA_SET_GRP(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_GRP) +#define QDATA_SET_BLK(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_BLK) +#define QDATA_SET_ADJBLK(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_ADJBLK) +#define QDATA_SET_ADJINO(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_ADJINO) +#define 
QDATA_SET_CHANGE_QS(qdata) ((qdata)->qd_flags |= LQUOTA_FLAGS_CHG_QS) + +#define QDATA_CLR_GRP(qdata) ((qdata)->qd_flags &= ~LQUOTA_FLAGS_GRP) +#define QDATA_CLR_CHANGE_QS(qdata) ((qdata)->qd_flags &= ~LQUOTA_FLAGS_CHG_QS) + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +/* this will be used when OBD_CONNECT_QUOTA64 is set */ +struct qunit_data_old2 { __u32 qd_id; /* ID appiles to (uid, gid) */ __u32 qd_flags; /* Quota type (USRQUOTA, GRPQUOTA) occupy one bit; * Block quota or file quota occupy one bit */ __u64 qd_count; /* acquire/release count (bytes for block quota) */ }; +#else +#warning "remove quota code above for format absolete in new release" +#endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) struct qunit_data_old { __u32 qd_id; /* ID appiles to (uid, gid) */ __u32 qd_type; /* Quota type (USRQUOTA, GRPQUOTA) */ __u32 qd_count; /* acquire/release count (bytes for block quota) */ __u32 qd_isblk; /* Block quota or file quota */ }; +#else +#warning "remove quota code above for format absolete in new release" +#endif extern void lustre_swab_qdata(struct qunit_data *d); +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) extern void lustre_swab_qdata_old(struct qunit_data_old *d); -extern struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d); -extern struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d); +#else +#warning "remove quota code above for format absolete in new release" +#endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +extern void lustre_swab_qdata_old2(struct qunit_data_old2 *d); +#else +#warning "remove quota code above for format absolete in new release" +#endif +extern int quota_get_qdata(void*req, struct qunit_data *qdata, + int is_req, int is_exp); +extern int quota_copy_qdata(void *request, struct qunit_data *qdata, + int is_req, int is_exp); typedef enum { QUOTA_DQACQ = 601, @@ -1492,10 +1565,17 @@ typedef enum { #define JOIN_FILE_ALIGN 4096 +#define QUOTA_REQUEST 1 +#define 
QUOTA_REPLY 0 +#define QUOTA_EXPORT 1 +#define QUOTA_IMPORT 0 + /* quota check function */ #define QUOTA_RET_OK 0 /* return successfully */ #define QUOTA_RET_NOQUOTA 1 /* not support quota */ #define QUOTA_RET_NOLIMIT 2 /* quota limit isn't set */ -#define QUOTA_RET_ACQUOTA 3 /* need to acquire extra quota */ +#define QUOTA_RET_ACQUOTA 4 /* need to acquire extra quota */ +#define QUOTA_RET_INC_PENDING 8 /* pending value is increased */ +extern int quota_get_qunit_data_size(__u64 flag); #endif diff --git a/lustre/include/lustre_quota.h b/lustre/include/lustre_quota.h index 2e0acb8..eb8b227 100644 --- a/lustre/include/lustre_quota.h +++ b/lustre/include/lustre_quota.h @@ -17,6 +17,7 @@ #include #include #include +#include struct obd_device; struct client_obd; @@ -69,7 +70,7 @@ struct lustre_dquot { int dq_refcnt; /* Pointer of quota info it belongs to */ struct lustre_quota_info *dq_info; - + loff_t dq_off; /* Offset of dquot on disk */ unsigned int dq_id; /* ID this applies to (uid, gid) */ int dq_type; /* Type fo quota (USRQUOTA, GRPQUOUTA) */ @@ -99,7 +100,7 @@ int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); int lustre_read_dquot(struct lustre_dquot *dquot); int lustre_commit_dquot(struct lustre_dquot *dquot); int lustre_init_quota_info(struct lustre_quota_info *lqi, int type); -int lustre_get_qids(struct file *file, struct inode *inode, int type, +int lustre_get_qids(struct file *file, struct inode *inode, int type, struct list_head *list); int lustre_quota_convert(struct lustre_quota_info *lqi, int type); #else @@ -150,23 +151,97 @@ typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd, struct lustre_quota_ctxt { struct super_block *lqc_sb; /* superblock this applies to */ struct obd_import *lqc_import; /* import used to send dqacq/dqrel RPC */ - dqacq_handler_t lqc_handler; /* dqacq/dqrel RPC handler, only for quota master */ - unsigned long lqc_recovery:1, /* Doing recovery */ + dqacq_handler_t lqc_handler; /* 
dqacq/dqrel RPC handler, only for quota master */ + unsigned long lqc_recovery:1, /* Doing recovery */ lqc_atype:2, /* Turn on user/group quota at setup automatically, * 0: none, 1: user quota, 2: group quota, 3: both */ - lqc_status:1; /* Quota status. 0:Off, 1:On */ - spinlock_t lqc_lock; /* guard lqc_imp_valid now */ - unsigned long lqc_iunit_sz; /* Unit size of file quota */ - unsigned long lqc_itune_sz; /* Trigger dqacq when available file quota less than - * this value, trigger dqrel when available file quota + lqc_status:1, /* Quota status. 0:Off, 1:On */ + lqc_switch_qs:1; /* the function of change qunit size + * 0:Off, 1:On */ + unsigned long lqc_iunit_sz; /* original unit size of file quota and + * upper limitation for adjust file + * qunit */ + unsigned long lqc_itune_sz; /* Trigger dqacq when available file + * quota less than this value, trigger + * dqrel when available file quota * more than this value + 1 iunit */ - unsigned long lqc_bunit_sz; /* Unit size of block quota */ + unsigned long lqc_bunit_sz; /* original unit size of block quota and + * upper limitation for adjust block + * qunit */ unsigned long lqc_btune_sz; /* See comment of lqc_itune_sz */ - unsigned long lqc_limit_sz; /* When remaining quota on ost is less - * than this value, ost will request - * quota from mds */ + struct lustre_class_hash_body *lqc_lqs_hash_body; + /* all lustre_qunit_size structure in + * it */ + /* the values below are relative to how master change its qunit sizes */ + unsigned long lqc_cqs_boundary_factor; /* this affects the boundary of + * shrinking and enlarging qunit + * size. default=4 */ + unsigned long lqc_cqs_least_bunit; /* the least value of block qunit */ + unsigned long lqc_cqs_least_iunit; /* the least value of inode qunit */ + unsigned long lqc_cqs_qs_factor; /* when enlarging, qunit size will + * mutilple it; when shrinking, + * qunit size will divide it */ + int lqc_switch_seconds; /* avoid ping-pong effect of + * adjusting qunit size. 
How many + * seconds must be waited between + * enlarging and shinking qunit */ + spinlock_t lqc_lock; /* guard lqc_imp_valid now */ }; +#define LQC_HASH_BODY(qctxt) (qctxt->lqc_lqs_hash_body) + +struct lustre_qunit_size { + struct hlist_node lqs_hash; /* the hash entry */ + unsigned int lqs_id; /* id of user/group */ + unsigned long lqs_flags; /* is user/group; FULLBUF or LESSBUF */ + unsigned long lqs_iunit_sz; /* Unit size of file quota currently */ + unsigned long lqs_itune_sz; /* Trigger dqacq when available file quota + * less than this value, trigger dqrel + * when more than this value + 1 iunit */ + unsigned long lqs_bunit_sz; /* Unit size of block quota currently */ + unsigned long lqs_btune_sz; /* See comment of lqs itune sz */ + unsigned long lqs_bwrite_pending; /* the blocks reached ost and don't + * finish */ + unsigned long lqs_iwrite_pending; /* the inodes reached mds and don't + * finish */ + long long lqs_ino_rec; /* when inodes are allocated/released, + * this value will record it */ + long long lqs_blk_rec; /* when blocks are allocated/released, + * this value will record it */ + atomic_t lqs_refcount; + cfs_time_t lqs_last_bshrink; /* time of last block shrink */ + cfs_time_t lqs_last_ishrink; /* time of last inode shrink */ + spinlock_t lqs_lock; +}; + +#define LQS_IS_GRP(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_GRP) +#define LQS_IS_ADJBLK(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_ADJBLK) +#define LQS_IS_ADJINO(lqs) ((lqs)->lqs_flags & LQUOTA_FLAGS_ADJINO) + +#define LQS_SET_GRP(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_GRP) +#define LQS_SET_ADJBLK(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_ADJBLK) +#define LQS_SET_ADJINO(lqs) ((lqs)->lqs_flags |= LQUOTA_FLAGS_ADJINO) + +static inline void lqs_getref(struct lustre_qunit_size *lqs) +{ + atomic_inc(&lqs->lqs_refcount); +} + +static inline void lqs_putref(struct lustre_qunit_size *lqs) +{ + if (atomic_dec_and_test(&lqs->lqs_refcount)) { + spin_lock(&lqs->lqs_lock); + hlist_del_init(&lqs->lqs_hash); + 
spin_unlock(&lqs->lqs_lock); + OBD_FREE_PTR(lqs); + } +} + +static inline void lqs_initref(struct lustre_qunit_size *lqs) +{ + atomic_set(&lqs->lqs_refcount, 0); +} + #else struct lustre_quota_info { @@ -200,6 +275,9 @@ struct quotacheck_thread_args { atomic_t *qta_sem; /* obt_quotachecking */ }; +typedef int (*quota_acquire)(struct obd_device *obd, + unsigned int uid, unsigned int gid); + typedef struct { int (*quota_init) (void); int (*quota_exit) (void); @@ -210,45 +288,55 @@ typedef struct { int (*quota_ctl) (struct obd_export *, struct obd_quotactl *); int (*quota_check) (struct obd_export *, struct obd_quotactl *); int (*quota_recovery) (struct obd_device *); - + /* For quota master/slave, adjust quota limit after fs operation */ - int (*quota_adjust) (struct obd_device *, unsigned int[], - unsigned int[], int, int); - + int (*quota_adjust) (struct obd_device *, unsigned int[], + unsigned int[], int, int); + /* For quota slave, set import, trigger quota recovery */ int (*quota_setinfo) (struct obd_export *, struct obd_device *); - + /* For quota slave, clear import when relative import is invalid */ int (*quota_clearinfo) (struct obd_export *, struct obd_device *); - + /* For quota slave, set proper thread resoure capability */ int (*quota_enforce) (struct obd_device *, unsigned int); - + /* For quota slave, check whether specified uid/gid is over quota */ int (*quota_getflag) (struct obd_device *, struct obdo *); - + /* For quota slave, acquire/release quota from master if needed */ int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int); /* For quota slave, check whether specified uid/gid's remaining quota - * can finish a write rpc */ + * can finish a block_write or inode_create rpc. 
It updates the pending + * record of block and inode, acquires quota if necessary */ int (*quota_chkquota) (struct obd_device *, unsigned int, unsigned int, - int); - + int, int *, quota_acquire); + /* For quota client, poll if the quota check done */ int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *); - + /* For quota client, check whether specified uid/gid is over quota */ int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int); - + + /* For quota client, the actions after the pending write is committed */ + int (*quota_pending_commit) (struct obd_device *, unsigned int, + unsigned int, int); + /* For quota client, set over quota flag for specifed uid/gid */ int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int, obd_flag, obd_flag); + + /* For adjusting qunit size b=10600 */ + int (*quota_adjust_qunit) (struct obd_export *exp, struct + quota_adjust_qunit *oqaq); + } quota_interface_t; #define Q_COPY(out, in, member) (out)->member = (in)->member -#define QUOTA_OP(interface, op) interface->quota_ ## op +#define QUOTA_OP(interface, op) interface->quota_ ## op #define QUOTA_CHECK_OP(interface, op) \ do { \ @@ -264,17 +352,17 @@ static inline int lquota_init(quota_interface_t *interface) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, init); rc = QUOTA_OP(interface, init)(); RETURN(rc); } -static inline int lquota_exit(quota_interface_t *interface) +static inline int lquota_exit(quota_interface_t *interface) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, exit); rc = QUOTA_OP(interface, exit)(); RETURN(rc); @@ -285,18 +373,18 @@ static inline int lquota_setup(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, setup); rc = QUOTA_OP(interface, setup)(obd); RETURN(rc); } static inline int lquota_cleanup(quota_interface_t *interface, - struct obd_device *obd) + struct obd_device *obd) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, cleanup); rc = QUOTA_OP(interface, cleanup)(obd); RETURN(rc); @@ -307,32 
+395,32 @@ static inline int lquota_fs_cleanup(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, fs_cleanup); rc = QUOTA_OP(interface, fs_cleanup)(obd); RETURN(rc); } static inline int lquota_recovery(quota_interface_t *interface, - struct obd_device *obd) -{ + struct obd_device *obd) +{ int rc; ENTRY; - + QUOTA_CHECK_OP(interface, recovery); rc = QUOTA_OP(interface, recovery)(obd); RETURN(rc); } static inline int lquota_adjust(quota_interface_t *interface, - struct obd_device *obd, - unsigned int qcids[], - unsigned int qpids[], - int rc, int opc) + struct obd_device *obd, + unsigned int qcids[], + unsigned int qpids[], + int rc, int opc) { int ret; ENTRY; - + QUOTA_CHECK_OP(interface, adjust); ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc); RETURN(ret); @@ -344,7 +432,7 @@ static inline int lquota_chkdq(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, chkdq); rc = QUOTA_OP(interface, chkdq)(cli, uid, gid); RETURN(rc); @@ -357,7 +445,7 @@ static inline int lquota_setdq(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, setdq); rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags); RETURN(rc); @@ -369,16 +457,15 @@ static inline int lquota_poll_check(quota_interface_t *interface, { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, poll_check); rc = QUOTA_OP(interface, poll_check)(exp, qchk); RETURN(rc); } - static inline int lquota_setinfo(quota_interface_t *interface, - struct obd_export *exp, - struct obd_device *obd) + struct obd_export *exp, + struct obd_device *obd) { int rc; ENTRY; @@ -389,8 +476,8 @@ static inline int lquota_setinfo(quota_interface_t *interface, } static inline int lquota_clearinfo(quota_interface_t *interface, - struct obd_export *exp, - struct obd_device *obd) + struct obd_export *exp, + struct obd_device *obd) { int rc; ENTRY; @@ -400,7 +487,7 @@ static inline int lquota_clearinfo(quota_interface_t *interface, RETURN(rc); } -static inline int 
lquota_enforce(quota_interface_t *interface, +static inline int lquota_enforce(quota_interface_t *interface, struct obd_device *obd, unsigned int ignore) { @@ -422,9 +509,9 @@ static inline int lquota_getflag(quota_interface_t *interface, rc = QUOTA_OP(interface, getflag)(obd, oa); RETURN(rc); } - + static inline int lquota_acquire(quota_interface_t *interface, - struct obd_device *obd, + struct obd_device *obd, unsigned int uid, unsigned int gid) { int rc; @@ -438,41 +525,55 @@ static inline int lquota_acquire(quota_interface_t *interface, static inline int lquota_chkquota(quota_interface_t *interface, struct obd_device *obd, unsigned int uid, unsigned int gid, - int npage) + int count, int *flag) { int rc; ENTRY; - + QUOTA_CHECK_OP(interface, chkquota); - rc = QUOTA_OP(interface, chkquota)(obd, uid, gid, npage); + QUOTA_CHECK_OP(interface, acquire); + rc = QUOTA_OP(interface, chkquota)(obd, uid, gid, count, flag, + QUOTA_OP(interface, acquire)); RETURN(rc); } -int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_rd_iunit(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_wr_bunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_wr_iunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_rd_btune(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_rd_itune(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_wr_btune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_wr_itune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_rd_type(char *page, char **start, off_t off, int count, - int *eof, void *data); -int lprocfs_wr_type(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_filter_rd_limit(char *page, char **start, off_t off, 
int count, - int *eof, void *data); -int lprocfs_filter_wr_limit(struct file *file, const char *buffer, - unsigned long count, void *data); +static inline int lquota_pending_commit(quota_interface_t *interface, + struct obd_device *obd, + unsigned int uid, unsigned int gid, + int npage) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, pending_commit); + rc = QUOTA_OP(interface, pending_commit)(obd, uid, gid, npage); + RETURN(rc); +} +int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data); +int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data); +int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void *data); +int lprocfs_quota_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data); +int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data); +int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data); +int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data); +int lprocfs_quota_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data); +int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count, + int *eof, void *data); +int lprocfs_quota_wr_type(struct file *file, const char *buffer, + unsigned long count, void *data); +int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off, + int count, int *eof, void *data); +int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer, + unsigned long count, void *data); #ifndef __KERNEL__ extern quota_interface_t osc_quota_interface; diff --git a/lustre/include/obd.h b/lustre/include/obd.h index ee38516..6601373 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1034,6 +1034,9 @@ struct obd_ops { /* quota methods */ int (*o_quotacheck)(struct obd_export *, struct 
obd_quotactl *); int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); + int (*o_quota_adjust_qunit)(struct obd_export *exp, + struct quota_adjust_qunit *oqaq); + int (*o_ping)(struct obd_export *exp); @@ -1117,6 +1120,7 @@ static inline void init_obd_quota_ops(quota_interface_t *interface, LASSERT(obd_ops); obd_ops->o_quotacheck = QUOTA_OP(interface, check); obd_ops->o_quotactl = QUOTA_OP(interface, ctl); + obd_ops->o_quota_adjust_qunit = QUOTA_OP(interface, adjust_qunit); } #endif /* __OBD_H */ diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 5820128..08672a9 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1359,6 +1359,19 @@ static inline int obd_quotactl(struct obd_export *exp, RETURN(rc); } +static inline int obd_quota_adjust_qunit(struct obd_export *exp, + struct quota_adjust_qunit *oqaq) +{ + int rc; + ENTRY; + + EXP_CHECK_OP(exp, quota_adjust_qunit); + EXP_COUNTER_INCREMENT(exp, quota_adjust_qunit); + + rc = OBP(exp->exp_obd, quota_adjust_qunit)(exp, oqaq); + RETURN(rc); +} + static inline int obd_health_check(struct obd_device *obd) { /* returns: 0 on healthy diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index eaa1db8..8cef41a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -26,6 +26,7 @@ #include #include #include +#include /* global variables */ extern struct lprocfs_stats *obd_memory; @@ -254,7 +255,18 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_MGS_PAUSE_REQ 0x904 #define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905 +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) #define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 +#else +#warning "remove quota code above for format obsolete in new release" +#endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +#define OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS 0xA01 +#else +#warning "remove quota code above for format obsolete in new release" +#endif + +#define OBD_FAIL_QUOTA_RET_QDATA 0xA02 
#define OBD_FAIL_LPROC_REMOVE 0xB00 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 8944e90..fc31826 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1606,68 +1606,56 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) struct obd_device *obd = req->rq_export->exp_obd; struct obd_device *master_obd; struct lustre_quota_ctxt *qctxt; - struct qunit_data *qdata; - void* rep; - struct qunit_data_old *qdata_old; + struct qunit_data *qdata = NULL; int rc = 0; - int repsize[2] = { sizeof(struct ptlrpc_body), - sizeof(struct qunit_data) }; + int repsize[2] = { sizeof(struct ptlrpc_body), 0 }; ENTRY; - + + repsize[1] = quota_get_qunit_data_size(req->rq_export-> + exp_connect_flags); + rc = lustre_pack_reply(req, 2, repsize, NULL); if (rc) RETURN(rc); LASSERT(req->rq_export); - /* fixed for bug10707 */ - if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { - CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); - rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(struct qunit_data)); - LASSERT(rep); - qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), - lustre_swab_qdata); - } else { - CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); - rep = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(struct qunit_data_old)); - LASSERT(rep); - qdata_old = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata_old), - lustre_swab_qdata_old); - qdata = lustre_quota_old_to_new(qdata_old); - } - - if (qdata == NULL) { - CERROR("Can't unpack qunit_data\n"); - RETURN(-EPROTO); + /* there are three forms of qunit(historic causes), so we need to + * adjust qunits from slaves to the same form here */ + OBD_ALLOC(qdata, sizeof(struct qunit_data)); + if (!qdata) + RETURN(-ENOMEM); + rc = quota_get_qdata(req, qdata, QUOTA_REQUEST, QUOTA_EXPORT); + if (rc < 0) { + CDEBUG(D_ERROR, "Can't unpack qunit_data\n"); + GOTO(out, rc = -EPROTO); } /* we use the observer */ 
LASSERT(obd->obd_observer && obd->obd_observer->obd_observer); master_obd = obd->obd_observer->obd_observer; qctxt = &master_obd->u.obt.obt_qctxt; - + LASSERT(qctxt->lqc_handler); rc = qctxt->lqc_handler(master_obd, qdata, lustre_msg_get_opc(req->rq_reqmsg)); if (rc && rc != -EDQUOT) - CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, + CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, "dqacq failed! (rc:%d)\n", rc); - - /* the qd_count might be changed in lqc_handler */ - if ((req->rq_export->exp_connect_flags & OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { - memcpy(rep,qdata,sizeof(*qdata)); - } else { - qdata_old = lustre_quota_new_to_old(qdata); - memcpy(rep,qdata_old,sizeof(*qdata_old)); - } req->rq_status = rc; + + /* there are three forms of qunit(historic causes), so we need to + * adjust the same form to different forms slaves needed */ + rc = quota_copy_qdata(req, qdata, QUOTA_REPLY, QUOTA_EXPORT); + if (rc < 0) { + CDEBUG(D_ERROR, "Can't pack qunit_data\n"); + GOTO(out, rc = -EPROTO); + } + rc = ptlrpc_reply(req); - - RETURN(rc); +out: + OBD_FREE(qdata, sizeof(struct qunit_data)); + RETURN(rc); #else return 0; #endif /* !__KERNEL__ */ diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index b9cdeb1..9046814 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -99,7 +99,7 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, ptlrpc_check_set(set); } - /* See the comments in function lprocfs_wr_evict_client() + /* See the comments in function lprocfs_wr_evict_client() * in ptlrpc/lproc_ptlrpc.c for details. 
- jay */ class_incref(obd); LPROCFS_EXIT(); @@ -392,6 +392,155 @@ static int lprocfs_rd_nosquash_nid(char *page, char **start, off_t off, libcfs_nid2str(mds->mds_nosquash_nid)); } +#ifdef HAVE_QUOTA_SUPPORT +static int lprocfs_mds_rd_switch_qs(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "changing qunit size is %s\n", + obd->u.obt.obt_qctxt.lqc_switch_qs ? + "enabled" : "disabled"); +} + +static int lprocfs_mds_rd_boundary_factor(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_boundary_factor); +} + +static int lprocfs_mds_rd_least_bunit(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_least_bunit); +} + +static int lprocfs_mds_rd_least_iunit(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_least_iunit); +} + +static int lprocfs_mds_rd_qs_factor(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_cqs_qs_factor); +} + +static int lprocfs_mds_wr_switch_qs(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val) + 
obd->u.obt.obt_qctxt.lqc_switch_qs = 1; + else + obd->u.obt.obt_qctxt.lqc_switch_qs = 0; + + return count; +} + +static int lprocfs_mds_wr_boundary_factor(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 2) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_boundary_factor = val; + return count; +} + +static int lprocfs_mds_wr_least_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < PTLRPC_MAX_BRW_SIZE || + val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_least_bunit = val; + return count; +} + +static int lprocfs_mds_wr_least_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 1 || val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_least_iunit = val; + return count; +} + +static int lprocfs_mds_wr_qs_factor(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val < 2) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_cqs_qs_factor = val; + return count; +} +#endif + struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "blocksize", lprocfs_rd_blksize, 0, 0 }, @@ -408,11 +557,23 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { 
lprocfs_mds_wr_evictostnids, 0 }, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, #ifdef HAVE_QUOTA_SUPPORT - { "quota_bunit_sz", lprocfs_rd_bunit, lprocfs_wr_bunit, 0 }, - { "quota_btune_sz", lprocfs_rd_btune, lprocfs_wr_btune, 0 }, - { "quota_iunit_sz", lprocfs_rd_iunit, lprocfs_wr_iunit, 0 }, - { "quota_itune_sz", lprocfs_rd_itune, lprocfs_wr_itune, 0 }, - { "quota_type", lprocfs_rd_type, lprocfs_wr_type, 0 }, + { "quota_bunit_sz", lprocfs_quota_rd_bunit, lprocfs_quota_wr_bunit, 0 }, + { "quota_btune_sz", lprocfs_quota_rd_btune, lprocfs_quota_wr_btune, 0 }, + { "quota_iunit_sz", lprocfs_quota_rd_iunit, lprocfs_quota_wr_iunit, 0 }, + { "quota_itune_sz", lprocfs_quota_rd_itune, lprocfs_quota_wr_itune, 0 }, + { "quota_type", lprocfs_quota_rd_type, lprocfs_quota_wr_type, 0 }, + { "quota_switch_qs", lprocfs_mds_rd_switch_qs, + lprocfs_mds_wr_switch_qs, 0 }, + { "quota_boundary_factor", lprocfs_mds_rd_boundary_factor, + lprocfs_mds_wr_boundary_factor, 0 }, + { "quota_least_bunit", lprocfs_mds_rd_least_bunit, + lprocfs_mds_wr_least_bunit, 0 }, + { "quota_least_iunit", lprocfs_mds_rd_least_iunit, + lprocfs_mds_wr_least_iunit, 0 }, + { "quota_qs_factor", lprocfs_mds_rd_qs_factor, + lprocfs_mds_wr_qs_factor, 0 }, + { "quota_switch_seconds", lprocfs_quota_rd_switch_seconds, + lprocfs_quota_wr_switch_seconds, 0 }, #endif { "group_expire_interval", lprocfs_rd_group_expire, lprocfs_wr_group_expire, 0}, diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 4cf876a..d6bbdcb 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -507,7 +507,8 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) if (data == NULL) RETURN(-ENOMEM); data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX | - OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64 | OBD_CONNECT_AT; + OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64 | OBD_CONNECT_AT | + OBD_CONNECT_CHANGE_QS; #ifdef HAVE_LRU_RESIZE_SUPPORT data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; #endif diff --git 
a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index a2dda30..53c4af6 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -872,6 +872,8 @@ int mds_open(struct mds_update_record *rec, int offset, ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}}; struct ldlm_res_id child_res_id = { .name = {0}}; int lock_flags = 0; + int rec_pending = 0; + unsigned int gid = current->fsgid; ENTRY; mds_counter_incr(req->rq_export, LPROC_MDS_OPEN); @@ -1018,6 +1020,16 @@ int mds_open(struct mds_update_record *rec, int offset, if (req->rq_export->exp_connect_flags & OBD_CONNECT_RDONLY) GOTO(cleanup, rc = -EROFS); + if (dparent->d_inode->i_mode & S_ISGID) + gid = dparent->d_inode->i_gid; + else + gid = current->fsgid; + rc = lquota_chkquota(mds_quota_interface_ref, obd, + current->fsuid, gid, 1, &rec_pending); + + if (rc < 0) + GOTO(cleanup, rc); + intent_set_disposition(rep, DISP_OPEN_CREATE); handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE, NULL); @@ -1053,10 +1065,7 @@ int mds_open(struct mds_update_record *rec, int offset, LTIME_S(iattr.ia_mtime) = rec->ur_time; iattr.ia_uid = current->fsuid; /* set by push_ctxt already */ - if (dparent->d_inode->i_mode & S_ISGID) - iattr.ia_gid = dparent->d_inode->i_gid; - else - iattr.ia_gid = current->fsgid; + iattr.ia_gid = gid; iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME; @@ -1189,6 +1198,9 @@ found_child: req, rc, rep ? 
rep->lock_policy_res1 : 0, 0); cleanup_no_trans: + if (rec_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + current->fsuid, gid, 1); switch (cleanup_phase) { case 3: if (rc) diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 1200f13..55a5cd1 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -776,6 +776,8 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, unsigned int qcids[MAXQUOTAS] = { current->fsuid, current->fsgid }; unsigned int qpids[MAXQUOTAS] = { 0, 0 }; struct lvfs_dentry_params dp = LVFS_DENTRY_PARAMS_INIT; + int rec_pending = 0; + unsigned int gid = current->fsgid; ENTRY; LASSERT(offset == REQ_REC_OFF); @@ -835,6 +837,17 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, dp.ldp_inum = (unsigned long)rec->ur_fid2->id; dp.ldp_ptr = req; + if (dir->i_mode & S_ISGID) + gid = dir->i_gid; + else + gid = current->fsgid; + + rc = lquota_chkquota(mds_quota_interface_ref, obd, + current->fsuid, gid, 1, &rec_pending); + + if (rc < 0) + GOTO(cleanup, rc); + switch (type) { case S_IFREG:{ handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL); @@ -902,10 +915,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, LTIME_S(iattr.ia_ctime) = rec->ur_time; LTIME_S(iattr.ia_mtime) = rec->ur_time; iattr.ia_uid = current->fsuid; /* set by push_ctxt already */ - if (dir->i_mode & S_ISGID) - iattr.ia_gid = dir->i_gid; - else - iattr.ia_gid = current->fsgid; + iattr.ia_gid = gid; iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME; @@ -952,6 +962,9 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, cleanup: err = mds_finish_transno(mds, dir, handle, req, rc, 0, 0); + if (rec_pending) + lquota_pending_commit(mds_quota_interface_ref, obd, + current->fsuid, gid, 1); if (rc && created) { /* Destroy the file we just created. 
This should not need diff --git a/lustre/obdclass/class_hash.c b/lustre/obdclass/class_hash.c index fd725aa..c578564 100644 --- a/lustre/obdclass/class_hash.c +++ b/lustre/obdclass/class_hash.c @@ -21,9 +21,10 @@ #include #include #include +#include -int lustre_hash_init(struct lustre_class_hash_body **hash_body_new, - char *hashname, __u32 hashsize, +int lustre_hash_init(struct lustre_class_hash_body **hash_body_new, + char *hashname, __u32 hashsize, struct lustre_hash_operations *hash_operations) { int i, n = 0; @@ -42,28 +43,28 @@ int lustre_hash_init(struct lustre_class_hash_body **hash_body_new, LASSERTF(n == 1, "hashsize %u isn't 2^n\n", hashsize); - /* alloc space for hash_body */ - OBD_ALLOC(hash_body, sizeof(*hash_body)); + /* alloc space for hash_body */ + OBD_ALLOC(hash_body, sizeof(*hash_body)); if (hash_body == NULL) { - CERROR("Cannot alloc space for hash body, hashname = %s \n", + CERROR("Cannot alloc space for hash body, hashname = %s \n", hashname); RETURN(-ENOMEM); } - LASSERT(hashname != NULL && + LASSERT(hashname != NULL && strlen(hashname) <= sizeof(hash_body->hashname)); strcpy(hash_body->hashname, hashname); - hash_body->lchb_hash_max_size = hashsize; - hash_body->lchb_hash_operations = hash_operations; + hash_body->lchb_hash_max_size = hashsize; + hash_body->lchb_hash_operations = hash_operations; /* alloc space for the hash tables */ - OBD_ALLOC(hash_body->lchb_hash_tables, + OBD_ALLOC(hash_body->lchb_hash_tables, sizeof(*hash_body->lchb_hash_tables) * hash_body->lchb_hash_max_size); if (hash_body->lchb_hash_tables == NULL) { - OBD_FREE(hash_body, sizeof(*hash_body)); - CERROR("Cannot alloc space for hashtables, hashname = %s \n", + OBD_FREE(hash_body, sizeof(*hash_body)); + CERROR("Cannot alloc space for hashtables, hashname = %s \n", hash_body->hashname); RETURN(-ENOMEM); } @@ -99,7 +100,7 @@ void lustre_hash_exit(struct lustre_class_hash_body **new_hash_body) if (hash_body->lchb_hash_tables == NULL ) { spin_unlock(&hash_body->lchb_lock); 
CWARN("hash tables has been deleted\n"); - goto out_hash; + goto out_hash; } for( i = 0; i < hash_body->lchb_hash_max_size; i++ ) { @@ -111,12 +112,13 @@ void lustre_hash_exit(struct lustre_class_hash_body **new_hash_body) hlist_for_each_safe(actual_hnode, pos, &(bucket->lhb_head)) { lustre_hash_delitem_nolock(hash_body, i, actual_hnode); } - spin_unlock(&bucket->lhb_lock); + spin_unlock(&bucket->lhb_lock); } /* free the hash_tables's memory space */ OBD_FREE(hash_body->lchb_hash_tables, - sizeof(*hash_body->lchb_hash_tables) * hash_body->lchb_hash_max_size); + sizeof(*hash_body->lchb_hash_tables) * + hash_body->lchb_hash_max_size); hash_body->lchb_hash_tables = NULL; @@ -671,3 +673,84 @@ void nidstats_refcount_put(struct hlist_node * actual_hnode) } /*******************************************************************************/ + +#ifdef __KERNEL__ +/* + * ( lqs <-> qctxt ) hash operations and function definitions + */ + +/* define the lqs hash operations */ +struct lustre_hash_operations lqs_hash_operations = { + .lustre_hashfn = lqs_hashfn, + .lustre_hash_key_compare = lqs_hash_key_compare, + .lustre_hash_object_refcount_get = lqs_refcount_get, + .lustre_hash_object_refcount_put = lqs_refcount_put, +}; +EXPORT_SYMBOL(lqs_hash_operations); + +/* hash a quota key: id times a type-dependent constant, masked to the table size */ +__u32 lqs_hashfn(struct lustre_class_hash_body *hash_body, void * key) +{ + struct quota_adjust_qunit *lqs_key = NULL; + __u32 hash; + + LASSERT(key != NULL); + + lqs_key = (struct quota_adjust_qunit *)key; + + hash = QAQ_IS_GRP(lqs_key) ? 
5381 : 5387; + hash *= lqs_key->qaq_id; + + hash &= (hash_body->lchb_hash_max_size - 1); + + RETURN(hash); +} + +int lqs_hash_key_compare(void *key, struct hlist_node *compared_hnode) +{ + struct quota_adjust_qunit *lqs_key = NULL; + struct lustre_qunit_size *q = NULL; + int retval = 0; + + LASSERT( key != NULL); + + lqs_key = (struct quota_adjust_qunit *)key; + + q = hlist_entry(compared_hnode, struct lustre_qunit_size, lqs_hash); + + spin_lock(&q->lqs_lock); + if (lqs_key->qaq_id == q->lqs_id && QAQ_IS_GRP(lqs_key) == LQS_IS_GRP(q)) + retval = 1; + spin_unlock(&q->lqs_lock); + + return retval; +} + +void * lqs_refcount_get(struct hlist_node * actual_hnode) +{ + struct lustre_qunit_size *q = NULL; + + LASSERT(actual_hnode != NULL); + + q = hlist_entry(actual_hnode, struct lustre_qunit_size, lqs_hash); + + LASSERT(q != NULL); + + lqs_getref(q); + + RETURN(q); +} + +void lqs_refcount_put(struct hlist_node * actual_hnode) +{ + struct lustre_qunit_size *q = NULL; + + LASSERT(actual_hnode != NULL); + + q = hlist_entry(actual_hnode, struct lustre_qunit_size, lqs_hash); + + LASSERT(q != NULL); + + lqs_putref(q); +} +#endif diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 521b393..786d50a 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1121,6 +1121,7 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check); LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck); LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, quota_adjust_qunit); LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping); LPROCFS_OBD_OP_INIT(num_private_stats, stats, register_page_removal_cb); LPROCFS_OBD_OP_INIT(num_private_stats,stats,unregister_page_removal_cb); @@ -1708,6 +1709,163 @@ int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer, 
EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime); #endif /* CRAY_XT3 */ +#ifdef HAVE_QUOTA_SUPPORT +int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_bunit_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_bunit); + +int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val % QUOTABLOCK_SIZE || + val <= obd->u.obt.obt_qctxt.lqc_btune_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_bunit_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_bunit); + +int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_btune_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_btune); + +int lprocfs_quota_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || + val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_btune_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_btune); + +int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_iunit_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_iunit); 
+ +int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= obd->u.obt.obt_qctxt.lqc_itune_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_iunit_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_iunit); + +int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_itune_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_itune); + +int lprocfs_quota_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= MIN_QLIMIT || + val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_itune_sz = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_itune); + +int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%d\n", + obd->u.obt.obt_qctxt.lqc_switch_seconds); +} +EXPORT_SYMBOL(lprocfs_quota_rd_switch_seconds); + +int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + int val, rc; + LASSERT(obd != NULL); + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= 10) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_switch_seconds = val; + return count; +} +EXPORT_SYMBOL(lprocfs_quota_wr_switch_seconds); + 
+#endif + + EXPORT_SYMBOL(lprocfs_register); EXPORT_SYMBOL(lprocfs_srch); EXPORT_SYMBOL(lprocfs_remove); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 538be1e..01c1518 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -633,7 +633,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int i, err, cleanup_phase = 0; struct obd_device *obd = exp->exp_obd; void *wait_handle; - int total_size = 0, rc2; + int total_size = 0, rc2 = 0; + int rec_pending = 0; unsigned int qcids[MAXQUOTAS] = {0, 0}; ENTRY; @@ -647,18 +648,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, /* Unfortunately, if quota master is too busy to handle the * pre-dqacq in time and quota hash on ost is used up, we * have to wait for the completion of in flight dqacq/dqrel, - * then try again */ - if ((rc2 = lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid, - oa->o_gid, niocount)) == QUOTA_RET_ACQUOTA) { - OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90); - lquota_acquire(filter_quota_interface_ref, obd, oa->o_uid, - oa->o_gid); - } + * in order to get enough quota for write b=12588 */ + rc2 = lquota_chkquota(filter_quota_interface_ref, obd, oa->o_uid, + oa->o_gid, niocount, &rec_pending); - if (rc2 < 0) { - rc = rc2; - GOTO(cleanup, rc); - } + if (rc2 < 0) + GOTO(cleanup, rc = rc2); iobuf = filter_iobuf_get(&obd->u.filter, oti); if (IS_ERR(iobuf)) @@ -788,6 +783,10 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, fsfilt_check_slow(obd, now, "commitrw commit"); cleanup: + if (rec_pending) + lquota_pending_commit(filter_quota_interface_ref, obd, oa->o_uid, + oa->o_gid, niocount); + filter_grant_commit(exp, niocount, res); switch (cleanup_phase) { diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 0c06c04..7b6aafa 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -203,13 
+203,19 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = { lprocfs_filter_rd_readcache, lprocfs_filter_wr_readcache, 0 }, #ifdef HAVE_QUOTA_SUPPORT - { "quota_bunit_sz", lprocfs_rd_bunit, lprocfs_wr_bunit, 0}, - { "quota_btune_sz", lprocfs_rd_btune, lprocfs_wr_btune, 0}, - { "quota_iunit_sz", lprocfs_rd_iunit, lprocfs_wr_iunit, 0}, - { "quota_itune_sz", lprocfs_rd_itune, lprocfs_wr_itune, 0}, - { "quota_type", lprocfs_rd_type, lprocfs_wr_type, 0}, - { "quota_limit_sz", lprocfs_filter_rd_limit, - lprocfs_filter_wr_limit, 0}, + { "quota_bunit_sz", lprocfs_quota_rd_bunit, + lprocfs_quota_wr_bunit, 0}, + { "quota_btune_sz", lprocfs_quota_rd_btune, + lprocfs_quota_wr_btune, 0}, + { "quota_iunit_sz", lprocfs_quota_rd_iunit, + lprocfs_quota_wr_iunit, 0}, + { "quota_itune_sz", lprocfs_quota_rd_itune, + lprocfs_quota_wr_itune, 0}, + { "quota_type", lprocfs_quota_rd_type, + lprocfs_quota_wr_type, 0}, + { "quota_switch_seconds", lprocfs_quota_rd_switch_seconds, + lprocfs_quota_wr_switch_seconds, 0 }, + #endif { "client_cache_count", lprocfs_filter_rd_fmd_max_num, lprocfs_filter_wr_fmd_max_num, 0 }, diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 44a2e9d..097c006 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -673,7 +673,8 @@ static void osc_consume_write_grant(struct client_obd *cli,struct brw_page *pga) pga->flag |= OBD_BRW_FROM_GRANT; CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n", CFS_PAGE_SIZE, pga, pga->pg); - LASSERT(cli->cl_avail_grant >= 0); + LASSERTF(cli->cl_avail_grant >= 0, "invalid avail grant is %ld \n", + cli->cl_avail_grant); } /* the companion to osc_consume_write_grant, called when a brw has completed. 
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 0d3661a..cb83e58 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1344,6 +1344,28 @@ static int ost_handle_quotacheck(struct ptlrpc_request *req) RETURN(0); } +static int ost_handle_quota_adjust_qunit(struct ptlrpc_request *req) +{ + struct quota_adjust_qunit *oqaq, *repoqa; + int size[2] = { sizeof(struct ptlrpc_body), sizeof(*repoqa) }; + int rc; + ENTRY; + + oqaq = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqaq), + lustre_swab_quota_adjust_qunit); + + if (oqaq == NULL) + GOTO(out, rc = -EPROTO); + rc = lustre_pack_reply(req, 2, size, NULL); + if (rc) + GOTO(out, rc); + repoqa = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*repoqa)); + req->rq_status = obd_quota_adjust_qunit(req->rq_export, oqaq); + *repoqa = *oqaq; + out: + RETURN(rc); +} + static int ost_filter_recovery_request(struct ptlrpc_request *req, struct obd_device *obd, int *process) { @@ -1402,6 +1424,7 @@ int ost_msg_check_version(struct lustre_msg *msg) case OST_GET_INFO: case OST_QUOTACHECK: case OST_QUOTACTL: + case OST_QUOTA_ADJUST_QUNIT: rc = lustre_msg_check_version(msg, LUSTRE_OST_VERSION); if (rc) CERROR("bad opc %u version %08x, expecting %08x\n", @@ -1594,6 +1617,10 @@ static int ost_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_OST_QUOTACTL_NET, 0); rc = ost_handle_quotactl(req); break; + case OST_QUOTA_ADJUST_QUNIT: + CDEBUG(D_INODE, "quota_adjust_qunit\n"); + rc = ost_handle_quota_adjust_qunit(req); + break; case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); rc = target_handle_ping(req); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 7486b91..0b6a401 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -61,6 +61,7 @@ struct ll_rpc_opcode { { OST_SET_INFO, "ost_set_info" }, { OST_QUOTACHECK, "ost_quotacheck" }, { OST_QUOTACTL, "ost_quotactl" }, + { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" }, { MDS_GETATTR, 
"mds_getattr" }, { MDS_GETATTR_NAME, "mds_getattr_lock" }, { MDS_CLOSE, "mds_close" }, diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index b9da95b..5b3808f 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2091,6 +2091,14 @@ void lustre_swab_obd_quotactl (struct obd_quotactl *q) lustre_swab_obd_dqblk (&q->qc_dqblk); } +void lustre_swab_quota_adjust_qunit (struct quota_adjust_qunit *q) +{ + __swab32s (&q->qaq_flags); + __swab32s (&q->qaq_id); + __swab64s (&q->qaq_bunit_sz); + __swab64s (&q->qaq_iunit_sz); +} + void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) { __swab32s (&sa->sa_opcode); @@ -2360,8 +2368,10 @@ void lustre_swab_qdata(struct qunit_data *d) __swab32s (&d->qd_id); __swab32s (&d->qd_flags); __swab64s (&d->qd_count); + __swab64s (&d->qd_qunit); } +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) void lustre_swab_qdata_old(struct qunit_data_old *d) { __swab32s (&d->qd_id); @@ -2369,45 +2379,283 @@ void lustre_swab_qdata_old(struct qunit_data_old *d) __swab32s (&d->qd_count); __swab32s (&d->qd_isblk); } +#else +#warning "remove quota code above for format absolete in new release" +#endif + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +void lustre_swab_qdata_old2(struct qunit_data_old2 *d) +{ + __swab32s (&d->qd_id); + __swab32s (&d->qd_flags); + __swab64s (&d->qd_count); +} +#else +#warning "remove quota code above for format absolete in new release" +#endif #ifdef __KERNEL__ -struct qunit_data *lustre_quota_old_to_new(struct qunit_data_old *d) + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) +void qdata_v1_v3(struct qunit_data_old *d, + struct qunit_data *qdata) +{ + LASSERT(d); + LASSERT(qdata); + + qdata->qd_id = d->qd_id; + if (d->qd_type) + QDATA_SET_GRP(qdata); + if (d->qd_isblk) + QDATA_SET_BLK(qdata); + qdata->qd_count = d->qd_count; +} + +struct qunit_data_old *qdata_v3_to_v1(struct qunit_data *d) { - struct qunit_data_old tmp; - struct qunit_data *ret; + 
struct qunit_data tmp; + struct qunit_data_old *ret; ENTRY; if (!d) return NULL; tmp = *d; - ret = (struct qunit_data *)d; + ret = (struct qunit_data_old *)d; ret->qd_id = tmp.qd_id; - ret->qd_flags = (tmp.qd_type ? QUOTA_IS_GRP : 0) | (tmp.qd_isblk ? QUOTA_IS_BLOCK : 0); - ret->qd_count = tmp.qd_count; + ret->qd_type = (QDATA_IS_GRP(&tmp) ? GRPQUOTA : USRQUOTA); + ret->qd_count = (__u32)tmp.qd_count; + ret->qd_isblk = (QDATA_IS_BLK(&tmp) ? 1 : 0); RETURN(ret); +} +#else +#warning "remove quota code above for format absolete in new release" +#endif + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +void qdata_v2_to_v3(struct qunit_data_old2 *d, + struct qunit_data *qdata) +{ + LASSERT(d); + LASSERT(qdata); + qdata->qd_id = d->qd_id; + qdata->qd_flags = d->qd_flags; + qdata->qd_count = d->qd_count; } -EXPORT_SYMBOL(lustre_quota_old_to_new); -struct qunit_data_old *lustre_quota_new_to_old(struct qunit_data *d) +struct qunit_data_old2 *qdata_v3_to_v2(struct qunit_data *d) { struct qunit_data tmp; - struct qunit_data_old *ret; + struct qunit_data_old2 *ret; ENTRY; if (!d) return NULL; tmp = *d; - ret = (struct qunit_data_old *)d; + ret = (struct qunit_data_old2 *)d; ret->qd_id = tmp.qd_id; - ret->qd_type = ((tmp.qd_flags & QUOTA_IS_GRP) ? GRPQUOTA : USRQUOTA); - ret->qd_count = (__u32)tmp.qd_count; - ret->qd_isblk = ((tmp.qd_flags & QUOTA_IS_BLOCK) ? 
1 : 0); + ret->qd_flags = tmp.qd_flags & LQUOTA_QUNIT_FLAGS; + ret->qd_count = tmp.qd_count; RETURN(ret); } -EXPORT_SYMBOL(lustre_quota_new_to_old); +#else +#warning "remove quota code above for format absolete in new release" +#endif + +/* got qdata from request(req/rep) */ +int quota_get_qdata(void *request, struct qunit_data *qdata, + int is_req, int is_exp) +{ + struct ptlrpc_request *req = (struct ptlrpc_request *)request; + struct qunit_data *new; + struct qunit_data_old *old; + struct qunit_data_old2 *old2; + int size = sizeof(struct qunit_data_old); + int size2 = sizeof(struct qunit_data_old2); + __u64 flags = is_exp ? req->rq_export->exp_connect_flags : + req->rq_import->imp_connect_data.ocd_connect_flags; + + LASSERT(req); + LASSERT(qdata); + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) + if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) + goto quota32; +#else +#warning "remove quota code above for format absolete in new release" +#endif + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) + if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS)) + goto without_change_qs; +#else +#warning "remove quota code above for format absolete in new release" +#endif + + /* support for quota64 and change_qs */ + if (flags & OBD_CONNECT_CHANGE_QS) { + if (!(flags & OBD_CONNECT_QUOTA64)) { + CDEBUG(D_ERROR, "Wire protocol for qunit is broken!\n"); + return -EINVAL; + } + if (is_req == QUOTA_REQUEST) + new = lustre_swab_reqbuf(req, REQ_REC_OFF, + sizeof(struct qunit_data), + lustre_swab_qdata); + else + new = lustre_swab_repbuf(req, REPLY_REC_OFF, + sizeof(struct qunit_data), + lustre_swab_qdata); + *qdata = *new; + QDATA_SET_CHANGE_QS(qdata); + return 0; + } else { + QDATA_CLR_CHANGE_QS(qdata); + } + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +without_change_qs: + /* only support for quota64 */ + if (flags & OBD_CONNECT_QUOTA64) { + + if (is_req == QUOTA_REQUEST) + old2 = lustre_swab_reqbuf(req, REQ_REC_OFF, size2, + lustre_swab_qdata_old2); + 
else + old2 = lustre_swab_repbuf(req, REPLY_REC_OFF, size2, + lustre_swab_qdata_old2); + qdata_v2_to_v3(old2, qdata); + + return 0; + } +#else +#warning "remove quota code above for format absolete in new release" +#endif + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) +quota32: + /* not support for quota64 and change_qs */ + if (is_req == QUOTA_REQUEST) + old = lustre_swab_reqbuf(req, REQ_REC_OFF, size, + lustre_swab_qdata_old); + else + old = lustre_swab_repbuf(req, REPLY_REC_OFF, size, + lustre_swab_qdata_old); + qdata_v1_v3(old, qdata); +#else +#warning "remove quota code above for format absolete in new release" +#endif + + return 0; +} +EXPORT_SYMBOL(quota_get_qdata); + +/* copy qdata to request(req/rep) */ +int quota_copy_qdata(void *request, struct qunit_data *qdata, + int is_req, int is_exp) +{ + struct ptlrpc_request *req = (struct ptlrpc_request *)request; + void *target; + struct qunit_data_old *old; + struct qunit_data_old2 *old2; + __u64 flags = is_exp ? req->rq_export->exp_connect_flags : + req->rq_import->imp_connect_data.ocd_connect_flags; + + LASSERT(req); + LASSERT(qdata); + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) + if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) + goto quota32; +#else +#warning "remove quota code above for format absolete in new release" +#endif + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) + if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS)) + goto without_change_qs; +#else +#warning "remove quota code above for format absolete in new release" +#endif + + /* support for quota64 and change_qs */ + if (flags & OBD_CONNECT_CHANGE_QS) { + if (!(flags & OBD_CONNECT_QUOTA64)) { + CERROR("Wire protocol for qunit is broken!\n"); + return -EINVAL; + } + if (is_req == QUOTA_REQUEST) + target = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, + sizeof(struct qunit_data)); + else + target = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(struct qunit_data)); + if (!target) + return -EINVAL; + 
memcpy(target, qdata, sizeof(*qdata)); + return 0; + } + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +without_change_qs: + /* only support for quota64 */ + if (flags & OBD_CONNECT_QUOTA64) { + if (is_req == QUOTA_REQUEST) + target = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, + sizeof(struct qunit_data_old2)); + else + target = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(struct qunit_data_old2)); + if (!target) + return -EINVAL; + old2 = qdata_v3_to_v2(qdata); + memcpy(target, old2, sizeof(*old2)); + return 0; + } +#else +#warning "remove quota code above for format absolete in new release" +#endif + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) +quota32: + /* not support for quota64 and change_qs */ + if (is_req == QUOTA_REQUEST) + target = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, + sizeof(struct qunit_data_old)); + else + target = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, + sizeof(struct qunit_data_old)); + if (!target) + return -EINVAL; + old = qdata_v3_to_v1(qdata); + memcpy(target, old, sizeof(*old)); +#else +#warning "remove quota code above for format absolete in new release" +#endif + + return 0; +} +EXPORT_SYMBOL(quota_copy_qdata); + +int quota_get_qunit_data_size(__u64 flag) +{ + int size; + + if (flag & OBD_CONNECT_CHANGE_QS) { + size = sizeof(struct qunit_data); + } else { + /* write in this way because sizes of qunit_data_old and + * qunit_data_old2 are same */ + LASSERT(sizeof(struct qunit_data_old) == + sizeof(struct qunit_data_old2)); + size = sizeof(struct qunit_data_old); + } + + return(size); +} +EXPORT_SYMBOL(quota_get_qunit_data_size); #endif /* __KERNEL__ */ static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req) diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 5ecfdee..4f0e2a2 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -228,7 +228,17 @@ EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc); EXPORT_SYMBOL(lustre_swab_ldlm_request); 
EXPORT_SYMBOL(lustre_swab_ldlm_reply); EXPORT_SYMBOL(lustre_swab_qdata); +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) EXPORT_SYMBOL(lustre_swab_qdata_old); +#else +#warning "remove quota code above for format absolete in new release" +#endif +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 9, 0, 0) +EXPORT_SYMBOL(lustre_swab_qdata_old2); +#else +#warning "remove quota code above for format absolete in new release" +#endif +EXPORT_SYMBOL(lustre_swab_quota_adjust_qunit); EXPORT_SYMBOL(lustre_msg_get_flags); EXPORT_SYMBOL(lustre_msg_add_flags); EXPORT_SYMBOL(lustre_msg_set_flags); diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 22b8a46..d27635b 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -77,7 +77,9 @@ void lustre_assert_wire_constants(void) (long long)OST_QUOTACHECK); LASSERTF(OST_QUOTACTL == 19, " found %lld\n", (long long)OST_QUOTACTL); - LASSERTF(OST_LAST_OPC == 20, " found %lld\n", + LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, " found %lld\n", + (long long)OST_QUOTA_ADJUST_QUNIT); + LASSERTF(OST_LAST_OPC == 21, " found %lld\n", (long long)OST_LAST_OPC); LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n", (long long)OBD_OBJECT_EOF); @@ -1987,7 +1989,7 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm)); /* Checks for struct qunit_data */ - LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", + LASSERTF((int)sizeof(struct qunit_data) == 32, " found %lld\n", (long long)(int)sizeof(struct qunit_data)); LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n", (long long)(int)offsetof(struct qunit_data, qd_id)); @@ -2001,6 +2003,30 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct qunit_data, qd_count)); LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 8, " found %lld\n", (long long)(int)sizeof(((struct qunit_data *)0)->qd_count)); + LASSERTF((int)offsetof(struct qunit_data, qd_qunit) == 
16, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_qunit)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_qunit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_qunit)); + LASSERTF((int)offsetof(struct qunit_data, padding) == 24, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, padding)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->padding) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->padding)); + + /* Checks for struct qunit_data_old2 */ + LASSERTF((int)sizeof(struct qunit_data_old2) == 16, " found %lld\n", + (long long)(int)sizeof(struct qunit_data_old2)); + LASSERTF((int)offsetof(struct qunit_data_old2, qd_id) == 0, " found %lld\n", + (long long)(int)offsetof(struct qunit_data_old2, qd_id)); + LASSERTF((int)sizeof(((struct qunit_data_old2 *)0)->qd_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data_old2 *)0)->qd_id)); + LASSERTF((int)offsetof(struct qunit_data_old2, qd_flags) == 4, " found %lld\n", + (long long)(int)offsetof(struct qunit_data_old2, qd_flags)); + LASSERTF((int)sizeof(((struct qunit_data_old2 *)0)->qd_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data_old2 *)0)->qd_flags)); + LASSERTF((int)offsetof(struct qunit_data_old2, qd_count) == 8, " found %lld\n", + (long long)(int)offsetof(struct qunit_data_old2, qd_count)); + LASSERTF((int)sizeof(((struct qunit_data_old2 *)0)->qd_count) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data_old2 *)0)->qd_count)); /* Checks for struct qunit_data_old */ LASSERTF((int)sizeof(struct qunit_data_old) == 16, " found %lld\n", @@ -2022,6 +2048,26 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_isblk) == 4, " found %lld\n", (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_isblk)); + /* Checks for struct quota_adjust_qunit */ + LASSERTF((int)sizeof(struct quota_adjust_qunit) == 32, " found %lld\n", + 
(long long)(int)sizeof(struct quota_adjust_qunit)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_flags) == 0, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_flags)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_id) == 4, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_id)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz) == 16, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz)); + /* Checks for struct mgs_target_info */ LASSERTF((int)sizeof(struct mgs_target_info) == 4544, " found %lld\n", (long long)(int)sizeof(struct mgs_target_info)); diff --git a/lustre/quota/Makefile.in b/lustre/quota/Makefile.in index 19a37ca..2f1bfad 100644 --- a/lustre/quota/Makefile.in +++ b/lustre/quota/Makefile.in @@ -2,7 +2,7 @@ MODULES := lquota MODULES += quotactl_test quotacheck_test lquota-objs := quota_check.o quota_context.o quota_ctl.o quota_interface.o -lquota-objs += quota_master.o +lquota-objs += quota_master.o quota_adjust_qunit.o quotactl-objs := quotactl_test.o quotaccheck-objs := quotacheck_test.o diff --git a/lustre/quota/autoMakefile.am 
b/lustre/quota/autoMakefile.am index c23c370..a397190 100644 --- a/lustre/quota/autoMakefile.am +++ b/lustre/quota/autoMakefile.am @@ -5,7 +5,7 @@ if LIBLUSTRE noinst_LIBRARIES = libquota.a -libquota_a_SOURCES = quota_check.c quota_ctl.c quota_interface.c +libquota_a_SOURCES = quota_check.c quota_ctl.c quota_interface.c quota_adjust_qunit.c libquota_a_CPPFLAGS = $(LLCPPFLAGS) libquota_a_CFLAGS = $(LLCFLAGS) endif diff --git a/lustre/quota/quota_adjust_qunit.c b/lustre/quota/quota_adjust_qunit.c new file mode 100644 index 0000000..078876d --- /dev/null +++ b/lustre/quota/quota_adjust_qunit.c @@ -0,0 +1,402 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/quota/quota_adjust_qunit.c + * + * Copyright (c) 2005 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * No redistribution or use is permitted outside of Cluster File Systems, Inc. + * + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_MDS + +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# include +# include +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# include +# include +# include +# else +# include +# endif +#else /* __KERNEL__ */ +# include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include "quota_internal.h" + +#ifdef __KERNEL__ +/* this function is in charge of recording lqs_ino_rec and + * lqs_blk_rec. when a lquota slave checks a quota + * request(check_cur_qunit) and finishes a quota + * request(dqacq_completion), it will be called. 
+ * is_chk: whether it is checking quota; otherwise, it is finishing + * is_acq: whether it is acquiring; otherwise, it is releasing + */ +void quota_compute_lqs(struct qunit_data *qdata, struct lustre_qunit_size *lqs, + int is_chk, int is_acq) +{ + int is_blk; + + LASSERT(qdata && lqs); + LASSERT_SPIN_LOCKED(&lqs->lqs_lock); + is_blk = QDATA_IS_BLK(qdata); + + if (is_chk) { + if (is_acq) { + if (is_blk) + lqs->lqs_blk_rec += qdata->qd_count; + else + lqs->lqs_ino_rec += qdata->qd_count; + } else { + if (is_blk) + lqs->lqs_blk_rec -= qdata->qd_count; + else + lqs->lqs_ino_rec -= qdata->qd_count; + } + } else { + if (is_acq) { + if (is_blk) + lqs->lqs_blk_rec -= qdata->qd_count; + else + lqs->lqs_ino_rec -= qdata->qd_count; + } else { + if (is_blk) + lqs->lqs_blk_rec += qdata->qd_count; + else + lqs->lqs_ino_rec += qdata->qd_count; + } + } +} + +void qdata_to_oqaq(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq) +{ + LASSERT(qdata); + LASSERT(oqaq); + + oqaq->qaq_flags = qdata->qd_flags; + oqaq->qaq_id = qdata->qd_id; + if (QDATA_IS_ADJBLK(qdata)) + oqaq->qaq_bunit_sz = qdata->qd_qunit; + if (QDATA_IS_ADJINO(qdata)) + oqaq->qaq_iunit_sz = qdata->qd_qunit; +} + +int quota_search_lqs(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return) +{ + struct quota_adjust_qunit *oqaq_tmp = NULL; + ENTRY; + + LASSERT(*lqs_return == NULL); + LASSERT(oqaq || qdata); + + if (!oqaq) { + OBD_ALLOC_PTR(oqaq_tmp); + if (!oqaq_tmp) + RETURN(-ENOMEM); + qdata_to_oqaq(qdata, oqaq_tmp); + } else { + oqaq_tmp = oqaq; + } + + *lqs_return = lustre_hash_get_object_by_key(LQC_HASH_BODY(qctxt), + oqaq_tmp); + if (*lqs_return) + LQS_DEBUG((*lqs_return), "show lqs\n"); + + if (!oqaq) + OBD_FREE_PTR(oqaq_tmp); + RETURN(0); +} + +int quota_create_lqs(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return) +{ + int rc = 0; + 
struct quota_adjust_qunit *oqaq_tmp = NULL; + struct lustre_qunit_size *lqs = NULL; + ENTRY; + + LASSERT(*lqs_return == NULL); + LASSERT(oqaq || qdata); + + if (!oqaq) { + OBD_ALLOC_PTR(oqaq_tmp); + if (!oqaq_tmp) + RETURN(-ENOMEM); + qdata_to_oqaq(qdata, oqaq_tmp); + } else { + oqaq_tmp = oqaq; + } + + OBD_ALLOC_PTR(lqs); + if (!lqs) + GOTO(out, rc = -ENOMEM); + + spin_lock_init(&lqs->lqs_lock); + lqs->lqs_bwrite_pending = 0; + lqs->lqs_iwrite_pending = 0; + lqs->lqs_ino_rec = 0; + lqs->lqs_blk_rec = 0; + lqs->lqs_id = oqaq_tmp->qaq_id; + lqs->lqs_flags = QAQ_IS_GRP(oqaq_tmp); + lqs->lqs_bunit_sz = qctxt->lqc_bunit_sz; + lqs->lqs_iunit_sz = qctxt->lqc_iunit_sz; + lqs->lqs_btune_sz = qctxt->lqc_btune_sz; + lqs->lqs_itune_sz = qctxt->lqc_itune_sz; + if (qctxt->lqc_handler) { + lqs->lqs_last_bshrink = 0; + lqs->lqs_last_ishrink = 0; + } + lqs_initref(lqs); + rc = lustre_hash_additem_unique(LQC_HASH_BODY(qctxt), + oqaq_tmp, &lqs->lqs_hash); + LQS_DEBUG(lqs, "create lqs\n"); + if (!rc) { + lqs_getref(lqs); + *lqs_return = lqs; + } + out: + if (rc && lqs) + OBD_FREE_PTR(lqs); + if (!oqaq) + OBD_FREE_PTR(oqaq_tmp); + RETURN(rc); +} + +int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq, struct + lustre_quota_ctxt *qctxt) +{ + struct lustre_qunit_size *lqs = NULL; + unsigned long *lbunit, *liunit, *lbtune, *litune; + signed long b_tmp = 0, i_tmp = 0; + static cfs_time_t time_limit = 0; + int rc = 0; + ENTRY; + + if (OBD_FAIL_CHECK(OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS)) + RETURN(0); + + LASSERT(qctxt); + search_lqs: + rc = quota_search_lqs(NULL, oqaq, qctxt, &lqs); + + /* deleting the lqs, because a user sets lfs quota 0 0 0 0 */ + if (!oqaq->qaq_bunit_sz && !oqaq->qaq_iunit_sz && QAQ_IS_ADJBLK(oqaq) && + QAQ_IS_ADJINO(oqaq)) { + if (lqs) { + LQS_DEBUG(lqs, "release lqs\n"); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + /* this is for deleting this lqs */ + lqs_putref(lqs); + } + RETURN(rc); + } + + if (!lqs) { + rc = quota_create_lqs(NULL, oqaq, qctxt, 
&lqs); + if (rc == -EALREADY) + goto search_lqs; + if (rc < 0) + RETURN(rc); + } + + lbunit = &lqs->lqs_bunit_sz; + liunit = &lqs->lqs_iunit_sz; + lbtune = &lqs->lqs_btune_sz; + litune = &lqs->lqs_itune_sz; + + spin_lock(&lqs->lqs_lock); + CDEBUG(D_QUOTA, "before: bunit: %lu, iunit: %lu.\n", *lbunit, *liunit); + /* adjust the slave's block qunit size */ + if (QAQ_IS_ADJBLK(oqaq)) { + cfs_duration_t sec = cfs_time_seconds(qctxt->lqc_switch_seconds); + + b_tmp = *lbunit - oqaq->qaq_bunit_sz; + + if (qctxt->lqc_handler && b_tmp > 0) + lqs->lqs_last_bshrink = cfs_time_current(); + + if (qctxt->lqc_handler && b_tmp < 0) { + time_limit = cfs_time_add(lqs->lqs_last_bshrink, sec); + if (!lqs->lqs_last_bshrink || + cfs_time_after(cfs_time_current(), time_limit)) { + *lbunit = oqaq->qaq_bunit_sz; + *lbtune = (*lbunit) / 2; + } else { + b_tmp = 0; + } + } else { + *lbunit = oqaq->qaq_bunit_sz; + *lbtune = (*lbunit) / 2; + } + } + + /* adjust the slave's file qunit size */ + if (QAQ_IS_ADJINO(oqaq)) { + i_tmp = *liunit - oqaq->qaq_iunit_sz; + + if (qctxt->lqc_handler && i_tmp > 0) + lqs->lqs_last_ishrink = cfs_time_current(); + + if (qctxt->lqc_handler && i_tmp < 0) { + time_limit = cfs_time_add(lqs->lqs_last_ishrink, + cfs_time_seconds(qctxt-> + lqc_switch_seconds)); + if (!lqs->lqs_last_ishrink || + cfs_time_after(cfs_time_current(), time_limit)) { + *liunit = oqaq->qaq_iunit_sz; + *litune = (*liunit) / 2; + } else { + i_tmp = 0; + } + } else { + *liunit = oqaq->qaq_iunit_sz; + *litune = (*liunit) / 2; + } + } + CDEBUG(D_QUOTA, "after: bunit: %lu, iunit: %lu.\n", *lbunit, *liunit); + spin_unlock(&lqs->lqs_lock); + + lqs_putref(lqs); + + if (b_tmp > 0) + rc |= LQS_BLK_DECREASE; + else if (b_tmp < 0) + rc |= LQS_BLK_INCREASE; + + if (i_tmp > 0) + rc |= LQS_INO_DECREASE; + else if (i_tmp < 0) + rc |= LQS_INO_INCREASE; + + RETURN(rc); +} + +int filter_quota_adjust_qunit(struct obd_export *exp, struct + quota_adjust_qunit *oqaq) +{ + struct obd_device *obd = exp->exp_obd; + 
struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + unsigned int uid = 0, gid = 0; + int rc = 0; + ENTRY; + + LASSERT(oqaq); + LASSERT(QAQ_IS_ADJBLK(oqaq)); + rc = quota_adjust_slave_lqs(oqaq, qctxt); + if (rc < 0) { + CERROR("adjust mds slave's qunit size failed!(rc:%d)\n", rc); + RETURN(rc); + } + if (QAQ_IS_GRP(oqaq)) + gid = oqaq->qaq_id; + else + uid = oqaq->qaq_id; + + if (rc > 0) { + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 0); + if (rc) + CERROR("slave adjust block quota failed!(rc:%d)\n", rc); + } + RETURN(rc); +} +#endif /* __KERNEL__ */ + +int client_quota_adjust_qunit(struct obd_export *exp, struct + quota_adjust_qunit *oqaq) +{ + struct ptlrpc_request *req; + struct quota_adjust_qunit *oqa; + int size[2] = { sizeof(struct ptlrpc_body), sizeof(*oqaq) }; + int rc = 0; + ENTRY; + + /* client don't support this kind of operation, abort it */ + if (!(exp->exp_connect_flags & OBD_CONNECT_CHANGE_QS)|| + OBD_FAIL_CHECK(OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS)) { + CDEBUG(D_QUOTA, "osc: %s don't support change qunit size\n", + exp->exp_obd->obd_name); + RETURN(rc); + } + if (strcmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME)) + RETURN(-EINVAL); + + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_OST_VERSION, + OST_QUOTA_ADJUST_QUNIT, 2, size, NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + oqa = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*oqaq)); + *oqa = *oqaq; + + ptlrpc_req_set_repsize(req, 2, size); + + rc = ptlrpc_queue_wait(req); + if (rc) { + CERROR("%s: %s failed: rc = %d\n", exp->exp_obd->obd_name, + __FUNCTION__, rc); + GOTO(out, rc); + } + ptlrpc_req_finished(req); +out: + RETURN (rc); +} + +int lov_quota_adjust_qunit(struct obd_export *exp, struct + quota_adjust_qunit *oqaq) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lov_obd *lov = &obd->u.lov; + int i, rc = 0; + ENTRY; + + if (!QAQ_IS_ADJBLK(oqaq)) { + CERROR("bad qaq_flags %x for lov obd.\n", oqaq->qaq_flags); + RETURN(-EFAULT); + } + + for (i = 0; i < 
lov->desc.ld_tgt_count; i++) { + int err; + + if (!lov->lov_tgts[i]->ltd_active) { + CDEBUG(D_HA, "ost %d is inactive\n", i); + continue; + } + + err = obd_quota_adjust_qunit(lov->lov_tgts[i]->ltd_exp, oqaq); + if (err) { + if (lov->lov_tgts[i]->ltd_active && !rc) + rc = err; + continue; + } + } + RETURN(rc); +} diff --git a/lustre/quota/quota_check.c b/lustre/quota/quota_check.c index 1596b74..bde6b6e 100644 --- a/lustre/quota/quota_check.c +++ b/lustre/quota/quota_check.c @@ -202,11 +202,11 @@ int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk) qchk->obd_uuid = cli->cl_target_uuid; /* FIXME change strncmp to strcmp and save the strlen op */ if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME, - strlen(LUSTRE_OSC_NAME))) + strlen(LUSTRE_OSC_NAME)) == 0) memcpy(qchk->obd_type, LUSTRE_OST_NAME, strlen(LUSTRE_OST_NAME)); else if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME, - strlen(LUSTRE_MDC_NAME))) + strlen(LUSTRE_MDC_NAME)) == 0) memcpy(qchk->obd_type, LUSTRE_MDS_NAME, strlen(LUSTRE_MDS_NAME)); diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index 5418f0e..e5e12f6 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -29,13 +29,15 @@ #include #include #include +#include #include "quota_internal.h" -unsigned long default_bunit_sz = 100 * 1024 * 1024; /* 100M bytes */ +extern struct lustre_hash_operations lqs_hash_operations; + +unsigned long default_bunit_sz = 128 * 1024 * 1024; /* 128M bytes */ unsigned long default_btune_ratio = 50; /* 50 percentage */ -unsigned long default_iunit_sz = 5000; /* 5000 inodes */ +unsigned long default_iunit_sz = 5120; /* 5120 inodes */ unsigned long default_itune_ratio = 50; /* 50 percentage */ -unsigned long default_limit_sz = 20 * 1024 * 1024; cfs_mem_cache_t *qunit_cachep = NULL; struct list_head qunit_hash[NR_DQHASH]; @@ -55,8 +57,12 @@ int should_translate_quota (struct obd_import *imp) ENTRY; LASSERT(imp); - if 
((imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) && +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(1, 7, 0, 0) + if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64 && !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) +#else + if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) +#endif RETURN(0); else RETURN(1); @@ -108,7 +114,7 @@ static inline int qunit_hashfn(struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) { unsigned int id = qdata->qd_id; - unsigned int type = qdata->qd_flags & QUOTA_IS_GRP; + unsigned int type = QDATA_IS_GRP(qdata); unsigned long tmp = ((unsigned long)qctxt >> L1_CACHE_SHIFT) ^ id; tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; @@ -127,7 +133,9 @@ static inline struct lustre_qunit *find_qunit(unsigned int hashent, list_for_each_entry(qunit, qunit_hash + hashent, lq_hash) { tmp = &qunit->lq_data; if (qunit->lq_ctxt == qctxt && - qdata->qd_id == tmp->qd_id && qdata->qd_flags == tmp->qd_flags) + qdata->qd_id == tmp->qd_id && + (qdata->qd_flags & LQUOTA_QUNIT_FLAGS) == + (tmp->qd_flags & LQUOTA_QUNIT_FLAGS)) return qunit; } return NULL; @@ -148,11 +156,11 @@ check_cur_qunit(struct obd_device *obd, { struct super_block *sb = qctxt->lqc_sb; unsigned long qunit_sz, tune_sz; - __u64 usage, limit; + __u64 usage, limit, limit_org, pending_write = 0; + long long record = 0; struct obd_quotactl *qctl; + struct lustre_qunit_size *lqs = NULL; int ret = 0; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; ENTRY; if (!sb_any_quota_enabled(sb)) @@ -165,7 +173,7 @@ check_cur_qunit(struct obd_device *obd, /* get fs quota usage & limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; + qctl->qc_type = QDATA_IS_GRP(qdata); ret = fsfilt_quotactl(obd, sb, qctl); if (ret) { if (ret == -ESRCH) /* no limit */ @@ -175,84 +183,133 @@ check_cur_qunit(struct obd_device *obd, GOTO(out, ret); } - if (is_blk) { + if 
(QDATA_IS_BLK(qdata)) { usage = qctl->qc_dqblk.dqb_curspace; limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; - qunit_sz = qctxt->lqc_bunit_sz; - tune_sz = qctxt->lqc_btune_sz; - - LASSERT(!(qunit_sz % QUOTABLOCK_SIZE)); } else { usage = qctl->qc_dqblk.dqb_curinodes; limit = qctl->qc_dqblk.dqb_ihardlimit; - qunit_sz = qctxt->lqc_iunit_sz; - tune_sz = qctxt->lqc_itune_sz; } - /* ignore the no quota limit case */ + /* ignore the no quota limit case; and it can avoid creating + * unnecessary lqs for uid/gid */ if (!limit) GOTO(out, ret = 0); + search_lqs: + quota_search_lqs(qdata, NULL, qctxt, &lqs); + if (!lqs) { + CDEBUG(D_QUOTA, "Can't find the lustre qunit size!\n"); + ret = quota_create_lqs(qdata, NULL, qctxt, &lqs); + if (ret == -EALREADY) + goto search_lqs; + if (ret < 0) + GOTO (out, ret); + } + spin_lock(&lqs->lqs_lock); + + if (QDATA_IS_BLK(qdata)) { + qunit_sz = lqs->lqs_bunit_sz; + tune_sz = lqs->lqs_btune_sz; + pending_write = lqs->lqs_bwrite_pending * CFS_PAGE_SIZE; + record = lqs->lqs_blk_rec; + LASSERT(!(qunit_sz % QUOTABLOCK_SIZE)); + } else { + /* we didn't need change inode qunit size now */ + qunit_sz = lqs->lqs_iunit_sz; + tune_sz = lqs->lqs_itune_sz; + pending_write = lqs->lqs_iwrite_pending; + record = lqs->lqs_ino_rec; + } + /* we don't count the MIN_QLIMIT */ - if ((limit == MIN_QLIMIT && !is_blk) || - (toqb(limit) == MIN_QLIMIT && is_blk)) + if ((limit == MIN_QLIMIT && !QDATA_IS_BLK(qdata)) || + (toqb(limit) == MIN_QLIMIT && QDATA_IS_BLK(qdata))) limit = 0; + usage += pending_write; + limit_org = limit; + /* when a releasing quota req is sent, before it returned + limit is assigned a small value. 
limit will overflow */ + if (limit + record < 0) + usage -= record; + else + limit += record; + LASSERT(qdata->qd_count == 0); if (limit <= usage + tune_sz) { - while (qdata->qd_count + limit <= usage + tune_sz) + while (qdata->qd_count + limit <= + usage + tune_sz) qdata->qd_count += qunit_sz; ret = 1; - } else if (limit > usage + qunit_sz + tune_sz) { - while (limit - qdata->qd_count > usage + qunit_sz + tune_sz) + } else if (limit > usage + qunit_sz + tune_sz && + limit_org > qdata->qd_count + qunit_sz) { + while (limit - qdata->qd_count > usage + qunit_sz + tune_sz && + limit_org > qdata->qd_count + qunit_sz) qdata->qd_count += qunit_sz; ret = 2; } + CDEBUG(D_QUOTA, "type: %c, limit: "LPU64", usage: "LPU64 + ", pending_write: "LPU64", record: "LPD64 + ", qunit_sz: %lu, tune_sz: %lu, ret: %d.\n", + QDATA_IS_BLK(qdata) ? 'b' : 'i', limit, usage, pending_write, + record, qunit_sz, tune_sz, ret); LASSERT(ret == 0 || qdata->qd_count); + + if (ret > 0) { + quota_compute_lqs(qdata, lqs, 1, (ret == 1) ? 
1 : 0); + /* when this qdata returned from mds, it will call lqs_putref */ + lqs_getref(lqs); + } + + spin_unlock(&lqs->lqs_lock); + lqs_putref(lqs); EXIT; -out: + out: OBD_FREE_PTR(qctl); return ret; } /* compute the remaining quota for certain gid or uid b=11693 */ int compute_remquota(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata) + struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, + int isblk) { struct super_block *sb = qctxt->lqc_sb; __u64 usage, limit; struct obd_quotactl *qctl; int ret = QUOTA_RET_OK; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; ENTRY; if (!sb_any_quota_enabled(sb)) RETURN(QUOTA_RET_NOQUOTA); /* ignore root user */ - if (qdata->qd_id == 0 && qdata_type == USRQUOTA) + if (qdata->qd_id == 0 && QDATA_IS_GRP(qdata) == USRQUOTA) RETURN(QUOTA_RET_NOLIMIT); OBD_ALLOC_PTR(qctl); - if (qctl == NULL) + if (qctl == NULL) RETURN(-ENOMEM); /* get fs quota usage & limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; + qctl->qc_type = QDATA_IS_GRP(qdata); ret = fsfilt_quotactl(obd, sb, qctl); if (ret) { if (ret == -ESRCH) /* no limit */ ret = QUOTA_RET_NOLIMIT; else - CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", + CDEBUG(D_QUOTA, "can't get fs quota usage! (rc:%d)", ret); GOTO(out, ret); } - usage = qctl->qc_dqblk.dqb_curspace; - limit = qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS; + usage = isblk ? qctl->qc_dqblk.dqb_curspace : + qctl->qc_dqblk.dqb_curinodes; + limit = isblk ? 
qctl->qc_dqblk.dqb_bhardlimit << QUOTABLOCK_BITS : + qctl->qc_dqblk.dqb_ihardlimit; if (!limit){ /* no limit */ ret = QUOTA_RET_NOLIMIT; GOTO(out, ret); @@ -328,9 +385,31 @@ insert_qunit_nolock(struct lustre_quota_ctxt *qctxt, struct lustre_qunit *qunit) list_add(&qunit->lq_hash, head); } +static void compute_lqs_after_removing_qunit(struct lustre_qunit *qunit) +{ + struct lustre_qunit_size *lqs = NULL; + + quota_search_lqs(&qunit->lq_data, NULL, qunit->lq_ctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + if (qunit->lq_opc == QUOTA_DQACQ) + quota_compute_lqs(&qunit->lq_data, lqs, 0, 1); + if (qunit->lq_opc == QUOTA_DQREL) + quota_compute_lqs(&qunit->lq_data, lqs, 0, 0); + spin_unlock(&lqs->lqs_lock); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + /* this is for check_cur_qunit */ + lqs_putref(lqs); + } + +} + static void remove_qunit_nolock(struct lustre_qunit *qunit) { LASSERT(!list_empty(&qunit->lq_hash)); + LASSERT_SPIN_LOCKED(&qunit_hash_lock); + list_del_init(&qunit->lq_hash); } @@ -345,18 +424,19 @@ struct qunit_waiter { /* FIXME check if this mds is the master of specified id */ -static int -is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, +static int +is_master(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, unsigned int id, int type) { return qctxt->lqc_handler ? 1 : 0; } -static int +static int schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, int opc, int wait); -static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, +static int split_before_schedule_dqacq(struct obd_device *obd, + struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, int opc, int wait) { int rc = 0; @@ -366,17 +446,17 @@ static int split_before_schedule_dqacq(struct obd_device *obd, struct lustre_quo LASSERT(qdata && qdata->qd_count); QDATA_DEBUG(qdata, "%s quota split.\n", - (qdata->qd_flags & QUOTA_IS_BLOCK) ? 
"block" : "inode"); - if (qdata->qd_flags & QUOTA_IS_BLOCK) - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * + QDATA_IS_BLK(qdata) ? "block" : "inode"); + if (QDATA_IS_BLK(qdata)) + factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * qctxt->lqc_bunit_sz; else - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * + factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * qctxt->lqc_iunit_sz; if (qctxt->lqc_import && should_translate_quota(qctxt->lqc_import) && qdata->qd_count > factor) { - tmp_qdata = *qdata; + tmp_qdata = *qdata; tmp_qdata.qd_count = factor; qdata->qd_count -= tmp_qdata.qd_count; QDATA_DEBUG((&tmp_qdata), "be split.\n"); @@ -396,24 +476,19 @@ dqacq_completion(struct obd_device *obd, { struct lustre_qunit *qunit = NULL; struct super_block *sb = qctxt->lqc_sb; - unsigned long qunit_sz; struct qunit_waiter *qw, *tmp; int err = 0; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; - __u64 qd_tmp = qdata->qd_count; - unsigned long div_r; + struct quota_adjust_qunit *oqaq = NULL; + int rc1 = 0; ENTRY; LASSERT(qdata); - qunit_sz = is_blk ? qctxt->lqc_bunit_sz : qctxt->lqc_iunit_sz; - div_r = do_div(qd_tmp, qunit_sz); - LASSERTF(!div_r, "qunit_sz: %lu, return qunit_sz: "LPU64"\n", - qunit_sz, qd_tmp); + QDATA_DEBUG(qdata, "obd(%s): complete %s quota req\n", + obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel"); /* update local operational quota file */ if (rc == 0) { - __u32 count = QUSG(qdata->qd_count, is_blk); + __u32 count = QUSG(qdata->qd_count, QDATA_IS_BLK(qdata)); struct obd_quotactl *qctl; __u64 *hardlimit; @@ -426,14 +501,14 @@ dqacq_completion(struct obd_device *obd, * set fs quota limit */ qctl->qc_cmd = Q_GETQUOTA; qctl->qc_id = qdata->qd_id; - qctl->qc_type = qdata_type; + qctl->qc_type = QDATA_IS_GRP(qdata); err = fsfilt_quotactl(obd, sb, qctl); if (err) { CERROR("error get quota fs limit! 
(rc:%d)\n", err); GOTO(out_mem, err); } - if (is_blk) { + if (QDATA_IS_BLK(qdata)) { qctl->qc_dqblk.dqb_valid = QIF_BLIMITS; hardlimit = &qctl->qc_dqblk.dqb_bhardlimit; } else { @@ -441,19 +516,14 @@ dqacq_completion(struct obd_device *obd, hardlimit = &qctl->qc_dqblk.dqb_ihardlimit; } + CDEBUG(D_QUOTA, "hardlimt: "LPU64"\n", *hardlimit); switch (opc) { case QUOTA_DQACQ: - CDEBUG(D_QUOTA, "%s(acq):count: %d, hardlimt: "LPU64 - ",type: %s.\n", obd->obd_name, count, *hardlimit, - qdata_type ? "grp": "usr"); INC_QLIMIT(*hardlimit, count); break; case QUOTA_DQREL: - CDEBUG(D_QUOTA, "%s(rel):count: %d, hardlimt: "LPU64 - ",type: %s.\n", obd->obd_name, count, *hardlimit, - qdata_type ? "grp": "usr"); - LASSERTF(count < *hardlimit, - "count: %d, hardlimit: "LPU64".\n", + LASSERTF(count < *hardlimit, + "count: %d, hardlimit: "LPU64".\n", count, *hardlimit); *hardlimit -= count; break; @@ -494,6 +564,9 @@ out: LASSERT(opc == qunit->lq_opc); remove_qunit_nolock(qunit); + spin_unlock(&qunit_hash_lock); + + compute_lqs_after_removing_qunit(qunit); /* wake up all waiters */ list_for_each_entry_safe(qw, tmp, &qunit->lq_waiters, qw_entry) { @@ -502,27 +575,38 @@ out: wake_up(&qw->qw_waitq); } - spin_unlock(&qunit_hash_lock); - qunit_put(qunit); /* don't reschedule in such cases: - * - acq/rel failure, but not for quota recovery. + * - acq/rel failure and qunit isn't changed, + * but not for quota recovery. * - local dqacq/dqrel. * - local disk io failure. 
*/ - if (err || (rc && rc != -EBUSY) || - is_master(obd, qctxt, qdata->qd_id, qdata_type)) + OBD_ALLOC_PTR(oqaq); + if (!oqaq) + RETURN(-ENOMEM); + qdata_to_oqaq(qdata, oqaq); + /* adjust the qunit size in slaves */ + rc1 = quota_adjust_slave_lqs(oqaq, qctxt); + OBD_FREE_PTR(oqaq); + if (rc1 < 0) { + CERROR("adjust slave's qunit size failed!(rc:%d)\n", rc1); + RETURN(rc1); + } + if (err || (rc && rc != -EBUSY && rc1 == 0) || + is_master(obd, qctxt, qdata->qd_id, QDATA_IS_GRP(qdata))) RETURN(err); /* reschedule another dqacq/dqrel if needed */ qdata->qd_count = 0; - rc = check_cur_qunit(obd, qctxt, qdata); - if (rc > 0) { + qdata->qd_flags &= LQUOTA_QUNIT_FLAGS; + rc1 = check_cur_qunit(obd, qctxt, qdata); + if (rc1 > 0) { int opc; - opc = rc == 1 ? QUOTA_DQACQ : QUOTA_DQREL; - rc = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0); - QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc); + opc = rc1 == 1 ? QUOTA_DQACQ : QUOTA_DQREL; + rc1 = split_before_schedule_dqacq(obd, qctxt, qdata, opc, 0); + QDATA_DEBUG(qdata, "reschedudle opc(%d) rc(%d)\n", opc, rc1); } RETURN(err); } @@ -539,38 +623,57 @@ static int dqacq_interpret(struct ptlrpc_request *req, void *data, int rc) struct lustre_qunit *qunit = aa->aa_qunit; struct obd_device *obd = req->rq_import->imp_obd; struct qunit_data *qdata = NULL; - struct qunit_data_old *qdata_old = NULL; + int rc1 = 0; ENTRY; LASSERT(req); LASSERT(req->rq_import); - if ((req->rq_import->imp_connect_data.ocd_connect_flags & OBD_CONNECT_QUOTA64) && - !OBD_FAIL_CHECK(OBD_FAIL_QUOTA_QD_COUNT_32BIT)) { - CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); - qdata = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(*qdata), lustre_swab_qdata); - } else { - CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); - qdata_old = lustre_swab_reqbuf(req, REPLY_REC_OFF, sizeof(struct qunit_data_old), - lustre_swab_qdata_old); - qdata = lustre_quota_old_to_new(qdata_old); - } - if (qdata == NULL) { - DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data"); - 
RETURN(-EPROTO); + + /* there are several forms of qunit(historic causes), so we need to + * adjust qunit from slaves to the same form here */ + OBD_ALLOC(qdata, sizeof(struct qunit_data)); + if (!qdata) + RETURN(-ENOMEM); + rc1 = quota_get_qdata(req, qdata, QUOTA_REPLY, QUOTA_IMPORT); + if (rc1 < 0) { + DEBUG_REQ(D_ERROR, req, "error unpacking qunit_data\n"); + GOTO(exit, rc = -EPROTO); } - LASSERT(qdata->qd_id == qunit->lq_data.qd_id && - (qdata->qd_flags & QUOTA_IS_GRP) == (qunit->lq_data.qd_flags & QUOTA_IS_GRP) && - (qdata->qd_count == qunit->lq_data.qd_count || - qdata->qd_count == 0)); + QDATA_DEBUG(qdata, "qdata: interpret rc(%d).\n", rc); + QDATA_DEBUG((&qunit->lq_data), "lq_data: \n"); - QDATA_DEBUG(qdata, "%s interpret rc(%d).\n", - lustre_msg_get_opc(req->rq_reqmsg) == QUOTA_DQACQ ? - "DQACQ" : "DQREL", rc); + if (qdata->qd_id != qunit->lq_data.qd_id || + OBD_FAIL_CHECK_ONCE(OBD_FAIL_QUOTA_RET_QDATA)) { + CDEBUG(D_ERROR, "the returned qd_id isn't expected!" + "(qdata: %u, lq_data: %u)\n", qdata->qd_id, + qunit->lq_data.qd_id); + qdata->qd_id = qunit->lq_data.qd_id; + rc = -EPROTO; + } + if (QDATA_IS_GRP(qdata) != QDATA_IS_GRP(&qunit->lq_data)) { + CDEBUG(D_ERROR, "the returned grp/usr isn't expected!" + "(qdata: %u, lq_data: %u)\n", qdata->qd_flags, + qunit->lq_data.qd_flags); + if (QDATA_IS_GRP(&qunit->lq_data)) + QDATA_SET_GRP(qdata); + else + QDATA_CLR_GRP(qdata); + rc = -EPROTO; + } + if (qdata->qd_count > qunit->lq_data.qd_count) { + CDEBUG(D_ERROR, "the returned qd_count isn't expected!" 
+ "(qdata: "LPU64", lq_data: "LPU64")\n", qdata->qd_count, + qunit->lq_data.qd_count); + rc = -EPROTO; + } rc = dqacq_completion(obd, qctxt, qdata, rc, lustre_msg_get_opc(req->rq_reqmsg)); +exit: + OBD_FREE(qdata, sizeof(struct qunit_data)); + RETURN(rc); } @@ -593,9 +696,8 @@ schedule_dqacq(struct obd_device *obd, struct qunit_waiter qw; struct l_wait_info lwi = { 0 }; struct ptlrpc_request *req; - struct qunit_data *reqdata; struct dqacq_async_args *aa; - int size[2] = { sizeof(struct ptlrpc_body), sizeof(*reqdata) }; + int size[2] = { sizeof(struct ptlrpc_body), 0 }; struct obd_import *imp = NULL; unsigned long factor; int rc = 0; @@ -607,18 +709,34 @@ schedule_dqacq(struct obd_device *obd, if ((empty = alloc_qunit(qctxt, qdata, opc)) == NULL) RETURN(-ENOMEM); - + spin_lock(&qunit_hash_lock); qunit = dqacq_in_flight(qctxt, qdata); if (qunit) { - if (wait) + struct lustre_qunit_size *lqs = NULL; + + if (wait) list_add_tail(&qw.qw_entry, &qunit->lq_waiters); spin_unlock(&qunit_hash_lock); - free_qunit(empty); + + quota_search_lqs(qdata, NULL, qctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + quota_compute_lqs(qdata, lqs, 0, + (opc == QUOTA_DQACQ) ? 1 : 0); + spin_unlock(&lqs->lqs_lock); + /* this is for quota_search_lqs */ + lqs_putref(lqs); + /* this is for check_cur_qunit */ + lqs_putref(lqs); + } else { + CDEBUG(D_ERROR, "Can't find the lustre qunit size!\n"); + } + goto wait_completion; - } + } qunit = empty; insert_qunit_nolock(qctxt, qunit); if (wait) @@ -627,8 +745,10 @@ schedule_dqacq(struct obd_device *obd, LASSERT(qunit); + QDATA_DEBUG(qdata, "obd(%s): send %s quota req\n", + obd->obd_name, (opc == QUOTA_DQACQ) ? "acq" : "rel"); /* master is going to dqacq/dqrel from itself */ - if (is_master(obd, qctxt, qdata->qd_id, qdata->qd_flags & QUOTA_IS_GRP)) { + if (is_master(obd, qctxt, qdata->qd_id, QDATA_IS_GRP(qdata))) { int rc2; QDATA_DEBUG(qdata, "local %s.\n", opc == QUOTA_DQACQ ? 
"DQACQ" : "DQREL"); @@ -645,9 +765,10 @@ schedule_dqacq(struct obd_device *obd, if (wait) list_del_init(&qw.qw_entry); remove_qunit_nolock(qunit); - free_qunit(empty); qunit = NULL; spin_unlock(&qunit_hash_lock); + compute_lqs_after_removing_qunit(qunit); + free_qunit(empty); RETURN(-EAGAIN); } else { imp = class_import_get(qctxt->lqc_import); @@ -656,6 +777,9 @@ schedule_dqacq(struct obd_device *obd, /* build dqacq/dqrel request */ LASSERT(imp); + size[1] = quota_get_qunit_data_size(imp-> + imp_connect_data.ocd_connect_flags); + req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, opc, 2, size, NULL); if (!req) { @@ -664,33 +788,21 @@ schedule_dqacq(struct obd_device *obd, RETURN(-ENOMEM); } - if (qdata->qd_flags & QUOTA_IS_BLOCK) - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * + if (QDATA_IS_BLK(qdata)) + factor = MAX_QUOTA_COUNT32 / qctxt->lqc_bunit_sz * qctxt->lqc_bunit_sz; else - factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * + factor = MAX_QUOTA_COUNT32 / qctxt->lqc_iunit_sz * qctxt->lqc_iunit_sz; LASSERTF(!should_translate_quota(imp) || qdata->qd_count <= factor, "qd_count: "LPU64"; should_translate_quota: %d.\n", qdata->qd_count, should_translate_quota(imp)); - if (should_translate_quota(imp)) - { - struct qunit_data_old *reqdata_old, *tmp; - - reqdata_old = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF, - sizeof(*reqdata_old)); - tmp = lustre_quota_new_to_old(qdata); - *reqdata_old = *tmp; - size[1] = sizeof(*reqdata_old); - CDEBUG(D_QUOTA, "qd_count is 32bit!\n"); - } else { - reqdata = lustre_msg_buf(req->rq_reqmsg, REPLY_REC_OFF, - sizeof(*reqdata)); - *reqdata = *qdata; - size[1] = sizeof(*reqdata); - CDEBUG(D_QUOTA, "qd_count is 64bit!\n"); + rc = quota_copy_qdata(req, qdata, QUOTA_REQUEST, QUOTA_IMPORT); + if (rc < 0) { + CDEBUG(D_ERROR, "Can't pack qunit_data\n"); + RETURN(-EPROTO); } ptlrpc_req_set_repsize(req, 2, size); class_import_put(imp); @@ -703,7 +815,7 @@ schedule_dqacq(struct obd_device *obd, req->rq_interpret_reply = dqacq_interpret; 
ptlrpcd_add_req(req); - QDATA_DEBUG(qdata, "%s scheduled.\n", + QDATA_DEBUG(qdata, "%s scheduled.\n", opc == QUOTA_DQACQ ? "DQACQ" : "DQREL"); wait_completion: if (wait && qunit) { @@ -734,9 +846,9 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, for (i = 0; i < MAXQUOTAS; i++) { qdata[i].qd_id = id[i]; - qdata[i].qd_flags = 0; - qdata[i].qd_flags |= i; - qdata[i].qd_flags |= isblk ? QUOTA_IS_BLOCK : 0; + qdata[i].qd_flags = i; + if (isblk) + QDATA_SET_BLK(&qdata[i]); qdata[i].qd_count = 0; ret = check_cur_qunit(obd, qctxt, &qdata[i]); @@ -754,7 +866,7 @@ qctxt_adjust_qunit(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, RETURN(rc); } -int +int qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, unsigned short type, int isblk) { @@ -769,9 +881,9 @@ qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, qw.qw_rc = 0; qdata.qd_id = id; - qdata.qd_flags = 0; - qdata.qd_flags |= type; - qdata.qd_flags |= isblk ? 
QUOTA_IS_BLOCK : 0; + qdata.qd_flags = type; + if (isblk) + QDATA_SET_BLK(&qdata); qdata.qd_count = 0; spin_lock(&qunit_hash_lock); @@ -798,6 +910,8 @@ qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, int rc = 0; ENTRY; + LASSERT(qctxt); + rc = ptlrpcd_addref(); if (rc) RETURN(rc); @@ -808,46 +922,65 @@ qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, qctxt->lqc_sb = sb; qctxt->lqc_import = NULL; qctxt->lqc_recovery = 0; + qctxt->lqc_switch_qs = 1; /* Change qunit size in default setting */ + qctxt->lqc_cqs_boundary_factor = 4; + qctxt->lqc_cqs_least_bunit = PTLRPC_MAX_BRW_SIZE; + qctxt->lqc_cqs_least_iunit = 1; + qctxt->lqc_cqs_qs_factor = 2; qctxt->lqc_atype = 0; qctxt->lqc_status= 0; qctxt->lqc_bunit_sz = default_bunit_sz; qctxt->lqc_btune_sz = default_bunit_sz / 100 * default_btune_ratio; qctxt->lqc_iunit_sz = default_iunit_sz; qctxt->lqc_itune_sz = default_iunit_sz * default_itune_ratio / 100; - qctxt->lqc_limit_sz = default_limit_sz; + qctxt->lqc_switch_seconds = 300; /* enlarging will wait 5 minutes + * after the last shrinking */ + rc = lustre_hash_init(&LQC_HASH_BODY(qctxt), "LQS_HASH",128, + &lqs_hash_operations); + if (rc) { + CDEBUG(D_ERROR, "initialize hash lqs on ost error!\n"); + lustre_hash_exit(&LQC_HASH_BODY(qctxt)); + } spin_unlock(&qctxt->lqc_lock); - RETURN(0); + RETURN(rc); } void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force) { struct lustre_qunit *qunit, *tmp; struct qunit_waiter *qw, *tmp2; + struct list_head tmp_list; int i; ENTRY; - spin_lock(&qunit_hash_lock); + INIT_LIST_HEAD(&tmp_list); + spin_lock(&qunit_hash_lock); for (i = 0; i < NR_DQHASH; i++) { list_for_each_entry_safe(qunit, tmp, &qunit_hash[i], lq_hash) { if (qunit->lq_ctxt != qctxt) continue; - remove_qunit_nolock(qunit); - /* wake up all waiters */ - list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters, - qw_entry) { - list_del_init(&qw->qw_entry); - qw->qw_rc = 0; - wake_up(&qw->qw_waitq); - } - qunit_put(qunit); + 
list_add(&qunit->lq_hash, &tmp_list); } } - spin_unlock(&qunit_hash_lock); + list_for_each_entry_safe(qunit, tmp, &tmp_list, lq_hash) { + list_del_init(&qunit->lq_hash); + compute_lqs_after_removing_qunit(qunit); + /* wake up all waiters */ + list_for_each_entry_safe(qw, tmp2, &qunit->lq_waiters, + qw_entry) { + list_del_init(&qw->qw_entry); + qw->qw_rc = 0; + wake_up(&qw->qw_waitq); + } + qunit_put(qunit); + } + + lustre_hash_exit(&LQC_HASH_BODY(qctxt)); ptlrpcd_decref(); EXIT; @@ -865,7 +998,7 @@ static int qslave_recovery_main(void *arg) struct qslave_recov_thread_data *data = arg; struct obd_device *obd = data->obd; struct lustre_quota_ctxt *qctxt = data->qctxt; - unsigned int type; + unsigned int type; int rc = 0; ENTRY; @@ -892,7 +1025,7 @@ static int qslave_recovery_main(void *arg) LASSERT(dqopt->files[type] != NULL); INIT_LIST_HEAD(&id_list); -#ifndef KERNEL_SUPPORTS_QUOTA_READ +#ifndef KERNEL_SUPPORTS_QUOTA_READ rc = fsfilt_qids(obd, dqopt->files[type], NULL, type, &id_list); #else rc = fsfilt_qids(obd, NULL, dqopt->files[type], type, &id_list); @@ -910,9 +1043,8 @@ static int qslave_recovery_main(void *arg) goto free; qdata.qd_id = dqid->di_id; - qdata.qd_flags = 0; - qdata.qd_flags |= type; - qdata.qd_flags |= QUOTA_IS_BLOCK; + qdata.qd_flags = type; + QDATA_SET_BLK(&qdata); qdata.qd_count = 0; ret = check_cur_qunit(obd, qctxt, &qdata); @@ -924,7 +1056,7 @@ static int qslave_recovery_main(void *arg) rc = 0; if (rc) - CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, + CDEBUG(rc == -EBUSY ? D_QUOTA : D_ERROR, "qslave recovery failed! 
(id:%d type:%d " " rc:%d)\n", dqid->di_id, type, rc); free: @@ -936,7 +1068,7 @@ free: RETURN(rc); } -void +void qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt) { struct qslave_recov_thread_data data; diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index 40804f7..30ae7c9 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -132,7 +132,8 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) } break; case Q_SETQUOTA: - qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, + /* currently, it is only used for nullifying the quota */ + qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, oqctl->qc_id, oqctl->qc_type, 1); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); @@ -154,14 +155,14 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0); /* There might be a pending dqacq/dqrel (which is going to - * clear stale limits on slave). we should wait for it's + * clear stale limits on slave). 
we should wait for it's * completion then initialize limits */ - qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, + qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, oqctl->qc_id, oqctl->qc_type, 1); if (!oqctl->qc_dqblk.dqb_bhardlimit) goto adjust; - + LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); diff --git a/lustre/quota/quota_interface.c b/lustre/quota/quota_interface.c index ab3b619..98f82a8 100644 --- a/lustre/quota/quota_interface.c +++ b/lustre/quota/quota_interface.c @@ -49,30 +49,20 @@ /* quota proc file handling functions */ #ifdef LPROCFS -int lprocfs_rd_bunit(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - LASSERT(obd != NULL); - - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_bunit_sz); -} -EXPORT_SYMBOL(lprocfs_rd_bunit); -int lprocfs_rd_iunit(char *page, char **start, off_t off, int count, - int *eof, void *data) +int lprocfs_quota_rd_bunit(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *obd = (struct obd_device *)data; LASSERT(obd != NULL); - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_iunit_sz); + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_bunit_sz); } -EXPORT_SYMBOL(lprocfs_rd_iunit); +EXPORT_SYMBOL(lprocfs_quota_rd_bunit); -int lprocfs_wr_bunit(struct file *file, const char *buffer, - unsigned long count, void *data) +int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; int val, rc; @@ -89,10 +79,21 @@ int lprocfs_wr_bunit(struct file *file, const char *buffer, obd->u.obt.obt_qctxt.lqc_bunit_sz = val; return count; } -EXPORT_SYMBOL(lprocfs_wr_bunit); +EXPORT_SYMBOL(lprocfs_quota_wr_bunit); -int lprocfs_wr_iunit(struct file *file, const char *buffer, - 
unsigned long count, void *data) +int lprocfs_quota_rd_btune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_btune_sz); +} +EXPORT_SYMBOL(lprocfs_quota_rd_btune); + +int lprocfs_quota_wr_btune(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; int val, rc; @@ -102,38 +103,58 @@ int lprocfs_wr_iunit(struct file *file, const char *buffer, if (rc) return rc; - if (val <= obd->u.obt.obt_qctxt.lqc_itune_sz) + if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || + val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) return -EINVAL; - obd->u.obt.obt_qctxt.lqc_iunit_sz = val; + obd->u.obt.obt_qctxt.lqc_btune_sz = val; return count; } -EXPORT_SYMBOL(lprocfs_wr_iunit); +EXPORT_SYMBOL(lprocfs_quota_wr_btune); -int lprocfs_rd_btune(char *page, char **start, off_t off, int count, - int *eof, void *data) +int lprocfs_quota_rd_iunit(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *obd = (struct obd_device *)data; LASSERT(obd != NULL); - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_btune_sz); + return snprintf(page, count, "%lu\n", + obd->u.obt.obt_qctxt.lqc_iunit_sz); } -EXPORT_SYMBOL(lprocfs_rd_btune); +EXPORT_SYMBOL(lprocfs_quota_rd_iunit); -int lprocfs_rd_itune(char *page, char **start, off_t off, int count, - int *eof, void *data) +int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; + int val, rc; LASSERT(obd != NULL); - return snprintf(page, count, "%lu\n", + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= obd->u.obt.obt_qctxt.lqc_itune_sz) + return -EINVAL; + + obd->u.obt.obt_qctxt.lqc_iunit_sz = val; + return count; +} 
+EXPORT_SYMBOL(lprocfs_quota_wr_iunit); + +int lprocfs_quota_rd_itune(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%lu\n", obd->u.obt.obt_qctxt.lqc_itune_sz); } -EXPORT_SYMBOL(lprocfs_rd_itune); +EXPORT_SYMBOL(lprocfs_quota_rd_itune); -int lprocfs_wr_btune(struct file *file, const char *buffer, - unsigned long count, void *data) +int lprocfs_quota_wr_itune(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; int val, rc; @@ -142,18 +163,29 @@ int lprocfs_wr_btune(struct file *file, const char *buffer, rc = lprocfs_write_helper(buffer, count, &val); if (rc) return rc; - - if (val <= QUOTABLOCK_SIZE * MIN_QLIMIT || val % QUOTABLOCK_SIZE || - val >= obd->u.obt.obt_qctxt.lqc_bunit_sz) + + if (val <= MIN_QLIMIT || + val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) return -EINVAL; - obd->u.obt.obt_qctxt.lqc_btune_sz = val; + obd->u.obt.obt_qctxt.lqc_itune_sz = val; return count; } -EXPORT_SYMBOL(lprocfs_wr_btune); +EXPORT_SYMBOL(lprocfs_quota_wr_itune); + +int lprocfs_quota_rd_switch_seconds(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + LASSERT(obd != NULL); + + return snprintf(page, count, "%d\n", + obd->u.obt.obt_qctxt.lqc_switch_seconds); +} +EXPORT_SYMBOL(lprocfs_quota_rd_switch_seconds); -int lprocfs_wr_itune(struct file *file, const char *buffer, - unsigned long count, void *data) +int lprocfs_quota_wr_switch_seconds(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; int val, rc; @@ -162,22 +194,21 @@ int lprocfs_wr_itune(struct file *file, const char *buffer, rc = lprocfs_write_helper(buffer, count, &val); if (rc) return rc; - - if (val <= MIN_QLIMIT || - val >= obd->u.obt.obt_qctxt.lqc_iunit_sz) + 
+ if (val <= 10) return -EINVAL; - obd->u.obt.obt_qctxt.lqc_itune_sz = val; + obd->u.obt.obt_qctxt.lqc_switch_seconds = val; return count; } -EXPORT_SYMBOL(lprocfs_wr_itune); +EXPORT_SYMBOL(lprocfs_quota_wr_switch_seconds); #define USER_QUOTA 1 #define GROUP_QUOTA 2 #define MAX_STYPE_SIZE 5 -int lprocfs_rd_type(char *page, char **start, off_t off, int count, - int *eof, void *data) +int lprocfs_quota_rd_type(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *obd = (struct obd_device *)data; char stype[MAX_STYPE_SIZE + 1] = ""; @@ -216,9 +247,9 @@ int lprocfs_rd_type(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n", stype); } -EXPORT_SYMBOL(lprocfs_rd_type); +EXPORT_SYMBOL(lprocfs_quota_rd_type); -static int auto_quota_on(struct obd_device *obd, int type, +static int auto_quota_on(struct obd_device *obd, int type, struct super_block *sb, int is_master) { struct obd_quotactl *oqctl; @@ -248,7 +279,7 @@ static int auto_quota_on(struct obd_device *obd, int type, /* turn on cluster wide quota */ rc = mds_admin_quota_on(obd, oqctl); if (rc) { - CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, + CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, "auto-enable admin quota failed. rc=%d\n", rc); GOTO(out_pop, rc); } @@ -256,7 +287,7 @@ local_quota: /* turn on local quota */ rc = fsfilt_quotactl(obd, sb, oqctl); if (rc) { - CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, + CDEBUG(rc == -ENOENT ? D_QUOTA : D_ERROR, "auto-enable local quota failed. 
rc=%d\n", rc); if (is_master) mds_quota_off(obd, oqctl); @@ -270,8 +301,8 @@ out_pop: RETURN(rc); } -int lprocfs_wr_type(struct file *file, const char *buffer, - unsigned long count, void *data) +int lprocfs_quota_wr_type(struct file *file, const char *buffer, + unsigned long count, void *data) { struct obd_device *obd = (struct obd_device *)data; struct obd_device_target *obt = &obd->u.obt; @@ -290,14 +321,14 @@ int lprocfs_wr_type(struct file *file, const char *buffer, int rc; switch (stype[i]) { - case 'u' : + case 'u' : type |= USER_QUOTA; break; - case 'g' : + case 'g' : type |= GROUP_QUOTA; break; /* quota version specifiers */ - case '1' : + case '1' : if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)) break; @@ -307,7 +338,7 @@ int lprocfs_wr_type(struct file *file, const char *buffer, return rc; } break; - case '2' : + case '2' : if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)) break; @@ -331,42 +362,13 @@ int lprocfs_wr_type(struct file *file, const char *buffer, auto_quota_on(obd, type - 1, obt->obt_sb, 1); else if (!strcmp(obd->obd_type->typ_name, LUSTRE_OST_NAME)) auto_quota_on(obd, type - 1, obt->obt_sb, 0); - else + else return -EFAULT; return count; } -EXPORT_SYMBOL(lprocfs_wr_type); +EXPORT_SYMBOL(lprocfs_quota_wr_type); -int lprocfs_filter_rd_limit(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - LASSERT(obd != NULL); - - return snprintf(page, count, "%lu\n", - obd->u.obt.obt_qctxt.lqc_limit_sz); -} -EXPORT_SYMBOL(lprocfs_filter_rd_limit); - -int lprocfs_filter_wr_limit(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - int val, rc; - LASSERT(obd != NULL); - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val <= 1 << 20) - return -EINVAL; - - obd->u.obt.obt_qctxt.lqc_limit_sz = val; - return count; -} -EXPORT_SYMBOL(lprocfs_filter_wr_limit); 
#endif /* LPROCFS */ @@ -406,7 +408,8 @@ static int filter_quota_setinfo(struct obd_export *exp, struct obd_device *obd) imp = exp->exp_imp_reverse; if (imp) imp->imp_connect_data.ocd_connect_flags |= - (exp->exp_connect_flags & OBD_CONNECT_QUOTA64); + (exp->exp_connect_flags & + (OBD_CONNECT_QUOTA64 | OBD_CONNECT_CHANGE_QS)); /* start quota slave recovery thread. (release high limits) */ qslave_start_recovery(obd, &obd->u.obt.obt_qctxt); @@ -487,53 +490,202 @@ static int filter_quota_getflag(struct obd_device *obd, struct obdo *oa) RETURN(rc); } -static int filter_quota_acquire(struct obd_device *obd, unsigned int uid, +static int filter_quota_acquire(struct obd_device *obd, unsigned int uid, unsigned int gid) { struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; int rc; ENTRY; - rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 1, 1); - RETURN(rc == -EAGAIN); + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, LQUOTA_FLAGS_BLK, 1); + RETURN(rc); } -/* check whether the left quota of certain uid and uid can satisfy a write rpc - * when need to acquire quota, return QUOTA_RET_ACQUOTA */ -static int filter_quota_check(struct obd_device *obd, unsigned int uid, - unsigned int gid, int npage) +/* check whether the left quota of certain uid and gid can satisfy a block_write + * or inode_create rpc. 
When need to acquire quota, return QUOTA_RET_ACQUOTA */ +static int quota_check_common(struct obd_device *obd, unsigned int uid, + unsigned int gid, int count, int cycle, int isblk) { struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; int i; __u32 id[MAXQUOTAS] = { uid, gid }; - __u64 limit; struct qunit_data qdata[MAXQUOTAS]; - int rc; + int rc = 0, rc2[2] = { 0, 0 }; ENTRY; CLASSERT(MAXQUOTAS < 4); if (!sb_any_quota_enabled(qctxt->lqc_sb)) - RETURN(0); + RETURN(rc); for (i = 0; i < MAXQUOTAS; i++) { + struct lustre_qunit_size *lqs = NULL; + qdata[i].qd_id = id[i]; qdata[i].qd_flags = i; - qdata[i].qd_flags |= QUOTA_IS_BLOCK; + if (isblk) + QDATA_SET_BLK(&qdata[i]); qdata[i].qd_count = 0; - qctxt_wait_pending_dqacq(qctxt, id[i], i, 1); - rc = compute_remquota(obd, qctxt, &qdata[i]); - limit = npage * CFS_PAGE_SIZE; - if (limit < qctxt->lqc_limit_sz ) - limit = qctxt->lqc_limit_sz; - if (rc == QUOTA_RET_OK && - qdata[i].qd_count < limit) - RETURN(QUOTA_RET_ACQUOTA); + /* ignore root user */ + if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i])) + continue; + + quota_search_lqs(&qdata[i], NULL, qctxt, &lqs); + if (!lqs) + continue; + + qctxt_wait_pending_dqacq(qctxt, id[i], i, isblk); + rc2[i] = compute_remquota(obd, qctxt, &qdata[i], isblk); + spin_lock(&lqs->lqs_lock); + if (!cycle) { + rc = QUOTA_RET_INC_PENDING; + if (isblk) + lqs->lqs_bwrite_pending += count; + else + lqs->lqs_iwrite_pending += count; + } + + CDEBUG(D_QUOTA, "write pending: %lu, qd_count: "LPU64".\n", + isblk ? 
lqs->lqs_bwrite_pending : lqs->lqs_iwrite_pending, + qdata[i].qd_count); + if (rc2[i] == QUOTA_RET_OK) { + if (isblk && qdata[i].qd_count < + lqs->lqs_bwrite_pending * CFS_PAGE_SIZE) + rc2[i] = QUOTA_RET_ACQUOTA; + if (!isblk && qdata[i].qd_count < + lqs->lqs_iwrite_pending) + rc2[i] = QUOTA_RET_ACQUOTA; + } + + spin_unlock(&lqs->lqs_lock); + lqs_putref(lqs); } + if (rc2[0] == QUOTA_RET_ACQUOTA || rc2[1] == QUOTA_RET_ACQUOTA) + RETURN(rc | QUOTA_RET_ACQUOTA); + else + RETURN(rc); +} + +static int quota_chk_acq_common(struct obd_device *obd, unsigned int uid, + unsigned int gid, int count, int *pending, + int isblk, quota_acquire acquire) +{ + int rc = 0, cycle = 0; + ENTRY; + + while ((rc = quota_check_common(obd, uid, gid, count, cycle, isblk)) & + QUOTA_RET_ACQUOTA) { + + if (rc & QUOTA_RET_INC_PENDING) + *pending = 1; + + cycle++; + if (isblk) + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_HOLD_WRITE_RPC, 90); + rc = acquire(obd, uid, gid); + + /* please reference to dqacq_completion for the below */ + /* a new request is finished, try again */ + if (rc == -EAGAIN) { + CDEBUG(D_QUOTA, "finish a quota req, try again\n"); + continue; + } + + /* it is out of quota already */ + if (rc == -EDQUOT) { + CDEBUG(D_QUOTA, "out of quota, return -EDQUOT\n"); + break; + } + + /* -EBUSY and others, try 10 times */ + if (rc < 0 && cycle < 10) { + CDEBUG(D_QUOTA, "rc: %d, cycle: %d\n", rc, cycle); + cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ); + continue; + } + + CDEBUG(D_QUOTA, "exit with rc: %d\n", rc); + break; + } + + if (!cycle && rc & QUOTA_RET_INC_PENDING) + *pending = 1; + RETURN(rc); } + +static int filter_quota_check(struct obd_device *obd, unsigned int uid, + unsigned int gid, int npage, int *flag, + quota_acquire acquire) +{ + return quota_chk_acq_common(obd, uid, gid, npage, flag, LQUOTA_FLAGS_BLK, + acquire); +} + +/* when a block_write or inode_create rpc is finished, adjust the record for + * pending blocks and inodes*/ +static int quota_pending_commit(struct obd_device 
*obd, unsigned int uid, + unsigned int gid, int count, int isblk) +{ + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + int i; + __u32 id[MAXQUOTAS] = { uid, gid }; + struct qunit_data qdata[MAXQUOTAS]; + ENTRY; + + CLASSERT(MAXQUOTAS < 4); + if (!sb_any_quota_enabled(qctxt->lqc_sb)) + RETURN(0); + + for (i = 0; i < MAXQUOTAS; i++) { + struct lustre_qunit_size *lqs = NULL; + + qdata[i].qd_id = id[i]; + qdata[i].qd_flags = i; + if (isblk) + QDATA_SET_BLK(&qdata[i]); + qdata[i].qd_count = 0; + + if (qdata[i].qd_id == 0 && !QDATA_IS_GRP(&qdata[i])) + continue; + + quota_search_lqs(&qdata[i], NULL, qctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + CDEBUG(D_QUOTA, "pending: %lu, count: %d.\n", + isblk ? lqs->lqs_bwrite_pending : + lqs->lqs_iwrite_pending, count); + + if (isblk) { + if (lqs->lqs_bwrite_pending >= count) + lqs->lqs_bwrite_pending -= count; + else + CDEBUG(D_ERROR, + "there are too many blocks!\n"); + } else { + if (lqs->lqs_iwrite_pending >= count) + lqs->lqs_iwrite_pending -= count; + else + CDEBUG(D_ERROR, + "there are too many files!\n"); + } + + spin_unlock(&lqs->lqs_lock); + lqs_putref(lqs); + } + } + + RETURN(0); +} + +static int filter_quota_pending_commit(struct obd_device *obd, unsigned int uid, + unsigned int gid, int npage) +{ + return quota_pending_commit(obd, uid, gid, npage, LQUOTA_FLAGS_BLK); +} + static int mds_quota_init(void) { return lustre_dquot_init(); @@ -587,6 +739,30 @@ static int mds_quota_fs_cleanup(struct obd_device *obd) up(&mds->mds_qonoff_sem); RETURN(0); } + +static int mds_quota_check(struct obd_device *obd, unsigned int uid, + unsigned int gid, int inodes, int *flag, + quota_acquire acquire) +{ + return quota_chk_acq_common(obd, uid, gid, inodes, flag, 0, acquire); +} + +static int mds_quota_acquire(struct obd_device *obd, unsigned int uid, + unsigned int gid) +{ + struct lustre_quota_ctxt *qctxt = &obd->u.obt.obt_qctxt; + int rc; + ENTRY; + + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, 0, 1); + 
RETURN(rc); +} + +static int mds_quota_pending_commit(struct obd_device *obd, unsigned int uid, + unsigned int gid, int inodes) +{ + return quota_pending_commit(obd, uid, gid, inodes, 0); +} #endif /* __KERNEL__ */ struct osc_quota_info { @@ -810,6 +986,9 @@ quota_interface_t mds_quota_interface = { .quota_fs_cleanup =mds_quota_fs_cleanup, .quota_recovery = mds_quota_recovery, .quota_adjust = mds_quota_adjust, + .quota_chkquota = mds_quota_check, + .quota_acquire = mds_quota_acquire, + .quota_pending_commit = mds_quota_pending_commit, }; quota_interface_t filter_quota_interface = { @@ -824,6 +1003,8 @@ quota_interface_t filter_quota_interface = { .quota_acquire = filter_quota_acquire, .quota_adjust = filter_quota_adjust, .quota_chkquota = filter_quota_check, + .quota_adjust_qunit = filter_quota_adjust_qunit, + .quota_pending_commit = filter_quota_pending_commit, }; #endif /* __KERNEL__ */ @@ -842,11 +1023,13 @@ quota_interface_t osc_quota_interface = { .quota_chkdq = osc_quota_chkdq, .quota_setdq = osc_quota_setdq, .quota_cleanup = osc_quota_cleanup, + .quota_adjust_qunit = client_quota_adjust_qunit, }; quota_interface_t lov_quota_interface = { .quota_check = lov_quota_check, .quota_ctl = lov_quota_ctl, + .quota_adjust_qunit = lov_quota_adjust_qunit, }; #ifdef __KERNEL__ diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index b12fff9..1c957f1 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -19,7 +19,7 @@ /* QUSG covnert bytes to blocks when counting block quota */ #define QUSG(count, isblk) (isblk ? toqb(count) : count) -/* This flag is set in qc_stat to distinguish if the current getquota +/* This flag is set in qc_stat to distinguish if the current getquota * operation is for quota recovery */ #define QUOTA_RECOVERING 0x01 @@ -45,10 +45,28 @@ qinfo->qi_info[1].dqi_free_entry, ## arg); #define QDATA_DEBUG(qd, fmt, arg...) 
\ - CDEBUG(D_QUOTA, "id(%u) type(%lu) count("LPU64") isblk(%lu):" \ - fmt, qd->qd_id, qd->qd_flags & QUOTA_IS_GRP, qd->qd_count, \ - (qd->qd_flags & QUOTA_IS_BLOCK) >> 1, \ - ## arg); + CDEBUG(D_QUOTA, "id(%u) flag(%u) type(%c) isblk(%c) count("LPU64") " \ + "qd_qunit("LPU64"): " fmt, qd->qd_id, qd->qd_flags, \ + QDATA_IS_GRP(qd) ? 'g' : 'u', QDATA_IS_BLK(qd) ? 'b': 'i', \ + qd->qd_count, qd->qd_qunit, ## arg); + +#define QAQ_DEBUG(qaq, fmt, arg...) \ + CDEBUG(D_QUOTA, "id(%u) flag(%u) type(%c) bunit("LPU64") " \ + "iunit("LPU64"): " fmt, qaq->qaq_id, qaq->qaq_flags, \ + QAQ_IS_GRP(qaq) ? 'g': 'u', qaq->qaq_bunit_sz, \ + qaq->qaq_iunit_sz, ## arg); + +#define LQS_DEBUG(lqs, fmt, arg...) \ + CDEBUG(D_QUOTA, "lqs(%p) id(%u) flag(%lu) type(%c) bunit(%lu) " \ + "btune(%lu) iunit(%lu) itune(%lu) lqs_bwrite_pending(%lu) " \ + "lqs_iwrite_pending(%lu) ino_rec("LPD64") blk_rec("LPD64" )" \ + "refcount(%d): " \ + fmt, lqs, lqs->lqs_id, lqs->lqs_flags, \ + LQS_IS_GRP(lqs) ? 'g' : 'u', \ + lqs->lqs_bunit_sz, lqs->lqs_btune_sz, lqs->lqs_iunit_sz, \ + lqs->lqs_itune_sz, lqs->lqs_bwrite_pending, \ + lqs->lqs_iwrite_pending, lqs->lqs_ino_rec, \ + lqs->lqs_blk_rec, atomic_read(&lqs->lqs_refcount), ## arg); /* quota_context.c */ @@ -61,10 +79,11 @@ int qctxt_wait_pending_dqacq(struct lustre_quota_ctxt *qctxt, unsigned int id, int qctxt_init(struct lustre_quota_ctxt *qctxt, struct super_block *sb, dqacq_handler_t handler); void qctxt_cleanup(struct lustre_quota_ctxt *qctxt, int force); -void qslave_start_recovery(struct obd_device *obd, +void qslave_start_recovery(struct obd_device *obd, struct lustre_quota_ctxt *qctxt); int compute_remquota(struct obd_device *obd, - struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata); + struct lustre_quota_ctxt *qctxt, struct qunit_data *qdata, + int isblk); /* quota_master.c */ int lustre_dquot_init(void); void lustre_dquot_exit(void); @@ -87,6 +106,9 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl); int 
mds_get_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_quota_recovery(struct obd_device *obd); int mds_get_obd_quota(struct obd_device *obd, struct obd_quotactl *oqctl); +int dquot_create_oqaq(struct lustre_quota_ctxt *qctxt, struct lustre_dquot + *dquot, __u32 ost_num, __u32 mdt_num, int type, + struct quota_adjust_qunit *oqaq); #endif /* quota_ctl.c */ @@ -117,4 +139,40 @@ static inline void lprocfs_quotacheck_test_init_vars } #endif +/* quota_adjust_qunit.c */ +int client_quota_adjust_qunit(struct obd_export *exp, struct + quota_adjust_qunit *oqaq); +int lov_quota_adjust_qunit(struct obd_export *exp, struct + quota_adjust_qunit *oqaq); +int quota_adjust_slave_lqs(struct quota_adjust_qunit *oqaq, struct + lustre_quota_ctxt *qctxt); +void qdata_to_oqaq(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq); +#ifdef __KERNEL__ +int quota_search_lqs(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return); +int quota_create_lqs(struct qunit_data *qdata, + struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + struct lustre_qunit_size **lqs_return); +void quota_compute_lqs(struct qunit_data *qdata, struct lustre_qunit_size *lqs, + int is_chk, int is_acq); + + +extern int quote_get_qdata(struct ptlrpc_request *req, struct qunit_data *qdata, + int is_req, int is_exp); +extern int quote_copy_qdata(struct ptlrpc_request *req, struct qunit_data *qdata, + int is_req, int is_exp); +int filter_quota_adjust_qunit(struct obd_export *exp, struct + quota_adjust_qunit *oqaq); +#endif + +#define LQS_BLK_DECREASE 1 +#define LQS_BLK_INCREASE 2 +#define LQS_INO_DECREASE 4 +#define LQS_INO_INCREASE 8 + + #endif diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index c112bdd..91b5719 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -198,23 +198,129 @@ static struct lustre_dquot *lustre_dqget(struct obd_device *obd, 
RETURN(dquot); } +static void init_oqaq(struct quota_adjust_qunit *oqaq, + struct lustre_quota_ctxt *qctxt, + qid_t id, int type) +{ + struct lustre_qunit_size *lqs = NULL; + + oqaq->qaq_id = id; + oqaq->qaq_flags = type; + quota_search_lqs(NULL, oqaq, qctxt, &lqs); + if (lqs) { + spin_lock(&lqs->lqs_lock); + oqaq->qaq_bunit_sz = lqs->lqs_bunit_sz; + oqaq->qaq_iunit_sz = lqs->lqs_iunit_sz; + oqaq->qaq_flags = lqs->lqs_flags; + spin_unlock(&lqs->lqs_lock); + lqs_putref(lqs); + } else { + CDEBUG(D_QUOTA, "Can't find the lustre qunit size!\n"); + oqaq->qaq_bunit_sz = qctxt->lqc_bunit_sz; + oqaq->qaq_iunit_sz = qctxt->lqc_iunit_sz; + } +} + +int dqacq_adjust_qunit_sz(struct obd_device *obd, qid_t id, int type, + __u32 is_blk) +{ + struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt; + struct obd_device *lov_mds_obd = class_exp2obd(mds->mds_osc_exp); + struct lov_obd *lov = &lov_mds_obd->u.lov; + __u32 ost_num = lov->desc.ld_tgt_count, mdt_num = 1; + struct quota_adjust_qunit *oqaq = NULL; + unsigned int uid = 0, gid = 0; + struct lustre_quota_info *info = &mds->mds_quota_info; + struct lustre_dquot *dquot = NULL; + int adjust_res = 0; + int rc = 0; + ENTRY; + + LASSERT(mds); + dquot = lustre_dqget(obd, info, id, type); + if (IS_ERR(dquot)) + RETURN(PTR_ERR(dquot)); + + OBD_ALLOC_PTR(oqaq); + if (!oqaq) + GOTO(out, rc = -ENOMEM); + + down(&dquot->dq_sem); + init_oqaq(oqaq, qctxt, id, type); + + rc = dquot_create_oqaq(qctxt, dquot, ost_num, mdt_num, + is_blk ? LQUOTA_FLAGS_ADJBLK : + LQUOTA_FLAGS_ADJINO, oqaq); + + if (rc < 0) { + CDEBUG(D_ERROR, "create oqaq failed! 
(rc:%d)\n", rc); + GOTO(out_sem, rc); + } + QAQ_DEBUG(oqaq, "show oqaq.\n") + + if (!QAQ_IS_ADJBLK(oqaq) && !QAQ_IS_ADJINO(oqaq)) + GOTO(out_sem, rc); + + /* adjust the mds slave qunit size */ + adjust_res = quota_adjust_slave_lqs(oqaq, qctxt); + if (adjust_res <= 0) { + if (adjust_res < 0) { + rc = adjust_res; + CDEBUG(D_ERROR, "adjust mds slave's qunit size failed! \ + (rc:%d)\n", rc); + } else { + CDEBUG(D_QUOTA, "qunit doesn't need to be adjusted.\n"); + } + GOTO(out_sem, rc); + } + + if (type) + gid = dquot->dq_id; + else + uid = dquot->dq_id; + + up(&dquot->dq_sem); + + rc = qctxt_adjust_qunit(obd, qctxt, uid, gid, is_blk, 0); + if (rc) { + CDEBUG(D_ERROR, "mds fail to adjust file quota! \ + (rc:%d)\n", rc); + GOTO(out, rc); + } + + /* only when block qunit is reduced, boardcast to osts */ + if ((adjust_res & LQS_BLK_DECREASE) && QAQ_IS_ADJBLK(oqaq)) + rc = obd_quota_adjust_qunit(mds->mds_osc_exp, oqaq); + +out: + lustre_dqput(dquot); + if (oqaq) + OBD_FREE_PTR(oqaq); + + RETURN(rc); +out_sem: + up(&dquot->dq_sem); + goto out; +} + int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) { struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt; struct lustre_quota_info *info = &mds->mds_quota_info; struct lustre_dquot *dquot = NULL; __u64 *usage = NULL; __u32 hlimit = 0, slimit = 0; - __u32 qdata_type = qdata->qd_flags & QUOTA_IS_GRP; - __u32 is_blk = (qdata->qd_flags & QUOTA_IS_BLOCK) >> 1; time_t *time = NULL; unsigned int grace = 0; + struct lustre_qunit_size *lqs = NULL; int rc = 0; ENTRY; OBD_FAIL_RETURN(OBD_FAIL_OBD_DQACQ, -EIO); - dquot = lustre_dqget(obd, info, qdata->qd_id, qdata_type); + dquot = lustre_dqget(obd, info, qdata->qd_id, QDATA_IS_GRP(qdata)); if (IS_ERR(dquot)) RETURN(PTR_ERR(dquot)); @@ -229,14 +335,14 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) GOTO(out, rc = -EBUSY); } - if (is_blk) { - grace = info->qi_info[qdata_type].dqi_bgrace; + if 
(QDATA_IS_BLK(qdata)) { + grace = info->qi_info[QDATA_IS_GRP(qdata)].dqi_bgrace; usage = &dquot->dq_dqb.dqb_curspace; hlimit = dquot->dq_dqb.dqb_bhardlimit; slimit = dquot->dq_dqb.dqb_bsoftlimit; time = &dquot->dq_dqb.dqb_btime; } else { - grace = info->qi_info[qdata_type].dqi_igrace; + grace = info->qi_info[QDATA_IS_GRP(qdata)].dqi_igrace; usage = (__u64 *) & dquot->dq_dqb.dqb_curinodes; hlimit = dquot->dq_dqb.dqb_ihardlimit; slimit = dquot->dq_dqb.dqb_isoftlimit; @@ -252,12 +358,20 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) switch (opc) { case QUOTA_DQACQ: - if (hlimit && - QUSG(*usage + qdata->qd_count, is_blk) > hlimit) - GOTO(out, rc = -EDQUOT); + if (hlimit && + QUSG(*usage + qdata->qd_count, QDATA_IS_BLK(qdata)) > hlimit) + { + if (QDATA_IS_CHANGE_QS(qdata) && + QUSG(*usage, QDATA_IS_BLK(qdata)) < hlimit) + qdata->qd_count = (hlimit - + QUSG(*usage, QDATA_IS_BLK(qdata))) + * QUOTABLOCK_SIZE; + else + GOTO(out, rc = -EDQUOT); + } if (slimit && - QUSG(*usage + qdata->qd_count, is_blk) > slimit) { + QUSG(*usage + qdata->qd_count, QDATA_IS_BLK(qdata)) > slimit) { if (*time && cfs_time_current_sec() >= *time) GOTO(out, rc = -EDQUOT); else if (!*time) @@ -275,7 +389,7 @@ int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc) *usage -= qdata->qd_count; /* (usage <= soft limit) but not (usage < soft limit) */ - if (!slimit || QUSG(*usage, is_blk) <= slimit) + if (!slimit || QUSG(*usage, QDATA_IS_BLK(qdata)) <= slimit) *time = 0; break; default: @@ -288,6 +402,36 @@ out: up(&dquot->dq_sem); up(&mds->mds_qonoff_sem); lustre_dqput(dquot); + if (rc != -EDQUOT) + dqacq_adjust_qunit_sz(obd, qdata->qd_id, QDATA_IS_GRP(qdata), QDATA_IS_BLK(qdata)); + + quota_search_lqs(qdata, NULL, qctxt, &lqs); + if (QDATA_IS_BLK(qdata)) { + if (!lqs) { + CDEBUG(D_INFO, "Can't find the lustre qunit size!\n"); + qdata->qd_qunit = qctxt->lqc_bunit_sz; + } else { + spin_lock(&lqs->lqs_lock); + qdata->qd_qunit = lqs->lqs_bunit_sz; + 
spin_unlock(&lqs->lqs_lock); + } + QDATA_SET_ADJBLK(qdata); + } else { + if (!lqs) { + CDEBUG(D_INFO, "Can't find the lustre qunit size!\n"); + qdata->qd_qunit = qctxt->lqc_iunit_sz; + } else { + spin_lock(&lqs->lqs_lock); + qdata->qd_qunit = lqs->lqs_iunit_sz; + spin_unlock(&lqs->lqs_lock); + } + QDATA_SET_ADJINO(qdata); + } + + QDATA_DEBUG(qdata, "alloc/release qunit in dqacq_handler\n"); + if (lqs) + lqs_putref(lqs); + return rc; } @@ -778,10 +922,125 @@ out: RETURN(rc); } +int dquot_create_oqaq(struct lustre_quota_ctxt *qctxt, + struct lustre_dquot *dquot, __u32 ost_num, + __u32 mdt_num, int type, + struct quota_adjust_qunit *oqaq) +{ + __u64 bunit_curr_o, iunit_curr_o; + unsigned long shrink_qunit_limit = qctxt->lqc_cqs_boundary_factor; + unsigned long cqs_factor = qctxt->lqc_cqs_qs_factor; + __u64 blimit = dquot->dq_dqb.dqb_bhardlimit ? + dquot->dq_dqb.dqb_bhardlimit : dquot->dq_dqb.dqb_bsoftlimit; + __u64 ilimit = dquot->dq_dqb.dqb_ihardlimit ? + dquot->dq_dqb.dqb_ihardlimit : dquot->dq_dqb.dqb_isoftlimit; + int rc = 0; + ENTRY; + + if (!dquot || !oqaq) + RETURN(-EINVAL); + LASSERT_SEM_LOCKED(&dquot->dq_sem); + LASSERT(oqaq->qaq_iunit_sz); + LASSERT(oqaq->qaq_bunit_sz); + + /* don't change qunit size */ + if (!qctxt->lqc_switch_qs) + RETURN(rc); + + bunit_curr_o = oqaq->qaq_bunit_sz; + iunit_curr_o = oqaq->qaq_iunit_sz; + + if (dquot->dq_type == GRPQUOTA) + QAQ_SET_GRP(oqaq); + + if ((type & LQUOTA_FLAGS_ADJBLK) && blimit) { + __u64 b_limitation = + oqaq->qaq_bunit_sz * ost_num * shrink_qunit_limit; + /* enlarge block qunit size */ + while (blimit > + QUSG(dquot->dq_dqb.dqb_curspace + 2 * b_limitation, 1)) { + oqaq->qaq_bunit_sz = + QUSG(oqaq->qaq_bunit_sz * cqs_factor, 1) + << QUOTABLOCK_BITS; + b_limitation = oqaq->qaq_bunit_sz * ost_num * + shrink_qunit_limit; + } + + if (oqaq->qaq_bunit_sz > qctxt->lqc_bunit_sz) + oqaq->qaq_bunit_sz = qctxt->lqc_bunit_sz; + + /* shrink block qunit size */ + while (blimit < + QUSG(dquot->dq_dqb.dqb_curspace + 
b_limitation, 1)) { + do_div(oqaq->qaq_bunit_sz , cqs_factor); + oqaq->qaq_bunit_sz = QUSG(oqaq->qaq_bunit_sz, 1) << + QUOTABLOCK_BITS; + b_limitation = oqaq->qaq_bunit_sz * ost_num * + shrink_qunit_limit; + if (oqaq->qaq_bunit_sz < qctxt->lqc_cqs_least_bunit) + break; + } + + if (oqaq->qaq_bunit_sz < qctxt->lqc_cqs_least_bunit) + oqaq->qaq_bunit_sz = qctxt->lqc_cqs_least_bunit; + + if (bunit_curr_o != oqaq->qaq_bunit_sz) + QAQ_SET_ADJBLK(oqaq); + + } + + if ((type & LQUOTA_FLAGS_ADJINO) && ilimit) { + __u64 i_limitation = + oqaq->qaq_iunit_sz * mdt_num * shrink_qunit_limit; + /* enlarge file qunit size */ + while (ilimit > dquot->dq_dqb.dqb_curinodes + + 2 * i_limitation) { + oqaq->qaq_iunit_sz = oqaq->qaq_iunit_sz * cqs_factor; + i_limitation = oqaq->qaq_iunit_sz * mdt_num * + shrink_qunit_limit; + } + + if (oqaq->qaq_iunit_sz > qctxt->lqc_iunit_sz) + oqaq->qaq_iunit_sz = qctxt->lqc_iunit_sz; + + /* shrink file qunit size */ + while (ilimit < dquot->dq_dqb.dqb_curinodes + + i_limitation) { + do_div(oqaq->qaq_iunit_sz, cqs_factor); + i_limitation = oqaq->qaq_iunit_sz * mdt_num * + shrink_qunit_limit; + if (oqaq->qaq_iunit_sz < qctxt->lqc_cqs_least_iunit) + break; + } + + if (oqaq->qaq_iunit_sz < qctxt->lqc_cqs_least_iunit) + oqaq->qaq_iunit_sz = qctxt->lqc_cqs_least_iunit; + + if (iunit_curr_o != oqaq->qaq_iunit_sz) + QAQ_SET_ADJINO(oqaq); + + } + + if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) { + oqaq->qaq_bunit_sz = 0; + oqaq->qaq_iunit_sz = 0; + QAQ_SET_ADJBLK(oqaq); + QAQ_SET_ADJINO(oqaq); + } + + QAQ_DEBUG(oqaq, "the oqaq computed\n"); + + RETURN(rc); +} + static int mds_init_slave_ilimits(struct obd_device *obd, - struct obd_quotactl *oqctl, int set) + struct obd_quotactl *oqctl, int set, + struct quota_adjust_qunit *oqaq) { /* XXX: for file limits only adjust local now */ + struct obd_device_target *obt = &obd->u.obt; + struct lustre_quota_ctxt *qctxt = 
&obt->obt_qctxt; unsigned int uid = 0, gid = 0; struct obd_quotactl *ioqc = NULL; int flag; @@ -796,8 +1055,8 @@ static int mds_init_slave_ilimits(struct obd_device *obd, OBD_ALLOC_PTR(ioqc); if (!ioqc) RETURN(-ENOMEM); - - flag = oqctl->qc_dqblk.dqb_ihardlimit || + + flag = oqctl->qc_dqblk.dqb_ihardlimit || oqctl->qc_dqblk.dqb_isoftlimit || set; ioqc->qc_cmd = flag ? Q_INITQUOTA : Q_SETQUOTA; ioqc->qc_id = oqctl->qc_id; @@ -805,6 +1064,14 @@ static int mds_init_slave_ilimits(struct obd_device *obd, ioqc->qc_dqblk.dqb_valid = QIF_ILIMITS; ioqc->qc_dqblk.dqb_ihardlimit = flag ? MIN_QLIMIT : 0; + if (QAQ_IS_ADJINO(oqaq)) { + /* adjust the mds slave's inode qunit size */ + rc = quota_adjust_slave_lqs(oqaq, qctxt); + if (rc < 0) + CDEBUG(D_ERROR, "adjust mds slave's inode qunit size \ + failed! (rc:%d)\n", rc); + } + /* set local limit to MIN_QLIMIT */ rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc); if (rc) @@ -818,7 +1085,8 @@ static int mds_init_slave_ilimits(struct obd_device *obd, rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 0, 0); if (rc) { - CERROR("error mds adjust local file quota! (rc:%d)\n", rc); + CDEBUG(D_QUOTA,"error mds adjust local file quota! (rc:%d)\n", + rc); GOTO(out, rc); } /* FIXME initialize all slaves in CMD */ @@ -830,13 +1098,16 @@ out: } static int mds_init_slave_blimits(struct obd_device *obd, - struct obd_quotactl *oqctl, int set) + struct obd_quotactl *oqctl, int set, + struct quota_adjust_qunit *oqaq) { + struct obd_device_target *obt = &obd->u.obt; + struct lustre_quota_ctxt *qctxt = &obt->obt_qctxt; struct mds_obd *mds = &obd->u.mds; struct obd_quotactl *ioqc; unsigned int uid = 0, gid = 0; + int rc, rc1 = 0; int flag; - int rc; ENTRY; /* if we are going to set zero limit, needn't init slaves */ @@ -855,6 +1126,13 @@ static int mds_init_slave_blimits(struct obd_device *obd, ioqc->qc_type = oqctl->qc_type; ioqc->qc_dqblk.dqb_valid = QIF_BLIMITS; ioqc->qc_dqblk.dqb_bhardlimit = flag ? 
MIN_QLIMIT : 0; + if (QAQ_IS_ADJBLK(oqaq)) { + /* adjust the mds slave's block qunit size */ + rc1 = quota_adjust_slave_lqs(oqaq, qctxt); + if (rc1 < 0) + CERROR("adjust mds slave's block qunit size failed!" + "(rc:%d)\n", rc1); + } rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc); if (rc) @@ -866,14 +1144,21 @@ static int mds_init_slave_blimits(struct obd_device *obd, else gid = oqctl->qc_id; + /* initialize all slave's limit */ + rc = obd_quotactl(mds->mds_osc_exp, ioqc); + rc = qctxt_adjust_qunit(obd, &obd->u.obt.obt_qctxt, uid, gid, 1, 0); if (rc) { CERROR("error mds adjust local block quota! (rc:%d)\n", rc); GOTO(out, rc); } - /* initialize all slave's limit */ - rc = obd_quotactl(mds->mds_osc_exp, ioqc); + /* adjust all slave's qunit size when setting quota + * this is will create a lqs for every ost, which will present + * certain uid/gid is set quota or not */ + QAQ_SET_ADJBLK(oqaq); + rc = obd_quota_adjust_qunit(mds->mds_osc_exp, oqaq); + EXIT; out: OBD_FREE_PTR(ioqc); @@ -883,15 +1168,24 @@ out: int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; + struct lustre_quota_ctxt *qctxt = &mds->mds_obt.obt_qctxt; + struct obd_device *lov_obd = class_exp2obd(mds->mds_osc_exp); + struct lov_obd *lov = &lov_obd->u.lov; + struct quota_adjust_qunit *oqaq = NULL; struct lustre_quota_info *qinfo = &mds->mds_quota_info; __u64 ihardlimit, isoftlimit, bhardlimit, bsoftlimit; time_t btime, itime; struct lustre_dquot *dquot; struct obd_dqblk *dqblk = &oqctl->qc_dqblk; - int set, rc; + int set, rc, flag = 0; ENTRY; + OBD_ALLOC_PTR(oqaq); + if (!oqaq) + RETURN(-ENOMEM); down(&mds->mds_qonoff_sem); + init_oqaq(oqaq, qctxt, oqctl->qc_id, oqctl->qc_type); + if (qinfo->qi_files[oqctl->qc_type] == NULL) GOTO(out_sem, rc = -ESRCH); @@ -926,18 +1220,20 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) dquot->dq_dqb.dqb_bhardlimit = dqblk->dqb_bhardlimit; dquot->dq_dqb.dqb_bsoftlimit = 
dqblk->dqb_bsoftlimit; /* clear usage (limit pool) */ - if (!dquot->dq_dqb.dqb_bhardlimit && + if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_curspace = 0; /* clear grace time */ - if (!dqblk->dqb_bsoftlimit || + if (!dqblk->dqb_bsoftlimit || toqb(dquot->dq_dqb.dqb_curspace) <= dqblk->dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = 0; /* set grace only if user hasn't provided his own */ else if (!(dqblk->dqb_valid & QIF_BTIME)) - dquot->dq_dqb.dqb_btime = cfs_time_current_sec() + + dquot->dq_dqb.dqb_btime = cfs_time_current_sec() + qinfo->qi_info[dquot->dq_type].dqi_bgrace; + + flag |= LQUOTA_FLAGS_ADJBLK; } if (dqblk->dqb_valid & QIF_ILIMITS) { @@ -954,7 +1250,16 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) else if (!(dqblk->dqb_valid & QIF_ITIME)) dquot->dq_dqb.dqb_itime = cfs_time_current_sec() + qinfo->qi_info[dquot->dq_type].dqi_igrace; + + flag |= LQUOTA_FLAGS_ADJINO; } + QAQ_DEBUG(oqaq, "before dquot_create_oqaq\n"); + rc = dquot_create_oqaq(qctxt, dquot, lov->desc.ld_tgt_count, 1, + flag, oqaq); + QAQ_DEBUG(oqaq, "after dquot_create_oqaq\n"); + if (rc < 0) + CDEBUG(D_QUOTA, "adjust qunit size failed! (rc:%d)\n", rc); + rc = fsfilt_dquot(obd, dquot, QFILE_WR_DQUOT); @@ -968,7 +1273,10 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) up(&mds->mds_qonoff_sem); if (dqblk->dqb_valid & QIF_ILIMITS) { set = !(ihardlimit || isoftlimit); - rc = mds_init_slave_ilimits(obd, oqctl, set); + down(&dquot->dq_sem); + dquot->dq_dqb.dqb_curinodes = 0; + up(&dquot->dq_sem); + rc = mds_init_slave_ilimits(obd, oqctl, set, oqaq); if (rc) { CERROR("init slave ilimits failed! 
(rc:%d)\n", rc); goto revoke_out; @@ -977,7 +1285,10 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl) if (dqblk->dqb_valid & QIF_BLIMITS) { set = !(bhardlimit || bsoftlimit); - rc = mds_init_slave_blimits(obd, oqctl, set); + down(&dquot->dq_sem); + dquot->dq_dqb.dqb_curspace = 0; + up(&dquot->dq_sem); + rc = mds_init_slave_blimits(obd, oqctl, set, oqaq); if (rc) { CERROR("init slave blimits failed! (rc:%d)\n", rc); goto revoke_out; @@ -1006,6 +1317,10 @@ out: EXIT; out_sem: up(&mds->mds_qonoff_sem); + + if (oqaq) + OBD_FREE_PTR(oqaq); + return rc; } diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index b233bf3..d8822ae 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -37,10 +37,8 @@ TSTID2=${TSTID2:-60001} TSTUSR=${TSTUSR:-"quota_usr"} TSTUSR2=${TSTUSR2:-"quota_2usr"} BLK_SZ=1024 -BUNIT_SZ=${BUNIT_SZ:-1000} # default 1000 quota blocks -BTUNE_SZ=${BTUNE_SZ:-500} # default 50% of BUNIT_SZ -IUNIT_SZ=${IUNIT_SZ:-10} # default 10 files -ITUNE_SZ=${ITUNE_SZ:-5} # default 50% of IUNIT_SZ +BUNIT_SZ=${BUNIT_SZ:-1024} # min block quota unit(kB) +IUNIT_SZ=${IUNIT_SZ:-10} # min inode quota unit MAX_DQ_TIME=604800 MAX_IQ_TIME=604800 @@ -66,6 +64,9 @@ SHOW_QUOTA_USER="$LFS quota -u $TSTUSR $DIR" SHOW_QUOTA_GROUP="$LFS quota -g $TSTUSR $DIR" SHOW_QUOTA_INFO="$LFS quota -t $DIR" +# control the time of tests +cycle=30 + build_test_filter eval ONLY_0=true @@ -73,6 +74,7 @@ eval ONLY_99=true # set_blk_tunables(btune_sz) set_blk_tunesz() { + local i # set btune size on all obdfilters do_facet ost1 "set -x; for i in /proc/fs/lustre/obdfilter/*/quota_btune_sz; do echo $(($1 * BLK_SZ)) >> \\\$i; @@ -85,6 +87,7 @@ set_blk_tunesz() { # set_blk_unitsz(bunit_sz) set_blk_unitsz() { + local i do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_bunit_sz; do echo $(($1 * BLK_SZ)) >> \\\$i; done" @@ -95,6 +98,7 @@ set_blk_unitsz() { # set_file_tunesz(itune_sz) set_file_tunesz() { + local i # set iunit and itune 
size on all obdfilters do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_itune_sz; do echo $1 >> \\\$i; @@ -107,6 +111,7 @@ set_file_tunesz() { # set_file_unitsz(iunit_sz) set_file_unitsz() { + local i do_facet ost1 "for i in /proc/fs/lustre/obdfilter/*/quota_iunit_sz; do echo $1 >> \\\$i; done" @@ -115,34 +120,6 @@ set_file_unitsz() { done" } -# These are for test on local machine,if run sanity-quota.sh on -# real cluster, ltest should have setup the test environment: -# -# - create test user/group on all servers with same id. -# - set unit size/tune on all servers size to reasonable value. -pre_test() { - if [ -z "$NOSETUP" ]; then - # set block tunables - set_blk_tunesz $BTUNE_SZ - set_blk_unitsz $BUNIT_SZ - # set file tunables - set_file_tunesz $ITUNE_SZ - set_file_unitsz $IUNIT_SZ - fi -} -pre_test - -post_test() { - if [ -z "$NOSETUP" ]; then - # restore block tunables to default size - set_blk_unitsz $((1024 * 100)) - set_blk_tunesz $((1024 * 50)) - # restore file tunables to default size - set_file_unitsz 5000 - set_file_tunesz 2500 - fi -} - RUNAS="runas -u $TSTID" RUNAS2="runas -u $TSTID2" FAIL_ON_ERROR=true check_runas_id $TSTID $RUNAS @@ -157,103 +134,173 @@ test_0() { $LFS setquota -u $TSTUSR 0 0 0 0 $DIR $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + sysctl -w lnet.debug="+quota" } run_test 0 "Set quota =============================" -# block hard limit (normal use and out of quota) -test_1() { +# test for specific quota limitation, qunit, qtune $1=block_quota_limit +test_1_sub() { + LIMIT=$1 chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" - LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 5)) # 5 bunits each sever - TESTFILE=$DIR/$tdir/$tfile-0 - - echo " User quota (limit: $LIMIT kbytes)" - $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR - $SHOW_QUOTA_USER - - $LFS setstripe $TESTFILE -c 1 - chown $TSTUSR.$TSTUSR $TESTFILE + # test for user + log " User quota (limit: $LIMIT kbytes)" + $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR + sleep 3 + 
$SHOW_QUOTA_USER - echo " Write ..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(usr) write failure, but expect success" - echo " Done" - echo " Write out of block quota ..." + $LFS setstripe $TESTFILE -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + log " Write ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(usr) write failure, but expect success" + log " Done" + log " Write out of block quota ..." # this time maybe cache write, ignore it's failure - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true # flush cache, ensure noquota flag is setted on client + sync; sleep 1; sync; + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(usr) write success, but expect EDQUOT" + + rm -f $TESTFILE sync; sleep 1; sync; - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(usr) write success, but expect EDQUOT" + OST0_UUID=`$LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'` + OST0_QUOTA_USED="`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'`" + echo $OST0_QUOTA_USED + [ $OST0_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_USER + $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit - rm -f $TESTFILE - - echo " Group quota (limit: $LIMIT kbytes)" - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit - $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR - $SHOW_QUOTA_GROUP - TESTFILE=$DIR/$tdir/$tfile-1 + # test for group + log "--------------------------------------" + log " Group quota (limit: $LIMIT kbytes)" + $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR + sleep 3 + $SHOW_QUOTA_GROUP + TESTFILE="$DIR/$tdir/$tfile-1" - $LFS setstripe $TESTFILE -c 1 - chown $TSTUSR.$TSTUSR $TESTFILE + $LFS setstripe $TESTFILE -c 
1 + chown $TSTUSR.$TSTUSR $TESTFILE - echo " Write ..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(grp) write failure, but expect success" - echo " Done" - echo " Write out of block quota ..." + log " Write ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) || error "(grp) write failure, but expect success" + log " Done" + log " Write out of block quota ..." # this time maybe cache write, ignore it's failure - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true - sync; sleep 1; sync; - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(grp) write success, but expect EDQUOT" + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT/2)) seek=$(($LIMIT/2)) || true + sync; sleep 1; sync; + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$LIMIT && error "(grp) write success, but expect EDQUOT" # cleanup - rm -f $TESTFILE - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + rm -f $TESTFILE + sync; sleep 1; sync; + OST0_UUID=`$LCTL dl | grep -m1 obdfilter | awk '{print $((NF-1))}'` + OST0_QUOTA_USED="`$LFS quota -o $OST0_UUID -g $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $1 }'`" + echo $OST0_QUOTA_USED + [ $OST0_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_GROUP + $LFS setquota -g $TSTUSR 0 0 0 0 $DIR # clear group limit +} + +# block hard limit (normal use and out of quota) +test_1() { + for i in `seq 1 $cycle`; do + # define blk_qunit is between 1M and 4M + blk_qunit=$(( $RANDOM % 3072 + 1024 )) + blk_qtune=$(( $RANDOM % $blk_qunit )) + # other osts and mds will occupy at 1M blk quota + b_limit=$(( ($RANDOM - 16384) / 8 + $OSTCOUNT * $blk_qunit * 4 )) + set_blk_tunesz $blk_qtune + set_blk_unitsz $blk_qunit + echo "cycle: $i(total $cycle) bunit:$blk_qunit, btune:$blk_qtune, blimit:$b_limit" + test_1_sub $b_limit + echo 
"==================================================" + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + done } run_test 1 "Block hard limit (normal use and out of quota) ===" -# file hard limit (normal use and out of quota) -test_2() { +# test for specific quota limitation, qunit, qtune $1=block_quota_limit +test_2_sub() { + LIMIT=$1 chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" - LIMIT=$(($IUNIT_SZ * 10)) # 10 iunits on mds - TESTFILE=$DIR/$tdir/$tfile-0 - - echo " User quota (limit: $LIMIT files)" - $LFS setquota -u $TSTUSR 0 0 0 $LIMIT $DIR - $SHOW_QUOTA_USER + # test for user + log " User quota (limit: $LIMIT files)" + $LFS setquota -u $TSTUSR 0 0 0 $LIMIT $DIR + sleep 3 + $SHOW_QUOTA_USER - echo " Create $LIMIT files ..." + log " Create $LIMIT files ..." $RUNAS createmany -m ${TESTFILE} $LIMIT || \ - error "(usr) create failure, but except success" - echo " Done" - echo " Create out of file quota ..." + error "(usr) create failure, but except success" + log " Done" + log " Create out of file quota ..." $RUNAS touch ${TESTFILE}_xxx && \ - error "(usr) touch success, but expect EDQUOT" + error "(usr) touch success, but expect EDQUOT" unlinkmany ${TESTFILE} $LIMIT - rm ${TESTFILE}_xxx - - echo " Group quota (limit: $LIMIT files)" - $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit - $LFS setquota -g $TSTUSR 0 0 0 $LIMIT $DIR - $SHOW_QUOTA_GROUP - TESTFILE=$DIR/$tdir/$tfile-1 + rm -f ${TESTFILE}_xxx + sync; sleep 1; sync; - echo " Create $LIMIT files ..." 
- $RUNAS createmany -m ${TESTFILE} $LIMIT || \ - error "(grp) create failure, but expect success" + MDS_UUID=`$LCTL dl | grep -m1 mds | awk '{print $((NF-1))}'` + MDS_QUOTA_USED="`$LFS quota -o $MDS_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $4 }'`" + echo $MDS_QUOTA_USED + [ $MDS_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_USER + $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit - echo " Done" - echo " Create out of file quota ..." - $RUNAS touch ${TESTFILE}_xxx && \ - error "(grp) touch success, but expect EDQUOT" + # test for group + log "--------------------------------------" + log " Group quota (limit: $LIMIT FILE)" + $LFS setquota -g $TSTUSR 0 0 0 $LIMIT $DIR + sleep 3 + $SHOW_QUOTA_GROUP + TESTFILE=$DIR/$tdir/$tfile-1 - $RUNAS touch ${TESTFILE}_xxx > /dev/null 2>&1 && error "(grp) touch success, but expect EDQUOT" + log " Create $LIMIT files ..." + $RUNAS createmany -m ${TESTFILE} $LIMIT || \ + error "(usr) create failure, but except success" + log " Done" + log " Create out of file quota ..." 
+ $RUNAS touch ${TESTFILE}_xxx && \ + error "(usr) touch success, but expect EDQUOT" - # cleanup unlinkmany ${TESTFILE} $LIMIT - rm ${TESTFILE}_xxx + rm -f ${TESTFILE}_xxx + sync; sleep 1; sync; - $LFS setquota -g $TSTUSR 0 0 0 0 $DIR + MDS_UUID=`$LCTL dl | grep -m1 mds | awk '{print $((NF-1))}'` + MDS_QUOTA_USED="`$LFS quota -o $MDS_UUID -g $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $4 }'`" + echo $MDS_QUOTA_USED + [ $MDS_QUOTA_USED -ne 0 ] && \ + ($SHOW_QUOTA_USER; error "quota deleted isn't released") + $SHOW_QUOTA_GROUP + $LFS setquota -g $TSTUSR 0 0 0 0 $DIR # clear user limit +} + +# file hard limit (normal use and out of quota) +test_2() { + for i in `seq 1 $cycle`; do + # define ino_qunit is between 10 and 100 + ino_qunit=$(( $RANDOM % 90 + 10 )) + ino_qtune=$(( $RANDOM % $ino_qunit )) + # RANDOM's maxium is 32767 + i_limit=$(( $RANDOM % 990 + 10 )) + set_file_tunesz $ino_qtune + set_file_unitsz $ino_qunit + echo "cycle: $i(total $cycle) iunit:$ino_qunit, itune:$ino_qtune, ilimit:$i_limit" + test_2_sub $i_limit + echo "==================================================" + set_file_unitsz 5120 + set_file_tunesz 2560 + done } run_test 2 "File hard limit (normal use and out of quota) ===" @@ -279,7 +326,7 @@ test_block_soft() { OFFSET=$((OFFSET + BUNIT_SZ)) sync; sleep 1; sync; echo " Done" - + echo " Sleep $TIMER seconds ..." sleep $TIMER @@ -302,6 +349,7 @@ test_block_soft() { echo " Unlink file to stop timer" rm -f $TESTFILE + sync; sleep 1; sync echo " Done" $SHOW_QUOTA_USER @@ -320,7 +368,8 @@ test_block_soft() { test_3() { chmod 0777 $DIR/$tdir - LIMIT=$(( $BUNIT_SZ * 2 )) # 1 bunit on mds and 1 bunit on the ost + # 1 bunit on mds and 1 bunit on every ost + LIMIT=$(( $BUNIT_SZ * ($OSTCOUNT + 1) )) GRACE=10 echo " User quota (soft limit: $LIMIT kbytes grace: $GRACE seconds)" @@ -368,23 +417,20 @@ test_file_soft() { echo " Sleep $TIMER seconds ..." 
sleep $TIMER - + $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP $SHOW_QUOTA_INFO - + echo " Create file after timer goes off" - $RUNAS createmany -m ${TESTFILE}_after_ $((IUNIT_SZ - 2)) || \ - error "create ${TESTFILE}_after failure, but expect success" - sync; sleep 1; sync - $RUNAS touch ${TESTFILE}_after && \ + $RUNAS touch ${TESTFILE}_after ${TESTFILE}_after1 && \ error "create after timer expired, but expect EDQUOT" sync; sleep 1; sync $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP $SHOW_QUOTA_INFO - + echo " Unlink files to stop timer" find `dirname $TESTFILE` -name "`basename ${TESTFILE}`*" | xargs rm -f echo " Done" @@ -423,7 +469,7 @@ test_4() { test_file_soft $TESTFILE $LIMIT $GRACE $LFS setquota -g $TSTUSR 0 0 0 0 $DIR - + # cleanup $LFS setquota -t -u $MAX_DQ_TIME $MAX_IQ_TIME $DIR $LFS setquota -t -g $MAX_DQ_TIME $MAX_IQ_TIME $DIR @@ -460,13 +506,13 @@ run_test 4a "Grace time strings handling ===" test_5() { BLIMIT=$(( $BUNIT_SZ * $((OSTCOUNT + 1)) * 10)) # 10 bunits on each server ILIMIT=$(( $IUNIT_SZ * 10 )) # 10 iunits on mds - + echo " Set quota limit (0 $BLIMIT 0 $ILIMIT) for $TSTUSR.$TSTUSR" $LFS setquota -u $TSTUSR 0 $BLIMIT 0 $ILIMIT $DIR $LFS setquota -g $TSTUSR 0 $BLIMIT 0 $ILIMIT $DIR $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - + echo " Create more than $ILIMIT files and more than $BLIMIT kbytes ..." 
createmany -m $DIR/$tdir/$tfile-0_ $((ILIMIT + 1)) || \ error "touch failure, expect success" @@ -498,7 +544,7 @@ test_6() { LIMIT=$((BUNIT_SZ * (OSTCOUNT + 1) * 5)) # 5 bunits per server FILEA="$DIR/$tdir/$tfile-0_a" FILEB="$DIR/$tdir/$tfile-0_b" - + echo " Set block limit $LIMIT kbytes to $TSTUSR.$TSTUSR" $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR @@ -555,18 +601,18 @@ test_7() chmod 0777 $DIR/$tdir remote_mds && skip "remote mds" && return 0 - LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever + LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) )) TESTFILE="$DIR/$tdir/$tfile-0" - + $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR - + $LFS setstripe $TESTFILE -c 1 chown $TSTUSR.$TSTUSR $TESTFILE echo " Write to OST0..." $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ || \ error "write failure, but expect success" - + #define OBD_FAIL_OBD_DQACQ 0x604 echo 0x604 > /proc/sys/lustre/fail_loc echo " Remove files on OST0" @@ -587,7 +633,7 @@ test_7() TOTAL_LIMIT="`$LFS quota -u $TSTUSR $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $4 }'`" [ $TOTAL_LIMIT -eq $LIMIT ] || error "total limits not recovery!" echo " total limits = $TOTAL_LIMIT" - + OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'` [ -z "$OST0_UUID" ] && OST0_UUID=`do_facet ost1 "$LCTL dl | grep -m1 obdfilter" | awk '{print $((NF-1))}'` OST0_LIMIT="`$LFS quota -o $OST0_UUID -u $TSTUSR $DIR | awk '/^.*[[:digit:]+][[:space:]+]/ { print $3 }'`" @@ -604,14 +650,13 @@ test_8() { BLK_LIMIT=$((100 * 1024 * 1024)) # 100G FILE_LIMIT=1000000 DBENCH_LIB=${DBENCH_LIB:-/usr/lib/dbench} - + [ ! 
-d $DBENCH_LIB ] && skip "dbench not installed" && return 0 - + echo " Set enough high limit for user: $TSTUSR" $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR echo " Set enough high limit for group: $TSTUSR" $LFS setquota -g $USER 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - TGT=$DIR/$tdir/client.txt SRC=${SRC:-$DBENCH_LIB/client.txt} @@ -624,7 +669,7 @@ test_8() { cd $DIR/$tdir $RUNAS dbench -c client.txt 3 RC=$? - + rm -f client.txt cd $SAVE_PWD return $RC @@ -634,71 +679,67 @@ run_test 8 "Run dbench with quota enabled ===========" # run for fixing bug10707, it needs a big room. test for 64bit KB=1024 GB=$((KB * 1024 * 1024)) -FSIZE=$((OSTCOUNT * 9 / 2)) # Use this as dd bs to decrease time # inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS+1, LL_MAX_BLKSIZE_BITS); blksize=$((1 << 21)) # 2Mb +size_file=$((GB * 9 / 2)) +# this check is just for test9 and test10 +OST0_MIN=4900000 #4.67G +check_whether_skip () { + OST0_SIZE=`$LFS df $DIR | awk '/\[OST:0\]/ {print $4}'` + log "OST0_SIZE: $OST0_SIZE required: $OST0_MIN" + if [ $OST0_SIZE -lt $OST0_MIN ]; then + echo "WARN: OST0 has less than $OST0_MIN free, skip this test." 
+ return 0 + else + return 1 + fi +} test_9() { - chmod 0777 $DIR/$tdir - lustrefs_size=`(echo 0; df -t lustre -P | awk '{print $4}') | tail -n 1` - size_file=$((FSIZE * GB)) - echo "lustrefs_size:$lustrefs_size size_file:$((size_file / KB))" - if [ $((lustrefs_size * KB)) -lt $size_file ]; then - skip "less than $size_file bytes free" - return 0; - fi + check_whether_skip && return 0 - set_blk_unitsz $((1024 * 100)) - set_blk_tunesz $((1024 * 50)) - - # set the D_QUOTA flag - debugsave - sysctl -w lnet.debug="+quota" + set_blk_tunesz 512 + set_blk_unitsz 1024 + chmod 0777 $DIR/$tdir TESTFILE="$DIR/$tdir/$tfile-0" BLK_LIMIT=$((100 * KB * KB)) # 100G FILE_LIMIT=1000000 - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR echo " Set stripe" - [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE -c $OSTCOUNT + $LFS setstripe $TESTFILE -c 1 touch $TESTFILE chown $TSTUSR.$TSTUSR $TESTFILE $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - echo " Write the big file of $FSIZE G ..." + log " Write the big file of 4.5G ..." $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((size_file / blksize)) || \ - error "(usr) write $FSIZE G file failure, but expect success" + error "(usr) write 4.5G file failure, but expect success" $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - echo " delete the big file of $FSIZE G..." + log " delete the big file of 4.5G..." $RUNAS rm -f $TESTFILE + sync; sleep 3; sync; $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - echo " write the big file of 2 G..." 
- $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((2 * GB / blksize)) || \ - error "(usr) write 2 G file failure, but expect seccess" - - echo " delete the big file of 2 G..." - $RUNAS rm -f $TESTFILE RC=$? - set_blk_tunesz $BTUNE_SZ - set_blk_unitsz $BUNIT_SZ + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) - debugrestore wait_delete_completed return $RC @@ -708,23 +749,13 @@ run_test 9 "run for fixing bug10707(64bit) ===========" # run for fixing bug10707, it need a big room. test for 32bit test_10() { chmod 0777 $DIR/$tdir - lustrefs_size=`(echo 0; df -t lustre -P | awk '{print $4}') | tail -n 1` - size_file=$((FSIZE * GB)) - echo "lustrefs_size:$lustrefs_size size_file:$((size_file / KB))" - if [ $((lustrefs_size * KB)) -lt $size_file ]; then - skip "less than $size_file bytes free" - return 0; - fi + check_whether_skip && return 0 sync; sleep 10; sync; - set_blk_unitsz $((1024 * 100)) - set_blk_tunesz $((1024 * 50)) + set_blk_tunesz 512 + set_blk_unitsz 1024 - # set the D_QUOTA flag - debugsave - sysctl -w lnet.debug="+quota" - # make qd_count 32 bit sysctl -w lustre.fail_loc=0xA00 @@ -733,49 +764,40 @@ test_10() { BLK_LIMIT=$((100 * KB * KB)) # 100G FILE_LIMIT=1000000 - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for user: $TSTUSR" $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - echo " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" + log " Set enough high limit(block:$BLK_LIMIT; file: $FILE_LIMIT) for group: $TSTUSR" $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 $FILE_LIMIT $DIR - + echo " Set stripe" - [ $OSTCOUNT -ge 2 ] && $LFS setstripe $TESTFILE -c $OSTCOUNT + $LFS setstripe $TESTFILE -c 1 touch $TESTFILE chown $TSTUSR.$TSTUSR $TESTFILE $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - echo " Write the big file of $FSIZE G ..." + log " Write the big file of 4.5 G ..." 
$RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((size_file / blksize)) || \ - error "(usr) write $FSIZE G file failure, but expect success" - + error "(usr) write 4.5 G file failure, but expect success" + $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - echo " delete the big file of $FSIZE G..." - $RUNAS rm -f $TESTFILE + log " delete the big file of 4.5 G..." + $RUNAS rm -f $TESTFILE + sync; sleep 3; sync; $SHOW_QUOTA_USER $SHOW_QUOTA_GROUP - echo " write the big file of 2 G..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$blksize count=$((2 * GB / blksize)) || \ - error "(usr) write 2 G file failure, but expect success" - - echo " delete the big file of 2 G..." - $RUNAS rm -f $TESTFILE - RC=$? - # clear the flage - debugrestore - # make qd_count 64 bit sysctl -w lustre.fail_loc=0 - set_blk_tunesz $BTUNE_SZ - set_blk_unitsz $BUNIT_SZ + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) wait_delete_completed @@ -859,24 +881,23 @@ test_12() { LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 10)) # 10 bunits each sever TESTFILE="$DIR/$tdir/$tfile-0" TESTFILE2="$DIR2/$tdir/$tfile-1" - + echo " User quota (limit: $LIMIT kbytes)" $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR - $LFS setstripe $TESTFILE -i 0 -c 1 + $LFS setstripe $TESTFILE -i 0 -c 1 chown $TSTUSR.$TSTUSR $TESTFILE $LFS setstripe $TESTFILE2 -i 0 -c 1 chown $TSTUSR2.$TSTUSR2 $TESTFILE2 #define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f - sysctl -w lustre.fail_loc=0x0000021f + sysctl -w lustre.fail_loc=0x0000021f echo " step1: write out of block quota ..." - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT*2)) & - DDPID=$! - sleep 5 - $RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 & + $RUNAS2 dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=102400 & DDPID1=$! + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(($LIMIT*2)) & + DDPID=$! echo " step2: testing ......" count=0 @@ -888,7 +909,7 @@ test_12() { error "dd should be finished!" 
fi sleep 1 - done + done echo "(dd_pid=$DDPID1, time=$count)successful" #Recover fail_loc and dd will finish soon @@ -903,25 +924,28 @@ test_12() { error "dd should be finished!" fi sleep 1 - done + done echo "(dd_pid=$DDPID, time=$count)successful" rm -f $TESTFILE $TESTFILE2 - + sync; sleep 3; sync; + $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit } run_test 12 "test a deadlock between quota and journal ===" # test multiple clients write block quota b=11693 test_13() { + wait_delete_completed + # one OST * 10 + (mds + other OSTs) LIMIT=$((BUNIT_SZ * 10 + (BUNIT_SZ * OSTCOUNT))) TESTFILE="$DIR/$tdir/$tfile" - + echo " User quota (limit: $LIMIT kbytes)" $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR $SHOW_QUOTA_USER - + $LFS setstripe $TESTFILE -i 0 -c 1 chown $TSTUSR.$TSTUSR $TESTFILE $LFS setstripe $TESTFILE.2 -i 0 -c 1 @@ -929,9 +953,9 @@ test_13() { echo " step1: write out of block quota ..." # one bunit will give mds - $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & DDPID=$! - $RUNAS dd if=/dev/zero of=$TESTFILE.2 bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & + $RUNAS dd if=/dev/zero of=$TESTFILE.2 bs=$BLK_SZ count=$[($LIMIT - $BUNIT_SZ) / 2] & DDPID1=$! echo " step2: testing ......" @@ -943,7 +967,7 @@ test_13() { error "dd should be finished!" fi sleep 1 - done + done echo "(dd_pid=$DDPID, time=$count)successful" count=0 @@ -954,7 +978,7 @@ test_13() { error "dd should be finished!" 
fi sleep 1 - done + done echo "(dd_pid=$DDPID1, time=$count)successful" sync; sleep 5; sync; @@ -967,7 +991,7 @@ test_13() { error "files too small $fz + $fz2 < $((BUNIT_SZ * BLK_SZ * 10))" rm -f $TESTFILE $TESTFILE.2 - + $LFS setquota -u $TSTUSR 0 0 0 0 $DIR # clear user limit } run_test 13 "test multiple clients write block quota ===" @@ -981,7 +1005,7 @@ check_if_quota_zero(){ field="3 5" fi for j in $field; do - tmp=`$LFS quota -$1 $2 $DIR | sed -n ${i}p | + tmp=`$LFS quota -$1 $2 $DIR | sed -n ${i}p | awk '{print $'"$j"'}'` [ -n "$tmp" ] && [ $tmp -ne 0 ] && $LFS quota -$1 $2 $DIR && \ error "quota on $2 isn't clean" @@ -992,13 +1016,12 @@ check_if_quota_zero(){ pre_test_14 () { # reboot the lustre + sync; sleep 5; sync cd $T_PWD; sh llmountcleanup.sh || error "llmountcleanup failed" sh llmount.sh - pre_test run_test 0 "reboot lustre" } - -pre_test_14 +pre_test_14 test_14(){ # b=12223 -- setting quota on root TESTFILE="$DIR/$tdir/$tfile" @@ -1011,19 +1034,19 @@ test_14(){ # b=12223 -- setting quota on root error "unexpected: user(root) write files failly!" chmod 666 $TESTFILE $RUNAS dd if=/dev/zero of=${TESTFILE} seek=4096 bs=4k count=4096 && \ - error "unexpected: user(quota_usr) write a file successfully!" + error "unexpected: user(quota_usr) write a file successfully!" 
# trigger the llog chmod 777 $DIR - for i in `seq 1 10`; do $RUNAS touch ${TESTFILE}a_$i; done - for i in `seq 1 10`; do $RUNAS rm -f ${TESTFILE}a_$i; done + for i in `seq 1 10`; do $RUNAS touch ${TESTFILE}a_$i; done + for i in `seq 1 10`; do $RUNAS rm -f ${TESTFILE}a_$i; done # do the check - dmesg | tail | grep "\-122" |grep llog_obd_origin_add && error "err -122 not found in dmesg" + dmesg | tail | grep "\-122" |grep llog_obd_origin_add && error "err -122 not found in dmesg" $LFS setquota -u root 0 0 0 0 $DIR #check_if_quota_zero u root - # clean + # clean unlinkmany ${TESTFILE} 15 rm -f $TESTFILE } @@ -1102,10 +1125,123 @@ test_15(){ } run_test 15 "set block quota more than 4T ===" +# $1=u/g $2=with qunit adjust or not +test_16_tub() { + LIMIT=$(( $BUNIT_SZ * $(($OSTCOUNT + 1)) * 4)) + TESTFILE="$DIR/$tdir/$tfile" + + echo " User quota (limit: $LIMIT kbytes)" + if [ $1 == "u" ]; then + $LFS setquota -u $TSTUSR 0 $LIMIT 0 0 $DIR + $SHOW_QUOTA_USER + else + $LFS setquota -g $TSTUSR 0 $LIMIT 0 0 $DIR + $SHOW_QUOTA_GROUP + fi + + $LFS setstripe $TESTFILE -c 1 + chown $TSTUSR.$TSTUSR $TESTFILE + + echo " Write ..." + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$((BUNIT_SZ * 4)) || \ + error "(usr) write failure, but expect success" + echo " Done" + echo " Write out of block quota ..." 
+ # this time maybe cache write, ignore it's failure + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) || true + # flush cache, ensure noquota flag is setted on client + sync; sleep 1; sync; + if [ $2 -eq 1 ]; then + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) || \ + error "(write failure, but expect success" + else + $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$BUNIT_SZ seek=$((BUNIT_SZ * 4)) && \ + error "(write success, but expect EDQUOT" + fi + + rm -f $TESTFILE + $LFS setquota -$1 $TSTUSR 0 0 0 0 $DIR +} + +# test without adjusting qunit +test_16 () { + set_blk_tunesz $((BUNIT_SZ * 2)) + set_blk_unitsz $((BUNIT_SZ * 4)) + for i in u g; do + for j in 0 1; do + # define OBD_FAIL_QUOTA_WITHOUT_CHANGE_QS 0xA01 + echo " grp/usr: $i, adjust qunit: $j" + echo "-------------------------------" + [ $j -eq 1 ] && sysctl -w lustre.fail_loc=0 + [ $j -eq 0 ] && sysctl -w lustre.fail_loc=0xA01 + test_16_tub $i $j + done + done + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) +} +run_test 16 "test without adjusting qunit" + +# run for fixing bug14526, failed returned quota reqs shouldn't ruin lustre. +test_17() { + set_blk_tunesz 512 + set_blk_unitsz 1024 + + #define OBD_FAIL_QUOTA_RET_QDATA | OBD_FAIL_ONCE + sysctl -w lustre.fail_loc=0x80000A02 + + TESTFILE="$DIR/$tdir/$tfile-a" + TESTFILE2="$DIR/$tdir/$tfile-b" + + BLK_LIMIT=$((100 * 1024)) # 100M + + log " Set enough high limit(block:$BLK_LIMIT) for user: $TSTUSR" + $LFS setquota -u $TSTUSR 0 $BLK_LIMIT 0 0 $DIR + log " Set enough high limit(block:$BLK_LIMIT) for group: $TSTUSR" + $LFS setquota -g $TSTUSR 0 $BLK_LIMIT 0 0 $DIR + + touch $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE + touch $TESTFILE2 + chown $TSTUSR.$TSTUSR $TESTFILE2 + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + + log " Write the test file1 ..." 
+ $RUNAS dd if=/dev/zero of=$TESTFILE bs=$BLK_SZ count=$(( 10 * 1024 )) \ + || echo "write 10M file failure" + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + + log " write the test file2 ..." + $RUNAS dd if=/dev/zero of=$TESTFILE2 bs=$BLK_SZ count=$(( 10 * 1024 )) \ + || error "write 10M file failure" + + $SHOW_QUOTA_USER + $SHOW_QUOTA_GROUP + + rm -f $TESTFILE $TESTFILE2 + RC=$? + sync; sleep 3; sync; + + # make qd_count 64 bit + sysctl -w lustre.fail_loc=0 + + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + + return $RC +} +run_test 17 "run for fixing bug14526 ===========" + # turn off quota test_99() { $LFS quotaoff $DIR + sysctl -w lnet.debug="-quota" + return 0 } run_test 99 "Quota off ===============================" @@ -1113,7 +1249,6 @@ run_test 99 "Quota off ===============================" log "cleanup: ======================================================" cd $ORIG_PWD -post_test check_and_cleanup_lustre echo '=========================== finished ===============================' [ -f "$QUOTALOG" ] && cat $QUOTALOG && grep -q FAIL $QUOTALOG && exit 1 || true diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 72a0e43..66cf5ae 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -975,6 +975,18 @@ check_qunit_data(void) CHECK_MEMBER(qunit_data, qd_id); CHECK_MEMBER(qunit_data, qd_flags); CHECK_MEMBER(qunit_data, qd_count); + CHECK_MEMBER(qunit_data, qd_qunit); + CHECK_MEMBER(qunit_data, padding ); +} + +static void +check_qunit_data_old2(void) +{ + BLANK_LINE(); + CHECK_STRUCT(qunit_data_old2); + CHECK_MEMBER(qunit_data_old2, qd_id); + CHECK_MEMBER(qunit_data_old2, qd_flags); + CHECK_MEMBER(qunit_data_old2, qd_count); } static void @@ -1048,6 +1060,17 @@ check_posix_acl_xattr_header(void) #endif static void +check_quota_adjust_qunit(void) +{ + BLANK_LINE(); + CHECK_STRUCT(quota_adjust_qunit); + CHECK_MEMBER(quota_adjust_qunit, qaq_flags); + CHECK_MEMBER(quota_adjust_qunit, qaq_id); + 
CHECK_MEMBER(quota_adjust_qunit, qaq_bunit_sz); + CHECK_MEMBER(quota_adjust_qunit, qaq_iunit_sz); +} + +static void system_string (char *cmdline, char *str, int len) { int fds[2]; @@ -1152,6 +1175,7 @@ main(int argc, char **argv) CHECK_VALUE(OST_SYNC); CHECK_VALUE(OST_QUOTACHECK); CHECK_VALUE(OST_QUOTACTL); + CHECK_VALUE(OST_QUOTA_ADJUST_QUNIT); CHECK_VALUE(OST_LAST_OPC); CHECK_DEFINE(OBD_OBJECT_EOF); @@ -1293,7 +1317,9 @@ main(int argc, char **argv) check_llog_array_rec(); check_mds_extent_desc(); check_qunit_data(); + check_qunit_data_old2(); check_qunit_data_old(); + check_quota_adjust_qunit(); check_mgs_target_info(); check_lustre_disk_data(); #ifdef LIBLUSTRE_POSIX_ACL diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 7a7bda8..279e568 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -93,7 +93,9 @@ void lustre_assert_wire_constants(void) (long long)OST_QUOTACHECK); LASSERTF(OST_QUOTACTL == 19, " found %lld\n", (long long)OST_QUOTACTL); - LASSERTF(OST_LAST_OPC == 20, " found %lld\n", + LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, " found %lld\n", + (long long)OST_QUOTA_ADJUST_QUNIT); + LASSERTF(OST_LAST_OPC == 21, " found %lld\n", (long long)OST_LAST_OPC); LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL," found %lld\n", (long long)OBD_OBJECT_EOF); @@ -2003,7 +2005,7 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct mds_extent_desc *)0)->med_lmm)); /* Checks for struct qunit_data */ - LASSERTF((int)sizeof(struct qunit_data) == 16, " found %lld\n", + LASSERTF((int)sizeof(struct qunit_data) == 32, " found %lld\n", (long long)(int)sizeof(struct qunit_data)); LASSERTF((int)offsetof(struct qunit_data, qd_id) == 0, " found %lld\n", (long long)(int)offsetof(struct qunit_data, qd_id)); @@ -2017,6 +2019,30 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct qunit_data, qd_count)); LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_count) == 8, " found %lld\n", (long long)(int)sizeof(((struct 
qunit_data *)0)->qd_count)); + LASSERTF((int)offsetof(struct qunit_data, qd_qunit) == 16, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, qd_qunit)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->qd_qunit) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->qd_qunit)); + LASSERTF((int)offsetof(struct qunit_data, padding) == 24, " found %lld\n", + (long long)(int)offsetof(struct qunit_data, padding)); + LASSERTF((int)sizeof(((struct qunit_data *)0)->padding) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data *)0)->padding)); + + /* Checks for struct qunit_data_old2 */ + LASSERTF((int)sizeof(struct qunit_data_old2) == 16, " found %lld\n", + (long long)(int)sizeof(struct qunit_data_old2)); + LASSERTF((int)offsetof(struct qunit_data_old2, qd_id) == 0, " found %lld\n", + (long long)(int)offsetof(struct qunit_data_old2, qd_id)); + LASSERTF((int)sizeof(((struct qunit_data_old2 *)0)->qd_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data_old2 *)0)->qd_id)); + LASSERTF((int)offsetof(struct qunit_data_old2, qd_flags) == 4, " found %lld\n", + (long long)(int)offsetof(struct qunit_data_old2, qd_flags)); + LASSERTF((int)sizeof(((struct qunit_data_old2 *)0)->qd_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data_old2 *)0)->qd_flags)); + LASSERTF((int)offsetof(struct qunit_data_old2, qd_count) == 8, " found %lld\n", + (long long)(int)offsetof(struct qunit_data_old2, qd_count)); + LASSERTF((int)sizeof(((struct qunit_data_old2 *)0)->qd_count) == 8, " found %lld\n", + (long long)(int)sizeof(((struct qunit_data_old2 *)0)->qd_count)); /* Checks for struct qunit_data_old */ LASSERTF((int)sizeof(struct qunit_data_old) == 16, " found %lld\n", @@ -2038,6 +2064,26 @@ void lustre_assert_wire_constants(void) LASSERTF((int)sizeof(((struct qunit_data_old *)0)->qd_isblk) == 4, " found %lld\n", (long long)(int)sizeof(((struct qunit_data_old *)0)->qd_isblk)); + /* Checks for struct 
quota_adjust_qunit */ + LASSERTF((int)sizeof(struct quota_adjust_qunit) == 32, " found %lld\n", + (long long)(int)sizeof(struct quota_adjust_qunit)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_flags) == 0, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_flags)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_flags)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_id) == 4, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_id)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id) == 4, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_id)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_bunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_bunit_sz)); + LASSERTF((int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz) == 16, " found %lld\n", + (long long)(int)offsetof(struct quota_adjust_qunit, qaq_iunit_sz)); + LASSERTF((int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz) == 8, " found %lld\n", + (long long)(int)sizeof(((struct quota_adjust_qunit *)0)->qaq_iunit_sz)); + /* Checks for struct mgs_target_info */ LASSERTF((int)sizeof(struct mgs_target_info) == 4544, " found %lld\n", (long long)(int)sizeof(struct mgs_target_info)); -- 1.8.3.1