From 1b044fecb42c1f72ca2d2bc2bf80a4345b9ccf11 Mon Sep 17 00:00:00 2001 From: Hongchao Zhang Date: Thu, 12 Jul 2012 15:23:18 +0800 Subject: [PATCH] LU-1057 quota: speed up lookup in osc_quota_chkdq This patch replaces the global hash table, which stores the uid/gid that are about to run out of quota space, with a per-OSC cfs_hash. Signed-off-by: Hongchao Zhang Change-Id: Ibf0785a60b007f33a8660298159abcc387dd8507 Reviewed-on: http://review.whamcloud.com/2074 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Johann Lombardi Reviewed-by: Niu Yawei Reviewed-by: Jinshan Xiong Reviewed-by: Andreas Dilger --- lustre/include/obd.h | 2 + lustre/osc/osc_dev.c | 6 + lustre/osc/osc_internal.h | 9 +- lustre/osc/osc_quota.c | 353 +++++++++++++++++++++++----------------------- lustre/osc/osc_request.c | 99 ++++++------- 5 files changed, 239 insertions(+), 230 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 80f8ec7..57a0e2a 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -545,6 +545,8 @@ struct client_obd { /* ptlrpc work for writeback in ptlrpcd context */ void *cl_writeback_work; + /* hash tables for osc_quota_info */ + cfs_hash_t *cl_quota_hash[MAXQUOTAS]; }; #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) diff --git a/lustre/osc/osc_dev.c b/lustre/osc/osc_dev.c index 281ea61..fc3a47f 100644 --- a/lustre/osc/osc_dev.c +++ b/lustre/osc/osc_dev.c @@ -54,6 +54,7 @@ cfs_mem_cache_t *osc_thread_kmem; cfs_mem_cache_t *osc_session_kmem; cfs_mem_cache_t *osc_req_kmem; cfs_mem_cache_t *osc_extent_kmem; +cfs_mem_cache_t *osc_quota_kmem; struct lu_kmem_descr osc_caches[] = { { @@ -92,6 +93,11 @@ struct lu_kmem_descr osc_caches[] = { .ckd_size = sizeof (struct osc_extent) }, { + .ckd_cache = &osc_quota_kmem, + .ckd_name = "osc_quota_kmem", + .ckd_size = sizeof(struct osc_quota_info) + }, + { .ckd_cache = NULL } }; diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index d3ebac1..aa24a5b 100644 --- a/lustre/osc/osc_internal.h +++ 
b/lustre/osc/osc_internal.h @@ -201,8 +201,13 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d) int osc_dlm_lock_pageref(struct ldlm_lock *dlm); -int osc_quota_init(void); -int osc_quota_exit(void); +extern cfs_mem_cache_t *osc_quota_kmem; +struct osc_quota_info { + /** linkage for quota hash table */ + cfs_hlist_node_t oqi_hash; + obd_uid oqi_id; +}; +int osc_quota_setup(struct obd_device *obd); int osc_quota_cleanup(struct obd_device *obd); int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], obd_flag valid, obd_flag flags); diff --git a/lustre/osc/osc_quota.c b/lustre/osc/osc_quota.c index 3d2b45b..0cd9fae7 100644 --- a/lustre/osc/osc_quota.c +++ b/lustre/osc/osc_quota.c @@ -34,226 +34,221 @@ #include #include "osc_internal.h" -struct osc_quota_info { - cfs_list_t oqi_hash; /* hash list */ - struct client_obd *oqi_cli; /* osc obd */ - unsigned int oqi_id; /* uid/gid of a file */ - short oqi_type; /* quota type */ -}; - -cfs_spinlock_t qinfo_list_lock = CFS_SPIN_LOCK_UNLOCKED; - -static cfs_list_t qinfo_hash[NR_DQHASH]; -/* SLAB cache for client quota context */ -cfs_mem_cache_t *qinfo_cachep = NULL; +static inline struct osc_quota_info *osc_oqi_alloc(obd_uid id) +{ + struct osc_quota_info *oqi; -static inline int hashfn(struct client_obd *cli, unsigned long id, int type) - __attribute__((__const__)); + OBD_SLAB_ALLOC_PTR(oqi, osc_quota_kmem); + if (oqi != NULL) + oqi->oqi_id = id; -static inline int hashfn(struct client_obd *cli, unsigned long id, int type) -{ - unsigned long tmp = ((unsigned long)cli>>6) ^ id; - tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH; - return tmp; + return oqi; } -/* caller must hold qinfo_list_lock */ -static inline void insert_qinfo_hash(struct osc_quota_info *oqi) +int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]) { - cfs_list_t *head = qinfo_hash + - hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type); + int type; + ENTRY; - LASSERT_SPIN_LOCKED(&qinfo_list_lock); - 
cfs_list_add(&oqi->oqi_hash, head); -} + for (type = 0; type < MAXQUOTAS; type++) { + struct osc_quota_info *oqi; -/* caller must hold qinfo_list_lock */ -static inline void remove_qinfo_hash(struct osc_quota_info *oqi) -{ - LASSERT_SPIN_LOCKED(&qinfo_list_lock); - cfs_list_del_init(&oqi->oqi_hash); -} + oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]); + if (oqi) { + obd_uid id = oqi->oqi_id; -/* caller must hold qinfo_list_lock */ -static inline struct osc_quota_info *find_qinfo(struct client_obd *cli, - unsigned int id, int type) -{ - struct osc_quota_info *oqi; - unsigned int hashent = hashfn(cli, id, type); - ENTRY; + LASSERTF(id == qid[type], + "The ids don't match %u != %u\n", + id, qid[type]); - LASSERT_SPIN_LOCKED(&qinfo_list_lock); - cfs_list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) { - if (oqi->oqi_cli == cli && - oqi->oqi_id == id && oqi->oqi_type == type) - RETURN(oqi); - } - RETURN(NULL); + /* the slot is busy, the user is about to run out of + * quota space on this OST */ + CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n", + type == USRQUOTA ? "user" : "group", qid[type]); + RETURN(NO_QUOTA); + } + } + + RETURN(QUOTA_OK); } -static struct osc_quota_info *alloc_qinfo(struct client_obd *cli, - unsigned int id, int type) +#define MD_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_MD_FLUSRQUOTA \ + : OBD_MD_FLGRPQUOTA) +#define FL_QUOTA_FLAG(type) ((type == USRQUOTA) ? 
OBD_FL_NO_USRQUOTA \ + : OBD_FL_NO_GRPQUOTA) + +int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], + obd_flag valid, obd_flag flags) { - struct osc_quota_info *oqi; + int type; + int rc = 0; ENTRY; - OBD_SLAB_ALLOC_PTR(oqi, qinfo_cachep); - if(!oqi) - RETURN(NULL); - - CFS_INIT_LIST_HEAD(&oqi->oqi_hash); - oqi->oqi_cli = cli; - oqi->oqi_id = id; - oqi->oqi_type = type; - - RETURN(oqi); + if ((valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) == 0) + RETURN(0); + + for (type = 0; type < MAXQUOTAS; type++) { + struct osc_quota_info *oqi; + + if ((valid & MD_QUOTA_FLAG(type)) == 0) + continue; + + /* lookup the ID in the per-type hash table */ + oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]); + if ((flags & FL_QUOTA_FLAG(type)) != 0) { + /* This ID is getting close to its quota limit, let's + * switch to sync I/O */ + if (oqi != NULL) + continue; + + oqi = osc_oqi_alloc(qid[type]); + if (oqi == NULL) { + rc = -ENOMEM; + break; + } + + rc = cfs_hash_add_unique(cli->cl_quota_hash[type], + &qid[type], &oqi->oqi_hash); + /* race with others? */ + if (rc == -EALREADY) { + rc = 0; + OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem); + } + + CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n", + cli->cl_import->imp_obd->obd_name, + type == USRQUOTA ? "user" : "group", + qid[type], rc); + } else { + /* This ID is now off the hook, let's remove it from + * the hash table */ + if (oqi == NULL) + continue; + + oqi = cfs_hash_del_key(cli->cl_quota_hash[type], + &qid[type]); + if (oqi) + OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem); + + CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n", + cli->cl_import->imp_obd->obd_name, + type == USRQUOTA ? 
"user" : "group", + qid[type], oqi); + } + } + + RETURN(rc); } -static void free_qinfo(struct osc_quota_info *oqi) +/* + * Hash operations for uid/gid <-> osc_quota_info + */ +static unsigned +oqi_hashfn(cfs_hash_t *hs, const void *key, unsigned mask) { - OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi)); + return cfs_hash_u32_hash(*((__u32*)key), mask); } -int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]) +static int +oqi_keycmp(const void *key, cfs_hlist_node_t *hnode) { - unsigned int id; - int cnt, rc = QUOTA_OK; - ENTRY; - - cfs_spin_lock(&qinfo_list_lock); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - struct osc_quota_info *oqi = NULL; + struct osc_quota_info *oqi; + obd_uid uid; - id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA]; - oqi = find_qinfo(cli, id, cnt); - if (oqi) { - rc = NO_QUOTA; - break; - } - } - cfs_spin_unlock(&qinfo_list_lock); + LASSERT(key != NULL); + uid = *((obd_uid*)key); + oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash); - if (rc == NO_QUOTA) - CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n", - cnt == USRQUOTA ? "user" : "group", id); - RETURN(rc); + return uid == oqi->oqi_id; } -int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], - obd_flag valid, obd_flag flags) +static void * +oqi_key(cfs_hlist_node_t *hnode) { - unsigned int id; - obd_flag noquota; - int cnt, rc = 0; - ENTRY; - - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - struct osc_quota_info *oqi = NULL, *old; - - if (!(valid & ((cnt == USRQUOTA) ? - OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA))) - continue; - - id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA]; - noquota = (cnt == USRQUOTA) ? - (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA); - - if (noquota) { - oqi = alloc_qinfo(cli, id, cnt); - if (!oqi) { - rc = -ENOMEM; - CDEBUG(D_QUOTA, "setdq for %s %d failed, " - "(rc = %d)\n", - cnt == USRQUOTA ? 
"user" : "group", - id, rc); - break; - } - } - - cfs_spin_lock(&qinfo_list_lock); - old = find_qinfo(cli, id, cnt); - if (old && !noquota) - remove_qinfo_hash(old); - else if (!old && noquota) - insert_qinfo_hash(oqi); - cfs_spin_unlock(&qinfo_list_lock); - - if (old && !noquota) - CDEBUG(D_QUOTA, "setdq to remove for %s %d\n", - cnt == USRQUOTA ? "user" : "group", id); - else if (!old && noquota) - CDEBUG(D_QUOTA, "setdq to insert for %s %d\n", - cnt == USRQUOTA ? "user" : "group", id); - - if (old) { - if (noquota) - free_qinfo(oqi); - else - free_qinfo(old); - } - } - RETURN(rc); + struct osc_quota_info *oqi; + oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash); + return &oqi->oqi_id; } -int osc_quota_cleanup(struct obd_device *obd) +static void * +oqi_object(cfs_hlist_node_t *hnode) { - struct client_obd *cli = &obd->u.cli; - struct osc_quota_info *oqi, *n; - int i; - ENTRY; - - cfs_spin_lock(&qinfo_list_lock); - for (i = 0; i < NR_DQHASH; i++) { - cfs_list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) { - if (oqi->oqi_cli != cli) - continue; - remove_qinfo_hash(oqi); - free_qinfo(oqi); - } - } - cfs_spin_unlock(&qinfo_list_lock); + return cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash); +} - RETURN(0); +static void +oqi_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +{ } -int osc_quota_init() +static void +oqi_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode) { - int i; - ENTRY; +} - LASSERT(qinfo_cachep == NULL); - qinfo_cachep = cfs_mem_cache_create("osc_quota_info", - sizeof(struct osc_quota_info), - 0, 0); - if (!qinfo_cachep) - RETURN(-ENOMEM); +static void +oqi_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +{ + struct osc_quota_info *oqi; - for (i = 0; i < NR_DQHASH; i++) - CFS_INIT_LIST_HEAD(qinfo_hash + i); + oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash); - RETURN(0); + OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem); } -int osc_quota_exit() +#define HASH_QUOTA_BKT_BITS 5 +#define HASH_QUOTA_CUR_BITS 5 +#define 
HASH_QUOTA_MAX_BITS 15 + +static cfs_hash_ops_t quota_hash_ops = { + .hs_hash = oqi_hashfn, + .hs_keycmp = oqi_keycmp, + .hs_key = oqi_key, + .hs_object = oqi_object, + .hs_get = oqi_get, + .hs_put_locked = oqi_put_locked, + .hs_exit = oqi_exit, +}; + +int osc_quota_setup(struct obd_device *obd) { - struct osc_quota_info *oqi, *n; - int i, rc; - ENTRY; + struct client_obd *cli = &obd->u.cli; + int i, type; + ENTRY; + + for (type = 0; type < MAXQUOTAS; type++) { + cli->cl_quota_hash[type] = cfs_hash_create("QUOTA_HASH", + HASH_QUOTA_CUR_BITS, + HASH_QUOTA_MAX_BITS, + HASH_QUOTA_BKT_BITS, + 0, + CFS_HASH_MIN_THETA, + CFS_HASH_MAX_THETA, + &quota_hash_ops, + CFS_HASH_DEFAULT); + if (cli->cl_quota_hash[type] == NULL) + break; + } + + if (type == MAXQUOTAS) + RETURN(0); + + for (i = 0; i < type; i++) + cfs_hash_putref(cli->cl_quota_hash[i]); + + RETURN(-ENOMEM); +} - cfs_spin_lock(&qinfo_list_lock); - for (i = 0; i < NR_DQHASH; i++) { - cfs_list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) { - remove_qinfo_hash(oqi); - free_qinfo(oqi); - } - } - cfs_spin_unlock(&qinfo_list_lock); +int osc_quota_cleanup(struct obd_device *obd) +{ + struct client_obd *cli = &obd->u.cli; + int type; + ENTRY; - rc = cfs_mem_cache_destroy(qinfo_cachep); - LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n"); - qinfo_cachep = NULL; + for (type = 0; type < MAXQUOTAS; type++) + cfs_hash_putref(cli->cl_quota_hash[type]); - RETURN(0); + RETURN(0); } int osc_quotactl(struct obd_device *unused, struct obd_export *exp, diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index ed0ac4b..b288b04 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3554,56 +3554,59 @@ static int brw_queue_work(const struct lu_env *env, void *data) int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { - struct client_obd *cli = &obd->u.cli; - int rc; - ENTRY; - - ENTRY; - rc = ptlrpcd_addref(); - if (rc) - RETURN(rc); - - rc = client_obd_setup(obd, lcfg); - if (rc == 
0) { - void *handler; - handler = ptlrpcd_alloc_work(cli->cl_import, - brw_queue_work, cli); - if (!IS_ERR(handler)) - cli->cl_writeback_work = handler; - else - rc = PTR_ERR(handler); - } - - if (rc == 0) { - struct lprocfs_static_vars lvars = { 0 }; - - cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL; - lprocfs_osc_init_vars(&lvars); - if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { - lproc_osc_attach_seqstat(obd); - sptlrpc_lprocfs_cliobd_attach(obd); - ptlrpc_lprocfs_register_obd(obd); - } - - oscc_init(obd); - /* We need to allocate a few requests more, because - brw_interpret tries to create new requests before freeing - previous ones. Ideally we want to have 2x max_rpcs_in_flight - reserved, but I afraid that might be too much wasted RAM - in fact, so 2 is just my guess and still should work. */ - cli->cl_import->imp_rq_pool = - ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2, - OST_MAXREQSIZE, - ptlrpc_add_rqs_to_pool); + struct lprocfs_static_vars lvars = { 0 }; + struct client_obd *cli = &obd->u.cli; + void *handler; + int rc; + ENTRY; - CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list); + rc = ptlrpcd_addref(); + if (rc) + RETURN(rc); + + rc = client_obd_setup(obd, lcfg); + if (rc) + GOTO(out_ptlrpcd, rc); + + handler = ptlrpcd_alloc_work(cli->cl_import, brw_queue_work, cli); + if (IS_ERR(handler)) + GOTO(out_client_setup, rc = PTR_ERR(handler)); + cli->cl_writeback_work = handler; + + rc = osc_quota_setup(obd); + if (rc) + GOTO(out_ptlrpcd_work, rc); + + cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL; + lprocfs_osc_init_vars(&lvars); + if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { + lproc_osc_attach_seqstat(obd); + sptlrpc_lprocfs_cliobd_attach(obd); + ptlrpc_lprocfs_register_obd(obd); + } - ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery); - } + oscc_init(obd); + /* We need to allocate a few requests more, because + * brw_interpret tries to create new requests before freeing + * previous ones. Ideally we want to have 2x 
max_rpcs_in_flight + * reserved, but I'm afraid that might be too much wasted RAM + * in fact, so 2 is just my guess and still should work. */ + cli->cl_import->imp_rq_pool = + ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2, + OST_MAXREQSIZE, + ptlrpc_add_rqs_to_pool); + + CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list); + ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery); + RETURN(rc); - if (rc) - ptlrpcd_decref(); - RETURN(rc); +out_ptlrpcd_work: + ptlrpcd_destroy_work(handler); +out_client_setup: + client_obd_cleanup(obd); +out_ptlrpcd: + ptlrpcd_decref(); + RETURN(rc); } static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) @@ -3751,7 +3754,6 @@ int __init osc_init(void) lprocfs_osc_init_vars(&lvars); - osc_quota_init(); rc = class_register_type(&osc_obd_ops, NULL, lvars.module_vars, LUSTRE_OSC_NAME, &osc_device_type); if (rc) { @@ -3774,7 +3776,6 @@ int __init osc_init(void) #ifdef __KERNEL__ static void /*__exit*/ osc_exit(void) { - osc_quota_exit(); class_unregister_type(LUSTRE_OSC_NAME); lu_kmem_fini(osc_caches); } -- 1.8.3.1