Whamcloud - gitweb
LU-1057 quota: speed up lookup in osc_quota_chkdq
author: Hongchao Zhang <hongchao.zhang@whamcloud.com>
Thu, 12 Jul 2012 07:23:18 +0000 (15:23 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 31 Jul 2012 16:17:32 +0000 (12:17 -0400)
This patch replaces the global hash table used to store the uids/gids
that are about to run out of quota space with a per-OSC cfs_hash.

Signed-off-by: Hongchao Zhang <hongchao.zhang@whamcloud.com>
Change-Id: Ibf0785a60b007f33a8660298159abcc387dd8507
Reviewed-on: http://review.whamcloud.com/2074
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann@whamcloud.com>
Reviewed-by: Niu Yawei <niu@whamcloud.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd.h
lustre/osc/osc_dev.c
lustre/osc/osc_internal.h
lustre/osc/osc_quota.c
lustre/osc/osc_request.c

index 80f8ec7..57a0e2a 100644 (file)
@@ -545,6 +545,8 @@ struct client_obd {
 
         /* ptlrpc work for writeback in ptlrpcd context */
         void                    *cl_writeback_work;
+       /* hash tables for osc_quota_info */
+       cfs_hash_t              *cl_quota_hash[MAXQUOTAS];
 };
 #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
 
index 281ea61..fc3a47f 100644 (file)
@@ -54,6 +54,7 @@ cfs_mem_cache_t *osc_thread_kmem;
 cfs_mem_cache_t *osc_session_kmem;
 cfs_mem_cache_t *osc_req_kmem;
 cfs_mem_cache_t *osc_extent_kmem;
+cfs_mem_cache_t *osc_quota_kmem;
 
 struct lu_kmem_descr osc_caches[] = {
         {
@@ -92,6 +93,11 @@ struct lu_kmem_descr osc_caches[] = {
                .ckd_size  = sizeof (struct osc_extent)
        },
        {
+               .ckd_cache = &osc_quota_kmem,
+               .ckd_name  = "osc_quota_kmem",
+               .ckd_size  = sizeof(struct osc_quota_info)
+       },
+       {
                 .ckd_cache = NULL
         }
 };
index d3ebac1..aa24a5b 100644 (file)
@@ -201,8 +201,13 @@ static inline struct osc_device *obd2osc_dev(const struct obd_device *d)
 
 int osc_dlm_lock_pageref(struct ldlm_lock *dlm);
 
-int osc_quota_init(void);
-int osc_quota_exit(void);
+extern cfs_mem_cache_t *osc_quota_kmem;
+struct osc_quota_info {
+        /** linkage for quota hash table */
+        cfs_hlist_node_t oqi_hash;
+       obd_uid          oqi_id;
+};
+int osc_quota_setup(struct obd_device *obd);
 int osc_quota_cleanup(struct obd_device *obd);
 int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
                     obd_flag valid, obd_flag flags);
index 3d2b45b..0cd9fae 100644 (file)
 #include <obd_ost.h>
 #include "osc_internal.h"
 
-struct osc_quota_info {
-        cfs_list_t         oqi_hash; /* hash list */
-        struct client_obd *oqi_cli;  /* osc obd */
-        unsigned int       oqi_id;   /* uid/gid of a file */
-        short              oqi_type; /* quota type */
-};
-
-cfs_spinlock_t qinfo_list_lock = CFS_SPIN_LOCK_UNLOCKED;
-
-static cfs_list_t qinfo_hash[NR_DQHASH];
-/* SLAB cache for client quota context */
-cfs_mem_cache_t *qinfo_cachep = NULL;
+static inline struct osc_quota_info *osc_oqi_alloc(obd_uid id)
+{
+       struct osc_quota_info *oqi;
 
-static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
-                         __attribute__((__const__));
+       OBD_SLAB_ALLOC_PTR(oqi, osc_quota_kmem);
+       if (oqi != NULL)
+               oqi->oqi_id = id;
 
-static inline int hashfn(struct client_obd *cli, unsigned long id, int type)
-{
-        unsigned long tmp = ((unsigned long)cli>>6) ^ id;
-        tmp = (tmp * (MAXQUOTAS - type)) % NR_DQHASH;
-        return tmp;
+       return oqi;
 }
 
-/* caller must hold qinfo_list_lock */
-static inline void insert_qinfo_hash(struct osc_quota_info *oqi)
+int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
 {
-        cfs_list_t *head = qinfo_hash +
-                hashfn(oqi->oqi_cli, oqi->oqi_id, oqi->oqi_type);
+       int type;
+       ENTRY;
 
-        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
-        cfs_list_add(&oqi->oqi_hash, head);
-}
+       for (type = 0; type < MAXQUOTAS; type++) {
+               struct osc_quota_info *oqi;
 
-/* caller must hold qinfo_list_lock */
-static inline void remove_qinfo_hash(struct osc_quota_info *oqi)
-{
-        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
-        cfs_list_del_init(&oqi->oqi_hash);
-}
+               oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]);
+               if (oqi) {
+                       obd_uid id = oqi->oqi_id;
 
-/* caller must hold qinfo_list_lock */
-static inline struct osc_quota_info *find_qinfo(struct client_obd *cli,
-                                                unsigned int id, int type)
-{
-        struct osc_quota_info *oqi;
-        unsigned int           hashent = hashfn(cli, id, type);
-        ENTRY;
+                       LASSERTF(id == qid[type],
+                                "The ids don't match %u != %u\n",
+                                id, qid[type]);
 
-        LASSERT_SPIN_LOCKED(&qinfo_list_lock);
-        cfs_list_for_each_entry(oqi, &qinfo_hash[hashent], oqi_hash) {
-                if (oqi->oqi_cli == cli &&
-                    oqi->oqi_id == id && oqi->oqi_type == type)
-                        RETURN(oqi);
-        }
-        RETURN(NULL);
+                       /* the slot is busy, the user is about to run out of
+                        * quota space on this OST */
+                       CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n",
+                              type == USRQUOTA ? "user" : "group", qid[type]);
+                       RETURN(NO_QUOTA);
+               }
+       }
+
+       RETURN(QUOTA_OK);
 }
 
-static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
-                                          unsigned int id, int type)
+#define MD_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_MD_FLUSRQUOTA \
+                                               : OBD_MD_FLGRPQUOTA)
+#define FL_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_FL_NO_USRQUOTA \
+                                               : OBD_FL_NO_GRPQUOTA)
+
+int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
+                    obd_flag valid, obd_flag flags)
 {
-        struct osc_quota_info *oqi;
+       int type;
+       int rc = 0;
         ENTRY;
 
-       OBD_SLAB_ALLOC_PTR(oqi, qinfo_cachep);
-        if(!oqi)
-                RETURN(NULL);
-
-        CFS_INIT_LIST_HEAD(&oqi->oqi_hash);
-        oqi->oqi_cli = cli;
-        oqi->oqi_id = id;
-        oqi->oqi_type = type;
-
-        RETURN(oqi);
+       if ((valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) == 0)
+               RETURN(0);
+
+       for (type = 0; type < MAXQUOTAS; type++) {
+               struct osc_quota_info *oqi;
+
+               if ((valid & MD_QUOTA_FLAG(type)) == 0)
+                       continue;
+
+               /* lookup the ID in the per-type hash table */
+               oqi = cfs_hash_lookup(cli->cl_quota_hash[type], &qid[type]);
+               if ((flags & FL_QUOTA_FLAG(type)) != 0) {
+                       /* This ID is getting close to its quota limit, let's
+                        * switch to sync I/O */
+                       if (oqi != NULL)
+                               continue;
+
+                       oqi = osc_oqi_alloc(qid[type]);
+                       if (oqi == NULL) {
+                               rc = -ENOMEM;
+                               break;
+                       }
+
+                       rc = cfs_hash_add_unique(cli->cl_quota_hash[type],
+                                                &qid[type], &oqi->oqi_hash);
+                       /* race with others? */
+                       if (rc == -EALREADY) {
+                               rc = 0;
+                               OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem);
+                       }
+
+                       CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n",
+                              cli->cl_import->imp_obd->obd_name,
+                              type == USRQUOTA ? "user" : "group",
+                              qid[type], rc);
+               } else {
+                       /* This ID is now off the hook, let's remove it from
+                        * the hash table */
+                       if (oqi == NULL)
+                               continue;
+
+                       oqi = cfs_hash_del_key(cli->cl_quota_hash[type],
+                                              &qid[type]);
+                       if (oqi)
+                               OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem);
+
+                       CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n",
+                              cli->cl_import->imp_obd->obd_name,
+                              type == USRQUOTA ? "user" : "group",
+                              qid[type], oqi);
+               }
+       }
+
+       RETURN(rc);
 }
 
-static void free_qinfo(struct osc_quota_info *oqi)
+/*
+ * Hash operations for uid/gid <-> osc_quota_info
+ */
+static unsigned
+oqi_hashfn(cfs_hash_t *hs, const void *key, unsigned mask)
 {
-        OBD_SLAB_FREE(oqi, qinfo_cachep, sizeof(*oqi));
+       return cfs_hash_u32_hash(*((__u32*)key), mask);
 }
 
-int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[])
+static int
+oqi_keycmp(const void *key, cfs_hlist_node_t *hnode)
 {
-        unsigned int id;
-        int          cnt, rc = QUOTA_OK;
-        ENTRY;
-
-        cfs_spin_lock(&qinfo_list_lock);
-        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                struct osc_quota_info *oqi = NULL;
+       struct osc_quota_info *oqi;
+       obd_uid uid;
 
-                id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
-                oqi = find_qinfo(cli, id, cnt);
-                if (oqi) {
-                        rc = NO_QUOTA;
-                        break;
-                }
-        }
-        cfs_spin_unlock(&qinfo_list_lock);
+       LASSERT(key != NULL);
+       uid = *((obd_uid*)key);
+       oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
 
-        if (rc == NO_QUOTA)
-                CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n",
-                       cnt == USRQUOTA ? "user" : "group", id);
-        RETURN(rc);
+       return uid == oqi->oqi_id;
 }
 
-int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[],
-                    obd_flag valid, obd_flag flags)
+static void *
+oqi_key(cfs_hlist_node_t *hnode)
 {
-        unsigned int id;
-        obd_flag     noquota;
-        int          cnt, rc = 0;
-        ENTRY;
-
-        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-                struct osc_quota_info *oqi = NULL, *old;
-
-                if (!(valid & ((cnt == USRQUOTA) ?
-                    OBD_MD_FLUSRQUOTA : OBD_MD_FLGRPQUOTA)))
-                        continue;
-
-                id = (cnt == USRQUOTA) ? qid[USRQUOTA] : qid[GRPQUOTA];
-                noquota = (cnt == USRQUOTA) ?
-                    (flags & OBD_FL_NO_USRQUOTA) : (flags & OBD_FL_NO_GRPQUOTA);
-
-                if (noquota) {
-                        oqi = alloc_qinfo(cli, id, cnt);
-                        if (!oqi) {
-                                rc = -ENOMEM;
-                                CDEBUG(D_QUOTA, "setdq for %s %d failed, "
-                                       "(rc = %d)\n",
-                                       cnt == USRQUOTA ? "user" : "group",
-                                       id, rc);
-                                break;
-                        }
-                }
-
-                cfs_spin_lock(&qinfo_list_lock);
-                old = find_qinfo(cli, id, cnt);
-                if (old && !noquota)
-                        remove_qinfo_hash(old);
-                else if (!old && noquota)
-                        insert_qinfo_hash(oqi);
-                cfs_spin_unlock(&qinfo_list_lock);
-
-                if (old && !noquota)
-                        CDEBUG(D_QUOTA, "setdq to remove for %s %d\n",
-                               cnt == USRQUOTA ? "user" : "group", id);
-                else if (!old && noquota)
-                        CDEBUG(D_QUOTA, "setdq to insert for %s %d\n",
-                               cnt == USRQUOTA ? "user" : "group", id);
-
-                if (old) {
-                        if (noquota)
-                                free_qinfo(oqi);
-                        else
-                                free_qinfo(old);
-                }
-        }
-        RETURN(rc);
+       struct osc_quota_info *oqi;
+       oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
+       return &oqi->oqi_id;
 }
 
-int osc_quota_cleanup(struct obd_device *obd)
+static void *
+oqi_object(cfs_hlist_node_t *hnode)
 {
-        struct client_obd     *cli = &obd->u.cli;
-        struct osc_quota_info *oqi, *n;
-        int i;
-        ENTRY;
-
-        cfs_spin_lock(&qinfo_list_lock);
-        for (i = 0; i < NR_DQHASH; i++) {
-                cfs_list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
-                        if (oqi->oqi_cli != cli)
-                                continue;
-                        remove_qinfo_hash(oqi);
-                        free_qinfo(oqi);
-                }
-        }
-        cfs_spin_unlock(&qinfo_list_lock);
+       return cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
+}
 
-        RETURN(0);
+static void
+oqi_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
 }
 
-int osc_quota_init()
+static void
+oqi_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
 {
-        int i;
-        ENTRY;
+}
 
-        LASSERT(qinfo_cachep == NULL);
-        qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
-                                            sizeof(struct osc_quota_info),
-                                            0, 0);
-        if (!qinfo_cachep)
-                RETURN(-ENOMEM);
+static void
+oqi_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+{
+       struct osc_quota_info *oqi;
 
-        for (i = 0; i < NR_DQHASH; i++)
-                CFS_INIT_LIST_HEAD(qinfo_hash + i);
+       oqi = cfs_hlist_entry(hnode, struct osc_quota_info, oqi_hash);
 
-        RETURN(0);
+        OBD_SLAB_FREE_PTR(oqi, osc_quota_kmem);
 }
 
-int osc_quota_exit()
+#define HASH_QUOTA_BKT_BITS 5
+#define HASH_QUOTA_CUR_BITS 5
+#define HASH_QUOTA_MAX_BITS 15
+
+static cfs_hash_ops_t quota_hash_ops = {
+       .hs_hash        = oqi_hashfn,
+       .hs_keycmp      = oqi_keycmp,
+       .hs_key         = oqi_key,
+       .hs_object      = oqi_object,
+       .hs_get         = oqi_get,
+       .hs_put_locked  = oqi_put_locked,
+       .hs_exit        = oqi_exit,
+};
+
+int osc_quota_setup(struct obd_device *obd)
 {
-        struct osc_quota_info *oqi, *n;
-        int                    i, rc;
-        ENTRY;
+       struct client_obd *cli = &obd->u.cli;
+       int i, type;
+       ENTRY;
+
+       for (type = 0; type < MAXQUOTAS; type++) {
+               cli->cl_quota_hash[type] = cfs_hash_create("QUOTA_HASH",
+                                                          HASH_QUOTA_CUR_BITS,
+                                                          HASH_QUOTA_MAX_BITS,
+                                                          HASH_QUOTA_BKT_BITS,
+                                                          0,
+                                                          CFS_HASH_MIN_THETA,
+                                                          CFS_HASH_MAX_THETA,
+                                                          &quota_hash_ops,
+                                                          CFS_HASH_DEFAULT);
+               if (cli->cl_quota_hash[type] == NULL)
+                       break;
+       }
+
+       if (type == MAXQUOTAS)
+               RETURN(0);
+
+       for (i = 0; i < type; i++)
+               cfs_hash_putref(cli->cl_quota_hash[i]);
+
+       RETURN(-ENOMEM);
+}
 
-        cfs_spin_lock(&qinfo_list_lock);
-        for (i = 0; i < NR_DQHASH; i++) {
-                cfs_list_for_each_entry_safe(oqi, n, &qinfo_hash[i], oqi_hash) {
-                        remove_qinfo_hash(oqi);
-                        free_qinfo(oqi);
-                }
-        }
-        cfs_spin_unlock(&qinfo_list_lock);
+int osc_quota_cleanup(struct obd_device *obd)
+{
+       struct client_obd     *cli = &obd->u.cli;
+       int type;
+       ENTRY;
 
-        rc = cfs_mem_cache_destroy(qinfo_cachep);
-        LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
-        qinfo_cachep = NULL;
+       for (type = 0; type < MAXQUOTAS; type++)
+               cfs_hash_putref(cli->cl_quota_hash[type]);
 
-        RETURN(0);
+       RETURN(0);
 }
 
 int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
index ed0ac4b..b288b04 100644 (file)
@@ -3554,56 +3554,59 @@ static int brw_queue_work(const struct lu_env *env, void *data)
 
 int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 {
-        struct client_obd *cli = &obd->u.cli;
-        int rc;
-        ENTRY;
-
-        ENTRY;
-        rc = ptlrpcd_addref();
-        if (rc)
-                RETURN(rc);
-
-        rc = client_obd_setup(obd, lcfg);
-        if (rc == 0) {
-                void *handler;
-                handler = ptlrpcd_alloc_work(cli->cl_import,
-                                             brw_queue_work, cli);
-                if (!IS_ERR(handler))
-                        cli->cl_writeback_work = handler;
-                else
-                        rc = PTR_ERR(handler);
-        }
-
-        if (rc == 0) {
-                struct lprocfs_static_vars lvars = { 0 };
-
-                cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
-                lprocfs_osc_init_vars(&lvars);
-                if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
-                        lproc_osc_attach_seqstat(obd);
-                        sptlrpc_lprocfs_cliobd_attach(obd);
-                        ptlrpc_lprocfs_register_obd(obd);
-                }
-
-                oscc_init(obd);
-                /* We need to allocate a few requests more, because
-                   brw_interpret tries to create new requests before freeing
-                   previous ones. Ideally we want to have 2x max_rpcs_in_flight
-                   reserved, but I afraid that might be too much wasted RAM
-                   in fact, so 2 is just my guess and still should work. */
-                cli->cl_import->imp_rq_pool =
-                        ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
-                                            OST_MAXREQSIZE,
-                                            ptlrpc_add_rqs_to_pool);
+       struct lprocfs_static_vars lvars = { 0 };
+       struct client_obd          *cli = &obd->u.cli;
+       void                       *handler;
+       int                        rc;
+       ENTRY;
 
-                CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
+       rc = ptlrpcd_addref();
+       if (rc)
+               RETURN(rc);
+
+       rc = client_obd_setup(obd, lcfg);
+       if (rc)
+               GOTO(out_ptlrpcd, rc);
+
+       handler = ptlrpcd_alloc_work(cli->cl_import, brw_queue_work, cli);
+       if (IS_ERR(handler))
+               GOTO(out_client_setup, PTR_ERR(handler));
+       cli->cl_writeback_work = handler;
+
+       rc = osc_quota_setup(obd);
+       if (rc)
+               GOTO(out_ptlrpcd_work, rc);
+
+       cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
+       lprocfs_osc_init_vars(&lvars);
+       if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
+               lproc_osc_attach_seqstat(obd);
+               sptlrpc_lprocfs_cliobd_attach(obd);
+               ptlrpc_lprocfs_register_obd(obd);
+       }
 
-                ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
-        }
+       oscc_init(obd);
+       /* We need to allocate a few requests more, because
+        * brw_interpret tries to create new requests before freeing
+        * previous ones, Ideally we want to have 2x max_rpcs_in_flight
+        * reserved, but I'm afraid that might be too much wasted RAM
+        * in fact, so 2 is just my guess and still should work. */
+       cli->cl_import->imp_rq_pool =
+               ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
+                                   OST_MAXREQSIZE,
+                                   ptlrpc_add_rqs_to_pool);
+
+       CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
+       ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery);
+       RETURN(rc);
 
-        if (rc)
-                ptlrpcd_decref();
-        RETURN(rc);
+out_ptlrpcd_work:
+       ptlrpcd_destroy_work(handler);
+out_client_setup:
+       client_obd_cleanup(obd);
+out_ptlrpcd:
+       ptlrpcd_decref();
+       RETURN(rc);
 }
 
 static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
@@ -3751,7 +3754,6 @@ int __init osc_init(void)
 
         lprocfs_osc_init_vars(&lvars);
 
-        osc_quota_init();
         rc = class_register_type(&osc_obd_ops, NULL, lvars.module_vars,
                                  LUSTRE_OSC_NAME, &osc_device_type);
         if (rc) {
@@ -3774,7 +3776,6 @@ int __init osc_init(void)
 #ifdef __KERNEL__
 static void /*__exit*/ osc_exit(void)
 {
-       osc_quota_exit();
        class_unregister_type(LUSTRE_OSC_NAME);
        lu_kmem_fini(osc_caches);
 }