Whamcloud - gitweb
LU-14825 lod: pool spilling
[fs/lustre-release.git] / lustre / lod / lod_pool.c
index eb23de4..18bb313 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  */
 /*
  * lustre/lod/lod_pool.c
@@ -58,6 +57,8 @@
 #define DEBUG_SUBSYSTEM S_LOV
 
 #include <libcfs/libcfs.h>
+#include <libcfs/linux/linux-hash.h>
+#include <libcfs/linux/linux-fs.h>
 #include <obd.h>
 #include "lod_internal.h"
 
@@ -96,130 +97,38 @@ void lod_pool_putref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
-               LASSERT(hlist_unhashed(&pool->pool_hash));
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
-               lod_ost_pool_free(&(pool->pool_rr.lqr_pool));
-               lod_ost_pool_free(&(pool->pool_obds));
-               OBD_FREE_PTR(pool);
+               lu_tgt_pool_free(&(pool->pool_rr.lqr_pool));
+               lu_tgt_pool_free(&(pool->pool_obds));
+               kfree_rcu(pool, pool_rcu);
                EXIT;
        }
 }
 
-/**
- * Drop the refcount in cases where the caller holds a spinlock.
- *
- * This is needed if the caller cannot be blocked while freeing memory.
- * It assumes that there is some other known refcount held on the \a pool
- * and the memory cannot actually be freed, but the refcounting needs to
- * be kept accurate.
- *
- * \param[in] pool     pool descriptor on which to drop reference
- */
-static void pool_putref_locked(struct pool_desc *pool)
-{
-       CDEBUG(D_INFO, "pool %p\n", pool);
-       LASSERT(atomic_read(&pool->pool_refcount) > 1);
-
-       atomic_dec(&pool->pool_refcount);
-}
-
-/*
- * Group of functions needed for cfs_hash implementation.  This
- * includes pool lookup, refcounting, and cleanup.
- */
-
-/**
- * Hash the pool name for use by the cfs_hash handlers.
- *
- * Use the standard DJB2 hash function for ASCII strings in Lustre.
- *
- * \param[in] hash_body        hash structure where this key is embedded (unused)
- * \param[in] key      key to be hashed (in this case the pool name)
- * \param[in] mask     bitmask to limit the hash value to the desired size
- *
- * \retval             computed hash value from \a key and limited by \a mask
- */
-static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key,
-                        unsigned mask)
-{
-       return cfs_hash_djb2_hash(key, strnlen(key, LOV_MAXPOOLNAME), mask);
-}
-
-/**
- * Return the actual key (pool name) from the hashed \a hnode.
- *
- * Allows extracting the key name when iterating over all hash entries.
- *
- * \param[in] hnode    hash node found by lookup or iteration
- *
- * \retval             char array referencing the pool name (no refcount)
- */
-static void *pool_key(struct hlist_node *hnode)
-{
-       struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-       return pool->pool_name;
-}
-
-/**
- * Check if the specified hash key matches the hash node.
- *
- * This is needed in case there is a hash key collision, allowing the hash
- * table lookup/iteration to distinguish between the two entries.
- *
- * \param[in] key      key (pool name) being searched for
- * \param[in] compared current entry being compared
- *
- * \retval             0 if \a key is the same as the key of \a compared
- * \retval             1 if \a key is different from the key of \a compared
- */
-static int pool_hashkey_keycmp(const void *key, struct hlist_node *compared)
-{
-       return !strncmp(key, pool_key(compared), LOV_MAXPOOLNAME);
-}
-
-/**
- * Return the actual pool data structure from the hash table entry.
- *
- * Once the hash table entry is found, extract the pool data from it.
- * The return type of this function is void * because it needs to be
- * assigned to the generic hash operations table.
- *
- * \param[in] hnode    hash table entry
- *
- * \retval             struct pool_desc for the specified \a hnode
- */
-static void *pool_hashobject(struct hlist_node *hnode)
+static u32 pool_hashfh(const void *data, u32 len, u32 seed)
 {
-       return hlist_entry(hnode, struct pool_desc, pool_hash);
-}
-
-static void pool_hashrefcount_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-       struct pool_desc *pool;
+       const char *pool_name = data;
 
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-       pool_getref(pool);
+       return hashlen_hash(cfs_hashlen_string((void *)(unsigned long)seed,
+                                              pool_name));
 }
 
-static void pool_hashrefcount_put_locked(struct cfs_hash *hs,
-                                        struct hlist_node *hnode)
+static int pool_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
 {
-       struct pool_desc *pool;
+       const struct pool_desc *pool = obj;
+       const char *pool_name = arg->key;
 
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-       pool_putref_locked(pool);
+       return strcmp(pool_name, pool->pool_name);
 }
 
-struct cfs_hash_ops pool_hash_operations = {
-       .hs_hash        = pool_hashfn,
-       .hs_key         = pool_key,
-       .hs_keycmp      = pool_hashkey_keycmp,
-       .hs_object      = pool_hashobject,
-       .hs_get         = pool_hashrefcount_get,
-       .hs_put_locked  = pool_hashrefcount_put_locked,
+/*
+ * rhashtable parameters for the pool table: entries are struct pool_desc
+ * objects keyed by their pool_name string and linked through pool_hash.
+ * Hashing and comparison are done by pool_hashfh() and pool_cmpfn().
+ */
+const struct rhashtable_params pools_hash_params = {
+       .automatic_shrinking    = true,
+       .head_offset            = offsetof(struct pool_desc, pool_hash),
+       .key_offset             = offsetof(struct pool_desc, pool_name),
+       /* placeholder only: keys are strings, so the length is variable */
+       .key_len                = 1,
+       .hashfn                 = pool_hashfh,
+       .obj_cmpfn              = pool_cmpfn,
 };
 
 /*
@@ -256,8 +165,9 @@ static void *pool_proc_next(struct seq_file *seq, void *v, loff_t *pos)
 
        LASSERTF(iter->lpi_magic == POOL_IT_MAGIC, "%08X\n", iter->lpi_magic);
 
+       (*pos)++;
        /* test if end of file */
-       if (*pos >= pool_tgt_count(iter->lpi_pool))
+       if (*pos > pool_tgt_count(iter->lpi_pool))
                return NULL;
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_LIST_ASSERT, cfs_fail_val);
@@ -269,7 +179,7 @@ static void *pool_proc_next(struct seq_file *seq, void *v, loff_t *pos)
                iter->lpi_idx = prev_idx; /* we stay on the last entry */
                return NULL;
        }
-       (*pos)++;
+
        /* return != NULL to continue */
        return iter;
 }
@@ -407,11 +317,11 @@ static int pool_proc_open(struct inode *inode, struct file *file)
        return rc;
 }
 
-static struct file_operations pool_proc_operations = {
-       .open           = pool_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = seq_release,
+/*
+ * proc_ops table for pool entries: opening goes through pool_proc_open(),
+ * while read, seek and release use the generic seq_file helpers.
+ * The storage-class specifier comes first ("static const"); placing it
+ * after a qualifier is an obsolescent form that compilers warn about.
+ */
+static const struct proc_ops pool_proc_operations = {
+       .proc_open      = pool_proc_open,
+       .proc_read      = seq_read,
+       .proc_lseek     = seq_lseek,
+       .proc_release   = seq_release,
 };
 
 /**
@@ -449,181 +359,21 @@ void lod_dump_pool(int level, struct pool_desc *pool)
        lod_pool_putref(pool);
 }
 
-/**
- * Initialize the pool data structures at startup.
- *
- * Allocate and initialize the pool data structures with the specified
- * array size.  If pool count is not specified (\a count == 0), then
- * POOL_INIT_COUNT will be used.  Allocating a non-zero initial array
- * size avoids the need to reallocate as new pools are added.
- *
- * \param[in] op       pool structure
- * \param[in] count    initial size of the target op_array[] array
- *
- * \retval             0 indicates successful pool initialization
- * \retval             negative error number on failure
- */
-#define POOL_INIT_COUNT 2
-int lod_ost_pool_init(struct lu_tgt_pool *op, unsigned int count)
+static void pools_hash_exit(void *vpool, void *data)
 {
-       ENTRY;
-
-       if (count == 0)
-               count = POOL_INIT_COUNT;
-       op->op_array = NULL;
-       op->op_count = 0;
-       init_rwsem(&op->op_rw_sem);
-       op->op_size = count * sizeof(op->op_array[0]);
-       OBD_ALLOC(op->op_array, op->op_size);
-       if (op->op_array == NULL) {
-               op->op_size = 0;
-               RETURN(-ENOMEM);
-       }
-       EXIT;
-       return 0;
-}
+       struct pool_desc *pool = vpool;
 
-/**
- * Increase the op_array size to hold more targets in this pool.
- *
- * The size is increased to at least \a min_count, but may be larger
- * for an existing pool since ->op_array[] is growing exponentially.
- * Caller must hold write op_rwlock.
- *
- * \param[in] op       pool structure
- * \param[in] min_count        minimum number of entries to handle
- *
- * \retval             0 on success
- * \retval             negative error number on failure.
- */
-int lod_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
-{
-       __u32 *new;
-       __u32 new_size;
-
-       LASSERT(min_count != 0);
-
-       if (op->op_count * sizeof(op->op_array[0]) < op->op_size)
-               return 0;
-
-       new_size = max_t(__u32, min_count * sizeof(op->op_array[0]),
-                        2 * op->op_size);
-       OBD_ALLOC(new, new_size);
-       if (new == NULL)
-               return -ENOMEM;
-
-       /* copy old array to new one */
-       memcpy(new, op->op_array, op->op_size);
-       OBD_FREE(op->op_array, op->op_size);
-       op->op_array = new;
-       op->op_size = new_size;
-
-       return 0;
-}
-
-/**
- * Add a new target to an existing pool.
- *
- * Add a new target device to the pool previously created and returned by
- * lod_pool_new().  Each target can only be in each pool at most one time.
- *
- * \param[in] op       target pool to add new entry
- * \param[in] idx      pool index number to add to the \a op array
- * \param[in] min_count        minimum number of entries to expect in the pool
- *
- * \retval             0 if target could be added to the pool
- * \retval             negative error if target \a idx was not added
- */
-int lod_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
-{
-       unsigned int i;
-       int rc = 0;
-       ENTRY;
-
-       down_write(&op->op_rw_sem);
-
-       rc = lod_ost_pool_extend(op, min_count);
-       if (rc)
-               GOTO(out, rc);
-
-       /* search ost in pool array */
-       for (i = 0; i < op->op_count; i++) {
-               if (op->op_array[i] == idx)
-                       GOTO(out, rc = -EEXIST);
-       }
-       /* ost not found we add it */
-       op->op_array[op->op_count] = idx;
-       op->op_count++;
-       EXIT;
-out:
-       up_write(&op->op_rw_sem);
-       return rc;
+       lod_pool_putref(pool);
 }
 
-/**
- * Remove an existing pool from the system.
- *
- * The specified pool must have previously been allocated by
- * lod_pool_new() and not have any target members in the pool.
- * If the removed target is not the last, compact the array
- * to remove empty spaces.
- *
- * \param[in] op       pointer to the original data structure
- * \param[in] idx      target index to be removed
- *
- * \retval             0 on success
- * \retval             negative error number on failure
- */
-int lod_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx)
+int lod_pool_hash_init(struct rhashtable *tbl)
 {
-       unsigned int i;
-       ENTRY;
-
-       down_write(&op->op_rw_sem);
-
-       for (i = 0; i < op->op_count; i++) {
-               if (op->op_array[i] == idx) {
-                       memmove(&op->op_array[i], &op->op_array[i + 1],
-                               (op->op_count - i - 1) *
-                               sizeof(op->op_array[0]));
-                       op->op_count--;
-                       up_write(&op->op_rw_sem);
-                       EXIT;
-                       return 0;
-               }
-       }
-
-       up_write(&op->op_rw_sem);
-       RETURN(-EINVAL);
+       return rhashtable_init(tbl, &pools_hash_params);
 }
 
-/**
- * Free the pool after it was emptied and removed from /proc.
- *
- * Note that all of the child/target entries referenced by this pool
- * must have been removed by lod_ost_pool_remove() before it can be
- * deleted from memory.
- *
- * \param[in] op       pool to be freed.
- *
- * \retval             0 on success or if pool was already freed
- */
-int lod_ost_pool_free(struct lu_tgt_pool *op)
+void lod_pool_hash_destroy(struct rhashtable *tbl)
 {
-       ENTRY;
-
-       if (op->op_size == 0)
-               RETURN(0);
-
-       down_write(&op->op_rw_sem);
-
-       OBD_FREE(op->op_array, op->op_size);
-       op->op_array = NULL;
-       op->op_count = 0;
-       op->op_size = 0;
-
-       up_write(&op->op_rw_sem);
-       RETURN(0);
+       rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL);
 }
 
 /**
@@ -650,25 +400,28 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        if (strlen(poolname) > LOV_MAXPOOLNAME)
                RETURN(-ENAMETOOLONG);
 
-       OBD_ALLOC_PTR(new_pool);
+       /* OBD_ALLOC_* doesn't work with direct kfree_rcu use */
+       new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL);
        if (new_pool == NULL)
                RETURN(-ENOMEM);
 
        strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name));
+       new_pool->pool_spill_expire = 0;
+       new_pool->pool_spill_is_active = false;
+       new_pool->pool_spill_threshold_pct = 0;
+       new_pool->pool_spill_target[0] = '\0';
        new_pool->pool_lobd = obd;
        atomic_set(&new_pool->pool_refcount, 1);
-       rc = lod_ost_pool_init(&new_pool->pool_obds, 0);
+       rc = lu_tgt_pool_init(&new_pool->pool_obds, 0);
        if (rc)
                GOTO(out_err, rc);
 
        lu_qos_rr_init(&new_pool->pool_rr);
 
-       rc = lod_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+       rc = lu_tgt_pool_init(&new_pool->pool_rr.lqr_pool, 0);
        if (rc)
                GOTO(out_free_pool_obds, rc);
 
-       INIT_HLIST_NODE(&new_pool->pool_hash);
-
 #ifdef CONFIG_PROC_FS
        pool_getref(new_pool);
        new_pool->pool_proc_entry = lprocfs_add_simple(lod->lod_pool_proc_entry,
@@ -680,6 +433,17 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
                new_pool->pool_proc_entry = NULL;
                lod_pool_putref(new_pool);
        }
+
+       /* the spill proc entry holds its own reference on the pool */
+       pool_getref(new_pool);
+       new_pool->pool_spill_proc_entry =
+               lprocfs_register(poolname, lod->lod_spill_proc_entry,
+                       lprocfs_lod_spill_vars, new_pool);
+       if (IS_ERR(new_pool->pool_spill_proc_entry)) {
+               rc = PTR_ERR(new_pool->pool_spill_proc_entry);
+               /*
+                * Clear the spill entry (not pool_proc_entry) so that no
+                * ERR_PTR is later handed to lprocfs_remove(), and so the
+                * regular proc entry and its reference are not lost.
+                */
+               new_pool->pool_spill_proc_entry = NULL;
+               lod_pool_putref(new_pool);
+       }
+
        CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool,
               new_pool->pool_proc_entry);
 #endif
@@ -689,11 +453,19 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        lod->lod_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
-       /* add to find only when it fully ready  */
-       rc = cfs_hash_add_unique(lod->lod_pools_hash_body, poolname,
-                                &new_pool->pool_hash);
-       if (rc)
-               GOTO(out_err, rc = -EEXIST);
+       /* Add to hash table only when it is fully ready. */
+       rc = rhashtable_lookup_insert_fast(&lod->lod_pools_hash_body,
+                                          &new_pool->pool_hash,
+                                          pools_hash_params);
+       if (rc) {
+               if (rc != -EEXIST)
+                       /*
+                        * Hide -E2BIG and -EBUSY which
+                        * are not helpful.
+                        */
+                       rc = -ENOMEM;
+               GOTO(out_err, rc);
+       }
 
        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                        poolname, lod->lod_pool_count);
@@ -706,11 +478,12 @@ out_err:
        lod->lod_pool_count--;
        spin_unlock(&obd->obd_dev_lock);
 
+       lprocfs_remove(&new_pool->pool_spill_proc_entry);
        lprocfs_remove(&new_pool->pool_proc_entry);
 
-       lod_ost_pool_free(&new_pool->pool_rr.lqr_pool);
+       lu_tgt_pool_free(&new_pool->pool_rr.lqr_pool);
 out_free_pool_obds:
-       lod_ost_pool_free(&new_pool->pool_obds);
+       lu_tgt_pool_free(&new_pool->pool_obds);
        OBD_FREE_PTR(new_pool);
        return rc;
 }
@@ -731,8 +504,15 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
        ENTRY;
 
        /* lookup and kill hash reference */
-       pool = cfs_hash_del_key(lod->lod_pools_hash_body, poolname);
-       if (pool == NULL)
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && rhashtable_remove_fast(&lod->lod_pools_hash_body,
+                                          &pool->pool_hash,
+                                          pools_hash_params) != 0)
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
                RETURN(-ENOENT);
 
        if (pool->pool_proc_entry != NULL) {
@@ -740,6 +520,11 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
                lprocfs_remove(&pool->pool_proc_entry);
                lod_pool_putref(pool);
        }
+       if (pool->pool_spill_proc_entry != NULL) {
+               CDEBUG(D_INFO, "proc entry %p\n", pool->pool_spill_proc_entry);
+               lprocfs_remove(&pool->pool_spill_proc_entry);
+               lod_pool_putref(pool);
+       }
 
        spin_lock(&obd->obd_dev_lock);
        list_del_init(&pool->pool_list);
@@ -773,8 +558,13 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
        int rc = -EINVAL;
        ENTRY;
 
-       pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
-       if (pool == NULL)
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
                RETURN(-ENOENT);
 
        obd_str2uuid(&ost_uuid, ostname);
@@ -791,12 +581,12 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
        if (rc)
                GOTO(out, rc);
 
-       rc = lod_ost_pool_add(&pool->pool_obds, tgt->ltd_index,
-                             lod->lod_ost_descs.ltd_tgts_size);
+       rc = lu_tgt_pool_add(&pool->pool_obds, tgt->ltd_index,
+                            lod->lod_ost_count);
        if (rc)
                GOTO(out, rc);
 
-       pool->pool_rr.lqr_dirty = 1;
+       set_bit(LQ_DIRTY, &pool->pool_rr.lqr_flags);
 
        CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
                        ostname, poolname,  pool_tgt_count(pool));
@@ -831,8 +621,14 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        int rc = -EINVAL;
        ENTRY;
 
-       pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
-       if (pool == NULL)
+       /* lookup and kill hash reference */
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
                RETURN(-ENOENT);
 
        obd_str2uuid(&ost_uuid, ostname);
@@ -849,9 +645,8 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        if (rc)
                GOTO(out, rc);
 
-       lod_ost_pool_remove(&pool->pool_obds, ost->ltd_index);
-
-       pool->pool_rr.lqr_dirty = 1;
+       lu_tgt_pool_remove(&pool->pool_obds, ost->ltd_index);
+       set_bit(LQ_DIRTY, &pool->pool_rr.lqr_flags);
 
        CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
               poolname);
@@ -877,23 +672,10 @@ out:
  */
 int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool)
 {
-       unsigned int i;
        int rc;
-       ENTRY;
 
        pool_getref(pool);
-
-       down_read(&pool_tgt_rw_sem(pool));
-
-       for (i = 0; i < pool_tgt_count(pool); i++) {
-               if (pool_tgt_array(pool)[i] == idx)
-                       GOTO(out, rc = 0);
-       }
-       rc = -ENOENT;
-       EXIT;
-out:
-       up_read(&pool_tgt_rw_sem(pool));
-
+       rc = lu_tgt_check_index(idx, &pool->pool_obds);
        lod_pool_putref(pool);
        return rc;
 }
@@ -914,10 +696,15 @@ struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname)
 
        pool = NULL;
        if (poolname[0] != '\0') {
-               pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
-               if (pool == NULL)
-                       CDEBUG(D_CONFIG, "%s: request for an unknown pool ("
-                              LOV_POOLNAMEF")\n",
+               rcu_read_lock();
+               pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                        pools_hash_params);
+               if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+                       pool = NULL;
+               rcu_read_unlock();
+               if (!pool)
+                       CDEBUG(D_CONFIG,
+                              "%s: request for an unknown pool (" LOV_POOLNAMEF ")\n",
                               lod->lod_child_exp->exp_obd->obd_name, poolname);
                if (pool != NULL && pool_tgt_count(pool) == 0) {
                        CDEBUG(D_CONFIG, "%s: request for an empty pool ("
@@ -931,3 +718,91 @@ struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname)
        return pool;
 }
 
+/**
+ * Re-evaluate whether \a pool has reached its spill threshold.
+ *
+ * Nothing is done while the previous decision has not yet expired
+ * (pool_spill_expire) or when no threshold is configured
+ * (pool_spill_threshold_pct == 0).  Otherwise the OST statfs data is
+ * refreshed, the space of the pool's active OSTs is summed up, and
+ * pool_spill_is_active is set when the used space is at least
+ * pool_spill_threshold_pct percent of the total space.
+ *
+ * \param[in] env      execution environment for lod_qos_statfs_update()
+ * \param[in] lod      LOD device that owns the OST table
+ * \param[in,out] pool pool whose spill state is refreshed
+ */
+void lod_spill_target_refresh(const struct lu_env *env, struct lod_device *lod,
+                             struct pool_desc *pool)
+{
+       __u64 avail_bytes = 0, total_bytes = 0;
+       __u64 used_bytes, threshold_bytes;
+       struct lu_tgt_pool *osts;
+       unsigned int i;
+
+       /* cheap unlocked check; repeated below under the semaphore */
+       if (ktime_get_seconds() < pool->pool_spill_expire)
+               return;
+
+       if (pool->pool_spill_threshold_pct == 0)
+               return;
+
+       lod_qos_statfs_update(env, lod, &lod->lod_ost_descs);
+
+       down_write(&pool_tgt_rw_sem(pool));
+       /* another thread may have refreshed while we waited */
+       if (ktime_get_seconds() < pool->pool_spill_expire)
+               goto out_sem;
+       pool->pool_spill_expire = ktime_get_seconds() +
+               lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage;
+
+       osts = &(pool->pool_obds);
+       for (i = 0; i < osts->op_count; i++) {
+               int idx = osts->op_array[i];
+               struct lod_tgt_desc *tgt;
+               struct obd_statfs *sfs;
+
+               /* skip pool members that are not set in the OST bitmap */
+               if (!test_bit(idx, lod->lod_ost_bitmap))
+                       continue;
+               tgt = OST_TGT(lod, idx);
+               if (tgt->ltd_active == 0)
+                       continue;
+               sfs = &tgt->ltd_statfs;
+
+               avail_bytes += sfs->os_bavail * sfs->os_bsize;
+               total_bytes += sfs->os_blocks * sfs->os_bsize;
+       }
+
+       /*
+        * threshold = total * pct / 100, computed as
+        * (total / 100) * pct + ((total % 100) * pct) / 100.
+        * This gives the same result but cannot overflow 64 bits for
+        * very large pools (assumes pct <= 100 -- TODO confirm).
+        *
+        * NOTE(review): with no active OSTs both sums are 0 and the
+        * comparison below marks the spill as active; confirm intended.
+        */
+       used_bytes = total_bytes - avail_bytes;
+       threshold_bytes = total_bytes / 100 * pool->pool_spill_threshold_pct +
+               total_bytes % 100 * pool->pool_spill_threshold_pct / 100;
+       pool->pool_spill_is_active = used_bytes >= threshold_bytes;
+
+out_sem:
+       up_write(&pool_tgt_rw_sem(pool));
+}
+
+/*
+ * To prevent infinite loops during spilling, limit the number of passes
+ * (pool replacements) made by lod_check_and_spill_pool().
+ */
+#define LOD_SPILL_MAX  10
+
+/*
+ * Follow the chain of spill targets starting at \a *poolname.
+ *
+ * Each pass looks up the pool named by \a *poolname, refreshes its spill
+ * state and, if spilling is active, replaces \a *poolname with the pool's
+ * spill target via lod_set_pool() before trying again.  The walk ends at
+ * the first pool that is unknown or not spilling, or after LOD_SPILL_MAX
+ * replacements (a warning is logged in that case).
+ *
+ * XXX: consider a better scheme to detect loops
+ */
+void lod_check_and_spill_pool(const struct lu_env *env, struct lod_device *lod,
+                             char **poolname)
+{
+       struct pool_desc *pool;
+       int depth = 0;
+
+       if (!poolname || !*poolname || (*poolname)[0] == '\0')
+               return;
+
+       for (;;) {
+               /* look the pool up and pin it while still under RCU */
+               rcu_read_lock();
+               pool = rhashtable_lookup(&lod->lod_pools_hash_body, *poolname,
+                                        pools_hash_params);
+               if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+                       pool = NULL;
+               rcu_read_unlock();
+               if (!pool)
+                       return;
+
+               lod_spill_target_refresh(env, lod, pool);
+               if (!pool->pool_spill_is_active)
+                       break;
+
+               depth++;
+               if (depth >= LOD_SPILL_MAX)
+                       CWARN("%s: more than %d levels of pool spill for '%s->%s'\n",
+                             lod2obd(lod)->obd_name, LOD_SPILL_MAX,
+                             *poolname, pool->pool_spill_target);
+               lod_set_pool(poolname, pool->pool_spill_target);
+               lod_pool_putref(pool);
+               /* the replacement is still applied on the last pass */
+               if (depth >= LOD_SPILL_MAX)
+                       return;
+       }
+
+       lod_pool_putref(pool);
+}