Whamcloud - gitweb
LU-9859 lod: use linux kernel bitmap API
[fs/lustre-release.git] / lustre / lod / lod_dev.c
index a452e5d..c8ec185 100644 (file)
@@ -96,6 +96,7 @@
 #include <uapi/linux/lustre/lustre_param.h>
 #include <lustre_update.h>
 #include <lustre_log.h>
+#include <lustre_lmv.h>
 
 #include "lod_internal.h"
 
@@ -269,8 +270,10 @@ static int lod_sub_process_config(const struct lu_env *env,
 struct lod_recovery_data {
        struct lod_device       *lrd_lod;
        struct lod_tgt_desc     *lrd_ltd;
-       struct ptlrpc_thread    *lrd_thread;
+       struct task_struct      **lrd_task;
        u32                     lrd_idx;
+       struct lu_env           lrd_env;
+       struct completion       *lrd_started;
 };
 
 
@@ -354,29 +357,16 @@ static int lod_sub_recovery_thread(void *arg)
        struct lod_recovery_data *lrd = arg;
        struct lod_device *lod = lrd->lrd_lod;
        struct dt_device *dt;
-       struct ptlrpc_thread *thread = lrd->lrd_thread;
        struct llog_ctxt *ctxt = NULL;
-       struct lu_env env;
+       struct lu_env *env = &lrd->lrd_env;
        struct lu_target *lut;
-       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
-       struct lod_tgt_desc *tgt = NULL;
+       struct lu_tgt_desc *mdt = NULL;
        time64_t start;
        int retries = 0;
        int rc;
 
        ENTRY;
 
-       thread->t_flags = SVC_RUNNING;
-       wake_up(&thread->t_ctl_waitq);
-
-       rc = lu_env_init(&env, LCT_LOCAL | LCT_MD_THREAD);
-       if (rc != 0) {
-               OBD_FREE_PTR(lrd);
-               CERROR("%s: can't initialize env: rc = %d\n",
-                      lod2obd(lod)->obd_name, rc);
-               RETURN(rc);
-       }
-
        lut = lod2lu_dev(lod)->ld_site->ls_tgt;
        atomic_inc(&lut->lut_tdtd->tdtd_recovery_threads_count);
        if (!lrd->lrd_ltd)
@@ -385,9 +375,17 @@ static int lod_sub_recovery_thread(void *arg)
                dt = lrd->lrd_ltd->ltd_tgt;
 
        start = ktime_get_real_seconds();
+       complete(lrd->lrd_started);
 
 again:
-       rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx);
+
+       if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT)) &&
+           lrd->lrd_ltd) {
+               OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val);
+               rc = -EIO;
+       } else {
+               rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx);
+       }
        if (!rc && !lod->lod_child->dd_rdonly) {
                /* Process the recovery record */
                ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
@@ -395,7 +393,7 @@ again:
                LASSERT(ctxt != NULL);
                LASSERT(ctxt->loc_handle != NULL);
 
-               rc = llog_cat_process(&env, ctxt->loc_handle,
+               rc = llog_cat_process(env, ctxt->loc_handle,
                                      lod_process_recovery_updates, lrd, 0, 0);
        }
 
@@ -413,7 +411,7 @@ again:
                    !top_device->ld_obd->obd_stopping) {
                        if (ctxt) {
                                if (ctxt->loc_handle)
-                                       llog_cat_close(&env,
+                                       llog_cat_close(env,
                                                       ctxt->loc_handle);
                                llog_ctxt_put(ctxt);
                        }
@@ -452,8 +450,8 @@ again:
                GOTO(out, rc = 0);
        }
 
-       ltd_foreach_tgt(ltd, tgt) {
-               if (!tgt->ltd_got_update_log) {
+       lod_foreach_mdt(lod, mdt) {
+               if (!mdt->ltd_got_update_log) {
                        spin_unlock(&lod->lod_lock);
                        GOTO(out, rc = 0);
                }
@@ -467,13 +465,16 @@ again:
        EXIT;
 
 out:
-       OBD_FREE_PTR(lrd);
-       thread->t_flags = SVC_STOPPED;
        atomic_dec(&lut->lut_tdtd->tdtd_recovery_threads_count);
        wake_up(&lut->lut_tdtd->tdtd_recovery_threads_waitq);
-       wake_up(&thread->t_ctl_waitq);
-       lu_env_fini(&env);
-       return rc;
+       if (xchg(lrd->lrd_task, NULL) == NULL)
+               /* Someone is waiting for us to finish, need
+                * to synchronize cleanly.
+                */
+               wait_var_event(lrd, kthread_should_stop());
+       lu_env_fini(env);
+       OBD_FREE_PTR(lrd);
+       return 0;
 }
 
 /**
@@ -487,21 +488,21 @@ out:
  * \param[in] thread   recovery thread on this sub device
  */
 void lod_sub_fini_llog(const struct lu_env *env,
-                      struct dt_device *dt, struct ptlrpc_thread *thread)
+                      struct dt_device *dt, struct task_struct **thread)
 {
        struct obd_device *obd;
        struct llog_ctxt *ctxt;
+       struct task_struct *task = NULL;
 
        ENTRY;
 
        obd = dt->dd_lu_dev.ld_obd;
        CDEBUG(D_INFO, "%s: finish sub llog\n", obd->obd_name);
-       /* Stop recovery thread first */
-       if (thread && thread->t_flags & SVC_RUNNING) {
-               thread->t_flags = SVC_STOPPING;
-               wake_up(&thread->t_ctl_waitq);
-               wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
-       }
+       /* Wait for recovery thread to complete */
+       if (thread)
+               task = xchg(thread, NULL);
+       if (task)
+               kthread_stop(task);
 
        ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT);
        if (!ctxt)
@@ -529,8 +530,8 @@ void lod_sub_fini_llog(const struct lu_env *env,
  */
 int lodname2mdt_index(char *lodname, u32 *mdt_index)
 {
-       unsigned long index;
-       char *ptr, *tmp;
+       u32 index;
+       const char *ptr, *tmp;
        int rc;
 
        /* 1.8 configs don't have "-MDT0000" at the end */
@@ -565,8 +566,8 @@ int lodname2mdt_index(char *lodname, u32 *mdt_index)
                return rc;
        }
 
-       index = simple_strtol(ptr - 4, &tmp, 16);
-       if (*tmp != '-' || index > INT_MAX) {
+       rc = target_name2index(ptr - 7, &index, &tmp);
+       if (rc < 0 || rc & LDD_F_SV_ALL || *tmp != '-') {
                rc = -EINVAL;
                CERROR("invalid MDT index in '%s': rc = %d\n", lodname, rc);
                return rc;
@@ -594,9 +595,9 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod,
 {
        struct obd_device *obd;
        struct lod_recovery_data *lrd = NULL;
-       struct ptlrpc_thread *thread;
+       DECLARE_COMPLETION_ONSTACK(started);
+       struct task_struct **taskp;
        struct task_struct *task;
-       struct l_wait_info lwi = { 0 };
        struct lod_tgt_desc *subtgt = NULL;
        u32 index;
        u32 master_index;
@@ -613,33 +614,29 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod,
                RETURN(-ENOMEM);
 
        if (lod->lod_child == dt) {
-               thread = &lod->lod_child_recovery_thread;
+               taskp = &lod->lod_child_recovery_task;
                index = master_index;
        } else {
-               struct lu_tgt_desc *tgt;
+               struct lu_tgt_desc *mdt;
 
-               ltd_foreach_tgt(&lod->lod_mdt_descs, tgt) {
-                       if (tgt->ltd_tgt == dt) {
-                               index = tgt->ltd_index;
-                               subtgt = tgt;
+               lod_foreach_mdt(lod, mdt) {
+                       if (mdt->ltd_tgt == dt) {
+                               index = mdt->ltd_index;
+                               subtgt = mdt;
                                break;
                        }
                }
                LASSERT(subtgt != NULL);
-               OBD_ALLOC_PTR(subtgt->ltd_recovery_thread);
-               if (!subtgt->ltd_recovery_thread)
-                       GOTO(free_lrd, rc = -ENOMEM);
-
-               thread = subtgt->ltd_recovery_thread;
+               taskp = &subtgt->ltd_recovery_task;
        }
 
        CDEBUG(D_INFO, "%s init sub log %s\n", lod2obd(lod)->obd_name,
               dt->dd_lu_dev.ld_obd->obd_name);
        lrd->lrd_lod = lod;
        lrd->lrd_ltd = subtgt;
-       lrd->lrd_thread = thread;
+       lrd->lrd_task = taskp;
        lrd->lrd_idx = index;
-       init_waitqueue_head(&thread->t_ctl_waitq);
+       lrd->lrd_started = &started;
 
        obd = dt->dd_lu_dev.ld_obd;
        obd->obd_lvfs_ctxt.dt = dt;
@@ -648,30 +645,33 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod,
        if (rc < 0) {
                CERROR("%s: cannot setup updatelog llog: rc = %d\n",
                       obd->obd_name, rc);
-               GOTO(free_thread, rc);
+               GOTO(free_lrd, rc);
+       }
+
+       rc = lu_env_init(&lrd->lrd_env, LCT_LOCAL | LCT_MD_THREAD);
+       if (rc != 0) {
+               CERROR("%s: can't initialize env: rc = %d\n",
+                      lod2obd(lod)->obd_name, rc);
+               GOTO(free_lrd, rc);
        }
 
        /* Start the recovery thread */
-       task = kthread_run(lod_sub_recovery_thread, lrd, "lod%04x_rec%04x",
-                          master_index, index);
+       task = kthread_create(lod_sub_recovery_thread, lrd, "lod%04x_rec%04x",
+                             master_index, index);
        if (IS_ERR(task)) {
                rc = PTR_ERR(task);
                CERROR("%s: cannot start recovery thread: rc = %d\n",
                       obd->obd_name, rc);
+               lu_env_fini(&lrd->lrd_env);
                GOTO(out_llog, rc);
        }
-
-       l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING ||
-                                         thread->t_flags & SVC_STOPPED, &lwi);
+       *taskp = task;
+       wake_up_process(task);
+       wait_for_completion(&started);
 
        RETURN(0);
 out_llog:
-       lod_sub_fini_llog(env, dt, thread);
-free_thread:
-       if (lod->lod_child != dt) {
-               OBD_FREE_PTR(subtgt->ltd_recovery_thread);
-               subtgt->ltd_recovery_thread = NULL;
-       }
+       lod_sub_fini_llog(env, dt, taskp);
 free_lrd:
        OBD_FREE_PTR(lrd);
        RETURN(rc);
@@ -688,36 +688,24 @@ free_lrd:
 static void lod_sub_stop_recovery_threads(const struct lu_env *env,
                                          struct lod_device *lod)
 {
-       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
-       struct ptlrpc_thread *thread;
-       struct lu_tgt_desc *tgt;
+       struct task_struct *task;
+       struct lu_tgt_desc *mdt;
 
        /*
         * Stop the update log commit cancel threads and finish master
         * llog ctxt
         */
-       thread = &lod->lod_child_recovery_thread;
-       /* Stop recovery thread first */
-       if (thread && thread->t_flags & SVC_RUNNING) {
-               thread->t_flags = SVC_STOPPING;
-               wake_up(&thread->t_ctl_waitq);
-               wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
-       }
+       task = xchg(&lod->lod_child_recovery_task, NULL);
+       if (task)
+               kthread_stop(task);
 
-       lod_getref(ltd);
-       ltd_foreach_tgt(ltd, tgt) {
-               thread = tgt->ltd_recovery_thread;
-               if (thread && thread->t_flags & SVC_RUNNING) {
-                       thread->t_flags = SVC_STOPPING;
-                       wake_up(&thread->t_ctl_waitq);
-                       wait_event(thread->t_ctl_waitq,
-                                  thread->t_flags & SVC_STOPPED);
-                       OBD_FREE_PTR(tgt->ltd_recovery_thread);
-                       tgt->ltd_recovery_thread = NULL;
-               }
+       lod_getref(&lod->lod_mdt_descs);
+       lod_foreach_mdt(lod, mdt) {
+               task = xchg(&mdt->ltd_recovery_task, NULL);
+               if (task)
+                       kthread_stop(task);
        }
-
-       lod_putref(lod, ltd);
+       lod_putref(lod, &lod->lod_mdt_descs);
 }
 
 /**
@@ -731,28 +719,26 @@ static void lod_sub_stop_recovery_threads(const struct lu_env *env,
 static void lod_sub_fini_all_llogs(const struct lu_env *env,
                                   struct lod_device *lod)
 {
-       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
-       struct lu_tgt_desc *tgt;
+       struct lu_tgt_desc *mdt;
 
        /*
         * Stop the update log commit cancel threads and finish master
         * llog ctxt
         */
        lod_sub_fini_llog(env, lod->lod_child,
-                         &lod->lod_child_recovery_thread);
-       lod_getref(ltd);
-       ltd_foreach_tgt(ltd, tgt)
-               lod_sub_fini_llog(env, tgt->ltd_tgt,
-                                 tgt->ltd_recovery_thread);
-       lod_putref(lod, ltd);
+                         &lod->lod_child_recovery_task);
+       lod_getref(&lod->lod_mdt_descs);
+       lod_foreach_mdt(lod, mdt)
+               lod_sub_fini_llog(env, mdt->ltd_tgt,
+                                 &mdt->ltd_recovery_task);
+       lod_putref(lod, &lod->lod_mdt_descs);
 }
 
 static char *lod_show_update_logs_retrievers(void *data, int *size, int *count)
 {
        struct lod_device *lod = (struct lod_device *)data;
        struct lu_target *lut = lod2lu_dev(lod)->ld_site->ls_tgt;
-       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
-       struct lod_tgt_desc *tgt = NULL;
+       struct lu_tgt_desc *mdt = NULL;
        char *buf;
        int len = 0;
        int rc;
@@ -776,17 +762,17 @@ static char *lod_show_update_logs_retrievers(void *data, int *size, int *count)
                rc = lodname2mdt_index(lod2obd(lod)->obd_name, &i);
                LASSERTF(rc == 0, "Fail to parse target index: rc = %d\n", rc);
 
-               rc = snprintf(buf + len, *size - len, " %04x", i);
+               rc = scnprintf(buf + len, *size - len, " %04x", i);
                LASSERT(rc > 0);
 
                len += rc;
                (*count)++;
        }
 
-       ltd_foreach_tgt(ltd, tgt) {
-               if (!tgt->ltd_got_update_log) {
-                       rc = snprintf(buf + len, *size - len, " %04x",
-                                     tgt->ltd_index);
+       lod_foreach_mdt(lod, mdt) {
+               if (!mdt->ltd_got_update_log) {
+                       rc = scnprintf(buf + len, *size - len, " %04x",
+                                      mdt->ltd_index);
                        if (unlikely(rc <= 0))
                                break;
 
@@ -954,9 +940,8 @@ static int lod_process_config(const struct lu_env *env,
                        rc = lod_add_device(env, lod, arg1, index, gen,
                                            mdt_index, LUSTRE_OSC_NAME, 0);
                } else {
-                       rc = lod_del_device(env, lod,
-                                           &lod->lod_ost_descs,
-                                           arg1, index, gen, true);
+                       rc = lod_del_device(env, lod, &lod->lod_ost_descs,
+                                           arg1, index, gen);
                }
 
                break;
@@ -973,9 +958,8 @@ static int lod_process_config(const struct lu_env *env,
                 */
                param = lustre_cfg_buf(lcfg, 1);
                if (strstr(param, "osp") && strstr(param, ".active=")) {
-                       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
                        struct lod_tgt_desc *sub_tgt = NULL;
-                       struct lu_tgt_desc *tgt;
+                       struct lu_tgt_desc *mdt;
                        char *ptr;
                        char *tmp;
 
@@ -989,9 +973,9 @@ static int lod_process_config(const struct lu_env *env,
                                GOTO(out, rc);
                        }
 
-                       ltd_foreach_tgt(ltd, tgt) {
-                               if (tgt->ltd_tgt->dd_lu_dev.ld_obd == obd) {
-                                       sub_tgt = tgt;
+                       lod_foreach_mdt(lod, mdt) {
+                               if (mdt->ltd_tgt->dd_lu_dev.ld_obd == obd) {
+                                       sub_tgt = mdt;
                                        break;
                                }
                        }
@@ -1047,6 +1031,7 @@ static int lod_process_config(const struct lu_env *env,
        case LCFG_PRE_CLEANUP: {
                lod_sub_process_config(env, lod, &lod->lod_mdt_descs, lcfg);
                lod_sub_process_config(env, lod, &lod->lod_ost_descs, lcfg);
+               OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val * 2);
                next = &lod->lod_child->dd_lu_dev;
                rc = next->ld_ops->ldo_process_config(env, next, lcfg);
                if (rc != 0)
@@ -1107,7 +1092,7 @@ static int lod_recovery_complete(const struct lu_env *env,
 {
        struct lod_device *lod = lu2lod_dev(dev);
        struct lu_device *next = &lod->lod_child->dd_lu_dev;
-       unsigned int i;
+       struct lod_tgt_desc *tgt;
        int rc;
 
        ENTRY;
@@ -1118,17 +1103,15 @@ static int lod_recovery_complete(const struct lu_env *env,
        rc = next->ld_ops->ldo_recovery_complete(env, next);
 
        lod_getref(&lod->lod_ost_descs);
-       if (lod->lod_osts_size > 0) {
-               cfs_foreach_bit(lod->lod_ost_bitmap, i) {
-                       struct lod_tgt_desc *tgt;
-
-                       tgt = OST_TGT(lod, i);
+       if (lod->lod_ost_descs.ltd_tgts_size > 0) {
+               lod_foreach_ost(lod, tgt) {
                        LASSERT(tgt && tgt->ltd_tgt);
-                       next = &tgt->ltd_ost->dd_lu_dev;
+                       next = &tgt->ltd_tgt->dd_lu_dev;
                        rc = next->ld_ops->ldo_recovery_complete(env, next);
                        if (rc)
                                CERROR("%s: can't complete recovery on #%d: rc = %d\n",
-                                      lod2obd(lod)->obd_name, i, rc);
+                                      lod2obd(lod)->obd_name, tgt->ltd_index,
+                                      rc);
                }
        }
        lod_putref(lod, &lod->lod_ost_descs);
@@ -1149,8 +1132,7 @@ static int lod_recovery_complete(const struct lu_env *env,
  */
 static int lod_sub_init_llogs(const struct lu_env *env, struct lod_device *lod)
 {
-       struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
-       struct lu_tgt_desc *tgt;
+       struct lu_tgt_desc *mdt;
        int rc;
 
        ENTRY;
@@ -1166,8 +1148,8 @@ static int lod_sub_init_llogs(const struct lu_env *env, struct lod_device *lod)
        if (rc < 0)
                RETURN(rc);
 
-       ltd_foreach_tgt(ltd, tgt) {
-               rc = lod_sub_init_llog(env, lod, tgt->ltd_tgt);
+       lod_foreach_mdt(lod, mdt) {
+               rc = lod_sub_init_llog(env, lod, mdt->ltd_tgt);
                if (rc != 0)
                        break;
        }
@@ -1248,11 +1230,68 @@ out_put:
        RETURN(rc);
 }
 
+/**
+ * Implementation of lu_device_operations::ldo_fid_alloc() for LOD
+ *
+ * Find corresponding device by passed parent and name, and allocate FID from
+ * there.
+ *
+ * see include/lu_object.h for the details.
+ */
+static int lod_fid_alloc(const struct lu_env *env, struct lu_device *d,
+                        struct lu_fid *fid, struct lu_object *parent,
+                        const struct lu_name *name)
+{
+       struct lod_device *lod = lu2lod_dev(d);
+       struct lod_object *lo = lu2lod_obj(parent);
+       struct dt_device *next;
+       int rc;
+
+       ENTRY;
+
+       /* if @parent is remote, we don't know whether its layout was changed,
+        * always reload layout.
+        */
+       if (lu_object_remote(parent))
+               lod_striping_free(env, lo);
+
+       rc = lod_striping_load(env, lo);
+       if (rc)
+               RETURN(rc);
+
+       if (lo->ldo_dir_stripe_count > 0 && name) {
+               struct dt_object *stripe;
+               int idx;
+
+               idx = __lmv_name_to_stripe_index(lo->ldo_dir_hash_type,
+                                                lo->ldo_dir_stripe_count,
+                                                lo->ldo_dir_migrate_hash,
+                                                lo->ldo_dir_migrate_offset,
+                                                name->ln_name,
+                                                name->ln_namelen, true);
+               if (idx < 0)
+                       RETURN(idx);
+
+               stripe = lo->ldo_stripe[idx];
+               if (!stripe || !dt_object_exists(stripe))
+                       RETURN(-ENODEV);
+
+               next = lu2dt_dev(stripe->do_lu.lo_dev);
+       } else {
+               next = lod->lod_child;
+       }
+
+       rc = dt_fid_alloc(env, next, fid, parent, name);
+
+       RETURN(rc);
+}
+
 const struct lu_device_operations lod_lu_ops = {
        .ldo_object_alloc       = lod_object_alloc,
        .ldo_process_config     = lod_process_config,
        .ldo_recovery_complete  = lod_recovery_complete,
        .ldo_prepare            = lod_prepare,
+       .ldo_fid_alloc          = lod_fid_alloc,
 };
 
 /**
@@ -1299,12 +1338,11 @@ static int lod_statfs(const struct lu_env *env, struct dt_device *dev,
                      struct obd_statfs *sfs, struct obd_statfs_info *info)
 {
        struct lod_device *lod = dt2lod_dev(dev);
-       struct lod_ost_desc *ost;
-       struct lod_mdt_desc *mdt;
+       struct lu_tgt_desc *tgt;
        struct obd_statfs ost_sfs;
        u64 ost_files = 0;
        u64 ost_ffree = 0;
-       int i, rc, bs;
+       int rc, bs;
 
        rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
        if (rc)
@@ -1318,10 +1356,8 @@ static int lod_statfs(const struct lu_env *env, struct dt_device *dev,
        sfs->os_granted = 0;
 
        lod_getref(&lod->lod_mdt_descs);
-       lod_foreach_mdt(lod, i) {
-               mdt = MDT_TGT(lod, i);
-               LASSERT(mdt && mdt->ltd_mdt);
-               rc = dt_statfs(env, mdt->ltd_mdt, &ost_sfs);
+       lod_foreach_mdt(lod, tgt) {
+               rc = dt_statfs(env, tgt->ltd_tgt, &ost_sfs);
                /* ignore errors */
                if (rc)
                        continue;
@@ -1337,10 +1373,8 @@ static int lod_statfs(const struct lu_env *env, struct dt_device *dev,
         * just fallback to pre-DoM policy if any OST is alive
         */
        lod_getref(&lod->lod_ost_descs);
-       lod_foreach_ost(lod, i) {
-               ost = OST_TGT(lod, i);
-               LASSERT(ost && ost->ltd_ost);
-               rc = dt_statfs(env, ost->ltd_ost, &ost_sfs);
+       lod_foreach_ost(lod, tgt) {
+               rc = dt_statfs(env, tgt->ltd_tgt, &ost_sfs);
                /* ignore errors */
                if (rc || ost_sfs.os_bsize == 0)
                        continue;
@@ -1362,7 +1396,7 @@ static int lod_statfs(const struct lu_env *env, struct dt_device *dev,
                        (int)sfs->os_bsize, (int)ost_sfs.os_bsize);
        }
        lod_putref(lod, &lod->lod_ost_descs);
-       sfs->os_state |= OS_STATE_SUM;
+       sfs->os_state |= OS_STATFS_SUM;
 
        /* If we have _some_ OSTs, but don't have as many free objects on the
         * OSTs as inodes on the MDTs, reduce the reported number of inodes
@@ -1506,24 +1540,21 @@ static void lod_conf_get(const struct lu_env *env,
 static int lod_sync(const struct lu_env *env, struct dt_device *dev)
 {
        struct lod_device *lod = dt2lod_dev(dev);
-       struct lod_ost_desc *ost;
-       struct lod_mdt_desc *mdt;
-       unsigned int i;
+       struct lu_tgt_desc *tgt;
        int rc = 0;
 
        ENTRY;
 
        lod_getref(&lod->lod_ost_descs);
-       lod_foreach_ost(lod, i) {
-               ost = OST_TGT(lod, i);
-               LASSERT(ost && ost->ltd_ost);
-               if (!ost->ltd_active)
+       lod_foreach_ost(lod, tgt) {
+               if (!tgt->ltd_active)
                        continue;
-               rc = dt_sync(env, ost->ltd_ost);
+               rc = dt_sync(env, tgt->ltd_tgt);
                if (rc) {
                        if (rc != -ENOTCONN) {
                                CERROR("%s: can't sync ost %u: rc = %d\n",
-                                      lod2obd(lod)->obd_name, i, rc);
+                                      lod2obd(lod)->obd_name, tgt->ltd_index,
+                                      rc);
                                break;
                        }
                        rc = 0;
@@ -1535,16 +1566,15 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev)
                RETURN(rc);
 
        lod_getref(&lod->lod_mdt_descs);
-       lod_foreach_mdt(lod, i) {
-               mdt = MDT_TGT(lod, i);
-               LASSERT(mdt && mdt->ltd_mdt);
-               if (!mdt->ltd_active)
+       lod_foreach_mdt(lod, tgt) {
+               if (!tgt->ltd_active)
                        continue;
-               rc = dt_sync(env, mdt->ltd_mdt);
+               rc = dt_sync(env, tgt->ltd_tgt);
                if (rc) {
                        if (rc != -ENOTCONN) {
                                CERROR("%s: can't sync mdt %u: rc = %d\n",
-                                      lod2obd(lod)->obd_name, i, rc);
+                                      lod2obd(lod)->obd_name, tgt->ltd_index,
+                                      rc);
                                break;
                        }
                        rc = 0;
@@ -1698,6 +1728,26 @@ out:
        RETURN(rc);
 }
 
+static int lod_lsfs_init(const struct lu_env *env, struct lod_device *d)
+{
+       struct obd_statfs sfs;
+       int rc;
+
+       rc = dt_statfs(env, d->lod_child, &sfs);
+       if (rc) {
+               CDEBUG(D_LAYOUT, "%s: failed to get OSD statfs, rc = %d\n",
+                      lod2obd(d)->obd_name, rc);
+               return rc;
+       }
+
+       /* update local OSD cached statfs data */
+       spin_lock_init(&d->lod_lsfs_lock);
+       d->lod_lsfs_age = ktime_get_seconds();
+       d->lod_lsfs_total_mb = (sfs.os_blocks * sfs.os_bsize) >> 20;
+       d->lod_lsfs_free_mb = (sfs.os_bfree * sfs.os_bsize) >> 20;
+       return 0;
+}
+
 /**
  * Initialize LOD device at setup.
  *
@@ -1741,7 +1791,17 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod,
 
        dt_conf_get(env, &lod->lod_dt_dev, &ddp);
        lod->lod_osd_max_easize = ddp.ddp_max_ea_size;
-       lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */
+       lod->lod_dom_stripesize_max_kb = (1ULL << 10); /* 1Mb is default */
+
+       /* initialize local statfs cached values */
+       rc = lod_lsfs_init(env, lod);
+       if (rc)
+               GOTO(out_disconnect, rc);
+
+       /* default threshold as half of total space, in MiB */
+       lod->lod_dom_threshold_free_mb = lod->lod_lsfs_total_mb / 2;
+       /* set default DoM stripe size based on free space amount */
+       lod_dom_stripesize_recalc(lod);
 
        /* setup obd to be used with old lov code */
        rc = lod_pools_init(lod, cfg);
@@ -1754,8 +1814,8 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod,
 
        spin_lock_init(&lod->lod_lock);
        spin_lock_init(&lod->lod_connects_lock);
-       lu_tgt_descs_init(&lod->lod_mdt_descs);
-       lu_tgt_descs_init(&lod->lod_ost_descs);
+       lu_tgt_descs_init(&lod->lod_mdt_descs, true);
+       lu_tgt_descs_init(&lod->lod_ost_descs, false);
 
        RETURN(0);
 
@@ -1781,10 +1841,9 @@ static struct lu_device *lod_device_free(const struct lu_env *env,
 
        ENTRY;
 
-       if (atomic_read(&lu->ld_ref) > 0 &&
-           !cfs_hash_is_empty(lu->ld_site->ls_obj_hash)) {
-               LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
-               lu_site_print(env, lu->ld_site, &msgdata, lu_cdebug_printer);
+       if (atomic_read(&lu->ld_site->ls_obj_hash.nelems)) {
+               lu_site_print(env, lu->ld_site, &lu->ld_ref, D_ERROR,
+                             lu_cdebug_printer);
        }
        LASSERTF(atomic_read(&lu->ld_ref) == 0, "lu is %p\n", lu);
        dt_device_fini(&lod->lod_dt_dev);
@@ -1827,10 +1886,9 @@ static struct lu_device *lod_device_alloc(const struct lu_env *env,
 static void lod_avoid_guide_fini(struct lod_avoid_guide *lag)
 {
        if (lag->lag_oss_avoid_array)
-               OBD_FREE(lag->lag_oss_avoid_array,
-                        sizeof(u32) * lag->lag_oaa_size);
-       if (lag->lag_ost_avoid_bitmap)
-               CFS_FREE_BITMAP(lag->lag_ost_avoid_bitmap);
+               OBD_FREE_PTR_ARRAY(lag->lag_oss_avoid_array,
+                                  lag->lag_oaa_size);
+       bitmap_free(lag->lag_ost_avoid_bitmap);
 }
 
 /**
@@ -1852,12 +1910,12 @@ static struct lu_device *lod_device_fini(const struct lu_env *env,
 
        lod_procfs_fini(lod);
 
-       rc = lod_fini_tgt(env, lod, &lod->lod_ost_descs, true);
+       rc = lod_fini_tgt(env, lod, &lod->lod_ost_descs);
        if (rc)
                CERROR("%s: can not fini ost descriptors: rc =  %d\n",
                        lod2obd(lod)->obd_name, rc);
 
-       rc = lod_fini_tgt(env, lod, &lod->lod_mdt_descs, false);
+       rc = lod_fini_tgt(env, lod, &lod->lod_mdt_descs);
        if (rc)
                CERROR("%s: can not fini mdt descriptors: rc =  %d\n",
                        lod2obd(lod)->obd_name, rc);
@@ -1977,8 +2035,8 @@ static void lod_key_fini(const struct lu_context *ctx,
                lod_free_def_comp_entries(&info->lti_def_striping);
 
        if (info->lti_comp_size > 0)
-               OBD_FREE(info->lti_comp_idx,
-                        info->lti_comp_size * sizeof(u32));
+               OBD_FREE_PTR_ARRAY(info->lti_comp_idx,
+                                  info->lti_comp_size);
 
        lod_avoid_guide_fini(&info->lti_avoid);
 
@@ -2039,7 +2097,6 @@ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp,
                struct obd_device *obd = exp->exp_obd;
                struct lod_device *d;
                struct lod_tgt_desc *tgt;
-               unsigned int i;
                int rc = 1;
 
                if (!obd->obd_set_up || obd->obd_stopping)
@@ -2047,9 +2104,7 @@ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp,
 
                d = lu2lod_dev(obd->obd_lu_dev);
                lod_getref(&d->lod_ost_descs);
-               lod_foreach_ost(d, i) {
-                       tgt = OST_TGT(d, i);
-                       LASSERT(tgt && tgt->ltd_tgt);
+               lod_foreach_ost(d, tgt) {
                        rc = obd_get_info(env, tgt->ltd_exp, keylen, key,
                                          vallen, val);
                        /* one healthy device is enough */
@@ -2059,12 +2114,9 @@ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp,
                lod_putref(d, &d->lod_ost_descs);
 
                lod_getref(&d->lod_mdt_descs);
-               lod_foreach_mdt(d, i) {
+               lod_foreach_mdt(d, tgt) {
                        struct llog_ctxt *ctxt;
 
-                       tgt = MDT_TGT(d, i);
-                       LASSERT(tgt != NULL);
-                       LASSERT(tgt->ltd_tgt != NULL);
                        if (!tgt->ltd_active)
                                continue;
 
@@ -2105,7 +2157,7 @@ static int lod_obd_set_info_async(const struct lu_env *env,
        struct lod_device *d;
        struct lod_tgt_desc *tgt;
        int no_set = 0;
-       int i, rc = 0, rc2;
+       int rc = 0, rc2;
 
        ENTRY;
 
@@ -2118,9 +2170,7 @@ static int lod_obd_set_info_async(const struct lu_env *env,
 
        d = lu2lod_dev(obd->obd_lu_dev);
        lod_getref(&d->lod_ost_descs);
-       lod_foreach_ost(d, i) {
-               tgt = OST_TGT(d, i);
-               LASSERT(tgt && tgt->ltd_tgt);
+       lod_foreach_ost(d, tgt) {
                if (!tgt->ltd_active)
                        continue;
 
@@ -2132,9 +2182,7 @@ static int lod_obd_set_info_async(const struct lu_env *env,
        lod_putref(d, &d->lod_ost_descs);
 
        lod_getref(&d->lod_mdt_descs);
-       lod_foreach_mdt(d, i) {
-               tgt = MDT_TGT(d, i);
-               LASSERT(tgt && tgt->ltd_tgt);
+       lod_foreach_mdt(d, tgt) {
                if (!tgt->ltd_active)
                        continue;
                rc2 = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
@@ -2154,7 +2202,7 @@ static int lod_obd_set_info_async(const struct lu_env *env,
        RETURN(rc);
 }
 
-static struct obd_ops lod_obd_device_ops = {
+static const struct obd_ops lod_obd_device_ops = {
        .o_owner        = THIS_MODULE,
        .o_connect      = lod_obd_connect,
        .o_disconnect   = lod_obd_disconnect,
@@ -2166,10 +2214,9 @@ static struct obd_ops lod_obd_device_ops = {
        .o_pool_del     = lod_pool_del,
 };
 
-static struct obd_type *sym;
-
 static int __init lod_init(void)
 {
+       struct obd_type *sym;
        int rc;
 
        rc = lu_kmem_init(lod_caches);
@@ -2197,8 +2244,18 @@ static int __init lod_init(void)
 
 static void __exit lod_exit(void)
 {
-       if (!IS_ERR_OR_NULL(sym))
+       struct obd_type *sym = class_search_type(LUSTRE_LOV_NAME);
+
+       /* if this was never fully initialized by the lov layer
+        * then we are responsible for freeing this obd_type
+        */
+       if (sym) {
+               /* final put if we manage this obd type */
+               if (sym->typ_sym_filter)
+                       kobject_put(&sym->typ_kobj);
+               /* put reference taken by class_search_type */
                kobject_put(&sym->typ_kobj);
+       }
 
        class_unregister_type(LUSTRE_LOD_NAME);
        lu_kmem_fini(lod_caches);