Whamcloud - gitweb
LU-2675 lustre: remove linux/lustre_lib.h
[fs/lustre-release.git] / lustre / lod / lod_lov.c
index dc5201e..4bdb1a3 100644 (file)
 /*
  * lustre/lod/lod_lov.c
  *
- * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com> 
+ * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <obd_class.h>
-#include <obd_lov.h>
 #include <lustre_lfsck.h>
+#include <lustre_lmv.h>
 
 #include "lod_internal.h"
 
@@ -67,12 +67,14 @@ void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd)
        ltd->ltd_refcount--;
        if (ltd->ltd_refcount == 0 && ltd->ltd_death_row) {
                struct lod_tgt_desc *tgt_desc, *tmp;
-               int                  idx;
-               CFS_LIST_HEAD(kill);
+               struct list_head kill;
+               unsigned int idx;
 
                CDEBUG(D_CONFIG, "destroying %d ltd desc\n",
                       ltd->ltd_death_row);
 
+               INIT_LIST_HEAD(&kill);
+
                cfs_foreach_bit(ltd->ltd_tgt_bitmap, idx) {
                        tgt_desc = LTD_TGT(ltd, idx);
                        LASSERT(tgt_desc);
@@ -80,7 +82,7 @@ void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd)
                        if (!tgt_desc->ltd_reap)
                                continue;
 
-                       cfs_list_add(&tgt_desc->ltd_kill, &kill);
+                       list_add(&tgt_desc->ltd_kill, &kill);
                        LTD_TGT(ltd, idx) = NULL;
                        /*FIXME: only support ost pool for now */
                        if (ltd == &lod->lod_ost_descs) {
@@ -95,9 +97,9 @@ void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd)
                mutex_unlock(&ltd->ltd_mutex);
                up_read(&ltd->ltd_rw_sem);
 
-               cfs_list_for_each_entry_safe(tgt_desc, tmp, &kill, ltd_kill) {
+               list_for_each_entry_safe(tgt_desc, tmp, &kill, ltd_kill) {
                        int rc;
-                       cfs_list_del(&tgt_desc->ltd_kill);
+                       list_del(&tgt_desc->ltd_kill);
                        if (ltd == &lod->lod_ost_descs) {
                                /* remove from QoS structures */
                                rc = qos_del_tgt(lod, tgt_desc);
@@ -394,7 +396,7 @@ static void __lod_del_device(const struct lu_env *env, struct lod_device *lod,
 int lod_fini_tgt(const struct lu_env *env, struct lod_device *lod,
                 struct lod_tgt_descs *ltd, bool for_ost)
 {
-       int idx;
+       unsigned int idx;
 
        if (ltd->ltd_tgts_size <= 0)
                return 0;
@@ -479,9 +481,9 @@ out:
        return(rc);
 }
 
-int lod_ea_store_resize(struct lod_thread_info *info, int size)
+int lod_ea_store_resize(struct lod_thread_info *info, size_t size)
 {
-       int round = size_roundup_power2(size);
+       __u32 round = size_roundup_power2(size);
 
        LASSERT(round <=
                lov_mds_md_size(LOV_MAX_STRIPE_COUNT, LOV_MAGIC_V3));
@@ -514,8 +516,8 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
        struct lov_mds_md_v1    *lmm;
        struct lov_ost_data_v1  *objs;
        __u32                    magic;
-       int                      i, rc, lmm_size;
-       int                      cplen = 0;
+       int                      i, rc;
+       size_t                   lmm_size;
        ENTRY;
 
        LASSERT(lo);
@@ -548,7 +550,7 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
                objs = &lmm->lmm_objects[0];
        } else {
                struct lov_mds_md_v3 *v3 = (struct lov_mds_md_v3 *) lmm;
-               cplen = strlcpy(v3->lmm_pool_name, lo->ldo_pool,
+               size_t cplen = strlcpy(v3->lmm_pool_name, lo->ldo_pool,
                                sizeof(v3->lmm_pool_name));
                if (cplen >= sizeof(v3->lmm_pool_name))
                        RETURN(-E2BIG);
@@ -591,6 +593,8 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
        info->lti_buf.lb_len = lmm_size;
        rc = dt_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV, 0,
                          th, BYPASS_CAPA);
+       if (rc < 0)
+               lod_object_free_striping(env, lo);
 
        RETURN(rc);
 }
@@ -675,8 +679,8 @@ int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
        v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
        v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
        v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
-       if (lo->ldo_pool)
-               strncpy(v3->lmm_pool_name, lo->ldo_pool,
+       if (lo->ldo_pool != NULL)
+               strlcpy(v3->lmm_pool_name, lo->ldo_pool,
                        sizeof(v3->lmm_pool_name));
        info->lti_buf.lb_buf = v3;
        info->lti_buf.lb_len = sizeof(*v3);
@@ -686,7 +690,7 @@ int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
        RETURN(rc);
 }
 
-static int validate_lod_and_idx(struct lod_device *md, int idx)
+static int validate_lod_and_idx(struct lod_device *md, __u32 idx)
 {
        if (unlikely(idx >= md->lod_ost_descs.ltd_tgts_size ||
                     !cfs_bitmap_check(md->lod_ost_bitmap, idx))) {
@@ -723,7 +727,8 @@ int lod_initialize_objects(const struct lu_env *env, struct lod_object *lo,
        struct lu_device        *nd;
        struct dt_object       **stripe;
        int                      stripe_len;
-       int                      i, idx, rc = 0;
+       int                      i, rc = 0;
+       __u32                   idx;
        ENTRY;
 
        LASSERT(lo != NULL);
@@ -738,6 +743,9 @@ int lod_initialize_objects(const struct lu_env *env, struct lod_object *lo,
                RETURN(-ENOMEM);
 
        for (i = 0; i < lo->ldo_stripenr; i++) {
+               if (unlikely(lovea_slot_is_dummy(&objs[i])))
+                       continue;
+
                ostid_le_to_cpu(&objs[i].l_ost_oi, &info->lti_ostid);
                idx = le32_to_cpu(objs[i].l_ost_idx);
                rc = ostid_to_fid(&info->lti_fid, &info->lti_ostid, idx);
@@ -776,6 +784,7 @@ out:
                                lu_object_put(env, &stripe[i]->do_lu);
 
                OBD_FREE(stripe, sizeof(stripe[0]) * stripe_len);
+               lo->ldo_stripenr = 0;
        } else {
                lo->ldo_stripe = stripe;
                lo->ldo_stripes_allocated = stripe_len;
@@ -835,32 +844,26 @@ out:
        RETURN(rc);
 }
 
-/*
- * Load and parse striping information, create in-core representation for the
- * stripes
- */
-int lod_load_striping(const struct lu_env *env, struct lod_object *lo)
+int lod_load_striping_locked(const struct lu_env *env, struct lod_object *lo)
 {
        struct lod_thread_info  *info = lod_env_info(env);
+       struct lu_buf           *buf  = &info->lti_buf;
        struct dt_object        *next = dt_object_child(&lo->ldo_obj);
        int                      rc = 0;
        ENTRY;
 
-       /*
-        * currently this code is supposed to be called from declaration
-        * phase only, thus the object is not expected to be locked by caller
-        */
-       dt_write_lock(env, next, 0);
        /* already initialized? */
        if (lo->ldo_stripe != NULL)
                GOTO(out, rc = 0);
 
+       if (!dt_object_exists(next))
+               GOTO(out, rc = 0);
+
        /* Do not load stripe for slaves of striped dir */
-       if (!dt_object_exists(next) || lo->ldo_dir_slave_stripe)
+       if (lo->ldo_dir_slave_stripe)
                GOTO(out, rc = 0);
 
-       /* only regular files can be striped */
-       if (lu_object_attr(lod2lu_obj(lo)) & S_IFREG) {
+       if (S_ISREG(lu_object_attr(lod2lu_obj(lo)))) {
                rc = lod_get_lov_ea(env, lo);
                if (rc <= 0)
                        GOTO(out, rc);
@@ -868,28 +871,59 @@ int lod_load_striping(const struct lu_env *env, struct lod_object *lo)
                 * there is LOV EA (striping information) in this object
                 * let's parse it and create in-core objects for the stripes
                 */
-               info->lti_buf.lb_buf = info->lti_ea_store;
-               info->lti_buf.lb_len = info->lti_ea_store_size;
-               rc = lod_parse_striping(env, lo, &info->lti_buf);
-       } else if (lu_object_attr(lod2lu_obj(lo)) & S_IFDIR) {
+               buf->lb_buf = info->lti_ea_store;
+               buf->lb_len = info->lti_ea_store_size;
+               rc = lod_parse_striping(env, lo, buf);
+       } else if (S_ISDIR(lu_object_attr(lod2lu_obj(lo)))) {
                rc = lod_get_lmv_ea(env, lo);
-               if (rc <= 0)
-                       GOTO(out, rc);
+               if (rc < (typeof(rc))sizeof(struct lmv_mds_md_v1))
+                       GOTO(out, rc = rc > 0 ? -EINVAL : rc);
+
+               buf->lb_buf = info->lti_ea_store;
+               buf->lb_len = info->lti_ea_store_size;
+               if (rc == sizeof(struct lmv_mds_md_v1)) {
+                       rc = lod_load_lmv_shards(env, lo, buf, true);
+                       if (buf->lb_buf != info->lti_ea_store) {
+                               OBD_FREE_LARGE(info->lti_ea_store,
+                                              info->lti_ea_store_size);
+                               info->lti_ea_store = buf->lb_buf;
+                               info->lti_ea_store_size = buf->lb_len;
+                       }
+
+                       if (rc < 0)
+                               GOTO(out, rc);
+               }
+
                /*
                 * there is LMV EA (striping information) in this object
                 * let's parse it and create in-core objects for the stripes
                 */
-               info->lti_buf.lb_buf = info->lti_ea_store;
-               info->lti_buf.lb_len = info->lti_ea_store_size;
-               rc = lod_parse_dir_striping(env, lo, &info->lti_buf);
+               rc = lod_parse_dir_striping(env, lo, buf);
        }
 out:
-       dt_write_unlock(env, next);
        RETURN(rc);
 }
 
+/**
+ * Load and parse striping information, create in-core representation for the
+ * stripes
+ **/
+int lod_load_striping(const struct lu_env *env, struct lod_object *lo)
+{
+       struct dt_object        *next = dt_object_child(&lo->ldo_obj);
+       int                     rc = 0;
+
+       /* currently this code is supposed to be called from declaration
+        * phase only, thus the object is not expected to be locked by caller */
+       dt_write_lock(env, next, 0);
+       rc = lod_load_striping_locked(env, lo);
+       dt_write_unlock(env, next);
+       return rc;
+}
+
+/* verify the striping information for directory */
 int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
-                       int specific)
+                       bool is_from_disk)
 {
        struct lov_user_md_v1   *lum;
        struct lov_user_md_v3   *lum3;
@@ -921,8 +955,11 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                GOTO(out, rc = -EINVAL);
        }
 
-       if ((specific && le32_to_cpu(lum->lmm_pattern) != LOV_PATTERN_RAID0) ||
-           (!specific && lum->lmm_pattern != 0)) {
+       /* the user uses "0" for default stripe pattern normally. */
+       if (!is_from_disk && lum->lmm_pattern == 0)
+               lum->lmm_pattern = cpu_to_le32(LOV_PATTERN_RAID0);
+
+       if (le32_to_cpu(lum->lmm_pattern) != LOV_PATTERN_RAID0) {
                CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
                       le32_to_cpu(lum->lmm_pattern));
                GOTO(out, rc = -EINVAL);
@@ -955,17 +992,17 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                }
        }
 
-       stripe_count = le16_to_cpu(lum->lmm_stripe_count);
        if (magic == LOV_USER_MAGIC_V1 || magic == LOV_MAGIC_V1_DEF)
                lum_size = offsetof(struct lov_user_md_v1,
-                                   lmm_objects[stripe_count]);
+                                   lmm_objects[0]);
        else if (magic == LOV_USER_MAGIC_V3 || magic == LOV_MAGIC_V3_DEF)
                lum_size = offsetof(struct lov_user_md_v3,
-                                   lmm_objects[stripe_count]);
+                                   lmm_objects[0]);
        else
-               LBUG();
+               GOTO(out, rc = -EINVAL);
 
-       if (specific && buf->lb_len != lum_size) {
+       stripe_count = le16_to_cpu(lum->lmm_stripe_count);
+       if (buf->lb_len != lum_size) {
                CDEBUG(D_IOCTL, "invalid buf len %zd for lov_user_md with "
                       "magic %#x and stripe_count %u\n",
                       buf->lb_len, magic, stripe_count);
@@ -995,7 +1032,7 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                        GOTO(out, rc = -EINVAL);
        }
 
-       if (specific && stripe_count > pool_tgt_count(pool)) {
+       if (is_from_disk && stripe_count > pool_tgt_count(pool)) {
                CDEBUG(D_IOCTL,
                       "stripe count %u > # OSTs %u in the pool\n",
                       stripe_count, pool_tgt_count(pool));
@@ -1015,8 +1052,8 @@ void lod_fix_desc_stripe_size(__u64 *val)
                if (*val != 0)
                        LCONSOLE_INFO("Increasing default stripe size to "
                                      "minimum value %u\n",
-                                     LOV_DEFAULT_STRIPE_SIZE);
-               *val = LOV_DEFAULT_STRIPE_SIZE;
+                                     LOV_DESC_STRIPE_SIZE_DEFAULT);
+               *val = LOV_DESC_STRIPE_SIZE_DEFAULT;
        } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) {
                *val &= ~(LOV_MIN_STRIPE_SIZE - 1);
                LCONSOLE_WARN("Changing default stripe size to "LPU64" (a "
@@ -1044,7 +1081,7 @@ void lod_fix_desc_qos_maxage(__u32 *val)
 {
        /* fix qos_maxage */
        if (*val == 0)
-               *val = QOS_DEFAULT_MAXAGE;
+               *val = LOV_DESC_QOS_MAXAGE_DEFAULT;
 }
 
 void lod_fix_desc(struct lov_desc *desc)
@@ -1099,7 +1136,7 @@ int lod_pools_init(struct lod_device *lod, struct lustre_cfg *lcfg)
        lod->lod_sp_me = LUSTRE_SP_CLI;
 
        /* Set up allocation policy (QoS and RR) */
-       CFS_INIT_LIST_HEAD(&lod->lod_qos.lq_oss_list);
+       INIT_LIST_HEAD(&lod->lod_qos.lq_oss_list);
        init_rwsem(&lod->lod_qos.lq_rw_sem);
        lod->lod_qos.lq_dirty = 1;
        lod->lod_qos.lq_rr.lqr_dirty = 1;
@@ -1120,7 +1157,7 @@ int lod_pools_init(struct lod_device *lod, struct lustre_cfg *lcfg)
        if (lod->lod_pools_hash_body == NULL)
                RETURN(-ENOMEM);
 
-       CFS_INIT_LIST_HEAD(&lod->lod_pool_list);
+       INIT_LIST_HEAD(&lod->lod_pool_list);
        lod->lod_pool_count = 0;
        rc = lod_ost_pool_init(&lod->lod_pool_info, 0);
        if (rc)
@@ -1142,12 +1179,10 @@ out_hash:
 int lod_pools_fini(struct lod_device *lod)
 {
        struct obd_device   *obd = lod2obd(lod);
-       cfs_list_t          *pos, *tmp;
-       struct pool_desc    *pool;
+       struct pool_desc    *pool, *tmp;
        ENTRY;
 
-       cfs_list_for_each_safe(pos, tmp, &lod->lod_pool_list) {
-               pool = cfs_list_entry(pos, struct pool_desc, pool_list);
+       list_for_each_entry_safe(pool, tmp, &lod->lod_pool_list, pool_list) {
                /* free pool structs */
                CDEBUG(D_INFO, "delete pool %p\n", pool);
                /* In the function below, .hs_keycmp resolves to