Whamcloud - gitweb
LU-5223 lmv: build master LMV EA dynamically build via readdir
[fs/lustre-release.git] / lustre / lod / lod_lov.c
index 693d94b..80e96cd 100644 (file)
 /*
  * lustre/lod/lod_lov.c
  *
- * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com> 
+ * Author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
  */
 
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <obd_class.h>
-#include <obd_lov.h>
 #include <lustre_lfsck.h>
+#include <lustre_lmv.h>
 
 #include "lod_internal.h"
 
@@ -483,7 +483,8 @@ int lod_ea_store_resize(struct lod_thread_info *info, int size)
 {
        int round = size_roundup_power2(size);
 
-       LASSERT(round <= lov_mds_md_size(LOV_MAX_STRIPE_COUNT, LOV_MAGIC_V3));
+       LASSERT(round <=
+               lov_mds_md_size(LOV_MAX_STRIPE_COUNT, LOV_MAGIC_V3));
        if (info->lti_ea_store) {
                LASSERT(info->lti_ea_store_size);
                LASSERT(info->lti_ea_store_size < round);
@@ -535,6 +536,8 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
        lmm->lmm_magic = cpu_to_le32(magic);
        lmm->lmm_pattern = cpu_to_le32(lo->ldo_pattern);
        fid_to_lmm_oi(fid, &lmm->lmm_oi);
+       if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_LMMOI))
+               lmm->lmm_oi.oi.oi_id++;
        lmm_oi_cpu_to_le(&lmm->lmm_oi, &lmm->lmm_oi);
        lmm->lmm_stripe_size = cpu_to_le32(lo->ldo_stripe_size);
        lmm->lmm_stripe_count = cpu_to_le16(lo->ldo_stripenr);
@@ -553,24 +556,32 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
        }
 
        for (i = 0; i < lo->ldo_stripenr; i++) {
-               const struct lu_fid     *fid;
+               struct lu_fid           *fid    = &info->lti_fid;
                struct lod_device       *lod;
                __u32                   index;
+               int                     type    = LU_SEQ_RANGE_OST;
 
                lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
                LASSERT(lo->ldo_stripe[i]);
-               fid = lu_object_fid(&lo->ldo_stripe[i]->do_lu);
+
+               *fid = *lu_object_fid(&lo->ldo_stripe[i]->do_lu);
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_MULTIPLE_REF)) {
+                       if (cfs_fail_val == 0)
+                               cfs_fail_val = fid->f_oid;
+                       else
+                               fid->f_oid = cfs_fail_val;
+               }
 
                rc = fid_to_ostid(fid, &info->lti_ostid);
                LASSERT(rc == 0);
 
                ostid_cpu_to_le(&info->lti_ostid, &objs[i].l_ost_oi);
                objs[i].l_ost_gen    = cpu_to_le32(0);
-               rc = lod_fld_lookup(env, lod, fid, &index, LU_SEQ_RANGE_OST);
+               rc = lod_fld_lookup(env, lod, fid, &index, &type);
                if (rc < 0) {
-                       lod_object_free_striping(env, lo);
                        CERROR("%s: Can not locate "DFID": rc = %d\n",
                               lod2obd(lod)->obd_name, PFID(fid), rc);
+                       lod_object_free_striping(env, lo);
                        RETURN(rc);
                }
                objs[i].l_ost_idx = cpu_to_le32(index);
@@ -580,38 +591,41 @@ int lod_generate_and_set_lovea(const struct lu_env *env,
        info->lti_buf.lb_len = lmm_size;
        rc = dt_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV, 0,
                          th, BYPASS_CAPA);
+       if (rc < 0)
+               lod_object_free_striping(env, lo);
 
        RETURN(rc);
 }
 
-int lod_get_lov_ea(const struct lu_env *env, struct lod_object *lo)
+int lod_get_ea(const struct lu_env *env, struct lod_object *lo,
+              const char *name)
 {
-       struct lod_thread_info *info = lod_env_info(env);
-       struct dt_object       *next = dt_object_child(&lo->ldo_obj);
+       struct lod_thread_info  *info = lod_env_info(env);
+       struct dt_object        *next = dt_object_child(&lo->ldo_obj);
        int                     rc;
        ENTRY;
 
        LASSERT(info);
 
-       if (unlikely(info->lti_ea_store_size == 0)) {
+       if (unlikely(info->lti_ea_store == NULL)) {
                /* just to enter in allocation block below */
                rc = -ERANGE;
        } else {
 repeat:
                info->lti_buf.lb_buf = info->lti_ea_store;
                info->lti_buf.lb_len = info->lti_ea_store_size;
-               rc = dt_xattr_get(env, next, &info->lti_buf, XATTR_NAME_LOV,
-                                 BYPASS_CAPA);
+               rc = dt_xattr_get(env, next, &info->lti_buf, name, BYPASS_CAPA);
        }
+
        /* if object is not striped or inaccessible */
-       if (rc == -ENODATA)
+       if (rc == -ENODATA || rc == -ENOENT)
                RETURN(0);
 
        if (rc == -ERANGE) {
                /* EA doesn't fit, reallocate new buffer */
-               rc = dt_xattr_get(env, next, &LU_BUF_NULL, XATTR_NAME_LOV,
+               rc = dt_xattr_get(env, next, &LU_BUF_NULL, name,
                                  BYPASS_CAPA);
-               if (rc == -ENODATA)
+               if (rc == -ENODATA || rc == -ENOENT)
                        RETURN(0);
                else if (rc < 0)
                        RETURN(rc);
@@ -634,11 +648,10 @@ int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
        struct dt_object        *next = dt_object_child(dt);
        struct lov_user_md_v3   *v3;
        int                      rc;
-       int                      cplen = 0;
        ENTRY;
 
-       LASSERT(S_ISDIR(dt->do_lu.lo_header->loh_attr));
-
+       if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
+               RETURN(-ENOTDIR); /* default striping lives on dirs only */
        /*
         * store striping defaults into new directory
         * used to implement defaults inheritance
@@ -652,32 +665,26 @@ int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
                                lo->ldo_def_stripe_offset))
                RETURN(0);
 
-       /* XXX: use thread info */
-       OBD_ALLOC_PTR(v3);
-       if (v3 == NULL)
-               RETURN(-ENOMEM);
-
-       v3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
-       v3->lmm_pattern = cpu_to_le32(LOV_PATTERN_RAID0);
-       v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
+       v3 = info->lti_ea_store;
+       if (info->lti_ea_store_size < sizeof(*v3)) {
+               rc = lod_ea_store_resize(info, sizeof(*v3));
+               if (rc != 0)
+                       RETURN(rc);
+               v3 = info->lti_ea_store;
+       }
+       memset(v3, 0, sizeof(*v3));
+       v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
        v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr);
        v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset);
-       if (lo->ldo_pool) {
-               cplen = strlcpy(v3->lmm_pool_name, lo->ldo_pool,
-                               sizeof(v3->lmm_pool_name));
-               if (cplen >= sizeof(v3->lmm_pool_name)) {
-                       OBD_FREE_PTR(v3);
-                       RETURN(-E2BIG);
-               }
-       }
-
+       v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size);
+       if (lo->ldo_pool != NULL)
+               strlcpy(v3->lmm_pool_name, lo->ldo_pool,
+                       sizeof(v3->lmm_pool_name));
        info->lti_buf.lb_buf = v3;
        info->lti_buf.lb_len = sizeof(*v3);
        rc = dt_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV, 0, th,
                        BYPASS_CAPA);
 
-       OBD_FREE_PTR(v3);
-
        RETURN(rc);
 }
 
@@ -733,6 +740,9 @@ int lod_initialize_objects(const struct lu_env *env, struct lod_object *lo,
                RETURN(-ENOMEM);
 
        for (i = 0; i < lo->ldo_stripenr; i++) {
+               if (unlikely(lovea_slot_is_dummy(&objs[i])))
+                       continue;
+
                ostid_le_to_cpu(&objs[i].l_ost_oi, &info->lti_ostid);
                idx = le32_to_cpu(objs[i].l_ost_idx);
                rc = ostid_to_fid(&info->lti_fid, &info->lti_ostid, idx);
@@ -771,6 +781,7 @@ out:
                                lu_object_put(env, &stripe[i]->do_lu);
 
                OBD_FREE(stripe, sizeof(stripe[0]) * stripe_len);
+               lo->ldo_stripenr = 0;
        } else {
                lo->ldo_stripe = stripe;
                lo->ldo_stripes_allocated = stripe_len;
@@ -830,22 +841,14 @@ out:
        RETURN(rc);
 }
 
-/*
- * Load and parse striping information, create in-core representation for the
- * stripes
- */
-int lod_load_striping(const struct lu_env *env, struct lod_object *lo)
+int lod_load_striping_locked(const struct lu_env *env, struct lod_object *lo)
 {
        struct lod_thread_info  *info = lod_env_info(env);
+       struct lu_buf           *buf  = &info->lti_buf;
        struct dt_object        *next = dt_object_child(&lo->ldo_obj);
-       int                      rc;
+       int                      rc = 0;
        ENTRY;
 
-       /*
-        * currently this code is supposed to be called from declaration
-        * phase only, thus the object is not expected to be locked by caller
-        */
-       dt_write_lock(env, next, 0);
        /* already initialized? */
        if (lo->ldo_stripe != NULL)
                GOTO(out, rc = 0);
@@ -853,28 +856,71 @@ int lod_load_striping(const struct lu_env *env, struct lod_object *lo)
        if (!dt_object_exists(next))
                GOTO(out, rc = 0);
 
-       /* only regular files can be striped */
-       if (!(lu_object_attr(lod2lu_obj(lo)) & S_IFREG))
+       /* Do not load stripe for slaves of striped dir */
+       if (lo->ldo_dir_slave_stripe)
                GOTO(out, rc = 0);
 
-       rc = lod_get_lov_ea(env, lo);
-       if (rc <= 0)
-               GOTO(out, rc);
+       if (S_ISREG(lu_object_attr(lod2lu_obj(lo)))) {
+               rc = lod_get_lov_ea(env, lo);
+               if (rc <= 0)
+                       GOTO(out, rc);
+               /*
+                * there is LOV EA (striping information) in this object
+                * let's parse it and create in-core objects for the stripes
+                */
+               buf->lb_buf = info->lti_ea_store;
+               buf->lb_len = info->lti_ea_store_size;
+               rc = lod_parse_striping(env, lo, buf);
+       } else if (S_ISDIR(lu_object_attr(lod2lu_obj(lo)))) {
+               rc = lod_get_lmv_ea(env, lo);
+               /* cast: comparing int rc against size_t hides rc < 0 */
+               if (rc < (int)sizeof(struct lmv_mds_md_v1))
+                       GOTO(out, rc = rc > 0 ? -EINVAL : rc);
+               buf->lb_buf = info->lti_ea_store;
+               buf->lb_len = info->lti_ea_store_size;
+               if (rc == sizeof(struct lmv_mds_md_v1)) {
+                       rc = lod_load_lmv_shards(env, lo, buf, true);
+                       if (buf->lb_buf != info->lti_ea_store) {
+                               OBD_FREE_LARGE(info->lti_ea_store,
+                                              info->lti_ea_store_size);
+                               info->lti_ea_store = buf->lb_buf;
+                               info->lti_ea_store_size = buf->lb_len;
+                       }
 
-       /*
-        * there is LOV EA (striping information) in this object
-        * let's parse it and create in-core objects for the stripes
-        */
-       info->lti_buf.lb_buf = info->lti_ea_store;
-       info->lti_buf.lb_len = info->lti_ea_store_size;
-       rc = lod_parse_striping(env, lo, &info->lti_buf);
+                       if (rc < 0)
+                               GOTO(out, rc);
+               }
+
+               /*
+                * there is LMV EA (striping information) in this object
+                * let's parse it and create in-core objects for the stripes
+                */
+               rc = lod_parse_dir_striping(env, lo, buf);
+       }
 out:
-       dt_write_unlock(env, next);
        RETURN(rc);
 }
 
+/**
+ * Load and parse striping information, create in-core representation for the
+ * stripes
+ **/
+int lod_load_striping(const struct lu_env *env, struct lod_object *lo)
+{
+       struct dt_object        *next = dt_object_child(&lo->ldo_obj);
+       int                     rc = 0;
+
+       /* currently this code is supposed to be called from declaration
+        * phase only, thus the object is not expected to be locked by caller */
+       dt_write_lock(env, next, 0);
+       rc = lod_load_striping_locked(env, lo);
+       dt_write_unlock(env, next);
+       return rc;
+}
+
+/* verify the striping information for directory */
 int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
-                       int specific)
+                       bool is_from_disk)
 {
        struct lov_user_md_v1   *lum;
        struct lov_user_md_v3   *lum3;
@@ -906,8 +952,11 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                GOTO(out, rc = -EINVAL);
        }
 
-       if ((specific && le32_to_cpu(lum->lmm_pattern) != LOV_PATTERN_RAID0) ||
-           (!specific && lum->lmm_pattern != 0)) {
+       /* the user uses "0" for default stripe pattern normally. */
+       if (!is_from_disk && lum->lmm_pattern == 0)
+               lum->lmm_pattern = cpu_to_le32(LOV_PATTERN_RAID0);
+
+       if (le32_to_cpu(lum->lmm_pattern) != LOV_PATTERN_RAID0) {
                CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
                       le32_to_cpu(lum->lmm_pattern));
                GOTO(out, rc = -EINVAL);
@@ -940,17 +989,17 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                }
        }
 
-       stripe_count = le16_to_cpu(lum->lmm_stripe_count);
        if (magic == LOV_USER_MAGIC_V1 || magic == LOV_MAGIC_V1_DEF)
                lum_size = offsetof(struct lov_user_md_v1,
-                                   lmm_objects[stripe_count]);
+                                   lmm_objects[0]);
        else if (magic == LOV_USER_MAGIC_V3 || magic == LOV_MAGIC_V3_DEF)
                lum_size = offsetof(struct lov_user_md_v3,
-                                   lmm_objects[stripe_count]);
+                                   lmm_objects[0]);
        else
-               LBUG();
+               GOTO(out, rc = -EINVAL);
 
-       if (specific && buf->lb_len != lum_size) {
+       stripe_count = le16_to_cpu(lum->lmm_stripe_count);
+       if (buf->lb_len != lum_size) {
                CDEBUG(D_IOCTL, "invalid buf len %zd for lov_user_md with "
                       "magic %#x and stripe_count %u\n",
                       buf->lb_len, magic, stripe_count);
@@ -980,7 +1029,7 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                        GOTO(out, rc = -EINVAL);
        }
 
-       if (specific && stripe_count > pool_tgt_count(pool)) {
+       if (is_from_disk && stripe_count > pool_tgt_count(pool)) {
                CDEBUG(D_IOCTL,
                       "stripe count %u > # OSTs %u in the pool\n",
                       stripe_count, pool_tgt_count(pool));
@@ -1000,8 +1049,8 @@ void lod_fix_desc_stripe_size(__u64 *val)
                if (*val != 0)
                        LCONSOLE_INFO("Increasing default stripe size to "
                                      "minimum value %u\n",
-                                     LOV_DEFAULT_STRIPE_SIZE);
-               *val = LOV_DEFAULT_STRIPE_SIZE;
+                                     LOV_DESC_STRIPE_SIZE_DEFAULT);
+               *val = LOV_DESC_STRIPE_SIZE_DEFAULT;
        } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) {
                *val &= ~(LOV_MIN_STRIPE_SIZE - 1);
                LCONSOLE_WARN("Changing default stripe size to "LPU64" (a "
@@ -1029,7 +1078,7 @@ void lod_fix_desc_qos_maxage(__u32 *val)
 {
        /* fix qos_maxage */
        if (*val == 0)
-               *val = QOS_DEFAULT_MAXAGE;
+               *val = LOV_DESC_QOS_MAXAGE_DEFAULT;
 }
 
 void lod_fix_desc(struct lov_desc *desc)