Whamcloud - gitweb
LU-1303 lod: directories to inherit default striping
author: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Thu, 20 Sep 2012 09:04:52 +0000 (13:04 +0400)
committer: Oleg Drokin <green@whamcloud.com>
Fri, 28 Sep 2012 20:46:54 +0000 (16:46 -0400)
on creation. The patch adds a sanity check on default striping
being set by a user.

Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Change-Id: I4142a2189b00158454c7525939a9c6019a5e874a
Reviewed-on: http://review.whamcloud.com/4056
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: wangdi <di.wang@whamcloud.com>
Reviewed-by: Mike Pershin <tappro@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/lustre/lustre_idl.h
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_object.c

index f4c6784..5426309 100644 (file)
@@ -1383,6 +1383,21 @@ enum obdo_flags {
 #define LOV_MAGIC_JOIN_V1 0x0BD20BD0
 #define LOV_MAGIC_V3      0x0BD30BD0
 
+/*
+ * Magics for fully defined striping.
+ * The idea is that we should have different magics for striping "hints"
+ * (struct lov_user_md_v[13]) and defined, ready-to-use striping (struct
+ * lov_mds_md_v[13]). At the moment the magics are used in the wire
+ * protocol, so we can't just change them without lengthy preparation, but
+ * we still need a mechanism that lets LOD tell a hint from ready striping.
+ * So, for now we do a trick: MDT knows what to expect from a request
+ * depending on the case (replay uses ready striping, a non-replay request
+ * uses hints), so MDT replaces the magic with the appropriate one and LOD
+ * can easily understand what's inside. -bzzz
+ */
+#define LOV_MAGIC_V1_DEF  0x0CD10BD0
+#define LOV_MAGIC_V3_DEF  0x0CD30BD0
+
 #define LOV_PATTERN_RAID0 0x001   /* stripes are used round-robin */
 #define LOV_PATTERN_RAID1 0x002   /* stripes are mirrors of each other */
 #define LOV_PATTERN_FIRST 0x100   /* first stripe is not in round-robin */
index e0fedfb..8a76732 100644 (file)
@@ -259,6 +259,9 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *mo,
                       const struct lu_buf *buf);
 int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo,
                           struct lov_ost_data_v1 *objs);
+int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
+                          struct thandle *th);
+int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, int specific);
 
 /* lod_pool.c */
 int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count);
index a793659..821012b 100644 (file)
@@ -445,6 +445,56 @@ repeat:
        RETURN(rc);
 }
 
+/*
+ * Store the cached default striping of \a dt into its LOV xattr, so that
+ * a newly created directory inherits the defaults.
+ *
+ * Returns 0 when there is nothing to store, -ENOMEM if the temporary
+ * buffer cannot be allocated, else the result of dt_xattr_set().
+ */
+int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt,
+                          struct thandle *th)
+{
+       struct lod_thread_info  *tinfo = lod_env_info(env);
+       struct lod_object       *lobj = lod_dt_obj(dt);
+       struct dt_object        *child = dt_object_child(dt);
+       struct lov_user_md_v3   *lum;
+       int                      result;
+       ENTRY;
+
+       LASSERT(S_ISDIR(dt->do_lu.lo_header->loh_attr));
+
+       /* nothing cached, or only all-default values: nothing to inherit */
+       if (lobj->ldo_striping_cached == 0 ||
+           LOVEA_DELETE_VALUES(lobj->ldo_def_stripe_size,
+                               lobj->ldo_def_stripenr,
+                               lobj->ldo_def_stripe_offset))
+               RETURN(0);
+
+       /* XXX: use thread info */
+       OBD_ALLOC_PTR(lum);
+       if (lum == NULL)
+               RETURN(-ENOMEM);
+
+       /* multi-byte fields are converted to little-endian before storing */
+       lum->lmm_magic = cpu_to_le32(LOV_MAGIC_V3);
+       lum->lmm_pattern = cpu_to_le32(LOV_PATTERN_RAID0);
+       lum->lmm_object_id = 0;
+       lum->lmm_object_seq = 0;
+       lum->lmm_stripe_size = cpu_to_le32(lobj->ldo_def_stripe_size);
+       lum->lmm_stripe_count = cpu_to_le16(lobj->ldo_def_stripenr);
+       lum->lmm_stripe_offset = cpu_to_le16(lobj->ldo_def_stripe_offset);
+       if (lobj->ldo_pool)
+               strncpy(lum->lmm_pool_name, lobj->ldo_pool, LOV_MAXPOOLNAME);
+
+       tinfo->lti_buf.lb_len = sizeof(*lum);
+       tinfo->lti_buf.lb_buf = lum;
+       result = dt_xattr_set(env, child, &tinfo->lti_buf, XATTR_NAME_LOV, 0,
+                             th, BYPASS_CAPA);
+       OBD_FREE_PTR(lum);
+       RETURN(result);
+}
+
 /*
  * allocate array of objects pointers, find/create objects
  * stripenr and other fields should be initialized by this moment
@@ -599,6 +649,90 @@ out:
        RETURN(rc);
 }
 
+/*
+ * Sanity-check the striping description passed in \a buf.
+ * \a specific != 0 requires LOV_PATTERN_RAID0, otherwise the pattern must
+ * be 0. For LOV_USER_MAGIC_V3 with a known pool, the stripe offset and
+ * (if \a specific) the stripe count are also checked against the pool.
+ * Returns 0 if the striping is acceptable, -EINVAL otherwise.
+ */
+int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
+                       int specific)
+{
+       struct lov_user_md_v1   *lum = buf->lb_buf;
+       struct lov_user_md_v3   *v3;
+       struct pool_desc        *pool;
+       int                      rc = 0;
+       ENTRY;
+
+       /* never read the header of a missing or truncated buffer */
+       if (lum == NULL || buf->lb_len < sizeof(*lum)) {
+               CDEBUG(D_IOCTL, "striping buffer missing or too small\n");
+               RETURN(-EINVAL);
+       }
+
+       if (lum->lmm_magic != LOV_USER_MAGIC_V1 &&
+           lum->lmm_magic != LOV_USER_MAGIC_V3 &&
+           lum->lmm_magic != LOV_MAGIC_V1_DEF &&
+           lum->lmm_magic != LOV_MAGIC_V3_DEF) {
+               CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#x\n",
+                      lum->lmm_magic);
+               RETURN(-EINVAL);
+       }
+
+       if ((specific && lum->lmm_pattern != LOV_PATTERN_RAID0) ||
+           (specific == 0 && lum->lmm_pattern != 0)) {
+               CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n",
+                      lum->lmm_pattern);
+               RETURN(-EINVAL);
+       }
+
+       /* same check as in lfs; 64kB is the largest common page size (ia64) */
+       if (lum->lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
+               CDEBUG(D_IOCTL, "stripe size %u not multiple of %u\n",
+                      lum->lmm_stripe_size, LOV_MIN_STRIPE_SIZE);
+               RETURN(-EINVAL);
+       }
+
+       /* an offset of -1 is treated as a "special" valid offset */
+       if (lum->lmm_stripe_offset != (typeof(lum->lmm_stripe_offset))(-1)) {
+               /* if offset is not within valid range [0, osts_size) */
+               if (lum->lmm_stripe_offset >= d->lod_osts_size) {
+                       CDEBUG(D_IOCTL, "stripe offset %u >= bitmap size %u\n",
+                              lum->lmm_stripe_offset, d->lod_osts_size);
+                       RETURN(-EINVAL);
+               }
+               /* if lmm_stripe_offset is *not* in bitmap */
+               if (!cfs_bitmap_check(d->lod_ost_bitmap,
+                                     lum->lmm_stripe_offset)) {
+                       CDEBUG(D_IOCTL, "stripe offset %u not in bitmap\n",
+                              lum->lmm_stripe_offset);
+                       RETURN(-EINVAL);
+               }
+       }
+
+       /* pool checks apply to LOV_USER_MAGIC_V3 only; v3 needs more bytes */
+       if (lum->lmm_magic != LOV_USER_MAGIC_V3)
+               RETURN(0);
+       if (buf->lb_len < sizeof(*v3))
+               RETURN(-EINVAL);
+       v3 = buf->lb_buf;
+       pool = lod_find_pool(d, v3->lmm_pool_name);
+       if (pool == NULL)
+               RETURN(0);
+
+       if (v3->lmm_stripe_offset != (typeof(v3->lmm_stripe_offset))(-1) &&
+           lod_check_index_in_pool(v3->lmm_stripe_offset, pool) < 0) {
+               rc = -EINVAL;
+       } else if (specific && lum->lmm_stripe_count > pool_tgt_count(pool)) {
+               CDEBUG(D_IOCTL, "stripe count %u > # OSTs %u in the pool\n",
+                      lum->lmm_stripe_count, pool_tgt_count(pool));
+               rc = -EINVAL;
+       }
+       lod_pool_putref(pool);
+       RETURN(rc);
+}
+
 void lod_fix_desc_stripe_size(__u64 *val)
 {
        if (*val < PTLRPC_MAX_BRW_SIZE) {
index bc063a9..30760cd 100644 (file)
@@ -235,7 +235,44 @@ static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
                         struct lu_buf *buf, const char *name,
                         struct lustre_capa *capa)
 {
-       return dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
+       struct lod_thread_info  *info = lod_env_info(env);
+       struct lod_device       *dev = lu2lod_dev(dt->do_lu.lo_dev);
+       int                      rc, is_root;
+       ENTRY;
+
+       rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa);
+       if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT))
+               RETURN(rc);
+
+       /*
+        * LOD returns the default striping for the real root of the device,
+        * as if the root stored the default striping for the whole
+        * filesystem. Historically a different approach has been used:
+        * the defaults are stored in the config.
+        */
+       dt_root_get(env, dev->lod_child, &info->lti_fid);
+       is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu));
+
+       if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) {
+               struct lov_user_md *lum = buf->lb_buf;
+               struct lov_desc    *desc = &dev->lod_desc;
+
+               if (buf->lb_buf == NULL) {
+                       rc = sizeof(struct lov_user_md_v1);
+               } else if (buf->lb_len >= sizeof(struct lov_user_md_v1)) {
+                       lum->lmm_magic = LOV_USER_MAGIC_V1;
+                       lum->lmm_object_seq = FID_SEQ_LOV_DEFAULT;
+                       lum->lmm_pattern = desc->ld_pattern;
+                       lum->lmm_stripe_size = desc->ld_default_stripe_size;
+                       lum->lmm_stripe_count = desc->ld_default_stripe_count;
+                       lum->lmm_stripe_offset = desc->ld_default_stripe_offset;
+                       rc = sizeof(struct lov_user_md_v1);
+               } else {
+                       rc = -ERANGE;
+               }
+       }
+
+       RETURN(rc);
 }
 
 /*
@@ -263,19 +300,87 @@ static int lod_declare_xattr_set(const struct lu_env *env,
        RETURN(rc);
 }
 
+/*
+ * Set the default striping of directory \a dt via its LOV xattr; the
+ * default striping cached on the object is reset first. A request with
+ * LOV_USER_MAGIC_V1 and all-default values deletes the xattr instead.
+ */
+static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
+                                   struct dt_object *dt,
+                                   const struct lu_buf *buf,
+                                   const char *name, int fl,
+                                   struct thandle *th,
+                                   struct lustre_capa *capa)
+{
+       struct lod_device       *lod = lu2lod_dev(dt->do_lu.lo_dev);
+       struct dt_object        *child = dt_object_child(dt);
+       struct lod_object       *lo = lod_dt_obj(dt);
+       struct lov_user_md_v1   *v1;
+       struct lov_user_md_v3   *v3;
+       int                      rc;
+       ENTRY;
+
+       LASSERT(lo->ldo_stripe == NULL);
+       lo->ldo_striping_cached = 0;
+       lo->ldo_def_striping_set = 0;
+       lod_object_set_pool(lo, NULL);
+       lo->ldo_def_stripe_size = 0;
+       lo->ldo_def_stripenr = 0;
+
+       LASSERT(buf);
+       LASSERT(buf->lb_buf);
+       v1 = buf->lb_buf;
+
+       rc = lod_verify_striping(lod, buf, 0);
+       if (rc)
+               RETURN(rc);
+
+       v3 = v1->lmm_magic == LOV_USER_MAGIC_V3 ? buf->lb_buf : NULL;
+       CDEBUG(D_OTHER,
+               "set default striping: sz %u # %u offset %d %s %s\n",
+               (unsigned)v1->lmm_stripe_size,
+               (unsigned)v1->lmm_stripe_count,
+               (int)v1->lmm_stripe_offset,
+               v3 ? "from" : "", v3 ? v3->lmm_pool_name : "");
+
+       /* anything but an all-defaults v1 request is stored as is */
+       if (!(LOVEA_DELETE_VALUES((v1->lmm_stripe_size),
+                                 (v1->lmm_stripe_count),
+                                 (v1->lmm_stripe_offset)) &&
+             v1->lmm_magic == LOV_USER_MAGIC_V1)) {
+               rc = dt_xattr_set(env, child, buf, name, fl, th, capa);
+               RETURN(rc);
+       }
+
+       /* { size, offset, count } = { 0, -1, 0 }: delete; -ENODATA is fine */
+       rc = dt_xattr_del(env, child, name, th, capa);
+       RETURN(rc == -ENODATA ? 0 : rc);
+}
+
 static int lod_xattr_set(const struct lu_env *env,
                         struct dt_object *dt, const struct lu_buf *buf,
                         const char *name, int fl, struct thandle *th,
                         struct lustre_capa *capa)
 {
        struct dt_object *next = dt_object_child(dt);
+       __u32             attr;
        int               rc;
        ENTRY;
 
-       /*
-        * behave transparantly for all other EAs
-        */
-       rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
+       attr = dt->do_lu.lo_header->loh_attr & S_IFMT;
+       if (S_ISDIR(attr)) {
+               if (strncmp(name, XATTR_NAME_LOV, strlen(XATTR_NAME_LOV)) == 0)
+                       rc = lod_xattr_set_lov_on_dir(env, dt, buf, name,
+                                                     fl, th, capa);
+               else
+                       rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
+
+       } else {
+               /*
+                * behave transparently for all other EAs
+                */
+               rc = dt_xattr_set(env, next, buf, name, fl, th, capa);
+       }
 
        RETURN(rc);
 }
@@ -503,6 +608,7 @@ static int lod_declare_object_create(const struct lu_env *env,
                                     struct thandle *th)
 {
        struct dt_object   *next = dt_object_child(dt);
+       struct lod_object  *lo = lod_dt_obj(dt);
        int                 rc;
        ENTRY;
 
@@ -521,6 +627,16 @@ static int lod_declare_object_create(const struct lu_env *env,
        if (dof->dof_type == DFT_SYM)
                dt->do_body_ops = &lod_body_lnk_ops;
 
+       if (dof->dof_type == DFT_DIR && lo->ldo_striping_cached) {
+               struct lod_thread_info *info = lod_env_info(env);
+
+               info->lti_buf.lb_buf = NULL;
+               info->lti_buf.lb_len = sizeof(struct lov_user_md_v3);
+               /* to transfer default striping from the parent */
+               rc = dt_declare_xattr_set(env, next, &info->lti_buf,
+                                         XATTR_NAME_LOV, 0, th);
+       }
+
 out:
        RETURN(rc);
 }
@@ -537,6 +653,11 @@ static int lod_object_create(const struct lu_env *env, struct dt_object *dt,
        /* create local object */
        rc = dt_create(env, next, attr, hint, dof, th);
 
+       if (rc == 0) {
+               if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
+                       rc = lod_store_def_striping(env, dt, th);
+       }
+
        RETURN(rc);
 }