From 135092984c44a0594194abd8267c3e2eaf7187c1 Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Thu, 20 Sep 2012 13:04:52 +0400 Subject: [PATCH] LU-1303 lod: directories to inherit default striping on creation. the patch adds sanity check on default striping being set by an user. Signed-off-by: Alex Zhuravlev Change-Id: I4142a2189b00158454c7525939a9c6019a5e874a Reviewed-on: http://review.whamcloud.com/4056 Tested-by: Hudson Tested-by: Maloo Reviewed-by: wangdi Reviewed-by: Mike Pershin Reviewed-by: Andreas Dilger --- lustre/include/lustre/lustre_idl.h | 15 +++++ lustre/lod/lod_internal.h | 3 + lustre/lod/lod_lov.c | 134 +++++++++++++++++++++++++++++++++++++ lustre/lod/lod_object.c | 131 ++++++++++++++++++++++++++++++++++-- 4 files changed, 278 insertions(+), 5 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index f4c6784..5426309 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1383,6 +1383,21 @@ enum obdo_flags { #define LOV_MAGIC_JOIN_V1 0x0BD20BD0 #define LOV_MAGIC_V3 0x0BD30BD0 +/* + * magic for fully defined striping + * the idea is that we should have different magics for striping "hints" + * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct + * lov_mds_md_v[13]). at the moment the magics are used in the wire protocol, + * we can't just change them without lengthy preparation, but we still need a + * mechanism to allow LOD to differentiate hint versus ready striping. 
+ * so, at the moment we do a trick: MDT knows what to expect from request + * depending on the case (replay uses ready striping, non-replay req uses + * hints), so MDT replaces magic with appropriate one and now LOD can + * easily understand what's inside -bzzz + */ +#define LOV_MAGIC_V1_DEF 0x0CD10BD0 +#define LOV_MAGIC_V3_DEF 0x0CD30BD0 + #define LOV_PATTERN_RAID0 0x001 /* stripes are used round-robin */ #define LOV_PATTERN_RAID1 0x002 /* stripes are mirrors of each other */ #define LOV_PATTERN_FIRST 0x100 /* first stripe is not in round-robin */ diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index e0fedfb..8a76732 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -259,6 +259,9 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *mo, const struct lu_buf *buf); int lod_initialize_objects(const struct lu_env *env, struct lod_object *mo, struct lov_ost_data_v1 *objs); +int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt, + struct thandle *th); +int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, int specific); /* lod_pool.c */ int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index a793659..821012b 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -445,6 +445,56 @@ repeat: RETURN(rc); } +int lod_store_def_striping(const struct lu_env *env, struct dt_object *dt, + struct thandle *th) +{ + struct lod_thread_info *info = lod_env_info(env); + struct lod_object *lo = lod_dt_obj(dt); + struct dt_object *next = dt_object_child(dt); + struct lov_user_md_v3 *v3; + int rc; + ENTRY; + + LASSERT(S_ISDIR(dt->do_lu.lo_header->loh_attr)); + + /* + * store the default striping cached in this object as the LOV xattr of + * the new directory; used to implement defaults inheritance + */ + + /* probably nothing to inherit */ + if (lo->ldo_striping_cached == 0) + RETURN(0); + + if 
(LOVEA_DELETE_VALUES(lo->ldo_def_stripe_size, 
lo->ldo_def_stripenr, + lo->ldo_def_stripe_offset)) + RETURN(0); + + /* XXX: use thread info */ + OBD_ALLOC_PTR(v3); + if (v3 == NULL) + RETURN(-ENOMEM); + + v3->lmm_magic = cpu_to_le32(LOV_MAGIC_V3); + v3->lmm_pattern = cpu_to_le32(LOV_PATTERN_RAID0); + v3->lmm_object_id = 0; + v3->lmm_object_seq = 0; + v3->lmm_stripe_size = cpu_to_le32(lo->ldo_def_stripe_size); + v3->lmm_stripe_count = cpu_to_le16(lo->ldo_def_stripenr); + v3->lmm_stripe_offset = cpu_to_le16(lo->ldo_def_stripe_offset); + if (lo->ldo_pool) + strncpy(v3->lmm_pool_name, lo->ldo_pool, LOV_MAXPOOLNAME); + + info->lti_buf.lb_buf = v3; + info->lti_buf.lb_len = sizeof(*v3); + rc = dt_xattr_set(env, next, &info->lti_buf, XATTR_NAME_LOV, 0, th, + BYPASS_CAPA); + + OBD_FREE_PTR(v3); + + RETURN(rc); +} + /* * allocate array of objects pointers, find/create objects * stripenr and other fields should be initialized by this moment @@ -599,6 +649,90 @@ out: RETURN(rc); } +int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, + int specific) +{ + struct lov_user_md_v1 *lum; + struct lov_user_md_v3 *v3 = NULL; + struct pool_desc *pool = NULL; + int rc; + ENTRY; + + lum = buf->lb_buf; + + if (lum->lmm_magic != LOV_USER_MAGIC_V1 && + lum->lmm_magic != LOV_USER_MAGIC_V3 && + lum->lmm_magic != LOV_MAGIC_V1_DEF && + lum->lmm_magic != LOV_MAGIC_V3_DEF) { + CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#x\n", + lum->lmm_magic); + RETURN(-EINVAL); + } + + if ((specific && lum->lmm_pattern != LOV_PATTERN_RAID0) || + (specific == 0 && lum->lmm_pattern != 0)) { + CDEBUG(D_IOCTL, "bad userland stripe pattern: %#x\n", + lum->lmm_pattern); + RETURN(-EINVAL); + } + + /* 64kB is the largest common page size we see (ia64), and matches the + * check in lfs. NOTE(review): the debug message below says fixing, but the size is rejected with -EINVAL rather than adjusted */ + if (lum->lmm_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) { + CDEBUG(D_IOCTL, "stripe size %u not multiple of %u, fixing\n", + lum->lmm_stripe_size, LOV_MIN_STRIPE_SIZE); + RETURN(-EINVAL); + } + + /* an offset of -1 is treated as a "special" valid offset */ + if 
(lum->lmm_stripe_offset != (typeof(lum->lmm_stripe_offset))(-1)) { + /* if offset is not within valid range [0, osts_size) */ + if (lum->lmm_stripe_offset >= d->lod_osts_size) { + CDEBUG(D_IOCTL, "stripe offset %u >= bitmap size %u\n", + lum->lmm_stripe_offset, d->lod_osts_size); + RETURN(-EINVAL); + } + + /* if lmm_stripe_offset is *not* in bitmap */ + if (!cfs_bitmap_check(d->lod_ost_bitmap, + lum->lmm_stripe_offset)) { + CDEBUG(D_IOCTL, "stripe offset %u not in bitmap\n", + lum->lmm_stripe_offset); + RETURN(-EINVAL); + } + } + + if (lum->lmm_magic == LOV_USER_MAGIC_V3) /* NOTE(review): LOV_MAGIC_V3_DEF passes the magic check above but is not treated as v3 here, so no pool lookup is done for it - confirm this is intended */ + v3 = buf->lb_buf; + + if (v3) + pool = lod_find_pool(d, v3->lmm_pool_name); + + if (pool != NULL) { + __u16 offs = v3->lmm_stripe_offset; + + if (offs != (typeof(v3->lmm_stripe_offset))(-1)) { + rc = lod_check_index_in_pool(offs, pool); + if (rc < 0) { + lod_pool_putref(pool); + RETURN(-EINVAL); + } + } + + if (specific && lum->lmm_stripe_count > pool_tgt_count(pool)) { + CDEBUG(D_IOCTL, + "stripe count %u > # OSTs %u in the pool\n", + lum->lmm_stripe_count, pool_tgt_count(pool)); + lod_pool_putref(pool); + RETURN(-EINVAL); + } + + lod_pool_putref(pool); + } + + RETURN(0); +} + void lod_fix_desc_stripe_size(__u64 *val) { if (*val < PTLRPC_MAX_BRW_SIZE) { diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index bc063a9..30760cd 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -235,7 +235,44 @@ static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt, struct lu_buf *buf, const char *name, struct lustre_capa *capa) { - return dt_xattr_get(env, dt_object_child(dt), buf, name, capa); + struct lod_thread_info *info = lod_env_info(env); + struct lod_device *dev = lu2lod_dev(dt->do_lu.lo_dev); + int rc, is_root; + ENTRY; + + rc = dt_xattr_get(env, dt_object_child(dt), buf, name, capa); + if (rc != -ENODATA || !S_ISDIR(dt->do_lu.lo_header->loh_attr & S_IFMT)) + RETURN(rc); + + /* + * lod returns default striping on the real root of the device + * this is 
like the root stores default striping for the whole + * filesystem. historically we've been using a different approach + * and store it in the config. + */ + dt_root_get(env, dev->lod_child, &info->lti_fid); + is_root = lu_fid_eq(&info->lti_fid, lu_object_fid(&dt->do_lu)); + + if (is_root && strcmp(XATTR_NAME_LOV, name) == 0) { + struct lov_user_md *lum = buf->lb_buf; + struct lov_desc *desc = &dev->lod_desc; + + if (buf->lb_buf == NULL) { + rc = sizeof(struct lov_user_md_v1); + } else if (buf->lb_len >= sizeof(struct lov_user_md_v1)) { + lum->lmm_magic = LOV_USER_MAGIC_V1; + lum->lmm_object_seq = FID_SEQ_LOV_DEFAULT; + lum->lmm_pattern = desc->ld_pattern; + lum->lmm_stripe_size = desc->ld_default_stripe_size; + lum->lmm_stripe_count = desc->ld_default_stripe_count; + lum->lmm_stripe_offset = desc->ld_default_stripe_offset; + rc = sizeof(struct lov_user_md_v1); + } else { + rc = -ERANGE; + } + } + + RETURN(rc); } /* @@ -263,19 +300,87 @@ static int lod_declare_xattr_set(const struct lu_env *env, RETURN(rc); } +static int lod_xattr_set_lov_on_dir(const struct lu_env *env, + struct dt_object *dt, + const struct lu_buf *buf, + const char *name, int fl, + struct thandle *th, + struct lustre_capa *capa) +{ + struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev); + struct dt_object *next = dt_object_child(dt); + struct lod_object *l = lod_dt_obj(dt); + struct lov_user_md_v1 *lum; + struct lov_user_md_v3 *v3 = NULL; + int rc; + ENTRY; + + LASSERT(l->ldo_stripe == NULL); + l->ldo_striping_cached = 0; + l->ldo_def_striping_set = 0; + lod_object_set_pool(l, NULL); + l->ldo_def_stripe_size = 0; + l->ldo_def_stripenr = 0; + + LASSERT(buf); + LASSERT(buf->lb_buf); + lum = buf->lb_buf; + + rc = lod_verify_striping(d, buf, 0); + if (rc) + RETURN(rc); + + if (lum->lmm_magic == LOV_USER_MAGIC_V3) + v3 = buf->lb_buf; + + /* if { size, offset, count } = { 0, -1, 0 } and no pool + * (i.e. all default values specified) then delete default + * striping from dir; an -ENODATA result from the delete is treated as success. 
*/ + CDEBUG(D_OTHER, + "set default striping: sz %u # %u offset %d %s %s\n", + (unsigned)lum->lmm_stripe_size, + (unsigned)lum->lmm_stripe_count, + (int)lum->lmm_stripe_offset, + v3 ? "from" : "", v3 ? v3->lmm_pool_name : ""); + + if (LOVEA_DELETE_VALUES((lum->lmm_stripe_size), + (lum->lmm_stripe_count), + (lum->lmm_stripe_offset)) && + lum->lmm_magic == LOV_USER_MAGIC_V1) { + rc = dt_xattr_del(env, next, name, th, capa); + if (rc == -ENODATA) + rc = 0; + } else { + rc = dt_xattr_set(env, next, buf, name, fl, th, capa); + } + + RETURN(rc); +} + static int lod_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, struct thandle *th, struct lustre_capa *capa) { struct dt_object *next = dt_object_child(dt); + __u32 attr; int rc; ENTRY; - /* - * behave transparantly for all other EAs - */ - rc = dt_xattr_set(env, next, buf, name, fl, th, capa); + attr = dt->do_lu.lo_header->loh_attr & S_IFMT; + if (S_ISDIR(attr)) { + if (strncmp(name, XATTR_NAME_LOV, strlen(XATTR_NAME_LOV)) == 0) /* NOTE(review): prefix match, whereas lod_xattr_get() compares with strcmp() - confirm the difference is intended */ + rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, + fl, th, capa); + else + rc = dt_xattr_set(env, next, buf, name, fl, th, capa); + + } else { + /* + * behave transparently for all other EAs + */ + rc = dt_xattr_set(env, next, buf, name, fl, th, capa); + } RETURN(rc); } @@ -503,6 +608,7 @@ static int lod_declare_object_create(const struct lu_env *env, struct thandle *th) { struct dt_object *next = dt_object_child(dt); + struct lod_object *lo = lod_dt_obj(dt); int rc; ENTRY; @@ -521,6 +627,16 @@ static int lod_declare_object_create(const struct lu_env *env, if (dof->dof_type == DFT_SYM) dt->do_body_ops = &lod_body_lnk_ops; + if (dof->dof_type == DFT_DIR && lo->ldo_striping_cached) { + struct lod_thread_info *info = lod_env_info(env); + + info->lti_buf.lb_buf = NULL; + info->lti_buf.lb_len = sizeof(struct lov_user_md_v3); + /* to transfer default striping from the parent */ + rc = dt_declare_xattr_set(env, next, &info->lti_buf, 
XATTR_NAME_LOV, 0, th); + } + out: RETURN(rc); } @@ -537,6 +653,11 @@ static int lod_object_create(const struct lu_env *env, struct dt_object *dt, /* create local object */ rc = dt_create(env, next, attr, hint, dof, th); + if (rc == 0) { + if (S_ISDIR(dt->do_lu.lo_header->loh_attr)) + rc = lod_store_def_striping(env, dt, th); + } + RETURN(rc); } -- 1.8.3.1