X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flod%2Flod_object.c;h=f4829a3ac0a86c8ed4546d816328f20c61f9c0b3;hb=bd7a2f9938a7edf09afd133601ca4181e109a7d0;hp=caa32fbf90ff1f4e80c3279d0e73264f71380ada;hpb=110fbf9f76006d240751f2902c4017ba84adc6ca;p=fs%2Flustre-release.git diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index caa32fb..f4829a3 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -920,13 +920,12 @@ int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo, /* The ent->lde_name is composed of ${FID}:${index} */ if (ent->lde_namelen < len + 1 || memcmp(ent->lde_name, name, len) != 0) { - CDEBUG(lod->lod_lmv_failout ? D_ERROR : D_INFO, - "%s: invalid shard name %.*s with the FID "DFID - " for the striped directory "DFID", %s\n", - lod2obd(lod)->obd_name, ent->lde_namelen, - ent->lde_name, PFID(&fid), - PFID(lu_object_fid(&obj->do_lu)), - lod->lod_lmv_failout ? "failout" : "skip"); + CDEBUG_LIMIT(lod->lod_lmv_failout ? D_ERROR : D_INFO, + "%s: invalid shard name %.*s with the FID "DFID" for the striped directory "DFID", %s\n", + lod2obd(lod)->obd_name, ent->lde_namelen, + ent->lde_name, PFID(&fid), + PFID(lu_object_fid(&obj->do_lu)), + lod->lod_lmv_failout ? "failout" : "skip"); if (lod->lod_lmv_failout) break; @@ -938,15 +937,15 @@ int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo, do { if (ent->lde_name[len] < '0' || ent->lde_name[len] > '9') { - CDEBUG(lod->lod_lmv_failout ? D_ERROR : D_INFO, - "%s: invalid shard name %.*s with the " - "FID "DFID" for the striped directory " - DFID", %s\n", - lod2obd(lod)->obd_name, ent->lde_namelen, - ent->lde_name, PFID(&fid), - PFID(lu_object_fid(&obj->do_lu)), - lod->lod_lmv_failout ? - "failout" : "skip"); + CDEBUG_LIMIT(lod->lod_lmv_failout ? 
+ D_ERROR : D_INFO, + "%s: invalid shard name %.*s with the FID "DFID" for the striped directory "DFID", %s\n", + lod2obd(lod)->obd_name, + ent->lde_namelen, + ent->lde_name, PFID(&fid), + PFID(lu_object_fid(&obj->do_lu)), + lod->lod_lmv_failout ? + "failout" : "skip"); if (lod->lod_lmv_failout) break; @@ -1144,6 +1143,7 @@ void lod_adjust_stripe_size(struct lod_layout_component *comp, } /* check stripe size is multiplier of comp_end */ if (comp_end != LUSTRE_EOF && + comp_end != comp->llc_extent.e_start && comp_end % comp->llc_stripe_size) { /* fix that even for defined stripe size but warn * about the problem, that must not happen @@ -1176,10 +1176,10 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo, struct lod_obj_stripe_cb_data *data) { struct lod_layout_component *lod_comp; - int i, j, rc; + int i, j, rc = 0; ENTRY; - LASSERT(lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL); + mutex_lock(&lo->ldo_layout_mutex); for (i = 0; i < lo->ldo_comp_cnt; i++) { lod_comp = &lo->ldo_comp_entries[i]; @@ -1199,7 +1199,7 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo, if (data->locd_comp_cb) { rc = data->locd_comp_cb(env, lo, i, data); if (rc) - RETURN(rc); + GOTO(unlock, rc); } /* could used just to do sth about component, not each @@ -1216,10 +1216,12 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo, continue; rc = data->locd_stripe_cb(env, lo, dt, th, i, j, data); if (rc != 0) - RETURN(rc); + GOTO(unlock, rc); } } - RETURN(0); +unlock: + mutex_unlock(&lo->ldo_layout_mutex); + RETURN(rc); } static bool lod_obj_attr_set_comp_skip_cb(const struct lu_env *env, @@ -2747,10 +2749,15 @@ static int lod_declare_layout_add(const struct lu_env *env, if (magic != LOV_USER_MAGIC_COMP_V1) RETURN(-EINVAL); + mutex_lock(&lo->ldo_layout_mutex); + array_cnt = lo->ldo_comp_cnt + comp_v1->lcm_entry_count; OBD_ALLOC_PTR_ARRAY(comp_array, array_cnt); - if (comp_array == NULL) + if (comp_array == 
NULL) { + mutex_unlock(&lo->ldo_layout_mutex); RETURN(-ENOMEM); + } + memcpy(comp_array, lo->ldo_comp_entries, sizeof(*comp_array) * lo->ldo_comp_cnt); @@ -2807,6 +2814,8 @@ static int lod_declare_layout_add(const struct lu_env *env, LASSERT(lo->ldo_mirror_count == 1); lo->ldo_mirrors[0].lme_end = array_cnt - 1; + mutex_unlock(&lo->ldo_layout_mutex); + RETURN(0); error: @@ -2819,6 +2828,8 @@ error: } } OBD_FREE_PTR_ARRAY(comp_array, array_cnt); + mutex_unlock(&lo->ldo_layout_mutex); + RETURN(rc); } @@ -2914,6 +2925,7 @@ static int lod_declare_layout_set(const struct lu_env *env, RETURN(-EINVAL); } + mutex_lock(&lo->ldo_layout_mutex); for (i = 0; i < comp_v1->lcm_entry_count; i++) { __u32 id = comp_v1->lcm_entries[i].lcme_id; __u32 flags = comp_v1->lcm_entries[i].lcme_flags; @@ -2923,7 +2935,8 @@ static int lod_declare_layout_set(const struct lu_env *env, if (flags & LCME_FL_INIT) { if (changed) - lod_striping_free(env, lo); + lod_striping_free_nolock(env, lo); + mutex_unlock(&lo->ldo_layout_mutex); RETURN(-EINVAL); } @@ -2946,8 +2959,11 @@ static int lod_declare_layout_set(const struct lu_env *env, if (flags) { if ((flags & LCME_FL_STALE) && lod_last_non_stale_mirror(mirror_id, - lo)) + lo)) { + mutex_unlock( + &lo->ldo_layout_mutex); RETURN(-EUCLEAN); + } lod_comp->llc_flags |= flags; } if (mirror_flag) { @@ -2960,6 +2976,7 @@ static int lod_declare_layout_set(const struct lu_env *env, changed = true; } } + mutex_unlock(&lo->ldo_layout_mutex); if (!changed) { CDEBUG(D_LAYOUT, "%s: requested component(s) not found.\n", @@ -3042,9 +3059,13 @@ static int lod_declare_layout_del(const struct lu_env *env, flags = 0; } + mutex_lock(&lo->ldo_layout_mutex); + left = lo->ldo_comp_cnt; - if (left <= 0) + if (left <= 0) { + mutex_unlock(&lo->ldo_layout_mutex); RETURN(-EINVAL); + } for (i = (lo->ldo_comp_cnt - 1); i >= 0; i--) { struct lod_layout_component *lod_comp; @@ -3061,6 +3082,7 @@ static int lod_declare_layout_del(const struct lu_env *env, if (left != (i + 1)) { 
CDEBUG(D_LAYOUT, "%s: this deletion will create " "a hole.\n", lod2obd(d)->obd_name); + mutex_unlock(&lo->ldo_layout_mutex); RETURN(-EINVAL); } left--; @@ -3079,8 +3101,10 @@ static int lod_declare_layout_del(const struct lu_env *env, if (obj == NULL) continue; rc = lod_sub_declare_destroy(env, obj, th); - if (rc) + if (rc) { + mutex_unlock(&lo->ldo_layout_mutex); RETURN(rc); + } } } @@ -3088,9 +3112,12 @@ static int lod_declare_layout_del(const struct lu_env *env, if (left == lo->ldo_comp_cnt) { CDEBUG(D_LAYOUT, "%s: requested component id:%#x not found\n", lod2obd(d)->obd_name, id); + mutex_unlock(&lo->ldo_layout_mutex); RETURN(-EINVAL); } + mutex_unlock(&lo->ldo_layout_mutex); + memset(attr, 0, sizeof(*attr)); attr->la_valid = LA_SIZE; rc = lod_sub_declare_attr_set(env, next, attr, th); @@ -3422,6 +3449,196 @@ static int lod_declare_layout_split(const struct lu_env *env, RETURN(rc); } +static int lod_layout_declare_or_purge_mirror(const struct lu_env *env, + struct dt_object *dt, const struct lu_buf *buf, + struct thandle *th, bool declare) +{ + struct lod_thread_info *info = lod_env_info(env); + struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev); + struct lod_object *lo = lod_dt_obj(dt); + struct lov_comp_md_v1 *comp_v1 = buf->lb_buf; + struct lov_comp_md_entry_v1 *entry; + struct lov_mds_md_v1 *lmm; + struct dt_object **sub_objs = NULL; + int rc = 0, i, k, array_count = 0; + + ENTRY; + + /** + * other ops (like lod_declare_destroy) could be destroying sub objects + * as well.
+ */ + mutex_lock(&lo->ldo_layout_mutex); + + if (!declare) { + /* prepare sub-objects array */ + for (i = 0; i < comp_v1->lcm_entry_count; i++) { + entry = &comp_v1->lcm_entries[i]; + + if (!(entry->lcme_flags & LCME_FL_INIT)) + continue; + + lmm = (struct lov_mds_md_v1 *) + ((char *)comp_v1 + entry->lcme_offset); + array_count += lmm->lmm_stripe_count; + } + OBD_ALLOC_PTR_ARRAY(sub_objs, array_count); + if (sub_objs == NULL) { + mutex_unlock(&lo->ldo_layout_mutex); + RETURN(-ENOMEM); + } + } + + k = 0; /* sub_objs index */ + for (i = 0; i < comp_v1->lcm_entry_count; i++) { + struct lov_ost_data_v1 *objs; + struct lu_object *o, *n; + struct dt_object *dto; + struct lu_device *nd; + struct lov_mds_md_v3 *v3; + __u32 idx; + int j; + + entry = &comp_v1->lcm_entries[i]; + + if (!(entry->lcme_flags & LCME_FL_INIT)) + continue; + + lmm = (struct lov_mds_md_v1 *) + ((char *)comp_v1 + entry->lcme_offset); + v3 = (struct lov_mds_md_v3 *)lmm; + if (lmm->lmm_magic == LOV_MAGIC_V3) + objs = &v3->lmm_objects[0]; + else + objs = &lmm->lmm_objects[0]; + + for (j = 0; j < lmm->lmm_stripe_count; j++) { + idx = objs[j].l_ost_idx; + rc = ostid_to_fid(&info->lti_fid, &objs[j].l_ost_oi, + idx); + if (rc) + GOTO(out, rc); + + if (!fid_is_sane(&info->lti_fid)) { + CERROR("%s: sub-object insane fid "DFID"\n", + lod2obd(d)->obd_name, + PFID(&info->lti_fid)); + GOTO(out, rc = -EINVAL); + } + + lod_getref(&d->lod_ost_descs); + + rc = validate_lod_and_idx(d, idx); + if (unlikely(rc)) { + lod_putref(d, &d->lod_ost_descs); + GOTO(out, rc); + } + + nd = &OST_TGT(d, idx)->ltd_tgt->dd_lu_dev; + lod_putref(d, &d->lod_ost_descs); + + o = lu_object_find_at(env, nd, &info->lti_fid, NULL); + if (IS_ERR(o)) + GOTO(out, rc = PTR_ERR(o)); + + n = lu_object_locate(o->lo_header, nd->ld_type); + if (unlikely(!n)) { + lu_object_put(env, o); + GOTO(out, rc = -ENOENT); + } + + dto = container_of(n, struct dt_object, do_lu); + + if (declare) { + rc = lod_sub_declare_destroy(env, dto, th); + dt_object_put(env, 
dto); + if (rc) + GOTO(out, rc); + } else { + /** + * collect to-be-destroyed sub objects, the + * reference would be released after actual + * deletion. + */ + sub_objs[k] = dto; + k++; + } + } /* for each stripe */ + } /* for each component in the mirror */ +out: + if (!declare) { + i = 0; + if (!rc) { + /* destroy the sub objects */ + for (; i < k; i++) { + rc = lod_sub_destroy(env, sub_objs[i], th); + if (rc) + break; + dt_object_put(env, sub_objs[i]); + } + } + /** + * if collecting or destroying failed, release the references + * still held on the collected sub objects. + */ + for (; i < k; i++) + dt_object_put(env, sub_objs[i]); + + OBD_FREE_PTR_ARRAY(sub_objs, array_count); + } + mutex_unlock(&lo->ldo_layout_mutex); + + RETURN(rc); +} + +/** + * Declare the purge of a mirror: validate the layout stored in @buf and + * declare the destruction of its sub objects. + */ +static int lod_declare_layout_purge(const struct lu_env *env, + struct dt_object *dt, const struct lu_buf *buf, + struct thandle *th) +{ + struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev); + struct lov_comp_md_v1 *comp_v1 = buf->lb_buf; + int rc; + + ENTRY; + + if (le32_to_cpu(comp_v1->lcm_magic) != LOV_MAGIC_COMP_V1) { + CERROR("%s: invalid layout magic %#x != %#x\n", + lod2obd(d)->obd_name, le32_to_cpu(comp_v1->lcm_magic), + LOV_MAGIC_COMP_V1); + RETURN(-EINVAL); + } + + if (cpu_to_le32(LOV_MAGIC_COMP_V1) != LOV_MAGIC_COMP_V1) + lustre_swab_lov_comp_md_v1(comp_v1); + + /* from now on, @buf contains cpu endian data */ + + if (comp_v1->lcm_mirror_count != 0) { + CERROR("%s: can only purge one mirror from "DFID"\n", + lod2obd(d)->obd_name, PFID(lu_object_fid(&dt->do_lu))); + RETURN(-EINVAL); + } + + /* declare sub objects deletion in the mirror stored in @buf */ + rc = lod_layout_declare_or_purge_mirror(env, dt, buf, th, true); + RETURN(rc); +} + +/* delete sub objects from the mirror stored in @buf */ +static int lod_layout_purge(const struct lu_env *env, struct dt_object *dt, + const 
struct lu_buf *buf, struct thandle *th) +{ + int rc; + + ENTRY; + rc = lod_layout_declare_or_purge_mirror(env, dt, buf, th, false); + RETURN(rc); +} + /** * Implementation of dt_object_operations::do_declare_xattr_set. * @@ -3446,7 +3663,8 @@ static int lod_declare_xattr_set(const struct lu_env *env, mode = dt->do_lu.lo_header->loh_attr & S_IFMT; if ((S_ISREG(mode) || mode == 0) && - !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE | LU_XATTR_SPLIT)) && + !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE | LU_XATTR_SPLIT | + LU_XATTR_PURGE)) && (strcmp(name, XATTR_NAME_LOV) == 0 || strcmp(name, XATTR_LUSTRE_LOV) == 0)) { /* @@ -3476,6 +3694,10 @@ static int lod_declare_xattr_set(const struct lu_env *env, LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 || strcmp(name, XATTR_LUSTRE_LOV) == 0); rc = lod_declare_layout_split(env, dt, buf, th); + } else if (fl & LU_XATTR_PURGE) { + LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 || + strcmp(name, XATTR_LUSTRE_LOV) == 0); + rc = lod_declare_layout_purge(env, dt, buf, th); } else if (S_ISREG(mode) && strlen(name) >= sizeof(XATTR_LUSTRE_LOV) + 3 && allowed_lustre_lov(name)) { @@ -4150,7 +4372,7 @@ static int lod_generate_and_set_lovea(const struct lu_env *env, LASSERT(lo); if (lo->ldo_comp_cnt == 0 && !lo->ldo_is_foreign) { - lod_striping_free(env, lo); + lod_striping_free_nolock(env, lo); rc = lod_sub_xattr_del(env, next, XATTR_NAME_LOV, th); RETURN(rc); } @@ -4446,6 +4668,8 @@ static int lod_layout_del(const struct lu_env *env, struct dt_object *dt, LASSERT(lo->ldo_mirror_count == 1); + mutex_lock(&lo->ldo_layout_mutex); + rc = lod_layout_del_prep_layout(env, lo, th); if (rc < 0) GOTO(out, rc); @@ -4473,7 +4697,10 @@ static int lod_layout_del(const struct lu_env *env, struct dt_object *dt, EXIT; out: if (rc) - lod_striping_free(env, lo); + lod_striping_free_nolock(env, lo); + + mutex_unlock(&lo->ldo_layout_mutex); + return rc; } @@ -4599,6 +4826,8 @@ static int lod_xattr_set(const struct lu_env *env, lod_striping_free(env, lod_dt_obj(dt)); rc = 
lod_sub_xattr_set(env, next, buf, name, fl, th); + } else if (fl & LU_XATTR_PURGE) { + rc = lod_layout_purge(env, dt, buf, th); } else if (dt_object_remote(dt)) { /* This only happens during migration, see * mdd_migrate_create(), in which Master MDT will @@ -5741,6 +5970,8 @@ int lod_striped_create(const struct lu_env *env, struct dt_object *dt, int rc = 0, i, j; ENTRY; + mutex_lock(&lo->ldo_layout_mutex); + LASSERT((lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL) || lo->ldo_is_foreign); @@ -5799,15 +6030,20 @@ int lod_striped_create(const struct lu_env *env, struct dt_object *dt, if (rc) GOTO(out, rc); + lo->ldo_comp_cached = 1; + rc = lod_generate_and_set_lovea(env, lo, th); if (rc) GOTO(out, rc); - lo->ldo_comp_cached = 1; + mutex_unlock(&lo->ldo_layout_mutex); + RETURN(0); out: - lod_striping_free(env, lo); + lod_striping_free_nolock(env, lo); + mutex_unlock(&lo->ldo_layout_mutex); + RETURN(rc); } @@ -5867,11 +6103,12 @@ lod_obj_stripe_destroy_cb(const struct lu_env *env, struct lod_object *lo, { if (data->locd_declare) return lod_sub_declare_destroy(env, dt, th); - else if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_SPEOBJ) || - stripe_idx == cfs_fail_val) + + if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_SPEOBJ) || + stripe_idx == cfs_fail_val) return lod_sub_destroy(env, dt, th); - else - return 0; + + return 0; } /** @@ -7150,7 +7387,7 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo, } /* 2nd pick is for the primary mirror containing unavail OST */ - if (lo->ldo_mirrors[index].lme_primary && second_pick < 0) + if (lo->ldo_mirrors[index].lme_prefer && second_pick < 0) second_pick = index; /* 3rd pick is for non-primary mirror containing unavail OST */ @@ -7161,7 +7398,7 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo, * we found a non-primary 1st pick, we'd like to find a * potential pirmary mirror. 
*/ - if (picked >= 0 && !lo->ldo_mirrors[index].lme_primary) + if (picked >= 0 && !lo->ldo_mirrors[index].lme_prefer) continue; /* check the availability of OSTs */ @@ -7198,7 +7435,7 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo, * primary with all OSTs are available, this is the perfect * 1st pick. */ - if (lo->ldo_mirrors[index].lme_primary) + if (lo->ldo_mirrors[index].lme_prefer) break; } /* for all mirrors */ @@ -7442,22 +7679,15 @@ static int lod_declare_update_write_pending(const struct lu_env *env, LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE || mlc->mlc_opc == MD_LAYOUT_RESYNC); - /* look for the primary mirror */ + /* look for the first preferred mirror */ for (i = 0; i < lo->ldo_mirror_count; i++) { if (lo->ldo_mirrors[i].lme_stale) continue; - if (lo->ldo_mirrors[i].lme_primary == 0) + if (lo->ldo_mirrors[i].lme_prefer == 0) continue; - if (unlikely(primary >= 0)) { - CERROR(DFID " has multiple primary: %u / %u\n", - PFID(lod_object_fid(lo)), - lo->ldo_mirrors[i].lme_id, - lo->ldo_mirrors[primary].lme_id); - RETURN(-EIO); - } - primary = i; + break; } if (primary < 0) { /* no primary, use any in-sync */