X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flod%2Flod_qos.c;h=2b8f1d16b5a02cd77ab37ef0964f2595a997a033;hb=75ee8334498f948ecd030ad5edb22bc596f300fc;hp=2b81ad840839925ca2fcebbd89e859a56c38b5cc;hpb=42472bb9507816a5fd413bc69b17a4fe8ff92d3c;p=fs%2Flustre-release.git diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 2b81ad8..2b8f1d1 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -23,7 +23,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved * Use is subject to license terms. * - * Copyright (c) 2012, Intel Corporation. + * Copyright (c) 2012, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -35,9 +35,9 @@ #define DEBUG_SUBSYSTEM S_LOV +#include #include #include -#include #include #include "lod_internal.h" @@ -61,7 +61,7 @@ int qos_add_tgt(struct lod_device *lod, struct lod_tgt_desc *ost_desc) { - struct lov_qos_oss *oss = NULL, *temposs; + struct lod_qos_oss *oss = NULL, *temposs; struct obd_export *exp = ost_desc->ltd_exp; int rc = 0, found = 0; cfs_list_t *list; @@ -120,7 +120,7 @@ out: int qos_del_tgt(struct lod_device *lod, struct lod_tgt_desc *ost_desc) { - struct lov_qos_oss *oss; + struct lod_qos_oss *oss; int rc = 0; ENTRY; @@ -159,6 +159,10 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, if (rc && rc != -ENOTCONN) CERROR("%s: statfs: rc = %d\n", lod2obd(d)->obd_name, rc); + /* If the OST is readonly then we can't allocate objects there */ + if (sfs->os_state & OS_STATE_READONLY) + rc = -EROFS; + /* check whether device has changed state (active, inactive) */ if (rc != 0 && ost->ltd_active) { /* turned inactive? */ @@ -175,7 +179,9 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, spin_unlock(&d->lod_desc_lock); } else if (rc == 0 && ost->ltd_active == 0) { /* turned active? */ - LASSERT(d->lod_desc.ld_active_tgt_count < d->lod_ostnr); + LASSERTF(d->lod_desc.ld_active_tgt_count < d->lod_ostnr, + "active tgt count %d, ost nr %d\n", + d->lod_desc.ld_active_tgt_count, d->lod_ostnr); spin_lock(&d->lod_desc_lock); if (ost->ltd_active == 0) { ost->ltd_active = 1; @@ -188,7 +194,7 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, spin_unlock(&d->lod_desc_lock); } - return rc; + RETURN(rc); } static void lod_qos_statfs_update(const struct lu_env *env, @@ -225,13 +231,14 @@ static void lod_qos_statfs_update(const struct lu_env *env, out: up_write(&lod->lod_qos.lq_rw_sem); + EXIT; } /* Recalculate per-object penalties for OSSs and OSTs, depends on size of each ost in an oss */ static int lod_qos_calc_ppo(struct lod_device *lod) { - struct lov_qos_oss *oss; + struct lod_qos_oss *oss; __u64 ba_max, ba_min, temp; __u32 num_active; int rc, i, prio_wide; @@ -277,7 +284,7 @@ static int lod_qos_calc_ppo(struct lod_device *lod) /* per-OST penalty is prio * TGT_bavail / (num_ost - 1) / 2 */ temp >>= 1; - lov_do_div64(temp, num_active); + do_div(temp, num_active); OST_TGT(lod,i)->ltd_qos.ltq_penalty_per_obj = (temp * prio_wide) >> 8; @@ -307,7 +314,7 @@ static int lod_qos_calc_ppo(struct lod_device *lod) /* Per-OSS penalty is prio * oss_avail / oss_osts / (num_oss - 1) / 2 */ cfs_list_for_each_entry(oss, &lod->lod_qos.lq_oss_list, lqo_oss_list) { temp = oss->lqo_bavail >> 1; - lov_do_div64(temp, oss->lqo_ost_count * num_active); + do_div(temp, oss->lqo_ost_count * num_active); oss->lqo_penalty_per_obj = (temp * prio_wide) >> 8; age = (now - oss->lqo_used) >> 3; @@ -364,7 +371,7 @@ static int lod_qos_used(struct lod_device *lod, struct ost_pool *osts, __u32 index, __u64 *total_wt) { struct lod_tgt_desc *ost; - struct lov_qos_oss *oss; + struct lod_qos_oss *oss; int j; ENTRY; @@ -440,9 +447,9 @@ static int lod_qos_used(struct lod_device *lod, struct ost_pool *osts, #define LOV_QOS_EMPTY ((__u32)-1) /* compute optimal round-robin order, based on OSTs per OSS */ static int lod_qos_calc_rr(struct lod_device *lod, struct ost_pool *src_pool, - struct lov_qos_rr *lqr) + struct lod_qos_rr *lqr) { - struct lov_qos_oss *oss; + struct lod_qos_oss *oss; struct lod_tgt_desc *ost; unsigned placed, real_count; int i, rc; @@ -617,8 +624,6 @@ static int inline lod_qos_dev_is_full(struct obd_statfs *msfs) return (msfs->os_bavail < used); } -int lod_ea_store_resize(struct lod_thread_info *info, int size); - static inline int lod_qos_ost_in_use_clear(const struct lu_env *env, int stripes) { struct lod_thread_info *info = lod_env_info(env); @@ -629,7 +634,7 @@ static inline int lod_qos_ost_in_use_clear(const struct lu_env *env, int stripes CERROR("can't allocate memory for ost-in-use array\n"); return -ENOMEM; } - memset(info->lti_ea_store, 0, sizeof(int) * stripes); + memset(info->lti_ea_store, -1, sizeof(int) * stripes); return 0; } @@ -664,7 +669,7 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo, struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs; struct pool_desc *pool = NULL; struct ost_pool *osts; - struct lov_qos_rr *lqr; + struct lod_qos_rr *lqr; struct dt_object *o; unsigned array_idx; int i, rc; @@ -768,7 +773,7 @@ repeat_find: /* * try to use another OSP if this one is degraded */ - if (sfs->os_state == OS_STATE_DEGRADED && speed < 2) { + if (sfs->os_state & OS_STATE_DEGRADED && speed < 2) { QOS_DEBUG("#%d: degraded\n", ost_idx); continue; } @@ -918,6 +923,7 @@ repeat_find: /* * We've successfuly declared (reserved) an object */ + lod_qos_ost_in_use(env, stripe_num, ost_idx); stripe[stripe_num] = o; stripe_num++; @@ -1197,8 +1203,8 @@ static __u16 lod_get_stripecnt(struct lod_device *lod, __u32 magic, /* stripe count is based on whether OSD can handle larger EA sizes */ if (lod->lod_osd_max_easize > 0) - max_stripes = lov_mds_md_stripecnt(lod->lod_osd_max_easize, - magic); + max_stripes = lov_mds_md_max_stripe_count( + lod->lod_osd_max_easize, magic); return (stripe_count < max_stripes) ? stripe_count : max_stripes; } @@ -1207,41 +1213,40 @@ static int lod_use_defined_striping(const struct lu_env *env, struct lod_object *mo, const struct lu_buf *buf) { - struct lod_device *d = lu2lod_dev(lod2lu_obj(mo)->lo_dev); struct lov_mds_md_v1 *v1 = buf->lb_buf; struct lov_mds_md_v3 *v3 = buf->lb_buf; struct lov_ost_data_v1 *objs; __u32 magic; - int rc; + int rc = 0; ENTRY; - rc = lod_verify_striping(d, buf, 1); - if (rc) - RETURN(rc); - magic = le32_to_cpu(v1->lmm_magic); if (magic == LOV_MAGIC_V1_DEF) { + magic = LOV_MAGIC_V1; objs = &v1->lmm_objects[0]; } else if (magic == LOV_MAGIC_V3_DEF) { + magic = LOV_MAGIC_V3; objs = &v3->lmm_objects[0]; lod_object_set_pool(mo, v3->lmm_pool_name); } else { GOTO(out, rc = -EINVAL); } - /* - * LOD shouldn't be aware of recovery at all, - * but this track recovery status (to some extent) - * to be do additional checks like this one - */ - LASSERT(d->lod_recovery_completed == 0); - + mo->ldo_pattern = le32_to_cpu(v1->lmm_pattern); mo->ldo_stripe_size = le32_to_cpu(v1->lmm_stripe_size); mo->ldo_stripenr = le16_to_cpu(v1->lmm_stripe_count); mo->ldo_layout_gen = le16_to_cpu(v1->lmm_layout_gen); + + /* fixup for released file before object initialization */ + if (mo->ldo_pattern & LOV_PATTERN_F_RELEASED) { + mo->ldo_released_stripenr = mo->ldo_stripenr; + mo->ldo_stripenr = 0; + } + LASSERT(buf->lb_len >= lov_mds_md_size(mo->ldo_stripenr, magic)); - rc = lod_initialize_objects(env, mo, objs); + if (mo->ldo_stripenr > 0) + rc = lod_initialize_objects(env, mo, objs); out: RETURN(rc); @@ -1285,10 +1290,14 @@ static int lod_qos_parse_config(const struct lu_env *env, RETURN(-EINVAL); } - if (v1->lmm_pattern != 0 && v1->lmm_pattern != LOV_PATTERN_RAID0) { + v1->lmm_magic = magic; + if (v1->lmm_pattern == 0) + v1->lmm_pattern = LOV_PATTERN_RAID0; + if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0) { CERROR("invalid pattern: %x\n", v1->lmm_pattern); RETURN(-EINVAL); } + lo->ldo_pattern = v1->lmm_pattern; if (v1->lmm_stripe_size) lo->ldo_stripe_size = v1->lmm_stripe_size; @@ -1342,6 +1351,12 @@ static int lod_qos_parse_config(const struct lu_env *env, } else lod_object_set_pool(lo, NULL); + /* fixup for released file */ + if (lo->ldo_pattern & LOV_PATTERN_F_RELEASED) { + lo->ldo_released_stripenr = lo->ldo_stripenr; + lo->ldo_stripenr = 0; + } + RETURN(0); } @@ -1379,11 +1394,20 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, if (rc) GOTO(out, rc); + /* A released file is being created */ + if (lo->ldo_stripenr == 0) + GOTO(out, rc = 0); + if (likely(lo->ldo_stripe == NULL)) { /* * no striping has been created so far */ LASSERT(lo->ldo_stripenr > 0); + /* + * statfs and check OST targets now, since ld_active_tgt_count + * could be changed if some OSTs are [de]activated manually. + */ + lod_qos_statfs_update(env, d); lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC, lo->ldo_stripenr); @@ -1394,8 +1418,9 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, lod_getref(&d->lod_ost_descs); /* XXX: support for non-0 files w/o objects */ + CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n", + d->lod_desc.ld_tgt_count, stripe_len); if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) { - lod_qos_statfs_update(env, d); rc = lod_alloc_qos(env, lo, stripe, flag, th); if (rc == -EAGAIN) rc = lod_alloc_rr(env, lo, stripe, flag, th);