From 7ff7b6e6f32e5937bd33f184fc0026d9da756565 Mon Sep 17 00:00:00 2001 From: wangdi Date: Fri, 8 Nov 2013 03:42:23 -0800 Subject: [PATCH] LU-1187 lod: reorganize lod_ost Move lod_ost to lod_tgt_desc, so LOD can be used to manage OSP for both MDT and OST. Signed-off-by: wang di Change-Id: I01c8033ba53e6729bfb62588306f48073f505b17 Reviewed-on: http://review.whamcloud.com/4921 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Tested-by: Maloo --- lustre/include/lu_object.h | 8 +- lustre/lod/lod_dev.c | 137 ++++++++++++++++-------- lustre/lod/lod_internal.h | 90 ++++++++++------ lustre/lod/lod_lov.c | 260 +++++++++++++++++++++++---------------------- lustre/lod/lod_pool.c | 10 +- lustre/lod/lod_qos.c | 18 ++-- lustre/lod/lproc_lod.c | 8 +- 7 files changed, 305 insertions(+), 226 deletions(-) diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 4bc6359..82b136a 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -490,10 +490,10 @@ struct lu_object { * Depth. Top level layer depth is 0. */ int lo_depth; - /** - * Flags from enum lu_object_flags. - */ - unsigned long lo_flags; + /** + * Flags from enum lu_object_flags. + */ + __u32 lo_flags; /** * Link to the device, for debugging. 
*/ diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 9bf10d8..194e496 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -132,6 +132,39 @@ struct lu_object *lod_object_alloc(const struct lu_env *env, return lu_obj; } +static int lod_cleanup_desc_tgts(const struct lu_env *env, + struct lod_device *lod, + struct lod_tgt_descs *ltd, + struct lustre_cfg *lcfg) +{ + struct lu_device *next; + int rc = 0; + int i; + + lod_getref(ltd); + if (ltd->ltd_tgts_size <= 0) { + lod_putref(lod, ltd); + return 0; + } + cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { + struct lod_tgt_desc *tgt; + int rc1; + + tgt = LTD_TGT(ltd, i); + LASSERT(tgt && tgt->ltd_tgt); + next = &tgt->ltd_tgt->dd_lu_dev; + rc1 = next->ld_ops->ldo_process_config(env, next, lcfg); + if (rc1) { + CERROR("%s: error cleaning up LOD index %u: cmd %#x" + ": rc = %d\n", lod2obd(lod)->obd_name, i, + lcfg->lcfg_command, rc1); + rc = rc1; + } + } + lod_putref(lod, ltd); + return rc; +} + static int lod_process_config(const struct lu_env *env, struct lu_device *dev, struct lustre_cfg *lcfg) @@ -139,11 +172,10 @@ static int lod_process_config(const struct lu_env *env, struct lod_device *lod = lu2lod_dev(dev); struct lu_device *next = &lod->lod_child->dd_lu_dev; char *arg1; - int rc, i; + int rc; ENTRY; switch(lcfg->lcfg_command) { - case LCFG_LOV_DEL_OBD: case LCFG_LOV_ADD_INA: case LCFG_LOV_ADD_OBD: { @@ -162,7 +194,9 @@ static int lod_process_config(const struct lu_env *env, else if (lcfg->lcfg_command == LCFG_LOV_ADD_INA) rc = lod_add_device(env, lod, arg1, index, gen, 0); else - rc = lod_del_device(env, lod, arg1, index, gen); + rc = lod_del_device(env, lod, + &lod->lod_ost_descs, + arg1, index, gen); break; } @@ -177,24 +211,11 @@ static int lod_process_config(const struct lu_env *env, if (rc > 0) rc = 0; GOTO(out, rc); - } - + } case LCFG_CLEANUP: lu_dev_del_linkage(dev->ld_site, dev); - lod_getref(lod); - lod_foreach_ost(lod, i) { - struct lod_ost_desc *ost; - ost = OST_TGT(lod, i); - 
LASSERT(ost && ost->ltd_ost); - next = &ost->ltd_ost->dd_lu_dev; - rc = next->ld_ops->ldo_process_config(env, next, lcfg); - if (rc) - CERROR("%s: can't process %u: %d\n", - lod2obd(lod)->obd_name, - lcfg->lcfg_command, rc); - } - lod_putref(lod); - + lod_cleanup_desc_tgts(env, lod, &lod->lod_mdt_descs, lcfg); + lod_cleanup_desc_tgts(env, lod, &lod->lod_ost_descs, lcfg); /* * do cleanup on underlying storage only when * all OSPs are cleaned up, as they use that OSD as well @@ -226,7 +247,6 @@ static int lod_recovery_complete(const struct lu_env *env, { struct lod_device *lod = lu2lod_dev(dev); struct lu_device *next = &lod->lod_child->dd_lu_dev; - struct lod_ost_desc *ost; int i, rc; ENTRY; @@ -235,18 +255,20 @@ static int lod_recovery_complete(const struct lu_env *env, rc = next->ld_ops->ldo_recovery_complete(env, next); - lod_getref(lod); - lod_foreach_ost(lod, i) { - ost = OST_TGT(lod, i); - LASSERT(ost && ost->ltd_ost); - next = &ost->ltd_ost->dd_lu_dev; - rc = next->ld_ops->ldo_recovery_complete(env, next); - if (rc) - CERROR("%s: can't complete recovery on #%d: %d\n", - lod2obd(lod)->obd_name, i, rc); + lod_getref(&lod->lod_ost_descs); + if (lod->lod_osts_size > 0) { + cfs_foreach_bit(lod->lod_ost_bitmap, i) { + struct lod_tgt_desc *tgt; + tgt = OST_TGT(lod, i); + LASSERT(tgt && tgt->ltd_tgt); + next = &tgt->ltd_ost->dd_lu_dev; + rc = next->ld_ops->ldo_recovery_complete(env, next); + if (rc) + CERROR("%s: can't complete recovery on #%d:" + "%d\n", lod2obd(lod)->obd_name, i, rc); + } } - lod_putref(lod); - + lod_putref(lod, &lod->lod_ost_descs); RETURN(rc); } @@ -321,7 +343,7 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev) int rc = 0, i; ENTRY; - lod_getref(lod); + lod_getref(&lod->lod_ost_descs); lod_foreach_ost(lod, i) { ost = OST_TGT(lod, i); LASSERT(ost && ost->ltd_ost); @@ -332,7 +354,7 @@ static int lod_sync(const struct lu_env *env, struct dt_device *dev) break; } } - lod_putref(lod); + lod_putref(lod, &lod->lod_ost_descs); if 
(rc == 0) rc = dt_sync(env, lod->lod_child); @@ -454,6 +476,25 @@ out: RETURN(rc); } +static int lod_tgt_desc_init(struct lod_tgt_descs *ltd) +{ + mutex_init(&ltd->ltd_mutex); + init_rwsem(&ltd->ltd_rw_sem); + + /* the OST array and bitmap are allocated/grown dynamically as OSTs are + * added to the LOD, see lod_add_device() */ + ltd->ltd_tgt_bitmap = CFS_ALLOCATE_BITMAP(32); + if (ltd->ltd_tgt_bitmap == NULL) + RETURN(-ENOMEM); + + ltd->ltd_tgts_size = 32; + ltd->ltd_tgtnr = 0; + + ltd->ltd_death_row = 0; + ltd->ltd_refcount = 0; + return 0; +} + static int lod_init0(const struct lu_env *env, struct lod_device *lod, struct lu_device_type *ldt, struct lustre_cfg *cfg) { @@ -490,9 +531,10 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod, if (rc) GOTO(out_pools, rc); - mutex_init(&lod->lod_mutex); - init_rwsem(&lod->lod_rw_sem); spin_lock_init(&lod->lod_desc_lock); + spin_lock_init(&lod->lod_connects_lock); + lod_tgt_desc_init(&lod->lod_mdt_descs); + lod_tgt_desc_init(&lod->lod_ost_descs); RETURN(0); @@ -545,12 +587,23 @@ static struct lu_device *lod_device_fini(const struct lu_env *env, struct lu_device *d) { struct lod_device *lod = lu2lod_dev(d); + int rc; ENTRY; lod_pools_fini(lod); lod_procfs_fini(lod); + rc = lod_fini_tgt(lod, &lod->lod_ost_descs); + if (rc) + CERROR("%s:can not fini ost descs %d\n", + lod2obd(lod)->obd_name, rc); + + rc = lod_fini_tgt(lod, &lod->lod_mdt_descs); + if (rc) + CERROR("%s:can not fini mdt descs %d\n", + lod2obd(lod)->obd_name, rc); + RETURN(NULL); } @@ -574,11 +627,11 @@ static int lod_obd_connect(const struct lu_env *env, struct obd_export **exp, *exp = class_conn2export(&conn); - mutex_lock(&lod->lod_mutex); + spin_lock(&lod->lod_connects_lock); lod->lod_connects++; /* at the moment we expect the only user */ LASSERT(lod->lod_connects == 1); - mutex_unlock(&lod->lod_mutex); + spin_unlock(&lod->lod_connects_lock); RETURN(0); } @@ -595,16 +648,16 @@ static int lod_obd_disconnect(struct obd_export *exp) ENTRY; /* 
Only disconnect the underlying layers on the final disconnect. */ - mutex_lock(&lod->lod_mutex); + spin_lock(&lod->lod_connects_lock); lod->lod_connects--; if (lod->lod_connects != 0) { /* why should there be more than 1 connect? */ - mutex_unlock(&lod->lod_mutex); + spin_unlock(&lod->lod_connects_lock); CERROR("%s: disconnect #%d\n", exp->exp_obd->obd_name, lod->lod_connects); goto out; } - mutex_unlock(&lod->lod_mutex); + spin_unlock(&lod->lod_connects_lock); /* the last user of lod has gone, let's release the device */ release = 1; @@ -669,7 +722,7 @@ static int lod_obd_health_check(const struct lu_env *env, ENTRY; LASSERT(d); - lod_getref(d); + lod_getref(&d->lod_ost_descs); lod_foreach_ost(d, i) { ost = OST_TGT(d, i); LASSERT(ost && ost->ltd_ost); @@ -678,7 +731,7 @@ static int lod_obd_health_check(const struct lu_env *env, if (rc == 0) break; } - lod_putref(d); + lod_putref(d, &d->lod_ost_descs); RETURN(rc); } diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 28d6081..a07201f 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -45,8 +45,8 @@ #define LOV_USES_ASSIGNED_STRIPE 0 #define LOV_USES_DEFAULT_STRIPE 1 -struct lod_ost_desc { - struct dt_device *ltd_ost; +struct lod_tgt_desc { + struct dt_device *ltd_tgt; struct list_head ltd_kill; struct obd_export *ltd_exp; struct obd_uuid ltd_uuid; @@ -59,16 +59,36 @@ struct lod_ost_desc { ltd_reap:1; /* should this target be deleted */ }; -#define OST_PTRS 256 /* number of pointers at 1st level */ -#define OST_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */ +#define TGT_PTRS 256 /* number of pointers at 1st level */ +#define TGT_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */ -struct lod_ost_desc_idx { - struct lod_ost_desc *ldi_ost[OST_PTRS_PER_BLOCK]; +struct lod_tgt_desc_idx { + struct lod_tgt_desc *ldi_tgt[TGT_PTRS_PER_BLOCK]; }; -#define OST_TGT(dev,index) \ - ((dev)->lod_ost_idx[(index) / \ - 
OST_PTRS_PER_BLOCK]->ldi_ost[(index)%OST_PTRS_PER_BLOCK]) +#define LTD_TGT(ltd, index) \ + ((ltd)->ltd_tgt_idx[(index) / \ + TGT_PTRS_PER_BLOCK]->ldi_tgt[(index) % TGT_PTRS_PER_BLOCK]) + +#define OST_TGT(lod, index) LTD_TGT(&lod->lod_ost_descs, index) +struct lod_tgt_descs { + /* list of known TGTs */ + struct lod_tgt_desc_idx *ltd_tgt_idx[TGT_PTRS]; + /* Size of the lod_tgts array, granted to be a power of 2 */ + __u32 ltd_tgts_size; + /* number of registered TGTs */ + int ltd_tgtnr; + /* bitmap of TGTs available */ + cfs_bitmap_t *ltd_tgt_bitmap; + /* TGTs scheduled to be deleted */ + __u32 ltd_death_row; + /* Table refcount used for delayed deletion */ + int ltd_refcount; + /* mutex to serialize concurrent updates to the tgt table */ + struct mutex ltd_mutex; + /* read/write semaphore used for array relocation */ + struct rw_semaphore ltd_rw_sem; +}; struct lod_device { struct dt_device lod_dt_dev; @@ -76,6 +96,7 @@ struct lod_device { struct dt_device *lod_child; cfs_proc_dir_entry_t *lod_proc_entry; struct lprocfs_stats *lod_stats; + spinlock_t lod_connects_lock; int lod_connects; unsigned int lod_recovery_completed:1, lod_initialized:1; @@ -86,28 +107,17 @@ struct lod_device { /* use to protect ld_active_tgt_count and all ltd_active */ spinlock_t lod_desc_lock; - /* list of known OSTs */ - struct lod_ost_desc_idx *lod_ost_idx[OST_PTRS]; - - /* Size of the lod_osts array, granted to be a power of 2 */ - __u32 lod_osts_size; - /* number of registered OSTs */ - int lod_ostnr; - /* OSTs scheduled to be deleted */ - __u32 lod_death_row; - /* bitmap of OSTs available */ - cfs_bitmap_t *lod_ost_bitmap; + /* Description of OST */ + struct lod_tgt_descs lod_ost_descs; + /* Description of MDT */ + struct lod_tgt_descs lod_mdt_descs; /* maximum EA size underlied OSD may have */ unsigned int lod_osd_max_easize; - /* Table refcount used for delayed deletion */ - int lod_refcount; - /* mutex to serialize concurrent updates to the ost table */ - struct mutex lod_mutex; - /* 
read/write semaphore used for array relocation */ - struct rw_semaphore lod_rw_sem; - + /*FIXME: When QOS and pool is implemented for MDT, probably these + * structure should be moved to lod_tgt_descs as well. + */ /* QoS info per LOD */ struct lov_qos lod_qos; /* qos info per lod */ @@ -123,6 +133,13 @@ struct lod_device { cfs_proc_dir_entry_t *lod_symlink; }; +#define lod_osts lod_ost_descs.ltd_tgts +#define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap +#define lod_ostnr lod_ost_descs.ltd_tgtnr +#define lod_osts_size lod_ost_descs.ltd_tgts_size +#define ltd_ost ltd_tgt +#define lod_ost_desc lod_tgt_desc + /* * XXX: shrink this structure, currently it's 72bytes on 32bit arch, * so, slab will be allocating 128bytes @@ -140,11 +157,12 @@ struct lod_object { int ldo_stripes_allocated; /* default striping for directory represented by this object * is cached in stripenr/stripe_size */ - int ldo_striping_cached:1; - int ldo_def_striping_set:1; + int ldo_striping_cached:1, + ldo_def_striping_set:1; __u32 ldo_def_stripe_size; __u16 ldo_def_stripenr; __u16 ldo_def_stripe_offset; + mdsno_t ldo_mds_num; }; @@ -252,12 +270,14 @@ static inline struct lod_thread_info *lod_env_info(const struct lu_env *env) int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, const struct lu_fid *fid, mdsno_t *tgt, int flags); /* lod_lov.c */ -void lod_getref(struct lod_device *lod); -void lod_putref(struct lod_device *lod); +void lod_getref(struct lod_tgt_descs *ltd); +void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd); int lod_add_device(const struct lu_env *env, struct lod_device *m, char *osp, unsigned index, unsigned gen, int active); -int lod_del_device(const struct lu_env *env, struct lod_device *m, - char *osp, unsigned index, unsigned gen); +int lod_del_device(const struct lu_env *env, struct lod_device *lod, + struct lod_tgt_descs *ltd, char *osp, unsigned idx, + unsigned gen); +int lod_fini_tgt(struct lod_device *lod, struct lod_tgt_descs *ltd); int 
lod_load_striping(const struct lu_env *env, struct lod_object *mo); int lod_get_lov_ea(const struct lu_env *env, struct lod_object *mo); void lod_fix_desc(struct lov_desc *desc); @@ -296,8 +316,8 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname); int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, struct lu_attr *attr, const struct lu_buf *buf, struct thandle *th); -int qos_add_tgt(struct lod_device*, struct lod_ost_desc *); -int qos_del_tgt(struct lod_device *, struct lod_ost_desc *); +int qos_add_tgt(struct lod_device*, struct lod_tgt_desc *); +int qos_del_tgt(struct lod_device *, struct lod_tgt_desc *); /* lproc_lod.c */ void lprocfs_lod_init_vars(struct lprocfs_static_vars *lvars); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index 72d8204..81a5ed1 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -42,18 +42,18 @@ #include "lod_internal.h" /* - * Keep a refcount of lod->lod_osts usage to prevent racing with + * Keep a refcount of lod->ltd_tgts usage to prevent racing with * addition/deletion. Any function that expects lov_tgts to remain stationary * must take a ref. 
* * \param lod - is the lod device from which we want to grab a reference */ -void lod_getref(struct lod_device *lod) +void lod_getref(struct lod_tgt_descs *ltd) { - down_read(&lod->lod_rw_sem); - mutex_lock(&lod->lod_mutex); - lod->lod_refcount++; - mutex_unlock(&lod->lod_mutex); + down_read(&ltd->ltd_rw_sem); + mutex_lock(&ltd->ltd_mutex); + ltd->ltd_refcount++; + mutex_unlock(&ltd->ltd_mutex); } /* @@ -63,62 +63,67 @@ void lod_getref(struct lod_device *lod) * * \param lod - is the lod device from which we release a reference */ -void lod_putref(struct lod_device *lod) +void lod_putref(struct lod_device *lod, struct lod_tgt_descs *ltd) { - mutex_lock(&lod->lod_mutex); - lod->lod_refcount--; - if (lod->lod_refcount == 0 && lod->lod_death_row) { - struct lod_ost_desc *ost_desc, *tmp; + mutex_lock(&ltd->ltd_mutex); + ltd->ltd_refcount--; + if (ltd->ltd_refcount == 0 && ltd->ltd_death_row) { + struct lod_tgt_desc *tgt_desc, *tmp; int idx; CFS_LIST_HEAD(kill); - CDEBUG(D_CONFIG, "destroying %d lod desc\n", - lod->lod_death_row); + CDEBUG(D_CONFIG, "destroying %d ltd desc\n", + ltd->ltd_death_row); - cfs_foreach_bit(lod->lod_ost_bitmap, idx) { - ost_desc = OST_TGT(lod, idx); - LASSERT(ost_desc); + cfs_foreach_bit(ltd->ltd_tgt_bitmap, idx) { + tgt_desc = LTD_TGT(ltd, idx); + LASSERT(tgt_desc); - if (!ost_desc->ltd_reap) + if (!tgt_desc->ltd_reap) continue; - cfs_list_add(&ost_desc->ltd_kill, &kill); - - lod_ost_pool_remove(&lod->lod_pool_info, idx); - OST_TGT(lod, idx) = NULL; - lod->lod_ostnr--; - cfs_bitmap_clear(lod->lod_ost_bitmap, idx); - if (ost_desc->ltd_active) - lod->lod_desc.ld_active_tgt_count--; - lod->lod_death_row--; + cfs_list_add(&tgt_desc->ltd_kill, &kill); + LTD_TGT(ltd, idx) = NULL; + /*FIXME: only support ost pool for now */ + if (ltd == &lod->lod_ost_descs) { + lod_ost_pool_remove(&lod->lod_pool_info, idx); + if (tgt_desc->ltd_active) + lod->lod_desc.ld_active_tgt_count--; + } + ltd->ltd_tgtnr--; + cfs_bitmap_clear(ltd->ltd_tgt_bitmap, idx); + 
ltd->ltd_death_row--; } - mutex_unlock(&lod->lod_mutex); - up_read(&lod->lod_rw_sem); + mutex_unlock(&ltd->ltd_mutex); + up_read(&ltd->ltd_rw_sem); - cfs_list_for_each_entry_safe(ost_desc, tmp, &kill, ltd_kill) { + cfs_list_for_each_entry_safe(tgt_desc, tmp, &kill, ltd_kill) { int rc; - cfs_list_del(&ost_desc->ltd_kill); - /* remove from QoS structures */ - rc = qos_del_tgt(lod, ost_desc); - if (rc) - CERROR("%s: qos_del_tgt(%s) failed: rc = %d\n", - lod2obd(lod)->obd_name, - obd_uuid2str(&ost_desc->ltd_uuid), rc); - - rc = obd_disconnect(ost_desc->ltd_exp); + cfs_list_del(&tgt_desc->ltd_kill); + if (ltd == &lod->lod_ost_descs) { + /* remove from QoS structures */ + rc = qos_del_tgt(lod, tgt_desc); + if (rc) + CERROR("%s: qos_del_tgt(%s) failed:" + "rc = %d\n", + lod2obd(lod)->obd_name, + obd_uuid2str(&tgt_desc->ltd_uuid), + rc); + } + rc = obd_disconnect(tgt_desc->ltd_exp); if (rc) CERROR("%s: failed to disconnect %s: rc = %d\n", lod2obd(lod)->obd_name, - obd_uuid2str(&ost_desc->ltd_uuid), rc); - OBD_FREE_PTR(ost_desc); + obd_uuid2str(&tgt_desc->ltd_uuid), rc); + OBD_FREE_PTR(tgt_desc); } } else { - mutex_unlock(&lod->lod_mutex); - up_read(&lod->lod_rw_sem); + mutex_unlock(&ltd->ltd_mutex); + up_read(&ltd->ltd_rw_sem); } } -static int lod_bitmap_resize(struct lod_device *lod, __u32 newsize) +static int ltd_bitmap_resize(struct lod_tgt_descs *ltd, __u32 newsize) { cfs_bitmap_t *new_bitmap, *old_bitmap = NULL; int rc = 0; @@ -126,9 +131,9 @@ static int lod_bitmap_resize(struct lod_device *lod, __u32 newsize) /* grab write reference on the lod. 
Relocating the array requires * exclusive access */ - down_write(&lod->lod_rw_sem); - if (newsize <= lod->lod_osts_size) + down_write(&ltd->ltd_rw_sem); + if (newsize <= ltd->ltd_tgts_size) /* someone else has already resize the array */ GOTO(out, rc = 0); @@ -137,24 +142,24 @@ static int lod_bitmap_resize(struct lod_device *lod, __u32 newsize) if (!new_bitmap) GOTO(out, rc = -ENOMEM); - if (lod->lod_osts_size > 0) { + if (ltd->ltd_tgts_size > 0) { /* the bitmap already exists, we need * to copy data from old one */ - cfs_bitmap_copy(new_bitmap, lod->lod_ost_bitmap); - old_bitmap = lod->lod_ost_bitmap; + cfs_bitmap_copy(new_bitmap, ltd->ltd_tgt_bitmap); + old_bitmap = ltd->ltd_tgt_bitmap; } - lod->lod_osts_size = newsize; - lod->lod_ost_bitmap = new_bitmap; + ltd->ltd_tgts_size = newsize; + ltd->ltd_tgt_bitmap = new_bitmap; if (old_bitmap) CFS_FREE_BITMAP(old_bitmap); - CDEBUG(D_CONFIG, "ost size: %d\n", lod->lod_osts_size); + CDEBUG(D_CONFIG, "tgt size: %d\n", ltd->ltd_tgts_size); EXIT; out: - up_write(&lod->lod_rw_sem); + up_write(&ltd->ltd_rw_sem); return rc; } @@ -176,9 +181,9 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, struct lu_device *ldev; struct dt_device *d; int rc; - struct lod_ost_desc *ost_desc; - struct obd_uuid obd_uuid; - + struct lod_tgt_desc *tgt_desc; + struct lod_tgt_descs *ltd = &lod->lod_ost_descs; + struct obd_uuid obd_uuid; ENTRY; CDEBUG(D_CONFIG, "osp:%s idx:%d gen:%d\n", osp, index, gen); @@ -221,64 +226,66 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, d = lu2dt_dev(ldev); /* Allocate ost descriptor and fill it */ - OBD_ALLOC_PTR(ost_desc); - if (!ost_desc) + OBD_ALLOC_PTR(tgt_desc); + if (!tgt_desc) GOTO(out_conn, rc = -ENOMEM); - ost_desc->ltd_ost = d; - ost_desc->ltd_exp = exp; - ost_desc->ltd_uuid = obd->u.cli.cl_target_uuid; - ost_desc->ltd_gen = gen; - ost_desc->ltd_index = index; - ost_desc->ltd_active = active; + tgt_desc->ltd_tgt = d; + tgt_desc->ltd_exp = exp; + tgt_desc->ltd_uuid 
= obd->u.cli.cl_target_uuid; + tgt_desc->ltd_gen = gen; + tgt_desc->ltd_index = index; + tgt_desc->ltd_active = active; - lod_getref(lod); - if (index >= lod->lod_osts_size) { + lod_getref(ltd); + if (index >= ltd->ltd_tgts_size) { /* we have to increase the size of the lod_osts array */ __u32 newsize; - newsize = max(lod->lod_osts_size, (__u32)2); + newsize = max(ltd->ltd_tgts_size, (__u32)2); while (newsize < index + 1) newsize = newsize << 1; /* lod_bitmap_resize() needs lod_rw_sem * which we hold with th reference */ - lod_putref(lod); + lod_putref(lod, ltd); - rc = lod_bitmap_resize(lod, newsize); + rc = ltd_bitmap_resize(ltd, newsize); if (rc) GOTO(out_desc, rc); - lod_getref(lod); + lod_getref(ltd); } - mutex_lock(&lod->lod_mutex); - if (cfs_bitmap_check(lod->lod_ost_bitmap, index)) { + mutex_lock(&ltd->ltd_mutex); + if (cfs_bitmap_check(ltd->ltd_tgt_bitmap, index)) { CERROR("%s: device %d is registered already\n", obd->obd_name, index); GOTO(out_mutex, rc = -EEXIST); } - if (lod->lod_ost_idx[index / OST_PTRS_PER_BLOCK] == NULL) { - OBD_ALLOC_PTR(lod->lod_ost_idx[index / OST_PTRS_PER_BLOCK]); - if (lod->lod_ost_idx[index / OST_PTRS_PER_BLOCK] == NULL) { + if (ltd->ltd_tgt_idx[index / TGT_PTRS_PER_BLOCK] == NULL) { + OBD_ALLOC_PTR(ltd->ltd_tgt_idx[index / TGT_PTRS_PER_BLOCK]); + if (ltd->ltd_tgt_idx[index / TGT_PTRS_PER_BLOCK] == NULL) { CERROR("can't allocate index to add %s\n", obd->obd_name); GOTO(out_mutex, rc = -ENOMEM); } } - rc = lod_ost_pool_add(&lod->lod_pool_info, index, lod->lod_osts_size); + /* pool and qos are not supported for MDS stack yet */ + rc = lod_ost_pool_add(&lod->lod_pool_info, index, + lod->lod_osts_size); if (rc) { CERROR("%s: can't set up pool, failed with %d\n", obd->obd_name, rc); GOTO(out_mutex, rc); } - rc = qos_add_tgt(lod, ost_desc); + rc = qos_add_tgt(lod, tgt_desc); if (rc) { - CERROR("%s: qos_add_tgt(%s) failed: rc = %d\n", obd->obd_name, - obd_uuid2str(&ost_desc->ltd_uuid), rc); + CERROR("%s: qos_add_tgt failed with %d\n", + 
obd->obd_name, rc); GOTO(out_pool, rc); } @@ -287,12 +294,12 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, lod->lod_desc.ld_tgt_count = index + 1; if (active) lod->lod_desc.ld_active_tgt_count++; - OST_TGT(lod, index) = ost_desc; - cfs_bitmap_set(lod->lod_ost_bitmap, index); - lod->lod_ostnr++; - mutex_unlock(&lod->lod_mutex); - lod_putref(lod); + LTD_TGT(ltd, index) = tgt_desc; + cfs_bitmap_set(ltd->ltd_tgt_bitmap, index); + ltd->ltd_tgtnr++; + mutex_unlock(&ltd->ltd_mutex); + lod_putref(lod, ltd); if (lod->lod_recovery_completed) ldev->ld_ops->ldo_recovery_complete(env, ldev); @@ -301,10 +308,10 @@ int lod_add_device(const struct lu_env *env, struct lod_device *lod, out_pool: lod_ost_pool_remove(&lod->lod_pool_info, index); out_mutex: - mutex_unlock(&lod->lod_mutex); - lod_putref(lod); + mutex_unlock(&ltd->ltd_mutex); + lod_putref(lod, ltd); out_desc: - OBD_FREE_PTR(ost_desc); + OBD_FREE_PTR(tgt_desc); out_conn: obd_disconnect(exp); out_free: @@ -314,15 +321,37 @@ out_free: /* * helper function to schedule OST removal from the device table */ -static void __lod_del_device(struct lod_device *lod, unsigned idx) +static void __lod_del_device(struct lod_tgt_descs *ltd, + unsigned idx) { - LASSERT(OST_TGT(lod,idx)); - if (OST_TGT(lod,idx)->ltd_reap == 0) { - OST_TGT(lod,idx)->ltd_reap = 1; - lod->lod_death_row++; + LASSERT(LTD_TGT(ltd, idx)); + if (LTD_TGT(ltd, idx)->ltd_reap == 0) { + LTD_TGT(ltd, idx)->ltd_reap = 1; + ltd->ltd_death_row++; } } +int lod_fini_tgt(struct lod_device *lod, struct lod_tgt_descs *ltd) +{ + int idx; + + if (ltd->ltd_tgts_size <= 0) + return 0; + lod_getref(ltd); + mutex_lock(&ltd->ltd_mutex); + cfs_foreach_bit(ltd->ltd_tgt_bitmap, idx) + __lod_del_device(ltd, idx); + mutex_unlock(&ltd->ltd_mutex); + lod_putref(lod, ltd); + CFS_FREE_BITMAP(ltd->ltd_tgt_bitmap); + for (idx = 0; idx < TGT_PTRS; idx++) { + if (ltd->ltd_tgt_idx[idx]) + OBD_FREE_PTR(ltd->ltd_tgt_idx[idx]); + } + ltd->ltd_tgts_size = 0; + return 0; +} + /* * Add 
support for administratively disabled OST (through the MGS). * Schedule a target for deletion. Disconnection and real removal from the @@ -337,7 +366,8 @@ static void __lod_del_device(struct lod_device *lod, unsigned idx) * \param gen - is the generation number, not used currently */ int lod_del_device(const struct lu_env *env, struct lod_device *lod, - char *osp, unsigned idx, unsigned gen) + struct lod_tgt_descs *ltd, char *osp, unsigned idx, + unsigned gen) { struct obd_device *obd; int rc = 0; @@ -360,27 +390,28 @@ int lod_del_device(const struct lu_env *env, struct lod_device *lod, obd_str2uuid(&uuid, osp); - lod_getref(lod); - mutex_lock(&lod->lod_mutex); + lod_getref(ltd); + mutex_lock(&ltd->ltd_mutex); /* check that the index is allocated in the bitmap */ - if (!cfs_bitmap_check(lod->lod_ost_bitmap, idx) || !OST_TGT(lod,idx)) { + if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx) || + !LTD_TGT(ltd, idx)) { CERROR("%s: device %d is not set up\n", obd->obd_name, idx); GOTO(out, rc = -EINVAL); } /* check that the UUID matches */ - if (!obd_uuid_equals(&uuid, &OST_TGT(lod,idx)->ltd_uuid)) { + if (!obd_uuid_equals(&uuid, &LTD_TGT(ltd, idx)->ltd_uuid)) { CERROR("%s: LOD target UUID %s at index %d does not match %s\n", - obd->obd_name, obd_uuid2str(&OST_TGT(lod,idx)->ltd_uuid), + obd->obd_name, obd_uuid2str(&LTD_TGT(ltd,idx)->ltd_uuid), idx, osp); GOTO(out, rc = -EINVAL); } - __lod_del_device(lod, idx); + __lod_del_device(ltd, idx); EXIT; out: - mutex_unlock(&lod->lod_mutex); - lod_putref(lod); + mutex_unlock(&ltd->ltd_mutex); + lod_putref(lod, ltd); return(rc); } @@ -611,12 +642,12 @@ int lod_initialize_objects(const struct lu_env *env, struct lod_object *lo, * is completed. 
to be changed to -EINVAL */ - lod_getref(md); + lod_getref(&md->lod_ost_descs); LASSERT(cfs_bitmap_check(md->lod_ost_bitmap, idx)); LASSERT(OST_TGT(md,idx)); LASSERTF(OST_TGT(md,idx)->ltd_ost, "idx %d\n", idx); nd = &OST_TGT(md,idx)->ltd_ost->dd_lu_dev; - lod_putref(md); + lod_putref(md, &md->lod_ost_descs); o = lu_object_find_at(env, nd, &info->lti_fid, NULL); if (IS_ERR(o)) @@ -932,15 +963,6 @@ int lod_pools_init(struct lod_device *lod, struct lustre_cfg *lcfg) if (rc) GOTO(out_pool_info, rc); - /* the OST array and bitmap are allocated/grown dynamically as OSTs are - * added to the LOD, see lod_add_device() */ - lod->lod_ost_bitmap = NULL; - lod->lod_osts_size = 0; - lod->lod_ostnr = 0; - - lod->lod_death_row = 0; - lod->lod_refcount = 0; - RETURN(0); out_pool_info: @@ -966,22 +988,6 @@ int lod_pools_fini(struct lod_device *lod) lod_pool_del(obd, pool->pool_name); } - if (lod->lod_osts_size > 0) { - int idx; - lod_getref(lod); - mutex_lock(&lod->lod_mutex); - cfs_foreach_bit(lod->lod_ost_bitmap, idx) - __lod_del_device(lod, idx); - mutex_unlock(&lod->lod_mutex); - lod_putref(lod); - CFS_FREE_BITMAP(lod->lod_ost_bitmap); - for (idx = 0; idx < OST_PTRS; idx++) { - if (lod->lod_ost_idx[idx]) - OBD_FREE_PTR(lod->lod_ost_idx[idx]); - } - lod->lod_osts_size = 0; - } - cfs_hash_putref(lod->lod_pools_hash_body); lod_ost_pool_free(&(lod->lod_qos.lq_rr.lqr_pool)); lod_ost_pool_free(&lod->lod_pool_info); diff --git a/lustre/lod/lod_pool.c b/lustre/lod/lod_pool.c index 82476d7..d71c25c 100644 --- a/lustre/lod/lod_pool.c +++ b/lustre/lod/lod_pool.c @@ -254,7 +254,7 @@ static void pool_proc_stop(struct seq_file *s, void *v) static int pool_proc_show(struct seq_file *s, void *v) { struct pool_iterator *iter = (struct pool_iterator *)v; - struct lod_ost_desc *osc_desc; + struct lod_tgt_desc *osc_desc; LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X", iter->magic); LASSERT(iter->pool != NULL); @@ -549,7 +549,7 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char 
*ostname) obd_str2uuid(&ost_uuid, ostname); /* search ost in lod array */ - lod_getref(lod); + lod_getref(&lod->lod_ost_descs); lod_foreach_ost(lod, idx) { if (obd_uuid_equals(&ost_uuid, &OST_TGT(lod, idx)->ltd_uuid)) { rc = 0; @@ -571,7 +571,7 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname) EXIT; out: - lod_putref(lod); + lod_putref(lod, &lod->lod_ost_descs); lod_pool_putref(pool); return rc; } @@ -591,7 +591,7 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname) obd_str2uuid(&ost_uuid, ostname); - lod_getref(lod); + lod_getref(&lod->lod_ost_descs); /* search ost in lod array, to get index */ cfs_foreach_bit(lod->lod_ost_bitmap, idx) { if (obd_uuid_equals(&ost_uuid, &OST_TGT(lod, idx)->ltd_uuid)) { @@ -613,7 +613,7 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname) EXIT; out: - lod_putref(lod); + lod_putref(lod, &lod->lod_ost_descs); lod_pool_putref(pool); return rc; } diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 5e91e88..72986b1 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -59,7 +59,7 @@ #define TGT_BAVAIL(i) (OST_TGT(lod,i)->ltd_statfs.os_bavail * \ OST_TGT(lod,i)->ltd_statfs.os_bsize) -int qos_add_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc) +int qos_add_tgt(struct lod_device *lod, struct lod_tgt_desc *ost_desc) { struct lov_qos_oss *oss = NULL, *temposs; struct obd_export *exp = ost_desc->ltd_exp; @@ -118,7 +118,7 @@ out: RETURN(rc); } -int qos_del_tgt(struct lod_device *lod, struct lod_ost_desc *ost_desc) +int qos_del_tgt(struct lod_device *lod, struct lod_tgt_desc *ost_desc) { struct lov_qos_oss *oss; int rc = 0; @@ -148,7 +148,7 @@ out: static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d, int index, struct obd_statfs *sfs) { - struct lod_ost_desc *ost; + struct lod_tgt_desc *ost; int rc; LASSERT(d); @@ -363,7 +363,7 @@ static int lod_qos_calc_weight(struct lod_device *lod, int i) static int 
lod_qos_used(struct lod_device *lod, struct ost_pool *osts, __u32 index, __u64 *total_wt) { - struct lod_ost_desc *ost; + struct lod_tgt_desc *ost; struct lov_qos_oss *oss; int j; ENTRY; @@ -443,7 +443,7 @@ static int lod_qos_calc_rr(struct lod_device *lod, struct ost_pool *src_pool, struct lov_qos_rr *lqr) { struct lov_qos_oss *oss; - struct lod_ost_desc *ost; + struct lod_tgt_desc *ost; unsigned placed, real_count; int i, rc; ENTRY; @@ -549,7 +549,7 @@ static struct dt_object *lod_qos_declare_object_on(const struct lu_env *env, int ost_idx, struct thandle *th) { - struct lod_ost_desc *ost; + struct lod_tgt_desc *ost; struct lu_object *o, *n; struct lu_device *nd; struct dt_object *dt; @@ -975,7 +975,7 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo, { struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev); struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs; - struct lod_ost_desc *ost; + struct lod_tgt_desc *ost; struct dt_object *o; __u64 total_weight = 0; int nfound, good_osts, i, rc = 0; @@ -1382,7 +1382,7 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, GOTO(out, rc = -ENOMEM); lo->ldo_stripes_allocated = lo->ldo_stripenr; - lod_getref(d); + lod_getref(&d->lod_ost_descs); /* XXX: support for non-0 files w/o objects */ if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) { lod_qos_statfs_update(env, d); @@ -1391,7 +1391,7 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo, rc = lod_alloc_rr(env, lo, flag, th); } else rc = lod_alloc_specific(env, lo, flag, th); - lod_putref(d); + lod_putref(d, &d->lod_ost_descs); } else { /* * lod_qos_parse_config() found supplied buf as a predefined diff --git a/lustre/lod/lproc_lod.c b/lustre/lod/lproc_lod.c index 41495a5..d900baa 100644 --- a/lustre/lod/lproc_lod.c +++ b/lustre/lod/lproc_lod.c @@ -315,14 +315,14 @@ static int lod_wr_qos_maxage(struct file *file, const char *buffer, sprintf(str, "%smaxage=%d", PARAM_OSP, val); 
lustre_cfg_bufs_set_string(&bufs, 1, str); lcfg = lustre_cfg_new(LCFG_PARAM, &bufs); - lod_getref(lod); + lod_getref(&lod->lod_ost_descs); lod_foreach_ost(lod, i) { next = &OST_TGT(lod,i)->ltd_ost->dd_lu_dev; rc = next->ld_ops->ldo_process_config(NULL, next, lcfg); if (rc) CERROR("can't set maxage on #%d: %d\n", i, rc); } - lod_putref(lod); + lod_putref(lod, &lod->lod_ost_descs); lustre_cfg_free(lcfg); return count; @@ -336,7 +336,7 @@ static void *lod_osts_seq_start(struct seq_file *p, loff_t *pos) LASSERT(dev != NULL); lod = lu2lod_dev(dev->obd_lu_dev); - lod_getref(lod); /* released in lod_osts_seq_stop */ + lod_getref(&lod->lod_ost_descs); /* released in lod_osts_seq_stop */ if (*pos >= lod->lod_ost_bitmap->size) return NULL; @@ -355,7 +355,7 @@ static void lod_osts_seq_stop(struct seq_file *p, void *v) LASSERT(dev != NULL); lod = lu2lod_dev(dev->obd_lu_dev); - lod_putref(lod); + lod_putref(lod, &lod->lod_ost_descs); } static void *lod_osts_seq_next(struct seq_file *p, void *v, loff_t *pos) -- 1.8.3.1