From cc902b9f22a7becf458b7f94118534f2731a00e0 Mon Sep 17 00:00:00 2001 From: shadow Date: Tue, 7 Apr 2009 17:58:39 +0000 Subject: [PATCH] don't call obd_disconnect under lov_lock. Branch b_release_1_8_0 b=17310 i=johann i=rread --- lustre/ChangeLog | 7 ++++++ lustre/include/obd.h | 1 + lustre/lov/lov_internal.h | 2 +- lustre/lov/lov_obd.c | 61 +++++++++++++++++++++++++++-------------------- lustre/lov/lov_qos.c | 7 ++---- 5 files changed, 46 insertions(+), 32 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 91c09d5..3a9760a 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -44,6 +44,13 @@ information, please refer to bugzilla 17630. Severity : normal +Frequency : rare, connect and disconnect target at the same time +Bugzilla : 17310 +Description: ASSERTION(atomic_read(&imp->imp_inflight) == 0) +Details : don't call obd_disconnect under lov_lock. This is a long-running + operation and can block ptlrpcd, which answers the connect request. + +Severity : normal Frequency : rare, on failed llog setup Bugzilla : 18896 Descriptoin: don't leak obd reference on failed llog setup diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 55a5799..453f45a 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -661,6 +661,7 @@ struct lov_qos { }; struct lov_tgt_desc { + struct list_head ltd_kill; struct obd_uuid ltd_uuid; struct obd_export *ltd_exp; struct ltd_qos ltd_qos; /* qos info per target */ diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index cd689b7..7f67a29 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -176,7 +176,7 @@ int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off); #define LOV_USES_ASSIGNED_STRIPE 0 #define LOV_USES_DEFAULT_STRIPE 1 int qos_add_tgt(struct obd_device *obd, __u32 index); -int qos_del_tgt(struct obd_device *obd, __u32 index); +int qos_del_tgt(struct obd_device *obd, struct lov_tgt_desc *tgt); void qos_shrink_lsm(struct lov_request_set *set); int 
qos_prep_create(struct obd_export *exp, struct lov_request_set *set); void qos_update(struct lov_obd *lov); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 3e86f67..4fb1dfb 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -82,26 +82,44 @@ void lov_getref(struct obd_device *obd) return; } -static void __lov_del_obd(struct obd_device *obd, __u32 index); +static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); void lov_putref(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; + CFS_LIST_HEAD(kill); + struct lov_tgt_desc *tgt; + mutex_down(&lov->lov_lock); /* ok to dec to 0 more than once -- ltd_exp's will be null */ if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { int i; + struct lov_tgt_desc *n; CDEBUG(D_CONFIG, "destroying %d lov targets\n", lov->lov_death_row); for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_reap) + tgt = lov->lov_tgts[i]; + + if (!tgt || !tgt->ltd_reap) continue; - /* Disconnect and delete from list */ - __lov_del_obd(obd, i); + list_add(&tgt->ltd_kill, &kill); + /* XXX - right now there is a dependency on ld_tgt_count + * being the maximum tgt index for computing the + * mds_max_easize. So we can't shrink it. 
*/ + lov_ost_pool_remove(&lov->lov_packed, i); + lov->lov_tgts[i] = NULL; lov->lov_death_row--; } + mutex_up(&lov->lov_lock); + + list_for_each_entry_safe(tgt, n, &kill, ltd_kill) { + list_del(&tgt->ltd_kill); + /* Disconnect and delete from list */ + __lov_del_obd(obd, tgt); + } + } else { + mutex_up(&lov->lov_lock); } - mutex_up(&lov->lov_lock); } static int lov_obd_register_page_removal_cb(struct obd_device *obd, @@ -408,12 +426,11 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, RETURN(0); } -static int lov_disconnect_obd(struct obd_device *obd, __u32 index) +static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) { cfs_proc_dir_entry_t *lov_proc_dir; struct lov_obd *lov = &obd->u.lov; - struct obd_device *osc_obd = - class_exp2obd(lov->lov_tgts[index]->ltd_exp); + struct obd_device *osc_obd = class_exp2obd(tgt->ltd_exp); int rc; ENTRY; @@ -421,10 +438,10 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", obd->obd_name, osc_obd->obd_name); - if (lov->lov_tgts[index]->ltd_active) { - lov->lov_tgts[index]->ltd_active = 0; + if (tgt->ltd_active) { + tgt->ltd_active = 0; lov->desc.ld_active_tgt_count--; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; + tgt->ltd_exp->exp_obd->obd_inactive = 1; } lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); @@ -455,16 +472,16 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) obd_unregister_page_removal_cb(osc_obd, lov->lov_page_removal_cb); obd_unregister_lock_cancel_cb(osc_obd, lov->lov_lock_cancel_cb); - rc = obd_disconnect(lov->lov_tgts[index]->ltd_exp); + rc = obd_disconnect(tgt->ltd_exp); if (rc) { CERROR("Target %s disconnect error %d\n", - lov_uuid2str(lov, index), rc); + tgt->ltd_uuid.uuid, rc); rc = 0; } - qos_del_tgt(obd, index); + qos_del_tgt(obd, tgt); - lov->lov_tgts[index]->ltd_exp = NULL; + tgt->ltd_exp = NULL; RETURN(0); } @@ -730,11 +747,9 @@ 
out: } /* We are holding lov_lock */ -static void __lov_del_obd(struct obd_device *obd, __u32 index) +static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) { - struct lov_obd *lov = &obd->u.lov; struct obd_device *osc_obd; - struct lov_tgt_desc *tgt = lov->lov_tgts[index]; LASSERT(tgt); LASSERT(tgt->ltd_reap); @@ -742,18 +757,12 @@ static void __lov_del_obd(struct obd_device *obd, __u32 index) osc_obd = class_exp2obd(tgt->ltd_exp); CDEBUG(D_CONFIG, "Removing tgt %s : %s\n", - lov_uuid2str(lov, index), + tgt->ltd_uuid.uuid, osc_obd ? osc_obd->obd_name : ""); if (tgt->ltd_exp) - lov_disconnect_obd(obd, index); - - /* XXX - right now there is a dependency on ld_tgt_count being the - * maximum tgt index for computing the mds_max_easize. So we can't - * shrink it. */ + lov_disconnect_obd(obd, tgt); - lov_ost_pool_remove(&lov->lov_packed, index); - lov->lov_tgts[index] = NULL; OBD_FREE_PTR(tgt); /* Manual cleanup - no cleanup logs to clean up the osc's. We must diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index e1536e2..8a7df62 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -121,19 +121,16 @@ out: RETURN(rc); } -int qos_del_tgt(struct obd_device *obd, __u32 index) +int qos_del_tgt(struct obd_device *obd, struct lov_tgt_desc *tgt) { struct lov_obd *lov = &obd->u.lov; struct lov_qos_oss *oss; int rc = 0; ENTRY; - if (!lov->lov_tgts[index]) - RETURN(0); - down_write(&lov->lov_qos.lq_rw_sem); - oss = lov->lov_tgts[index]->ltd_qos.ltq_oss; + oss = tgt->ltd_qos.ltq_oss; if (!oss) GOTO(out, rc = -ENOENT); -- 1.8.3.1