From da2854ef4e519899c128f2d32388fb28bdf31993 Mon Sep 17 00:00:00 2001 From: shadow Date: Thu, 17 Jan 2008 20:44:43 +0000 Subject: [PATCH] more checks for NULL lov_tgts to avoid oops. b=14607 i=umka i=tappro --- lustre/ChangeLog | 5 +++++ lustre/lov/lov_obd.c | 39 ++++++++++++++++++++++----------------- lustre/lov/lov_qos.c | 12 ++++++------ lustre/lov/lproc_lov.c | 10 +++++++--- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 1c0e054..70b403e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -12,6 +12,11 @@ tbd Sun Microsystems, Inc. * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a removed cwd "./" (refer to Bugzilla 14399). +Severity : normal +Bugzilla : 14607 +Description: NULL lov_tgts causing MDS oops +Details : safer checks for NULL lov_tgts to avoid an oops. + Severity : enhancement Bugzilla : 14531 Description: Update to RHEL4 latest kernel-2.6.9-67.0.1.EL. diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 50ebb49..183a323 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -92,7 +92,7 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, struct obd_connect_data *data) { struct lov_obd *lov = &obd->u.lov; - struct obd_uuid tgt_uuid = lov->lov_tgts[index]->ltd_uuid; + struct obd_uuid tgt_uuid; struct obd_device *tgt_obd; struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; struct lustre_handle conn = {0, }; @@ -107,6 +107,8 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, if (!lov->lov_tgts[index]) RETURN(-EINVAL); + tgt_uuid = lov->lov_tgts[index]->ltd_uuid; + tgt_obd = class_find_client_obd(&tgt_uuid, LUSTRE_OSC_NAME, &obd->obd_uuid); @@ -253,15 +255,17 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index) { cfs_proc_dir_entry_t *lov_proc_dir; struct lov_obd *lov = &obd->u.lov; - struct obd_device *osc_obd = - class_exp2obd(lov->lov_tgts[index]->ltd_exp); + struct obd_device 
*osc_obd; int rc; ENTRY; CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", obd->obd_name, osc_obd->obd_name); + if (lov->lov_tgts[index] == NULL) + RETURN(-EINVAL); + osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); if (lov->lov_tgts[index]->ltd_active) { lov->lov_tgts[index]->ltd_active = 0; lov->desc.ld_active_tgt_count--; @@ -788,19 +792,20 @@ static int lov_cleanup(struct obd_device *obd) if (lov->lov_tgts) { int i; for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (lov->lov_tgts[i]) { - /* Inactive targets may never have connected */ - if (lov->lov_tgts[i]->ltd_active || - atomic_read(&lov->lov_refcount)) - /* We should never get here - these - should have been removed in the - disconnect. */ - CERROR("lov tgt %d not cleaned!" - " deathrow=%d, lovrc=%d\n", - i, lov->lov_death_row, - atomic_read(&lov->lov_refcount)); - lov_del_target(obd, i, 0, 0); - } + if (!lov->lov_tgts[i]) + continue; + + /* Inactive targets may never have connected */ + if (lov->lov_tgts[i]->ltd_active || + atomic_read(&lov->lov_refcount)) + /* We should never get here - these + should have been removed in the + disconnect. */ + CERROR("lov tgt %d not cleaned!" 
+ " deathrow=%d, lovrc=%d\n", + i, lov->lov_death_row, + atomic_read(&lov->lov_refcount)); + lov_del_target(obd, i, 0, 0); } OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) * lov->lov_tgt_size); @@ -2390,7 +2395,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, for(i = 0; i < lov->desc.ld_tgt_count; i++) { tgt = lov->lov_tgts[i]; - if (obd_uuid_equals(val, &tgt->ltd_uuid)) + if (tgt && obd_uuid_equals(val, &tgt->ltd_uuid)) GOTO(out, rc = i); } } diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index 45fff43..de2ae53 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -243,7 +243,7 @@ out: static int qos_calc_weight(struct lov_obd *lov, int i) { __u64 temp, temp2; - + /* Final ost weight = TGT_BAVAIL - ost_penalty - oss_penalty */ temp = TGT_BAVAIL(i); temp2 = lov->lov_tgts[i]->ltd_qos.ltq_penalty + @@ -266,7 +266,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) lov->lov_tgts[index]->ltd_qos.ltq_usable = 0; oss = lov->lov_tgts[index]->ltd_qos.ltq_oss; - + /* Decay old penalty by half (we're adding max penalty, and don't want it to run away.) 
*/ lov->lov_tgts[index]->ltd_qos.ltq_penalty >>= 1; @@ -278,7 +278,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt) lov->desc.ld_active_tgt_count; oss->lqo_penalty += oss->lqo_penalty_per_obj * lov->lov_qos.lq_active_oss_count; - + /* Decrease all OSS penalties */ list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { if (oss->lqo_penalty < oss->lqo_penalty_per_obj) @@ -373,8 +373,8 @@ static int qos_calc_rr(struct lov_obd *lov) list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) { int j = 0; for (i = 0; i < ost_count; i++) { - LASSERT(lov->lov_tgts[i] != NULL); - if (lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) { + if(lov->lov_tgts[i] && + lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) { /* Evenly space these OSTs across arrayspace */ int next = j * ost_count / oss->lqo_ost_count; LASSERT(next < ost_count); @@ -384,7 +384,7 @@ static int qos_calc_rr(struct lov_obd *lov) lov->lov_qos.lq_rr_array[next] = i; j++; placed++; - } + } } LASSERT(j == oss->lqo_ost_count); } diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 0ecba0d..15d1445 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -262,8 +262,12 @@ static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos) struct obd_device *dev = p->private; struct lov_obd *lov = &dev->u.lov; - return (*pos >= lov->desc.ld_tgt_count) ? NULL : lov->lov_tgts[*pos]; - + while (*pos < lov->desc.ld_tgt_count) { + if (lov->lov_tgts[*pos]) + return lov->lov_tgts[*pos]; + ++*pos; + } + return NULL; } static void lov_tgt_seq_stop(struct seq_file *p, void *v) @@ -302,7 +306,7 @@ static int lov_target_seq_open(struct inode *inode, struct file *file) struct proc_dir_entry *dp = PDE(inode); struct seq_file *seq; int rc; - + LPROCFS_ENTRY_AND_CHECK(dp); rc = seq_open(file, &lov_tgt_sops); if (rc) { -- 1.8.3.1