Whamcloud - gitweb
Add more NULL checks on lov_tgts entries to avoid an oops.
authorshadow <shadow>
Thu, 17 Jan 2008 20:44:43 +0000 (20:44 +0000)
committershadow <shadow>
Thu, 17 Jan 2008 20:44:43 +0000 (20:44 +0000)
b=14607
i=umka
i=tappro

lustre/ChangeLog
lustre/lov/lov_obd.c
lustre/lov/lov_qos.c
lustre/lov/lproc_lov.c

index 1c0e054..70b403e 100644 (file)
@@ -12,6 +12,11 @@ tbd  Sun Microsystems, Inc.
        * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a
         removed cwd "./" (refer to Bugzilla 14399).
 
+Severity   : normal
+Bugzilla   : 14607
+Description: NULL lov_tgts causing MDS oops
+Details    : add more NULL checks on lov_tgts entries to avoid an oops.
+
 Severity   : enhancement
 Bugzilla   : 14531
 Description: Update to RHEL4 latest kernel-2.6.9-67.0.1.EL.
index 50ebb49..183a323 100644 (file)
@@ -92,7 +92,7 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
                            struct obd_connect_data *data)
 {
         struct lov_obd *lov = &obd->u.lov;
-        struct obd_uuid tgt_uuid = lov->lov_tgts[index]->ltd_uuid;
+        struct obd_uuid tgt_uuid;
         struct obd_device *tgt_obd;
         struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" };
         struct lustre_handle conn = {0, };
@@ -107,6 +107,8 @@ static int lov_connect_obd(struct obd_device *obd, __u32 index, int activate,
         if (!lov->lov_tgts[index])
                 RETURN(-EINVAL);
 
+        tgt_uuid = lov->lov_tgts[index]->ltd_uuid;
+
         tgt_obd = class_find_client_obd(&tgt_uuid, LUSTRE_OSC_NAME,
                                         &obd->obd_uuid);
 
@@ -253,15 +255,17 @@ static int lov_disconnect_obd(struct obd_device *obd, __u32 index)
 {
         cfs_proc_dir_entry_t *lov_proc_dir;
         struct lov_obd *lov = &obd->u.lov;
-        struct obd_device *osc_obd =
-                class_exp2obd(lov->lov_tgts[index]->ltd_exp);
+        struct obd_device *osc_obd;
         int rc;
 
         ENTRY;
 
         CDEBUG(D_CONFIG, "%s: disconnecting target %s\n",
                obd->obd_name, osc_obd->obd_name);
+        if (lov->lov_tgts[index] == NULL)
+                RETURN(-EINVAL);
 
+        osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp);
         if (lov->lov_tgts[index]->ltd_active) {
                 lov->lov_tgts[index]->ltd_active = 0;
                 lov->desc.ld_active_tgt_count--;
@@ -788,19 +792,20 @@ static int lov_cleanup(struct obd_device *obd)
         if (lov->lov_tgts) {
                 int i;
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                        if (lov->lov_tgts[i]) {
-                                /* Inactive targets may never have connected */
-                                if (lov->lov_tgts[i]->ltd_active ||
-                                    atomic_read(&lov->lov_refcount)) 
-                                        /* We should never get here - these 
-                                           should have been removed in the 
-                                           disconnect. */
-                                        CERROR("lov tgt %d not cleaned!"
-                                               " deathrow=%d, lovrc=%d\n",
-                                               i, lov->lov_death_row,
-                                               atomic_read(&lov->lov_refcount));
-                                lov_del_target(obd, i, 0, 0);
-                        }
+                        if (!lov->lov_tgts[i])
+                                continue;
+
+                        /* Inactive targets may never have connected */
+                        if (lov->lov_tgts[i]->ltd_active ||
+                            atomic_read(&lov->lov_refcount))
+                            /* We should never get here - these
+                               should have been removed in the
+                             disconnect. */
+                                CERROR("lov tgt %d not cleaned!"
+                                       " deathrow=%d, lovrc=%d\n",
+                                       i, lov->lov_death_row,
+                                       atomic_read(&lov->lov_refcount));
+                        lov_del_target(obd, i, 0, 0);
                 }
                 OBD_FREE(lov->lov_tgts, sizeof(*lov->lov_tgts) *
                          lov->lov_tgt_size);
@@ -2390,7 +2395,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen,
 
                 for(i = 0; i < lov->desc.ld_tgt_count; i++) {
                         tgt = lov->lov_tgts[i];
-                        if (obd_uuid_equals(val, &tgt->ltd_uuid))
+                        if (tgt && obd_uuid_equals(val, &tgt->ltd_uuid))
                                 GOTO(out, rc = i);
                 }
         }
index 45fff43..de2ae53 100644 (file)
@@ -243,7 +243,7 @@ out:
 static int qos_calc_weight(struct lov_obd *lov, int i)
 {
         __u64 temp, temp2;
-        
+
         /* Final ost weight = TGT_BAVAIL - ost_penalty - oss_penalty */
         temp = TGT_BAVAIL(i);
         temp2 = lov->lov_tgts[i]->ltd_qos.ltq_penalty + 
@@ -266,7 +266,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
         lov->lov_tgts[index]->ltd_qos.ltq_usable = 0;
 
         oss = lov->lov_tgts[index]->ltd_qos.ltq_oss;
-        
+
         /* Decay old penalty by half (we're adding max penalty, and don't
            want it to run away.) */
         lov->lov_tgts[index]->ltd_qos.ltq_penalty >>= 1;
@@ -278,7 +278,7 @@ static int qos_used(struct lov_obd *lov, __u32 index, __u64 *total_wt)
                 lov->desc.ld_active_tgt_count;
         oss->lqo_penalty += oss->lqo_penalty_per_obj * 
                 lov->lov_qos.lq_active_oss_count;
-        
+
         /* Decrease all OSS penalties */
         list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) {
                 if (oss->lqo_penalty < oss->lqo_penalty_per_obj) 
@@ -373,8 +373,8 @@ static int qos_calc_rr(struct lov_obd *lov)
         list_for_each_entry(oss, &lov->lov_qos.lq_oss_list, lqo_oss_list) {
                 int j = 0;
                 for (i = 0; i < ost_count; i++) {
-                      LASSERT(lov->lov_tgts[i] != NULL);
-                      if (lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) {
+                        if(lov->lov_tgts[i] &&
+                           lov->lov_tgts[i]->ltd_qos.ltq_oss == oss) {
                               /* Evenly space these OSTs across arrayspace */
                               int next = j * ost_count / oss->lqo_ost_count;
                               LASSERT(next < ost_count);
@@ -384,7 +384,7 @@ static int qos_calc_rr(struct lov_obd *lov)
                               lov->lov_qos.lq_rr_array[next] = i;
                               j++;
                               placed++;
-                      }
+                        }
                 }
                 LASSERT(j == oss->lqo_ost_count);
         }
index 0ecba0d..15d1445 100644 (file)
@@ -262,8 +262,12 @@ static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
         struct obd_device *dev = p->private;
         struct lov_obd *lov = &dev->u.lov;
 
-        return (*pos >= lov->desc.ld_tgt_count) ? NULL : lov->lov_tgts[*pos];
-
+        while (*pos < lov->desc.ld_tgt_count) {
+                if (lov->lov_tgts[*pos])
+                        return lov->lov_tgts[*pos];
+                ++*pos;
+        }
+        return NULL;
 }
 
 static void lov_tgt_seq_stop(struct seq_file *p, void *v)
@@ -302,7 +306,7 @@ static int lov_target_seq_open(struct inode *inode, struct file *file)
         struct proc_dir_entry *dp = PDE(inode);
         struct seq_file *seq;
         int rc;
-        
+
         LPROCFS_ENTRY_AND_CHECK(dp);
         rc = seq_open(file, &lov_tgt_sops);
         if (rc) {