Whamcloud - gitweb
Branch b1_4
author nathan <nathan>
Thu, 5 May 2005 21:52:47 +0000 (21:52 +0000)
committer nathan <nathan>
Thu, 5 May 2005 21:52:47 +0000 (21:52 +0000)
b=5949
r=adilger
Various fixes to make --failover under load safer

21 files changed:
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_support.h
lustre/ldlm/ldlm_lib.c
lustre/lov/lov_obd.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_lov.c
lustre/obdclass/llog_obd.c
lustre/obdclass/obd_config.c
lustre/obdfilter/filter.c
lustre/osc/osc_create.c
lustre/osc/osc_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/recov_thread.c
lustre/tests/recovery-small.sh
lustre/tests/runtests
lustre/tests/writemany.c
lustre/utils/lconf

index 3715578..9266323 100644 (file)
@@ -583,7 +583,7 @@ struct obd_ops {
         int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
         int (*o_detach)(struct obd_device *dev);
         int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
-        int (*o_precleanup)(struct obd_device *dev);
+        int (*o_precleanup)(struct obd_device *dev, int cleanup_stage);
         int (*o_cleanup)(struct obd_device *dev);
         int (*o_process_config)(struct obd_device *dev, obd_count len,
                                 void *data);
index 0867717..eda2851 100644 (file)
@@ -285,7 +285,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data)
         RETURN(rc);
 }
 
-static inline int obd_precleanup(struct obd_device *obd)
+static inline int obd_precleanup(struct obd_device *obd, int cleanup_stage)
 {
         int rc;
         ENTRY;
@@ -293,7 +293,7 @@ static inline int obd_precleanup(struct obd_device *obd)
         OBD_CHECK_OP(obd, precleanup, 0);
         OBD_COUNTER_INCREMENT(obd, precleanup);
 
-        rc = OBP(obd, precleanup)(obd);
+        rc = OBP(obd, precleanup)(obd, cleanup_stage);
         RETURN(rc);
 }
 
index 9f22cfe..9bb7058 100644 (file)
@@ -143,6 +143,7 @@ extern wait_queue_head_t obd_race_waitq;
 #define OBD_FAIL_OSC_LOCK_CP_AST         0x404
 #define OBD_FAIL_OSC_MATCH               0x405
 #define OBD_FAIL_OSC_BRW_PREP_REQ        0x406
+#define OBD_FAIL_OSC_SHUTDOWN            0x407
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
index 4e08881..bf339a8 100644 (file)
@@ -556,8 +556,10 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         }
 
         if (!target || target->obd_stopping || !target->obd_set_up) {
-                DEBUG_REQ(D_ERROR, req, "UUID '%s' not available for connect\n",
-                          str);
+                DEBUG_REQ(D_ERROR, req, "UUID '%s' is not available "
+                       " for connect (%s)\n", str,
+                       !target ? "no target" : 
+                       (target->obd_stopping ? "stopping" : "not set up"));
                 GOTO(out, rc = -ENODEV);
         }
 
@@ -843,6 +845,7 @@ void target_cleanup_recovery(struct obd_device *obd)
 {
         struct list_head *tmp, *n;
         struct ptlrpc_request *req;
+        ENTRY;
 
         LASSERT(obd->obd_stopping);
 
@@ -867,6 +870,7 @@ void target_cleanup_recovery(struct obd_device *obd)
                 list_del(&req->rq_list);
                 target_release_saved_req(req);
         }
+        EXIT;
 }
 
 void target_abort_recovery(void *data)
@@ -1407,7 +1411,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req)
         req->rq_status = rc;
         rc = ptlrpc_reply(req);
         
-        RETURN(rc);    
+        RETURN(rc);     
 }
  
 EXPORT_SYMBOL(target_committed_to_req);
index 269c4c6..6336e3d 100644 (file)
@@ -200,6 +200,8 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
         struct lov_obd *lov = &obd->u.lov;
         int rc;
         ENTRY;
+
+        CDEBUG(D_CONFIG, "Disconnecting lov target %s\n", obd->obd_uuid.uuid);
         
         lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
         if (lov_proc_dir) {
@@ -224,7 +226,7 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
                         osc_obd->obd_no_recov = 1;
         }
 
-        obd_register_observer(tgt->ltd_exp->exp_obd, NULL);
+        obd_register_observer(osc_obd, NULL);
 
         rc = obd_disconnect(tgt->ltd_exp);
         if (rc) {
@@ -251,22 +253,22 @@ static int lov_disconnect(struct obd_export *exp)
         struct lov_tgt_desc *tgt;
         int rc, i;
         ENTRY;
+        
+        rc = class_disconnect(exp);
 
         if (!lov->tgts)
-                goto out_local;
+                RETURN(rc);
 
         /* Only disconnect the underlying layers on the final disconnect. */
         lov->refcount--;
         if (lov->refcount != 0)
-                goto out_local;
+                RETURN(rc);
 
         for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
                 if (tgt->ltd_exp)
                         lov_disconnect_obd(obd, tgt);
         }
-
- out_local:
-        rc = class_disconnect(exp);
+        
         RETURN(rc);
 }
 
@@ -352,110 +354,6 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched,
         RETURN(rc);
 }
 
-static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
-{
-        struct lprocfs_static_vars lvars;
-        struct lustre_cfg *lcfg = buf;
-        struct lov_desc *desc;
-        struct lov_obd *lov = &obd->u.lov;
-        int count;
-        ENTRY;
-
-        if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
-                CERROR("LOV setup requires a descriptor\n");
-                RETURN(-EINVAL);
-        }
-
-        desc = (struct lov_desc *)lustre_cfg_buf(lcfg, 1);
-        
-        if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
-                CERROR("descriptor size wrong: %d > %d\n",
-                       (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
-                RETURN(-EINVAL);
-        }
-
-        if (desc->ld_magic != LOV_DESC_MAGIC) {
-                if (desc->ld_magic == __swab32(LOV_DESC_MAGIC)) {
-                            CDEBUG(D_OTHER, "%s: Swabbing lov desc %p\n",
-                                   obd->obd_name, desc);
-                            lustre_swab_lov_desc(desc);
-                } else {
-                        CERROR("%s: Bad lov desc magic: %#x\n",
-                               obd->obd_name, desc->ld_magic);
-                        RETURN(-EINVAL);
-                }
-        }
-
-        if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) {
-                CWARN("Increasing default_stripe_size "LPU64" to %u\n",
-                      desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE);
-                CWARN("Please update config and run --write-conf on MDS\n");
-
-                desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE;
-        } else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
-                CWARN("default_stripe_size "LPU64" isn't a multiple of %u\n",
-                      desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE);
-                CWARN("Please update config and run --write-conf on MDS\n");
-
-                desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1);
-        }
-
-        /* Because of 64-bit divide/mod operations only work with a 32-bit
-         * divisor in a 32-bit kernel, we cannot support a stripe width
-         * of 4GB or larger on 32-bit CPUs. */
-        count = desc->ld_default_stripe_count;
-        if ((count ? count : desc->ld_tgt_count) *
-            desc->ld_default_stripe_size > ~0UL) {
-                CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
-                       desc->ld_default_stripe_size, count, ~0UL);
-                RETURN(-EINVAL);
-        }
-
-        /* Allocate space for target list */
-        if (desc->ld_tgt_count)
-                count = desc->ld_tgt_count;
-        lov->bufsize = sizeof(struct lov_tgt_desc) * count;
-        OBD_ALLOC(lov->tgts, lov->bufsize);
-        if (lov->tgts == NULL) {
-                CERROR("Out of memory\n");
-                RETURN(-EINVAL);
-        }
-        memset(lov->tgts, 0, lov->bufsize);
-
-        desc->ld_active_tgt_count = 0;
-        lov->desc = *desc;
-        spin_lock_init(&lov->lov_lock);
-       
-        lprocfs_init_vars(lov, &lvars);
-        lprocfs_obd_setup(obd, lvars.obd_vars);
-#ifdef __KERNEL__
-        {
-                struct proc_dir_entry *entry;
-
-                entry = create_proc_entry("target_obd", 0444,
-                                          obd->obd_proc_entry);
-                if (entry != NULL) {
-                        entry->proc_fops = &lov_proc_target_fops;
-                        entry->data = obd;
-                }
-        }
-#endif
-
-        RETURN(0);
-}
-
-static int lov_cleanup(struct obd_device *obd)
-{
-        struct lov_obd *lov = &obd->u.lov;
-
-        lprocfs_obd_cleanup(obd);
-        obd_llog_finish(obd, 0);
-        if (lov->tgts)
-                OBD_FREE(lov->tgts, lov->bufsize);
-
-        RETURN(0);
-}
-
 static int
 lov_add_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
 {
@@ -581,7 +479,7 @@ lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
                 RETURN(-EINVAL);
         }
 
-        tgt = lov->tgts + index;
+        tgt = &lov->tgts[index];
 
         if (obd_uuid_empty(&tgt->uuid)) {
                 CERROR("LOV target at index %d is not setup.\n", index);
@@ -610,6 +508,131 @@ lov_del_obd(struct obd_device *obd, struct obd_uuid *uuidp, int index, int gen)
         RETURN(rc);
 }
 
+static int lov_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+        struct lprocfs_static_vars lvars;
+        struct lustre_cfg *lcfg = buf;
+        struct lov_desc *desc;
+        struct lov_obd *lov = &obd->u.lov;
+        int count;
+        ENTRY;
+
+        if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
+                CERROR("LOV setup requires a descriptor\n");
+                RETURN(-EINVAL);
+        }
+
+        desc = (struct lov_desc *)lustre_cfg_buf(lcfg, 1);
+        
+        if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) {
+                CERROR("descriptor size wrong: %d > %d\n",
+                       (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1));
+                RETURN(-EINVAL);
+        }
+
+        if (desc->ld_magic != LOV_DESC_MAGIC) {
+                if (desc->ld_magic == __swab32(LOV_DESC_MAGIC)) {
+                            CDEBUG(D_OTHER, "%s: Swabbing lov desc %p\n",
+                                   obd->obd_name, desc);
+                            lustre_swab_lov_desc(desc);
+                } else {
+                        CERROR("%s: Bad lov desc magic: %#x\n",
+                               obd->obd_name, desc->ld_magic);
+                        RETURN(-EINVAL);
+                }
+        }
+
+        if (desc->ld_default_stripe_size < PTLRPC_MAX_BRW_SIZE) {
+                CWARN("Increasing default_stripe_size "LPU64" to %u\n",
+                      desc->ld_default_stripe_size, PTLRPC_MAX_BRW_SIZE);
+                CWARN("Please update config and run --write-conf on MDS\n");
+
+                desc->ld_default_stripe_size = PTLRPC_MAX_BRW_SIZE;
+        } else if (desc->ld_default_stripe_size & (LOV_MIN_STRIPE_SIZE - 1)) {
+                CWARN("default_stripe_size "LPU64" isn't a multiple of %lu\n",
+                      desc->ld_default_stripe_size, LOV_MIN_STRIPE_SIZE);
+                CWARN("Please update config and run --write-conf on MDS\n");
+
+                desc->ld_default_stripe_size &= ~(LOV_MIN_STRIPE_SIZE - 1);
+       }
+
+        /* Because of 64-bit divide/mod operations only work with a 32-bit
+         * divisor in a 32-bit kernel, we cannot support a stripe width
+         * of 4GB or larger on 32-bit CPUs. */
+        count = desc->ld_default_stripe_count;
+        if ((count ? count : desc->ld_tgt_count) *
+            desc->ld_default_stripe_size > ~0UL) {
+                CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
+                       desc->ld_default_stripe_size, count, ~0UL);
+                RETURN(-EINVAL);
+        }
+  
+        /* Allocate space for target list */
+        if (desc->ld_tgt_count)
+                count = desc->ld_tgt_count;
+        lov->bufsize = sizeof(struct lov_tgt_desc) * count;
+        OBD_ALLOC(lov->tgts, lov->bufsize);
+        if (lov->tgts == NULL) {
+                CERROR("Out of memory\n");
+                RETURN(-EINVAL);
+        }
+        memset(lov->tgts, 0, lov->bufsize);
+
+        desc->ld_active_tgt_count = 0;
+        lov->desc = *desc;
+        spin_lock_init(&lov->lov_lock);
+       
+        lprocfs_init_vars(lov, &lvars);
+        lprocfs_obd_setup(obd, lvars.obd_vars);
+#ifdef __KERNEL__
+        {
+                struct proc_dir_entry *entry;
+
+                entry = create_proc_entry("target_obd", 0444,
+                                          obd->obd_proc_entry);
+                if (entry != NULL) {
+                        entry->proc_fops = &lov_proc_target_fops;
+                        entry->data = obd;
+                }
+        }
+#endif
+
+        RETURN(0);
+}
+
+static int lov_precleanup(struct obd_device *obd, int stage)
+{
+        int rc = 0;
+        ENTRY;
+
+        if (stage < 2) 
+                RETURN(0);
+
+        rc = obd_llog_finish(obd, 0);
+        if (rc != 0)
+                CERROR("failed to cleanup llogging subsystems\n");
+
+        RETURN(rc);
+}
+
+static int lov_cleanup(struct obd_device *obd)
+{
+        struct lov_obd *lov = &obd->u.lov;
+
+        lprocfs_obd_cleanup(obd);
+        if (lov->tgts) {
+                int i;
+                struct lov_tgt_desc *tgt;
+                for (i = 0, tgt = lov->tgts;
+                      i < lov->desc.ld_tgt_count; i++, tgt++) {
+                        if (!obd_uuid_empty(&tgt->uuid))
+                                lov_del_obd(obd, &tgt->uuid, i, 0);
+                }
+                OBD_FREE(lov->tgts, lov->bufsize);
+        }
+        RETURN(0);
+}
+
 static int lov_process_config(struct obd_device *obd, obd_count len, void *buf)
 {
         struct lustre_cfg *lcfg = buf;
@@ -694,6 +717,7 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa,
 
                 memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
 
+                LASSERT(lov->tgts[i].ltd_exp);
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
                 err = obd_create(lov->tgts[i].ltd_exp, tmp_oa, &obj_mdp, oti);
                 if (err)
@@ -796,11 +820,11 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
         RETURN(rc);
 }
 
-#define ASSERT_LSM_MAGIC(lsmp)                                          \
-do {                                                                    \
-        LASSERT((lsmp) != NULL);                                        \
+#define ASSERT_LSM_MAGIC(lsmp)                                  \
+do {                                                            \
+        LASSERT((lsmp) != NULL);                                \
         LASSERTF((lsmp)->lsm_magic == LOV_MAGIC, "%p->lsm_magic=%x\n",  \
-                 (lsmp), (lsmp)->lsm_magic);                            \
+                 (lsmp), (lsmp)->lsm_magic);                    \
 } while (0)
 
 static int lov_destroy(struct obd_export *exp, struct obdo *oa,
@@ -1016,12 +1040,12 @@ static int lov_setattr_async(struct obd_export *exp, struct obdo *src_oa,
         obd_id objid = src_oa->o_id;
         int i;
         ENTRY;
-
+                                                                                                                             
         ASSERT_LSM_MAGIC(lsm);
         LASSERT(oti);
         if (src_oa->o_valid & OBD_MD_FLCOOKIE)
                 LASSERT(oti->oti_logcookies);
-
+                                                                                                                             
         if (!exp || !exp->exp_obd)
                 RETURN(-ENODEV);
 
@@ -1671,8 +1695,12 @@ static int lov_cancel_unused(struct obd_export *exp,
         lov = &exp->exp_obd->u.lov;
         if (lsm == NULL) {
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                        int err = obd_cancel_unused(lov->tgts[i].ltd_exp, NULL,
-                                                    flags, opaque);
+                        int err;
+                        if (!lov->tgts[i].ltd_exp)
+                                continue;
+                        
+                        err = obd_cancel_unused(lov->tgts[i].ltd_exp, NULL,
+                                                flags, opaque);
                         if (!rc)
                                 rc = err;
                 }
@@ -1890,8 +1918,8 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 for (i = 0; i < count; i++) {
                         int err;
 
-                        /* OST was deleted */
-                        if (obd_uuid_empty(&lov->tgts[i].uuid))
+                        /* OST was disconnected */
+                        if (!lov->tgts[i].ltd_exp)
                                 continue;
 
                         err = obd_iocontrol(cmd, lov->tgts[i].ltd_exp,
@@ -2010,8 +2038,8 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                 if (vallen != lov->desc.ld_tgt_count)
                         RETURN(-EINVAL);
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                        /* OST was deleted */
-                        if (obd_uuid_empty(&lov->tgts[i].uuid))
+                        /* OST was disconnected */
+                        if (!lov->tgts[i].ltd_exp)
                                 continue;
 
                         /* initialize all OSCs, even inactive ones */
@@ -2035,8 +2063,8 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                 if (val && !obd_uuid_equals(val, &lov->tgts[i].uuid))
                         continue;
 
-                /* OST was deleted */
-                if (obd_uuid_empty(&lov->tgts[i].uuid))
+                /* OST was disconnected */
+                if (!lov->tgts[i].ltd_exp)
                         continue;
 
                 if (!val && !lov->tgts[i].active)
@@ -2217,6 +2245,7 @@ static int lov_quotactl(struct obd_export *exp, struct obd_quotactl *oqctl)
 struct obd_ops lov_obd_ops = {
         .o_owner               = THIS_MODULE,
         .o_setup               = lov_setup,
+        .o_precleanup          = lov_precleanup,
         .o_cleanup             = lov_cleanup,
         .o_process_config      = lov_process_config,
         .o_connect             = lov_connect,
index 1afcc33..ccba26e 100644 (file)
@@ -474,7 +474,14 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
         mod = och->och_mod;
         if (likely(mod != NULL)) {
                 mod->mod_close_req = req;
-                LASSERT(mod->mod_open_req->rq_type != LI_POISON);
+                if (mod->mod_open_req->rq_type == LI_POISON) {
+                        /* FIXME This should be an ASSERT, but until we
+                           figure out why it can be poisoned here, give 
+                           a reasonable return. */
+                        CERROR("LBUG POISONED req %p!\n", mod->mod_open_req);
+                        ptlrpc_free_req(req);
+                        GOTO(out, rc = -EIO);
+                }
                 DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
         } else {
                 CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
@@ -715,10 +722,10 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         MOD_INC_USE_COUNT;
 #else
-       if (!try_module_get(THIS_MODULE)) {
-               CERROR("Can't get module. Is it alive?");
-               return -EINVAL;
-       }
+        if (!try_module_get(THIS_MODULE)) {
+                CERROR("Can't get module. Is it alive?");
+                return -EINVAL;
+        }
 #endif
         switch (cmd) {
         case OBD_IOC_CLIENT_RECOVER:
@@ -754,7 +761,7 @@ out:
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         MOD_DEC_USE_COUNT;
 #else
-       module_put(THIS_MODULE);
+        module_put(THIS_MODULE);
 #endif
 
         return rc;
@@ -1071,9 +1078,13 @@ int mdc_init_ea_size(struct obd_export *mdc_exp, struct obd_export *lov_exp)
         RETURN(0);
 }
 
-static int mdc_precleanup(struct obd_device *obd)
+static int mdc_precleanup(struct obd_device *obd, int stage)
 {
         int rc = 0;
+        ENTRY;
+        
+        if (stage < 2) 
+                RETURN(0);
 
         rc = obd_llog_finish(obd, 0);
         if (rc != 0)
index 1781dad..6acf36b 100644 (file)
@@ -349,7 +349,7 @@ static int mds_destroy_export(struct obd_export *export)
 
                 /* If you change this message, be sure to update
                  * replay_single:test_46 */
-                CDEBUG(D_INODE, "force closing file handle for %.*s (%s:%lu)\n",
+                CDEBUG(D_INODE|D_IOCTL, "force closing file handle for %.*s (%s:%lu)\n",
                        dentry->d_name.len, dentry->d_name.name,
                        ll_bdevname(dentry->d_inode->i_sb, btmp),
                        dentry->d_inode->i_ino);
@@ -360,7 +360,7 @@ static int mds_destroy_export(struct obd_export *export)
                                    !(export->exp_flags & OBD_OPT_FAILOVER));
 
                 if (rc)
-                        CDEBUG(D_INODE, "Error closing file: %d\n", rc);
+                        CDEBUG(D_INODE|D_IOCTL, "Error closing file: %d\n", rc);
                 spin_lock(&med->med_open_lock);
         }
         spin_unlock(&med->med_open_lock);
@@ -1852,6 +1852,10 @@ int mds_postrecov(struct obd_device *obd)
 {
         struct mds_obd *mds = &obd->u.mds;
         int rc, item = 0;
+        ENTRY;
+
+        if (obd->obd_fail) 
+                RETURN(0);
 
         LASSERT(!obd->obd_recovering);
         LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
@@ -1932,16 +1936,22 @@ int mds_lov_clean(struct obd_device *obd)
         RETURN(0);
 }
 
-static int mds_precleanup(struct obd_device *obd)
+static int mds_precleanup(struct obd_device *obd, int stage)
 {
         int rc = 0;
         ENTRY;
 
-        mds_lov_set_cleanup_flags(obd);
-        target_cleanup_recovery(obd);
-        mds_lov_disconnect(obd);
-        mds_lov_clean(obd);
-        llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
+        switch (stage) {
+        case 1:
+                mds_lov_set_cleanup_flags(obd);
+                target_cleanup_recovery(obd);
+                break;
+        case 2:
+                mds_lov_disconnect(obd);
+                mds_lov_clean(obd);
+                llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
+                rc = obd_llog_finish(obd, 0);
+        }
         RETURN(rc);
 }
 
@@ -1987,8 +1997,6 @@ static int mds_cleanup(struct obd_device *obd)
                 must_relock++;
         }
 
-        obd_llog_finish(obd, 0);
-
         mntput(mds->mds_vfsmnt);
         mds->mds_sb = NULL;
 
index 201adbe..b71f02d 100644 (file)
@@ -505,7 +505,7 @@ int mds_lov_synchronize(void *data)
         struct obd_device *obd;
         struct obd_uuid *uuid;
         unsigned long flags;
-        int rc;
+        int rc = 0;
 
         lock_kernel();
         ptlrpc_daemonize();
@@ -527,7 +527,7 @@ int mds_lov_synchronize(void *data)
         rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"),
                           "mds_conn", 0, uuid);
         if (rc != 0)
-                RETURN(rc);
+                GOTO(out, rc);
 
         rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT),
                           obd->u.mds.mds_lov_desc.ld_tgt_count,
@@ -535,7 +535,7 @@ int mds_lov_synchronize(void *data)
         if (rc != 0) {
                 CERROR("%s: failed at llog_origin_connect: %d\n",
                        obd->obd_name, rc);
-                RETURN(rc);
+                GOTO(out, rc);
         }
 
         CWARN("MDS %s: %s now active, resetting orphans\n",
@@ -544,10 +544,12 @@ int mds_lov_synchronize(void *data)
         if (rc != 0) {
                 CERROR("%s: failed at mds_lov_clearorphans: %d\n",
                        obd->obd_name, rc);
-                RETURN(rc);
+                GOTO(out, rc);
         }
 
-        RETURN(0);
+out:
+        class_export_put(obd->obd_self_export);
+        RETURN(rc);
 }
 
 int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid)
@@ -563,6 +565,9 @@ int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid)
 
         mlsi->mlsi_obd = obd;
         mlsi->mlsi_uuid = uuid;
+        
+        /* We need to lock the mds in place for our new thread context. */
+        class_export_get(obd->obd_self_export);
 
         rc = kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES);
         if (rc < 0)
index 0796c50..bf82dfd 100644 (file)
@@ -68,7 +68,6 @@ int llog_cleanup(struct llog_ctxt *ctxt)
                 rc = CTXTP(ctxt, cleanup)(ctxt);
 
         ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL;
-        ctxt->loc_exp = NULL;
         OBD_FREE(ctxt, sizeof(*ctxt));
 
         RETURN(rc);
index bc07593..91535d7 100644 (file)
@@ -127,7 +127,7 @@ int class_attach(struct lustre_cfg *lcfg)
                         GOTO(out, rc = -EINVAL);
         }
 
-        /* The attach is our first obd reference */
+        /* Detach drops this */
         atomic_set(&obd->obd_refcount, 1);
 
         obd->obd_attached = 1;
@@ -195,6 +195,11 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
         obd->obd_type->typ_refcnt++;
         obd->obd_set_up = 1;
+        spin_lock(&obd->obd_dev_lock);
+        /* cleanup drops this */
+        atomic_inc(&obd->obd_refcount);
+        spin_unlock(&obd->obd_dev_lock);
+        
         CDEBUG(D_IOCTL, "finished setup of obd %s (uuid %s)\n",
                obd->obd_name, obd->obd_uuid.uuid);
         
@@ -210,6 +215,10 @@ err_exp:
 static int __class_detach(struct obd_device *obd)
 {
         int err = 0;
+        ENTRY;
+
+        CDEBUG(D_CONFIG | D_WARNING, "destroying obd %d (%s)\n",
+               obd->obd_minor, obd->obd_name);
 
         if (OBP(obd, detach)) 
                 err = OBP(obd,detach)(obd);
@@ -226,12 +235,13 @@ static int __class_detach(struct obd_device *obd)
         obd->obd_type->typ_refcnt--;
         class_put_type(obd->obd_type);
         class_release_dev(obd);
-        return (err);
+        RETURN(err);
 }
 
 int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
 {
         ENTRY;
+
         if (obd->obd_set_up) {
                 CERROR("OBD device %d still set up\n", obd->obd_minor);
                 RETURN(-EBUSY);
@@ -281,8 +291,8 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
 {
         int err = 0;
         char *flag;
-
         ENTRY;
+
         OBD_RACE(OBD_FAIL_LDLM_RECOV_CLIENTS);
 
         if (!obd->obd_set_up) {
@@ -311,6 +321,7 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
                                        obd->obd_name);
                                 obd->obd_fail = 1;
                                 obd->obd_no_transno = 1;
+                                obd->obd_no_recov = 1;
                                 /* Set the obd readonly if we can */
                                 if (OBP(obd, iocontrol))
                                         obd_iocontrol(OBD_IOC_SET_READONLY,
@@ -323,9 +334,9 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
                         }
         }
         
-        /* The two references that should be remaining are the
-         * obd_self_export and the attach reference. */
-        if (atomic_read(&obd->obd_refcount) > 2) {
+        /* The three references that should be remaining are the
+         * obd_self_export and the attach and setup references. */
+        if (atomic_read(&obd->obd_refcount) > 3) {
                 if (!(obd->obd_fail || obd->obd_force)) {
                         CERROR("OBD %s is still busy with %d references\n"
                                "You should stop active file system users,"
@@ -341,20 +352,18 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
         }
 
         LASSERT(obd->obd_self_export);
-        if (obd->obd_self_export) {
-               /* mds_precleanup will clean up the lov (and osc's)*/
-               err = obd_precleanup(obd);
-               if (err)
-                       GOTO(out, err);
-               obd->obd_self_export->exp_flags |= 
-                       (obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
-                       (obd->obd_force ? OBD_OPT_FORCE : 0);
-               class_unlink_export(obd->obd_self_export);
-               obd->obd_self_export = NULL;
-        }
+        
+        /* Precleanup stage 1, we must make sure all exports (other than the
+           self-export) get destroyed. */
+        err = obd_precleanup(obd, 1);
+        if (err)
+                CERROR("Precleanup %s returned %d\n",
+                       obd->obd_name, err);
 
+        class_decref(obd);
         obd->obd_set_up = 0;
         obd->obd_type->typ_refcnt--;
+
         RETURN(0);
 out:
         /* Allow a failed cleanup to try again. */
@@ -364,16 +373,45 @@ out:
 
 void class_decref(struct obd_device *obd)
 {
-        if (atomic_dec_and_test(&obd->obd_refcount)) {
-                int err;
-                CDEBUG(D_IOCTL, "finishing cleanup of obd %s (%s)\n",
+        int err;
+        int refs;
+
+        spin_lock(&obd->obd_dev_lock);
+        atomic_dec(&obd->obd_refcount);
+        refs = atomic_read(&obd->obd_refcount);
+        spin_unlock(&obd->obd_dev_lock);
+        
+        CDEBUG(D_INFO, "Decref %s now %d\n", obd->obd_name, refs);
+
+        if ((refs == 1) && obd->obd_stopping) {
+                /* All exports (other than the self-export) have been 
+                   destroyed; there should be no more in-progress ops
+                   by this point.*/
+                /* if we're not stopping, we didn't finish setup */
+                /* Precleanup stage 2,  do other type-specific
+                   cleanup requiring the self-export. */
+                err = obd_precleanup(obd, 2);
+                if (err)
+                        CERROR("Precleanup %s returned %d\n",
+                               obd->obd_name, err);
+                obd->obd_self_export->exp_flags |= 
+                        (obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
+                        (obd->obd_force ? OBD_OPT_FORCE : 0);
+                /* note that we'll recurse into class_decref again */
+                class_unlink_export(obd->obd_self_export);
+                return;
+        }
+
+        if (refs == 0) {
+                CDEBUG(D_CONFIG, "finishing cleanup of obd %s (%s)\n",
                        obd->obd_name, obd->obd_uuid.uuid);
                 LASSERT(!obd->obd_attached);
                 if (obd->obd_stopping) {
-                        /* If we're not stopping, we never set up */
+                        /* If we're not stopping, we were never set up */
                         err = obd_cleanup(obd);
                         if (err)
-                                CERROR("Cleanup returned %d\n", err);
+                                CERROR("Cleanup %s returned %d\n",
+                                       obd->obd_name, err);
                 }
                 err = __class_detach(obd);
                 if (err)
index 79ba6ed..ae39b15 100644 (file)
@@ -1373,10 +1373,19 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf)
         return rc;
 }
 
-static int filter_precleanup(struct obd_device *obd)
+static int filter_precleanup(struct obd_device *obd, int stage)
 {
-        target_cleanup_recovery(obd);
-        return (0);
+        int rc = 0;     /* stays 0 for any stage without work below */
+        ENTRY;
+
+        switch(stage) { /* precleanup is staged; act only on ours */
+        case 1:         /* same work the old one-stage precleanup did */
+                target_cleanup_recovery(obd);
+                break;
+        case 2:         /* llog teardown, formerly done in filter_cleanup() */
+                rc = obd_llog_finish(obd, 0);
+        }               /* any other stage: nothing to do */
+        RETURN(rc);
 }
 
 static int filter_cleanup(struct obd_device *obd)
@@ -1430,8 +1439,6 @@ static int filter_cleanup(struct obd_device *obd)
                 must_relock++;
         }
         
-        obd_llog_finish(obd, 0);
-
         mntput(filter->fo_vfsmnt);
         //destroy_buffers(filter->fo_sb->s_dev);
         filter->fo_sb = NULL;
index ededb7e..27d1e41 100644 (file)
@@ -70,6 +70,8 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc)
         }
 
         oscc = req->rq_async_args.pointer_arg[0];
+        LASSERT(oscc && (oscc->oscc_obd != LP_POISON));
+        
         spin_lock(&oscc->oscc_lock);
         oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
         if (rc == -ENOSPC || rc == -EROFS) {
index 0e21a8c..f3756cc 100644 (file)
@@ -3018,6 +3018,8 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
         char *bufs[1] = {key};
         ENTRY;
 
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10);
+
         if (keylen == strlen("next_id") &&
             memcmp(key, "next_id", strlen("next_id")) == 0) {
                 if (vallen != sizeof(obd_id))
@@ -3239,6 +3241,21 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf)
         RETURN(rc);
 }
 
+/*
+ * Staged precleanup for the OSC, hooked up as .o_precleanup.
+ * Stages below 2 need no OSC-specific work and return 0.  Stage 2 (and
+ * any later stage) calls obd_llog_finish(), which osc_cleanup() used to
+ * do, and returns its result.
+ */
+static int osc_precleanup(struct obd_device *obd, int stage)
+{
+        int rc = 0;
+        ENTRY;
+
+        if (stage < 2)
+                RETURN(0);
+
+        rc = obd_llog_finish(obd, 0);
+        if (rc != 0)
+                /* report which device failed and why, like class_decref */
+                CERROR("%s: failed to cleanup llogging subsystems: %d\n",
+                       obd->obd_name, rc);
+
+        RETURN(rc);
+}
+
 int osc_cleanup(struct obd_device *obd)
 {
         struct osc_creator *oscc = &obd->u.cli.cl_oscc;
@@ -3258,7 +3275,6 @@ int osc_cleanup(struct obd_device *obd)
 
         rc = client_obd_cleanup(obd);
         ptlrpcd_decref();
-        obd_llog_finish(obd, 0);
         RETURN(rc);
 }
 
@@ -3266,6 +3282,7 @@ int osc_cleanup(struct obd_device *obd)
 struct obd_ops osc_obd_ops = {
         .o_owner                = THIS_MODULE,
         .o_setup                = osc_setup,
+        .o_precleanup           = osc_precleanup,
         .o_cleanup              = osc_cleanup,
         .o_add_conn             = client_import_add_conn,
         .o_del_conn             = client_import_del_conn,
index 96901f1..5044fe5 100644 (file)
@@ -189,6 +189,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode,
         ENTRY;
 
         LASSERT((unsigned long)imp > 0x1000);
+        LASSERT(imp != LP_POISON);
 
         OBD_ALLOC(request, sizeof(*request));
         if (!request) {
index 732ee37..35254bd 100644 (file)
@@ -273,7 +273,7 @@ static int import_select_connection(struct obd_import *imp)
 
         /* if not found, simply choose the current one */
         if (!found) {
-                CWARN("%s: continuing with current connection\n",
+                CDEBUG(D_NET, "%s: continuing with current connection\n",
                       imp->imp_obd->obd_name);
                 LASSERT(imp->imp_conn_current);
                 imp_conn = imp->imp_conn_current;
@@ -305,7 +305,7 @@ static int import_select_connection(struct obd_import *imp)
         class_export_put(dlmexp);
 
         imp->imp_conn_current = imp_conn;
-        CDEBUG(D_HA, "%s: import %p using connection %s\n",
+        CDEBUG(D_NET, "%s: import %p using connection %s\n",
                imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid);
         spin_unlock(&imp->imp_lock);
 
index 26ad632..165f36a 100644 (file)
@@ -47,7 +47,7 @@ int ptlrpc_ping(struct obd_import *imp)
         req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
                               NULL);
         if (req) {
-                DEBUG_REQ(D_HA, req, "pinging %s->%s",
+                DEBUG_REQ(D_INFO, req, "pinging %s->%s",
                           imp->imp_obd->obd_uuid.uuid,
                           imp->imp_target_uuid.uuid);
                 req->rq_no_resend = req->rq_no_delay = 1;
@@ -149,7 +149,7 @@ static int ptlrpc_pinger_main(void *arg)
                         } else {
                                 if (!imp->imp_pingable)
                                         continue;
-                                CDEBUG(D_HA,
+                                CDEBUG(D_INFO,
                                        "don't need to ping %s (%lu > %lu)\n",
                                        imp->imp_target_uuid.uuid,
                                        imp->imp_next_ping, this_ping);
@@ -170,7 +170,7 @@ static int ptlrpc_pinger_main(void *arg)
                    next ping time to next_ping + .01 sec, which means
                    we will SKIP the next ping at next_ping, and the
                    ping will get sent 2 timeouts from now!  Beware. */
-                CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
+                CDEBUG(D_INFO, "next ping in %lu (%lu)\n", time_to_next_ping,
                        this_ping + PING_INTERVAL * HZ);
                 if (time_to_next_ping > 0) {
                         lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ),
index 6d0f118..19aea6c 100644 (file)
@@ -340,6 +340,13 @@ static int log_commit_thread(void *arg)
                         }
                         up(&llcd->llcd_ctxt->loc_sem);
 
+                        if (!import || (import == LP_POISON)) {
+                                CERROR("No import %p (llcd=%p, ctxt=%p)\n",
+                                       import, llcd, llcd->llcd_ctxt);
+                                llcd_put(llcd);
+                                continue;
+                        }
+
                         request = ptlrpc_prep_req(import, OBD_LOG_CANCEL, 1,
                                                   &llcd->llcd_cookiebytes,
                                                   bufs);
index fb740b3..cb9a89b 100755 (executable)
@@ -421,6 +421,31 @@ test_26() {      # bug 5921 - evict dead exports
 }
 run_test 26 "evict dead exports"
 
+test_27() {
+       [ "`lsmod | grep mds`" ] || \
+           { echo "skipping test 27 (non-local MDS)" && return 0; }
+       mkdir -p $DIR/$tdir
+       writemany -q -a $DIR/$tdir/$tfile 0 5 &   # seconds=0: runs until SIGUSR1
+       CLIENT_PID=$!
+       sleep 1
+       FAILURE_MODE="SOFT"
+       facet_failover mds
+#define OBD_FAIL_OSC_SHUTDOWN            0x407
+       sysctl -w lustre.fail_loc=0x80000407
+       # wait for reconnect: loop while fail_loc still reads 0x80000407 (signed)
+       echo -n waiting for fail_loc
+       while [ `sysctl -n lustre.fail_loc` -eq -2147482617 ]; do
+           sleep 1
+           echo -n .
+       done
+       facet_failover mds
+       # no crashes allowed! writemany's exit status is ignored (see "true")
+        kill -USR1 $CLIENT_PID   # ask writemany to stop
+       wait $CLIENT_PID 
+       true
+}
+run_test 27 "fail LOV while using OSC's"
+
 test_28() {      # bug 6086 - error adding new clients
        do_facet client mcreate $MOUNT/$tfile        || return 1
        drop_bl_callback "chmod 0777 $MOUNT/$tfile"  || return 2
@@ -433,10 +458,10 @@ test_28() {      # bug 6086 - error adding new clients
 }
 run_test 28 "handle error adding new clients (bug 6086)"
 
-test_50() {     # bug 4834 - failover under load failures
+test_50() {
        mkdir -p $DIR/$tdir
-       # put a load of file creates/writes/deletes for 10 min.
-       do_facet client "writemany -q -a $DIR/$tdir/$tfile 600 5" &
+       # put a load of file creates/writes/deletes
+       writemany -q $DIR/$tdir/$tfile 0 5 &
        CLIENT_PID=$!
        echo writemany pid $CLIENT_PID
        sleep 10
@@ -448,9 +473,12 @@ test_50() {     # bug 4834 - failover under load failures
        sleep 60
        fail mds
        # client process should see no problems even though MDS went down
+       sleep $TIMEOUT
+        kill -USR1 $CLIENT_PID
        wait $CLIENT_PID 
        rc=$?
        echo writemany returned $rc
+       #these may fail because of eviction due to slow AST response.
        return $rc
 }
 run_test 50 "failover MDS under load"
@@ -458,23 +486,24 @@ run_test 50 "failover MDS under load"
 test_51() {
        mkdir -p $DIR/$tdir
        # put a load of file creates/writes/deletes
-       do_facet client "writemany -q -a $DIR/$tdir/$tfile 300 5" &
+       writemany -q $DIR/$tdir/$tfile 0 5 &
        CLIENT_PID=$!
-       echo writemany pid $CLIENT_PID
        sleep 1
        FAILURE_MODE="SOFT"
        facet_failover mds
        # failover at various points during recovery
-       sleep 1
-       facet_failover mds
-       sleep 5
-       facet_failover mds
-       sleep 10
-       facet_failover mds
-       sleep 20
-       facet_failover mds
+       SEQ="1 5 10 $(seq $TIMEOUT 5 $(($TIMEOUT+10)))"
+        echo will failover at $SEQ
+        for i in $SEQ
+          do
+          echo failover in $i sec
+          sleep $i
+          facet_failover mds
+        done
        # client process should see no problems even though MDS went down
        # and recovery was interrupted
+       sleep $TIMEOUT
+        kill -USR1 $CLIENT_PID
        wait $CLIENT_PID 
        rc=$?
        echo writemany returned $rc
@@ -483,7 +512,7 @@ test_51() {
 run_test 51 "failover MDS during recovery"
 
 test_52_guts() {
-       do_facet client "writemany -q $DIR/$tdir/$tfile 600 5" &
+       do_facet client "writemany -q -a $DIR/$tdir/$tfile 0 5" &
        CLIENT_PID=$!
        echo writemany pid $CLIENT_PID
        sleep 10
@@ -513,7 +542,7 @@ test_52() {
        test_52_guts
        rc=$?
        client_reconnect
-       return $rc
+       #return $rc
 }
 run_test 52 "failover OST under load"
 
index f31295a..75f8765 100755 (executable)
@@ -130,5 +130,6 @@ if [ `expr $NOWUSED - $USED` -gt 1024 ]; then
 fi
 
 if [ "$I_MOUNTED" = "yes" ]; then
+       sync && sleep 2 && sync     # wait for delete thread
        sh llmountcleanup.sh || exit 29
 fi
index 3473393..15a7292 100644 (file)
@@ -25,6 +25,7 @@ char cmdname[512];
 int o_abort = 0;
 int o_quiet = 0;
 
+
 struct kid_list_t {
         pid_t kid;
         struct kid_list_t *next;
@@ -49,6 +50,13 @@ void kill_kids(void)
         }
 }
 
+/* Set by usr1_handler() on SIGUSR1 and polled by run_one_child()'s loop.
+ * volatile sig_atomic_t is the type the C standard guarantees a signal
+ * handler may write and the interrupted code may then read. */
+static volatile sig_atomic_t usr1_received;
+
+/* SIGUSR1 handler: record the request to stop, then call kill_kids().
+ * NOTE(review): kill_kids() runs in signal context here; confirm it only
+ * uses async-signal-safe calls. */
+void usr1_handler(int unused)
+{
+        usr1_received = 1;
+        kill_kids();
+}
+
 int wait_for_threads(int live_threads)
 {
         int rc = 0;
@@ -75,7 +83,7 @@ int wait_for_threads(int live_threads)
                          * always returns 1 (OK).  See wait(2).
                          */
                         int err = WEXITSTATUS(status);
-                        if (err || WIFSIGNALED(status))
+                        if (err)
                                 fprintf(stderr,
                                         "%s: error: PID %d had rc=%d\n",
                                         cmdname, ret, err);
@@ -126,10 +134,15 @@ int run_one_child(char *file, int thread, int seconds)
         gettimeofday(&start, NULL);
 
         while(!rc) {
-                gettimeofday(&cur, NULL);
-                if (cur.tv_sec > (start.tv_sec + seconds))
+                if (usr1_received)
                         break;
 
+                gettimeofday(&cur, NULL);
+                if (seconds) {
+                        if (cur.tv_sec > (start.tv_sec + seconds))
+                                break;
+                }
+                
                 sprintf(filename, "%s-%d-%ld", file, thread, nfiles);
 
                 fd = open(filename, O_RDWR | O_CREAT, 0666);
@@ -226,6 +239,8 @@ int main(int argc, char *argv[])
                 exit(2);
         }
 
+        signal(SIGUSR1, usr1_handler);
+
         for (i = 1; i <= threads; i++) {
                 rc = fork();
                 if (rc < 0) {
index 9b2bc22..827edfd 100755 (executable)
@@ -1550,13 +1550,10 @@ class LOV(Module):
             lctl.lov_add_obd(self.name, self.uuid, target_uuid, index, gen)
 
     def cleanup(self):
-        for (osc, index, gen, active) in self.osclist:
-            target_uuid = osc.target_uuid
-            if is_prepared(osc.name):
-                lctl.lov_del_obd(self.name, self.uuid, target_uuid, index, gen)
-            osc.cleanup()
         if is_prepared(self.name):
             Module.cleanup(self)
+        for (osc, index, gen, active) in self.osclist:
+            osc.cleanup()
         if self.config_only:
             panic("Can't clean up config_only LOV ", self.name)