Whamcloud - gitweb
timeout and assert in invalidate import.
authorshadow <shadow>
Wed, 11 Jun 2008 06:48:53 +0000 (06:48 +0000)
committershadow <shadow>
Wed, 11 Jun 2008 06:48:53 +0000 (06:48 +0000)
b=15716
i=rread
i=johann

lustre/ChangeLog
lustre/include/linux/obd_class.h
lustre/include/obd_class.h
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c
lustre/ptlrpc/import.c
lustre/ptlrpc/ptlrpcd.c

index 04dee50..cd60758 100644 (file)
@@ -12,6 +12,13 @@ tbd  Sun Microsystems, Inc.
        * RHEL 4 and RHEL 5/SLES 10 clients behaves differently on 'cd' to a
         removed cwd "./" (refer to Bugzilla 14399).
 
+Severity   : minor
+Bugzilla   : 15716
+Description: timeout with invalidate import.
+Details    : ptlrpcd_check calls obd_zombie_impexp_cull and waits for a request
+             which should be handled by ptlrpcd itself. This produces a long
+             wait and -ETIMEOUT in ptlrpc_invalidate_import and, as a result,
+             an LASSERT.
+
 Severity   : enhancement
 Bugzilla   : 15741
 Description: Update to RHEL5 kernel-2.6.18-53.1.19.el5.
@@ -73,7 +80,7 @@ Description: Obsolete CURRENT_SECONDS and use cfs_time_current_sec() instead.
 
 Severity   : minor
 Bugzilla   : 14645
- rare, on shutdown ost
+Frequency  : rare, on shutdown ost
 Description: don't hit live lock with umount ost.
 Details    : shrink_dcache_parent can be in long loop with destroy dentries,
             use shrink_dcache_sb instead.
index b074853..914e6d0 100644 (file)
@@ -65,10 +65,6 @@ int ldiskfsfs_dirhash(const char *name, int len, struct ldiskfs_dx_hash_info *hi
 # endif /* HAVE_SERVER_SUPPORT */
 #endif /* __KERNEL__ */
 
-void obd_zombie_impexp_init(void);
-void obd_zombie_impexp_cull(void);
-extern void (*obd_zombie_impexp_notify)(void);
-
 /* obdo.c */
 #ifdef __KERNEL__
 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
index c5d7f79..6fcbd52 100644 (file)
@@ -90,6 +90,10 @@ char *obd_export_nid2str(struct obd_export *exp);
 int obd_export_evict_by_nid(struct obd_device *obd, const char *nid);
 int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid);
 
+int obd_zombie_impexp_init(void);
+void obd_zombie_impexp_stop(void);
+void obd_zombie_impexp_cull(void);
+
 /* obd_config.c */
 int class_process_config(struct lustre_cfg *lcfg);
 int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
index 1c16400..9de3728 100644 (file)
@@ -634,6 +634,7 @@ static void cleanup_obdclass(void)
 
         class_handle_cleanup();
         class_exit_uuidlist();
+        obd_zombie_impexp_stop();
 
         memory_leaked = obd_memory_sum();
         pages_leaked = obd_pages_sum();
index e906d83..2bb7514 100644 (file)
@@ -45,9 +45,7 @@ cfs_mem_cache_t *import_cachep;
 struct list_head  obd_zombie_imports;
 struct list_head  obd_zombie_exports;
 spinlock_t        obd_zombie_impexp_lock;
-void            (*obd_zombie_impexp_notify)(void) = NULL;
-EXPORT_SYMBOL(obd_zombie_impexp_notify);
-
+static void obd_zombie_impexp_notify(void);
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
@@ -1349,12 +1347,15 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 }
 EXPORT_SYMBOL(obd_export_evict_by_uuid);
 
-void obd_zombie_impexp_cull(void) 
+/**
+ * kill zombie imports and exports
+ */
+void obd_zombie_impexp_cull(void)
 {
         struct obd_import *import;
         struct obd_export *export;
         ENTRY;
-        
+
         do {
                 spin_lock (&obd_zombie_impexp_lock);
 
@@ -1365,7 +1366,7 @@ void obd_zombie_impexp_cull(void)
                                             imp_zombie_chain);
                         list_del(&import->imp_zombie_chain);
                 }
-                
+
                 export = NULL;
                 if (!list_empty(&obd_zombie_exports)) {
                         export = list_entry(obd_zombie_exports.next,
@@ -1375,7 +1376,7 @@ void obd_zombie_impexp_cull(void)
                 }
 
                 spin_unlock(&obd_zombie_impexp_lock);
-                
+
                 if (import != NULL)
                         class_import_destroy(import);
 
@@ -1385,12 +1386,136 @@ void obd_zombie_impexp_cull(void)
         } while (import != NULL || export != NULL);
         EXIT;
 }
-EXPORT_SYMBOL(obd_zombie_impexp_cull);
 
-void obd_zombie_impexp_init(void)
+static struct completion        obd_zombie_start; /* startup handshake */
+static struct completion        obd_zombie_stop;  /* thread loop has ended */
+static unsigned long            obd_zombie_flags; /* OBD_ZOMBIE_* bits */
+static cfs_waitq_t              obd_zombie_waitq; /* thread sleeps here */
+
+enum {
+        OBD_ZOMBIE_STOP = 1     /* bit number passed to set_bit/test_bit */
+};
+
+/**
+ * Tell whether the zombie import/export thread currently has nothing to do.
+ *
+ * The lists and the stop bit are sampled under obd_zombie_impexp_lock.
+ *
+ * \param arg unused
+ * \retval 1 both zombie lists are empty and OBD_ZOMBIE_STOP is not set
+ * \retval 0 a zombie is queued or a stop has been requested
+ */
+int obd_zombie_impexp_check(void *arg)
+{
+        int idle = 0;
+
+        spin_lock(&obd_zombie_impexp_lock);
+        if (list_empty(&obd_zombie_imports) &&
+            list_empty(&obd_zombie_exports) &&
+            !test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags))
+                idle = 1;
+        spin_unlock(&obd_zombie_impexp_lock);
+
+        RETURN(idle);
+}
+
+/**
+ * notify import/export destroy thread about new zombie.
+ *
+ * Signals obd_zombie_waitq, the wait queue obd_zombie_impexp_thread() sleeps
+ * on. obd_zombie_impexp_stop() also calls this after setting the stop bit.
+ */
+static void obd_zombie_impexp_notify(void)
+{
+        cfs_waitq_signal(&obd_zombie_waitq);
+}
+
+#ifdef __KERNEL__
+
+/**
+ * Kernel thread that destroys zombie imports and exports.
+ *
+ * Sleeps on obd_zombie_waitq until obd_zombie_impexp_check() reports that
+ * there is something to do (a zombie is queued or OBD_ZOMBIE_STOP is set),
+ * then culls the zombie lists.  The loop ends once OBD_ZOMBIE_STOP is set.
+ *
+ * obd_zombie_start is completed as soon as the daemonize attempt is over so
+ * that obd_zombie_impexp_init() can return.  obd_zombie_stop is completed on
+ * every exit path, including a failed daemonize, so that
+ * obd_zombie_impexp_stop() cannot block forever on a thread that has
+ * already gone away.
+ *
+ * \param unused ignored
+ * \retval 0 normal shutdown
+ * \retval otherwise the error returned by cfs_daemonize_ctxt()
+ */
+static int obd_zombie_impexp_thread(void *unused)
+{
+        int rc;
+
+        rc = cfs_daemonize_ctxt("obd_zombid");
+        complete(&obd_zombie_start);
+        if (rc != 0) {
+                /* the loop below will never run, so release any later
+                 * waiter in obd_zombie_impexp_stop() right away */
+                complete(&obd_zombie_stop);
+                RETURN(rc);
+        }
+
+        while (!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)) {
+                struct l_wait_info lwi = { 0 };
+
+                l_wait_event(obd_zombie_waitq,
+                             !obd_zombie_impexp_check(NULL), &lwi);
+
+                obd_zombie_impexp_cull();
+        }
+
+        complete(&obd_zombie_stop);
+
+        RETURN(0);
+}
+
+#else /* ! KERNEL */
+
+static atomic_t zombie_recur = ATOMIC_INIT(0);
+static void *obd_zombie_impexp_work_cb;
+static void *obd_zombie_impexp_idle_cb;
+
+/**
+ * Wait callback for the non-kernel build: cull the zombie lists.
+ *
+ * zombie_recur counts the invocations currently in progress; only the one
+ * that raises it from 0 to 1 performs the cull, any other returns at once.
+ *
+ * \param arg unused
+ * \retval 1 this call ran obd_zombie_impexp_cull()
+ * \retval 0 another invocation was already in progress
+ */
+int obd_zombie_impexp_kill(void *arg)
+{
+        int first = (atomic_inc_return(&zombie_recur) == 1);
+
+        if (first)
+                obd_zombie_impexp_cull();
+        atomic_dec(&zombie_recur);
+
+        return first;
+}
+
+#endif
+
+/**
+ * start destroy zombie import/export thread
+ *
+ * Initialises the zombie lists, their lock, the start/stop completions and
+ * the wait queue.  In the kernel build it then spawns
+ * obd_zombie_impexp_thread() and waits for it to complete obd_zombie_start;
+ * otherwise it registers obd_zombie_impexp_kill() and
+ * obd_zombie_impexp_check() as wait and idle callbacks.
+ *
+ * \retval 0   success
+ * \retval -ve error returned by cfs_kernel_thread()
+ */
+int obd_zombie_impexp_init(void)
 {
+        int rc;
+
         CFS_INIT_LIST_HEAD(&obd_zombie_imports);
         CFS_INIT_LIST_HEAD(&obd_zombie_exports);
         spin_lock_init(&obd_zombie_impexp_lock);
+        init_completion(&obd_zombie_start);
+        init_completion(&obd_zombie_stop);
+        cfs_waitq_init(&obd_zombie_waitq);
+
+#ifdef __KERNEL__
+        rc = cfs_kernel_thread(obd_zombie_impexp_thread, NULL, 0);
+        if (rc < 0)
+                RETURN(rc);
+
+        wait_for_completion(&obd_zombie_start);
+        /* a non-negative rc is not an error (presumably the new thread's
+         * id); report plain success so both build flavours agree */
+        rc = 0;
+#else
+
+        obd_zombie_impexp_work_cb =
+                liblustre_register_wait_callback("obd_zombi_impexp_kill",
+                                                 &obd_zombie_impexp_kill, NULL);
+
+        obd_zombie_impexp_idle_cb =
+                liblustre_register_idle_callback("obd_zombi_impexp_check",
+                                                 &obd_zombie_impexp_check, NULL);
+        rc = 0;
+
+#endif
+        RETURN(rc);
+}
+/**
+ * stop destroy zombie import/export thread
+ *
+ * Sets OBD_ZOMBIE_STOP and signals obd_zombie_waitq.  In the kernel build it
+ * then blocks until obd_zombie_stop is completed; otherwise it deregisters
+ * the wait and idle callbacks registered in obd_zombie_impexp_init().
+ */
+void obd_zombie_impexp_stop(void)
+{
+        set_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags);
+        obd_zombie_impexp_notify();
+#ifdef __KERNEL__
+        wait_for_completion(&obd_zombie_stop);
+#else
+        liblustre_deregister_wait_callback(obd_zombie_impexp_work_cb);
+        liblustre_deregister_idle_callback(obd_zombie_impexp_idle_cb);
+#endif
 }
 
index 0dd3ab6..b2a15f0 100644 (file)
@@ -343,10 +343,10 @@ int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
                obd->obd_name, obd->obd_uuid.uuid);
 
         class_decref(obd);
-        
+
         /* not strictly necessary, but cleans up eagerly */
         obd_zombie_impexp_cull();
-        
+
         RETURN(0);
 }
 
index 2ea79c2..213f381 100644 (file)
@@ -245,6 +245,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
                         DEBUG_REQ(D_ERROR, req, "still on delayed list");
                 }
                 spin_unlock(&imp->imp_lock);
+                LASSERT(atomic_read(&imp->imp_inflight) == 0);
         }
 
 out:
index b000473..b2c06be 100644 (file)
@@ -98,8 +98,6 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
         if (test_bit(LIOD_STOP, &pc->pc_flags))
                 RETURN(1);
 
-        obd_zombie_impexp_cull();
-
         spin_lock(&pc->pc_set->set_new_req_lock);
         list_for_each_safe(pos, tmp, &pc->pc_set->set_new_requests) {
                 req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
@@ -177,13 +175,6 @@ static int ptlrpcd(void *arg)
         return 0;
 }
 
-static void ptlrpcd_zombie_impexp_notify(void)
-{
-        LASSERT(ptlrpcd_pc.pc_set != NULL); // call before ptlrpcd inited ?
-
-        cfs_waitq_signal(&ptlrpcd_pc.pc_set->set_waitq);
-}
-
 #else
 
 int ptlrpcd_check_async_rpcs(void *arg)
@@ -234,9 +225,6 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
                 RETURN(-ENOMEM);
 
 #ifdef __KERNEL__
-        /* wake ptlrpcd when zombie imports or exports exist */
-        obd_zombie_impexp_notify = ptlrpcd_zombie_impexp_notify;
-        
         rc = cfs_kernel_thread(ptlrpcd, pc, 0);
         if (rc < 0)  {
                 ptlrpc_set_destroy(pc->pc_set);
@@ -261,7 +249,6 @@ static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
         set_bit(LIOD_STOP, &pc->pc_flags);
         cfs_waitq_signal(&pc->pc_set->set_waitq);
 #ifdef __KERNEL__
-        obd_zombie_impexp_notify = NULL;
         wait_for_completion(&pc->pc_finishing);
 #else
         liblustre_deregister_wait_callback(pc->pc_wait_callback);