Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / ptlrpc / recov_thread.c
index 74aa72b..1f21db5 100644 (file)
@@ -4,20 +4,23 @@
  *  Copyright (C) 2003 Cluster File Systems, Inc.
  *   Author: Andreas Dilger <adilger@clusterfs.com>
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
  *
  * OST<->MDS recovery logging thread.
  *
 #endif
 
 #ifdef __KERNEL__
-#include <linux/fs.h>
+# include <libcfs/libcfs.h>
 #else
-# include <portals/list.h>
+# include <libcfs/list.h>
 # include <liblustre.h>
 #endif
 
-#include <linux/kp30.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
-#include <portals/types.h>
-#include <portals/list.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_commit_confd.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lnet/types.h>
+#include <libcfs/list.h>
+#include <lustre_log.h>
 #include "ptlrpc_internal.h"
 
 #ifdef __KERNEL__
 
-static struct llog_commit_master lustre_lcm;
-static struct llog_commit_master *lcm = &lustre_lcm;
-
-/* Allocate new commit structs in case we do not have enough */
-static int llcd_alloc(void)
+/* Allocate new commit structs in case we do not have enough.
+ * Make the llcd size small enough that it fits into a single page when we
+ * are sending/receiving it. */
+static int llcd_alloc(struct llog_commit_master *lcm)
 {
         struct llog_canceld_ctxt *llcd;
-        int offset = offsetof(struct llog_canceld_ctxt, llcd_cookies);
+        int llcd_size;
 
-        OBD_ALLOC(llcd, PAGE_SIZE + offset);
+        /* payload of lustre_msg V2 is bigger */
+        llcd_size = 4096 - lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, NULL);
+        OBD_ALLOC(llcd,
+                  llcd_size + offsetof(struct llog_canceld_ctxt, llcd_cookies));
         if (llcd == NULL)
                 return -ENOMEM;
 
+        llcd->llcd_size = llcd_size;
         llcd->llcd_lcm = lcm;
 
         spin_lock(&lcm->lcm_llcd_lock);
@@ -76,18 +81,20 @@ static int llcd_alloc(void)
 }
 
 /* Get a free cookie struct from the list */
-struct llog_canceld_ctxt *llcd_grab(void)
+static struct llog_canceld_ctxt *llcd_grab(struct llog_commit_master *lcm)
 {
         struct llog_canceld_ctxt *llcd;
 
+repeat:
         spin_lock(&lcm->lcm_llcd_lock);
         if (list_empty(&lcm->lcm_llcd_free)) {
                 spin_unlock(&lcm->lcm_llcd_lock);
-                if (llcd_alloc() < 0) {
+                if (llcd_alloc(lcm) < 0) {
                         CERROR("unable to allocate log commit data!\n");
                         return NULL;
                 }
-                spin_lock(&lcm->lcm_llcd_lock);
+                /* check new llcd wasn't grabbed while lock dropped, b=7407 */
+                goto repeat;
         }
 
         llcd = list_entry(lcm->lcm_llcd_free.next, typeof(*llcd), llcd_list);
@@ -95,19 +102,20 @@ struct llog_canceld_ctxt *llcd_grab(void)
         atomic_dec(&lcm->lcm_llcd_numfree);
         spin_unlock(&lcm->lcm_llcd_lock);
 
-        llcd->llcd_tries = 0;
         llcd->llcd_cookiebytes = 0;
 
         return llcd;
 }
-EXPORT_SYMBOL(llcd_grab);
 
 static void llcd_put(struct llog_canceld_ctxt *llcd)
 {
-        int offset = offsetof(struct llog_canceld_ctxt, llcd_cookies);
+        struct llog_commit_master *lcm = llcd->llcd_lcm;
 
+        llog_ctxt_put(llcd->llcd_ctxt);
         if (atomic_read(&lcm->lcm_llcd_numfree) >= lcm->lcm_llcd_maxfree) {
-                OBD_FREE(llcd, PAGE_SIZE + offset);
+                int llcd_size = llcd->llcd_size +
+                         offsetof(struct llog_canceld_ctxt, llcd_cookies);
+                OBD_FREE(llcd, llcd_size);
         } else {
                 spin_lock(&lcm->lcm_llcd_lock);
                 list_add(&llcd->llcd_list, &lcm->lcm_llcd_free);
@@ -117,19 +125,55 @@ static void llcd_put(struct llog_canceld_ctxt *llcd)
 }
 
 /* Send some cookies to the appropriate target */
-void llcd_send(struct llog_canceld_ctxt *llcd)
+static void llcd_send(struct llog_canceld_ctxt *llcd)
+{
+        if (!(llcd->llcd_lcm->lcm_flags & LLOG_LCM_FL_EXIT)) {
+                spin_lock(&llcd->llcd_lcm->lcm_llcd_lock);
+                list_add_tail(&llcd->llcd_list,
+                              &llcd->llcd_lcm->lcm_llcd_pending);
+                spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
+        }
+        cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1);
+}
+
+/**
+ * Grab llcd and assign it to passed @ctxt. Also set up backward link
+ * and get ref on @ctxt.
+ */
+static struct llog_canceld_ctxt *ctxt_llcd_grab(struct llog_ctxt *ctxt)
 {
-        spin_lock(&llcd->llcd_lcm->lcm_llcd_lock);
-        list_add_tail(&llcd->llcd_list, &llcd->llcd_lcm->lcm_llcd_pending);
-        spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
+        struct llog_canceld_ctxt *llcd;
+
+        LASSERT_SEM_LOCKED(&ctxt->loc_sem);
+        llcd = llcd_grab(ctxt->loc_lcm);
+        if (llcd == NULL)
+                return NULL;
+
+        llcd->llcd_ctxt = llog_ctxt_get(ctxt);
+        ctxt->loc_llcd = llcd;
 
-        wake_up_nr(&llcd->llcd_lcm->lcm_waitq, 1);
+        CDEBUG(D_RPCTRACE,"grab llcd %p:%p\n", ctxt->loc_llcd, ctxt);
+        return llcd;
+}
+
+/**
+ * Put llcd in passed @ctxt. Set ->loc_llcd to NULL.
+ */
+static void ctxt_llcd_put(struct llog_ctxt *ctxt)
+{
+        mutex_down(&ctxt->loc_sem);
+        if (ctxt->loc_llcd != NULL) {
+                CDEBUG(D_RPCTRACE,"put llcd %p:%p\n", ctxt->loc_llcd, ctxt);
+                llcd_put(ctxt->loc_llcd);
+                ctxt->loc_llcd = NULL;
+        }
+        ctxt->loc_imp = NULL;
+        mutex_up(&ctxt->loc_sem);
 }
-EXPORT_SYMBOL(llcd_send);
 
 /* deleted objects have a commit callback that cancels the MDS
- * log record for the deletion.  The commit callback calls this 
- * function 
+ * log record for the deletion.  The commit callback calls this
+ * function
  */
 int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
                          struct lov_stripe_md *lsm, int count,
@@ -141,43 +185,43 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
 
         LASSERT(ctxt);
 
-        if (count == 0 || cookies == NULL) {
-                down(&ctxt->loc_sem);
-                if (ctxt->loc_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
-                        GOTO(out, rc);
+        mutex_down(&ctxt->loc_sem);
+        llcd = ctxt->loc_llcd;
 
-                llcd = ctxt->loc_llcd;
-                GOTO(send_now, rc);
+        if (ctxt->loc_imp == NULL) {
+                CDEBUG(D_RPCTRACE, "no import for ctxt %p\n", ctxt);
+                GOTO(out, rc = 0);
         }
 
-        down(&ctxt->loc_sem);
-        llcd = ctxt->loc_llcd;
-        if (llcd == NULL) {
-                llcd = llcd_grab();
+        if (count > 0 && cookies != NULL) {
                 if (llcd == NULL) {
-                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
-                               cookies->lgc_lgl.lgl_oid,
-                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
-                        GOTO(out, rc = -ENOMEM);
+                        llcd = ctxt_llcd_grab(ctxt);
+                        if (llcd == NULL) {
+                                CERROR("couldn't get an llcd - dropped "LPX64
+                                       ":%x+%u\n",
+                                       cookies->lgc_lgl.lgl_oid,
+                                       cookies->lgc_lgl.lgl_ogen, 
+                                       cookies->lgc_index);
+                                GOTO(out, rc = -ENOMEM);
+                        }
                 }
-                llcd->llcd_import = ctxt->loc_imp;
-                llcd->llcd_gen = ctxt->loc_gen;
-                ctxt->loc_llcd = llcd;
-        }
 
-        memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
-               sizeof(*cookies));
-        llcd->llcd_cookiebytes += sizeof(*cookies);
+                memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, 
+                       cookies, sizeof(*cookies));
+                llcd->llcd_cookiebytes += sizeof(*cookies);
+        } else {
+                if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+        }
 
-send_now:
-        if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
-             flags & OBD_LLOG_FL_SENDNOW)) {
-                CDEBUG(D_HA, "send llcd: %p\n", llcd);
+        if ((llcd->llcd_size - llcd->llcd_cookiebytes) < sizeof(*cookies) ||
+            (flags & OBD_LLOG_FL_SENDNOW)) {
+                CDEBUG(D_RPCTRACE, "send llcd %p:%p\n", llcd, llcd->llcd_ctxt);
                 ctxt->loc_llcd = NULL;
                 llcd_send(llcd);
         }
 out:
-        up(&ctxt->loc_sem);
+        mutex_up(&ctxt->loc_sem);
         return rc;
 }
 EXPORT_SYMBOL(llog_obd_repl_cancel);
@@ -187,65 +231,74 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
         int rc = 0;
         ENTRY;
 
-        LASSERT(ctxt->loc_llcd);
-
         if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) {
-                CWARN("import will be destroyed, put llcd %p\n", 
-                      ctxt->loc_llcd);
-                llcd_put(ctxt->loc_llcd);
-                ctxt->loc_llcd = NULL;
-                up(&ctxt->loc_sem);
+                CDEBUG(D_RPCTRACE,"reverse import disconnect\n");
+                /* 
+                 * We put llcd because it is not going to sending list and
+                 * thus, its refc will not be handled. We will handle it here.
+                 */
+                ctxt_llcd_put(ctxt);
         } else {
-                up(&ctxt->loc_sem);
+                /* 
+                 * Sending cancel. This means that ctxt->loc_llcd wil be
+                 * put on sending list in llog_obd_repl_cancel() and in
+                 * this case recovery thread will take care of it refc.
+                 */
                 rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
         }
-
         RETURN(rc);
 }
 EXPORT_SYMBOL(llog_obd_repl_sync);
 
+static inline void stop_log_commit(struct llog_commit_master *lcm,
+                                   struct llog_commit_daemon *lcd,
+                                   int rc)
+{
+        CERROR("error preparing commit: rc %d\n", rc);
+
+        spin_lock(&lcm->lcm_llcd_lock);
+        list_splice_init(&lcd->lcd_llcd_list, &lcm->lcm_llcd_resend);
+        spin_unlock(&lcm->lcm_llcd_lock);
+}
+
 static int log_commit_thread(void *arg)
 {
         struct llog_commit_master *lcm = arg;
         struct llog_commit_daemon *lcd;
         struct llog_canceld_ctxt *llcd, *n;
-        unsigned long flags;
+        struct obd_import *import = NULL;
         ENTRY;
 
         OBD_ALLOC(lcd, sizeof(*lcd));
         if (lcd == NULL)
                 RETURN(-ENOMEM);
 
-        lock_kernel();
-        ptlrpc_daemonize(); /* thread never needs to do IO */
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
         spin_lock(&lcm->lcm_thread_lock);
-        THREAD_NAME(current->comm, "ll_log_commit_%d",
-                    atomic_read(&lcm->lcm_thread_total));
+        THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1,
+                    "ll_log_comt_%02d", atomic_read(&lcm->lcm_thread_total));
         atomic_inc(&lcm->lcm_thread_total);
         spin_unlock(&lcm->lcm_thread_lock);
-        unlock_kernel();
 
-        INIT_LIST_HEAD(&lcd->lcd_lcm_list);
-        INIT_LIST_HEAD(&lcd->lcd_llcd_list);
+        ptlrpc_daemonize(cfs_curproc_comm()); /* thread never needs to do IO */
+
+        CFS_INIT_LIST_HEAD(&lcd->lcd_lcm_list);
+        CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list);
         lcd->lcd_lcm = lcm;
 
-        CDEBUG(D_HA, "%s started\n", current->comm);
+        CDEBUG(D_HA, "%s started\n", cfs_curproc_comm());
         do {
                 struct ptlrpc_request *request;
-                struct obd_import *import = NULL;
                 struct list_head *sending_list;
                 int rc = 0;
 
+                if (import)
+                        class_import_put(import);
+                import = NULL;
+
                 /* If we do not have enough pages available, allocate some */
                 while (atomic_read(&lcm->lcm_llcd_numfree) <
                        lcm->lcm_llcd_minfree) {
-                        if (llcd_alloc() < 0)
+                        if (llcd_alloc(lcm) < 0)
                                 break;
                 }
 
@@ -267,6 +320,9 @@ static int log_commit_thread(void *arg)
 
                 sending_list = &lcm->lcm_llcd_pending;
         resend:
+                if (import)
+                        class_import_put(import);
+                import = NULL;
                 if (lcm->lcm_flags & LLOG_LCM_FL_EXIT) {
                         lcm->lcm_llcd_maxfree = 0;
                         lcm->lcm_llcd_minfree = 0;
@@ -279,7 +335,7 @@ static int log_commit_thread(void *arg)
 
                 if (atomic_read(&lcm->lcm_thread_numidle) <= 1 &&
                     atomic_read(&lcm->lcm_thread_total) < lcm->lcm_thread_max) {
-                        rc = llog_start_commit_thread();
+                        rc = llog_start_commit_thread(lcm);
                         if (rc < 0)
                                 CERROR("error starting thread: rc %d\n", rc);
                 }
@@ -294,11 +350,13 @@ static int log_commit_thread(void *arg)
                         llcd = list_entry(lcd->lcd_llcd_list.next,
                                           typeof(*llcd), llcd_list);
                         LASSERT(llcd->llcd_lcm == lcm);
-                        import = llcd->llcd_import;
+                        import = llcd->llcd_ctxt->loc_imp;
+                        if (import)
+                                class_import_get(import);
                 }
                 list_for_each_entry_safe(llcd, n, sending_list, llcd_list) {
                         LASSERT(llcd->llcd_lcm == lcm);
-                        if (import == llcd->llcd_import)
+                        if (import == llcd->llcd_ctxt->loc_imp)
                                 list_move_tail(&llcd->llcd_list,
                                                &lcd->lcd_llcd_list);
                 }
@@ -306,7 +364,7 @@ static int log_commit_thread(void *arg)
                         list_for_each_entry_safe(llcd, n, &lcm->lcm_llcd_resend,
                                                  llcd_list) {
                                 LASSERT(llcd->llcd_lcm == lcm);
-                                if (import == llcd->llcd_import)
+                                if (import == llcd->llcd_ctxt->loc_imp)
                                         list_move_tail(&llcd->llcd_list,
                                                        &lcd->lcd_llcd_list);
                         }
@@ -315,45 +373,71 @@ static int log_commit_thread(void *arg)
 
                 /* We are the only one manipulating our local list - no lock */
                 list_for_each_entry_safe(llcd,n, &lcd->lcd_llcd_list,llcd_list){
-                        char *bufs[1] = {(char *)llcd->llcd_cookies};
-                        struct obd_device *obd = import->imp_obd;
-                        struct llog_ctxt *ctxt;
+                        char *bufs[2] = { NULL, (char *)llcd->llcd_cookies };
 
                         list_del(&llcd->llcd_list);
                         if (llcd->llcd_cookiebytes == 0) {
-                                CDEBUG(D_HA, "just put empty llcd %p\n", llcd);
+                                CDEBUG(D_RPCTRACE, "put empty llcd %p:%p\n",
+                                       llcd, llcd->llcd_ctxt);
                                 llcd_put(llcd);
                                 continue;
                         }
-                        /* check whether the cookies are new. if new then send, otherwise
-                         * just put llcd */
-                        ctxt = llog_get_context(obd, llcd->llcd_cookies[0].lgc_subsys + 1);
-                        LASSERT(ctxt != NULL);
-                        down(&ctxt->loc_sem);
-                        if (log_gen_lt(llcd->llcd_gen, ctxt->loc_gen)) {
-                                up(&ctxt->loc_sem); 
-                                CDEBUG(D_HA, "just put stale llcd %p\n", llcd);
+
+                        mutex_down(&llcd->llcd_ctxt->loc_sem);
+                        if (llcd->llcd_ctxt->loc_imp == NULL) {
+                                mutex_up(&llcd->llcd_ctxt->loc_sem);
+                                CWARN("import will be destroyed, put "
+                                      "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
                                 llcd_put(llcd);
                                 continue;
                         }
-                        up(&ctxt->loc_sem); 
+                        mutex_up(&llcd->llcd_ctxt->loc_sem);
 
-                        request = ptlrpc_prep_req(import, OBD_LOG_CANCEL, 1,
-                                                  &llcd->llcd_cookiebytes,
-                                                  bufs);
+                        if (!import || (import == LP_POISON) ||
+                            (import->imp_client == LP_POISON)) {
+                                CERROR("No import %p (llcd=%p, ctxt=%p)\n",
+                                       import, llcd, llcd->llcd_ctxt);
+                                llcd_put(llcd);
+                                continue;
+                        }
+
+                        OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10);
+
+                        request = ptlrpc_request_alloc(import, &RQF_LOG_CANCEL);
                         if (request == NULL) {
                                 rc = -ENOMEM;
-                                CERROR("error preparing commit: rc %d\n", rc);
+                                stop_log_commit(lcm, lcd, rc);
+                                break;
+                        }
 
-                                spin_lock(&lcm->lcm_llcd_lock);
-                                list_splice(&lcd->lcd_llcd_list,
-                                            &lcm->lcm_llcd_resend);
-                                INIT_LIST_HEAD(&lcd->lcd_llcd_list);
-                                spin_unlock(&lcm->lcm_llcd_lock);
+                        req_capsule_set_size(&request->rq_pill, &RMF_LOGCOOKIES,
+                                             RCL_CLIENT,llcd->llcd_cookiebytes);
+
+                        rc = ptlrpc_request_bufs_pack(request,
+                                                      LUSTRE_LOG_VERSION,
+                                                      OBD_LOG_CANCEL, bufs,
+                                                      NULL);
+                        if (rc) {
+                                ptlrpc_request_free(request);
+                                stop_log_commit(lcm, lcd, rc);
                                 break;
                         }
 
-                        request->rq_replen = lustre_msg_size(0, NULL);
+                        /* XXX FIXME bug 249, 5515 */
+                        request->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
+                        request->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
+
+                        ptlrpc_request_set_replen(request);
+                        mutex_down(&llcd->llcd_ctxt->loc_sem);
+                        if (llcd->llcd_ctxt->loc_imp == NULL) {
+                                mutex_up(&llcd->llcd_ctxt->loc_sem);
+                                CWARN("import will be destroyed, put "
+                                      "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
+                                llcd_put(llcd);
+                                ptlrpc_req_finished(request);
+                                continue;
+                        }
+                        mutex_up(&llcd->llcd_ctxt->loc_sem);
                         rc = ptlrpc_queue_wait(request);
                         ptlrpc_req_finished(request);
 
@@ -364,26 +448,11 @@ static int log_commit_thread(void *arg)
                                 continue;
                         }
 
-#if 0                   /* FIXME just put llcd, not send it again */
-                        spin_lock(&lcm->lcm_llcd_lock);
-                        list_splice(&lcd->lcd_llcd_list, &lcm->lcm_llcd_resend);
-                        if (++llcd->llcd_tries < 5) {
-                                CERROR("commit %p failed on attempt %d: rc %d\n",
-                                       llcd, llcd->llcd_tries, rc);
-
-                                list_add_tail(&llcd->llcd_list,
-                                              &lcm->lcm_llcd_resend);
-                                spin_unlock(&lcm->lcm_llcd_lock);
-                        } else {
-                                spin_unlock(&lcm->lcm_llcd_lock);
-#endif
-                                CERROR("commit %p dropped %d cookies: rc %d\n",
-                                       llcd, (int)(llcd->llcd_cookiebytes /
-                                                   sizeof(*llcd->llcd_cookies)),
-                                       rc);
-                                llcd_put(llcd);
-//                        }
-                        break;
+                        CERROR("commit %p:%p drop %d cookies: rc %d\n",
+                               llcd, llcd->llcd_ctxt,
+                               (int)(llcd->llcd_cookiebytes /
+                                     sizeof(*llcd->llcd_cookies)), rc);
+                        llcd_put(llcd);
                 }
 
                 if (rc == 0) {
@@ -393,12 +462,15 @@ static int log_commit_thread(void *arg)
                 }
         } while(1);
 
+        if (import)
+                class_import_put(import);
+
         /* If we are force exiting, just drop all of the cookies. */
         if (lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE) {
                 spin_lock(&lcm->lcm_llcd_lock);
-                list_splice(&lcm->lcm_llcd_pending, &lcd->lcd_llcd_list);
-                list_splice(&lcm->lcm_llcd_resend, &lcd->lcd_llcd_list);
-                list_splice(&lcm->lcm_llcd_free, &lcd->lcd_llcd_list);
+                list_splice_init(&lcm->lcm_llcd_pending, &lcd->lcd_llcd_list);
+                list_splice_init(&lcm->lcm_llcd_resend, &lcd->lcd_llcd_list);
+                list_splice_init(&lcm->lcm_llcd_free, &lcd->lcd_llcd_list);
                 spin_unlock(&lcm->lcm_llcd_lock);
 
                 list_for_each_entry_safe(llcd, n, &lcd->lcd_llcd_list,llcd_list)
@@ -410,16 +482,17 @@ static int log_commit_thread(void *arg)
         spin_unlock(&lcm->lcm_thread_lock);
         OBD_FREE(lcd, sizeof(*lcd));
 
+        CDEBUG(D_HA, "%s exiting\n", cfs_curproc_comm());
+
         spin_lock(&lcm->lcm_thread_lock);
         atomic_dec(&lcm->lcm_thread_total);
         spin_unlock(&lcm->lcm_thread_lock);
-        wake_up(&lcm->lcm_waitq);
+        cfs_waitq_signal(&lcm->lcm_waitq);
 
-        CDEBUG(D_HA, "%s exiting\n", current->comm);
         return 0;
 }
 
-int llog_start_commit_thread(void)
+int llog_start_commit_thread(struct llog_commit_master *lcm)
 {
         int rc;
         ENTRY;
@@ -427,7 +500,7 @@ int llog_start_commit_thread(void)
         if (atomic_read(&lcm->lcm_thread_total) >= lcm->lcm_thread_max)
                 RETURN(0);
 
-        rc = kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES);
+        rc = cfs_kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES);
         if (rc < 0) {
                 CERROR("error starting thread #%d: %d\n",
                        atomic_read(&lcm->lcm_thread_total), rc);
@@ -439,21 +512,22 @@ int llog_start_commit_thread(void)
 EXPORT_SYMBOL(llog_start_commit_thread);
 
 static struct llog_process_args {
-        struct semaphore         llpa_sem; 
+        struct semaphore         llpa_sem;
         struct llog_ctxt        *llpa_ctxt;
         void                    *llpa_cb;
         void                    *llpa_arg;
 } llpa;
-int llog_init_commit_master(void)
+
+int llog_init_commit_master(struct llog_commit_master *lcm)
 {
-        INIT_LIST_HEAD(&lcm->lcm_thread_busy);
-        INIT_LIST_HEAD(&lcm->lcm_thread_idle);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle);
         spin_lock_init(&lcm->lcm_thread_lock);
         atomic_set(&lcm->lcm_thread_numidle, 0);
-        init_waitqueue_head(&lcm->lcm_waitq);
-        INIT_LIST_HEAD(&lcm->lcm_llcd_pending);
-        INIT_LIST_HEAD(&lcm->lcm_llcd_resend);
-        INIT_LIST_HEAD(&lcm->lcm_llcd_free);
+        cfs_waitq_init(&lcm->lcm_waitq);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_pending);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_resend);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_free);
         spin_lock_init(&lcm->lcm_llcd_lock);
         atomic_set(&lcm->lcm_llcd_numfree, 0);
         lcm->lcm_llcd_minfree = 0;
@@ -462,19 +536,21 @@ int llog_init_commit_master(void)
         sema_init(&llpa.llpa_sem, 1);
         return 0;
 }
+EXPORT_SYMBOL(llog_init_commit_master);
 
-int llog_cleanup_commit_master(int force)
+int llog_cleanup_commit_master(struct llog_commit_master *lcm,
+                               int force)
 {
         lcm->lcm_flags |= LLOG_LCM_FL_EXIT;
         if (force)
                 lcm->lcm_flags |= LLOG_LCM_FL_EXIT_FORCE;
-        wake_up(&lcm->lcm_waitq);
+        cfs_waitq_signal(&lcm->lcm_waitq);
 
         wait_event_interruptible(lcm->lcm_waitq,
                                  atomic_read(&lcm->lcm_thread_total) == 0);
         return 0;
 }
-
+EXPORT_SYMBOL(llog_cleanup_commit_master);
 
 static int log_process_thread(void *args)
 {
@@ -483,97 +559,102 @@ static int log_process_thread(void *args)
         void   *cb = data->llpa_cb;
         struct llog_logid logid = *(struct llog_logid *)(data->llpa_arg);
         struct llog_handle *llh = NULL;
-        unsigned long flags;
         int rc;
         ENTRY;
-                                                                                                                             
-        up(&data->llpa_sem);
-        lock_kernel();
-        ptlrpc_daemonize(); /* thread never needs to do IO */
-                                                                                                                             
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-        unlock_kernel();
-                                                                                                                             
+
+        mutex_up(&data->llpa_sem);
+        ptlrpc_daemonize("llog_process");     /* thread does IO to log files */
+
         rc = llog_create(ctxt, &llh, &logid, NULL);
         if (rc) {
                 CERROR("llog_create failed %d\n", rc);
-                RETURN(rc);
+                GOTO(out, rc);
         }
         rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL);
         if (rc) {
                 CERROR("llog_init_handle failed %d\n", rc);
-                GOTO(out, rc);
+                GOTO(release_llh, rc);
         }
-                                                                                                                             
+
         if (cb) {
                 rc = llog_cat_process(llh, (llog_cb_t)cb, NULL);
-                if (rc)
+                if (rc != LLOG_PROC_BREAK)
                         CERROR("llog_cat_process failed %d\n", rc);
-        } else
+        } else {
                 CWARN("no callback function for recovery\n");
+        }
 
-        CDEBUG(D_HA, "send to llcd :%p forcibly\n", ctxt->loc_llcd);
+        CDEBUG(D_HA, "send llcd %p:%p forcibly after recovery\n",
+               ctxt->loc_llcd, ctxt);
         llog_sync(ctxt, NULL);
-out:
+
+release_llh:
         rc = llog_cat_put(llh);
         if (rc)
                 CERROR("llog_cat_put failed %d\n", rc);
-                                                                                                                             
+out:
+        llog_ctxt_put(ctxt);
         RETURN(rc);
 }
-static int llog_recovery_generic(struct llog_ctxt *ctxt,
-                                 void *handle,
-                                 void *arg)
+
+static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg)
 {
+        struct obd_device *obd = ctxt->loc_obd;
         int rc;
         ENTRY;
 
-        down(&llpa.llpa_sem);
-        llpa.llpa_ctxt = ctxt;
+        if (obd->obd_stopping)
+                RETURN(-ENODEV);
+
+        mutex_down(&llpa.llpa_sem);
         llpa.llpa_cb = handle;
         llpa.llpa_arg = arg;
-
-        rc = kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
-        if (rc < 0)
+        llpa.llpa_ctxt = llog_ctxt_get(ctxt);
+        if (!llpa.llpa_ctxt) {
+                up(&llpa.llpa_sem);
+                RETURN(-ENODEV);
+        }
+        rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
+        if (rc < 0) {
+                llog_ctxt_put(ctxt);
                 CERROR("error starting log_process_thread: %d\n", rc);
-        else {
+        else {
                 CDEBUG(D_HA, "log_process_thread: %d\n", rc);
                 rc = 0;
         }
 
         RETURN(rc);
 }
+
 int llog_repl_connect(struct llog_ctxt *ctxt, int count,
-                      struct llog_logid *logid, struct llog_ctxt_gen *gen)
+                      struct llog_logid *logid, struct llog_gen *gen,
+                      struct obd_uuid *uuid)
 {
         struct llog_canceld_ctxt *llcd;
         int rc;
         ENTRY;
-                                                                                                                             
-        down(&ctxt->loc_sem);
-        ctxt->loc_gen = *gen;
-        llcd = ctxt->loc_llcd;
-        if (llcd) {
-                CDEBUG(D_HA, "put current llcd when new connection arrives\n");
-                llcd_put(llcd);
+
+        /* send back llcd before recovery from llog */
+        if (ctxt->loc_llcd != NULL) {
+                CWARN("llcd %p:%p not empty\n", ctxt->loc_llcd, ctxt);
+                llog_sync(ctxt, NULL);
         }
-        llcd = llcd_grab();
+
+        mutex_down(&ctxt->loc_sem);
+        ctxt->loc_gen = *gen;
+        llcd = ctxt_llcd_grab(ctxt);
         if (llcd == NULL) {
                 CERROR("couldn't get an llcd\n");
+                mutex_up(&ctxt->loc_sem);
                 RETURN(-ENOMEM);
         }
-        llcd->llcd_import = ctxt->loc_imp;
-        llcd->llcd_gen = ctxt->loc_gen;
-        ctxt->loc_llcd = llcd;
-        up(&ctxt->loc_sem);
+        mutex_up(&ctxt->loc_sem);
 
-        rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid); 
-        if (rc != 0)
+        rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid);
+        if (rc != 0) {
+                ctxt_llcd_put(ctxt);
                 CERROR("error recovery process: %d\n", rc);
-
+        }
         RETURN(rc);
 }
 EXPORT_SYMBOL(llog_repl_connect);