/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2003 Cluster File Systems, Inc.
- * Author: Andreas Dilger <adilger@clusterfs.com>
+ * GPL HEADER START
*
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * You may have signed or agreed to another license before downloading
- * this software. If so, you are bound by the terms and conditions
- * of that agreement, and the following does not apply to you. See the
- * LICENSE file included with this distribution for more information.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * If you did not agree to a different license, then this copy of Lustre
- * is open source software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * In either case, Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * license text for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * OST<->MDS recovery logging thread.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/ptlrpc/recov_thread.c
*
+ * OST<->MDS recovery logging thread.
* Invariants in implementation:
* - we do not share logs among different OST<->MDS connections, so that
* if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Yury Umanets <yury.umanets@sun.com>
+ * Alexey Lyashkov <alexey.lyashkov@sun.com>
*/
#define DEBUG_SUBSYSTEM S_LOG
#endif
#include <obd_class.h>
-#include <lustre_commit_confd.h>
#include <obd_support.h>
#include <obd_class.h>
#include <lustre_net.h>
#include <lustre_log.h>
#include "ptlrpc_internal.h"
+static atomic_t llcd_count = ATOMIC_INIT(0);
+static cfs_mem_cache_t *llcd_cache = NULL;
+
#ifdef __KERNEL__
+enum {
+ LLOG_LCM_FL_START = 1 << 0,
+ LLOG_LCM_FL_EXIT = 1 << 1
+};
-/* Allocate new commit structs in case we do not have enough.
- * Make the llcd size small enough that it fits into a single page when we
- * are sending/receiving it. */
-static int llcd_alloc(struct llog_commit_master *lcm)
+static void llcd_print(struct llog_canceld_ctxt *llcd,
+ const char *func, int line)
{
- struct llog_canceld_ctxt *llcd;
- int llcd_size;
-
- /* payload of lustre_msg V2 is bigger */
- llcd_size = 4096 - lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, NULL);
- OBD_ALLOC(llcd,
- llcd_size + offsetof(struct llog_canceld_ctxt, llcd_cookies));
- if (llcd == NULL)
- return -ENOMEM;
-
- llcd->llcd_size = llcd_size;
- llcd->llcd_lcm = lcm;
-
- spin_lock(&lcm->lcm_llcd_lock);
- list_add(&llcd->llcd_list, &lcm->lcm_llcd_free);
- atomic_inc(&lcm->lcm_llcd_numfree);
- spin_unlock(&lcm->lcm_llcd_lock);
-
- return 0;
+ CDEBUG(D_RPCTRACE, "Llcd (%p) at %s:%d:\n", llcd, func, line);
+ CDEBUG(D_RPCTRACE, " size: %d\n", llcd->llcd_size);
+ CDEBUG(D_RPCTRACE, " ctxt: %p\n", llcd->llcd_ctxt);
+ CDEBUG(D_RPCTRACE, " lcm : %p\n", llcd->llcd_lcm);
+ CDEBUG(D_RPCTRACE, " cookiebytes : %d\n", llcd->llcd_cookiebytes);
}
-/* Get a free cookie struct from the list */
-static struct llog_canceld_ctxt *llcd_grab(struct llog_commit_master *lcm)
+/**
+ * Allocate new llcd from cache, init it and return to caller.
+ * Bumps number of objects allocated.
+ */
+static struct llog_canceld_ctxt *llcd_alloc(struct llog_commit_master *lcm)
{
struct llog_canceld_ctxt *llcd;
+ int size, overhead;
+
+ LASSERT(lcm != NULL);
+
+ /*
+ * We want to send one page of cookies with rpc header. This buffer
+ * will be assigned later to the rpc, this is why we preserve the
+ * space for rpc header.
+ */
+ size = CFS_PAGE_SIZE - lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, NULL);
+ overhead = offsetof(struct llog_canceld_ctxt, llcd_cookies);
+ OBD_SLAB_ALLOC(llcd, llcd_cache, CFS_ALLOC_STD, size + overhead);
+ if (!llcd)
+ return NULL;
-repeat:
- spin_lock(&lcm->lcm_llcd_lock);
- if (list_empty(&lcm->lcm_llcd_free)) {
- spin_unlock(&lcm->lcm_llcd_lock);
- if (llcd_alloc(lcm) < 0) {
- CERROR("unable to allocate log commit data!\n");
- return NULL;
- }
- /* check new llcd wasn't grabbed while lock dropped, b=7407 */
- goto repeat;
- }
+ CFS_INIT_LIST_HEAD(&llcd->llcd_list);
+ llcd->llcd_cookiebytes = 0;
+ llcd->llcd_size = size;
- llcd = list_entry(lcm->lcm_llcd_free.next, typeof(*llcd), llcd_list);
- list_del(&llcd->llcd_list);
- atomic_dec(&lcm->lcm_llcd_numfree);
- spin_unlock(&lcm->lcm_llcd_lock);
+ spin_lock(&lcm->lcm_lock);
+ llcd->llcd_lcm = lcm;
+ atomic_inc(&lcm->lcm_count);
+ list_add_tail(&llcd->llcd_list, &lcm->lcm_llcds);
+ spin_unlock(&lcm->lcm_lock);
+ atomic_inc(&llcd_count);
- llcd->llcd_cookiebytes = 0;
+ CDEBUG(D_RPCTRACE, "Alloc llcd %p on lcm %p (%d)\n",
+ llcd, lcm, atomic_read(&lcm->lcm_count));
return llcd;
}
-static void llcd_put(struct llog_canceld_ctxt *llcd)
+/**
+ * Returns passed llcd to cache.
+ */
+static void llcd_free(struct llog_canceld_ctxt *llcd)
{
struct llog_commit_master *lcm = llcd->llcd_lcm;
+ int size;
- llog_ctxt_put(llcd->llcd_ctxt);
- if (atomic_read(&lcm->lcm_llcd_numfree) >= lcm->lcm_llcd_maxfree) {
- int llcd_size = llcd->llcd_size +
- offsetof(struct llog_canceld_ctxt, llcd_cookies);
- OBD_FREE(llcd, llcd_size);
- } else {
- spin_lock(&lcm->lcm_llcd_lock);
- list_add(&llcd->llcd_list, &lcm->lcm_llcd_free);
- atomic_inc(&lcm->lcm_llcd_numfree);
- spin_unlock(&lcm->lcm_llcd_lock);
+ if (lcm) {
+ if (atomic_read(&lcm->lcm_count) == 0) {
+ CERROR("Invalid llcd free %p\n", llcd);
+ llcd_print(llcd, __FUNCTION__, __LINE__);
+ LBUG();
+ }
+ spin_lock(&lcm->lcm_lock);
+ LASSERT(!list_empty(&llcd->llcd_list));
+ list_del_init(&llcd->llcd_list);
+ atomic_dec(&lcm->lcm_count);
+ spin_unlock(&lcm->lcm_lock);
+
+ CDEBUG(D_RPCTRACE, "Free llcd %p on lcm %p (%d)\n",
+ llcd, lcm, atomic_read(&lcm->lcm_count));
}
+
+ LASSERT(atomic_read(&llcd_count) > 0);
+ atomic_dec(&llcd_count);
+
+ size = offsetof(struct llog_canceld_ctxt, llcd_cookies) +
+ llcd->llcd_size;
+ OBD_SLAB_FREE(llcd, llcd_cache, size);
}
-/* Send some cookies to the appropriate target */
-static void llcd_send(struct llog_canceld_ctxt *llcd)
+/**
+ * Checks if passed cookie fits into llcd free space buffer. Returns
+ * 1 if yes and 0 otherwise.
+ */
+static inline int
+llcd_fit(struct llog_canceld_ctxt *llcd, struct llog_cookie *cookies)
{
- if (!(llcd->llcd_lcm->lcm_flags & LLOG_LCM_FL_EXIT)) {
- spin_lock(&llcd->llcd_lcm->lcm_llcd_lock);
- list_add_tail(&llcd->llcd_list,
- &llcd->llcd_lcm->lcm_llcd_pending);
- spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
- }
- cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1);
+ return (llcd->llcd_size - llcd->llcd_cookiebytes >= sizeof(*cookies));
}
/**
- * Grab llcd and assign it to passed @ctxt. Also set up backward link
- * and get ref on @ctxt.
+ * Copy passed @cookies to @llcd.
*/
-static struct llog_canceld_ctxt *ctxt_llcd_grab(struct llog_ctxt *ctxt)
+static inline void
+llcd_copy(struct llog_canceld_ctxt *llcd, struct llog_cookie *cookies)
{
- struct llog_canceld_ctxt *llcd;
-
- LASSERT_SEM_LOCKED(&ctxt->loc_sem);
- llcd = llcd_grab(ctxt->loc_lcm);
- if (llcd == NULL)
- return NULL;
-
- llcd->llcd_ctxt = llog_ctxt_get(ctxt);
- ctxt->loc_llcd = llcd;
-
- CDEBUG(D_RPCTRACE,"grab llcd %p:%p\n", ctxt->loc_llcd, ctxt);
- return llcd;
+ LASSERT(llcd_fit(llcd, cookies));
+ memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes,
+ cookies, sizeof(*cookies));
+ llcd->llcd_cookiebytes += sizeof(*cookies);
}
/**
- * Put llcd in passed @ctxt. Set ->loc_llcd to NULL.
+ * Llcd completion function. Called upon llcd send completion regardless
+ * of the sending result. The error is passed in @rc. Note that this will
+ * also be called at cleanup time when all inflight rpcs are aborted.
*/
-static void ctxt_llcd_put(struct llog_ctxt *ctxt)
+static int
+llcd_interpret(const struct lu_env *env,
+ struct ptlrpc_request *req, void *noused, int rc)
{
- mutex_down(&ctxt->loc_sem);
- if (ctxt->loc_llcd != NULL) {
- CDEBUG(D_RPCTRACE,"put llcd %p:%p\n", ctxt->loc_llcd, ctxt);
- llcd_put(ctxt->loc_llcd);
- ctxt->loc_llcd = NULL;
- }
- class_import_put(ctxt->loc_imp);
- ctxt->loc_imp = NULL;
- mutex_up(&ctxt->loc_sem);
+ struct llog_canceld_ctxt *llcd = req->rq_async_args.pointer_arg[0];
+ CDEBUG(D_RPCTRACE, "Sent llcd %p (%d) - killing it\n", llcd, rc);
+ llcd_free(llcd);
+ return 0;
}
-/* deleted objects have a commit callback that cancels the MDS
- * log record for the deletion. The commit callback calls this
- * function
+/**
+ * Send @llcd to remote node. Free llcd upon completion or error. Sending
+ * is performed in async style so this function will return asap without
+ * blocking.
*/
-int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
- struct lov_stripe_md *lsm, int count,
- struct llog_cookie *cookies, int flags)
+static int llcd_send(struct llog_canceld_ctxt *llcd)
{
- struct llog_canceld_ctxt *llcd;
- int rc = 0;
+ char *bufs[2] = { NULL, (char *)llcd->llcd_cookies };
+ struct obd_import *import = NULL;
+ struct llog_commit_master *lcm;
+ struct ptlrpc_request *req;
+ struct llog_ctxt *ctxt;
+ int rc;
ENTRY;
- LASSERT(ctxt);
+ ctxt = llcd->llcd_ctxt;
+ if (!ctxt) {
+ CERROR("Invalid llcd with NULL ctxt found (%p)\n",
+ llcd);
+ llcd_print(llcd, __FUNCTION__, __LINE__);
+ LBUG();
+ }
+ LASSERT_SEM_LOCKED(&ctxt->loc_sem);
- mutex_down(&ctxt->loc_sem);
- llcd = ctxt->loc_llcd;
+ if (llcd->llcd_cookiebytes == 0)
+ GOTO(exit, rc = 0);
- if (ctxt->loc_imp == NULL) {
- CDEBUG(D_RPCTRACE, "no import for ctxt %p\n", ctxt);
- GOTO(out, rc = 0);
- }
+ lcm = llcd->llcd_lcm;
- if (count > 0 && cookies != NULL) {
- if (llcd == NULL) {
- llcd = ctxt_llcd_grab(ctxt);
- if (llcd == NULL) {
- CERROR("couldn't get an llcd - dropped "LPX64
- ":%x+%u\n",
- cookies->lgc_lgl.lgl_oid,
- cookies->lgc_lgl.lgl_ogen,
- cookies->lgc_index);
- GOTO(out, rc = -ENOMEM);
- }
- }
+ /*
+ * Check if we're in exit stage. Do not send llcd in
+ * this case.
+ */
+ if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags))
+ GOTO(exit, rc = -ENODEV);
- memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes,
- cookies, sizeof(*cookies));
- llcd->llcd_cookiebytes += sizeof(*cookies);
- } else {
- if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
- GOTO(out, rc);
+ CDEBUG(D_RPCTRACE, "Sending llcd %p\n", llcd);
+
+ import = llcd->llcd_ctxt->loc_imp;
+ if (!import || (import == LP_POISON) ||
+ (import->imp_client == LP_POISON)) {
+ CERROR("Invalid import %p for llcd %p\n",
+ import, llcd);
+ GOTO(exit, rc = -ENODEV);
}
- if ((llcd->llcd_size - llcd->llcd_cookiebytes) < sizeof(*cookies) ||
- (flags & OBD_LLOG_FL_SENDNOW)) {
- CDEBUG(D_RPCTRACE, "send llcd %p:%p\n", llcd, llcd->llcd_ctxt);
- ctxt->loc_llcd = NULL;
- llcd_send(llcd);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10);
+
+ /*
+ * No need to get import here as it is already done in
+ * llog_receptor_accept().
+ */
+ req = ptlrpc_request_alloc(import, &RQF_LOG_CANCEL);
+ if (req == NULL) {
+ CERROR("Can't allocate request for sending llcd %p\n",
+ llcd);
+ GOTO(exit, rc = -ENOMEM);
}
-out:
- mutex_up(&ctxt->loc_sem);
- return rc;
-}
-EXPORT_SYMBOL(llog_obd_repl_cancel);
+ req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES,
+ RCL_CLIENT, llcd->llcd_cookiebytes);
-int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
-{
- int rc = 0;
- ENTRY;
+ rc = ptlrpc_request_bufs_pack(req, LUSTRE_LOG_VERSION,
+ OBD_LOG_CANCEL, bufs, NULL);
+ if (rc) {
+ ptlrpc_request_free(req);
+ GOTO(exit, rc);
+ }
- if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) {
- CDEBUG(D_RPCTRACE,"reverse import disconnect\n");
- /*
- * We put llcd because it is not going to sending list and
- * thus, its refc will not be handled. We will handle it here.
- */
- ctxt_llcd_put(ctxt);
- } else {
- /*
- * Sending cancel. This means that ctxt->loc_llcd wil be
- * put on sending list in llog_obd_repl_cancel() and in
- * this case recovery thread will take care of it refc.
- */
- rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
+ ptlrpc_at_set_req_timeout(req);
+ ptlrpc_request_set_replen(req);
+
+ /* bug 5515 */
+ req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
+ req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
+ req->rq_interpret_reply = (ptlrpc_interpterer_t)llcd_interpret;
+ req->rq_async_args.pointer_arg[0] = llcd;
+
+ /* llog cancels will be replayed after reconnect, so this may happen
+ * twice: first from the replay llog, second from the resent rpc */
+ req->rq_no_delay = req->rq_no_resend = 1;
+
+ rc = ptlrpc_set_add_new_req(&lcm->lcm_pc, req);
+ if (rc) {
+ ptlrpc_request_free(req);
+ GOTO(exit, rc);
}
- RETURN(rc);
+ RETURN(0);
+exit:
+ CDEBUG(D_RPCTRACE, "Refused llcd %p\n", llcd);
+ llcd_free(llcd);
+ return rc;
}
-EXPORT_SYMBOL(llog_obd_repl_sync);
-static inline void stop_log_commit(struct llog_commit_master *lcm,
- struct llog_commit_daemon *lcd,
- int rc)
+/**
+ * Attach @llcd to @ctxt. Establish llcd vs. ctxt reverse connection
+ * so that they can refer to each other.
+ */
+static int
+llcd_attach(struct llog_ctxt *ctxt, struct llog_canceld_ctxt *llcd)
{
- CERROR("error preparing commit: rc %d\n", rc);
+ LASSERT(ctxt != NULL && llcd != NULL);
+ LASSERT_SEM_LOCKED(&ctxt->loc_sem);
+ LASSERT(ctxt->loc_llcd == NULL);
+ llcd->llcd_ctxt = llog_ctxt_get(ctxt);
+ ctxt->loc_llcd = llcd;
- spin_lock(&lcm->lcm_llcd_lock);
- list_splice_init(&lcd->lcd_llcd_list, &lcm->lcm_llcd_resend);
- spin_unlock(&lcm->lcm_llcd_lock);
+ CDEBUG(D_RPCTRACE, "Attach llcd %p to ctxt %p\n",
+ llcd, ctxt);
+
+ return 0;
}
-static int log_commit_thread(void *arg)
+/**
+ * Opposite to llcd_attach(). Detaches llcd from its @ctxt. This makes
+ * sure that this llcd will not be found the next time we try to cancel.
+ */
+static struct llog_canceld_ctxt *llcd_detach(struct llog_ctxt *ctxt)
{
- struct llog_commit_master *lcm = arg;
- struct llog_commit_daemon *lcd;
- struct llog_canceld_ctxt *llcd, *n;
- struct obd_import *import = NULL;
- ENTRY;
+ struct llog_canceld_ctxt *llcd;
- OBD_ALLOC(lcd, sizeof(*lcd));
- if (lcd == NULL)
- RETURN(-ENOMEM);
+ LASSERT(ctxt != NULL);
+ LASSERT_SEM_LOCKED(&ctxt->loc_sem);
- spin_lock(&lcm->lcm_thread_lock);
- THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1,
- "ll_log_comt_%02d", atomic_read(&lcm->lcm_thread_total));
- atomic_inc(&lcm->lcm_thread_total);
- spin_unlock(&lcm->lcm_thread_lock);
-
- ptlrpc_daemonize(cfs_curproc_comm()); /* thread never needs to do IO */
-
- CFS_INIT_LIST_HEAD(&lcd->lcd_lcm_list);
- CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list);
- lcd->lcd_lcm = lcm;
-
- CDEBUG(D_HA, "%s started\n", cfs_curproc_comm());
- do {
- struct ptlrpc_request *request;
- struct list_head *sending_list;
- int rc = 0;
-
- if (import)
- class_import_put(import);
- import = NULL;
-
- /* If we do not have enough pages available, allocate some */
- while (atomic_read(&lcm->lcm_llcd_numfree) <
- lcm->lcm_llcd_minfree) {
- if (llcd_alloc(lcm) < 0)
- break;
- }
+ llcd = ctxt->loc_llcd;
+ if (!llcd)
+ return NULL;
- spin_lock(&lcm->lcm_thread_lock);
- atomic_inc(&lcm->lcm_thread_numidle);
- list_move(&lcd->lcd_lcm_list, &lcm->lcm_thread_idle);
- spin_unlock(&lcm->lcm_thread_lock);
-
- wait_event_interruptible(lcm->lcm_waitq,
- !list_empty(&lcm->lcm_llcd_pending) ||
- lcm->lcm_flags & LLOG_LCM_FL_EXIT);
-
- /* If we are the last available thread, start a new one in case
- * we get blocked on an RPC (nobody else will start a new one)*/
- spin_lock(&lcm->lcm_thread_lock);
- atomic_dec(&lcm->lcm_thread_numidle);
- list_move(&lcd->lcd_lcm_list, &lcm->lcm_thread_busy);
- spin_unlock(&lcm->lcm_thread_lock);
-
- sending_list = &lcm->lcm_llcd_pending;
- resend:
- if (import)
- class_import_put(import);
- import = NULL;
- if (lcm->lcm_flags & LLOG_LCM_FL_EXIT) {
- lcm->lcm_llcd_maxfree = 0;
- lcm->lcm_llcd_minfree = 0;
- lcm->lcm_thread_max = 0;
-
- if (list_empty(&lcm->lcm_llcd_pending) ||
- lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE)
- break;
- }
+ CDEBUG(D_RPCTRACE, "Detach llcd %p from ctxt %p\n",
+ llcd, ctxt);
- if (atomic_read(&lcm->lcm_thread_numidle) <= 1 &&
- atomic_read(&lcm->lcm_thread_total) < lcm->lcm_thread_max) {
- rc = llog_start_commit_thread(lcm);
- if (rc < 0)
- CERROR("error starting thread: rc %d\n", rc);
- }
+ ctxt->loc_llcd = NULL;
+ llog_ctxt_put(ctxt);
+ return llcd;
+}
- /* Move all of the pending cancels from the same OST off of
- * the list, so we don't get multiple threads blocked and/or
- * doing upcalls on the same OST in case of failure. */
- spin_lock(&lcm->lcm_llcd_lock);
- if (!list_empty(sending_list)) {
- list_move_tail(sending_list->next,
- &lcd->lcd_llcd_list);
- llcd = list_entry(lcd->lcd_llcd_list.next,
- typeof(*llcd), llcd_list);
- LASSERT(llcd->llcd_lcm == lcm);
- import = llcd->llcd_ctxt->loc_imp;
- if (import)
- class_import_get(import);
- }
- list_for_each_entry_safe(llcd, n, sending_list, llcd_list) {
- LASSERT(llcd->llcd_lcm == lcm);
- if (import == llcd->llcd_ctxt->loc_imp)
- list_move_tail(&llcd->llcd_list,
- &lcd->lcd_llcd_list);
- }
- if (sending_list != &lcm->lcm_llcd_resend) {
- list_for_each_entry_safe(llcd, n, &lcm->lcm_llcd_resend,
- llcd_list) {
- LASSERT(llcd->llcd_lcm == lcm);
- if (import == llcd->llcd_ctxt->loc_imp)
- list_move_tail(&llcd->llcd_list,
- &lcd->lcd_llcd_list);
- }
- }
- spin_unlock(&lcm->lcm_llcd_lock);
-
- /* We are the only one manipulating our local list - no lock */
- list_for_each_entry_safe(llcd,n, &lcd->lcd_llcd_list,llcd_list){
- char *bufs[2] = { NULL, (char *)llcd->llcd_cookies };
-
- list_del(&llcd->llcd_list);
- if (llcd->llcd_cookiebytes == 0) {
- CDEBUG(D_RPCTRACE, "put empty llcd %p:%p\n",
- llcd, llcd->llcd_ctxt);
- llcd_put(llcd);
- continue;
- }
-
- mutex_down(&llcd->llcd_ctxt->loc_sem);
- if (llcd->llcd_ctxt->loc_imp == NULL) {
- mutex_up(&llcd->llcd_ctxt->loc_sem);
- CWARN("import will be destroyed, put "
- "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
- llcd_put(llcd);
- continue;
- }
- mutex_up(&llcd->llcd_ctxt->loc_sem);
-
- if (!import || (import == LP_POISON) ||
- (import->imp_client == LP_POISON)) {
- CERROR("No import %p (llcd=%p, ctxt=%p)\n",
- import, llcd, llcd->llcd_ctxt);
- llcd_put(llcd);
- continue;
- }
-
- OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10);
-
- request = ptlrpc_request_alloc(import, &RQF_LOG_CANCEL);
- if (request == NULL) {
- rc = -ENOMEM;
- stop_log_commit(lcm, lcd, rc);
- break;
- }
-
- req_capsule_set_size(&request->rq_pill, &RMF_LOGCOOKIES,
- RCL_CLIENT,llcd->llcd_cookiebytes);
-
- rc = ptlrpc_request_bufs_pack(request,
- LUSTRE_LOG_VERSION,
- OBD_LOG_CANCEL, bufs,
- NULL);
- if (rc) {
- ptlrpc_request_free(request);
- stop_log_commit(lcm, lcd, rc);
- break;
- }
-
- /* XXX FIXME bug 249, 5515 */
- request->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
- request->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
-
- ptlrpc_request_set_replen(request);
- mutex_down(&llcd->llcd_ctxt->loc_sem);
- if (llcd->llcd_ctxt->loc_imp == NULL) {
- mutex_up(&llcd->llcd_ctxt->loc_sem);
- CWARN("import will be destroyed, put "
- "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
- llcd_put(llcd);
- ptlrpc_req_finished(request);
- continue;
- }
- mutex_up(&llcd->llcd_ctxt->loc_sem);
- rc = ptlrpc_queue_wait(request);
- ptlrpc_req_finished(request);
-
- /* If the RPC failed, we put this and the remaining
- * messages onto the resend list for another time. */
- if (rc == 0) {
- llcd_put(llcd);
- continue;
- }
-
- CERROR("commit %p:%p drop %d cookies: rc %d\n",
- llcd, llcd->llcd_ctxt,
- (int)(llcd->llcd_cookiebytes /
- sizeof(*llcd->llcd_cookies)), rc);
- llcd_put(llcd);
- }
+/**
+ * Return @llcd cached in @ctxt. Allocate new one if required. Attach it
+ * to ctxt so that it may be used for gathering cookies and sending.
+ */
+static struct llog_canceld_ctxt *llcd_get(struct llog_ctxt *ctxt)
+{
+ struct llog_canceld_ctxt *llcd;
+ LASSERT(ctxt);
+ llcd = llcd_alloc(ctxt->loc_lcm);
+ if (!llcd) {
+ CERROR("Can't alloc an llcd for ctxt %p\n", ctxt);
+ return NULL;
+ }
+ llcd_attach(ctxt, llcd);
+ return llcd;
+}
- if (rc == 0) {
- sending_list = &lcm->lcm_llcd_resend;
- if (!list_empty(sending_list))
- goto resend;
- }
- } while(1);
+/**
+ * Detach llcd from its @ctxt. Free llcd.
+ */
+static void llcd_put(struct llog_ctxt *ctxt)
+{
+ struct llog_canceld_ctxt *llcd;
- if (import)
- class_import_put(import);
+ llcd = llcd_detach(ctxt);
+ if (llcd)
+ llcd_free(llcd);
+}
- /* If we are force exiting, just drop all of the cookies. */
- if (lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE) {
- spin_lock(&lcm->lcm_llcd_lock);
- list_splice_init(&lcm->lcm_llcd_pending, &lcd->lcd_llcd_list);
- list_splice_init(&lcm->lcm_llcd_resend, &lcd->lcd_llcd_list);
- list_splice_init(&lcm->lcm_llcd_free, &lcd->lcd_llcd_list);
- spin_unlock(&lcm->lcm_llcd_lock);
+/**
+ * Detach llcd from its @ctxt so that nobody will find it when trying
+ * to re-use it. Send llcd to remote node.
+ */
+static int llcd_push(struct llog_ctxt *ctxt)
+{
+ struct llog_canceld_ctxt *llcd;
+ int rc;
- list_for_each_entry_safe(llcd, n, &lcd->lcd_llcd_list,llcd_list)
- llcd_put(llcd);
+ /*
+ * Make sure that this llcd will not be sent again as we detach
+ * it from ctxt.
+ */
+ llcd = llcd_detach(ctxt);
+ if (!llcd) {
+ CERROR("Invalid detached llcd found %p\n", llcd);
+ llcd_print(llcd, __FUNCTION__, __LINE__);
+ LBUG();
}
- spin_lock(&lcm->lcm_thread_lock);
- list_del(&lcd->lcd_lcm_list);
- spin_unlock(&lcm->lcm_thread_lock);
- OBD_FREE(lcd, sizeof(*lcd));
-
- CDEBUG(D_HA, "%s exiting\n", cfs_curproc_comm());
-
- spin_lock(&lcm->lcm_thread_lock);
- atomic_dec(&lcm->lcm_thread_total);
- spin_unlock(&lcm->lcm_thread_lock);
- cfs_waitq_signal(&lcm->lcm_waitq);
-
- return 0;
+ rc = llcd_send(llcd);
+ if (rc)
+ CERROR("Couldn't send llcd %p (%d)\n", llcd, rc);
+ return rc;
}
-int llog_start_commit_thread(struct llog_commit_master *lcm)
+/**
+ * Start recovery thread which actually deals with llcd sending. This
+ * is all based on standard ptlrpc threads, so there is not much work
+ * to do.
+ */
+int llog_recov_thread_start(struct llog_commit_master *lcm)
{
int rc;
ENTRY;
- if (atomic_read(&lcm->lcm_thread_total) >= lcm->lcm_thread_max)
- RETURN(0);
-
- rc = cfs_kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES);
- if (rc < 0) {
- CERROR("error starting thread #%d: %d\n",
- atomic_read(&lcm->lcm_thread_total), rc);
+ rc = ptlrpcd_start(lcm->lcm_name, &lcm->lcm_pc);
+ if (rc) {
+ CERROR("Error %d while starting recovery thread %s\n",
+ rc, lcm->lcm_name);
RETURN(rc);
}
-
- RETURN(0);
-}
-EXPORT_SYMBOL(llog_start_commit_thread);
-
-static struct llog_process_args {
- struct semaphore llpa_sem;
- struct llog_ctxt *llpa_ctxt;
- void *llpa_cb;
- void *llpa_arg;
-} llpa;
-
-int llog_init_commit_master(struct llog_commit_master *lcm)
-{
- CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy);
- CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle);
- spin_lock_init(&lcm->lcm_thread_lock);
- atomic_set(&lcm->lcm_thread_numidle, 0);
- cfs_waitq_init(&lcm->lcm_waitq);
- CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_pending);
- CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_resend);
- CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_free);
- spin_lock_init(&lcm->lcm_llcd_lock);
- atomic_set(&lcm->lcm_llcd_numfree, 0);
- lcm->lcm_llcd_minfree = 0;
- lcm->lcm_thread_max = 5;
- /* FIXME initialize semaphore for llog_process_args */
- sema_init(&llpa.llpa_sem, 1);
- return 0;
+ RETURN(rc);
}
-EXPORT_SYMBOL(llog_init_commit_master);
+EXPORT_SYMBOL(llog_recov_thread_start);
-int llog_cleanup_commit_master(struct llog_commit_master *lcm,
- int force)
+/**
+ * Stop recovery thread. Complement to llog_recov_thread_start().
+ */
+void llog_recov_thread_stop(struct llog_commit_master *lcm, int force)
{
- lcm->lcm_flags |= LLOG_LCM_FL_EXIT;
- if (force)
- lcm->lcm_flags |= LLOG_LCM_FL_EXIT_FORCE;
- cfs_waitq_signal(&lcm->lcm_waitq);
+ ENTRY;
- wait_event_interruptible(lcm->lcm_waitq,
- atomic_read(&lcm->lcm_thread_total) == 0);
- return 0;
+ /*
+ * Let all know that we're stopping. This will also make
+ * llcd_send() refuse any new llcds.
+ */
+ set_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags);
+
+ /*
+ * Stop processing thread. No new rpcs will be accepted
+ * for processing now.
+ */
+ ptlrpcd_stop(&lcm->lcm_pc, force);
+
+ /*
+ * By this point no alive inflight llcds should be left. Only
+ * those forgotten in sync may still be attached to ctxt. Let's
+ * print them.
+ */
+ if (atomic_read(&lcm->lcm_count) != 0) {
+ struct llog_canceld_ctxt *llcd;
+ struct list_head *tmp;
+
+ CERROR("Busy llcds found (%d) on lcm %p\n",
+ atomic_read(&lcm->lcm_count), lcm);
+
+ spin_lock(&lcm->lcm_lock);
+ list_for_each(tmp, &lcm->lcm_llcds) {
+ llcd = list_entry(tmp, struct llog_canceld_ctxt,
+ llcd_list);
+ llcd_print(llcd, __FUNCTION__, __LINE__);
+ }
+ spin_unlock(&lcm->lcm_lock);
+
+ /*
+ * No point in going further with busy llcds at this point
+ * as this is a clear bug. It might mean we got a hanging
+ * rpc which holds an import ref, and this means we will not
+ * be able to clean up anyway.
+ *
+ * Or we just failed to kill them when they were not
+ * attached to a ctxt. In this case our slab will remind
+ * us about this a bit later.
+ */
+ LBUG();
+ }
+ EXIT;
}
-EXPORT_SYMBOL(llog_cleanup_commit_master);
+EXPORT_SYMBOL(llog_recov_thread_stop);
-static int log_process_thread(void *args)
+/**
+ * Initialize commit master structure and start recovery thread on it.
+ */
+struct llog_commit_master *llog_recov_thread_init(char *name)
{
- struct llog_process_args *data = args;
- struct llog_ctxt *ctxt = data->llpa_ctxt;
- void *cb = data->llpa_cb;
- struct llog_logid logid = *(struct llog_logid *)(data->llpa_arg);
- struct llog_handle *llh = NULL;
+ struct llog_commit_master *lcm;
int rc;
ENTRY;
- mutex_up(&data->llpa_sem);
- ptlrpc_daemonize("llog_process"); /* thread does IO to log files */
-
- rc = llog_create(ctxt, &llh, &logid, NULL);
+ OBD_ALLOC_PTR(lcm);
+ if (!lcm)
+ RETURN(NULL);
+
+ /*
+ * Try to create threads with unique names.
+ */
+ snprintf(lcm->lcm_name, sizeof(lcm->lcm_name),
+ "lcm_%s", name);
+
+ atomic_set(&lcm->lcm_count, 0);
+ atomic_set(&lcm->lcm_refcount, 1);
+ spin_lock_init(&lcm->lcm_lock);
+ CFS_INIT_LIST_HEAD(&lcm->lcm_llcds);
+ rc = llog_recov_thread_start(lcm);
if (rc) {
- CERROR("llog_create failed %d\n", rc);
+ CERROR("Can't start commit thread, rc %d\n", rc);
GOTO(out, rc);
}
- rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL);
- if (rc) {
- CERROR("llog_init_handle failed %d\n", rc);
- GOTO(release_llh, rc);
- }
-
- if (cb) {
- rc = llog_cat_process(llh, (llog_cb_t)cb, NULL);
- if (rc != LLOG_PROC_BREAK)
- CERROR("llog_cat_process failed %d\n", rc);
- } else {
- CWARN("no callback function for recovery\n");
- }
-
- CDEBUG(D_HA, "send llcd %p:%p forcibly after recovery\n",
- ctxt->loc_llcd, ctxt);
- llog_sync(ctxt, NULL);
-
-release_llh:
- rc = llog_cat_put(llh);
- if (rc)
- CERROR("llog_cat_put failed %d\n", rc);
+ RETURN(lcm);
out:
- llog_ctxt_put(ctxt);
- RETURN(rc);
+ OBD_FREE_PTR(lcm);
+ return NULL;
+}
+EXPORT_SYMBOL(llog_recov_thread_init);
+
+/**
+ * Finalize commit master and its recovery thread.
+ */
+void llog_recov_thread_fini(struct llog_commit_master *lcm, int force)
+{
+ ENTRY;
+ llog_recov_thread_stop(lcm, force);
+ lcm_put(lcm);
+ EXIT;
}
+EXPORT_SYMBOL(llog_recov_thread_fini);
-static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg)
+static int llog_recov_thread_replay(struct llog_ctxt *ctxt,
+ void *cb, void *arg)
{
struct obd_device *obd = ctxt->loc_obd;
+ struct llog_process_cat_args *lpca;
int rc;
ENTRY;
if (obd->obd_stopping)
RETURN(-ENODEV);
- mutex_down(&llpa.llpa_sem);
- llpa.llpa_cb = handle;
- llpa.llpa_arg = arg;
- llpa.llpa_ctxt = llog_ctxt_get(ctxt);
- if (!llpa.llpa_ctxt) {
- up(&llpa.llpa_sem);
+ /*
+ * This will be balanced in llog_cat_process_thread()
+ */
+ OBD_ALLOC_PTR(lpca);
+ if (!lpca)
+ RETURN(-ENOMEM);
+
+ lpca->lpca_cb = cb;
+ lpca->lpca_arg = arg;
+
+ /*
+ * This will be balanced in llog_cat_process_thread()
+ */
+ lpca->lpca_ctxt = llog_ctxt_get(ctxt);
+ if (!lpca->lpca_ctxt) {
+ OBD_FREE_PTR(lpca);
RETURN(-ENODEV);
}
- rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
+ rc = cfs_kernel_thread(llog_cat_process_thread, lpca,
+ CLONE_VM | CLONE_FILES);
if (rc < 0) {
+ CERROR("Error starting llog_cat_process_thread(): %d\n", rc);
+ OBD_FREE_PTR(lpca);
llog_ctxt_put(ctxt);
- CERROR("error starting log_process_thread: %d\n", rc);
} else {
- CDEBUG(D_HA, "log_process_thread: %d\n", rc);
+ CDEBUG(D_HA, "Started llog_cat_process_thread(): %d\n", rc);
rc = 0;
}
RETURN(rc);
}
-int llog_repl_connect(struct llog_ctxt *ctxt, int count,
- struct llog_logid *logid, struct llog_gen *gen,
- struct obd_uuid *uuid)
+int llog_obd_repl_connect(struct llog_ctxt *ctxt,
+ struct llog_logid *logid, struct llog_gen *gen,
+ struct obd_uuid *uuid)
{
- struct llog_canceld_ctxt *llcd;
int rc;
ENTRY;
- /* send back llcd before recovery from llog */
- if (ctxt->loc_llcd != NULL) {
- CWARN("llcd %p:%p not empty\n", ctxt->loc_llcd, ctxt);
- llog_sync(ctxt, NULL);
- }
+ /*
+ * Send back cached llcd from llog before recovery if we have any.
+ * This is a no-op if nothing cached is found there.
+ */
+ llog_sync(ctxt, NULL);
+ /*
+ * Start recovery in separate thread.
+ */
mutex_down(&ctxt->loc_sem);
ctxt->loc_gen = *gen;
- llcd = ctxt_llcd_grab(ctxt);
- if (llcd == NULL) {
- CERROR("couldn't get an llcd\n");
- mutex_up(&ctxt->loc_sem);
- RETURN(-ENOMEM);
+ rc = llog_recov_thread_replay(ctxt, ctxt->llog_proc_cb, logid);
+ mutex_up(&ctxt->loc_sem);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_obd_repl_connect);
+
+/**
+ * Deleted objects have a commit callback that cancels the MDS
+ * log record for the deletion. The commit callback calls this
+ * function.
+ */
+int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
+ struct lov_stripe_md *lsm, int count,
+ struct llog_cookie *cookies, int flags)
+{
+ struct llog_commit_master *lcm;
+ struct llog_canceld_ctxt *llcd;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(ctxt != NULL);
+
+ mutex_down(&ctxt->loc_sem);
+ if (!ctxt->loc_lcm) {
+ CDEBUG(D_RPCTRACE, "No lcm for ctxt %p\n", ctxt);
+ GOTO(out, rc = -ENODEV);
}
+ lcm = ctxt->loc_lcm;
+ CDEBUG(D_INFO, "cancel on lsm %p\n", lcm);
+
+ /*
+ * Let's check if we have all structures alive. We also check for
+ * possible shutdown. Do nothing if we're stopping.
+ */
+ if (ctxt->loc_imp == NULL) {
+ CDEBUG(D_RPCTRACE, "No import for ctxt %p\n", ctxt);
+ GOTO(out, rc = -ENODEV);
+ }
+
+ if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) {
+ CDEBUG(D_RPCTRACE, "Commit thread is stopping for ctxt %p\n",
+ ctxt);
+ GOTO(out, rc = -ENODEV);
+ }
+
+ llcd = ctxt->loc_llcd;
+
+ if (count > 0 && cookies != NULL) {
+ /*
+ * Get new llcd from ctxt if required.
+ */
+ if (!llcd) {
+ llcd = llcd_get(ctxt);
+ if (!llcd)
+ GOTO(out, rc = -ENOMEM);
+ /*
+ * Allocation is successful, let's check for stop
+ * flag again to fall back as soon as possible.
+ */
+ if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags))
+ GOTO(out, rc = -ENODEV);
+ }
+
+ /*
+ * Llcd does not have enough room for @cookies. Let's push
+ * it out and allocate new one.
+ */
+ if (!llcd_fit(llcd, cookies)) {
+ rc = llcd_push(ctxt);
+ if (rc)
+ GOTO(out, rc);
+ llcd = llcd_get(ctxt);
+ if (!llcd)
+ GOTO(out, rc = -ENOMEM);
+ /*
+ * Allocation is successful, let's check for stop
+ * flag again to fall back as soon as possible.
+ */
+ if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags))
+ GOTO(out, rc = -ENODEV);
+ }
+
+ /*
+ * Copy cookies to @llcd, no matter old or new allocated
+ * one.
+ */
+ llcd_copy(llcd, cookies);
+ }
+
+ /*
+ * Let's check if we need to send copied @cookies asap. If yes
+ * then do it.
+ */
+ if (llcd && (flags & OBD_LLOG_FL_SENDNOW)) {
+ CDEBUG(D_RPCTRACE, "Sync llcd %p\n", llcd);
+ rc = llcd_push(ctxt);
+ if (rc)
+ GOTO(out, rc);
+ }
+ EXIT;
+out:
+ if (rc)
+ llcd_put(ctxt);
mutex_up(&ctxt->loc_sem);
+ return rc;
+}
+EXPORT_SYMBOL(llog_obd_repl_cancel);
- rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid);
- if (rc != 0) {
- ctxt_llcd_put(ctxt);
- CERROR("error recovery process: %d\n", rc);
+int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
+{
+ int rc = 0;
+ ENTRY;
+
+ /*
+ * Flush any remaining llcd.
+ */
+ mutex_down(&ctxt->loc_sem);
+ if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) {
+ /*
+ * This is ost->mds connection, we can't be sure that mds
+ * can still receive cookies, so let's kill the cached llcd.
+ */
+ CDEBUG(D_RPCTRACE, "Kill cached llcd\n");
+ llcd_put(ctxt);
+ mutex_up(&ctxt->loc_sem);
+ } else {
+ /*
+ * This is either llog_sync() from generic llog code or sync
+ * on client disconnect. In either way let's do it and send
+ * llcds to the target with waiting for completion.
+ */
+ CDEBUG(D_RPCTRACE, "Sync cached llcd\n");
+ mutex_up(&ctxt->loc_sem);
+ rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
}
RETURN(rc);
}
-EXPORT_SYMBOL(llog_repl_connect);
+EXPORT_SYMBOL(llog_obd_repl_sync);
#else /* !__KERNEL__ */
return 0;
}
#endif
+
+/**
+ * Module init time function. Initializes slab for llcd objects.
+ */
+int llog_recov_init(void)
+{
+ int llcd_size;
+
+ llcd_size = CFS_PAGE_SIZE -
+ lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, NULL);
+ llcd_size += offsetof(struct llog_canceld_ctxt, llcd_cookies);
+ llcd_cache = cfs_mem_cache_create("llcd_cache", llcd_size, 0, 0);
+ if (!llcd_cache) {
+ CERROR("Error allocating llcd cache\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * Module fini time function. Releases slab for llcd objects.
+ */
+void llog_recov_fini(void)
+{
+ /*
+ * Kill llcd cache when thread is stopped and we're sure no
+ * llcd in use left.
+ */
+ if (llcd_cache) {
+ /*
+ * In 2.6.22 cfs_mem_cache_destroy() will not return error
+ * for busy resources. Let's check it another way.
+ */
+ LASSERTF(atomic_read(&llcd_count) == 0,
+ "Can't destroy llcd cache! Number of "
+ "busy llcds: %d\n", atomic_read(&llcd_count));
+ cfs_mem_cache_destroy(llcd_cache);
+ llcd_cache = NULL;
+ }
+}