Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / ptlrpc / recov_thread.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 2003 Cluster File Systems, Inc.
5  *   Author: Andreas Dilger <adilger@clusterfs.com>
6  *
7  *   This file is part of the Lustre file system, http://www.lustre.org
8  *   Lustre is a trademark of Cluster File Systems, Inc.
9  *
10  *   You may have signed or agreed to another license before downloading
11  *   this software.  If so, you are bound by the terms and conditions
12  *   of that agreement, and the following does not apply to you.  See the
13  *   LICENSE file included with this distribution for more information.
14  *
15  *   If you did not agree to a different license, then this copy of Lustre
16  *   is open source software; you can redistribute it and/or modify it
17  *   under the terms of version 2 of the GNU General Public License as
18  *   published by the Free Software Foundation.
19  *
20  *   In either case, Lustre is distributed in the hope that it will be
21  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
22  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  *   license text for more details.
24  *
25  * OST<->MDS recovery logging thread.
26  *
27  * Invariants in implementation:
28  * - we do not share logs among different OST<->MDS connections, so that
29  *   if an OST or MDS fails it need only look at log(s) relevant to itself
30  */
31
32 #define DEBUG_SUBSYSTEM S_LOG
33
34 #ifndef EXPORT_SYMTAB
35 # define EXPORT_SYMTAB
36 #endif
37
38 #ifdef __KERNEL__
39 # include <libcfs/libcfs.h>
40 #else
41 # include <libcfs/list.h>
42 # include <liblustre.h>
43 #endif
44
45 #include <obd_class.h>
46 #include <lustre_commit_confd.h>
47 #include <obd_support.h>
48 #include <obd_class.h>
49 #include <lustre_net.h>
50 #include <lnet/types.h>
51 #include <libcfs/list.h>
52 #include <lustre_log.h>
53 #include "ptlrpc_internal.h"
54
55 #ifdef __KERNEL__
56
57 /* Allocate new commit structs in case we do not have enough.
58  * Make the llcd size small enough that it fits into a single page when we
59  * are sending/receiving it. */
60 static int llcd_alloc(struct llog_commit_master *lcm)
61 {
62         struct llog_canceld_ctxt *llcd;
63         int llcd_size;
64
65         /* payload of lustre_msg V2 is bigger */
66         llcd_size = 4096 - lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, NULL);
67         OBD_ALLOC(llcd,
68                   llcd_size + offsetof(struct llog_canceld_ctxt, llcd_cookies));
69         if (llcd == NULL)
70                 return -ENOMEM;
71
72         llcd->llcd_size = llcd_size;
73         llcd->llcd_lcm = lcm;
74
75         spin_lock(&lcm->lcm_llcd_lock);
76         list_add(&llcd->llcd_list, &lcm->lcm_llcd_free);
77         atomic_inc(&lcm->lcm_llcd_numfree);
78         spin_unlock(&lcm->lcm_llcd_lock);
79
80         return 0;
81 }
82
83 /* Get a free cookie struct from the list */
84 static struct llog_canceld_ctxt *llcd_grab(struct llog_commit_master *lcm)
85 {
86         struct llog_canceld_ctxt *llcd;
87
88 repeat:
89         spin_lock(&lcm->lcm_llcd_lock);
90         if (list_empty(&lcm->lcm_llcd_free)) {
91                 spin_unlock(&lcm->lcm_llcd_lock);
92                 if (llcd_alloc(lcm) < 0) {
93                         CERROR("unable to allocate log commit data!\n");
94                         return NULL;
95                 }
96                 /* check new llcd wasn't grabbed while lock dropped, b=7407 */
97                 goto repeat;
98         }
99
100         llcd = list_entry(lcm->lcm_llcd_free.next, typeof(*llcd), llcd_list);
101         list_del(&llcd->llcd_list);
102         atomic_dec(&lcm->lcm_llcd_numfree);
103         spin_unlock(&lcm->lcm_llcd_lock);
104
105         llcd->llcd_cookiebytes = 0;
106
107         return llcd;
108 }
109
110 static void llcd_put(struct llog_canceld_ctxt *llcd)
111 {
112         struct llog_commit_master *lcm = llcd->llcd_lcm;
113
114         llog_ctxt_put(llcd->llcd_ctxt);
115         if (atomic_read(&lcm->lcm_llcd_numfree) >= lcm->lcm_llcd_maxfree) {
116                 int llcd_size = llcd->llcd_size +
117                          offsetof(struct llog_canceld_ctxt, llcd_cookies);
118                 OBD_FREE(llcd, llcd_size);
119         } else {
120                 spin_lock(&lcm->lcm_llcd_lock);
121                 list_add(&llcd->llcd_list, &lcm->lcm_llcd_free);
122                 atomic_inc(&lcm->lcm_llcd_numfree);
123                 spin_unlock(&lcm->lcm_llcd_lock);
124         }
125 }
126
127 /* Send some cookies to the appropriate target */
128 static void llcd_send(struct llog_canceld_ctxt *llcd)
129 {
130         if (!(llcd->llcd_lcm->lcm_flags & LLOG_LCM_FL_EXIT)) {
131                 spin_lock(&llcd->llcd_lcm->lcm_llcd_lock);
132                 list_add_tail(&llcd->llcd_list,
133                               &llcd->llcd_lcm->lcm_llcd_pending);
134                 spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
135         }
136         cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1);
137 }
138
139 /**
140  * Grab llcd and assign it to passed @ctxt. Also set up backward link
141  * and get ref on @ctxt.
142  */
143 static struct llog_canceld_ctxt *ctxt_llcd_grab(struct llog_ctxt *ctxt)
144 {
145         struct llog_canceld_ctxt *llcd;
146
147         LASSERT_SEM_LOCKED(&ctxt->loc_sem);
148         llcd = llcd_grab(ctxt->loc_lcm);
149         if (llcd == NULL)
150                 return NULL;
151
152         llcd->llcd_ctxt = llog_ctxt_get(ctxt);
153         ctxt->loc_llcd = llcd;
154
155         CDEBUG(D_RPCTRACE,"grab llcd %p:%p\n", ctxt->loc_llcd, ctxt);
156         return llcd;
157 }
158
159 /**
160  * Put llcd in passed @ctxt. Set ->loc_llcd to NULL.
161  */
162 static void ctxt_llcd_put(struct llog_ctxt *ctxt)
163 {
164         mutex_down(&ctxt->loc_sem);
165         if (ctxt->loc_llcd != NULL) {
166                 CDEBUG(D_RPCTRACE,"put llcd %p:%p\n", ctxt->loc_llcd, ctxt);
167                 llcd_put(ctxt->loc_llcd);
168                 ctxt->loc_llcd = NULL;
169         }
170         ctxt->loc_imp = NULL;
171         mutex_up(&ctxt->loc_sem);
172 }
173
174 /* deleted objects have a commit callback that cancels the MDS
175  * log record for the deletion.  The commit callback calls this
176  * function
177  */
178 int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
179                          struct lov_stripe_md *lsm, int count,
180                          struct llog_cookie *cookies, int flags)
181 {
182         struct llog_canceld_ctxt *llcd;
183         int rc = 0;
184         ENTRY;
185
186         LASSERT(ctxt);
187
188         mutex_down(&ctxt->loc_sem);
189         llcd = ctxt->loc_llcd;
190
191         if (ctxt->loc_imp == NULL) {
192                 CDEBUG(D_RPCTRACE, "no import for ctxt %p\n", ctxt);
193                 GOTO(out, rc = 0);
194         }
195
196         if (count > 0 && cookies != NULL) {
197                 if (llcd == NULL) {
198                         llcd = ctxt_llcd_grab(ctxt);
199                         if (llcd == NULL) {
200                                 CERROR("couldn't get an llcd - dropped "LPX64
201                                        ":%x+%u\n",
202                                        cookies->lgc_lgl.lgl_oid,
203                                        cookies->lgc_lgl.lgl_ogen, 
204                                        cookies->lgc_index);
205                                 GOTO(out, rc = -ENOMEM);
206                         }
207                 }
208
209                 memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, 
210                        cookies, sizeof(*cookies));
211                 llcd->llcd_cookiebytes += sizeof(*cookies);
212         } else {
213                 if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
214                         GOTO(out, rc);
215         }
216
217         if ((llcd->llcd_size - llcd->llcd_cookiebytes) < sizeof(*cookies) ||
218             (flags & OBD_LLOG_FL_SENDNOW)) {
219                 CDEBUG(D_RPCTRACE, "send llcd %p:%p\n", llcd, llcd->llcd_ctxt);
220                 ctxt->loc_llcd = NULL;
221                 llcd_send(llcd);
222         }
223 out:
224         mutex_up(&ctxt->loc_sem);
225         return rc;
226 }
227 EXPORT_SYMBOL(llog_obd_repl_cancel);
228
229 int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
230 {
231         int rc = 0;
232         ENTRY;
233
234         if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) {
235                 CDEBUG(D_RPCTRACE,"reverse import disconnect\n");
236                 /* 
237                  * We put llcd because it is not going to sending list and
238                  * thus, its refc will not be handled. We will handle it here.
239                  */
240                 ctxt_llcd_put(ctxt);
241         } else {
242                 /* 
243                  * Sending cancel. This means that ctxt->loc_llcd wil be
244                  * put on sending list in llog_obd_repl_cancel() and in
245                  * this case recovery thread will take care of it refc.
246                  */
247                 rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
248         }
249         RETURN(rc);
250 }
251 EXPORT_SYMBOL(llog_obd_repl_sync);
252
253 static inline void stop_log_commit(struct llog_commit_master *lcm,
254                                    struct llog_commit_daemon *lcd,
255                                    int rc)
256 {
257         CERROR("error preparing commit: rc %d\n", rc);
258
259         spin_lock(&lcm->lcm_llcd_lock);
260         list_splice_init(&lcd->lcd_llcd_list, &lcm->lcm_llcd_resend);
261         spin_unlock(&lcm->lcm_llcd_lock);
262 }
263
264 static int log_commit_thread(void *arg)
265 {
266         struct llog_commit_master *lcm = arg;
267         struct llog_commit_daemon *lcd;
268         struct llog_canceld_ctxt *llcd, *n;
269         struct obd_import *import = NULL;
270         ENTRY;
271
272         OBD_ALLOC(lcd, sizeof(*lcd));
273         if (lcd == NULL)
274                 RETURN(-ENOMEM);
275
276         spin_lock(&lcm->lcm_thread_lock);
277         THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1,
278                     "ll_log_comt_%02d", atomic_read(&lcm->lcm_thread_total));
279         atomic_inc(&lcm->lcm_thread_total);
280         spin_unlock(&lcm->lcm_thread_lock);
281
282         ptlrpc_daemonize(cfs_curproc_comm()); /* thread never needs to do IO */
283
284         CFS_INIT_LIST_HEAD(&lcd->lcd_lcm_list);
285         CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list);
286         lcd->lcd_lcm = lcm;
287
288         CDEBUG(D_HA, "%s started\n", cfs_curproc_comm());
289         do {
290                 struct ptlrpc_request *request;
291                 struct list_head *sending_list;
292                 int rc = 0;
293
294                 if (import)
295                         class_import_put(import);
296                 import = NULL;
297
298                 /* If we do not have enough pages available, allocate some */
299                 while (atomic_read(&lcm->lcm_llcd_numfree) <
300                        lcm->lcm_llcd_minfree) {
301                         if (llcd_alloc(lcm) < 0)
302                                 break;
303                 }
304
305                 spin_lock(&lcm->lcm_thread_lock);
306                 atomic_inc(&lcm->lcm_thread_numidle);
307                 list_move(&lcd->lcd_lcm_list, &lcm->lcm_thread_idle);
308                 spin_unlock(&lcm->lcm_thread_lock);
309
310                 wait_event_interruptible(lcm->lcm_waitq,
311                                          !list_empty(&lcm->lcm_llcd_pending) ||
312                                          lcm->lcm_flags & LLOG_LCM_FL_EXIT);
313
314                 /* If we are the last available thread, start a new one in case
315                  * we get blocked on an RPC (nobody else will start a new one)*/
316                 spin_lock(&lcm->lcm_thread_lock);
317                 atomic_dec(&lcm->lcm_thread_numidle);
318                 list_move(&lcd->lcd_lcm_list, &lcm->lcm_thread_busy);
319                 spin_unlock(&lcm->lcm_thread_lock);
320
321                 sending_list = &lcm->lcm_llcd_pending;
322         resend:
323                 if (import)
324                         class_import_put(import);
325                 import = NULL;
326                 if (lcm->lcm_flags & LLOG_LCM_FL_EXIT) {
327                         lcm->lcm_llcd_maxfree = 0;
328                         lcm->lcm_llcd_minfree = 0;
329                         lcm->lcm_thread_max = 0;
330
331                         if (list_empty(&lcm->lcm_llcd_pending) ||
332                             lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE)
333                                 break;
334                 }
335
336                 if (atomic_read(&lcm->lcm_thread_numidle) <= 1 &&
337                     atomic_read(&lcm->lcm_thread_total) < lcm->lcm_thread_max) {
338                         rc = llog_start_commit_thread(lcm);
339                         if (rc < 0)
340                                 CERROR("error starting thread: rc %d\n", rc);
341                 }
342
343                 /* Move all of the pending cancels from the same OST off of
344                  * the list, so we don't get multiple threads blocked and/or
345                  * doing upcalls on the same OST in case of failure. */
346                 spin_lock(&lcm->lcm_llcd_lock);
347                 if (!list_empty(sending_list)) {
348                         list_move_tail(sending_list->next,
349                                        &lcd->lcd_llcd_list);
350                         llcd = list_entry(lcd->lcd_llcd_list.next,
351                                           typeof(*llcd), llcd_list);
352                         LASSERT(llcd->llcd_lcm == lcm);
353                         import = llcd->llcd_ctxt->loc_imp;
354                         if (import)
355                                 class_import_get(import);
356                 }
357                 list_for_each_entry_safe(llcd, n, sending_list, llcd_list) {
358                         LASSERT(llcd->llcd_lcm == lcm);
359                         if (import == llcd->llcd_ctxt->loc_imp)
360                                 list_move_tail(&llcd->llcd_list,
361                                                &lcd->lcd_llcd_list);
362                 }
363                 if (sending_list != &lcm->lcm_llcd_resend) {
364                         list_for_each_entry_safe(llcd, n, &lcm->lcm_llcd_resend,
365                                                  llcd_list) {
366                                 LASSERT(llcd->llcd_lcm == lcm);
367                                 if (import == llcd->llcd_ctxt->loc_imp)
368                                         list_move_tail(&llcd->llcd_list,
369                                                        &lcd->lcd_llcd_list);
370                         }
371                 }
372                 spin_unlock(&lcm->lcm_llcd_lock);
373
374                 /* We are the only one manipulating our local list - no lock */
375                 list_for_each_entry_safe(llcd,n, &lcd->lcd_llcd_list,llcd_list){
376                         char *bufs[2] = { NULL, (char *)llcd->llcd_cookies };
377
378                         list_del(&llcd->llcd_list);
379                         if (llcd->llcd_cookiebytes == 0) {
380                                 CDEBUG(D_RPCTRACE, "put empty llcd %p:%p\n",
381                                        llcd, llcd->llcd_ctxt);
382                                 llcd_put(llcd);
383                                 continue;
384                         }
385
386                         mutex_down(&llcd->llcd_ctxt->loc_sem);
387                         if (llcd->llcd_ctxt->loc_imp == NULL) {
388                                 mutex_up(&llcd->llcd_ctxt->loc_sem);
389                                 CWARN("import will be destroyed, put "
390                                       "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
391                                 llcd_put(llcd);
392                                 continue;
393                         }
394                         mutex_up(&llcd->llcd_ctxt->loc_sem);
395
396                         if (!import || (import == LP_POISON) ||
397                             (import->imp_client == LP_POISON)) {
398                                 CERROR("No import %p (llcd=%p, ctxt=%p)\n",
399                                        import, llcd, llcd->llcd_ctxt);
400                                 llcd_put(llcd);
401                                 continue;
402                         }
403
404                         OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_RECOV, 10);
405
406                         request = ptlrpc_request_alloc(import, &RQF_LOG_CANCEL);
407                         if (request == NULL) {
408                                 rc = -ENOMEM;
409                                 stop_log_commit(lcm, lcd, rc);
410                                 break;
411                         }
412
413                         req_capsule_set_size(&request->rq_pill, &RMF_LOGCOOKIES,
414                                              RCL_CLIENT,llcd->llcd_cookiebytes);
415
416                         rc = ptlrpc_request_bufs_pack(request,
417                                                       LUSTRE_LOG_VERSION,
418                                                       OBD_LOG_CANCEL, bufs,
419                                                       NULL);
420                         if (rc) {
421                                 ptlrpc_request_free(request);
422                                 stop_log_commit(lcm, lcd, rc);
423                                 break;
424                         }
425
426                         /* XXX FIXME bug 249, 5515 */
427                         request->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL;
428                         request->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
429
430                         ptlrpc_request_set_replen(request);
431                         mutex_down(&llcd->llcd_ctxt->loc_sem);
432                         if (llcd->llcd_ctxt->loc_imp == NULL) {
433                                 mutex_up(&llcd->llcd_ctxt->loc_sem);
434                                 CWARN("import will be destroyed, put "
435                                       "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
436                                 llcd_put(llcd);
437                                 ptlrpc_req_finished(request);
438                                 continue;
439                         }
440                         mutex_up(&llcd->llcd_ctxt->loc_sem);
441                         rc = ptlrpc_queue_wait(request);
442                         ptlrpc_req_finished(request);
443
444                         /* If the RPC failed, we put this and the remaining
445                          * messages onto the resend list for another time. */
446                         if (rc == 0) {
447                                 llcd_put(llcd);
448                                 continue;
449                         }
450
451                         CERROR("commit %p:%p drop %d cookies: rc %d\n",
452                                llcd, llcd->llcd_ctxt,
453                                (int)(llcd->llcd_cookiebytes /
454                                      sizeof(*llcd->llcd_cookies)), rc);
455                         llcd_put(llcd);
456                 }
457
458                 if (rc == 0) {
459                         sending_list = &lcm->lcm_llcd_resend;
460                         if (!list_empty(sending_list))
461                                 goto resend;
462                 }
463         } while(1);
464
465         if (import)
466                 class_import_put(import);
467
468         /* If we are force exiting, just drop all of the cookies. */
469         if (lcm->lcm_flags & LLOG_LCM_FL_EXIT_FORCE) {
470                 spin_lock(&lcm->lcm_llcd_lock);
471                 list_splice_init(&lcm->lcm_llcd_pending, &lcd->lcd_llcd_list);
472                 list_splice_init(&lcm->lcm_llcd_resend, &lcd->lcd_llcd_list);
473                 list_splice_init(&lcm->lcm_llcd_free, &lcd->lcd_llcd_list);
474                 spin_unlock(&lcm->lcm_llcd_lock);
475
476                 list_for_each_entry_safe(llcd, n, &lcd->lcd_llcd_list,llcd_list)
477                         llcd_put(llcd);
478         }
479
480         spin_lock(&lcm->lcm_thread_lock);
481         list_del(&lcd->lcd_lcm_list);
482         spin_unlock(&lcm->lcm_thread_lock);
483         OBD_FREE(lcd, sizeof(*lcd));
484
485         CDEBUG(D_HA, "%s exiting\n", cfs_curproc_comm());
486
487         spin_lock(&lcm->lcm_thread_lock);
488         atomic_dec(&lcm->lcm_thread_total);
489         spin_unlock(&lcm->lcm_thread_lock);
490         cfs_waitq_signal(&lcm->lcm_waitq);
491
492         return 0;
493 }
494
495 int llog_start_commit_thread(struct llog_commit_master *lcm)
496 {
497         int rc;
498         ENTRY;
499
500         if (atomic_read(&lcm->lcm_thread_total) >= lcm->lcm_thread_max)
501                 RETURN(0);
502
503         rc = cfs_kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES);
504         if (rc < 0) {
505                 CERROR("error starting thread #%d: %d\n",
506                        atomic_read(&lcm->lcm_thread_total), rc);
507                 RETURN(rc);
508         }
509
510         RETURN(0);
511 }
512 EXPORT_SYMBOL(llog_start_commit_thread);
513
514 static struct llog_process_args {
515         struct semaphore         llpa_sem;
516         struct llog_ctxt        *llpa_ctxt;
517         void                    *llpa_cb;
518         void                    *llpa_arg;
519 } llpa;
520
521 int llog_init_commit_master(struct llog_commit_master *lcm)
522 {
523         CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy);
524         CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle);
525         spin_lock_init(&lcm->lcm_thread_lock);
526         atomic_set(&lcm->lcm_thread_numidle, 0);
527         cfs_waitq_init(&lcm->lcm_waitq);
528         CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_pending);
529         CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_resend);
530         CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_free);
531         spin_lock_init(&lcm->lcm_llcd_lock);
532         atomic_set(&lcm->lcm_llcd_numfree, 0);
533         lcm->lcm_llcd_minfree = 0;
534         lcm->lcm_thread_max = 5;
535         /* FIXME initialize semaphore for llog_process_args */
536         sema_init(&llpa.llpa_sem, 1);
537         return 0;
538 }
539 EXPORT_SYMBOL(llog_init_commit_master);
540
541 int llog_cleanup_commit_master(struct llog_commit_master *lcm,
542                                int force)
543 {
544         lcm->lcm_flags |= LLOG_LCM_FL_EXIT;
545         if (force)
546                 lcm->lcm_flags |= LLOG_LCM_FL_EXIT_FORCE;
547         cfs_waitq_signal(&lcm->lcm_waitq);
548
549         wait_event_interruptible(lcm->lcm_waitq,
550                                  atomic_read(&lcm->lcm_thread_total) == 0);
551         return 0;
552 }
553 EXPORT_SYMBOL(llog_cleanup_commit_master);
554
555 static int log_process_thread(void *args)
556 {
557         struct llog_process_args *data = args;
558         struct llog_ctxt *ctxt = data->llpa_ctxt;
559         void   *cb = data->llpa_cb;
560         struct llog_logid logid = *(struct llog_logid *)(data->llpa_arg);
561         struct llog_handle *llh = NULL;
562         int rc;
563         ENTRY;
564
565         mutex_up(&data->llpa_sem);
566         ptlrpc_daemonize("llog_process");     /* thread does IO to log files */
567
568         rc = llog_create(ctxt, &llh, &logid, NULL);
569         if (rc) {
570                 CERROR("llog_create failed %d\n", rc);
571                 GOTO(out, rc);
572         }
573         rc = llog_init_handle(llh, LLOG_F_IS_CAT, NULL);
574         if (rc) {
575                 CERROR("llog_init_handle failed %d\n", rc);
576                 GOTO(release_llh, rc);
577         }
578
579         if (cb) {
580                 rc = llog_cat_process(llh, (llog_cb_t)cb, NULL);
581                 if (rc != LLOG_PROC_BREAK)
582                         CERROR("llog_cat_process failed %d\n", rc);
583         } else {
584                 CWARN("no callback function for recovery\n");
585         }
586
587         CDEBUG(D_HA, "send llcd %p:%p forcibly after recovery\n",
588                ctxt->loc_llcd, ctxt);
589         llog_sync(ctxt, NULL);
590
591 release_llh:
592         rc = llog_cat_put(llh);
593         if (rc)
594                 CERROR("llog_cat_put failed %d\n", rc);
595 out:
596         llog_ctxt_put(ctxt);
597         RETURN(rc);
598 }
599
600 static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg)
601 {
602         struct obd_device *obd = ctxt->loc_obd;
603         int rc;
604         ENTRY;
605
606         if (obd->obd_stopping)
607                 RETURN(-ENODEV);
608
609         mutex_down(&llpa.llpa_sem);
610         llpa.llpa_cb = handle;
611         llpa.llpa_arg = arg;
612         llpa.llpa_ctxt = llog_ctxt_get(ctxt);
613         if (!llpa.llpa_ctxt) {
614                 up(&llpa.llpa_sem);
615                 RETURN(-ENODEV);
616         }
617         rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
618         if (rc < 0) {
619                 llog_ctxt_put(ctxt);
620                 CERROR("error starting log_process_thread: %d\n", rc);
621         } else {
622                 CDEBUG(D_HA, "log_process_thread: %d\n", rc);
623                 rc = 0;
624         }
625
626         RETURN(rc);
627 }
628
629 int llog_repl_connect(struct llog_ctxt *ctxt, int count,
630                       struct llog_logid *logid, struct llog_gen *gen,
631                       struct obd_uuid *uuid)
632 {
633         struct llog_canceld_ctxt *llcd;
634         int rc;
635         ENTRY;
636
637         /* send back llcd before recovery from llog */
638         if (ctxt->loc_llcd != NULL) {
639                 CWARN("llcd %p:%p not empty\n", ctxt->loc_llcd, ctxt);
640                 llog_sync(ctxt, NULL);
641         }
642
643         mutex_down(&ctxt->loc_sem);
644         ctxt->loc_gen = *gen;
645         llcd = ctxt_llcd_grab(ctxt);
646         if (llcd == NULL) {
647                 CERROR("couldn't get an llcd\n");
648                 mutex_up(&ctxt->loc_sem);
649                 RETURN(-ENOMEM);
650         }
651         mutex_up(&ctxt->loc_sem);
652
653         rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid);
654         if (rc != 0) {
655                 ctxt_llcd_put(ctxt);
656                 CERROR("error recovery process: %d\n", rc);
657         }
658         RETURN(rc);
659 }
660 EXPORT_SYMBOL(llog_repl_connect);
661
662 #else /* !__KERNEL__ */
663
/*
 * Non-kernel build stub: there is no commit thread here, so the request
 * is ignored and success is reported.
 */
int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
                         struct lov_stripe_md *lsm, int count,
                         struct llog_cookie *cookies, int flags)
{
        /* every argument is intentionally unused */
        (void)ctxt;
        (void)lsm;
        (void)count;
        (void)cookies;
        (void)flags;

        return 0;
}
670 #endif