X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Frecov_thread.c;h=d86ab0c8dd510932ca41d5c7fd06115807d12d5b;hb=37b6ad662657a6986d8d17d172a2756d52c86cf8;hp=69aec83b2096227148174253a3dce06e2a9c6beb;hpb=c07c913776f210512e8a208e5b288c0ca3883f55;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 69aec83..d86ab0c 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -26,10 +26,13 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. * @@ -67,7 +70,7 @@ #include #include "ptlrpc_internal.h" -static atomic_t llcd_count = ATOMIC_INIT(0); +static cfs_atomic_t llcd_count = CFS_ATOMIC_INIT(0); static cfs_mem_cache_t *llcd_cache = NULL; #ifdef __KERNEL__ @@ -76,8 +79,12 @@ enum { LLOG_LCM_FL_EXIT = 1 << 1 }; -static void llcd_print(struct llog_canceld_ctxt *llcd, - const char *func, int line) +struct llcd_async_args { + struct llog_canceld_ctxt *la_ctxt; +}; + +static void llcd_print(struct llog_canceld_ctxt *llcd, + const char *func, int line) { CDEBUG(D_RPCTRACE, "Llcd (%p) at %s:%d:\n", llcd, func, line); CDEBUG(D_RPCTRACE, " size: %d\n", llcd->llcd_size); @@ -112,15 +119,15 @@ static struct llog_canceld_ctxt *llcd_alloc(struct llog_commit_master *lcm) llcd->llcd_cookiebytes = 0; llcd->llcd_size = size; - spin_lock(&lcm->lcm_lock); + cfs_spin_lock(&lcm->lcm_lock); llcd->llcd_lcm = lcm; - atomic_inc(&lcm->lcm_count); - list_add_tail(&llcd->llcd_list, &lcm->lcm_llcds); - spin_unlock(&lcm->lcm_lock); - atomic_inc(&llcd_count); + cfs_atomic_inc(&lcm->lcm_count); + cfs_list_add_tail(&llcd->llcd_list, &lcm->lcm_llcds); + cfs_spin_unlock(&lcm->lcm_lock); + cfs_atomic_inc(&llcd_count); CDEBUG(D_RPCTRACE, "Alloc llcd %p on lcm %p (%d)\n", - llcd, lcm, atomic_read(&lcm->lcm_count)); + llcd, lcm, cfs_atomic_read(&lcm->lcm_count)); return llcd; } @@ -134,25 +141,25 @@ static void llcd_free(struct llog_canceld_ctxt *llcd) int size; if (lcm) { - if (atomic_read(&lcm->lcm_count) == 0) { + if (cfs_atomic_read(&lcm->lcm_count) == 0) { CERROR("Invalid llcd free %p\n", llcd); llcd_print(llcd, __FUNCTION__, __LINE__); LBUG(); } - spin_lock(&lcm->lcm_lock); - LASSERT(!list_empty(&llcd->llcd_list)); - list_del_init(&llcd->llcd_list); - atomic_dec(&lcm->lcm_count); - spin_unlock(&lcm->lcm_lock); - - CDEBUG(D_RPCTRACE, "Free llcd %p on lcm %p (%d)\n", - llcd, lcm, atomic_read(&lcm->lcm_count)); + cfs_spin_lock(&lcm->lcm_lock); + LASSERT(!cfs_list_empty(&llcd->llcd_list)); + cfs_list_del_init(&llcd->llcd_list); + cfs_atomic_dec(&lcm->lcm_count); + cfs_spin_unlock(&lcm->lcm_lock); + + CDEBUG(D_RPCTRACE, "Free llcd %p on lcm %p (%d)\n", + llcd, lcm, cfs_atomic_read(&lcm->lcm_count)); } - LASSERT(atomic_read(&llcd_count) > 0); - atomic_dec(&llcd_count); + LASSERT(cfs_atomic_read(&llcd_count) > 0); + cfs_atomic_dec(&llcd_count); - size = offsetof(struct llog_canceld_ctxt, llcd_cookies) + + size = offsetof(struct llog_canceld_ctxt, llcd_cookies) + llcd->llcd_size; OBD_SLAB_FREE(llcd, llcd_cache, size); } @@ -161,7 +168,7 @@ static void llcd_free(struct llog_canceld_ctxt *llcd) * Checks if passed cookie fits into llcd free space buffer. Returns * 1 if yes and 0 otherwise. */ -static inline int +static inline int llcd_fit(struct llog_canceld_ctxt *llcd, struct llog_cookie *cookies) { return (llcd->llcd_size - llcd->llcd_cookiebytes >= sizeof(*cookies)); @@ -170,11 +177,11 @@ llcd_fit(struct llog_canceld_ctxt *llcd, struct llog_cookie *cookies) /** * Copy passed @cookies to @llcd. */ -static inline void +static inline void llcd_copy(struct llog_canceld_ctxt *llcd, struct llog_cookie *cookies) { LASSERT(llcd_fit(llcd, cookies)); - memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, + memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies, sizeof(*cookies)); llcd->llcd_cookiebytes += sizeof(*cookies); } @@ -186,9 +193,11 @@ llcd_copy(struct llog_canceld_ctxt *llcd, struct llog_cookie *cookies) */ static int llcd_interpret(const struct lu_env *env, - struct ptlrpc_request *req, void *noused, int rc) + struct ptlrpc_request *req, void *args, int rc) { - struct llog_canceld_ctxt *llcd = req->rq_async_args.pointer_arg[0]; + struct llcd_async_args *la = args; + struct llog_canceld_ctxt *llcd = la->la_ctxt; + CDEBUG(D_RPCTRACE, "Sent llcd %p (%d) - killing it\n", llcd, rc); llcd_free(llcd); return 0; @@ -204,6 +213,7 @@ static int llcd_send(struct llog_canceld_ctxt *llcd) char *bufs[2] = { NULL, (char *)llcd->llcd_cookies }; struct obd_import *import = NULL; struct llog_commit_master *lcm; + struct llcd_async_args *la; struct ptlrpc_request *req; struct llog_ctxt *ctxt; int rc; @@ -227,7 +237,7 @@ static int llcd_send(struct llog_canceld_ctxt *llcd) * Check if we're in exit stage. Do not send llcd in * this case. */ - if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) + if (cfs_test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) GOTO(exit, rc = -ENODEV); CDEBUG(D_RPCTRACE, "Sending llcd %p\n", llcd); @@ -268,13 +278,18 @@ static int llcd_send(struct llog_canceld_ctxt *llcd) /* bug 5515 */ req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL; req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL; + req->rq_interpret_reply = (ptlrpc_interpterer_t)llcd_interpret; - req->rq_async_args.pointer_arg[0] = llcd; - rc = ptlrpc_set_add_new_req(&lcm->lcm_pc, req); - if (rc) { - ptlrpc_request_free(req); - GOTO(exit, rc); - } + + CLASSERT(sizeof(*la) <= sizeof(req->rq_async_args)); + la = ptlrpc_req_async_args(req); + la->la_ctxt = llcd; + + /* llog cancels will be replayed after reconnect so this will do twice + * first from replay llog, second for resended rpc */ + req->rq_no_delay = req->rq_no_resend = 1; + + ptlrpc_set_add_new_req(&lcm->lcm_pc, req); RETURN(0); exit: CDEBUG(D_RPCTRACE, "Refused llcd %p\n", llcd); @@ -316,7 +331,7 @@ static struct llog_canceld_ctxt *llcd_detach(struct llog_ctxt *ctxt) if (!llcd) return NULL; - CDEBUG(D_RPCTRACE, "Detach llcd %p from ctxt %p\n", + CDEBUG(D_RPCTRACE, "Detach llcd %p from ctxt %p\n", llcd, ctxt); ctxt->loc_llcd = NULL; @@ -331,7 +346,7 @@ static struct llog_canceld_ctxt *llcd_detach(struct llog_ctxt *ctxt) static struct llog_canceld_ctxt *llcd_get(struct llog_ctxt *ctxt) { struct llog_canceld_ctxt *llcd; - + LASSERT(ctxt); llcd = llcd_alloc(ctxt->loc_lcm); if (!llcd) { CERROR("Can't alloc an llcd for ctxt %p\n", ctxt); @@ -389,7 +404,7 @@ int llog_recov_thread_start(struct llog_commit_master *lcm) int rc; ENTRY; - rc = ptlrpcd_start(lcm->lcm_name, &lcm->lcm_pc); + rc = ptlrpcd_start(-1, 1, lcm->lcm_name, &lcm->lcm_pc); if (rc) { CERROR("Error %d while starting recovery thread %s\n", rc, lcm->lcm_name); @@ -410,7 +425,7 @@ void llog_recov_thread_stop(struct llog_commit_master *lcm, int force) * Let all know that we're stopping. This will also make * llcd_send() refuse any new llcds. */ - set_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags); + cfs_set_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags); /* * Stop processing thread. No new rpcs will be accepted for @@ -423,21 +438,21 @@ void llog_recov_thread_stop(struct llog_commit_master *lcm, int force) * those forgotten in sync may still be attached to ctxt. Let's * print them. */ - if (atomic_read(&lcm->lcm_count) != 0) { + if (cfs_atomic_read(&lcm->lcm_count) != 0) { struct llog_canceld_ctxt *llcd; - struct list_head *tmp; + cfs_list_t *tmp; - CERROR("Busy llcds found (%d) on lcm %p\n", - atomic_read(&lcm->lcm_count) == 0, lcm); + CERROR("Busy llcds found (%d) on lcm %p\n", + cfs_atomic_read(&lcm->lcm_count), lcm); - spin_lock(&lcm->lcm_lock); - list_for_each(tmp, &lcm->lcm_llcds) { - llcd = list_entry(tmp, struct llog_canceld_ctxt, - llcd_list); + cfs_spin_lock(&lcm->lcm_lock); + cfs_list_for_each(tmp, &lcm->lcm_llcds) { + llcd = cfs_list_entry(tmp, struct llog_canceld_ctxt, + llcd_list); llcd_print(llcd, __FUNCTION__, __LINE__); } - spin_unlock(&lcm->lcm_lock); - + cfs_spin_unlock(&lcm->lcm_lock); + /* * No point to go further with busy llcds at this point * as this is clear bug. It might mean we got hanging @@ -471,10 +486,11 @@ struct llog_commit_master *llog_recov_thread_init(char *name) * Try to create threads with unique names. */ snprintf(lcm->lcm_name, sizeof(lcm->lcm_name), - "ll_log_commit_%s", name); + "lcm_%s", name); - atomic_set(&lcm->lcm_count, 0); - spin_lock_init(&lcm->lcm_lock); + cfs_atomic_set(&lcm->lcm_count, 0); + cfs_atomic_set(&lcm->lcm_refcount, 1); + cfs_spin_lock_init(&lcm->lcm_lock); CFS_INIT_LIST_HEAD(&lcm->lcm_llcds); rc = llog_recov_thread_start(lcm); if (rc) { @@ -495,7 +511,7 @@ void llog_recov_thread_fini(struct llog_commit_master *lcm, int force) { ENTRY; llog_recov_thread_stop(lcm, force); - OBD_FREE_PTR(lcm); + lcm_put(lcm); EXIT; } EXPORT_SYMBOL(llog_recov_thread_fini); @@ -529,8 +545,7 @@ static int llog_recov_thread_replay(struct llog_ctxt *ctxt, OBD_FREE_PTR(lpca); RETURN(-ENODEV); } - rc = cfs_kernel_thread(llog_cat_process_thread, lpca, - CLONE_VM | CLONE_FILES); + rc = cfs_create_thread(llog_cat_process_thread, lpca, CFS_DAEMON_FLAGS); if (rc < 0) { CERROR("Error starting llog_cat_process_thread(): %d\n", rc); OBD_FREE_PTR(lpca); @@ -559,10 +574,10 @@ int llog_obd_repl_connect(struct llog_ctxt *ctxt, /* * Start recovery in separate thread. */ - mutex_down(&ctxt->loc_sem); + cfs_mutex_down(&ctxt->loc_sem); ctxt->loc_gen = *gen; rc = llog_recov_thread_replay(ctxt, ctxt->llog_proc_cb, logid); - mutex_up(&ctxt->loc_sem); + cfs_mutex_up(&ctxt->loc_sem); RETURN(rc); } @@ -584,8 +599,13 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, LASSERT(ctxt != NULL); - mutex_down(&ctxt->loc_sem); + cfs_mutex_down(&ctxt->loc_sem); + if (!ctxt->loc_lcm) { + CDEBUG(D_RPCTRACE, "No lcm for ctxt %p\n", ctxt); + GOTO(out, rc = -ENODEV); + } lcm = ctxt->loc_lcm; + CDEBUG(D_INFO, "cancel on lsm %p\n", lcm); /* * Let's check if we have all structures alive. We also check for @@ -596,7 +616,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, GOTO(out, rc = -ENODEV); } - if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) { + if (cfs_test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) { CDEBUG(D_RPCTRACE, "Commit thread is stopping for ctxt %p\n", ctxt); GOTO(out, rc = -ENODEV); @@ -616,7 +636,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, * Allocation is successful, let's check for stop * flag again to fall back as soon as possible. */ - if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) + if (cfs_test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) GOTO(out, rc = -ENODEV); } @@ -635,7 +655,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, * Allocation is successful, let's check for stop * flag again to fall back as soon as possible. */ - if (test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) + if (cfs_test_bit(LLOG_LCM_FL_EXIT, &lcm->lcm_flags)) GOTO(out, rc = -ENODEV); } @@ -660,7 +680,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, out: if (rc) llcd_put(ctxt); - mutex_up(&ctxt->loc_sem); + cfs_mutex_up(&ctxt->loc_sem); return rc; } EXPORT_SYMBOL(llog_obd_repl_cancel); @@ -670,10 +690,10 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) int rc = 0; ENTRY; - /* - * Flush any remaining llcd. + /* + * Flush any remaining llcd. */ - mutex_down(&ctxt->loc_sem); + cfs_mutex_down(&ctxt->loc_sem); if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) { /* * This is ost->mds connection, we can't be sure that mds @@ -681,15 +701,15 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) */ CDEBUG(D_RPCTRACE, "Kill cached llcd\n"); llcd_put(ctxt); - mutex_up(&ctxt->loc_sem); + cfs_mutex_up(&ctxt->loc_sem); } else { - /* + /* * This is either llog_sync() from generic llog code or sync * on client disconnect. In either way let's do it and send - * llcds to the target with waiting for completion. + * llcds to the target with waiting for completion. */ CDEBUG(D_RPCTRACE, "Sync cached llcd\n"); - mutex_up(&ctxt->loc_sem); + cfs_mutex_up(&ctxt->loc_sem); rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); } RETURN(rc); @@ -738,9 +758,9 @@ void llog_recov_fini(void) * In 2.6.22 cfs_mem_cache_destroy() will not return error * for busy resources. Let's check it another way. */ - LASSERTF(atomic_read(&llcd_count) == 0, + LASSERTF(cfs_atomic_read(&llcd_count) == 0, "Can't destroy llcd cache! Number of " - "busy llcds: %d\n", atomic_read(&llcd_count)); + "busy llcds: %d\n", cfs_atomic_read(&llcd_count)); cfs_mem_cache_destroy(llcd_cache); llcd_cache = NULL; }