From: Alexander.Boyko Date: Tue, 15 May 2012 08:55:40 +0000 (+0400) Subject: LU-1194 llog: fix for not sync llcd at thread stop X-Git-Tag: 2.1.3-RC1~7 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=94ae23e66d411745a866119c3b896616fa6c2e6d;p=fs%2Flustre-release.git LU-1194 llog: fix for not sync llcd at thread stop If llog_obd_repl_cancel() happens between llog_sync() and class_import_put() in filter_llog_finish(), llog_recov_thread_stop() throws an LBUG. This patch fixes this issue by adding new flags to llog_ctxt. Signed-off-by: Alexander Boyko Xyratex-bug-id: MRP-456 Signed-off-by: Bob Glossman Change-Id: I896519ed11abd301a889f658f96950ec15e76f97 Reviewed-on: http://review.whamcloud.com/3480 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index fef7a4c..5305f38 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -191,7 +191,7 @@ int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, struct llog_operations *op); int __llog_ctxt_put(struct llog_ctxt *ctxt); int llog_cleanup(struct llog_ctxt *); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); +int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags); int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies); @@ -228,7 +228,8 @@ int llog_handle_connect(struct ptlrpc_request *req); int llog_obd_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, int count, struct llog_cookie *cookies, int flags); -int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp); +int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp, + int flags); int llog_obd_repl_connect(struct llog_ctxt *ctxt, struct llog_logid *logid, struct llog_gen *gen, struct obd_uuid *uuid); @@ -251,7 +252,8 @@ struct llog_operations { int 
(*lop_setup)(struct obd_device *obd, struct obd_llog_group *olg, int ctxt_idx, struct obd_device *disk_obd, int count, struct llog_logid *logid, const char *name); - int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); + int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp, + int flags); int (*lop_cleanup)(struct llog_ctxt *ctxt); int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, @@ -274,6 +276,7 @@ int llog_put_cat_list(struct obd_device *disk_obd, char *name, int idx, int count, struct llog_catid *idarray); #define LLOG_CTXT_FLAG_UNINITIALIZED 0x00000001 +#define LLOG_CTXT_FLAG_STOP 0x00000002 struct llog_ctxt { int loc_idx; /* my index the obd array of ctxt's */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 07ea3da..91b5b6e 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1122,6 +1122,7 @@ struct obd_device { }; #define OBD_LLOG_FL_SENDNOW 0x0001 +#define OBD_LLOG_FL_EXIT 0x0002 enum obd_cleanup_stage { /* Special case hack for MDS LOVs */ diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index 48a2032..65ad123 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -491,7 +491,7 @@ int llog_cat_process_thread(void *data) /* * Make sure that all cached data is sent. 
*/ - llog_sync(ctxt, NULL); + llog_sync(ctxt, NULL, 0); GOTO(release_llh, rc); release_llh: rc = llog_cat_put(llh); diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 23487ae..17c440d 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -235,7 +235,7 @@ int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, } EXPORT_SYMBOL(llog_setup); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) +int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags) { int rc = 0; ENTRY; @@ -244,7 +244,7 @@ int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) RETURN(0); if (CTXTP(ctxt, sync)) - rc = CTXTP(ctxt, sync)(ctxt, exp); + rc = CTXTP(ctxt, sync)(ctxt, exp, flags); RETURN(rc); } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 3b202d7..e7f06b9 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2401,7 +2401,7 @@ static int filter_llog_finish(struct obd_device *obd, int count) * We actually do sync in disconnect time, but disconnect * may not come being marked rq_no_resend = 1. */ - llog_sync(ctxt, NULL); + llog_sync(ctxt, NULL, OBD_LLOG_FL_EXIT); /* * Balance class_import_get() in llog_receptor_accept(). 
@@ -2413,16 +2413,16 @@ static int filter_llog_finish(struct obd_device *obd, int count) class_import_put(ctxt->loc_imp); ctxt->loc_imp = NULL; } + + if (filter->fo_lcm) { + llog_recov_thread_fini(filter->fo_lcm, obd->obd_force); + filter->fo_lcm = NULL; + } + cfs_mutex_up(&ctxt->loc_sem); llog_ctxt_put(ctxt); } - if (filter->fo_lcm) { - cfs_mutex_down(&ctxt->loc_sem); - llog_recov_thread_fini(filter->fo_lcm, obd->obd_force); - filter->fo_lcm = NULL; - cfs_mutex_up(&ctxt->loc_sem); - } RETURN(filter_olg_fini(&obd->obd_olg)); } @@ -3055,7 +3055,7 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp) ctxt = llog_group_get_ctxt(olg_min, LLOG_MDS_OST_REPL_CTXT); if (ctxt) { - err = llog_sync(ctxt, olg_min->olg_exp); + err = llog_sync(ctxt, olg_min->olg_exp, 0); llog_ctxt_put(ctxt); if (err) { CERROR("error flushing logs to MDS: " diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 14065b9..190d247 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -4305,7 +4305,7 @@ static int osc_disconnect(struct obd_export *exp) if (obd->u.cli.cl_conn_count == 1) { /* Flush any remaining cancel messages out to the * target */ - llog_sync(ctxt, exp); + llog_sync(ctxt, exp, 0); } llog_ctxt_put(ctxt); } else { diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 0a5365e..8b5edb3 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -570,7 +570,7 @@ int llog_obd_repl_connect(struct llog_ctxt *ctxt, * Send back cached llcd from llog before recovery if we have any. * This is void is nothing cached is found there. */ - llog_sync(ctxt, NULL); + llog_sync(ctxt, NULL, 0); /* * Start recovery in separate thread. @@ -612,7 +612,12 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, * Let's check if we have all structures alive. We also check for * possible shutdown. Do nothing if we're stopping. 
*/ - if (ctxt->loc_imp == NULL) { + if (ctxt->loc_flags & LLOG_CTXT_FLAG_STOP) { + CDEBUG(D_RPCTRACE, "Last sync was done for ctxt %p\n", ctxt); + GOTO(out, rc = -ENODEV); + } + + if (ctxt->loc_imp == NULL) { CDEBUG(D_RPCTRACE, "No import for ctxt %p\n", ctxt); GOTO(out, rc = -ENODEV); } @@ -681,12 +686,17 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, out: if (rc) llcd_put(ctxt); + + if (flags & OBD_LLOG_FL_EXIT) + ctxt->loc_flags = LLOG_CTXT_FLAG_STOP; + cfs_mutex_up(&ctxt->loc_sem); return rc; } EXPORT_SYMBOL(llog_obd_repl_cancel); -int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) +int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp, + int flags) { int rc = 0; ENTRY; @@ -702,6 +712,10 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) */ CDEBUG(D_RPCTRACE, "Kill cached llcd\n"); llcd_put(ctxt); + + if (flags & OBD_LLOG_FL_EXIT) + ctxt->loc_flags = LLOG_CTXT_FLAG_STOP; + cfs_mutex_up(&ctxt->loc_sem); } else { /* @@ -711,7 +725,8 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) */ CDEBUG(D_RPCTRACE, "Sync cached llcd\n"); cfs_mutex_up(&ctxt->loc_sem); - rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); + rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW | + flags); } RETURN(rc); }