From 6ad1581bf6003631d87e8365c01c03b06be136de Mon Sep 17 00:00:00 2001 From: "Alexander.Boyko" Date: Tue, 15 May 2012 12:55:40 +0400 Subject: [PATCH 1/1] LU-1194 llog: fix for not sync llcd at thread stop If llog_obd_repl_cancel() happens between llog_sync() and class_import_put() in filter_llog_finish(), llog_recov_thread_stop() triggers an LBUG. This patch fixes the issue by adding a new flag to llog_ctxt. Signed-off-by: Alexander Boyko Reviewed-by: Andriy Skulysh Reviewed-by: Alexander Zarochentsev Xyratex-bug-id: MRP-456 Change-Id: Ife79adfe6cde0f2090776cd27cd87f65c1e988e2 Reviewed-on: http://review.whamcloud.com/2789 Reviewed-by: Andriy Skulysh Tested-by: Hudson Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/lustre_log.h | 9 ++++++--- lustre/include/obd.h | 1 + lustre/obdclass/llog_cat.c | 2 +- lustre/obdclass/llog_obd.c | 4 ++-- lustre/obdfilter/filter.c | 16 ++++++++-------- lustre/osc/osc_request.c | 2 +- lustre/ptlrpc/recov_thread.c | 23 +++++++++++++++++++---- 7 files changed, 38 insertions(+), 19 deletions(-) diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index 0b96577..e64ef1e 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -191,7 +191,7 @@ int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, int index, struct llog_operations *op); int __llog_ctxt_put(struct llog_ctxt *ctxt); int llog_cleanup(struct llog_ctxt *); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); +int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags); int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, struct llog_cookie *logcookies, int numcookies); @@ -228,7 +228,8 @@ int llog_handle_connect(struct ptlrpc_request *req); int llog_obd_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, int count, struct llog_cookie *cookies, int flags); -int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp); 
+int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp, + int flags); int llog_obd_repl_connect(struct llog_ctxt *ctxt, struct llog_logid *logid, struct llog_gen *gen, struct obd_uuid *uuid); @@ -251,7 +252,8 @@ struct llog_operations { int (*lop_setup)(struct obd_device *obd, struct obd_llog_group *olg, int ctxt_idx, struct obd_device *disk_obd, int count, struct llog_logid *logid, const char *name); - int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); + int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp, + int flags); int (*lop_cleanup)(struct llog_ctxt *ctxt); int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, @@ -274,6 +276,7 @@ int llog_put_cat_list(struct obd_device *disk_obd, char *name, int idx, int count, struct llog_catid *idarray); #define LLOG_CTXT_FLAG_UNINITIALIZED 0x00000001 +#define LLOG_CTXT_FLAG_STOP 0x00000002 struct llog_ctxt { int loc_idx; /* my index the obd array of ctxt's */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 07ef9af..5fa66e2 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1184,6 +1184,7 @@ struct obd_device { }; #define OBD_LLOG_FL_SENDNOW 0x0001 +#define OBD_LLOG_FL_EXIT 0x0002 enum obd_cleanup_stage { /* Special case hack for MDS LOVs */ diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index 9ba6eb2..f0fa468 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -485,7 +485,7 @@ int llog_cat_process_thread(void *data) /* * Make sure that all cached data is sent. 
*/ - llog_sync(ctxt, NULL); + llog_sync(ctxt, NULL, 0); GOTO(release_llh, rc); release_llh: rc = llog_cat_put(llh); diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 221fccc..8bf6b41 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -231,7 +231,7 @@ int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, } EXPORT_SYMBOL(llog_setup); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) +int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags) { int rc = 0; ENTRY; @@ -240,7 +240,7 @@ int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) RETURN(0); if (CTXTP(ctxt, sync)) - rc = CTXTP(ctxt, sync)(ctxt, exp); + rc = CTXTP(ctxt, sync)(ctxt, exp, flags); RETURN(rc); } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index c3ad008..dfb7d2f 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2408,7 +2408,7 @@ static int filter_llog_finish(struct obd_device *obd, int count) * We actually do sync in disconnect time, but disconnect * may not come being marked rq_no_resend = 1. */ - llog_sync(ctxt, NULL); + llog_sync(ctxt, NULL, OBD_LLOG_FL_EXIT); /* * Balance class_import_get() in llog_receptor_accept(). 
@@ -2420,16 +2420,16 @@ static int filter_llog_finish(struct obd_device *obd, int count) class_import_put(ctxt->loc_imp); ctxt->loc_imp = NULL; } + + if (filter->fo_lcm) { + llog_recov_thread_fini(filter->fo_lcm, obd->obd_force); + filter->fo_lcm = NULL; + } + cfs_mutex_unlock(&ctxt->loc_mutex); llog_ctxt_put(ctxt); } - if (filter->fo_lcm) { - cfs_mutex_lock(&ctxt->loc_mutex); - llog_recov_thread_fini(filter->fo_lcm, obd->obd_force); - filter->fo_lcm = NULL; - cfs_mutex_unlock(&ctxt->loc_mutex); - } RETURN(filter_olg_fini(&obd->obd_olg)); } @@ -3066,7 +3066,7 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp) ctxt = llog_group_get_ctxt(olg_min, LLOG_MDS_OST_REPL_CTXT); if (ctxt) { - err = llog_sync(ctxt, olg_min->olg_exp); + err = llog_sync(ctxt, olg_min->olg_exp, 0); llog_ctxt_put(ctxt); if (err) { CERROR("error flushing logs to MDS: " diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 8e86162..24832ea 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3367,7 +3367,7 @@ static int osc_disconnect(struct obd_export *exp) if (obd->u.cli.cl_conn_count == 1) { /* Flush any remaining cancel messages out to the * target */ - llog_sync(ctxt, exp); + llog_sync(ctxt, exp, 0); } llog_ctxt_put(ctxt); } else { diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index e1d8eea..8076420 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -562,7 +562,7 @@ int llog_obd_repl_connect(struct llog_ctxt *ctxt, * Send back cached llcd from llog before recovery if we have any. * This is void is nothing cached is found there. */ - llog_sync(ctxt, NULL); + llog_sync(ctxt, NULL, 0); /* * Start recovery in separate thread. @@ -604,7 +604,12 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, * Let's check if we have all structures alive. We also check for * possible shutdown. Do nothing if we're stopping. 
*/ - if (ctxt->loc_imp == NULL) { + if (ctxt->loc_flags & LLOG_CTXT_FLAG_STOP) { + CDEBUG(D_RPCTRACE, "Last sync was done for ctxt %p\n", ctxt); + GOTO(out, rc = -ENODEV); + } + + if (ctxt->loc_imp == NULL) { CDEBUG(D_RPCTRACE, "No import for ctxt %p\n", ctxt); GOTO(out, rc = -ENODEV); } @@ -673,12 +678,17 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, out: if (rc) llcd_put(ctxt); + + if (flags & OBD_LLOG_FL_EXIT) + ctxt->loc_flags = LLOG_CTXT_FLAG_STOP; + cfs_mutex_unlock(&ctxt->loc_mutex); return rc; } EXPORT_SYMBOL(llog_obd_repl_cancel); -int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) +int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp, + int flags) { int rc = 0; ENTRY; @@ -694,6 +704,10 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) */ CDEBUG(D_RPCTRACE, "Kill cached llcd\n"); llcd_put(ctxt); + + if (flags & OBD_LLOG_FL_EXIT) + ctxt->loc_flags = LLOG_CTXT_FLAG_STOP; + cfs_mutex_unlock(&ctxt->loc_mutex); } else { /* @@ -703,7 +717,8 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) */ CDEBUG(D_RPCTRACE, "Sync cached llcd\n"); cfs_mutex_unlock(&ctxt->loc_mutex); - rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); + rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW | + flags); } RETURN(rc); } -- 1.8.3.1