From 81c713adebdb2a4c9340ba28a9c8f9310c9e76a0 Mon Sep 17 00:00:00 2001 From: wangdi Date: Thu, 3 Jul 2008 22:37:08 +0000 Subject: [PATCH] Branch: HEAD sync procfs/llog_thread with import destroy b=15684 i=Robert, Jay --- lustre/ChangeLog | 6 ++++ lustre/include/lprocfs_status.h | 6 ++-- lustre/include/obd.h | 2 +- lustre/ldlm/ldlm_lib.c | 10 +++--- lustre/mdc/mdc_request.c | 26 +++++++++++---- lustre/mgc/mgc_request.c | 4 +-- lustre/obdclass/llog_obd.c | 5 +++ lustre/osc/osc_request.c | 21 +++++++++--- lustre/ptlrpc/llog_client.c | 73 +++++++++++++++++++++++++++++------------ lustre/ptlrpc/llog_net.c | 20 ++++++++--- lustre/ptlrpc/recov_thread.c | 1 + 11 files changed, 128 insertions(+), 46 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 2f2bda4d..d281ffd 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1149,6 +1149,12 @@ Description: filter threads hungs on waiting journal commit Details : Cleanup filter group llog code, then only filter group llog will be only created in the MDS/OST syncing process. +Severity : normal +Bugzilla : 15684 +Description: Procfs and llog threads sometimes access a destroyed import. +Details : Sync the import destroy process with procfs and llog threads by + the import refcount and semaphore. + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. 
diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 0f4d9b3..f6f0b40 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -505,14 +505,14 @@ extern struct rw_semaphore _lprocfs_lock; * the import in a client obd_device for a lprocfs entry */ #define LPROCFS_CLIMP_CHECK(obd) do { \ typecheck(struct obd_device *, obd); \ - mutex_down(&(obd)->u.cli.cl_sem); \ + down_read(&(obd)->u.cli.cl_sem); \ if ((obd)->u.cli.cl_import == NULL) { \ - mutex_up(&(obd)->u.cli.cl_sem); \ + up_read(&(obd)->u.cli.cl_sem); \ return -ENODEV; \ } \ } while(0) #define LPROCFS_CLIMP_EXIT(obd) \ - mutex_up(&(obd)->u.cli.cl_sem); + up_read(&(obd)->u.cli.cl_sem); /* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 1995330..5836004 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -386,7 +386,7 @@ struct mdc_rpc_lock; struct obd_import; struct lustre_cache; struct client_obd { - struct semaphore cl_sem; + struct rw_semaphore cl_sem; struct obd_uuid cl_target_uuid; struct obd_import *cl_import; /* ptlrpc connection state */ int cl_conn_count; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 8b0a493..2bb5e64 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -244,7 +244,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) RETURN(-EINVAL); } - sema_init(&cli->cl_sem, 1); + init_rwsem(&cli->cl_sem); sema_init(&cli->cl_mgc_sem, 1); sptlrpc_rule_set_init(&cli->cl_sptlrpc_rset); cli->cl_sec_part = LUSTRE_SP_ANY; @@ -382,7 +382,7 @@ int client_connect_import(const struct lu_env *env, int rc; ENTRY; - mutex_down(&cli->cl_sem); + down_write(&cli->cl_sem); rc = class_connect(dlm_handle, obd, cluuid); if (rc) GOTO(out_sem, rc); @@ -441,7 +441,7 @@ out_disco: class_export_put(exp); } out_sem: - mutex_up(&cli->cl_sem); + up_write(&cli->cl_sem); if (to_be_freed) 
ldlm_namespace_free_post(to_be_freed); @@ -466,7 +466,7 @@ int client_disconnect_export(struct obd_export *exp) cli = &obd->u.cli; imp = cli->cl_import; - mutex_down(&cli->cl_sem); + down_write(&cli->cl_sem); if (!cli->cl_conn_count) { CERROR("disconnecting disconnected device (%s)\n", obd->obd_name); @@ -516,7 +516,7 @@ int client_disconnect_export(struct obd_export *exp) if (!rc && err) rc = err; out_sem: - mutex_up(&cli->cl_sem); + up_write(&cli->cl_sem); if (to_be_freed) ldlm_namespace_free_post(to_be_freed); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index cf6d57a..14ddcc9 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1212,13 +1212,24 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, { struct ptlrpc_request *req; struct obd_statfs *msfs; + struct obd_import *imp = NULL; int rc; ENTRY; - req = ptlrpc_request_alloc_pack(obd->u.cli.cl_import, &RQF_MDS_STATFS, + + /*Since the request might also come from lprocfs, so we need + *sync this with client_disconnect_export Bug15684*/ + down_read(&obd->u.cli.cl_sem); + if (obd->u.cli.cl_import) + imp = class_import_get(obd->u.cli.cl_import); + up_read(&obd->u.cli.cl_sem); + if (!imp) + RETURN(-ENODEV); + + req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS, LUSTRE_MDS_VERSION, MDS_STATFS); if (req == NULL) - RETURN(-ENOMEM); + GOTO(output, rc = -ENOMEM); ptlrpc_request_set_replen(req); @@ -1231,8 +1242,8 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, rc = ptlrpc_queue_wait(req); if (rc) { /* check connection error first */ - if (obd->u.cli.cl_import->imp_connect_error) - rc = obd->u.cli.cl_import->imp_connect_error; + if (imp->imp_connect_error) + rc = imp->imp_connect_error; GOTO(out, rc); } @@ -1244,6 +1255,8 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, EXIT; out: ptlrpc_req_finished(req); +output: + class_import_put(imp); return rc; } @@ -1584,11 +1597,12 @@ static int mdc_precleanup(struct 
obd_device *obd, enum obd_cleanup_stage stage) class_destroy_import(imp); obd->u.cli.cl_import = NULL; } - break; - case OBD_CLEANUP_SELF_EXP: rc = obd_llog_finish(obd, 0); if (rc != 0) CERROR("failed to cleanup llogging subsystems\n"); + break; + case OBD_CLEANUP_SELF_EXP: + break; case OBD_CLEANUP_OBD: break; } diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index e8ddf27..1f0411f 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -467,12 +467,12 @@ static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) spin_unlock(&config_list_lock); cfs_waitq_signal(&rq_waitq); } - break; - case OBD_CLEANUP_SELF_EXP: rc = obd_llog_finish(obd, 0); if (rc != 0) CERROR("failed to cleanup llogging subsystems\n"); break; + case OBD_CLEANUP_SELF_EXP: + break; case OBD_CLEANUP_OBD: break; } diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index a26e336..cc27ba9 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -56,6 +56,11 @@ static void llog_ctxt_destroy(struct llog_ctxt *ctxt) { if (ctxt->loc_exp) class_export_put(ctxt->loc_exp); + if (ctxt->loc_imp) { + class_import_put(ctxt->loc_imp); + ctxt->loc_imp = NULL; + } + OBD_FREE_PTR(ctxt); return; } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 38bd034..f82849b 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3347,16 +3347,29 @@ static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs, { struct obd_statfs *msfs; struct ptlrpc_request *req; + struct obd_import *imp = NULL; int rc; ENTRY; + /*Since the request might also come from lprocfs, so we need + *sync this with client_disconnect_export Bug15684*/ + down_read(&obd->u.cli.cl_sem); + if (obd->u.cli.cl_import) + imp = class_import_get(obd->u.cli.cl_import); + up_read(&obd->u.cli.cl_sem); + if (!imp) + RETURN(-ENODEV); + /* We could possibly pass max_age in the request (as an absolute * timestamp or a "seconds.usec 
ago") so the target can avoid doing * extra calls into the filesystem if that isn't necessary (e.g. * during mount that would help a bit). Having relative timestamps * is not so great if request processing is slow, while absolute * timestamps are not ideal because they need time synchronization. */ - req = ptlrpc_request_alloc(obd->u.cli.cl_import, &RQF_OST_STATFS); + req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS); + + class_import_put(imp); + if (req == NULL) RETURN(-ENOMEM); @@ -3966,13 +3979,13 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) class_destroy_import(imp); obd->u.cli.cl_import = NULL; } - break; - } - case OBD_CLEANUP_SELF_EXP: rc = obd_llog_finish(obd, 0); if (rc != 0) CERROR("failed to cleanup llogging subsystems\n"); break; + } + case OBD_CLEANUP_SELF_EXP: + break; case OBD_CLEANUP_OBD: break; } diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index 395d756..7e4b12d 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -43,6 +43,31 @@ #include #include +#define LLOG_CLIENT_ENTRY(ctxt, imp) do { \ + mutex_down(&ctxt->loc_sem); \ + if (ctxt->loc_imp) { \ + imp = class_import_get(ctxt->loc_imp); \ + } else { \ + CERROR("ctxt->loc_imp == NULL for context idx %d." \ + "Unable to complete MDS/OSS recovery," \ + "but I'll try again next time. Not fatal.\n", \ + ctxt->loc_idx); \ + imp = NULL; \ + mutex_up(&ctxt->loc_sem); \ + return (-EINVAL); \ + } \ + mutex_up(&ctxt->loc_sem); \ +} while(0) + +#define LLOG_CLIENT_EXIT(ctxt, imp) do { \ + mutex_down(&ctxt->loc_sem); \ + if (ctxt->loc_imp != imp) \ + CWARN("loc_imp has changed from %p to %p", \ + ctxt->loc_imp, imp); \ + class_import_put(imp); \ + mutex_up(&ctxt->loc_sem); \ +} while(0) + /* This is a callback from the llog_* functions. * Assumes caller has already pushed us into the kernel context. 
*/ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res, @@ -55,14 +80,7 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res, int rc; ENTRY; - if (ctxt->loc_imp == NULL) { - /* This used to be an assert; bug 6200 */ - CERROR("ctxt->loc_imp == NULL for context idx %d. Unable to " - "complete MDS/OSS recovery, but I'll try again next " - "time. Not fatal.\n", ctxt->loc_idx); - RETURN(-EINVAL); - } - imp = ctxt->loc_imp; + LLOG_CLIENT_ENTRY(ctxt, imp); handle = llog_alloc_handle(); if (handle == NULL) @@ -110,6 +128,7 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res, handle->lgh_ctxt = ctxt; EXIT; out: + LLOG_CLIENT_EXIT(ctxt, imp); ptlrpc_req_finished(req); return rc; err_free: @@ -119,17 +138,18 @@ err_free: static int llog_client_destroy(struct llog_handle *loghandle) { + struct obd_import *imp; struct ptlrpc_request *req = NULL; struct llogd_body *body; int rc; ENTRY; - req = ptlrpc_request_alloc_pack(loghandle->lgh_ctxt->loc_imp, - &RQF_LLOG_ORIGIN_HANDLE_DESTROY, + LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_DESTROY, LUSTRE_LOG_VERSION, LLOG_ORIGIN_HANDLE_DESTROY); if (req == NULL) - RETURN(-ENOMEM); + GOTO(err_exit, rc =-ENOMEM); body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); body->lgd_logid = loghandle->lgh_id; @@ -139,6 +159,8 @@ static int llog_client_destroy(struct llog_handle *loghandle) rc = ptlrpc_queue_wait(req); ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); RETURN(rc); } @@ -147,19 +169,20 @@ static int llog_client_next_block(struct llog_handle *loghandle, int *cur_idx, int next_idx, __u64 *cur_offset, void *buf, int len) { + struct obd_import *imp; struct ptlrpc_request *req = NULL; struct llogd_body *body; void *ptr; int rc; ENTRY; - req = ptlrpc_request_alloc_pack(loghandle->lgh_ctxt->loc_imp, - &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK, + 
LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK, LUSTRE_LOG_VERSION, LLOG_ORIGIN_HANDLE_NEXT_BLOCK); if (req == NULL) - RETURN(-ENOMEM); - + GOTO(err_exit, rc =-ENOMEM); + body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); body->lgd_logid = loghandle->lgh_id; body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1; @@ -191,24 +214,27 @@ static int llog_client_next_block(struct llog_handle *loghandle, EXIT; out: ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); return rc; } static int llog_client_prev_block(struct llog_handle *loghandle, int prev_idx, void *buf, int len) { + struct obd_import *imp; struct ptlrpc_request *req = NULL; struct llogd_body *body; void *ptr; int rc; ENTRY; - req = ptlrpc_request_alloc_pack(loghandle->lgh_ctxt->loc_imp, - &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK, + LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK, LUSTRE_LOG_VERSION, LLOG_ORIGIN_HANDLE_PREV_BLOCK); if (req == NULL) - RETURN(-ENOMEM); + GOTO(err_exit, rc = -ENOMEM); body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); body->lgd_logid = loghandle->lgh_id; @@ -236,11 +262,14 @@ static int llog_client_prev_block(struct llog_handle *loghandle, EXIT; out: ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); return rc; } static int llog_client_read_header(struct llog_handle *handle) { + struct obd_import *imp; struct ptlrpc_request *req = NULL; struct llogd_body *body; struct llog_log_hdr *hdr; @@ -248,12 +277,12 @@ static int llog_client_read_header(struct llog_handle *handle) int rc; ENTRY; - req = ptlrpc_request_alloc_pack(handle->lgh_ctxt->loc_imp, - &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER, + LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp); + req = ptlrpc_request_alloc_pack(imp,&RQF_LLOG_ORIGIN_HANDLE_READ_HEADER, LUSTRE_LOG_VERSION, LLOG_ORIGIN_HANDLE_READ_HEADER); if (req 
== NULL) - RETURN(-ENOMEM); + GOTO(err_exit, rc = -ENOMEM); body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); body->lgd_logid = handle->lgh_id; @@ -288,6 +317,8 @@ static int llog_client_read_header(struct llog_handle *handle) EXIT; out: ptlrpc_req_finished(req); +err_exit: + LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp); return rc; } diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 772f937..ccf615f 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -122,10 +122,16 @@ EXPORT_SYMBOL(llog_handle_connect); int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp) { ENTRY; + LASSERT(ctxt); - LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == imp, - "%p - %p\n", ctxt->loc_imp, imp); - ctxt->loc_imp = imp; + mutex_down(&ctxt->loc_sem); + if (ctxt->loc_imp != imp) { + CWARN("changing the import %p - %p\n", ctxt->loc_imp, imp); + if (ctxt->loc_imp) + class_import_put(ctxt->loc_imp); + ctxt->loc_imp = class_import_get(imp); + } + mutex_up(&ctxt->loc_sem); RETURN(0); } EXPORT_SYMBOL(llog_receptor_accept); @@ -149,7 +155,13 @@ int llog_initiator_connect(struct llog_ctxt *ctxt) new_imp = ctxt->loc_obd->u.cli.cl_import; LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == new_imp, "%p - %p\n", ctxt->loc_imp, new_imp); - ctxt->loc_imp = new_imp; + mutex_down(&ctxt->loc_sem); + if (ctxt->loc_imp != new_imp) { + if (ctxt->loc_imp) + class_import_put(ctxt->loc_imp); + ctxt->loc_imp = class_import_get(new_imp); + } + mutex_up(&ctxt->loc_sem); RETURN(0); } EXPORT_SYMBOL(llog_initiator_connect); diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 1f21db5..5d002c8 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -167,6 +167,7 @@ static void ctxt_llcd_put(struct llog_ctxt *ctxt) llcd_put(ctxt->loc_llcd); ctxt->loc_llcd = NULL; } + class_import_put(ctxt->loc_imp); ctxt->loc_imp = NULL; mutex_up(&ctxt->loc_sem); } -- 1.8.3.1