From 824c90baa518327065ba74ac138160304c969d81 Mon Sep 17 00:00:00 2001 From: scjody Date: Sat, 28 Apr 2007 02:19:37 +0000 Subject: [PATCH] Branch HEAD Patch from nic@cray.com: add spin locks around import/export bit flag changes. b=11315 i=adilger i=alex --- lustre/ChangeLog | 7 +++++++ lustre/ldlm/ldlm_lib.c | 23 +++++++++++++++++++++-- lustre/mdc/mdc_request.c | 7 +++++++ lustre/mds/handler.c | 4 ++++ lustre/mds/mds_fs.c | 11 +++++++++-- lustre/mgc/mgc_request.c | 4 ++++ lustre/obdclass/genops.c | 10 ++++++++++ lustre/obdclass/obd_config.c | 4 ++++ lustre/obdfilter/filter.c | 21 +++++++++++++++++---- lustre/osc/osc_request.c | 6 +++++- lustre/ptlrpc/import.c | 17 +++++++++++++++-- lustre/ptlrpc/recover.c | 11 +++++++++++ 12 files changed, 114 insertions(+), 11 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 35fef70..55fb231 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -22,6 +22,13 @@ tbd Cluster File Systems, Inc. * Note that reiserfs quotas are temporarily disabled on SLES 10 in this kernel. +Severity : normal +Frequency : rare +Bugzilla : 11315 +Description: OST "spontaneously" evicts client; client has imp_pingable == 0 +Details : Due to a race condition, liblustre clients were occasionally + evicted incorrectly. + Severity : enhancement Bugzilla : 10997 Description: lfs setstripe use optional parameters instead of postional diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 9add69a..29eb851 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -318,7 +318,10 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) CDEBUG(D_HA, "marking %s %s->%s as inactive\n", name, obddev->obd_name, cli->cl_target_uuid.uuid); + + spin_lock(&imp->imp_lock); imp->imp_invalid = 1; + spin_unlock(&imp->imp_lock); } } @@ -445,7 +448,9 @@ int client_disconnect_export(struct obd_export *exp) /* Mark import deactivated now, so we don't try to reconnect if any * of the cleanup RPCs fails (e.g. ldlm cancel, etc). We don't * fully deactivate the import, or that would drop all requests. */ + spin_lock(&imp->imp_lock); imp->imp_deactive = 1; + spin_unlock(&imp->imp_lock); /* Some non-replayable imports (MDS's OSCs) are pinged, so just * delete it regardless. (It's safe to delete an import that was @@ -527,7 +532,10 @@ void target_client_add_cb(struct obd_device *obd, __u64 transno, void *cb_data, CDEBUG(D_HA, "%s: committing for initial connect of %s\n", obd->obd_name, exp->exp_client_uuid.uuid); + + spin_lock(&exp->exp_lock); exp->exp_need_sync = 0; + spin_unlock(&exp->exp_lock); } EXPORT_SYMBOL(target_client_add_cb); @@ -693,7 +701,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) break; } + spin_lock(&export->exp_lock); export->exp_connecting = 1; + spin_unlock(&export->exp_lock); spin_unlock(&target->obd_dev_lock); LASSERT(export->exp_obd == target); @@ -814,14 +824,17 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) GOTO(out, rc = -EALREADY); } export->exp_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg); - spin_unlock(&export->exp_lock); /* request from liblustre? Don't evict it for not pinging. */ if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) { export->exp_libclient = 1; + spin_unlock(&export->exp_lock); + spin_lock(&target->obd_dev_lock); list_del_init(&export->exp_obd_chain_timed); spin_unlock(&target->obd_dev_lock); + } else { + spin_unlock(&export->exp_lock); } if (export->exp_connection != NULL) @@ -856,8 +869,11 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) class_import_put(revimp); out: - if (export) + if (export) { + spin_lock(&export->exp_lock); export->exp_connecting = 0; + spin_unlock(&export->exp_lock); + } if (targref) class_decref(targref); if (rc) @@ -1338,7 +1354,10 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) export */ if (req->rq_export->exp_replay_needed) { --obd->obd_recoverable_clients; + + spin_lock(&req->rq_export->exp_lock); req->rq_export->exp_replay_needed = 0; + spin_unlock(&req->rq_export->exp_lock); } recovery_done = (obd->obd_recoverable_clients == 0); spin_unlock_bh(&obd->obd_processing_task_lock); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 7ebe0ee..47466c3 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -861,7 +861,10 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS(KEY_INIT_RECOV)) { if (vallen != sizeof(int)) RETURN(-EINVAL); + spin_lock(&imp->imp_lock); imp->imp_initial_recov = *(int *)val; + spin_unlock(&imp->imp_lock); + CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov); RETURN(0); @@ -870,9 +873,13 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { if (vallen != sizeof(int)) RETURN(-EINVAL); + + spin_lock(&imp->imp_lock); imp->imp_initial_recov_bk = *(int *)val; if (imp->imp_initial_recov_bk) imp->imp_initial_recov = 1; + spin_unlock(&imp->imp_lock); + CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov_bk); RETURN(0); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index bf9646b..c53b906 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -380,7 +380,11 @@ int mds_init_export(struct obd_export *exp) INIT_LIST_HEAD(&med->med_open_head); spin_lock_init(&med->med_open_lock); + + spin_lock(&exp->exp_lock); exp->exp_connecting = 1; + spin_unlock(&exp->exp_lock); + RETURN(0); } diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 3f65d73..3c386d4 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -142,8 +142,11 @@ int mds_client_add(struct obd_device *obd, struct obd_export *exp, } else { rc = fsfilt_add_journal_cb(obd, 0, handle, target_client_add_cb, exp); - if (rc == 0) + if (rc == 0) { + spin_lock(&exp->exp_lock); exp->exp_need_sync = 1; + spin_unlock(&exp->exp_lock); + } rc = fsfilt_write_record(obd, file, med->med_mcd, sizeof(*med->med_mcd), &off, rc /* sync if no cb */); @@ -209,7 +212,7 @@ int mds_client_free(struct obd_export *exp) push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, mds->mds_rcvd_filp, &zero_mcd, sizeof(zero_mcd), &off, - !exp->exp_libclient); + (!exp->exp_libclient || exp->exp_need_sync)); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); CDEBUG(rc == 0 ? D_INFO : D_ERROR, @@ -406,8 +409,12 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) mcd = NULL; + + spin_lock(&exp->exp_lock); exp->exp_replay_needed = 1; exp->exp_connecting = 0; + spin_unlock(&exp->exp_lock); + obd->obd_recoverable_clients++; obd->obd_max_recoverable_clients++; class_export_put(exp); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 36a578b..4b8fd76 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -744,7 +744,9 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS(KEY_INIT_RECOV)) { if (vallen != sizeof(int)) RETURN(-EINVAL); + spin_lock(&imp->imp_lock); imp->imp_initial_recov = *(int *)val; + spin_unlock(&imp->imp_lock); CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov); RETURN(0); @@ -755,10 +757,12 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, if (vallen != sizeof(int)) RETURN(-EINVAL); value = *(int *)val; + spin_lock(&imp->imp_lock); imp->imp_initial_recov_bk = value > 0; /* Even after the initial connection, give up all comms if nobody answers the first time. */ imp->imp_recon_bk = 1; + spin_unlock(&imp->imp_lock); CDEBUG(D_MGC, "InitRecov %s %d/%d:d%d:i%d:r%d:or%d:%s\n", imp->imp_obd->obd_name, value, imp->imp_initial_recov, imp->imp_deactive, imp->imp_invalid, diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index cea757e..34b137c 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -823,7 +823,10 @@ void class_destroy_import(struct obd_import *import) class_handle_unhash(&import->imp_handle); + spin_lock(&import->imp_lock); import->imp_generation++; + spin_unlock(&import->imp_lock); + class_import_put(import); } EXPORT_SYMBOL(class_destroy_import); @@ -900,7 +903,10 @@ static void class_disconnect_export_list(struct list_head *list, int flags) while (!list_empty(list)) { exp = list_entry(list->next, struct obd_export, exp_obd_chain); class_export_get(exp); + + spin_lock(&exp->exp_lock); exp->exp_flags = flags; + spin_unlock(&exp->exp_lock); if (obd_uuid_equals(&exp->exp_client_uuid, &exp->exp_obd->obd_uuid)) { @@ -920,7 +926,11 @@ static void class_disconnect_export_list(struct list_head *list, int flags) class_export_put(exp); continue; } + + spin_lock(&fake_exp->exp_lock); fake_exp->exp_flags = flags; + spin_unlock(&fake_exp->exp_lock); + rc = obd_disconnect(fake_exp); class_export_put(exp); if (rc) { diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 86bf791..ca283d1 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -475,9 +475,13 @@ void class_decref(struct obd_device *obd) if (err) CERROR("Precleanup %s returned %d\n", obd->obd_name, err); + + spin_lock(&obd->obd_self_export->exp_lock); obd->obd_self_export->exp_flags |= (obd->obd_fail ? OBD_OPT_FAILOVER : 0) | (obd->obd_force ? OBD_OPT_FORCE : 0); + spin_unlock(&obd->obd_self_export->exp_lock); + /* note that we'll recurse into class_decref again */ class_unlink_export(obd->obd_self_export); return; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 779a215..cbbafbc 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -265,8 +265,11 @@ static int filter_client_add(struct obd_device *obd, struct obd_export *exp, } else { rc = fsfilt_add_journal_cb(obd, 0, handle, target_client_add_cb, exp); - if (rc == 0) + if (rc == 0) { + spin_lock(&exp->exp_lock); exp->exp_need_sync = 1; + spin_unlock(&exp->exp_lock); + } rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, fed->fed_fcd, sizeof(*fed->fed_fcd), @@ -332,7 +335,7 @@ static int filter_client_free(struct obd_export *exp) push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd, sizeof(zero_fcd), &off, - !exp->exp_libclient); + (!exp->exp_libclient || exp->exp_need_sync)); if (rc == 0) /* update server's transno */ @@ -536,7 +539,10 @@ static int filter_init_export(struct obd_export *exp) { spin_lock_init(&exp->exp_filter_data.fed_lock); INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list); + + spin_lock(&exp->exp_lock); exp->exp_connecting = 1; + spin_unlock(&exp->exp_lock); return 0; } @@ -757,8 +763,12 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */ fcd = NULL; + + spin_lock(&exp->exp_lock); exp->exp_replay_needed = 1; exp->exp_connecting = 0; + spin_unlock(&exp->exp_lock); + obd->obd_recoverable_clients++; obd->obd_max_recoverable_clients++; class_export_put(exp); @@ -2403,12 +2413,15 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, /* set cancel cookie callback function */ if (fsfilt_add_journal_cb(exp->exp_obd, 0, handle, filter_cancel_cookies_cb, - fcc)) + fcc)) { + spin_lock(&exp->exp_lock); exp->exp_need_sync = 1; - else + spin_unlock(&exp->exp_lock); + } else { fcc = NULL; } } + } if (OBD_FAIL_CHECK(OBD_FAIL_OST_SETATTR_CREDITS)) fsfilt_extend(exp->exp_obd, inode, 0, handle); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index d9db97d..9a492fe 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3212,9 +3212,11 @@ static int osc_setinfo_mds_conn_interpret(struct ptlrpc_request *req, "ctxt %p: %d\n", ctxt, rc); } + spin_lock(&imp->imp_lock); imp->imp_server_timeout = 1; - CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(imp->imp_obd)); imp->imp_pingable = 1; + spin_unlock(&imp->imp_lock); + CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(imp->imp_obd)); RETURN(rc); } @@ -3254,7 +3256,9 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS(KEY_INIT_RECOV)) { if (vallen != sizeof(int)) RETURN(-EINVAL); + spin_lock(&imp->imp_lock); imp->imp_initial_recov = *(int *)val; + spin_unlock(&imp->imp_lock); CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 2d96ad7..fe59707 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -405,7 +405,9 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) if (imp->imp_recon_bk) { CDEBUG(D_HA, "Last reconnection attempt (%d) for %s\n", imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd)); + spin_lock(&imp->imp_lock); imp->imp_last_recon = 1; + spin_unlock(&imp->imp_lock); } } @@ -442,7 +444,9 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) aa->pcaa_initial_connect = initial_connect; if (aa->pcaa_initial_connect) { + spin_lock(&imp->imp_lock); imp->imp_replayable = 1; + spin_unlock(&imp->imp_lock); /* On an initial connect, we don't know which one of a failover server pair is up. Don't wait long. */ #ifdef CRAY_XT3 @@ -515,7 +519,6 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, spin_unlock(&imp->imp_lock); RETURN(0); } - spin_unlock(&imp->imp_lock); if (rc) GOTO(out, rc); @@ -529,11 +532,13 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, if (aa->pcaa_initial_connect) { if (msg_flags & MSG_CONNECT_REPLAYABLE) { + imp->imp_replayable = 1; + spin_unlock(&imp->imp_lock); CDEBUG(D_HA, "connected to replayable target: %s\n", obd2cli_tgt(imp->imp_obd)); - imp->imp_replayable = 1; } else { imp->imp_replayable = 0; + spin_unlock(&imp->imp_lock); } if (msg_flags & MSG_CONNECT_NEXT_VER) { @@ -550,6 +555,8 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); GOTO(finish, rc = 0); + } else { + spin_unlock(&imp->imp_lock); } /* Determine what recovery state to move the import to. */ @@ -595,7 +602,11 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, CDEBUG(D_HA, "%s: reconnected to %s during replay\n", imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd)); + + spin_lock(&imp->imp_lock); imp->imp_resend_replay = 1; + spin_unlock(&imp->imp_lock); + IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); } else { IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); @@ -770,7 +781,9 @@ finish: (char *)imp->imp_connection->c_remote_uuid.uuid, rc); } + spin_lock(&imp->imp_lock); imp->imp_last_recon = 0; + spin_unlock(&imp->imp_lock); cfs_waitq_signal(&imp->imp_recovery_waitq); RETURN(rc); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 2d57808..94c0d78 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -117,7 +117,9 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) req = NULL; } + spin_lock(&imp->imp_lock); imp->imp_resend_replay = 0; + spin_unlock(&imp->imp_lock); if (req != NULL) { rc = ptlrpc_replay_req(req); @@ -231,12 +233,18 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) LCONSOLE_WARN("setting import %s INACTIVE by administrator " "request\n", obd2cli_tgt(imp->imp_obd)); ptlrpc_invalidate_import(imp); + + spin_lock(&imp->imp_lock); imp->imp_deactive = 1; + spin_unlock(&imp->imp_lock); } /* When activating, mark import valid, and attempt recovery */ if (active) { + spin_lock(&imp->imp_lock); imp->imp_deactive = 0; + spin_unlock(&imp->imp_lock); + CDEBUG(D_HA, "setting import %s VALID\n", obd2cli_tgt(imp->imp_obd)); rc = ptlrpc_recover_import(imp, NULL); @@ -254,7 +262,10 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid) /* force import to be disconnected. */ ptlrpc_set_import_discon(imp, 0); + spin_lock(&imp->imp_lock); imp->imp_deactive = 0; + spin_unlock(&imp->imp_lock); + rc = ptlrpc_recover_import_no_retry(imp, new_uuid); RETURN(rc); -- 1.8.3.1