Patch from nic@cray.com: add spin locks around import/export bit flag changes.
b=11315
i=adilger
i=alex
* Note that reiserfs quotas are temporarily disabled on SLES 10 in this
kernel.
+Severity : normal
+Frequency : rare
+Bugzilla : 11315
+Description: OST "spontaneously" evicts client; client has imp_pingable == 0
+Details : Due to a race condition, liblustre clients were occasionally
+ evicted incorrectly.
+
Severity : enhancement
Bugzilla : 10997
Description: lfs setstripe use optional parameters instead of positional
CDEBUG(D_HA, "marking %s %s->%s as inactive\n",
name, obddev->obd_name,
cli->cl_target_uuid.uuid);
+
+ spin_lock(&imp->imp_lock);
imp->imp_invalid = 1;
+ spin_unlock(&imp->imp_lock);
}
}
/* Mark import deactivated now, so we don't try to reconnect if any
* of the cleanup RPCs fails (e.g. ldlm cancel, etc). We don't
* fully deactivate the import, or that would drop all requests. */
+ spin_lock(&imp->imp_lock);
imp->imp_deactive = 1;
+ spin_unlock(&imp->imp_lock);
/* Some non-replayable imports (MDS's OSCs) are pinged, so just
* delete it regardless. (It's safe to delete an import that was
CDEBUG(D_HA, "%s: committing for initial connect of %s\n",
obd->obd_name, exp->exp_client_uuid.uuid);
+
+ spin_lock(&exp->exp_lock);
exp->exp_need_sync = 0;
+ spin_unlock(&exp->exp_lock);
}
EXPORT_SYMBOL(target_client_add_cb);
break;
}
+ spin_lock(&export->exp_lock);
export->exp_connecting = 1;
+ spin_unlock(&export->exp_lock);
spin_unlock(&target->obd_dev_lock);
LASSERT(export->exp_obd == target);
GOTO(out, rc = -EALREADY);
}
export->exp_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
- spin_unlock(&export->exp_lock);
/* request from liblustre? Don't evict it for not pinging. */
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
export->exp_libclient = 1;
+ spin_unlock(&export->exp_lock);
+
spin_lock(&target->obd_dev_lock);
list_del_init(&export->exp_obd_chain_timed);
spin_unlock(&target->obd_dev_lock);
+ } else {
+ spin_unlock(&export->exp_lock);
}
if (export->exp_connection != NULL)
class_import_put(revimp);
out:
- if (export)
+ if (export) {
+ spin_lock(&export->exp_lock);
export->exp_connecting = 0;
+ spin_unlock(&export->exp_lock);
+ }
if (targref)
class_decref(targref);
if (rc)
export */
if (req->rq_export->exp_replay_needed) {
--obd->obd_recoverable_clients;
+
+ spin_lock(&req->rq_export->exp_lock);
req->rq_export->exp_replay_needed = 0;
+ spin_unlock(&req->rq_export->exp_lock);
}
recovery_done = (obd->obd_recoverable_clients == 0);
spin_unlock_bh(&obd->obd_processing_task_lock);
if (KEY_IS(KEY_INIT_RECOV)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
+ spin_lock(&imp->imp_lock);
imp->imp_initial_recov = *(int *)val;
+ spin_unlock(&imp->imp_lock);
+
CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
exp->exp_obd->obd_name, imp->imp_initial_recov);
RETURN(0);
if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
+
+ spin_lock(&imp->imp_lock);
imp->imp_initial_recov_bk = *(int *)val;
if (imp->imp_initial_recov_bk)
imp->imp_initial_recov = 1;
+ spin_unlock(&imp->imp_lock);
+
CDEBUG(D_HA, "%s: set imp_initial_recov_bk = %d\n",
exp->exp_obd->obd_name, imp->imp_initial_recov_bk);
RETURN(0);
INIT_LIST_HEAD(&med->med_open_head);
spin_lock_init(&med->med_open_lock);
+
+ spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
+ spin_unlock(&exp->exp_lock);
+
RETURN(0);
}
} else {
rc = fsfilt_add_journal_cb(obd, 0, handle,
target_client_add_cb, exp);
- if (rc == 0)
+ if (rc == 0) {
+ spin_lock(&exp->exp_lock);
exp->exp_need_sync = 1;
+ spin_unlock(&exp->exp_lock);
+ }
rc = fsfilt_write_record(obd, file, med->med_mcd,
sizeof(*med->med_mcd),
&off, rc /* sync if no cb */);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, mds->mds_rcvd_filp, &zero_mcd,
sizeof(zero_mcd), &off,
- !exp->exp_libclient);
+ (!exp->exp_libclient || exp->exp_need_sync));
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
mcd = NULL;
+
+ spin_lock(&exp->exp_lock);
exp->exp_replay_needed = 1;
exp->exp_connecting = 0;
+ spin_unlock(&exp->exp_lock);
+
obd->obd_recoverable_clients++;
obd->obd_max_recoverable_clients++;
class_export_put(exp);
if (KEY_IS(KEY_INIT_RECOV)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
+ spin_lock(&imp->imp_lock);
imp->imp_initial_recov = *(int *)val;
+ spin_unlock(&imp->imp_lock);
CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
exp->exp_obd->obd_name, imp->imp_initial_recov);
RETURN(0);
if (vallen != sizeof(int))
RETURN(-EINVAL);
value = *(int *)val;
+ spin_lock(&imp->imp_lock);
imp->imp_initial_recov_bk = value > 0;
/* Even after the initial connection, give up all comms if
nobody answers the first time. */
imp->imp_recon_bk = 1;
+ spin_unlock(&imp->imp_lock);
CDEBUG(D_MGC, "InitRecov %s %d/%d:d%d:i%d:r%d:or%d:%s\n",
imp->imp_obd->obd_name, value, imp->imp_initial_recov,
imp->imp_deactive, imp->imp_invalid,
class_handle_unhash(&import->imp_handle);
+ spin_lock(&import->imp_lock);
import->imp_generation++;
+ spin_unlock(&import->imp_lock);
+
class_import_put(import);
}
EXPORT_SYMBOL(class_destroy_import);
while (!list_empty(list)) {
exp = list_entry(list->next, struct obd_export, exp_obd_chain);
class_export_get(exp);
+
+ spin_lock(&exp->exp_lock);
exp->exp_flags = flags;
+ spin_unlock(&exp->exp_lock);
if (obd_uuid_equals(&exp->exp_client_uuid,
&exp->exp_obd->obd_uuid)) {
class_export_put(exp);
continue;
}
+
+ spin_lock(&fake_exp->exp_lock);
fake_exp->exp_flags = flags;
+ spin_unlock(&fake_exp->exp_lock);
+
rc = obd_disconnect(fake_exp);
class_export_put(exp);
if (rc) {
if (err)
CERROR("Precleanup %s returned %d\n",
obd->obd_name, err);
+
+ spin_lock(&obd->obd_self_export->exp_lock);
obd->obd_self_export->exp_flags |=
(obd->obd_fail ? OBD_OPT_FAILOVER : 0) |
(obd->obd_force ? OBD_OPT_FORCE : 0);
+ spin_unlock(&obd->obd_self_export->exp_lock);
+
/* note that we'll recurse into class_decref again */
class_unlink_export(obd->obd_self_export);
return;
} else {
rc = fsfilt_add_journal_cb(obd, 0, handle,
target_client_add_cb, exp);
- if (rc == 0)
+ if (rc == 0) {
+ spin_lock(&exp->exp_lock);
exp->exp_need_sync = 1;
+ spin_unlock(&exp->exp_lock);
+ }
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp,
fed->fed_fcd,
sizeof(*fed->fed_fcd),
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd,
sizeof(zero_fcd), &off,
- !exp->exp_libclient);
+ (!exp->exp_libclient || exp->exp_need_sync));
if (rc == 0)
/* update server's transno */
{
spin_lock_init(&exp->exp_filter_data.fed_lock);
INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list);
+
+ spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
+ spin_unlock(&exp->exp_lock);
return 0;
}
LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
fcd = NULL;
+
+ spin_lock(&exp->exp_lock);
exp->exp_replay_needed = 1;
exp->exp_connecting = 0;
+ spin_unlock(&exp->exp_lock);
+
obd->obd_recoverable_clients++;
obd->obd_max_recoverable_clients++;
class_export_put(exp);
/* set cancel cookie callback function */
if (fsfilt_add_journal_cb(exp->exp_obd, 0, handle,
filter_cancel_cookies_cb,
- fcc))
+ fcc)) {
+ spin_lock(&exp->exp_lock);
exp->exp_need_sync = 1;
- else
+ spin_unlock(&exp->exp_lock);
+ } else {
fcc = NULL;
}
}
+ }
if (OBD_FAIL_CHECK(OBD_FAIL_OST_SETATTR_CREDITS))
fsfilt_extend(exp->exp_obd, inode, 0, handle);
"ctxt %p: %d\n", ctxt, rc);
}
+ spin_lock(&imp->imp_lock);
imp->imp_server_timeout = 1;
- CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(imp->imp_obd));
imp->imp_pingable = 1;
+ spin_unlock(&imp->imp_lock);
+ CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(imp->imp_obd));
RETURN(rc);
}
if (KEY_IS(KEY_INIT_RECOV)) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
+ spin_lock(&imp->imp_lock);
imp->imp_initial_recov = *(int *)val;
+ spin_unlock(&imp->imp_lock);
CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n",
exp->exp_obd->obd_name,
imp->imp_initial_recov);
if (imp->imp_recon_bk) {
CDEBUG(D_HA, "Last reconnection attempt (%d) for %s\n",
imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd));
+ spin_lock(&imp->imp_lock);
imp->imp_last_recon = 1;
+ spin_unlock(&imp->imp_lock);
}
}
aa->pcaa_initial_connect = initial_connect;
if (aa->pcaa_initial_connect) {
+ spin_lock(&imp->imp_lock);
imp->imp_replayable = 1;
+ spin_unlock(&imp->imp_lock);
/* On an initial connect, we don't know which one of a
failover server pair is up. Don't wait long. */
#ifdef CRAY_XT3
spin_unlock(&imp->imp_lock);
RETURN(0);
}
- spin_unlock(&imp->imp_lock);
if (rc)
GOTO(out, rc);
if (aa->pcaa_initial_connect) {
if (msg_flags & MSG_CONNECT_REPLAYABLE) {
+ imp->imp_replayable = 1;
+ spin_unlock(&imp->imp_lock);
CDEBUG(D_HA, "connected to replayable target: %s\n",
obd2cli_tgt(imp->imp_obd));
- imp->imp_replayable = 1;
} else {
imp->imp_replayable = 0;
+ spin_unlock(&imp->imp_lock);
}
if (msg_flags & MSG_CONNECT_NEXT_VER) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
GOTO(finish, rc = 0);
+ } else {
+ spin_unlock(&imp->imp_lock);
}
/* Determine what recovery state to move the import to. */
CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
imp->imp_obd->obd_name,
obd2cli_tgt(imp->imp_obd));
+
+ spin_lock(&imp->imp_lock);
imp->imp_resend_replay = 1;
+ spin_unlock(&imp->imp_lock);
+
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
(char *)imp->imp_connection->c_remote_uuid.uuid, rc);
}
+ spin_lock(&imp->imp_lock);
imp->imp_last_recon = 0;
+ spin_unlock(&imp->imp_lock);
cfs_waitq_signal(&imp->imp_recovery_waitq);
RETURN(rc);
req = NULL;
}
+ spin_lock(&imp->imp_lock);
imp->imp_resend_replay = 0;
+ spin_unlock(&imp->imp_lock);
if (req != NULL) {
rc = ptlrpc_replay_req(req);
LCONSOLE_WARN("setting import %s INACTIVE by administrator "
"request\n", obd2cli_tgt(imp->imp_obd));
ptlrpc_invalidate_import(imp);
+
+ spin_lock(&imp->imp_lock);
imp->imp_deactive = 1;
+ spin_unlock(&imp->imp_lock);
}
/* When activating, mark import valid, and attempt recovery */
if (active) {
+ spin_lock(&imp->imp_lock);
imp->imp_deactive = 0;
+ spin_unlock(&imp->imp_lock);
+
CDEBUG(D_HA, "setting import %s VALID\n",
obd2cli_tgt(imp->imp_obd));
rc = ptlrpc_recover_import(imp, NULL);
/* force import to be disconnected. */
ptlrpc_set_import_discon(imp, 0);
+ spin_lock(&imp->imp_lock);
imp->imp_deactive = 0;
+ spin_unlock(&imp->imp_lock);
+
rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
RETURN(rc);