* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2015, Intel Corporation.
+ * Copyright (c) 2010, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <lprocfs_status.h>
#include <lustre_ioctl.h>
#include <lustre_param.h>
+#include <lustre/lustre_barrier_user.h>
#include "mgs_internal.h"
#endif
enum ast_type {
- AST_CONFIG = 1,
- AST_PARAMS = 2,
- AST_IR = 3
+ AST_CONFIG = 1,
+ AST_PARAMS = 2,
+ AST_IR = 3,
+ AST_BARRIER = 4, /* barrier lock; generic completion takes no extra action */
};
static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags,
case AST_IR:
mgs_ir_notify_complete(fsdb);
break;
+ case AST_BARRIER:
+ break;
default:
LBUG();
}
return mgs_completion_ast_generic(lock, flags, cbdata, AST_IR);
}
+static int mgs_completion_ast_barrier(struct ldlm_lock *lock, __u64 flags,
+ void *cbdata)
+{ /* barrier flavour: defer to the generic completion AST as AST_BARRIER */
+ return mgs_completion_ast_generic(lock, flags, cbdata, AST_BARRIER);
+}
+
void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type)
{
ldlm_completion_callback cp = NULL;
- struct lustre_handle lockh = { 0 };
+ struct lustre_handle lockh = {
+ .cookie = 0,
+ };
struct ldlm_res_id res_id;
__u64 flags = LDLM_FL_ATOMIC_CB;
int rc;
break;
case CONFIG_T_RECOVER:
cp = mgs_completion_ast_ir;
+ break;
+ case CONFIG_T_BARRIER:
+ cp = mgs_completion_ast_barrier;
+ break;
default:
break;
}
/* Called whenever a target starts up. Flags indicate first connect, etc. */
static int mgs_target_reg(struct tgt_session_info *tsi)
{
- struct obd_device *obd = tsi->tsi_exp->exp_obd;
- struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp);
- struct mgs_target_info *mti, *rep_mti;
- struct fs_db *fsdb;
- int opc;
- int rc = 0;
+ struct obd_device *obd = tsi->tsi_exp->exp_obd;
+ struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp);
+ struct mgs_target_info *mti, *rep_mti;
+ struct fs_db *b_fsdb = NULL; /* barrier fsdb */
+ struct fs_db *c_fsdb = NULL; /* config fsdb */
+ char barrier_name[20];
+ int opc;
+ int rc = 0;
ENTRY;
RETURN(err_serious(-EFAULT));
}
+ down_read(&mgs->mgs_barrier_rwsem);
+
if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data,
IMP_RECOV))
opc = mti->mti_flags & LDD_F_OPC_MASK;
else
opc = LDD_F_OPC_REG;
- if (opc == LDD_F_OPC_READY) {
- CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
- mti->mti_fsname, mti->mti_stripe_index);
+ if (opc == LDD_F_OPC_READY) {
+ CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
+ mti->mti_fsname, mti->mti_stripe_index);
rc = mgs_ir_update(tsi->tsi_env, mgs, mti);
- if (rc) {
- LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
- CERROR("Update IR return with %d(ignore and IR "
- "disabled)\n", rc);
- }
- GOTO(out_nolock, rc);
- }
+ if (rc) {
+ LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
+ CERROR("%s: Update IR return failure: rc = %d\n",
+ mti->mti_fsname, rc);
+ }
- /* Do not support unregistering right now. */
- if (opc != LDD_F_OPC_REG)
- GOTO(out_nolock, rc = -EINVAL);
+ GOTO(out_norevoke, rc);
+ }
- CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
- mti->mti_fsname, mti->mti_stripe_index);
+ /* Do not support unregistering right now. */
+ if (opc != LDD_F_OPC_REG)
+ GOTO(out_norevoke, rc = -EINVAL);
- if (mti->mti_flags & LDD_F_NEED_INDEX)
- mti->mti_flags |= LDD_F_WRITECONF;
+ snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
+ mti->mti_fsname, BARRIER_FILENAME);
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, barrier_name, &b_fsdb);
+ if (rc) {
+ CERROR("%s: Can't get db for %s: rc = %d\n",
+ mti->mti_fsname, barrier_name, rc);
- if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
- LDD_F_UPDATE))) {
- /* We're just here as a startup ping. */
- CDEBUG(D_MGS, "Server %s is running on %s\n",
+ GOTO(out_norevoke, rc);
+ }
+
+ CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
+ mti->mti_fsname, mti->mti_stripe_index);
+
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
+ if (b_fsdb->fsdb_barrier_status == BS_FREEZING_P1 ||
+ b_fsdb->fsdb_barrier_status == BS_FREEZING_P2 ||
+ b_fsdb->fsdb_barrier_status == BS_FROZEN) {
+ LCONSOLE_WARN("%s: the system is in barrier, refuse "
+ "the connection from MDT %s temporary\n",
+ obd->obd_name, mti->mti_svname);
+
+ GOTO(out_norevoke, rc = -EBUSY);
+ }
+
+ if (!(exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_BARRIER) &&
+ !b_fsdb->fsdb_barrier_disabled) {
+ LCONSOLE_WARN("%s: the MDT %s does not support write "
+ "barrier, so disable barrier on the "
+ "whole system.\n",
+ obd->obd_name, mti->mti_svname);
+
+ b_fsdb->fsdb_barrier_disabled = 1;
+ }
+ }
+
+ if (mti->mti_flags & LDD_F_NEED_INDEX)
+ mti->mti_flags |= LDD_F_WRITECONF;
+
+ if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
+ LDD_F_UPDATE))) {
+ /* We're just here as a startup ping. */
+ CDEBUG(D_MGS, "Server %s is running on %s\n",
mti->mti_svname, obd_export_nid2str(tsi->tsi_exp));
rc = mgs_check_target(tsi->tsi_env, mgs, mti);
- /* above will set appropriate mti flags */
- if (rc <= 0)
- /* Nothing wrong, or fatal error */
- GOTO(out_nolock, rc);
+ /* above will set appropriate mti flags */
+ if (rc <= 0)
+ /* Nothing wrong, or fatal error */
+ GOTO(out_norevoke, rc);
} else if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)) {
rc = mgs_check_failover_reg(mti);
if (rc)
- GOTO(out_nolock, rc);
- }
+ GOTO(out_norevoke, rc);
+ }
- OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
- if (mti->mti_flags & LDD_F_WRITECONF) {
- if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
- mti->mti_stripe_index == 0) {
+ if (mti->mti_flags & LDD_F_WRITECONF) {
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
+ mti->mti_stripe_index == 0) {
+ mgs_put_fsdb(mgs, b_fsdb);
+ b_fsdb = NULL;
rc = mgs_erase_logs(tsi->tsi_env, mgs,
mti->mti_fsname);
- LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
- "request. All servers must be restarted "
- "in order to regenerate the logs."
- "\n", obd->obd_name, mti->mti_fsname);
- } else if (mti->mti_flags &
- (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
+ LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
+ "request. All servers must be restarted "
+ "in order to regenerate the logs: rc = %d"
+ "\n", obd->obd_name, mti->mti_fsname, rc);
+ if (rc && rc != -ENOENT)
+ GOTO(out_norevoke, rc);
+
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs,
+ barrier_name, &b_fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s: %d\n",
+ barrier_name, rc);
+
+ GOTO(out_norevoke, rc);
+ }
+
+ if (!(exp_connect_flags(tsi->tsi_exp) &
+ OBD_CONNECT_BARRIER)) {
+ LCONSOLE_WARN("%s: the MDT %s does not support "
+ "write barrier, disable barrier "
+ "on the whole system.\n",
+ obd->obd_name, mti->mti_svname);
+
+ b_fsdb->fsdb_barrier_disabled = 1;
+ }
+ } else if (mti->mti_flags &
+ (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
rc = mgs_erase_log(tsi->tsi_env, mgs, mti->mti_svname);
- LCONSOLE_WARN("%s: Regenerating %s log by user "
- "request.\n",
- obd->obd_name, mti->mti_svname);
- }
- mti->mti_flags |= LDD_F_UPDATE;
- /* Erased logs means start from scratch. */
- mti->mti_flags &= ~LDD_F_UPGRADE14;
- if (rc)
- GOTO(out_nolock, rc);
- }
+ LCONSOLE_WARN("%s: Regenerating %s log by user "
+ "request: rc = %d\n",
+ obd->obd_name, mti->mti_svname, rc);
+ if (rc)
+ GOTO(out_norevoke, rc);
+ }
- rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &fsdb);
- if (rc) {
- CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
- GOTO(out_nolock, rc);
- }
+ mti->mti_flags |= LDD_F_UPDATE;
+ /* Erased logs means start from scratch. */
+ mti->mti_flags &= ~LDD_F_UPGRADE14;
+ }
+
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &c_fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
+
+ GOTO(out_norevoke, rc);
+ }
/*
* Log writing contention is handled by the fsdb_mutex.
/* create or update the target log
and update the client/mdt logs */
- rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, fsdb);
+ rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, c_fsdb);
if (rc) {
CERROR("Failed to write %s log (%d)\n",
mti->mti_svname, rc);
}
out:
- mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
+ mgs_revoke_lock(mgs, c_fsdb, CONFIG_T_CONFIG);
+
+out_norevoke:
+ if (!rc && mti->mti_flags & LDD_F_SV_TYPE_MDT && b_fsdb) {
+ if (!c_fsdb) {
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs,
+ mti->mti_fsname, &c_fsdb);
+ if (rc)
+ CERROR("Fail to get db for %s: %d\n",
+ mti->mti_fsname, rc);
+ }
+
+ if (c_fsdb) {
+ memcpy(b_fsdb->fsdb_mdt_index_map,
+ c_fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
+ b_fsdb->fsdb_mdt_count = c_fsdb->fsdb_mdt_count;
+ }
+ }
+
+ up_read(&mgs->mgs_barrier_rwsem);
-out_nolock:
CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
mti->mti_stripe_index, rc);
/* An error flag is set in the mti reply rather than an error code */
/* Flush logs to disk */
dt_sync(tsi->tsi_env, mgs->mgs_bottom);
+ if (b_fsdb)
+ mgs_put_fsdb(mgs, b_fsdb);
+ if (c_fsdb)
+ mgs_put_fsdb(mgs, c_fsdb);
RETURN(rc);
}
case LCFG_NODEMAP_DENY_UNKNOWN:
case LCFG_NODEMAP_SQUASH_UID:
case LCFG_NODEMAP_SQUASH_GID:
+ case LCFG_NODEMAP_MAP_MODE:
if (lcfg->lcfg_bufcount != 4)
GOTO(out_lcfg, rc = -EINVAL);
nodemap_name = lustre_cfg_string(lcfg, 1);
/* revoke nodemap lock */
rc = mgs_find_or_make_fsdb(env, mgs, LUSTRE_NODEMAP_NAME, &fsdb);
- if (rc < 0)
+ if (rc < 0) {
CWARN("%s: cannot make nodemap fsdb: rc = %d\n",
mgs->mgs_obd->obd_name, rc);
- else
+ } else {
mgs_revoke_lock(mgs, fsdb, CONFIG_T_NODEMAP);
+ mgs_put_fsdb(mgs, fsdb);
+ }
out_lcfg:
OBD_FREE(lcfg, data->ioc_plen1);
rc = mgs_iocontrol_pool(&env, mgs, data);
break;
+ case OBD_IOC_BARRIER:
+ rc = mgs_iocontrol_barrier(&env, mgs, data);
+ break;
+
case OBD_IOC_NODEMAP:
rc = mgs_iocontrol_nodemap(&env, mgs, data);
break;
+ case OBD_IOC_LCFG_FORK:
+ rc = mgs_lcfg_fork(&env, mgs, data->ioc_inlbuf1,
+ data->ioc_inlbuf2);
+ break;
+
+ case OBD_IOC_LCFG_ERASE:
+ rc = mgs_lcfg_erase(&env, mgs, data->ioc_inlbuf1);
+ break;
+
case OBD_IOC_CATLOGLIST:
rc = mgs_list_logs(&env, mgs, data);
break;
struct obd_device *obd;
struct lustre_mount_info *lmi;
struct llog_ctxt *ctxt;
- struct fs_db *fsdb = NULL;
- struct fs_db *fsdb_srpc = NULL;
int rc;
ENTRY;
mgs->mgs_start_time = cfs_time_current_sec();
spin_lock_init(&mgs->mgs_lock);
mutex_init(&mgs->mgs_health_mutex);
+ init_rwsem(&mgs->mgs_barrier_rwsem);
+
+ rc = mgs_lcfg_rename(env, mgs);
+ if (rc)
+ GOTO(err_llog, rc);
rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3));
if (rc != 0) {
/* Setup params fsdb and log, so that other servers can make a local
* copy successfully when they are mounted. See LU-4783 */
- rc = mgs_params_fsdb_setup(env, mgs, fsdb);
+ rc = mgs_params_fsdb_setup(env, mgs);
if (rc)
/* params fsdb and log can be setup later */
CERROR("%s: %s fsdb and log setup failed: rc = %d\n",
obd->obd_name, PARAMS_FILENAME, rc);
/* Setup _mgs fsdb, useful for srpc */
- mgs__mgs_fsdb_setup(env, mgs, fsdb_srpc);
+ mgs__mgs_fsdb_setup(env, mgs);
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"mgs_ldlm_client", &obd->obd_ldlm_client);
else
rc = -ENOMEM;
- return 0;
+ return rc;
}
static void mgs_object_free(const struct lu_env *env, struct lu_object *o)
struct obd_connect_data *data, void *localdata)
{
struct obd_export *lexp;
- struct lustre_handle conn = { 0 };
+ struct lustre_handle conn = {
+ .cookie = 0,
+ };
int rc;
ENTRY;