*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2013, Intel Corporation.
+ * Copyright (c) 2010, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <obd_class.h>
#include <lprocfs_status.h>
-#include <lustre_param.h>
+#include <uapi/linux/lustre/lustre_ioctl.h>
+#include <uapi/linux/lustre/lustre_param.h>
+#include <uapi/linux/lustre/lustre_barrier_user.h>
#include "mgs_internal.h"
ENTRY;
+ CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, cfs_fail_val);
rc = tgt_connect(tsi);
if (rc)
RETURN(rc);
RETURN(0);
}
+/**
+ * Check whether \a str begins with \a prefix.
+ *
+ * \param[in] str	NUL-terminated string to examine
+ * \param[in] prefix	NUL-terminated prefix to look for
+ *
+ * \retval true		the first strlen(prefix) characters of \a str equal
+ *			\a prefix (always the case for an empty prefix)
+ * \retval false	otherwise
+ */
+static inline bool str_starts_with(const char *str, const char *prefix)
+{
+	const size_t prefix_len = strlen(prefix);
+
+	return !strncmp(str, prefix, prefix_len);
+}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
static int mgs_set_info(struct tgt_session_info *tsi)
{
struct mgs_thread_info *mgi;
struct mgs_send_param *msp, *rep_msp;
struct lustre_cfg *lcfg;
+ size_t param_len;
+ char *s;
int rc;
ENTRY;
if (msp == NULL)
RETURN(err_serious(-EFAULT));
- /* Construct lustre_cfg structure to pass to function mgs_setparam */
+ param_len = strnlen(msp->mgs_param, sizeof(msp->mgs_param));
+ if (param_len == 0 || param_len == sizeof(msp->mgs_param))
+ RETURN(-EINVAL);
+
+ /* We only allow '*.lov.stripe{size,count,offset}=*' from an RPC. */
+ s = strchr(msp->mgs_param, '.');
+ if (s == NULL)
+ RETURN(-EINVAL);
+
+ if (!str_starts_with(s + 1, "lov.stripesize=") &&
+ !str_starts_with(s + 1, "lov.stripecount=") &&
+ !str_starts_with(s + 1, "lov.stripeoffset="))
+ RETURN(-EINVAL);
+
+ /* Construct lustre_cfg structure to pass to function mgs_set_param */
lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, msp->mgs_param);
- lcfg = lustre_cfg_new(LCFG_PARAM, &mgi->mgi_bufs);
+ OBD_ALLOC(lcfg, lustre_cfg_len(mgi->mgi_bufs.lcfg_bufcount,
+ mgi->mgi_bufs.lcfg_buflen));
+ if (!lcfg)
+ RETURN(-ENOMEM);
+ lustre_cfg_init(lcfg, LCFG_PARAM, &mgi->mgi_bufs);
- rc = mgs_setparam(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg,
- mgi->mgi_fsname);
+ rc = mgs_set_param(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg);
if (rc) {
- LCONSOLE_WARN("%s: Unable to set parameter %s for %s: %d\n",
- tgt_name(tsi->tsi_tgt), msp->mgs_param,
- mgi->mgi_fsname, rc);
+ LCONSOLE_WARN("%s: Unable to set parameter %s: %d\n",
+ tgt_name(tsi->tsi_tgt), msp->mgs_param, rc);
GOTO(out_cfg, rc);
}
*rep_msp = *msp;
EXIT;
out_cfg:
- lustre_cfg_free(lcfg);
+ OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens));
return rc;
}
+#endif
+/*
+ * Tag handed to mgs_completion_ast_generic() as its last argument by the
+ * per-type completion callbacks, selecting the type-specific completion step.
+ */
enum ast_type {
- AST_CONFIG = 1,
- AST_PARAMS = 2,
- AST_IR = 3
+ AST_CONFIG = 1,
+ AST_PARAMS = 2,
+ AST_IR = 3, /* completion calls mgs_ir_notify_complete() */
+ AST_BARRIER = 4, /* no type-specific completion work */
};
static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags,
{
ENTRY;
- if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_BLOCK_CONV))) {
+ if (!(flags & LDLM_FL_BLOCKED_MASK)) {
struct fs_db *fsdb;
/* l_ast_data is used as a marker to avoid cancel ldlm lock
case AST_IR:
mgs_ir_notify_complete(fsdb);
break;
+ case AST_BARRIER:
+ break;
default:
LBUG();
}
return mgs_completion_ast_generic(lock, flags, cbdata, AST_IR);
}
+/*
+ * Completion AST for barrier locks: forwards to mgs_completion_ast_generic()
+ * tagged as AST_BARRIER. mgs_revoke_lock() installs this callback for
+ * CONFIG_T_BARRIER.
+ */
+static int mgs_completion_ast_barrier(struct ldlm_lock *lock, __u64 flags,
+ void *cbdata)
+{
+ return mgs_completion_ast_generic(lock, flags, cbdata, AST_BARRIER);
+}
+
void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type)
{
ldlm_completion_callback cp = NULL;
- struct lustre_handle lockh = { 0 };
+ struct lustre_handle lockh = {
+ .cookie = 0,
+ };
struct ldlm_res_id res_id;
__u64 flags = LDLM_FL_ATOMIC_CB;
int rc;
LASSERT(rc == 0);
switch (type) {
case CONFIG_T_CONFIG:
+ case CONFIG_T_NODEMAP:
cp = mgs_completion_ast_config;
if (test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags))
rc = -EALREADY;
break;
case CONFIG_T_RECOVER:
cp = mgs_completion_ast_ir;
+ break;
+ case CONFIG_T_BARRIER:
+ cp = mgs_completion_ast_barrier;
+ break;
default:
break;
}
if (!rc) {
LASSERT(cp != NULL);
- rc = ldlm_cli_enqueue_local(mgs->mgs_obd->obd_namespace,
+ rc = ldlm_cli_enqueue_local(NULL, mgs->mgs_obd->obd_namespace,
&res_id, LDLM_PLAIN, NULL, LCK_EX,
&flags, ldlm_blocking_ast, cp,
NULL, fsdb, 0, LVB_T_NONE, NULL,
&lockh);
if (rc != ELDLM_OK) {
- CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n",
+ CERROR("%s: can't take cfg lock for %#llx/%#llx : rc = %d\n",
+ mgs->mgs_obd->obd_name,
le64_to_cpu(res_id.name[0]),
le64_to_cpu(res_id.name[1]), rc);
rc = 1;
} else {
/* Index is correctly marked as used */
-
- /* If the logs don't contain the mti_nids then add
- them as failover nids */
- rc = mgs_check_failnid(env, mgs, mti);
+ rc = 0;
}
RETURN(rc);
/* Ensure this is not a failover node that is connecting first*/
static int mgs_check_failover_reg(struct mgs_target_info *mti)
{
+	/*
+	 * Returns 0 when no NID listed under a PARAM_FAILNODE entry of
+	 * mti->mti_params equals one of mti->mti_nids[], and -EADDRNOTAVAIL
+	 * (after a console warning) on the first match.
+	 */
- lnet_nid_t nid;
- char *ptr;
- int i;
+ lnet_nid_t nid;
+ char *ptr;
+ int i;
- ptr = mti->mti_params;
- while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
+	/* Outer loop: each PARAM_FAILNODE entry; inner loop: each NID in it */
+ ptr = mti->mti_params;
+ while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
- for (i = 0; i < mti->mti_nid_count; i++) {
- if (nid == mti->mti_nids[i]) {
- LCONSOLE_WARN("Denying initial registra"
- "tion attempt from nid %s"
- ", specified as failover"
- "\n",libcfs_nid2str(nid));
- return -EADDRNOTAVAIL;
- }
- }
- }
- }
- return 0;
+	/* Compare the parsed failover NID with every NID of this target */
+ for (i = 0; i < mti->mti_nid_count; i++) {
+ if (nid == mti->mti_nids[i]) {
+ LCONSOLE_WARN("Denying initial registration attempt from nid %s, specified as failover\n",
+ libcfs_nid2str(nid));
+ return -EADDRNOTAVAIL;
+ }
+ }
+ }
+ }
+ return 0;
}
/* Called whenever a target starts up. Flags indicate first connect, etc. */
static int mgs_target_reg(struct tgt_session_info *tsi)
{
- struct obd_device *obd = tsi->tsi_exp->exp_obd;
- struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp);
- struct mgs_target_info *mti, *rep_mti;
- struct fs_db *fsdb;
- int opc;
- int rc = 0;
+ struct obd_device *obd = tsi->tsi_exp->exp_obd;
+ struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp);
+ struct mgs_target_info *mti, *rep_mti;
+ struct fs_db *b_fsdb = NULL; /* barrier fsdb */
+ struct fs_db *c_fsdb = NULL; /* config fsdb */
+ char barrier_name[20];
+ int opc;
+ int rc = 0;
ENTRY;
RETURN(err_serious(-EFAULT));
}
+ down_read(&mgs->mgs_barrier_rwsem);
+
if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data,
IMP_RECOV))
opc = mti->mti_flags & LDD_F_OPC_MASK;
else
opc = LDD_F_OPC_REG;
- if (opc == LDD_F_OPC_READY) {
- CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
- mti->mti_fsname, mti->mti_stripe_index);
+ if (opc == LDD_F_OPC_READY) {
+ CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
+ mti->mti_fsname, mti->mti_stripe_index);
rc = mgs_ir_update(tsi->tsi_env, mgs, mti);
- if (rc) {
- LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
- CERROR("Update IR return with %d(ignore and IR "
- "disabled)\n", rc);
- }
- GOTO(out_nolock, rc);
- }
+ if (rc) {
+ LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
+ CERROR("%s: Update IR return failure: rc = %d\n",
+ mti->mti_fsname, rc);
+ }
- /* Do not support unregistering right now. */
- if (opc != LDD_F_OPC_REG)
- GOTO(out_nolock, rc = -EINVAL);
+ GOTO(out_norevoke, rc);
+ }
- CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
- mti->mti_fsname, mti->mti_stripe_index);
+ /* Do not support unregistering right now. */
+ if (opc != LDD_F_OPC_REG)
+ GOTO(out_norevoke, rc = -EINVAL);
- if (mti->mti_flags & LDD_F_NEED_INDEX)
- mti->mti_flags |= LDD_F_WRITECONF;
+ snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
+ mti->mti_fsname, BARRIER_FILENAME);
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, barrier_name, &b_fsdb);
+ if (rc) {
+ CERROR("%s: Can't get db for %s: rc = %d\n",
+ mti->mti_fsname, barrier_name, rc);
+
+ GOTO(out_norevoke, rc);
+ }
+
+ CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
+ mti->mti_fsname, mti->mti_stripe_index);
- if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
- LDD_F_UPDATE))) {
- /* We're just here as a startup ping. */
- CDEBUG(D_MGS, "Server %s is running on %s\n",
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
+ if (b_fsdb->fsdb_barrier_status == BS_FREEZING_P1 ||
+ b_fsdb->fsdb_barrier_status == BS_FREEZING_P2 ||
+ b_fsdb->fsdb_barrier_status == BS_FROZEN) {
+ LCONSOLE_WARN("%s: the system is in barrier, refuse "
+ "the connection from MDT %s temporary\n",
+ obd->obd_name, mti->mti_svname);
+
+ GOTO(out_norevoke, rc = -EBUSY);
+ }
+
+ if (!(exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_BARRIER) &&
+ !b_fsdb->fsdb_barrier_disabled) {
+ LCONSOLE_WARN("%s: the MDT %s does not support write "
+ "barrier, so disable barrier on the "
+ "whole system.\n",
+ obd->obd_name, mti->mti_svname);
+
+ b_fsdb->fsdb_barrier_disabled = 1;
+ }
+ }
+
+ if (mti->mti_flags & LDD_F_NEED_INDEX)
+ mti->mti_flags |= LDD_F_WRITECONF;
+
+ if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPDATE))) {
+ /* We're just here as a startup ping. */
+ CDEBUG(D_MGS, "Server %s is running on %s\n",
mti->mti_svname, obd_export_nid2str(tsi->tsi_exp));
rc = mgs_check_target(tsi->tsi_env, mgs, mti);
- /* above will set appropriate mti flags */
- if (rc <= 0)
- /* Nothing wrong, or fatal error */
- GOTO(out_nolock, rc);
+ /* above will set appropriate mti flags */
+ if (rc <= 0)
+ /* Nothing wrong, or fatal error */
+ GOTO(out_norevoke, rc);
} else if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)) {
rc = mgs_check_failover_reg(mti);
if (rc)
- GOTO(out_nolock, rc);
- }
+ GOTO(out_norevoke, rc);
+ }
- OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
- if (mti->mti_flags & LDD_F_WRITECONF) {
- if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
- mti->mti_stripe_index == 0) {
+ if (mti->mti_flags & LDD_F_WRITECONF) {
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
+ mti->mti_stripe_index == 0) {
+ mgs_put_fsdb(mgs, b_fsdb);
+ b_fsdb = NULL;
rc = mgs_erase_logs(tsi->tsi_env, mgs,
mti->mti_fsname);
- LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
- "request. All servers must be restarted "
- "in order to regenerate the logs."
- "\n", obd->obd_name, mti->mti_fsname);
- } else if (mti->mti_flags &
- (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
+ LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
+ "request. All servers must be restarted "
+ "in order to regenerate the logs: rc = %d"
+ "\n", obd->obd_name, mti->mti_fsname, rc);
+ if (rc && rc != -ENOENT)
+ GOTO(out_norevoke, rc);
+
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs,
+ barrier_name, &b_fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s: %d\n",
+ barrier_name, rc);
+
+ GOTO(out_norevoke, rc);
+ }
+
+ if (!(exp_connect_flags(tsi->tsi_exp) &
+ OBD_CONNECT_BARRIER)) {
+ LCONSOLE_WARN("%s: the MDT %s does not support "
+ "write barrier, disable barrier "
+ "on the whole system.\n",
+ obd->obd_name, mti->mti_svname);
+
+ b_fsdb->fsdb_barrier_disabled = 1;
+ }
+ } else if (mti->mti_flags &
+ (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
rc = mgs_erase_log(tsi->tsi_env, mgs, mti->mti_svname);
- LCONSOLE_WARN("%s: Regenerating %s log by user "
- "request.\n",
- obd->obd_name, mti->mti_svname);
- }
- mti->mti_flags |= LDD_F_UPDATE;
- /* Erased logs means start from scratch. */
- mti->mti_flags &= ~LDD_F_UPGRADE14;
- if (rc)
- GOTO(out_nolock, rc);
- }
+ LCONSOLE_WARN("%s: Regenerating %s log by user "
+ "request: rc = %d\n",
+ obd->obd_name, mti->mti_svname, rc);
+ if (rc)
+ GOTO(out_norevoke, rc);
+ }
- rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &fsdb);
- if (rc) {
- CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
- GOTO(out_nolock, rc);
- }
+ mti->mti_flags |= LDD_F_UPDATE;
+ }
- /*
- * Log writing contention is handled by the fsdb_mutex.
- *
- * It should be alright if someone was reading while we were
- * updating the logs - if we revoke at the end they will just update
- * from where they left off.
- */
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &c_fsdb);
+ if (rc) {
+ CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
- if (mti->mti_flags & LDD_F_UPGRADE14) {
- CERROR("Can't upgrade from 1.4 (%d)\n", rc);
- GOTO(out, rc);
+ GOTO(out_norevoke, rc);
}
+ /*
+ * Log writing contention is handled by the fsdb_mutex.
+ *
+ * It should be alright if someone was reading while we were
+ * updating the logs - if we revoke at the end they will just update
+ * from where they left off.
+ */
if (mti->mti_flags & LDD_F_UPDATE) {
CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname,
mti->mti_stripe_index);
/* create or update the target log
and update the client/mdt logs */
- rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, fsdb);
+ rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, c_fsdb);
if (rc) {
CERROR("Failed to write %s log (%d)\n",
mti->mti_svname, rc);
GOTO(out, rc);
}
- mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
- LDD_F_NEED_INDEX | LDD_F_WRITECONF |
- LDD_F_UPGRADE14);
- mti->mti_flags |= LDD_F_REWRITE_LDD;
+ mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
+ LDD_F_NEED_INDEX | LDD_F_WRITECONF);
+ mti->mti_flags |= LDD_F_REWRITE_LDD;
}
out:
- mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
+ mgs_revoke_lock(mgs, c_fsdb, CONFIG_T_CONFIG);
+
+out_norevoke:
+ if (!rc && mti->mti_flags & LDD_F_SV_TYPE_MDT && b_fsdb) {
+ if (!c_fsdb) {
+ rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs,
+ mti->mti_fsname, &c_fsdb);
+ if (rc)
+ CERROR("Fail to get db for %s: %d\n",
+ mti->mti_fsname, rc);
+ }
+
+ if (c_fsdb) {
+ memcpy(b_fsdb->fsdb_mdt_index_map,
+ c_fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
+ b_fsdb->fsdb_mdt_count = c_fsdb->fsdb_mdt_count;
+ }
+ }
+
+ up_read(&mgs->mgs_barrier_rwsem);
-out_nolock:
CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
mti->mti_stripe_index, rc);
/* An error flag is set in the mti reply rather than an error code */
/* Flush logs to disk */
dt_sync(tsi->tsi_env, mgs->mgs_bottom);
+ if (b_fsdb)
+ mgs_put_fsdb(mgs, b_fsdb);
+ if (c_fsdb)
+ mgs_put_fsdb(mgs, c_fsdb);
RETURN(rc);
}
case CONFIG_T_RECOVER:
rc = mgs_get_ir_logs(req);
break;
+ case CONFIG_T_NODEMAP:
+ rc = nodemap_get_config_req(req->rq_export->exp_obd, req);
+ break;
case CONFIG_T_CONFIG:
rc = -EOPNOTSUPP;
break;
logname = req_capsule_client_get(tsi->tsi_pill, &RMF_NAME);
if (logname) {
- char *ptr = strchr(logname, '-');
- int len = (int)(ptr - logname);
+ char *ptr = strrchr(logname, '-');
+ int len = (ptr != NULL) ? (int)(ptr - logname) : 0;
if (ptr == NULL || len >= sizeof(mgi->mgi_fsname)) {
- LCONSOLE_WARN("%s: non-config logname received: %s\n",
- tgt_name(tsi->tsi_tgt), logname);
+ if (strcmp(logname, PARAMS_FILENAME) != 0)
+ LCONSOLE_WARN("%s: non-config logname "
+ "received: %s\n",
+ tgt_name(tsi->tsi_tgt),
+ logname);
/* not error, this can be llog test name */
} else {
strncpy(mgi->mgi_fsname, logname, len);
/* init mgs_export_data for fsc */
spin_lock_init(&data->med_lock);
- CFS_INIT_LIST_HEAD(&data->med_clients);
+ INIT_LIST_HEAD(&data->med_clients);
spin_lock(&exp->exp_lock);
exp->exp_connecting = 1;
RETURN(0);
}
-static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname)
+/**
+ * Split an "fsname.poolname" argument into its two components.
+ *
+ * Only alphanumerics, '_', '-' and '.' are accepted in \a arg. The split is
+ * made at the first '.', so any later '.' stays part of the pool name.
+ * Nothing is written to the output buffers unless the whole argument is
+ * valid, and both outputs are always NUL-terminated on success.
+ *
+ * \param[in]  arg	NUL-terminated "fsname.poolname" string
+ * \param[out] fsname	receives the filesystem name; must hold at least
+ *			LUSTRE_MAXFSNAME + 1 bytes
+ * \param[out] poolname	receives the pool name; must hold at least
+ *			LOV_MAXPOOLNAME + 1 bytes
+ *
+ * NOTE(review): the buffer sizes above are what a terminated maximum-length
+ * name needs; confirm the callers allocate accordingly.
+ *
+ * \retval 0		  success
+ * \retval -EINVAL	  invalid character, missing '.', or an empty
+ *			  fsname or poolname
+ * \retval -ENAMETOOLONG  poolname longer than LOV_MAXPOOLNAME or fsname
+ *			  longer than LUSTRE_MAXFSNAME
+ */
+static int mgs_extract_fs_pool(char *arg, char *fsname, char *poolname)
{
- char *ptr;
+	size_t fsname_len;
+	size_t poolname_len;
+	char *ptr;
- ENTRY;
- for (ptr = arg; (*ptr != '\0') && (*ptr != '.'); ptr++ ) {
- *fsname = *ptr;
- fsname++;
- }
- if (*ptr == '\0')
- return -EINVAL;
- *fsname = '\0';
- ptr++;
- strcpy(poolname, ptr);
+	ENTRY;
+	/* Validate name */
+	for (ptr = arg; *ptr != '\0'; ptr++) {
+		if (!isalnum(*ptr) && *ptr != '_' && *ptr != '-' && *ptr != '.')
+			RETURN(-EINVAL);
+	}
- RETURN(0);
+	/* Test for fsname.poolname format. */
+	ptr = strchr(arg, '.');
+	if (!ptr)
+		RETURN(-EINVAL);
+	fsname_len = ptr - arg;
+
+	/*
+	 * Step over the '.' before measuring the pool name: measured at the
+	 * '.' itself the length is never zero, so "fsname." would slip through.
+	 */
+	ptr++;
+	poolname_len = strlen(ptr);
+	if (!poolname_len)
+		RETURN(-EINVAL);
+
+	/* Also make sure poolname is not too long. */
+	if (poolname_len > LOV_MAXPOOLNAME)
+		RETURN(-ENAMETOOLONG);
+
+	/* Test if fsname is empty */
+	if (!fsname_len)
+		RETURN(-EINVAL);
+
+	/* or too long */
+	if (fsname_len > LUSTRE_MAXFSNAME)
+		RETURN(-ENAMETOOLONG);
+
+	/*
+	 * Lengths are validated, so copy exactly and terminate explicitly;
+	 * strncpy() would leave fsname (and a maximum-length poolname)
+	 * without a trailing NUL.
+	 */
+	memcpy(fsname, arg, fsname_len);
+	fsname[fsname_len] = '\0';
+	memcpy(poolname, ptr, poolname_len + 1);
+
+	RETURN(0);
}
static int mgs_iocontrol_nodemap(const struct lu_env *env,
struct obd_ioctl_data *data)
{
struct lustre_cfg *lcfg = NULL;
- struct lu_nodemap *nodemap;
+ struct fs_db *fsdb;
lnet_nid_t nid;
const char *nodemap_name = NULL;
const char *nidstr = NULL;
const char *idtype_str = NULL;
char *param = NULL;
char fs_idstr[16];
+ char name_buf[LUSTRE_NODEMAP_NAME_LENGTH + 1];
int rc = 0;
- __u32 client_id;
+ unsigned long client_id;
__u32 fs_id;
__u32 cmd;
int idtype;
GOTO(out, rc = -EINVAL);
}
- if (data->ioc_plen1 > PAGE_CACHE_SIZE)
+ if (data->ioc_plen1 > PAGE_SIZE)
GOTO(out, rc = -E2BIG);
OBD_ALLOC(lcfg, data->ioc_plen1);
GOTO(out_lcfg, rc = -EINVAL);
nidstr = lustre_cfg_string(lcfg, 1);
nid = libcfs_str2nid(nidstr);
- nodemap = nodemap_classify_nid(nid);
- memset(data->ioc_pbuf1, 0, data->ioc_plen1);
- if (copy_to_user(data->ioc_pbuf1, nodemap->nm_name,
- strlen(nodemap->nm_name)) != 0)
+ nodemap_test_nid(nid, name_buf, sizeof(name_buf));
+ rc = copy_to_user(data->ioc_pbuf1, name_buf,
+ MIN(data->ioc_plen1, sizeof(name_buf)));
+ if (rc != 0)
GOTO(out_lcfg, rc = -EFAULT);
break;
case LCFG_NODEMAP_TEST_ID:
client_idstr = lustre_cfg_string(lcfg, 3);
nid = libcfs_str2nid(nidstr);
- nodemap = nodemap_classify_nid(nid);
- client_id = simple_strtoul(client_idstr, NULL, 10);
-
if (strcmp(idtype_str, "uid") == 0)
idtype = NODEMAP_UID;
else
idtype = NODEMAP_GID;
- fs_id = nodemap_map_id(nodemap, idtype, NODEMAP_CLIENT_TO_FS,
- client_id);
+ rc = kstrtoul(client_idstr, 10, &client_id);
+ if (rc != 0)
+ GOTO(out_lcfg, rc = -EINVAL);
+
+ rc = nodemap_test_id(nid, idtype, client_id, &fs_id);
+ if (rc < 0)
+ GOTO(out_lcfg, rc = -EINVAL);
if (data->ioc_plen1 < sizeof(fs_idstr))
GOTO(out_lcfg, rc = -EINVAL);
case LCFG_NODEMAP_DEL_UIDMAP:
case LCFG_NODEMAP_ADD_GIDMAP:
case LCFG_NODEMAP_DEL_GIDMAP:
+ case LCFG_NODEMAP_SET_FILESET:
+ case LCFG_NODEMAP_SET_SEPOL:
if (lcfg->lcfg_bufcount != 3)
GOTO(out_lcfg, rc = -EINVAL);
nodemap_name = lustre_cfg_string(lcfg, 1);
break;
case LCFG_NODEMAP_ADMIN:
case LCFG_NODEMAP_TRUSTED:
+ case LCFG_NODEMAP_DENY_UNKNOWN:
case LCFG_NODEMAP_SQUASH_UID:
case LCFG_NODEMAP_SQUASH_GID:
+ case LCFG_NODEMAP_MAP_MODE:
+ case LCFG_NODEMAP_AUDIT_MODE:
if (lcfg->lcfg_bufcount != 4)
GOTO(out_lcfg, rc = -EINVAL);
nodemap_name = lustre_cfg_string(lcfg, 1);
GOTO(out_lcfg, rc);
}
+ /* revoke nodemap lock */
+ rc = mgs_find_or_make_fsdb(env, mgs, LUSTRE_NODEMAP_NAME, &fsdb);
+ if (rc < 0) {
+ CWARN("%s: cannot make nodemap fsdb: rc = %d\n",
+ mgs->mgs_obd->obd_name, rc);
+ } else {
+ mgs_revoke_lock(mgs, fsdb, CONFIG_T_NODEMAP);
+ mgs_put_fsdb(mgs, fsdb);
+ }
+
out_lcfg:
OBD_FREE(lcfg, data->ioc_plen1);
out:
GOTO(out_pool, rc = -EINVAL);
}
- if (data->ioc_plen1 > PAGE_CACHE_SIZE)
+ if (data->ioc_plen1 > PAGE_SIZE)
GOTO(out_pool, rc = -E2BIG);
OBD_ALLOC(lcfg, data->ioc_plen1);
}
/* from mdt_iocontrol */
-int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void *uarg)
+static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
+ void *karg, void __user *uarg)
{
struct mgs_device *mgs = exp2mgs_dev(exp);
struct obd_ioctl_data *data = karg;
switch (cmd) {
- case OBD_IOC_PARAM: {
- struct mgs_thread_info *mgi = mgs_env_info(&env);
- struct lustre_cfg *lcfg;
+ case OBD_IOC_PARAM: {
+ struct lustre_cfg *lcfg;
if (data->ioc_type != LUSTRE_CFG_TYPE) {
CERROR("%s: unknown cfg record type: %d\n",
if (lcfg->lcfg_bufcount < 1)
GOTO(out_free, rc = -EINVAL);
- rc = mgs_setparam(&env, mgs, lcfg, mgi->mgi_fsname);
+ rc = mgs_set_param(&env, mgs, lcfg);
if (rc)
CERROR("%s: setparam err: rc = %d\n",
exp->exp_obd->obd_name, rc);
case OBD_IOC_REPLACE_NIDS: {
if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
- CERROR("No device name specified!\n");
rc = -EINVAL;
+ CERROR("%s: no device or fsname specified: rc = %d\n",
+ exp->exp_obd->obd_name, rc);
break;
}
- if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
- CERROR("Device name is not NUL terminated!\n");
- rc = -EINVAL;
+ if (data->ioc_inllen1 > MTI_NAME_MAXLEN) {
+ rc = -EOVERFLOW;
+ CERROR("%s: device or fsname is too long: rc = %d\n",
+ exp->exp_obd->obd_name, rc);
break;
}
- if (data->ioc_plen1 > MTI_NAME_MAXLEN) {
- CERROR("Device name is too long\n");
- rc = -EOVERFLOW;
+ if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
+ rc = -EINVAL;
+ CERROR("%s: device or fsname is not NUL terminated: "
+ "rc = %d\n", exp->exp_obd->obd_name, rc);
break;
}
if (!data->ioc_inllen2 || !data->ioc_inlbuf2) {
- CERROR("No NIDs were specified!\n");
rc = -EINVAL;
+ CERROR("%s: no NIDs specified: rc = %d\n",
+ exp->exp_obd->obd_name, rc);
break;
}
if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) {
- CERROR("NID list is not NUL terminated!\n");
rc = -EINVAL;
+ CERROR("%s: NID list is not NUL terminated: "
+ "rc = %d\n", exp->exp_obd->obd_name, rc);
break;
}
break;
}
+ case OBD_IOC_CLEAR_CONFIGS: {
+ if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
+ rc = -EINVAL;
+ CERROR("%s: no device or fsname specified: rc = %d\n",
+ exp->exp_obd->obd_name, rc);
+ break;
+ }
+
+ if (data->ioc_inllen1 > MTI_NAME_MAXLEN) {
+ rc = -EOVERFLOW;
+ CERROR("%s: device or fsname is too long: rc = %d\n",
+ exp->exp_obd->obd_name, rc);
+ break;
+ }
+
+ if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
+ rc = -EINVAL;
+ CERROR("%s: device or fsname is not NUL terminated: "
+ "rc = %d\n", exp->exp_obd->obd_name, rc);
+ break;
+ }
+
+ /* remove records marked SKIP from config logs */
+ rc = mgs_clear_configs(&env, mgs, data->ioc_inlbuf1);
+ if (rc)
+ CERROR("%s: error clearing config log: rc = %d\n",
+ exp->exp_obd->obd_name, rc);
+
+ break;
+ }
+
case OBD_IOC_POOL:
rc = mgs_iocontrol_pool(&env, mgs, data);
break;
+ case OBD_IOC_BARRIER:
+ rc = mgs_iocontrol_barrier(&env, mgs, data);
+ break;
+
case OBD_IOC_NODEMAP:
rc = mgs_iocontrol_nodemap(&env, mgs, data);
break;
- case OBD_IOC_DUMP_LOG: {
- struct llog_ctxt *ctxt;
-
- ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
- rc = class_config_dump_llog(&env, ctxt, data->ioc_inlbuf1,
- NULL);
- llog_ctxt_put(ctxt);
+ case OBD_IOC_LCFG_FORK:
+ rc = mgs_lcfg_fork(&env, mgs, data->ioc_inlbuf1,
+ data->ioc_inlbuf2);
+ break;
+ case OBD_IOC_LCFG_ERASE:
+ rc = mgs_lcfg_erase(&env, mgs, data->ioc_inlbuf1);
break;
- }
case OBD_IOC_CATLOGLIST:
rc = mgs_list_logs(&env, mgs, data);
0, MGS_DISCONNECT, mgs_disconnect,
&RQF_MDS_DISCONNECT, LUSTRE_OBD_VERSION),
TGT_MGS_HDL_VAR(0, MGS_EXCEPTION, mgs_exception),
-TGT_MGS_HDL (HABEO_REFERO | MUTABOR, MGS_SET_INFO, mgs_set_info),
-TGT_MGS_HDL (HABEO_REFERO | MUTABOR, MGS_TARGET_REG, mgs_target_reg),
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
+TGT_MGS_HDL(HAS_REPLY | IS_MUTABLE, MGS_SET_INFO, mgs_set_info),
+#endif
+TGT_MGS_HDL(HAS_REPLY | IS_MUTABLE, MGS_TARGET_REG, mgs_target_reg),
TGT_MGS_HDL_VAR(0, MGS_TARGET_DEL, mgs_target_del),
-TGT_MGS_HDL (HABEO_REFERO, MGS_CONFIG_READ, mgs_config_read),
+TGT_MGS_HDL(HAS_REPLY, MGS_CONFIG_READ, mgs_config_read),
};
static struct tgt_handler mgs_obd_handlers[] = {
* * instead of common OBD_FAIL_LDLM_ENQUEUE_NET */
.th_fail_id = 0,
.th_opc = LDLM_ENQUEUE,
- .th_flags = HABEO_CLAVIS,
+ .th_flags = HAS_KEY,
.th_act = tgt_enqueue,
.th_fmt = &RQF_LDLM_ENQUEUE,
.th_version = LUSTRE_DLM_VERSION,
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_CREATE, mgs_llog_open),
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_NEXT_BLOCK, tgt_llog_next_block),
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_READ_HEADER, tgt_llog_read_header),
-TGT_LLOG_HDL_VAR(0, LLOG_ORIGIN_HANDLE_CLOSE, tgt_llog_close),
TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_PREV_BLOCK, tgt_llog_prev_block),
};
/* Internal mgs setup */
mgs_init_fsdb_list(mgs);
mutex_init(&mgs->mgs_mutex);
- mgs->mgs_start_time = cfs_time_current_sec();
+ mgs->mgs_start_time = ktime_get_real_seconds();
spin_lock_init(&mgs->mgs_lock);
+ mutex_init(&mgs->mgs_health_mutex);
+ init_rwsem(&mgs->mgs_barrier_rwsem);
+
+ rc = mgs_lcfg_rename(env, mgs);
+ if (rc)
+ GOTO(err_llog, rc);
rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3));
if (rc != 0) {
GOTO(err_llog, rc);
}
+ /* Setup params fsdb and log, so that other servers can make a local
+ * copy successfully when they are mounted. See LU-4783 */
+ rc = mgs_params_fsdb_setup(env, mgs);
+ if (rc)
+ /* params fsdb and log can be setup later */
+ CERROR("%s: %s fsdb and log setup failed: rc = %d\n",
+ obd->obd_name, PARAMS_FILENAME, rc);
+
+ /* Setup _mgs fsdb, useful for srpc */
+ mgs__mgs_fsdb_setup(env, mgs);
+
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"mgs_ldlm_client", &obd->obd_ldlm_client);
};
/* Start the service threads */
- mgs->mgs_service = ptlrpc_register_service(&conf, obd->obd_proc_entry);
+ mgs->mgs_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+ obd->obd_debugfs_entry);
if (IS_ERR(mgs->mgs_service)) {
rc = PTR_ERR(mgs->mgs_service);
CERROR("failed to start mgs service: %d\n", rc);
lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0);
RETURN(0);
err_lproc:
+ mgs_params_fsdb_cleanup(env, mgs);
lproc_mgs_cleanup(mgs);
err_llog:
ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
err_ops:
lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0);
if (!cfs_hash_is_empty(mgs2lu_dev(mgs)->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL);
lu_site_print(env, mgs2lu_dev(mgs)->ld_site, &msgdata,
lu_cdebug_printer);
}
obd_disconnect(mgs->mgs_bottom_exp);
err_lmi:
if (lmi)
- server_put_mount(lustre_cfg_string(lcfg, 0));
+ server_put_mount(lustre_cfg_string(lcfg, 0), true);
RETURN(rc);
}
else
rc = -ENOMEM;
- return 0;
+ return rc;
}
static void mgs_object_free(const struct lu_env *env, struct lu_object *o)
return (*p)(env, cookie, LUSTRE_MGS_NAME"-object@%p", o);
}
-struct lu_object_operations mgs_lu_obj_ops = {
+static struct lu_object_operations mgs_lu_obj_ops = {
.loo_object_init = mgs_object_init,
.loo_object_free = mgs_object_free,
.loo_object_print = mgs_object_print,
};
-struct lu_object *mgs_object_alloc(const struct lu_env *env,
- const struct lu_object_header *hdr,
- struct lu_device *d)
+static struct lu_object *mgs_object_alloc(const struct lu_env *env,
+ const struct lu_object_header *hdr,
+ struct lu_device *d)
{
struct lu_object_header *h;
struct mgs_object *o;
ping_evictor_stop();
+ mutex_lock(&mgs->mgs_health_mutex);
ptlrpc_unregister_service(mgs->mgs_service);
+ mutex_unlock(&mgs->mgs_health_mutex);
+
+ mgs_params_fsdb_cleanup(env, mgs);
+ mgs_cleanup_fsdb_list(mgs);
+ ldlm_namespace_free_prior(obd->obd_namespace, NULL, 1);
obd_exports_barrier(obd);
obd_zombie_barrier();
tgt_fini(env, &mgs->mgs_lut);
- mgs_cleanup_fsdb_list(mgs);
lproc_mgs_cleanup(mgs);
ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
mgs_fs_cleanup(env, mgs);
- ldlm_namespace_free(obd->obd_namespace, NULL, 1);
+ ldlm_namespace_free_post(obd->obd_namespace);
obd->obd_namespace = NULL;
lu_site_purge(env, d->ld_site, ~0);
if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL);
lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer);
}
LASSERT(mgs->mgs_bottom_exp);
obd_disconnect(mgs->mgs_bottom_exp);
- server_put_mount(obd->obd_name);
+ server_put_mount(obd->obd_name, true);
RETURN(NULL);
}
.ldt_ctx_tags = LCT_MG_THREAD
};
+/**
+ * Reconnect handler for an existing MGS export; also reused by
+ * mgs_obd_connect() for the common part of a first connect.
+ *
+ * Increments the LPROC_MGS_CONNECT counter, masks the caller's connect flags
+ * down to what the MGS supports, records the connect data in the export and
+ * finally calls mgs_export_stats_init().
+ *
+ * \param[in]     env		execution environment (unused here)
+ * \param[in,out] exp		export being (re)connected
+ * \param[in]     obd		MGS obd device
+ * \param[in]     cluuid	client UUID (only checked for NULL here)
+ * \param[in,out] data		connect data to negotiate, may be NULL
+ * \param[in]     localdata	passed through to mgs_export_stats_init()
+ *
+ * \retval -EINVAL	\a exp, \a obd or \a cluuid is NULL
+ * \retval otherwise	the result of mgs_export_stats_init()
+ */
+static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp,
+			     struct obd_device *obd, struct obd_uuid *cluuid,
+			     struct obd_connect_data *data, void *localdata)
+{
+	int rc;
+
+	ENTRY;
+
+	if (!exp || !obd || !cluuid)
+		RETURN(-EINVAL);
+
+	tgt_counter_incr(exp, LPROC_MGS_CONNECT);
+
+	if (data) {
+		/* Keep only the connect flags this MGS supports. */
+		data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
+		if (data->ocd_connect_flags & OBD_CONNECT_FLAGS2)
+			data->ocd_connect_flags2 &= MGS_CONNECT_SUPPORTED2;
+
+		/*
+		 * Order matters: the export keeps the copy taken before
+		 * ocd_version is overwritten with this server's version.
+		 */
+		exp->exp_connect_data = *data;
+		data->ocd_version = LUSTRE_VERSION_CODE;
+	}
+
+	rc = mgs_export_stats_init(obd, exp, localdata);
+
+	RETURN(rc);
+}
+
static int mgs_obd_connect(const struct lu_env *env, struct obd_export **exp,
struct obd_device *obd, struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
struct obd_export *lexp;
- struct lustre_handle conn = { 0 };
+ struct lustre_handle conn = {
+ .cookie = 0,
+ };
int rc;
ENTRY;
if (lexp == NULL)
RETURN(-EFAULT);
- if (data != NULL) {
- data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
- data->ocd_version = LUSTRE_VERSION_CODE;
- lexp->exp_connect_data = *data;
- }
-
- tgt_counter_incr(lexp, LPROC_MGS_CONNECT);
-
- rc = mgs_export_stats_init(obd, lexp, localdata);
+ rc = mgs_obd_reconnect(env, lexp, obd, cluuid, data, localdata);
if (rc)
- class_disconnect(lexp);
- else
- *exp = lexp;
+ GOTO(out_disconnect, rc);
- RETURN(rc);
-}
+ *exp = lexp;
-static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp,
- struct obd_device *obd, struct obd_uuid *cluuid,
- struct obd_connect_data *data, void *localdata)
-{
- ENTRY;
-
- if (exp == NULL || obd == NULL || cluuid == NULL)
- RETURN(-EINVAL);
-
- tgt_counter_incr(exp, LPROC_MGS_CONNECT);
+ RETURN(rc);
- if (data != NULL) {
- data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
- data->ocd_version = LUSTRE_VERSION_CODE;
- exp->exp_connect_data = *data;
- }
+out_disconnect:
+ class_disconnect(lexp);
- RETURN(mgs_export_stats_init(obd, exp, localdata));
+ return rc;
}
static int mgs_obd_disconnect(struct obd_export *exp)
RETURN(rc);
}
+/**
+ * Health check hook (installed as .o_health_check) for the MGS device.
+ *
+ * Queries ptlrpc_service_health_check() for the MGS service while holding
+ * mgs_health_mutex. Elsewhere in this file the same mutex is held around
+ * ptlrpc_unregister_service(mgs->mgs_service), presumably so the check never
+ * runs concurrently with the service being unregistered.
+ *
+ * \param[in] env	execution environment (unused here)
+ * \param[in] obd	MGS obd device
+ *
+ * \retval 0	the service health check returned 0
+ * \retval 1	the service health check returned any non-zero value
+ */
+static int mgs_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+	struct mgs_device *mgs = lu2mgs_dev(obd->obd_lu_dev);
+	int unhealthy;
+
+	mutex_lock(&mgs->mgs_health_mutex);
+	unhealthy = ptlrpc_service_health_check(mgs->mgs_service);
+	mutex_unlock(&mgs->mgs_health_mutex);
+
+	/* Collapse any non-zero result to exactly 1 */
+	return !!unhealthy;
+}
+
/* use obd ops to offer management infrastructure */
-static struct obd_ops mgs_obd_device_ops = {
+static const struct obd_ops mgs_obd_device_ops = {
.o_owner = THIS_MODULE,
.o_connect = mgs_obd_connect,
.o_reconnect = mgs_obd_reconnect,
.o_init_export = mgs_init_export,
.o_destroy_export = mgs_destroy_export,
.o_iocontrol = mgs_iocontrol,
+ .o_health_check = mgs_health_check,
};
static int __init mgs_init(void)
{
- struct lprocfs_static_vars lvars;
-
- lprocfs_mgs_init_vars(&lvars);
- return class_register_type(&mgs_obd_device_ops, NULL, NULL,
-#ifndef HAVE_ONLY_PROCFS_SEQ
- lvars.module_vars,
-#endif
- LUSTRE_MGS_NAME, &mgs_device_type);
+	/*
+	 * Module init: register the MGS obd type under LUSTRE_MGS_NAME with
+	 * its obd ops and lu device type, returning class_register_type()'s
+	 * result. The lprocfs static vars are no longer passed.
+	 * NOTE(review): the meaning of the `true` and NULL arguments is set
+	 * by class_register_type()'s prototype, which is not shown here.
+	 */
+ return class_register_type(&mgs_obd_device_ops, NULL, true, NULL,
+ LUSTRE_MGS_NAME, &mgs_device_type);
}
-static void /*__exit*/ mgs_exit(void)
+/* Module exit: unregister the obd type registered under LUSTRE_MGS_NAME. */
+static void __exit mgs_exit(void)
{
class_unregister_type(LUSTRE_MGS_NAME);
}
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Management Server (MGS)");
+MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
+MODULE_DESCRIPTION("Lustre Management Server (MGS)");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
MODULE_LICENSE("GPL");
module_init(mgs_init);