X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmgs%2Fmgs_handler.c;h=096c27e681a0dc16302563fa8b30f6c303f218af;hp=040348306b7d48cce6b815fd8231fb77be227e06;hb=1f6cb3534e74f0c9462008c8088b5734b64ed41c;hpb=392471b575ad15eb64a10e83e3080aeaf94bca9e diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 0403483..096c27e 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2010, 2013, Intel Corporation. + * Copyright (c) 2010, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -45,7 +41,9 @@ #include #include -#include +#include +#include +#include #include "mgs_internal.h" @@ -59,6 +57,7 @@ static int mgs_connect(struct tgt_session_info *tsi) ENTRY; + CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, cfs_fail_val); rc = tgt_connect(tsi); if (rc) RETURN(rc); @@ -92,11 +91,19 @@ static int mgs_exception(struct tgt_session_info *tsi) RETURN(0); } +static inline bool str_starts_with(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) static int mgs_set_info(struct tgt_session_info *tsi) { struct mgs_thread_info *mgi; struct mgs_send_param *msp, *rep_msp; struct lustre_cfg *lcfg; + size_t param_len; + char *s; int rc; ENTRY; @@ -109,17 +116,33 @@ static int mgs_set_info(struct tgt_session_info *tsi) if (msp == NULL) RETURN(err_serious(-EFAULT)); - /* Construct lustre_cfg structure to pass to function mgs_setparam */ + param_len = strnlen(msp->mgs_param, sizeof(msp->mgs_param)); + if (param_len == 0 || param_len == sizeof(msp->mgs_param)) + RETURN(-EINVAL); + + /* We only allow '*.lov.stripe{size,count,offset}=*' from an RPC. */ + s = strchr(msp->mgs_param, '.'); + if (s == NULL) + RETURN(-EINVAL); + + if (!str_starts_with(s + 1, "lov.stripesize=") && + !str_starts_with(s + 1, "lov.stripecount=") && + !str_starts_with(s + 1, "lov.stripeoffset=")) + RETURN(-EINVAL); + + /* Construct lustre_cfg structure to pass to function mgs_set_param */ lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL); lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, msp->mgs_param); - lcfg = lustre_cfg_new(LCFG_PARAM, &mgi->mgi_bufs); + OBD_ALLOC(lcfg, lustre_cfg_len(mgi->mgi_bufs.lcfg_bufcount, + mgi->mgi_bufs.lcfg_buflen)); + if (!lcfg) + RETURN(-ENOMEM); + lustre_cfg_init(lcfg, LCFG_PARAM, &mgi->mgi_bufs); - rc = mgs_setparam(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg, - mgi->mgi_fsname); + rc = mgs_set_param(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg); if (rc) { - LCONSOLE_WARN("%s: Unable to set parameter %s for %s: %d\n", - tgt_name(tsi->tsi_tgt), msp->mgs_param, - mgi->mgi_fsname, rc); + LCONSOLE_WARN("%s: Unable to set parameter %s: %d\n", + tgt_name(tsi->tsi_tgt), msp->mgs_param, rc); GOTO(out_cfg, rc); } @@ -128,14 +151,16 @@ static int mgs_set_info(struct tgt_session_info *tsi) *rep_msp = *msp; EXIT; out_cfg: - lustre_cfg_free(lcfg); + OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens)); return rc; } +#endif enum ast_type { - AST_CONFIG = 1, - AST_PARAMS = 2, - AST_IR = 3 + AST_CONFIG = 1, + AST_PARAMS = 2, + AST_IR = 3, + AST_BARRIER = 4, }; static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags, @@ -143,8 +168,7 @@ static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags, { ENTRY; - if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | - LDLM_FL_BLOCK_CONV))) { + if (!(flags & LDLM_FL_BLOCKED_MASK)) { struct fs_db *fsdb; /* l_ast_data is used as a marker to avoid cancel ldlm lock @@ -170,6 +194,8 @@ static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags, case AST_IR: mgs_ir_notify_complete(fsdb); break; + case AST_BARRIER: + break; default: LBUG(); } @@ -200,10 +226,18 @@ static int mgs_completion_ast_ir(struct ldlm_lock *lock, __u64 flags, return mgs_completion_ast_generic(lock, flags, cbdata, AST_IR); } +static int mgs_completion_ast_barrier(struct ldlm_lock *lock, __u64 flags, + void *cbdata) +{ + return mgs_completion_ast_generic(lock, flags, cbdata, AST_BARRIER); +} + void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) { ldlm_completion_callback cp = NULL; - struct lustre_handle lockh = { 0 }; + struct lustre_handle lockh = { + .cookie = 0, + }; struct ldlm_res_id res_id; __u64 flags = LDLM_FL_ATOMIC_CB; int rc; @@ -214,6 +248,7 @@ void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) LASSERT(rc == 0); switch (type) { case CONFIG_T_CONFIG: + case CONFIG_T_NODEMAP: cp = mgs_completion_ast_config; if (test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags)) rc = -EALREADY; @@ -225,19 +260,24 @@ void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) break; case CONFIG_T_RECOVER: cp = mgs_completion_ast_ir; + break; + case CONFIG_T_BARRIER: + cp = mgs_completion_ast_barrier; + break; default: break; } if (!rc) { LASSERT(cp != NULL); - rc = ldlm_cli_enqueue_local(mgs->mgs_obd->obd_namespace, + rc = ldlm_cli_enqueue_local(NULL, mgs->mgs_obd->obd_namespace, &res_id, LDLM_PLAIN, NULL, LCK_EX, &flags, ldlm_blocking_ast, cp, NULL, fsdb, 0, LVB_T_NONE, NULL, &lockh); if (rc != ELDLM_OK) { - CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n", + CERROR("%s: can't take cfg lock for %#llx/%#llx : rc = %d\n", + mgs->mgs_obd->obd_name, le64_to_cpu(res_id.name[0]), le64_to_cpu(res_id.name[1]), rc); @@ -279,10 +319,7 @@ static int mgs_check_target(const struct lu_env *env, rc = 1; } else { /* Index is correctly marked as used */ - - /* If the logs don't contain the mti_nids then add - them as failover nids */ - rc = mgs_check_failnid(env, mgs, mti); + rc = 0; } RETURN(rc); @@ -315,12 +352,14 @@ static int mgs_check_failover_reg(struct mgs_target_info *mti) /* Called whenever a target starts up. Flags indicate first connect, etc. */ static int mgs_target_reg(struct tgt_session_info *tsi) { - struct obd_device *obd = tsi->tsi_exp->exp_obd; - struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp); - struct mgs_target_info *mti, *rep_mti; - struct fs_db *fsdb; - int opc; - int rc = 0; + struct obd_device *obd = tsi->tsi_exp->exp_obd; + struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp); + struct mgs_target_info *mti, *rep_mti; + struct fs_db *b_fsdb = NULL; /* barrier fsdb */ + struct fs_db *c_fsdb = NULL; /* config fsdb */ + char barrier_name[20]; + int opc; + int rc = 0; ENTRY; @@ -336,117 +375,185 @@ static int mgs_target_reg(struct tgt_session_info *tsi) RETURN(err_serious(-EFAULT)); } + down_read(&mgs->mgs_barrier_rwsem); + if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data, IMP_RECOV)) opc = mti->mti_flags & LDD_F_OPC_MASK; else opc = LDD_F_OPC_REG; - if (opc == LDD_F_OPC_READY) { - CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n", - mti->mti_fsname, mti->mti_stripe_index); + if (opc == LDD_F_OPC_READY) { + CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n", + mti->mti_fsname, mti->mti_stripe_index); rc = mgs_ir_update(tsi->tsi_env, mgs, mti); - if (rc) { - LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE)); - CERROR("Update IR return with %d(ignore and IR " - "disabled)\n", rc); - } - GOTO(out_nolock, rc); - } + if (rc) { + LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE)); + CERROR("%s: Update IR return failure: rc = %d\n", + mti->mti_fsname, rc); + } + + GOTO(out_norevoke, rc); + } - /* Do not support unregistering right now. */ - if (opc != LDD_F_OPC_REG) - GOTO(out_nolock, rc = -EINVAL); + /* Do not support unregistering right now. */ + if (opc != LDD_F_OPC_REG) + GOTO(out_norevoke, rc = -EINVAL); - CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n", - mti->mti_fsname, mti->mti_stripe_index); + snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s", + mti->mti_fsname, BARRIER_FILENAME); + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, barrier_name, &b_fsdb); + if (rc) { + CERROR("%s: Can't get db for %s: rc = %d\n", + mti->mti_fsname, barrier_name, rc); - if (mti->mti_flags & LDD_F_NEED_INDEX) - mti->mti_flags |= LDD_F_WRITECONF; + GOTO(out_norevoke, rc); + } + + CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n", + mti->mti_fsname, mti->mti_stripe_index); + + if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { + if (b_fsdb->fsdb_barrier_status == BS_FREEZING_P1 || + b_fsdb->fsdb_barrier_status == BS_FREEZING_P2 || + b_fsdb->fsdb_barrier_status == BS_FROZEN) { + LCONSOLE_WARN("%s: the system is in barrier, refuse " + "the connection from MDT %s temporary\n", + obd->obd_name, mti->mti_svname); + + GOTO(out_norevoke, rc = -EBUSY); + } + + if (!(exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_BARRIER) && + !b_fsdb->fsdb_barrier_disabled) { + LCONSOLE_WARN("%s: the MDT %s does not support write " + "barrier, so disable barrier on the " + "whole system.\n", + obd->obd_name, mti->mti_svname); - if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 | - LDD_F_UPDATE))) { - /* We're just here as a startup ping. */ - CDEBUG(D_MGS, "Server %s is running on %s\n", + b_fsdb->fsdb_barrier_disabled = 1; + } + } + + if (mti->mti_flags & LDD_F_NEED_INDEX) + mti->mti_flags |= LDD_F_WRITECONF; + + if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPDATE))) { + /* We're just here as a startup ping. */ + CDEBUG(D_MGS, "Server %s is running on %s\n", mti->mti_svname, obd_export_nid2str(tsi->tsi_exp)); rc = mgs_check_target(tsi->tsi_env, mgs, mti); - /* above will set appropriate mti flags */ - if (rc <= 0) - /* Nothing wrong, or fatal error */ - GOTO(out_nolock, rc); + /* above will set appropriate mti flags */ + if (rc <= 0) + /* Nothing wrong, or fatal error */ + GOTO(out_norevoke, rc); } else if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)) { rc = mgs_check_failover_reg(mti); if (rc) - GOTO(out_nolock, rc); - } + GOTO(out_norevoke, rc); + } - OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); - if (mti->mti_flags & LDD_F_WRITECONF) { - if (mti->mti_flags & LDD_F_SV_TYPE_MDT && - mti->mti_stripe_index == 0) { + if (mti->mti_flags & LDD_F_WRITECONF) { + if (mti->mti_flags & LDD_F_SV_TYPE_MDT && + mti->mti_stripe_index == 0) { + mgs_put_fsdb(mgs, b_fsdb); + b_fsdb = NULL; rc = mgs_erase_logs(tsi->tsi_env, mgs, mti->mti_fsname); - LCONSOLE_WARN("%s: Logs for fs %s were removed by user " - "request. All servers must be restarted " - "in order to regenerate the logs." - "\n", obd->obd_name, mti->mti_fsname); - } else if (mti->mti_flags & - (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { + LCONSOLE_WARN("%s: Logs for fs %s were removed by user " + "request. All servers must be restarted " + "in order to regenerate the logs: rc = %d" + "\n", obd->obd_name, mti->mti_fsname, rc); + if (rc && rc != -ENOENT) + GOTO(out_norevoke, rc); + + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, + barrier_name, &b_fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", + barrier_name, rc); + + GOTO(out_norevoke, rc); + } + + if (!(exp_connect_flags(tsi->tsi_exp) & + OBD_CONNECT_BARRIER)) { + LCONSOLE_WARN("%s: the MDT %s does not support " + "write barrier, disable barrier " + "on the whole system.\n", + obd->obd_name, mti->mti_svname); + + b_fsdb->fsdb_barrier_disabled = 1; + } + } else if (mti->mti_flags & + (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { rc = mgs_erase_log(tsi->tsi_env, mgs, mti->mti_svname); - LCONSOLE_WARN("%s: Regenerating %s log by user " - "request.\n", - obd->obd_name, mti->mti_svname); - } - mti->mti_flags |= LDD_F_UPDATE; - /* Erased logs means start from scratch. */ - mti->mti_flags &= ~LDD_F_UPGRADE14; - if (rc) - GOTO(out_nolock, rc); - } + LCONSOLE_WARN("%s: Regenerating %s log by user " + "request: rc = %d\n", + obd->obd_name, mti->mti_svname, rc); + if (rc) + GOTO(out_norevoke, rc); + } - rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &fsdb); - if (rc) { - CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); - GOTO(out_nolock, rc); - } + mti->mti_flags |= LDD_F_UPDATE; + } - /* - * Log writing contention is handled by the fsdb_mutex. - * - * It should be alright if someone was reading while we were - * updating the logs - if we revoke at the end they will just update - * from where they left off. - */ + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &c_fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); - if (mti->mti_flags & LDD_F_UPGRADE14) { - CERROR("Can't upgrade from 1.4 (%d)\n", rc); - GOTO(out, rc); + GOTO(out_norevoke, rc); } + /* + * Log writing contention is handled by the fsdb_mutex. + * + * It should be alright if someone was reading while we were + * updating the logs - if we revoke at the end they will just update + * from where they left off. + */ if (mti->mti_flags & LDD_F_UPDATE) { CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, mti->mti_stripe_index); /* create or update the target log and update the client/mdt logs */ - rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, fsdb); + rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, c_fsdb); if (rc) { CERROR("Failed to write %s log (%d)\n", mti->mti_svname, rc); GOTO(out, rc); } - mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | - LDD_F_NEED_INDEX | LDD_F_WRITECONF | - LDD_F_UPGRADE14); - mti->mti_flags |= LDD_F_REWRITE_LDD; + mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | + LDD_F_NEED_INDEX | LDD_F_WRITECONF); + mti->mti_flags |= LDD_F_REWRITE_LDD; } out: - mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG); + mgs_revoke_lock(mgs, c_fsdb, CONFIG_T_CONFIG); + +out_norevoke: + if (!rc && mti->mti_flags & LDD_F_SV_TYPE_MDT && b_fsdb) { + if (!c_fsdb) { + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, + mti->mti_fsname, &c_fsdb); + if (rc) + CERROR("Fail to get db for %s: %d\n", + mti->mti_fsname, rc); + } + + if (c_fsdb) { + memcpy(b_fsdb->fsdb_mdt_index_map, + c_fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); + b_fsdb->fsdb_mdt_count = c_fsdb->fsdb_mdt_count; + } + } + + up_read(&mgs->mgs_barrier_rwsem); -out_nolock: CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, mti->mti_stripe_index, rc); /* An error flag is set in the mti reply rather than an error code */ @@ -459,6 +566,10 @@ out_nolock: /* Flush logs to disk */ dt_sync(tsi->tsi_env, mgs->mgs_bottom); + if (b_fsdb) + mgs_put_fsdb(mgs, b_fsdb); + if (c_fsdb) + mgs_put_fsdb(mgs, c_fsdb); RETURN(rc); } @@ -490,6 +601,9 @@ static int mgs_config_read(struct tgt_session_info *tsi) case CONFIG_T_RECOVER: rc = mgs_get_ir_logs(req); break; + case CONFIG_T_NODEMAP: + rc = nodemap_get_config_req(req->rq_export->exp_obd, req); + break; case CONFIG_T_CONFIG: rc = -EOPNOTSUPP; break; @@ -537,12 +651,15 @@ static int mgs_llog_open(struct tgt_session_info *tsi) logname = req_capsule_client_get(tsi->tsi_pill, &RMF_NAME); if (logname) { - char *ptr = strchr(logname, '-'); - int len = (int)(ptr - logname); + char *ptr = strrchr(logname, '-'); + int len = (ptr != NULL) ? (int)(ptr - logname) : 0; if (ptr == NULL || len >= sizeof(mgi->mgi_fsname)) { - LCONSOLE_WARN("%s: non-config logname received: %s\n", - tgt_name(tsi->tsi_tgt), logname); + if (strcmp(logname, PARAMS_FILENAME) != 0) + LCONSOLE_WARN("%s: non-config logname " + "received: %s\n", + tgt_name(tsi->tsi_tgt), + logname); /* not error, this can be llog test name */ } else { strncpy(mgi->mgi_fsname, logname, len); @@ -573,7 +690,7 @@ static inline int mgs_init_export(struct obd_export *exp) /* init mgs_export_data for fsc */ spin_lock_init(&data->med_lock); - CFS_INIT_LIST_HEAD(&data->med_clients); + INIT_LIST_HEAD(&data->med_clients); spin_lock(&exp->exp_lock); exp->exp_connecting = 1; @@ -602,22 +719,43 @@ static inline int mgs_destroy_export(struct obd_export *exp) RETURN(0); } -static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname) +static int mgs_extract_fs_pool(char *arg, char *fsname, char *poolname) { - char *ptr; + size_t len; + char *ptr; - ENTRY; - for (ptr = arg; (*ptr != '\0') && (*ptr != '.'); ptr++ ) { - *fsname = *ptr; - fsname++; - } - if (*ptr == '\0') - return -EINVAL; - *fsname = '\0'; - ptr++; - strcpy(poolname, ptr); + ENTRY; + /* Validate name */ + for (ptr = arg; *ptr != '\0'; ptr++) { + if (!isalnum(*ptr) && *ptr != '_' && *ptr != '-' && *ptr != '.') + return -EINVAL; + } - RETURN(0); + /* Test for fsname.poolname format. + * strlen() test if poolname is empty + */ + ptr = strchr(arg, '.'); + if (!ptr || !strlen(ptr)) + return -EINVAL; + ptr++; + + /* Also make sure poolname is not to long. */ + if (strlen(ptr) > LOV_MAXPOOLNAME) + return -ENAMETOOLONG; + strncpy(poolname, ptr, strlen(ptr)); + + /* Test if fsname is empty */ + len = strlen(arg) - strlen(ptr) - 1; + if (!len) + return -EINVAL; + + /* or too long */ + if (len > LUSTRE_MAXFSNAME) + return -ENAMETOOLONG; + + strncpy(fsname, arg, len); + + RETURN(0); } static int mgs_iocontrol_nodemap(const struct lu_env *env, @@ -625,7 +763,7 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, struct obd_ioctl_data *data) { struct lustre_cfg *lcfg = NULL; - struct lu_nodemap *nodemap; + struct fs_db *fsdb; lnet_nid_t nid; const char *nodemap_name = NULL; const char *nidstr = NULL; @@ -633,8 +771,9 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, const char *idtype_str = NULL; char *param = NULL; char fs_idstr[16]; + char name_buf[LUSTRE_NODEMAP_NAME_LENGTH + 1]; int rc = 0; - __u32 client_id; + unsigned long client_id; __u32 fs_id; __u32 cmd; int idtype; @@ -647,7 +786,7 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, GOTO(out, rc = -EINVAL); } - if (data->ioc_plen1 > PAGE_CACHE_SIZE) + if (data->ioc_plen1 > PAGE_SIZE) GOTO(out, rc = -E2BIG); OBD_ALLOC(lcfg, data->ioc_plen1); @@ -681,10 +820,10 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, GOTO(out_lcfg, rc = -EINVAL); nidstr = lustre_cfg_string(lcfg, 1); nid = libcfs_str2nid(nidstr); - nodemap = nodemap_classify_nid(nid); - memset(data->ioc_pbuf1, 0, data->ioc_plen1); - if (copy_to_user(data->ioc_pbuf1, nodemap->nm_name, - strlen(nodemap->nm_name)) != 0) + nodemap_test_nid(nid, name_buf, sizeof(name_buf)); + rc = copy_to_user(data->ioc_pbuf1, name_buf, + MIN(data->ioc_plen1, sizeof(name_buf))); + if (rc != 0) GOTO(out_lcfg, rc = -EFAULT); break; case LCFG_NODEMAP_TEST_ID: @@ -695,16 +834,18 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, client_idstr = lustre_cfg_string(lcfg, 3); nid = libcfs_str2nid(nidstr); - nodemap = nodemap_classify_nid(nid); - client_id = simple_strtoul(client_idstr, NULL, 10); - if (strcmp(idtype_str, "uid") == 0) idtype = NODEMAP_UID; else idtype = NODEMAP_GID; - fs_id = nodemap_map_id(nodemap, idtype, NODEMAP_CLIENT_TO_FS, - client_id); + rc = kstrtoul(client_idstr, 10, &client_id); + if (rc != 0) + GOTO(out_lcfg, rc = -EINVAL); + + rc = nodemap_test_id(nid, idtype, client_id, &fs_id); + if (rc < 0) + GOTO(out_lcfg, rc = -EINVAL); if (data->ioc_plen1 < sizeof(fs_idstr)) GOTO(out_lcfg, rc = -EINVAL); @@ -720,6 +861,8 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, case LCFG_NODEMAP_DEL_UIDMAP: case LCFG_NODEMAP_ADD_GIDMAP: case LCFG_NODEMAP_DEL_GIDMAP: + case LCFG_NODEMAP_SET_FILESET: + case LCFG_NODEMAP_SET_SEPOL: if (lcfg->lcfg_bufcount != 3) GOTO(out_lcfg, rc = -EINVAL); nodemap_name = lustre_cfg_string(lcfg, 1); @@ -728,8 +871,11 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, break; case LCFG_NODEMAP_ADMIN: case LCFG_NODEMAP_TRUSTED: + case LCFG_NODEMAP_DENY_UNKNOWN: case LCFG_NODEMAP_SQUASH_UID: case LCFG_NODEMAP_SQUASH_GID: + case LCFG_NODEMAP_MAP_MODE: + case LCFG_NODEMAP_AUDIT_MODE: if (lcfg->lcfg_bufcount != 4) GOTO(out_lcfg, rc = -EINVAL); nodemap_name = lustre_cfg_string(lcfg, 1); @@ -747,6 +893,16 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, GOTO(out_lcfg, rc); } + /* revoke nodemap lock */ + rc = mgs_find_or_make_fsdb(env, mgs, LUSTRE_NODEMAP_NAME, &fsdb); + if (rc < 0) { + CWARN("%s: cannot make nodemap fsdb: rc = %d\n", + mgs->mgs_obd->obd_name, rc); + } else { + mgs_revoke_lock(mgs, fsdb, CONFIG_T_NODEMAP); + mgs_put_fsdb(mgs, fsdb); + } + out_lcfg: OBD_FREE(lcfg, data->ioc_plen1); out: @@ -773,7 +929,7 @@ static int mgs_iocontrol_pool(const struct lu_env *env, GOTO(out_pool, rc = -EINVAL); } - if (data->ioc_plen1 > PAGE_CACHE_SIZE) + if (data->ioc_plen1 > PAGE_SIZE) GOTO(out_pool, rc = -E2BIG); OBD_ALLOC(lcfg, data->ioc_plen1); @@ -835,8 +991,8 @@ out_pool: } /* from mdt_iocontrol */ -int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg) +static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void __user *uarg) { struct mgs_device *mgs = exp2mgs_dev(exp); struct obd_ioctl_data *data = karg; @@ -852,9 +1008,8 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { - case OBD_IOC_PARAM: { - struct mgs_thread_info *mgi = mgs_env_info(&env); - struct lustre_cfg *lcfg; + case OBD_IOC_PARAM: { + struct lustre_cfg *lcfg; if (data->ioc_type != LUSTRE_CFG_TYPE) { CERROR("%s: unknown cfg record type: %d\n", @@ -871,7 +1026,7 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (lcfg->lcfg_bufcount < 1) GOTO(out_free, rc = -EINVAL); - rc = mgs_setparam(&env, mgs, lcfg, mgi->mgi_fsname); + rc = mgs_set_param(&env, mgs, lcfg); if (rc) CERROR("%s: setparam err: rc = %d\n", exp->exp_obd->obd_name, rc); @@ -882,32 +1037,37 @@ out_free: case OBD_IOC_REPLACE_NIDS: { if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { - CERROR("No device name specified!\n"); rc = -EINVAL; + CERROR("%s: no device or fsname specified: rc = %d\n", + exp->exp_obd->obd_name, rc); break; } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("Device name is not NUL terminated!\n"); - rc = -EINVAL; + if (data->ioc_inllen1 > MTI_NAME_MAXLEN) { + rc = -EOVERFLOW; + CERROR("%s: device or fsname is too long: rc = %d\n", + exp->exp_obd->obd_name, rc); break; } - if (data->ioc_plen1 > MTI_NAME_MAXLEN) { - CERROR("Device name is too long\n"); - rc = -EOVERFLOW; + if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: device or fsname is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); break; } if (!data->ioc_inllen2 || !data->ioc_inlbuf2) { - CERROR("No NIDs were specified!\n"); rc = -EINVAL; + CERROR("%s: no NIDs specified: rc = %d\n", + exp->exp_obd->obd_name, rc); break; } if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) { - CERROR("NID list is not NUL terminated!\n"); rc = -EINVAL; + CERROR("%s: NID list is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); break; } @@ -921,24 +1081,57 @@ out_free: break; } + case OBD_IOC_CLEAR_CONFIGS: { + if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { + rc = -EINVAL; + CERROR("%s: no device or fsname specified: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inllen1 > MTI_NAME_MAXLEN) { + rc = -EOVERFLOW; + CERROR("%s: device or fsname is too long: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: device or fsname is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); + break; + } + + /* remove records marked SKIP from config logs */ + rc = mgs_clear_configs(&env, mgs, data->ioc_inlbuf1); + if (rc) + CERROR("%s: error clearing config log: rc = %d\n", + exp->exp_obd->obd_name, rc); + + break; + } + case OBD_IOC_POOL: rc = mgs_iocontrol_pool(&env, mgs, data); break; + case OBD_IOC_BARRIER: + rc = mgs_iocontrol_barrier(&env, mgs, data); + break; + case OBD_IOC_NODEMAP: rc = mgs_iocontrol_nodemap(&env, mgs, data); break; - case OBD_IOC_DUMP_LOG: { - struct llog_ctxt *ctxt; - - ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); - rc = class_config_dump_llog(&env, ctxt, data->ioc_inlbuf1, - NULL); - llog_ctxt_put(ctxt); + case OBD_IOC_LCFG_FORK: + rc = mgs_lcfg_fork(&env, mgs, data->ioc_inlbuf1, + data->ioc_inlbuf2); + break; + case OBD_IOC_LCFG_ERASE: + rc = mgs_lcfg_erase(&env, mgs, data->ioc_inlbuf1); break; - } case OBD_IOC_CATLOGLIST: rc = mgs_list_logs(&env, mgs, data); @@ -1009,7 +1202,9 @@ TGT_RPC_HANDLER(MGS_FIRST_OPC, 0, MGS_DISCONNECT, mgs_disconnect, &RQF_MDS_DISCONNECT, LUSTRE_OBD_VERSION), TGT_MGS_HDL_VAR(0, MGS_EXCEPTION, mgs_exception), +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) TGT_MGS_HDL (HABEO_REFERO | MUTABOR, MGS_SET_INFO, mgs_set_info), +#endif TGT_MGS_HDL (HABEO_REFERO | MUTABOR, MGS_TARGET_REG, mgs_target_reg), TGT_MGS_HDL_VAR(0, MGS_TARGET_DEL, mgs_target_del), TGT_MGS_HDL (HABEO_REFERO, MGS_CONFIG_READ, mgs_config_read), @@ -1040,7 +1235,6 @@ static struct tgt_handler mgs_llog_handlers[] = { TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_CREATE, mgs_llog_open), TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_NEXT_BLOCK, tgt_llog_next_block), TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_READ_HEADER, tgt_llog_read_header), -TGT_LLOG_HDL_VAR(0, LLOG_ORIGIN_HANDLE_CLOSE, tgt_llog_close), TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_PREV_BLOCK, tgt_llog_prev_block), }; @@ -1082,7 +1276,6 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, struct obd_device *obd; struct lustre_mount_info *lmi; struct llog_ctxt *ctxt; - struct fs_db *fsdb = NULL; int rc; ENTRY; @@ -1144,8 +1337,14 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, /* Internal mgs setup */ mgs_init_fsdb_list(mgs); mutex_init(&mgs->mgs_mutex); - mgs->mgs_start_time = cfs_time_current_sec(); + mgs->mgs_start_time = ktime_get_real_seconds(); spin_lock_init(&mgs->mgs_lock); + mutex_init(&mgs->mgs_health_mutex); + init_rwsem(&mgs->mgs_barrier_rwsem); + + rc = mgs_lcfg_rename(env, mgs); + if (rc) + GOTO(err_llog, rc); rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3)); if (rc != 0) { @@ -1156,12 +1355,15 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, /* Setup params fsdb and log, so that other servers can make a local * copy successfully when they are mounted. See LU-4783 */ - rc = mgs_params_fsdb_setup(env, mgs, fsdb); + rc = mgs_params_fsdb_setup(env, mgs); if (rc) /* params fsdb and log can be setup later */ CERROR("%s: %s fsdb and log setup failed: rc = %d\n", obd->obd_name, PARAMS_FILENAME, rc); + /* Setup _mgs fsdb, useful for srpc */ + mgs__mgs_fsdb_setup(env, mgs); + ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "mgs_ldlm_client", &obd->obd_ldlm_client); @@ -1189,7 +1391,8 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, }; /* Start the service threads */ - mgs->mgs_service = ptlrpc_register_service(&conf, obd->obd_proc_entry); + mgs->mgs_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(mgs->mgs_service)) { rc = PTR_ERR(mgs->mgs_service); CERROR("failed to start mgs service: %d\n", rc); @@ -1224,14 +1427,14 @@ err_ns: err_ops: lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0); if (!cfs_hash_is_empty(mgs2lu_dev(mgs)->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL); lu_site_print(env, mgs2lu_dev(mgs)->ld_site, &msgdata, lu_cdebug_printer); } obd_disconnect(mgs->mgs_bottom_exp); err_lmi: if (lmi) - server_put_mount(lustre_cfg_string(lcfg, 0)); + server_put_mount(lustre_cfg_string(lcfg, 0), true); RETURN(rc); } @@ -1275,7 +1478,7 @@ static int mgs_object_init(const struct lu_env *env, struct lu_object *o, else rc = -ENOMEM; - return 0; + return rc; } static void mgs_object_free(const struct lu_env *env, struct lu_object *o) @@ -1296,15 +1499,15 @@ static int mgs_object_print(const struct lu_env *env, void *cookie, return (*p)(env, cookie, LUSTRE_MGS_NAME"-object@%p", o); } -struct lu_object_operations mgs_lu_obj_ops = { +static struct lu_object_operations mgs_lu_obj_ops = { .loo_object_init = mgs_object_init, .loo_object_free = mgs_object_free, .loo_object_print = mgs_object_print, }; -struct lu_object *mgs_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *d) +static struct lu_object *mgs_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d) { struct lu_object_header *h; struct mgs_object *o; @@ -1373,14 +1576,18 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, ping_evictor_stop(); + mutex_lock(&mgs->mgs_health_mutex); ptlrpc_unregister_service(mgs->mgs_service); + mutex_unlock(&mgs->mgs_health_mutex); + + mgs_params_fsdb_cleanup(env, mgs); + mgs_cleanup_fsdb_list(mgs); + ldlm_namespace_free_prior(obd->obd_namespace, NULL, 1); obd_exports_barrier(obd); obd_zombie_barrier(); tgt_fini(env, &mgs->mgs_lut); - mgs_params_fsdb_cleanup(env, mgs); - mgs_cleanup_fsdb_list(mgs); lproc_mgs_cleanup(mgs); ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); @@ -1391,19 +1598,19 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, mgs_fs_cleanup(env, mgs); - ldlm_namespace_free(obd->obd_namespace, NULL, 1); + ldlm_namespace_free_post(obd->obd_namespace); obd->obd_namespace = NULL; lu_site_purge(env, d->ld_site, ~0); if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL); lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer); } LASSERT(mgs->mgs_bottom_exp); obd_disconnect(mgs->mgs_bottom_exp); - server_put_mount(obd->obd_name); + server_put_mount(obd->obd_name, true); RETURN(NULL); } @@ -1435,12 +1642,38 @@ static struct lu_device_type mgs_device_type = { .ldt_ctx_tags = LCT_MG_THREAD }; +static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data, void *localdata) +{ + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + tgt_counter_incr(exp, LPROC_MGS_CONNECT); + + if (data != NULL) { + data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; + + if (data->ocd_connect_flags & OBD_CONNECT_FLAGS2) + data->ocd_connect_flags2 &= MGS_CONNECT_SUPPORTED2; + + exp->exp_connect_data = *data; + data->ocd_version = LUSTRE_VERSION_CODE; + } + + RETURN(mgs_export_stats_init(obd, exp, localdata)); +} + static int mgs_obd_connect(const struct lu_env *env, struct obd_export **exp, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data, void *localdata) { struct obd_export *lexp; - struct lustre_handle conn = { 0 }; + struct lustre_handle conn = { + .cookie = 0, + }; int rc; ENTRY; @@ -1456,41 +1689,18 @@ static int mgs_obd_connect(const struct lu_env *env, struct obd_export **exp, if (lexp == NULL) RETURN(-EFAULT); - if (data != NULL) { - data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - data->ocd_version = LUSTRE_VERSION_CODE; - lexp->exp_connect_data = *data; - } - - tgt_counter_incr(lexp, LPROC_MGS_CONNECT); - - rc = mgs_export_stats_init(obd, lexp, localdata); + rc = mgs_obd_reconnect(env, lexp, obd, cluuid, data, localdata); if (rc) - class_disconnect(lexp); - else - *exp = lexp; + GOTO(out_disconnect, rc); - RETURN(rc); -} + *exp = lexp; -static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp, - struct obd_device *obd, struct obd_uuid *cluuid, - struct obd_connect_data *data, void *localdata) -{ - ENTRY; - - if (exp == NULL || obd == NULL || cluuid == NULL) - RETURN(-EINVAL); + RETURN(rc); - tgt_counter_incr(exp, LPROC_MGS_CONNECT); +out_disconnect: + class_disconnect(lexp); - if (data != NULL) { - data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - data->ocd_version = LUSTRE_VERSION_CODE; - exp->exp_connect_data = *data; - } - - RETURN(mgs_export_stats_init(obd, exp, localdata)); + return rc; } static int mgs_obd_disconnect(struct obd_export *exp) @@ -1511,6 +1721,18 @@ static int mgs_obd_disconnect(struct obd_export *exp) RETURN(rc); } +static int mgs_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct mgs_device *mgs = lu2mgs_dev(obd->obd_lu_dev); + int rc = 0; + + mutex_lock(&mgs->mgs_health_mutex); + rc |= ptlrpc_service_health_check(mgs->mgs_service); + mutex_unlock(&mgs->mgs_health_mutex); + + return rc != 0 ? 1 : 0; +} + /* use obd ops to offer management infrastructure */ static struct obd_ops mgs_obd_device_ops = { .o_owner = THIS_MODULE, @@ -1520,24 +1742,23 @@ static struct obd_ops mgs_obd_device_ops = { .o_init_export = mgs_init_export, .o_destroy_export = mgs_destroy_export, .o_iocontrol = mgs_iocontrol, + .o_health_check = mgs_health_check, }; static int __init mgs_init(void) { return class_register_type(&mgs_obd_device_ops, NULL, true, NULL, -#ifndef HAVE_ONLY_PROCFS_SEQ - NULL, -#endif LUSTRE_MGS_NAME, &mgs_device_type); } -static void /*__exit*/ mgs_exit(void) +static void __exit mgs_exit(void) { class_unregister_type(LUSTRE_MGS_NAME); } -MODULE_AUTHOR("Sun Microsystems, Inc. "); -MODULE_DESCRIPTION("Lustre Management Server (MGS)"); +MODULE_AUTHOR("OpenSFS, Inc. "); +MODULE_DESCRIPTION("Lustre Management Server (MGS)"); +MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); module_init(mgs_init);