X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmgs%2Fmgs_handler.c;h=c26270671ea600036ad35faf4ea0779b0b924881;hp=d3a00383c778fc49b212bcc634ebc1bf4121ed76;hb=6601661f96325b4971d0d1cb0be0fa01cc2ddc97;hpb=1c7f9caa99df3082a3e506673721d359147843d3 diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index d3a0038..c262706 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2010, 2014, Intel Corporation. + * Copyright (c) 2010, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -45,8 +41,9 @@ #include #include -#include -#include +#include +#include +#include #include "mgs_internal.h" @@ -94,11 +91,19 @@ static int mgs_exception(struct tgt_session_info *tsi) RETURN(0); } +static inline bool str_starts_with(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} + +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) static int mgs_set_info(struct tgt_session_info *tsi) { struct mgs_thread_info *mgi; struct mgs_send_param *msp, *rep_msp; struct lustre_cfg *lcfg; + size_t param_len; + char *s; int rc; ENTRY; @@ -111,19 +116,33 @@ static int mgs_set_info(struct tgt_session_info *tsi) if (msp == NULL) RETURN(err_serious(-EFAULT)); - /* Construct lustre_cfg structure to pass to function mgs_setparam */ + param_len = strnlen(msp->mgs_param, sizeof(msp->mgs_param)); + if (param_len == 0 || param_len == sizeof(msp->mgs_param)) + RETURN(-EINVAL); + + /* We only allow '*.lov.stripe{size,count,offset}=*' from an RPC. */ + s = strchr(msp->mgs_param, '.'); + if (s == NULL) + RETURN(-EINVAL); + + if (!str_starts_with(s + 1, "lov.stripesize=") && + !str_starts_with(s + 1, "lov.stripecount=") && + !str_starts_with(s + 1, "lov.stripeoffset=")) + RETURN(-EINVAL); + + /* Construct lustre_cfg structure to pass to function mgs_set_param */ lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL); lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, msp->mgs_param); - lcfg = lustre_cfg_new(LCFG_PARAM, &mgi->mgi_bufs); - if (lcfg == NULL) + OBD_ALLOC(lcfg, lustre_cfg_len(mgi->mgi_bufs.lcfg_bufcount, + mgi->mgi_bufs.lcfg_buflen)); + if (!lcfg) RETURN(-ENOMEM); + lustre_cfg_init(lcfg, LCFG_PARAM, &mgi->mgi_bufs); - rc = mgs_setparam(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg, - mgi->mgi_fsname); + rc = mgs_set_param(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg); if (rc) { - LCONSOLE_WARN("%s: Unable to set parameter %s for %s: %d\n", - tgt_name(tsi->tsi_tgt), msp->mgs_param, - mgi->mgi_fsname, rc); + LCONSOLE_WARN("%s: Unable to set parameter %s: %d\n", + tgt_name(tsi->tsi_tgt), msp->mgs_param, rc); GOTO(out_cfg, rc); } @@ -132,14 +151,16 @@ static int mgs_set_info(struct tgt_session_info *tsi) *rep_msp = *msp; EXIT; out_cfg: - lustre_cfg_free(lcfg); + OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens)); return rc; } +#endif enum ast_type { - AST_CONFIG = 1, - AST_PARAMS = 2, - AST_IR = 3 + AST_CONFIG = 1, + AST_PARAMS = 2, + AST_IR = 3, + AST_BARRIER = 4, }; static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags, @@ -173,6 +194,8 @@ static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags, case AST_IR: mgs_ir_notify_complete(fsdb); break; + case AST_BARRIER: + break; default: LBUG(); } @@ -203,10 +226,18 @@ static int mgs_completion_ast_ir(struct ldlm_lock *lock, __u64 flags, return mgs_completion_ast_generic(lock, flags, cbdata, AST_IR); } +static int mgs_completion_ast_barrier(struct ldlm_lock *lock, __u64 flags, + void *cbdata) +{ + return mgs_completion_ast_generic(lock, flags, cbdata, AST_BARRIER); +} + void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) { ldlm_completion_callback cp = NULL; - struct lustre_handle lockh = { 0 }; + struct lustre_handle lockh = { + .cookie = 0, + }; struct ldlm_res_id res_id; __u64 flags = LDLM_FL_ATOMIC_CB; int rc; @@ -217,6 +248,7 @@ void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) LASSERT(rc == 0); switch (type) { case CONFIG_T_CONFIG: + case CONFIG_T_NODEMAP: cp = mgs_completion_ast_config; if (test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags)) rc = -EALREADY; @@ -228,19 +260,24 @@ void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) break; case CONFIG_T_RECOVER: cp = mgs_completion_ast_ir; + break; + case CONFIG_T_BARRIER: + cp = mgs_completion_ast_barrier; + break; default: break; } if (!rc) { LASSERT(cp != NULL); - rc = ldlm_cli_enqueue_local(mgs->mgs_obd->obd_namespace, + rc = ldlm_cli_enqueue_local(NULL, mgs->mgs_obd->obd_namespace, &res_id, LDLM_PLAIN, NULL, LCK_EX, &flags, ldlm_blocking_ast, cp, NULL, fsdb, 0, LVB_T_NONE, NULL, &lockh); if (rc != ELDLM_OK) { - CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n", + CERROR("%s: can't take cfg lock for %#llx/%#llx : rc = %d\n", + mgs->mgs_obd->obd_name, le64_to_cpu(res_id.name[0]), le64_to_cpu(res_id.name[1]), rc); @@ -282,10 +319,7 @@ static int mgs_check_target(const struct lu_env *env, rc = 1; } else { /* Index is correctly marked as used */ - - /* If the logs don't contain the mti_nids then add - them as failover nids */ - rc = mgs_check_failnid(env, mgs, mti); + rc = 0; } RETURN(rc); @@ -318,12 +352,14 @@ static int mgs_check_failover_reg(struct mgs_target_info *mti) /* Called whenever a target starts up. Flags indicate first connect, etc. */ static int mgs_target_reg(struct tgt_session_info *tsi) { - struct obd_device *obd = tsi->tsi_exp->exp_obd; - struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp); - struct mgs_target_info *mti, *rep_mti; - struct fs_db *fsdb; - int opc; - int rc = 0; + struct obd_device *obd = tsi->tsi_exp->exp_obd; + struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp); + struct mgs_target_info *mti, *rep_mti; + struct fs_db *b_fsdb = NULL; /* barrier fsdb */ + struct fs_db *c_fsdb = NULL; /* config fsdb */ + char barrier_name[20]; + int opc; + int rc = 0; ENTRY; @@ -339,117 +375,185 @@ static int mgs_target_reg(struct tgt_session_info *tsi) RETURN(err_serious(-EFAULT)); } + down_read(&mgs->mgs_barrier_rwsem); + if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data, IMP_RECOV)) opc = mti->mti_flags & LDD_F_OPC_MASK; else opc = LDD_F_OPC_REG; - if (opc == LDD_F_OPC_READY) { - CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n", - mti->mti_fsname, mti->mti_stripe_index); + if (opc == LDD_F_OPC_READY) { + CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n", + mti->mti_fsname, mti->mti_stripe_index); rc = mgs_ir_update(tsi->tsi_env, mgs, mti); - if (rc) { - LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE)); - CERROR("Update IR return with %d(ignore and IR " - "disabled)\n", rc); - } - GOTO(out_nolock, rc); - } + if (rc) { + LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE)); + CERROR("%s: Update IR return failure: rc = %d\n", + mti->mti_fsname, rc); + } - /* Do not support unregistering right now. */ - if (opc != LDD_F_OPC_REG) - GOTO(out_nolock, rc = -EINVAL); + GOTO(out_norevoke, rc); + } - CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n", - mti->mti_fsname, mti->mti_stripe_index); + /* Do not support unregistering right now. */ + if (opc != LDD_F_OPC_REG) + GOTO(out_norevoke, rc = -EINVAL); - if (mti->mti_flags & LDD_F_NEED_INDEX) - mti->mti_flags |= LDD_F_WRITECONF; + snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s", + mti->mti_fsname, BARRIER_FILENAME); + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, barrier_name, &b_fsdb); + if (rc) { + CERROR("%s: Can't get db for %s: rc = %d\n", + mti->mti_fsname, barrier_name, rc); + + GOTO(out_norevoke, rc); + } + + CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n", + mti->mti_fsname, mti->mti_stripe_index); + + if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { + if (b_fsdb->fsdb_barrier_status == BS_FREEZING_P1 || + b_fsdb->fsdb_barrier_status == BS_FREEZING_P2 || + b_fsdb->fsdb_barrier_status == BS_FROZEN) { + LCONSOLE_WARN("%s: the system is in barrier, refuse " + "the connection from MDT %s temporary\n", + obd->obd_name, mti->mti_svname); - if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 | - LDD_F_UPDATE))) { - /* We're just here as a startup ping. */ - CDEBUG(D_MGS, "Server %s is running on %s\n", + GOTO(out_norevoke, rc = -EBUSY); + } + + if (!(exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_BARRIER) && + !b_fsdb->fsdb_barrier_disabled) { + LCONSOLE_WARN("%s: the MDT %s does not support write " + "barrier, so disable barrier on the " + "whole system.\n", + obd->obd_name, mti->mti_svname); + + b_fsdb->fsdb_barrier_disabled = 1; + } + } + + if (mti->mti_flags & LDD_F_NEED_INDEX) + mti->mti_flags |= LDD_F_WRITECONF; + + if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPDATE))) { + /* We're just here as a startup ping. */ + CDEBUG(D_MGS, "Server %s is running on %s\n", mti->mti_svname, obd_export_nid2str(tsi->tsi_exp)); rc = mgs_check_target(tsi->tsi_env, mgs, mti); - /* above will set appropriate mti flags */ - if (rc <= 0) - /* Nothing wrong, or fatal error */ - GOTO(out_nolock, rc); + /* above will set appropriate mti flags */ + if (rc <= 0) + /* Nothing wrong, or fatal error */ + GOTO(out_norevoke, rc); } else if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)) { rc = mgs_check_failover_reg(mti); if (rc) - GOTO(out_nolock, rc); - } + GOTO(out_norevoke, rc); + } - OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); - if (mti->mti_flags & LDD_F_WRITECONF) { - if (mti->mti_flags & LDD_F_SV_TYPE_MDT && - mti->mti_stripe_index == 0) { + if (mti->mti_flags & LDD_F_WRITECONF) { + if (mti->mti_flags & LDD_F_SV_TYPE_MDT && + mti->mti_stripe_index == 0) { + mgs_put_fsdb(mgs, b_fsdb); + b_fsdb = NULL; rc = mgs_erase_logs(tsi->tsi_env, mgs, mti->mti_fsname); - LCONSOLE_WARN("%s: Logs for fs %s were removed by user " - "request. All servers must be restarted " - "in order to regenerate the logs." - "\n", obd->obd_name, mti->mti_fsname); - } else if (mti->mti_flags & - (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { + LCONSOLE_WARN("%s: Logs for fs %s were removed by user " + "request. All servers must be restarted " + "in order to regenerate the logs: rc = %d" + "\n", obd->obd_name, mti->mti_fsname, rc); + if (rc && rc != -ENOENT) + GOTO(out_norevoke, rc); + + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, + barrier_name, &b_fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", + barrier_name, rc); + + GOTO(out_norevoke, rc); + } + + if (!(exp_connect_flags(tsi->tsi_exp) & + OBD_CONNECT_BARRIER)) { + LCONSOLE_WARN("%s: the MDT %s does not support " + "write barrier, disable barrier " + "on the whole system.\n", + obd->obd_name, mti->mti_svname); + + b_fsdb->fsdb_barrier_disabled = 1; + } + } else if (mti->mti_flags & + (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { rc = mgs_erase_log(tsi->tsi_env, mgs, mti->mti_svname); - LCONSOLE_WARN("%s: Regenerating %s log by user " - "request.\n", - obd->obd_name, mti->mti_svname); - } - mti->mti_flags |= LDD_F_UPDATE; - /* Erased logs means start from scratch. */ - mti->mti_flags &= ~LDD_F_UPGRADE14; - if (rc) - GOTO(out_nolock, rc); - } + LCONSOLE_WARN("%s: Regenerating %s log by user " + "request: rc = %d\n", + obd->obd_name, mti->mti_svname, rc); + if (rc) + GOTO(out_norevoke, rc); + } - rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &fsdb); - if (rc) { - CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); - GOTO(out_nolock, rc); - } + mti->mti_flags |= LDD_F_UPDATE; + } - /* - * Log writing contention is handled by the fsdb_mutex. - * - * It should be alright if someone was reading while we were - * updating the logs - if we revoke at the end they will just update - * from where they left off. - */ + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &c_fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); - if (mti->mti_flags & LDD_F_UPGRADE14) { - CERROR("Can't upgrade from 1.4 (%d)\n", rc); - GOTO(out, rc); + GOTO(out_norevoke, rc); } + /* + * Log writing contention is handled by the fsdb_mutex. + * + * It should be alright if someone was reading while we were + * updating the logs - if we revoke at the end they will just update + * from where they left off. + */ if (mti->mti_flags & LDD_F_UPDATE) { CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, mti->mti_stripe_index); /* create or update the target log and update the client/mdt logs */ - rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, fsdb); + rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, c_fsdb); if (rc) { CERROR("Failed to write %s log (%d)\n", mti->mti_svname, rc); GOTO(out, rc); } - mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | - LDD_F_NEED_INDEX | LDD_F_WRITECONF | - LDD_F_UPGRADE14); - mti->mti_flags |= LDD_F_REWRITE_LDD; + mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | + LDD_F_NEED_INDEX | LDD_F_WRITECONF); + mti->mti_flags |= LDD_F_REWRITE_LDD; } out: - mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG); + mgs_revoke_lock(mgs, c_fsdb, CONFIG_T_CONFIG); + +out_norevoke: + if (!rc && mti->mti_flags & LDD_F_SV_TYPE_MDT && b_fsdb) { + if (!c_fsdb) { + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, + mti->mti_fsname, &c_fsdb); + if (rc) + CERROR("Fail to get db for %s: %d\n", + mti->mti_fsname, rc); + } + + if (c_fsdb) { + memcpy(b_fsdb->fsdb_mdt_index_map, + c_fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); + b_fsdb->fsdb_mdt_count = c_fsdb->fsdb_mdt_count; + } + } + + up_read(&mgs->mgs_barrier_rwsem); -out_nolock: CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, mti->mti_stripe_index, rc); /* An error flag is set in the mti reply rather than an error code */ @@ -462,6 +566,10 @@ out_nolock: /* Flush logs to disk */ dt_sync(tsi->tsi_env, mgs->mgs_bottom); + if (b_fsdb) + mgs_put_fsdb(mgs, b_fsdb); + if (c_fsdb) + mgs_put_fsdb(mgs, c_fsdb); RETURN(rc); } @@ -493,6 +601,9 @@ static int mgs_config_read(struct tgt_session_info *tsi) case CONFIG_T_RECOVER: rc = mgs_get_ir_logs(req); break; + case CONFIG_T_NODEMAP: + rc = nodemap_get_config_req(req->rq_export->exp_obd, req); + break; case CONFIG_T_CONFIG: rc = -EOPNOTSUPP; break; @@ -540,7 +651,7 @@ static int mgs_llog_open(struct tgt_session_info *tsi) logname = req_capsule_client_get(tsi->tsi_pill, &RMF_NAME); if (logname) { - char *ptr = strchr(logname, '-'); + char *ptr = strrchr(logname, '-'); int len = (ptr != NULL) ? (int)(ptr - logname) : 0; if (ptr == NULL || len >= sizeof(mgi->mgi_fsname)) { @@ -608,22 +719,43 @@ static inline int mgs_destroy_export(struct obd_export *exp) RETURN(0); } -static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname) +static int mgs_extract_fs_pool(char *arg, char *fsname, char *poolname) { - char *ptr; + size_t len; + char *ptr; - ENTRY; - for (ptr = arg; (*ptr != '\0') && (*ptr != '.'); ptr++ ) { - *fsname = *ptr; - fsname++; - } - if (*ptr == '\0') - return -EINVAL; - *fsname = '\0'; - ptr++; - strcpy(poolname, ptr); + ENTRY; + /* Validate name */ + for (ptr = arg; *ptr != '\0'; ptr++) { + if (!isalnum(*ptr) && *ptr != '_' && *ptr != '-' && *ptr != '.') + return -EINVAL; + } - RETURN(0); + /* Test for fsname.poolname format. + * strlen() test if poolname is empty + */ + ptr = strchr(arg, '.'); + if (!ptr || !strlen(ptr)) + return -EINVAL; + ptr++; + + /* Also make sure poolname is not to long. */ + if (strlen(ptr) > LOV_MAXPOOLNAME) + return -ENAMETOOLONG; + strncpy(poolname, ptr, LOV_MAXPOOLNAME); + + /* Test if fsname is empty */ + len = strlen(arg) - strlen(ptr) - 1; + if (!len) + return -EINVAL; + + /* or too long */ + if (len > LUSTRE_MAXFSNAME) + return -ENAMETOOLONG; + + strncpy(fsname, arg, len); + + RETURN(0); } static int mgs_iocontrol_nodemap(const struct lu_env *env, @@ -631,6 +763,7 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, struct obd_ioctl_data *data) { struct lustre_cfg *lcfg = NULL; + struct fs_db *fsdb; lnet_nid_t nid; const char *nodemap_name = NULL; const char *nidstr = NULL; @@ -653,7 +786,7 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, GOTO(out, rc = -EINVAL); } - if (data->ioc_plen1 > PAGE_CACHE_SIZE) + if (data->ioc_plen1 > PAGE_SIZE) GOTO(out, rc = -E2BIG); OBD_ALLOC(lcfg, data->ioc_plen1); @@ -710,7 +843,9 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, if (rc != 0) GOTO(out_lcfg, rc = -EINVAL); - fs_id = nodemap_test_id(nid, idtype, client_id); + rc = nodemap_test_id(nid, idtype, client_id, &fs_id); + if (rc < 0) + GOTO(out_lcfg, rc = -EINVAL); if (data->ioc_plen1 < sizeof(fs_idstr)) GOTO(out_lcfg, rc = -EINVAL); @@ -726,6 +861,8 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, case LCFG_NODEMAP_DEL_UIDMAP: case LCFG_NODEMAP_ADD_GIDMAP: case LCFG_NODEMAP_DEL_GIDMAP: + case LCFG_NODEMAP_SET_FILESET: + case LCFG_NODEMAP_SET_SEPOL: if (lcfg->lcfg_bufcount != 3) GOTO(out_lcfg, rc = -EINVAL); nodemap_name = lustre_cfg_string(lcfg, 1); @@ -734,8 +871,11 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, break; case LCFG_NODEMAP_ADMIN: case LCFG_NODEMAP_TRUSTED: + case LCFG_NODEMAP_DENY_UNKNOWN: case LCFG_NODEMAP_SQUASH_UID: case LCFG_NODEMAP_SQUASH_GID: + case LCFG_NODEMAP_MAP_MODE: + case LCFG_NODEMAP_AUDIT_MODE: if (lcfg->lcfg_bufcount != 4) GOTO(out_lcfg, rc = -EINVAL); nodemap_name = lustre_cfg_string(lcfg, 1); @@ -753,6 +893,16 @@ static int mgs_iocontrol_nodemap(const struct lu_env *env, GOTO(out_lcfg, rc); } + /* revoke nodemap lock */ + rc = mgs_find_or_make_fsdb(env, mgs, LUSTRE_NODEMAP_NAME, &fsdb); + if (rc < 0) { + CWARN("%s: cannot make nodemap fsdb: rc = %d\n", + mgs->mgs_obd->obd_name, rc); + } else { + mgs_revoke_lock(mgs, fsdb, CONFIG_T_NODEMAP); + mgs_put_fsdb(mgs, fsdb); + } + out_lcfg: OBD_FREE(lcfg, data->ioc_plen1); out: @@ -779,7 +929,7 @@ static int mgs_iocontrol_pool(const struct lu_env *env, GOTO(out_pool, rc = -EINVAL); } - if (data->ioc_plen1 > PAGE_CACHE_SIZE) + if (data->ioc_plen1 > PAGE_SIZE) GOTO(out_pool, rc = -E2BIG); OBD_ALLOC(lcfg, data->ioc_plen1); @@ -842,7 +992,7 @@ out_pool: /* from mdt_iocontrol */ static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg) + void *karg, void __user *uarg) { struct mgs_device *mgs = exp2mgs_dev(exp); struct obd_ioctl_data *data = karg; @@ -858,9 +1008,8 @@ static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { - case OBD_IOC_PARAM: { - struct mgs_thread_info *mgi = mgs_env_info(&env); - struct lustre_cfg *lcfg; + case OBD_IOC_PARAM: { + struct lustre_cfg *lcfg; if (data->ioc_type != LUSTRE_CFG_TYPE) { CERROR("%s: unknown cfg record type: %d\n", @@ -877,7 +1026,7 @@ static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (lcfg->lcfg_bufcount < 1) GOTO(out_free, rc = -EINVAL); - rc = mgs_setparam(&env, mgs, lcfg, mgi->mgi_fsname); + rc = mgs_set_param(&env, mgs, lcfg); if (rc) CERROR("%s: setparam err: rc = %d\n", exp->exp_obd->obd_name, rc); @@ -888,32 +1037,37 @@ out_free: case OBD_IOC_REPLACE_NIDS: { if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { - CERROR("No device name specified!\n"); rc = -EINVAL; + CERROR("%s: no device or fsname specified: rc = %d\n", + exp->exp_obd->obd_name, rc); break; } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("Device name is not NUL terminated!\n"); - rc = -EINVAL; + if (data->ioc_inllen1 > MTI_NAME_MAXLEN) { + rc = -EOVERFLOW; + CERROR("%s: device or fsname is too long: rc = %d\n", + exp->exp_obd->obd_name, rc); break; } - if (data->ioc_plen1 > MTI_NAME_MAXLEN) { - CERROR("Device name is too long\n"); - rc = -EOVERFLOW; + if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: device or fsname is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); break; } if (!data->ioc_inllen2 || !data->ioc_inlbuf2) { - CERROR("No NIDs were specified!\n"); rc = -EINVAL; + CERROR("%s: no NIDs specified: rc = %d\n", + exp->exp_obd->obd_name, rc); break; } if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) { - CERROR("NID list is not NUL terminated!\n"); rc = -EINVAL; + CERROR("%s: NID list is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); break; } @@ -927,24 +1081,57 @@ out_free: break; } + case OBD_IOC_CLEAR_CONFIGS: { + if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { + rc = -EINVAL; + CERROR("%s: no device or fsname specified: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inllen1 > MTI_NAME_MAXLEN) { + rc = -EOVERFLOW; + CERROR("%s: device or fsname is too long: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: device or fsname is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); + break; + } + + /* remove records marked SKIP from config logs */ + rc = mgs_clear_configs(&env, mgs, data->ioc_inlbuf1); + if (rc) + CERROR("%s: error clearing config log: rc = %d\n", + exp->exp_obd->obd_name, rc); + + break; + } + case OBD_IOC_POOL: rc = mgs_iocontrol_pool(&env, mgs, data); break; + case OBD_IOC_BARRIER: + rc = mgs_iocontrol_barrier(&env, mgs, data); + break; + case OBD_IOC_NODEMAP: rc = mgs_iocontrol_nodemap(&env, mgs, data); break; - case OBD_IOC_DUMP_LOG: { - struct llog_ctxt *ctxt; - - ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); - rc = class_config_dump_llog(&env, ctxt, data->ioc_inlbuf1, - NULL); - llog_ctxt_put(ctxt); + case OBD_IOC_LCFG_FORK: + rc = mgs_lcfg_fork(&env, mgs, data->ioc_inlbuf1, + data->ioc_inlbuf2); + break; + case OBD_IOC_LCFG_ERASE: + rc = mgs_lcfg_erase(&env, mgs, data->ioc_inlbuf1); break; - } case OBD_IOC_CATLOGLIST: rc = mgs_list_logs(&env, mgs, data); @@ -1015,10 +1202,12 @@ TGT_RPC_HANDLER(MGS_FIRST_OPC, 0, MGS_DISCONNECT, mgs_disconnect, &RQF_MDS_DISCONNECT, LUSTRE_OBD_VERSION), TGT_MGS_HDL_VAR(0, MGS_EXCEPTION, mgs_exception), -TGT_MGS_HDL (HABEO_REFERO | MUTABOR, MGS_SET_INFO, mgs_set_info), -TGT_MGS_HDL (HABEO_REFERO | MUTABOR, MGS_TARGET_REG, mgs_target_reg), +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) +TGT_MGS_HDL(HAS_REPLY | IS_MUTABLE, MGS_SET_INFO, mgs_set_info), +#endif +TGT_MGS_HDL(HAS_REPLY | IS_MUTABLE, MGS_TARGET_REG, mgs_target_reg), TGT_MGS_HDL_VAR(0, MGS_TARGET_DEL, mgs_target_del), -TGT_MGS_HDL (HABEO_REFERO, MGS_CONFIG_READ, mgs_config_read), +TGT_MGS_HDL(HAS_REPLY, MGS_CONFIG_READ, mgs_config_read), }; static struct tgt_handler mgs_obd_handlers[] = { @@ -1035,7 +1224,7 @@ static struct tgt_handler mgs_dlm_handlers[] = { * * instead of common OBD_FAIL_LDLM_ENQUEUE_NET */ .th_fail_id = 0, .th_opc = LDLM_ENQUEUE, - .th_flags = HABEO_CLAVIS, + .th_flags = HAS_KEY, .th_act = tgt_enqueue, .th_fmt = &RQF_LDLM_ENQUEUE, .th_version = LUSTRE_DLM_VERSION, @@ -1046,7 +1235,6 @@ static struct tgt_handler mgs_llog_handlers[] = { TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_CREATE, mgs_llog_open), TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_NEXT_BLOCK, tgt_llog_next_block), TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_READ_HEADER, tgt_llog_read_header), -TGT_LLOG_HDL_VAR(0, LLOG_ORIGIN_HANDLE_CLOSE, tgt_llog_close), TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_PREV_BLOCK, tgt_llog_prev_block), }; @@ -1072,6 +1260,11 @@ static struct tgt_opc_slice mgs_common_slice[] = { .tos_hs = mgs_llog_handlers }, { + .tos_opc_start = SEC_FIRST_OPC, + .tos_opc_end = SEC_LAST_OPC, + .tos_hs = tgt_sec_ctx_handlers + }, + { .tos_hs = NULL } }; @@ -1083,8 +1276,6 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, struct obd_device *obd; struct lustre_mount_info *lmi; struct llog_ctxt *ctxt; - struct fs_db *fsdb = NULL; - struct fs_db *fsdb_srpc = NULL; int rc; ENTRY; @@ -1146,9 +1337,14 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, /* Internal mgs setup */ mgs_init_fsdb_list(mgs); mutex_init(&mgs->mgs_mutex); - mgs->mgs_start_time = cfs_time_current_sec(); + mgs->mgs_start_time = ktime_get_real_seconds(); spin_lock_init(&mgs->mgs_lock); mutex_init(&mgs->mgs_health_mutex); + init_rwsem(&mgs->mgs_barrier_rwsem); + + rc = mgs_lcfg_rename(env, mgs); + if (rc) + GOTO(err_llog, rc); rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3)); if (rc != 0) { @@ -1159,14 +1355,14 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, /* Setup params fsdb and log, so that other servers can make a local * copy successfully when they are mounted. See LU-4783 */ - rc = mgs_params_fsdb_setup(env, mgs, fsdb); + rc = mgs_params_fsdb_setup(env, mgs); if (rc) /* params fsdb and log can be setup later */ CERROR("%s: %s fsdb and log setup failed: rc = %d\n", obd->obd_name, PARAMS_FILENAME, rc); /* Setup _mgs fsdb, useful for srpc */ - mgs__mgs_fsdb_setup(env, mgs, fsdb_srpc); + mgs__mgs_fsdb_setup(env, mgs); ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "mgs_ldlm_client", &obd->obd_ldlm_client); @@ -1195,7 +1391,8 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, }; /* Start the service threads */ - mgs->mgs_service = ptlrpc_register_service(&conf, obd->obd_proc_entry); + mgs->mgs_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(mgs->mgs_service)) { rc = PTR_ERR(mgs->mgs_service); CERROR("failed to start mgs service: %d\n", rc); @@ -1230,7 +1427,7 @@ err_ns: err_ops: lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0); if (!cfs_hash_is_empty(mgs2lu_dev(mgs)->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL); lu_site_print(env, mgs2lu_dev(mgs)->ld_site, &msgdata, lu_cdebug_printer); } @@ -1281,7 +1478,7 @@ static int mgs_object_init(const struct lu_env *env, struct lu_object *o, else rc = -ENOMEM; - return 0; + return rc; } static void mgs_object_free(const struct lu_env *env, struct lu_object *o) @@ -1386,6 +1583,7 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, mgs_params_fsdb_cleanup(env, mgs); mgs_cleanup_fsdb_list(mgs); + ldlm_namespace_free_prior(obd->obd_namespace, NULL, 1); obd_exports_barrier(obd); obd_zombie_barrier(); @@ -1400,12 +1598,12 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, mgs_fs_cleanup(env, mgs); - ldlm_namespace_free(obd->obd_namespace, NULL, 1); + ldlm_namespace_free_post(obd->obd_namespace); obd->obd_namespace = NULL; lu_site_purge(env, d->ld_site, ~0); if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL); lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer); } @@ -1444,12 +1642,38 @@ static struct lu_device_type mgs_device_type = { .ldt_ctx_tags = LCT_MG_THREAD }; +static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data, void *localdata) +{ + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + tgt_counter_incr(exp, LPROC_MGS_CONNECT); + + if (data != NULL) { + data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; + + if (data->ocd_connect_flags & OBD_CONNECT_FLAGS2) + data->ocd_connect_flags2 &= MGS_CONNECT_SUPPORTED2; + + exp->exp_connect_data = *data; + data->ocd_version = LUSTRE_VERSION_CODE; + } + + RETURN(mgs_export_stats_init(obd, exp, localdata)); +} + static int mgs_obd_connect(const struct lu_env *env, struct obd_export **exp, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data, void *localdata) { struct obd_export *lexp; - struct lustre_handle conn = { 0 }; + struct lustre_handle conn = { + .cookie = 0, + }; int rc; ENTRY; @@ -1465,41 +1689,18 @@ static int mgs_obd_connect(const struct lu_env *env, struct obd_export **exp, if (lexp == NULL) RETURN(-EFAULT); - if (data != NULL) { - data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - data->ocd_version = LUSTRE_VERSION_CODE; - lexp->exp_connect_data = *data; - } - - tgt_counter_incr(lexp, LPROC_MGS_CONNECT); - - rc = mgs_export_stats_init(obd, lexp, localdata); + rc = mgs_obd_reconnect(env, lexp, obd, cluuid, data, localdata); if (rc) - class_disconnect(lexp); - else - *exp = lexp; - - RETURN(rc); -} - -static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp, - struct obd_device *obd, struct obd_uuid *cluuid, - struct obd_connect_data *data, void *localdata) -{ - ENTRY; + GOTO(out_disconnect, rc); - if (exp == NULL || obd == NULL || cluuid == NULL) - RETURN(-EINVAL); + *exp = lexp; - tgt_counter_incr(exp, LPROC_MGS_CONNECT); + RETURN(rc); - if (data != NULL) { - data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - data->ocd_version = LUSTRE_VERSION_CODE; - exp->exp_connect_data = *data; - } +out_disconnect: + class_disconnect(lexp); - RETURN(mgs_export_stats_init(obd, exp, localdata)); + return rc; } static int mgs_obd_disconnect(struct obd_export *exp) @@ -1550,13 +1751,14 @@ static int __init mgs_init(void) LUSTRE_MGS_NAME, &mgs_device_type); } -static void /*__exit*/ mgs_exit(void) +static void __exit mgs_exit(void) { class_unregister_type(LUSTRE_MGS_NAME); } -MODULE_AUTHOR("Sun Microsystems, Inc. "); -MODULE_DESCRIPTION("Lustre Management Server (MGS)"); +MODULE_AUTHOR("OpenSFS, Inc. "); +MODULE_DESCRIPTION("Lustre Management Server (MGS)"); +MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); module_init(mgs_init);