X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmgs%2Fmgs_handler.c;h=e5603d52caf3e7b3866fffae64af46a242614734;hp=dc993c73d15df5edd3d19f3358399b25a50c4948;hb=4c4c327b25f3414f20a9ae600e7311f1aa3a866d;hpb=08aa217ce49aba1ded52e0f7adb8a607035123fd diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index dc993c7..e5603d5 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -15,11 +15,7 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ @@ -27,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2010, 2012, Intel Corporation. + * Copyright (c) 2010, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -45,186 +41,258 @@ #include #include -#include +#include +#include +#include #include "mgs_internal.h" -/* Establish a connection to the MGS.*/ -static int mgs_connect(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *data, - void *localdata) +/* + * Regular MGS handlers + */ +static int mgs_connect(struct tgt_session_info *tsi) { - struct obd_export *lexp; - struct lustre_handle conn = { 0 }; - int rc; - ENTRY; + struct ptlrpc_request *req = tgt_ses_req(tsi); + int rc; - if (!exp || !obd || !cluuid) - RETURN(-EINVAL); + ENTRY; - rc = class_connect(&conn, obd, cluuid); - if (rc) - RETURN(rc); + CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, cfs_fail_val); + rc = tgt_connect(tsi); + if (rc) + RETURN(rc); - lexp = class_conn2export(&conn); - if (lexp == NULL) - GOTO(out, rc = -EFAULT); + if (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1) + lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); - mgs_counter_incr(lexp, LPROC_MGS_CONNECT); + RETURN(0); +} - if (data != NULL) { - data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - lexp->exp_connect_flags = data->ocd_connect_flags; - data->ocd_version = LUSTRE_VERSION_CODE; - } +static int mgs_disconnect(struct tgt_session_info *tsi) +{ + int rc; - rc = mgs_export_stats_init(obd, lexp, localdata); + ENTRY; -out: - if (rc) { - class_disconnect(lexp); - } else { - *exp = lexp; - } + LASSERT(tsi->tsi_exp); - RETURN(rc); + rc = tgt_disconnect(tsi); + if (rc) + RETURN(err_serious(rc)); + RETURN(0); } -static int mgs_reconnect(const struct lu_env *env, - struct obd_export *exp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *data, - void *localdata) +static int mgs_exception(struct tgt_session_info *tsi) { - ENTRY; - - if (exp == NULL || obd == NULL || cluuid == NULL) - RETURN(-EINVAL); - - mgs_counter_incr(exp, LPROC_MGS_CONNECT); + ENTRY; - if (data != NULL) { - data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - exp->exp_connect_flags = data->ocd_connect_flags; - data->ocd_version = LUSTRE_VERSION_CODE; - } + tgt_counter_incr(tsi->tsi_exp, LPROC_MGS_EXCEPTION); - RETURN(mgs_export_stats_init(obd, exp, localdata)); + RETURN(0); } -static int mgs_disconnect(struct obd_export *exp) +static inline bool str_starts_with(const char *str, const char *prefix) { - int rc; - ENTRY; + return strncmp(str, prefix, strlen(prefix)) == 0; +} - LASSERT(exp); +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) +static int mgs_set_info(struct tgt_session_info *tsi) +{ + struct mgs_thread_info *mgi; + struct mgs_send_param *msp, *rep_msp; + struct lustre_cfg *lcfg; + size_t param_len; + char *s; + int rc; - mgs_fsc_cleanup(exp); + ENTRY; - class_export_get(exp); - mgs_counter_incr(exp, LPROC_MGS_DISCONNECT); + mgi = mgs_env_info(tsi->tsi_env); + if (IS_ERR(mgi)) + RETURN(err_serious(PTR_ERR(mgi))); - rc = server_disconnect_export(exp); - class_export_put(exp); - RETURN(rc); -} + msp = req_capsule_client_get(tsi->tsi_pill, &RMF_MGS_SEND_PARAM); + if (msp == NULL) + RETURN(err_serious(-EFAULT)); -static int mgs_handle(struct ptlrpc_request *req); + param_len = strnlen(msp->mgs_param, sizeof(msp->mgs_param)); + if (param_len == 0 || param_len == sizeof(msp->mgs_param)) + RETURN(-EINVAL); -static int mgs_completion_ast_config(struct ldlm_lock *lock, __u64 flags, - void *cbdata) -{ - ENTRY; + /* We only allow '*.lov.stripe{size,count,offset}=*' from an RPC. */ + s = strchr(msp->mgs_param, '.'); + if (s == NULL) + RETURN(-EINVAL); - if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | - LDLM_FL_BLOCK_CONV))) { - struct fs_db *fsdb = (struct fs_db *)lock->l_ast_data; - struct lustre_handle lockh; + if (!str_starts_with(s + 1, "lov.stripesize=") && + !str_starts_with(s + 1, "lov.stripecount=") && + !str_starts_with(s + 1, "lov.stripeoffset=")) + RETURN(-EINVAL); - /* clear the bit before lock put */ - clear_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags); + /* Construct lustre_cfg structure to pass to function mgs_set_param */ + lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL); + lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, msp->mgs_param); + OBD_ALLOC(lcfg, lustre_cfg_len(mgi->mgi_bufs.lcfg_bufcount, + mgi->mgi_bufs.lcfg_buflen)); + if (!lcfg) + RETURN(-ENOMEM); + lustre_cfg_init(lcfg, LCFG_PARAM, &mgi->mgi_bufs); - ldlm_lock2handle(lock, &lockh); - ldlm_lock_decref_and_cancel(&lockh, LCK_EX); - } + rc = mgs_set_param(tsi->tsi_env, exp2mgs_dev(tsi->tsi_exp), lcfg); + if (rc) { + LCONSOLE_WARN("%s: Unable to set parameter %s: %d\n", + tgt_name(tsi->tsi_tgt), msp->mgs_param, rc); + GOTO(out_cfg, rc); + } - RETURN(ldlm_completion_ast(lock, flags, cbdata)); + /* send back the whole msp in the reply */ + rep_msp = req_capsule_server_get(tsi->tsi_pill, &RMF_MGS_SEND_PARAM); + *rep_msp = *msp; + EXIT; +out_cfg: + OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens)); + return rc; } +#endif -static int mgs_completion_ast_ir(struct ldlm_lock *lock, __u64 flags, - void *cbdata) +enum ast_type { + AST_CONFIG = 1, + AST_PARAMS = 2, + AST_IR = 3, + AST_BARRIER = 4, +}; + +static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags, + void *cbdata, enum ast_type type) { - ENTRY; + ENTRY; - if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | - LDLM_FL_BLOCK_CONV))) { - struct fs_db *fsdb; + if (!(flags & LDLM_FL_BLOCKED_MASK)) { + struct fs_db *fsdb; + + /* l_ast_data is used as a marker to avoid cancel ldlm lock + * twice. See LU-2317. */ + lock_res_and_lock(lock); + fsdb = (struct fs_db *)lock->l_ast_data; + lock->l_ast_data = NULL; + unlock_res_and_lock(lock); + + if (fsdb != NULL) { + struct lustre_handle lockh; + + switch(type) { + case AST_CONFIG: + /* clear the bit before lock put */ + clear_bit(FSDB_REVOKING_LOCK, + &fsdb->fsdb_flags); + break; + case AST_PARAMS: + clear_bit(FSDB_REVOKING_PARAMS, + &fsdb->fsdb_flags); + break; + case AST_IR: + mgs_ir_notify_complete(fsdb); + break; + case AST_BARRIER: + break; + default: + LBUG(); + } + + ldlm_lock2handle(lock, &lockh); + ldlm_lock_decref_and_cancel(&lockh, LCK_EX); + } + } - /* l_ast_data is used as a marker to avoid cancel ldlm lock - * twice. See LU-1259. */ - lock_res_and_lock(lock); - fsdb = (struct fs_db *)lock->l_ast_data; - lock->l_ast_data = NULL; - unlock_res_and_lock(lock); + RETURN(ldlm_completion_ast(lock, flags, cbdata)); +} - if (fsdb != NULL) { - struct lustre_handle lockh; +static int mgs_completion_ast_config(struct ldlm_lock *lock, __u64 flags, + void *cbdata) +{ + return mgs_completion_ast_generic(lock, flags, cbdata, AST_CONFIG); +} - mgs_ir_notify_complete(fsdb); +static int mgs_completion_ast_params(struct ldlm_lock *lock, __u64 flags, + void *cbdata) +{ + return mgs_completion_ast_generic(lock, flags, cbdata, AST_PARAMS); +} - ldlm_lock2handle(lock, &lockh); - ldlm_lock_decref_and_cancel(&lockh, LCK_EX); - } - } +static int mgs_completion_ast_ir(struct ldlm_lock *lock, __u64 flags, + void *cbdata) +{ + return mgs_completion_ast_generic(lock, flags, cbdata, AST_IR); +} - RETURN(ldlm_completion_ast(lock, flags, cbdata)); +static int mgs_completion_ast_barrier(struct ldlm_lock *lock, __u64 flags, + void *cbdata) +{ + return mgs_completion_ast_generic(lock, flags, cbdata, AST_BARRIER); } void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type) { - ldlm_completion_callback cp = NULL; - struct lustre_handle lockh = { 0 }; - struct ldlm_res_id res_id; + ldlm_completion_callback cp = NULL; + struct lustre_handle lockh = { + .cookie = 0, + }; + struct ldlm_res_id res_id; __u64 flags = LDLM_FL_ATOMIC_CB; - int rc; - ENTRY; - - LASSERT(fsdb->fsdb_name[0] != '\0'); - rc = mgc_fsname2resid(fsdb->fsdb_name, &res_id, type); - LASSERT(rc == 0); + int rc; + ENTRY; - switch (type) { - case CONFIG_T_CONFIG: - cp = mgs_completion_ast_config; + LASSERT(fsdb->fsdb_name[0] != '\0'); + rc = mgc_fsname2resid(fsdb->fsdb_name, &res_id, type); + LASSERT(rc == 0); + switch (type) { + case CONFIG_T_CONFIG: + case CONFIG_T_NODEMAP: + cp = mgs_completion_ast_config; if (test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags)) - rc = -EALREADY; - break; - case CONFIG_T_RECOVER: - cp = mgs_completion_ast_ir; - default: - break; - } + rc = -EALREADY; + break; + case CONFIG_T_PARAMS: + cp = mgs_completion_ast_params; + if (test_and_set_bit(FSDB_REVOKING_PARAMS, &fsdb->fsdb_flags)) + rc = -EALREADY; + break; + case CONFIG_T_RECOVER: + cp = mgs_completion_ast_ir; + break; + case CONFIG_T_BARRIER: + cp = mgs_completion_ast_barrier; + break; + default: + break; + } - if (!rc) { - LASSERT(cp != NULL); - rc = ldlm_cli_enqueue_local(mgs->mgs_obd->obd_namespace, + if (!rc) { + LASSERT(cp != NULL); + rc = ldlm_cli_enqueue_local(NULL, mgs->mgs_obd->obd_namespace, &res_id, LDLM_PLAIN, NULL, LCK_EX, &flags, ldlm_blocking_ast, cp, NULL, fsdb, 0, LVB_T_NONE, NULL, &lockh); - if (rc != ELDLM_OK) { - CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n", - le64_to_cpu(res_id.name[0]), - le64_to_cpu(res_id.name[1]), rc); + if (rc != ELDLM_OK) { + CERROR("%s: can't take cfg lock for %#llx/%#llx : rc = %d\n", + mgs->mgs_obd->obd_name, + le64_to_cpu(res_id.name[0]), + le64_to_cpu(res_id.name[1]), rc); - if (type == CONFIG_T_CONFIG) + if (type == CONFIG_T_CONFIG) clear_bit(FSDB_REVOKING_LOCK, - &fsdb->fsdb_flags); - } - /* lock has been cancelled in completion_ast. */ - } + &fsdb->fsdb_flags); + + if (type == CONFIG_T_PARAMS) + clear_bit(FSDB_REVOKING_PARAMS, + &fsdb->fsdb_flags); + } + /* lock has been cancelled in completion_ast. */ + } - RETURN_EXIT; + RETURN_EXIT; } /* rc=0 means ok @@ -251,10 +319,7 @@ static int mgs_check_target(const struct lu_env *env, rc = 1; } else { /* Index is correctly marked as used */ - - /* If the logs don't contain the mti_nids then add - them as failover nids */ - rc = mgs_check_failnid(env, mgs, mti); + rc = 0; } RETURN(rc); @@ -263,491 +328,358 @@ static int mgs_check_target(const struct lu_env *env, /* Ensure this is not a failover node that is connecting first*/ static int mgs_check_failover_reg(struct mgs_target_info *mti) { - lnet_nid_t nid; - char *ptr; - int i; + lnet_nid_t nid; + char *ptr; + int i; - ptr = mti->mti_params; - while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) { + ptr = mti->mti_params; + while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) { while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { - for (i = 0; i < mti->mti_nid_count; i++) { - if (nid == mti->mti_nids[i]) { - LCONSOLE_WARN("Denying initial registra" - "tion attempt from nid %s" - ", specified as failover" - "\n",libcfs_nid2str(nid)); - return -EADDRNOTAVAIL; - } - } - } - } - return 0; + for (i = 0; i < mti->mti_nid_count; i++) { + if (nid == mti->mti_nids[i]) { + LCONSOLE_WARN("Denying initial registration attempt from nid %s, specified as failover\n", + libcfs_nid2str(nid)); + return -EADDRNOTAVAIL; + } + } + } + } + return 0; } /* Called whenever a target starts up. Flags indicate first connect, etc. */ -static int mgs_handle_target_reg(struct ptlrpc_request *req) +static int mgs_target_reg(struct tgt_session_info *tsi) { - struct obd_device *obd = req->rq_export->exp_obd; - struct mgs_device *mgs = exp2mgs_dev(req->rq_export); - struct lu_env *env = req->rq_svc_thread->t_env; - struct mgs_target_info *mti, *rep_mti; - struct fs_db *fsdb; - int opc; - int rc = 0; - ENTRY; + struct obd_device *obd = tsi->tsi_exp->exp_obd; + struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp); + struct mgs_target_info *mti, *rep_mti; + struct fs_db *b_fsdb = NULL; /* barrier fsdb */ + struct fs_db *c_fsdb = NULL; /* config fsdb */ + char barrier_name[20]; + int opc; + int rc = 0; - mgs_counter_incr(req->rq_export, LPROC_MGS_TARGET_REG); + ENTRY; - mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO); + rc = lu_env_refill((struct lu_env *)tsi->tsi_env); + if (rc) + return err_serious(rc); - opc = mti->mti_flags & LDD_F_OPC_MASK; - if (opc == LDD_F_OPC_READY) { - CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n", - mti->mti_fsname, mti->mti_stripe_index); - rc = mgs_ir_update(env, mgs, mti); - if (rc) { - LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE)); - CERROR("Update IR return with %d(ignore and IR " - "disabled)\n", rc); - } - GOTO(out_nolock, rc); - } + tgt_counter_incr(tsi->tsi_exp, LPROC_MGS_TARGET_REG); - /* Do not support unregistering right now. */ - if (opc != LDD_F_OPC_REG) - GOTO(out_nolock, rc = -EINVAL); + mti = req_capsule_client_get(tsi->tsi_pill, &RMF_MGS_TARGET_INFO); + if (mti == NULL) { + DEBUG_REQ(D_HA, tgt_ses_req(tsi), "no mgs_send_param"); + RETURN(err_serious(-EFAULT)); + } - CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n", - mti->mti_fsname, mti->mti_stripe_index); + down_read(&mgs->mgs_barrier_rwsem); - if (mti->mti_flags & LDD_F_NEED_INDEX) - mti->mti_flags |= LDD_F_WRITECONF; + if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data, + IMP_RECOV)) + opc = mti->mti_flags & LDD_F_OPC_MASK; + else + opc = LDD_F_OPC_REG; + + if (opc == LDD_F_OPC_READY) { + CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n", + mti->mti_fsname, mti->mti_stripe_index); + rc = mgs_ir_update(tsi->tsi_env, mgs, mti); + if (rc) { + LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE)); + CERROR("%s: Update IR return failure: rc = %d\n", + mti->mti_fsname, rc); + } - if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 | - LDD_F_UPDATE))) { - /* We're just here as a startup ping. */ - CDEBUG(D_MGS, "Server %s is running on %s\n", - mti->mti_svname, obd_export_nid2str(req->rq_export)); - rc = mgs_check_target(env, mgs, mti); - /* above will set appropriate mti flags */ - if (rc <= 0) - /* Nothing wrong, or fatal error */ - GOTO(out_nolock, rc); + GOTO(out_norevoke, rc); + } + + /* Do not support unregistering right now. */ + if (opc != LDD_F_OPC_REG) + GOTO(out_norevoke, rc = -EINVAL); + + snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s", + mti->mti_fsname, BARRIER_FILENAME); + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, barrier_name, &b_fsdb); + if (rc) { + CERROR("%s: Can't get db for %s: rc = %d\n", + mti->mti_fsname, barrier_name, rc); + + GOTO(out_norevoke, rc); + } + + CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n", + mti->mti_fsname, mti->mti_stripe_index); + + if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { + if (b_fsdb->fsdb_barrier_status == BS_FREEZING_P1 || + b_fsdb->fsdb_barrier_status == BS_FREEZING_P2 || + b_fsdb->fsdb_barrier_status == BS_FROZEN) { + LCONSOLE_WARN("%s: the system is in barrier, refuse " + "the connection from MDT %s temporary\n", + obd->obd_name, mti->mti_svname); + + GOTO(out_norevoke, rc = -EBUSY); + } + + if (!(exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_BARRIER) && + !b_fsdb->fsdb_barrier_disabled) { + LCONSOLE_WARN("%s: the MDT %s does not support write " + "barrier, so disable barrier on the " + "whole system.\n", + obd->obd_name, mti->mti_svname); + + b_fsdb->fsdb_barrier_disabled = 1; + } + } + + if (mti->mti_flags & LDD_F_NEED_INDEX) + mti->mti_flags |= LDD_F_WRITECONF; + + if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPDATE))) { + /* We're just here as a startup ping. */ + CDEBUG(D_MGS, "Server %s is running on %s\n", + mti->mti_svname, obd_export_nid2str(tsi->tsi_exp)); + rc = mgs_check_target(tsi->tsi_env, mgs, mti); + /* above will set appropriate mti flags */ + if (rc <= 0) + /* Nothing wrong, or fatal error */ + GOTO(out_norevoke, rc); } else if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)) { rc = mgs_check_failover_reg(mti); if (rc) - GOTO(out_nolock, rc); - } + GOTO(out_norevoke, rc); + } - OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); - - if (mti->mti_flags & LDD_F_WRITECONF) { - if (mti->mti_flags & LDD_F_SV_TYPE_MDT && - mti->mti_stripe_index == 0) { - rc = mgs_erase_logs(env, mgs, mti->mti_fsname); - LCONSOLE_WARN("%s: Logs for fs %s were removed by user " - "request. All servers must be restarted " - "in order to regenerate the logs." - "\n", obd->obd_name, mti->mti_fsname); - } else if (mti->mti_flags & - (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { - rc = mgs_erase_log(env, mgs, mti->mti_svname); - LCONSOLE_WARN("%s: Regenerating %s log by user " - "request.\n", - obd->obd_name, mti->mti_svname); - } - mti->mti_flags |= LDD_F_UPDATE; - /* Erased logs means start from scratch. */ - mti->mti_flags &= ~LDD_F_UPGRADE14; - if (rc) - GOTO(out_nolock, rc); - } + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); + + if (mti->mti_flags & LDD_F_WRITECONF) { + if (mti->mti_flags & LDD_F_SV_TYPE_MDT && + mti->mti_stripe_index == 0) { + mgs_put_fsdb(mgs, b_fsdb); + b_fsdb = NULL; + rc = mgs_erase_logs(tsi->tsi_env, mgs, + mti->mti_fsname); + LCONSOLE_WARN("%s: Logs for fs %s were removed by user " + "request. All servers must be restarted " + "in order to regenerate the logs: rc = %d" + "\n", obd->obd_name, mti->mti_fsname, rc); + if (rc && rc != -ENOENT) + GOTO(out_norevoke, rc); + + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, + barrier_name, &b_fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", + barrier_name, rc); + + GOTO(out_norevoke, rc); + } + + if (!(exp_connect_flags(tsi->tsi_exp) & + OBD_CONNECT_BARRIER)) { + LCONSOLE_WARN("%s: the MDT %s does not support " + "write barrier, disable barrier " + "on the whole system.\n", + obd->obd_name, mti->mti_svname); + + b_fsdb->fsdb_barrier_disabled = 1; + } + } else if (mti->mti_flags & + (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { + rc = mgs_erase_log(tsi->tsi_env, mgs, mti->mti_svname); + LCONSOLE_WARN("%s: Regenerating %s log by user " + "request: rc = %d\n", + obd->obd_name, mti->mti_svname, rc); + if (rc) + GOTO(out_norevoke, rc); + } - rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb); - if (rc) { - CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); - GOTO(out_nolock, rc); - } + mti->mti_flags |= LDD_F_UPDATE; + } - /* - * Log writing contention is handled by the fsdb_mutex. - * - * It should be alright if someone was reading while we were - * updating the logs - if we revoke at the end they will just update - * from where they left off. - */ + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &c_fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); - if (mti->mti_flags & LDD_F_UPGRADE14) { - CERROR("Can't upgrade from 1.4 (%d)\n", rc); - GOTO(out, rc); + GOTO(out_norevoke, rc); } + /* + * Log writing contention is handled by the fsdb_mutex. + * + * It should be alright if someone was reading while we were + * updating the logs - if we revoke at the end they will just update + * from where they left off. + */ if (mti->mti_flags & LDD_F_UPDATE) { CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, mti->mti_stripe_index); /* create or update the target log and update the client/mdt logs */ - rc = mgs_write_log_target(env, mgs, mti, fsdb); + rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, c_fsdb); if (rc) { CERROR("Failed to write %s log (%d)\n", mti->mti_svname, rc); GOTO(out, rc); } - mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | - LDD_F_NEED_INDEX | LDD_F_WRITECONF | - LDD_F_UPGRADE14); - mti->mti_flags |= LDD_F_REWRITE_LDD; - } - -out: - mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG); - -out_nolock: - CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, - mti->mti_stripe_index, rc); - req->rq_status = rc; - if (rc) - /* we need an error flag to tell the target what's going on, - * instead of just doing it by error code only. */ - mti->mti_flags |= LDD_F_ERROR; - - rc = req_capsule_server_pack(&req->rq_pill); - if (rc) - RETURN(rc); - - /* send back the whole mti in the reply */ - rep_mti = req_capsule_server_get(&req->rq_pill, &RMF_MGS_TARGET_INFO); - *rep_mti = *mti; - - /* Flush logs to disk */ - dt_sync(req->rq_svc_thread->t_env, mgs->mgs_bottom); - RETURN(rc); -} - -static int mgs_set_info_rpc(struct ptlrpc_request *req) -{ - struct mgs_device *mgs = exp2mgs_dev(req->rq_export); - struct lu_env *env = req->rq_svc_thread->t_env; - struct mgs_send_param *msp, *rep_msp; - struct mgs_thread_info *mgi = mgs_env_info(env); - int rc; - struct lustre_cfg *lcfg; - ENTRY; - - msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); - LASSERT(msp); - - /* Construct lustre_cfg structure to pass to function mgs_setparam */ - lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL); - lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, msp->mgs_param); - lcfg = lustre_cfg_new(LCFG_PARAM, &mgi->mgi_bufs); - if (IS_ERR(lcfg)) - GOTO(out, rc = PTR_ERR(lcfg)); - rc = mgs_setparam(env, mgs, lcfg, mgi->mgi_fsname); - if (rc) { - CERROR("Error %d in setting the parameter %s for fs %s\n", - rc, msp->mgs_param, mgi->mgi_fsname); - GOTO(out_cfg, rc); + mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | + LDD_F_NEED_INDEX | LDD_F_WRITECONF); + mti->mti_flags |= LDD_F_REWRITE_LDD; } - rc = req_capsule_server_pack(&req->rq_pill); - if (rc == 0) { - rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); - rep_msp = msp; - } -out_cfg: - lustre_cfg_free(lcfg); out: - RETURN(rc); -} - -static int mgs_config_read(struct ptlrpc_request *req) -{ - struct mgs_config_body *body; - int rc; - ENTRY; - - body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY); - if (body == NULL) - RETURN(-EINVAL); - - switch (body->mcb_type) { - case CONFIG_T_RECOVER: - rc = mgs_get_ir_logs(req); - break; - - case CONFIG_T_CONFIG: - rc = -ENOTSUPP; - break; - - default: - rc = -EINVAL; - break; - } - - RETURN(rc); -} - -/* - * similar as in ost_connect_check_sptlrpc() - */ -static int mgs_connect_check_sptlrpc(struct ptlrpc_request *req) -{ - struct obd_export *exp = req->rq_export; - struct mgs_device *mgs = exp2mgs_dev(req->rq_export); - struct lu_env *env = req->rq_svc_thread->t_env; - struct fs_db *fsdb; - struct sptlrpc_flavor flvr; - int rc = 0; - - if (exp->exp_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) { - rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb); - if (rc) - return rc; - - mutex_lock(&fsdb->fsdb_mutex); - if (sptlrpc_rule_set_choose(&fsdb->fsdb_srpc_gen, - LUSTRE_SP_MGC, LUSTRE_SP_MGS, - req->rq_peer.nid, - &flvr) == 0) { - /* by defualt allow any flavors */ - flvr.sf_rpc = SPTLRPC_FLVR_ANY; + mgs_revoke_lock(mgs, c_fsdb, CONFIG_T_CONFIG); + +out_norevoke: + if (!rc && mti->mti_flags & LDD_F_SV_TYPE_MDT && b_fsdb) { + if (!c_fsdb) { + rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, + mti->mti_fsname, &c_fsdb); + if (rc) + CERROR("Fail to get db for %s: %d\n", + mti->mti_fsname, rc); } - mutex_unlock(&fsdb->fsdb_mutex); - spin_lock(&exp->exp_lock); - - exp->exp_sp_peer = req->rq_sp_from; - exp->exp_flvr = flvr; + if (c_fsdb) { + memcpy(b_fsdb->fsdb_mdt_index_map, + c_fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE); + b_fsdb->fsdb_mdt_count = c_fsdb->fsdb_mdt_count; + } + } - if (exp->exp_flvr.sf_rpc != SPTLRPC_FLVR_ANY && - exp->exp_flvr.sf_rpc != req->rq_flvr.sf_rpc) { - CERROR("invalid rpc flavor %x, expect %x, from %s\n", - req->rq_flvr.sf_rpc, exp->exp_flvr.sf_rpc, - libcfs_nid2str(req->rq_peer.nid)); - rc = -EACCES; - } + up_read(&mgs->mgs_barrier_rwsem); - spin_unlock(&exp->exp_lock); - } else { - if (exp->exp_sp_peer != req->rq_sp_from) { - CERROR("RPC source %s doesn't match %s\n", - sptlrpc_part2name(req->rq_sp_from), - sptlrpc_part2name(exp->exp_sp_peer)); - rc = -EACCES; - } else { - rc = sptlrpc_target_export_check(exp, req); - } - } - - return rc; + CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, + mti->mti_stripe_index, rc); + /* An error flag is set in the mti reply rather than an error code */ + if (rc) + mti->mti_flags |= LDD_F_ERROR; + + /* send back the whole mti in the reply */ + rep_mti = req_capsule_server_get(tsi->tsi_pill, &RMF_MGS_TARGET_INFO); + *rep_mti = *mti; + + /* Flush logs to disk */ + dt_sync(tsi->tsi_env, mgs->mgs_bottom); + if (b_fsdb) + mgs_put_fsdb(mgs, b_fsdb); + if (c_fsdb) + mgs_put_fsdb(mgs, c_fsdb); + RETURN(rc); } /* Called whenever a target cleans up. */ -/* XXX - Currently unused */ -static int mgs_handle_target_del(struct ptlrpc_request *req) +static int mgs_target_del(struct tgt_session_info *tsi) { - ENTRY; - mgs_counter_incr(req->rq_export, LPROC_MGS_TARGET_DEL); - RETURN(0); -} + ENTRY; -/* XXX - Currently unused */ -static int mgs_handle_exception(struct ptlrpc_request *req) -{ - ENTRY; - mgs_counter_incr(req->rq_export, LPROC_MGS_EXCEPTION); - RETURN(0); + tgt_counter_incr(tsi->tsi_exp, LPROC_MGS_TARGET_DEL); + + RETURN(0); } -/* - * For old clients there is no direct way of knowing which filesystems - * a client is operating at the MGS side. But we need to pick up those - * clients so that the MGS can mark the corresponding filesystem as - * non-IR capable because old clients are not ready to be notified. - * - * This is why we have this _hack_ function. We detect the filesystem's - * name by hacking llog operation which is currently used by the clients - * to fetch configuration logs. At present this is fine because this is - * the ONLY llog operation between mgc and the MGS. - * - * If extra llog operation is going to be added, this function needs fixing. - * - * If releases prior than 2.0 are not supported, we can remove this function. - */ -static int mgs_handle_fslog_hack(struct ptlrpc_request *req) +static int mgs_config_read(struct tgt_session_info *tsi) { - char *logname; - char fsname[16]; - char *ptr; - int rc; + struct ptlrpc_request *req = tgt_ses_req(tsi); + struct mgs_config_body *body; + int rc; - /* XXX: We suppose that llog at mgs is only used for - * fetching file system log */ - logname = req_capsule_client_get(&req->rq_pill, &RMF_NAME); - if (logname == NULL) { - CERROR("No logname, is llog on MGS used for something else?\n"); - return -EINVAL; - } + ENTRY; - ptr = strchr(logname, '-'); - rc = (int)(ptr - logname); - if (ptr == NULL || rc >= sizeof(fsname)) { - CERROR("Invalid logname received: %s\n", logname); - return -EINVAL; - } + body = req_capsule_client_get(tsi->tsi_pill, &RMF_MGS_CONFIG_BODY); + if (body == NULL) { + DEBUG_REQ(D_HA, req, "no mgs_config_body"); + RETURN(err_serious(-EFAULT)); + } - strncpy(fsname, logname, rc); - fsname[rc] = 0; - rc = mgs_fsc_attach(req->rq_svc_thread->t_env, req->rq_export, fsname); - if (rc < 0 && rc != -EEXIST) - CERROR("add fs client %s returns %d\n", fsname, rc); + switch (body->mcb_type) { + case CONFIG_T_RECOVER: + rc = mgs_get_ir_logs(req); + break; + case CONFIG_T_NODEMAP: + rc = nodemap_get_config_req(req->rq_export->exp_obd, req); + break; + case CONFIG_T_CONFIG: + rc = -EOPNOTSUPP; + break; + default: + rc = -EINVAL; + break; + } - return rc; + RETURN(rc); } -/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ -int mgs_handle(struct ptlrpc_request *req) +static int mgs_llog_open(struct tgt_session_info *tsi) { - int fail = OBD_FAIL_MGS_ALL_REPLY_NET; - int opc, rc = 0; - ENTRY; - - req_capsule_init(&req->rq_pill, req, RCL_SERVER); - CFS_FAIL_TIMEOUT_MS(OBD_FAIL_MGS_PAUSE_REQ, cfs_fail_val); - if (CFS_FAIL_CHECK(OBD_FAIL_MGS_ALL_REQUEST_NET)) - RETURN(0); + struct mgs_thread_info *mgi; + struct ptlrpc_request *req = tgt_ses_req(tsi); + char *logname; + int rc; - LASSERT(current->journal_info == NULL); - opc = lustre_msg_get_opc(req->rq_reqmsg); - - if (opc == SEC_CTX_INIT || - opc == SEC_CTX_INIT_CONT || - opc == SEC_CTX_FINI) - GOTO(out, rc = 0); - - if (opc != MGS_CONNECT) { - if (!class_connected_export(req->rq_export)) { - DEBUG_REQ(D_MGS, req, "operation on unconnected MGS\n"); - req->rq_status = -ENOTCONN; - GOTO(out, rc = -ENOTCONN); - } - } - - switch (opc) { - case MGS_CONNECT: - DEBUG_REQ(D_MGS, req, "connect"); - /* MGS and MDS have same request format for connect */ - req_capsule_set(&req->rq_pill, &RQF_MDS_CONNECT); - rc = target_handle_connect(req); - if (rc == 0) - rc = mgs_connect_check_sptlrpc(req); - - if (!rc && (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1)) - /* Make clients trying to reconnect after a MGS restart - happy; also requires obd_replayable */ - lustre_msg_add_op_flags(req->rq_repmsg, - MSG_CONNECT_RECONNECT); - break; - case MGS_DISCONNECT: - DEBUG_REQ(D_MGS, req, "disconnect"); - /* MGS and MDS have same request format for disconnect */ - req_capsule_set(&req->rq_pill, &RQF_MDS_DISCONNECT); - rc = target_handle_disconnect(req); - req->rq_status = rc; /* superfluous? */ - break; - case MGS_EXCEPTION: - DEBUG_REQ(D_MGS, req, "exception"); - rc = mgs_handle_exception(req); - break; - case MGS_TARGET_REG: - DEBUG_REQ(D_MGS, req, "target add"); - req_capsule_set(&req->rq_pill, &RQF_MGS_TARGET_REG); - rc = mgs_handle_target_reg(req); - break; - case MGS_TARGET_DEL: - DEBUG_REQ(D_MGS, req, "target del"); - rc = mgs_handle_target_del(req); - break; - case MGS_SET_INFO: - DEBUG_REQ(D_MGS, req, "set_info"); - req_capsule_set(&req->rq_pill, &RQF_MGS_SET_INFO); - rc = mgs_set_info_rpc(req); - break; - case MGS_CONFIG_READ: - DEBUG_REQ(D_MGS, req, "read config"); - req_capsule_set(&req->rq_pill, &RQF_MGS_CONFIG_READ); - rc = mgs_config_read(req); - break; - case LDLM_ENQUEUE: - DEBUG_REQ(D_MGS, req, "enqueue"); - req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE); - rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, - ldlm_server_blocking_ast, NULL); - break; - case LDLM_BL_CALLBACK: - case LDLM_CP_CALLBACK: - DEBUG_REQ(D_MGS, req, "callback"); - CERROR("callbacks should not happen on MGS\n"); - LBUG(); - break; - - case OBD_PING: - DEBUG_REQ(D_INFO, req, "ping"); - req_capsule_set(&req->rq_pill, &RQF_OBD_PING); - rc = target_handle_ping(req); - break; - case OBD_LOG_CANCEL: - DEBUG_REQ(D_MGS, req, "log cancel"); - rc = -ENOTSUPP; /* la la la */ - break; - - case LLOG_ORIGIN_HANDLE_CREATE: - DEBUG_REQ(D_MGS, req, "llog_open"); - req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE); - rc = llog_origin_handle_open(req); - if (rc == 0) - (void)mgs_handle_fslog_hack(req); - break; - case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: - DEBUG_REQ(D_MGS, req, "llog next block"); - req_capsule_set(&req->rq_pill, - &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK); - rc = llog_origin_handle_next_block(req); - break; - case LLOG_ORIGIN_HANDLE_PREV_BLOCK: - DEBUG_REQ(D_MGS, req, "llog prev block"); - req_capsule_set(&req->rq_pill, - &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK); - rc = llog_origin_handle_prev_block(req); - break; - case LLOG_ORIGIN_HANDLE_READ_HEADER: - DEBUG_REQ(D_MGS, req, "llog read header"); - req_capsule_set(&req->rq_pill, - &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER); - rc = llog_origin_handle_read_header(req); - break; - case LLOG_ORIGIN_HANDLE_CLOSE: - DEBUG_REQ(D_MGS, req, "llog close"); - rc = llog_origin_handle_close(req); - break; - default: - rc = -EOPNOTSUPP; - } + ENTRY; - LASSERT(current->journal_info == NULL); - if (rc) { - DEBUG_REQ(D_MGS, req, "MGS fail to handle opc = %d: rc = %d\n", - opc, rc); - req->rq_status = rc; - rc = ptlrpc_error(req); + rc = tgt_llog_open(tsi); + if (rc) RETURN(rc); + + /* + * For old clients there is no direct way of knowing which file system + * a client is operating at the MGS side. But we need to pick up those + * clients so that the MGS can mark the corresponding file system as + * non-IR capable because old clients are not ready to be notified. + * + * Therefore we attempt to detect the file systems name by hacking the + * llog operation which is currently used by the clients to fetch + * configuration logs. At present this is fine because this is the + * ONLY llog operation between mgc and the MGS. + * + * If extra llog operation are going to be added, this function needs + * further work. + * + * When releases prior than 2.0 are not supported, the following code + * can be removed. + */ + mgi = mgs_env_info(tsi->tsi_env); + if (IS_ERR(mgi)) + RETURN(PTR_ERR(mgi)); + + logname = req_capsule_client_get(tsi->tsi_pill, &RMF_NAME); + if (logname) { + char *ptr = strrchr(logname, '-'); + int len = (ptr != NULL) ? (int)(ptr - logname) : 0; + + if (ptr == NULL || len >= sizeof(mgi->mgi_fsname)) { + if (strcmp(logname, PARAMS_FILENAME) != 0) + LCONSOLE_WARN("%s: non-config logname " + "received: %s\n", + tgt_name(tsi->tsi_tgt), + logname); + /* not error, this can be llog test name */ + } else { + strncpy(mgi->mgi_fsname, logname, len); + mgi->mgi_fsname[len] = 0; + + rc = mgs_fsc_attach(tsi->tsi_env, tsi->tsi_exp, + mgi->mgi_fsname); + if (rc && rc != -EEXIST) { + LCONSOLE_WARN("%s: Unable to add client %s " + "to file system %s: %d\n", + tgt_name(tsi->tsi_tgt), + libcfs_nid2str(req->rq_peer.nid), + mgi->mgi_fsname, rc); + } else { + rc = 0; + } + } + } else { + CERROR("%s: no logname in request\n", tgt_name(tsi->tsi_tgt)); + RETURN(-EINVAL); } -out: - target_send_reply(req, rc, fail); - RETURN(0); + RETURN(rc); } static inline int mgs_init_export(struct obd_export *exp) @@ -756,7 +688,7 @@ static inline int mgs_init_export(struct obd_export *exp) /* init mgs_export_data for fsc */ spin_lock_init(&data->med_lock); - CFS_INIT_LIST_HEAD(&data->med_clients); + INIT_LIST_HEAD(&data->med_clients); spin_lock(&exp->exp_lock); exp->exp_connecting = 1; @@ -785,22 +717,195 @@ static inline int mgs_destroy_export(struct obd_export *exp) RETURN(0); } -static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname) +static int mgs_extract_fs_pool(char *arg, char *fsname, char *poolname) { - char *ptr; + size_t len; + char *ptr; - ENTRY; - for (ptr = arg; (*ptr != '\0') && (*ptr != '.'); ptr++ ) { - *fsname = *ptr; - fsname++; - } - if (*ptr == '\0') - return -EINVAL; - *fsname = '\0'; - ptr++; - strcpy(poolname, ptr); + ENTRY; + /* Validate name */ + for (ptr = arg; *ptr != '\0'; ptr++) { + if (!isalnum(*ptr) && *ptr != '_' && *ptr != '-' && *ptr != '.') + return -EINVAL; + } - RETURN(0); + /* Test for fsname.poolname format. + * strlen() test if poolname is empty + */ + ptr = strchr(arg, '.'); + if (!ptr || !strlen(ptr)) + return -EINVAL; + ptr++; + + /* Also make sure poolname is not to long. */ + if (strlen(ptr) > LOV_MAXPOOLNAME) + return -ENAMETOOLONG; + strncpy(poolname, ptr, LOV_MAXPOOLNAME); + + /* Test if fsname is empty */ + len = strlen(arg) - strlen(ptr) - 1; + if (!len) + return -EINVAL; + + /* or too long */ + if (len > LUSTRE_MAXFSNAME) + return -ENAMETOOLONG; + + strncpy(fsname, arg, len); + + RETURN(0); +} + +static int mgs_iocontrol_nodemap(const struct lu_env *env, + struct mgs_device *mgs, + struct obd_ioctl_data *data) +{ + struct lustre_cfg *lcfg = NULL; + struct fs_db *fsdb; + lnet_nid_t nid; + const char *nodemap_name = NULL; + const char *nidstr = NULL; + const char *client_idstr = NULL; + const char *idtype_str = NULL; + char *param = NULL; + char fs_idstr[16]; + char name_buf[LUSTRE_NODEMAP_NAME_LENGTH + 1]; + int rc = 0; + unsigned long client_id; + __u32 fs_id; + __u32 cmd; + int idtype; + + ENTRY; + + if (data->ioc_type != LUSTRE_CFG_TYPE) { + CERROR("%s: unknown cfg record type: %d\n", + mgs->mgs_obd->obd_name, data->ioc_type); + GOTO(out, rc = -EINVAL); + } + + if (data->ioc_plen1 > PAGE_SIZE) + GOTO(out, rc = -E2BIG); + + OBD_ALLOC(lcfg, data->ioc_plen1); + if (lcfg == NULL) + GOTO(out, rc = -ENOMEM); + + if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) + GOTO(out_lcfg, rc = -EFAULT); + + cmd = lcfg->lcfg_command; + + switch (cmd) { + case LCFG_NODEMAP_ACTIVATE: + if (lcfg->lcfg_bufcount != 2) + GOTO(out_lcfg, rc = -EINVAL); + param = lustre_cfg_string(lcfg, 1); + if (strcmp(param, "1") == 0) + nodemap_activate(1); + else + nodemap_activate(0); + break; + case LCFG_NODEMAP_ADD: + case LCFG_NODEMAP_DEL: + if (lcfg->lcfg_bufcount != 2) + GOTO(out_lcfg, rc = -EINVAL); + nodemap_name = lustre_cfg_string(lcfg, 1); + rc = mgs_nodemap_cmd(env, mgs, cmd, nodemap_name, param); + break; + case LCFG_NODEMAP_TEST_NID: + if (lcfg->lcfg_bufcount != 2) + GOTO(out_lcfg, rc = -EINVAL); + nidstr = lustre_cfg_string(lcfg, 1); + nid = libcfs_str2nid(nidstr); + nodemap_test_nid(nid, name_buf, sizeof(name_buf)); + rc = copy_to_user(data->ioc_pbuf1, name_buf, + min_t(size_t, data->ioc_plen1, + sizeof(name_buf))); + if (rc != 0) + GOTO(out_lcfg, rc = -EFAULT); + break; + case LCFG_NODEMAP_TEST_ID: + if (lcfg->lcfg_bufcount != 4) + GOTO(out_lcfg, rc = -EINVAL); + nidstr = lustre_cfg_string(lcfg, 1); + idtype_str = lustre_cfg_string(lcfg, 2); + client_idstr = lustre_cfg_string(lcfg, 3); + + nid = libcfs_str2nid(nidstr); + if (strcmp(idtype_str, "uid") == 0) + idtype = NODEMAP_UID; + else + idtype = NODEMAP_GID; + + rc = kstrtoul(client_idstr, 10, &client_id); + if (rc != 0) + GOTO(out_lcfg, rc = -EINVAL); + + rc = nodemap_test_id(nid, idtype, client_id, &fs_id); + if (rc < 0) + GOTO(out_lcfg, rc = -EINVAL); + + if (data->ioc_plen1 < sizeof(fs_idstr)) + GOTO(out_lcfg, rc = -EINVAL); + + snprintf(fs_idstr, sizeof(fs_idstr), "%u", fs_id); + if (copy_to_user(data->ioc_pbuf1, fs_idstr, + sizeof(fs_idstr)) != 0) + GOTO(out_lcfg, rc = -EINVAL); + break; + case LCFG_NODEMAP_ADD_RANGE: + case LCFG_NODEMAP_DEL_RANGE: + case LCFG_NODEMAP_ADD_UIDMAP: + case LCFG_NODEMAP_DEL_UIDMAP: + case LCFG_NODEMAP_ADD_GIDMAP: + case LCFG_NODEMAP_DEL_GIDMAP: + case LCFG_NODEMAP_SET_FILESET: + case LCFG_NODEMAP_SET_SEPOL: + if (lcfg->lcfg_bufcount != 3) + GOTO(out_lcfg, rc = -EINVAL); + nodemap_name = lustre_cfg_string(lcfg, 1); + param = lustre_cfg_string(lcfg, 2); + rc = mgs_nodemap_cmd(env, mgs, cmd, nodemap_name, param); + break; + case LCFG_NODEMAP_ADMIN: + case LCFG_NODEMAP_TRUSTED: + case LCFG_NODEMAP_DENY_UNKNOWN: + case LCFG_NODEMAP_SQUASH_UID: + case LCFG_NODEMAP_SQUASH_GID: + case LCFG_NODEMAP_MAP_MODE: + case LCFG_NODEMAP_AUDIT_MODE: + if (lcfg->lcfg_bufcount != 4) + GOTO(out_lcfg, rc = -EINVAL); + nodemap_name = lustre_cfg_string(lcfg, 1); + param = lustre_cfg_string(lcfg, 3); + rc = mgs_nodemap_cmd(env, mgs, cmd, nodemap_name, param); + break; + default: + rc = -ENOTTY; + } + + if (rc != 0) { + CERROR("%s: OBD_IOC_NODEMAP command %X for %s: rc = %d\n", + mgs->mgs_obd->obd_name, lcfg->lcfg_command, + nodemap_name, rc); + GOTO(out_lcfg, rc); + } + + /* revoke nodemap lock */ + rc = mgs_find_or_make_fsdb(env, mgs, LUSTRE_NODEMAP_NAME, &fsdb); + if (rc < 0) { + CWARN("%s: cannot make nodemap fsdb: rc = %d\n", + mgs->mgs_obd->obd_name, rc); + } else { + mgs_revoke_lock(mgs, fsdb, CONFIG_T_NODEMAP); + mgs_put_fsdb(mgs, fsdb); + } + +out_lcfg: + OBD_FREE(lcfg, data->ioc_plen1); +out: + RETURN(rc); } static int mgs_iocontrol_pool(const struct lu_env *env, @@ -823,14 +928,14 @@ static int mgs_iocontrol_pool(const struct lu_env *env, GOTO(out_pool, rc = -EINVAL); } - if (data->ioc_plen1 > CFS_PAGE_SIZE) + if (data->ioc_plen1 > PAGE_SIZE) GOTO(out_pool, rc = -E2BIG); OBD_ALLOC(lcfg, data->ioc_plen1); if (lcfg == NULL) GOTO(out_pool, rc = -ENOMEM); - if (cfs_copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) + if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) GOTO(out_lcfg, rc = -EFAULT); if (lcfg->lcfg_bufcount < 2) @@ -885,8 +990,8 @@ out_pool: } /* from mdt_iocontrol */ -int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg) +static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void __user *uarg) { struct mgs_device *mgs = exp2mgs_dev(exp); struct obd_ioctl_data *data = karg; @@ -902,9 +1007,8 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { - case OBD_IOC_PARAM: { - struct mgs_thread_info *mgi = mgs_env_info(&env); - struct lustre_cfg *lcfg; + case OBD_IOC_PARAM: { + struct lustre_cfg *lcfg; if (data->ioc_type != LUSTRE_CFG_TYPE) { CERROR("%s: unknown cfg record type: %d\n", @@ -915,13 +1019,13 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, OBD_ALLOC(lcfg, data->ioc_plen1); if (lcfg == NULL) GOTO(out, rc = -ENOMEM); - if (cfs_copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) + if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) GOTO(out_free, rc = -EFAULT); if (lcfg->lcfg_bufcount < 1) GOTO(out_free, rc = -EINVAL); - rc = mgs_setparam(&env, mgs, lcfg, mgi->mgi_fsname); + rc = mgs_set_param(&env, mgs, lcfg); if (rc) CERROR("%s: setparam err: rc = %d\n", exp->exp_obd->obd_name, rc); @@ -930,21 +1034,107 @@ out_free: break; } + case OBD_IOC_REPLACE_NIDS: { + if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { + rc = -EINVAL; + CERROR("%s: no device or fsname specified: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inllen1 > MTI_NAME_MAXLEN) { + rc = -EOVERFLOW; + CERROR("%s: device or fsname is too long: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: device or fsname is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); + break; + } + + if (!data->ioc_inllen2 || !data->ioc_inlbuf2) { + rc = -EINVAL; + CERROR("%s: no NIDs specified: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: NID list is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); + break; + } + + /* replace nids in llog */ + rc = mgs_replace_nids(&env, mgs, data->ioc_inlbuf1, + data->ioc_inlbuf2); + if (rc) + CERROR("%s: error replacing nids: rc = %d\n", + exp->exp_obd->obd_name, rc); + + break; + } + + case OBD_IOC_CLEAR_CONFIGS: { + if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { + rc = -EINVAL; + CERROR("%s: no device or fsname specified: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inllen1 > MTI_NAME_MAXLEN) { + rc = -EOVERFLOW; + CERROR("%s: device or fsname is too long: rc = %d\n", + exp->exp_obd->obd_name, rc); + break; + } + + if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { + rc = -EINVAL; + CERROR("%s: device or fsname is not NUL terminated: " + "rc = %d\n", exp->exp_obd->obd_name, rc); + break; + } + + /* remove records marked SKIP from config logs */ + rc = mgs_clear_configs(&env, mgs, data->ioc_inlbuf1); + if (rc) + CERROR("%s: error clearing config log: rc = %d\n", + exp->exp_obd->obd_name, rc); + + break; + } + case OBD_IOC_POOL: rc = mgs_iocontrol_pool(&env, mgs, data); break; - case OBD_IOC_DUMP_LOG: { - struct llog_ctxt *ctxt; + case OBD_IOC_BARRIER: + rc = mgs_iocontrol_barrier(&env, mgs, data); + break; - ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); - rc = class_config_dump_llog(&env, ctxt, data->ioc_inlbuf1, - NULL); - llog_ctxt_put(ctxt); + case OBD_IOC_NODEMAP: + rc = mgs_iocontrol_nodemap(&env, mgs, data); + break; + case OBD_IOC_LCFG_FORK: + rc = mgs_lcfg_fork(&env, mgs, data->ioc_inlbuf1, + data->ioc_inlbuf2); break; - } + case OBD_IOC_LCFG_ERASE: + rc = mgs_lcfg_erase(&env, mgs, data->ioc_inlbuf1); + break; + + case OBD_IOC_CATLOGLIST: + rc = mgs_list_logs(&env, mgs, data); + break; case OBD_IOC_LLOG_CANCEL: case OBD_IOC_LLOG_REMOVE: case OBD_IOC_LLOG_CHECK: @@ -1003,16 +1193,89 @@ out: RETURN(rc); } +static struct tgt_handler mgs_mgs_handlers[] = { +TGT_RPC_HANDLER(MGS_FIRST_OPC, + 0, MGS_CONNECT, mgs_connect, + &RQF_CONNECT, LUSTRE_OBD_VERSION), +TGT_RPC_HANDLER(MGS_FIRST_OPC, + 0, MGS_DISCONNECT, mgs_disconnect, + &RQF_MDS_DISCONNECT, LUSTRE_OBD_VERSION), +TGT_MGS_HDL_VAR(0, MGS_EXCEPTION, mgs_exception), +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) +TGT_MGS_HDL(HAS_REPLY | IS_MUTABLE, MGS_SET_INFO, mgs_set_info), +#endif +TGT_MGS_HDL(HAS_REPLY | IS_MUTABLE, MGS_TARGET_REG, mgs_target_reg), +TGT_MGS_HDL_VAR(0, MGS_TARGET_DEL, mgs_target_del), +TGT_MGS_HDL(HAS_REPLY, MGS_CONFIG_READ, mgs_config_read), +}; + +static struct tgt_handler mgs_obd_handlers[] = { +TGT_OBD_HDL(0, OBD_PING, tgt_obd_ping), +}; + +static struct tgt_handler mgs_dlm_handlers[] = { +[LDLM_ENQUEUE - LDLM_FIRST_OPC] = { + .th_name = "LDLM_ENQUEUE", + /* don't use th_fail_id for MGS to don't interfere with MDS tests. + * * There are no tests for MGS with OBD_FAIL_LDLM_ENQUEUE_NET so it + * * is safe. If such tests will be needed we have to distinguish + * * MDS and MGS fail ids, e.g use OBD_FAIL_MGS_ENQUEUE_NET for MGS + * * instead of common OBD_FAIL_LDLM_ENQUEUE_NET */ + .th_fail_id = 0, + .th_opc = LDLM_ENQUEUE, + .th_flags = HAS_KEY, + .th_act = tgt_enqueue, + .th_fmt = &RQF_LDLM_ENQUEUE, + .th_version = LUSTRE_DLM_VERSION, + }, +}; + +static struct tgt_handler mgs_llog_handlers[] = { +TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_CREATE, mgs_llog_open), +TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_NEXT_BLOCK, tgt_llog_next_block), +TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_READ_HEADER, tgt_llog_read_header), +TGT_LLOG_HDL (0, LLOG_ORIGIN_HANDLE_PREV_BLOCK, tgt_llog_prev_block), +}; + +static struct tgt_opc_slice mgs_common_slice[] = { + { + .tos_opc_start = MGS_FIRST_OPC, + .tos_opc_end = MGS_LAST_OPC, + .tos_hs = mgs_mgs_handlers + }, + { + .tos_opc_start = OBD_FIRST_OPC, + .tos_opc_end = OBD_LAST_OPC, + .tos_hs = mgs_obd_handlers + }, + { + .tos_opc_start = LDLM_FIRST_OPC, + .tos_opc_end = LDLM_LAST_OPC, + .tos_hs = mgs_dlm_handlers + }, + { + .tos_opc_start = LLOG_FIRST_OPC, + .tos_opc_end = LLOG_LAST_OPC, + .tos_hs = mgs_llog_handlers + }, + { + .tos_opc_start = SEC_FIRST_OPC, + .tos_opc_end = SEC_LAST_OPC, + .tos_hs = tgt_sec_ctx_handlers + }, + { + .tos_hs = NULL + } +}; static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, struct lu_device_type *ldt, struct lustre_cfg *lcfg) { - static struct ptlrpc_service_conf conf; - struct lprocfs_static_vars lvars = { 0 }; - struct obd_device *obd; - struct lustre_mount_info *lmi; - struct llog_ctxt *ctxt; - int rc; + struct ptlrpc_service_conf conf; + struct obd_device *obd; + struct lustre_mount_info *lmi; + struct llog_ctxt *ctxt; + int rc; ENTRY; @@ -1024,7 +1287,7 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, rc = mgs_connect_to_osd(mgs, lustre_cfg_string(lcfg, 3)); if (rc) - RETURN(rc); + GOTO(err_lmi, rc); obd = class_name2obd(lustre_cfg_string(lcfg, 0)); LASSERT(obd); @@ -1042,15 +1305,20 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, if (obd->obd_namespace == NULL) GOTO(err_ops, rc = -ENOMEM); - /* ldlm setup */ - ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - "mgs_ldlm_client", &obd->obd_ldlm_client); + /* No recovery for MGCs */ + obd->obd_replayable = 0; + + rc = tgt_init(env, &mgs->mgs_lut, obd, mgs->mgs_bottom, + mgs_common_slice, OBD_FAIL_MGS_ALL_REQUEST_NET, + OBD_FAIL_MGS_ALL_REPLY_NET); + if (rc) + GOTO(err_ns, rc); rc = mgs_fs_setup(env, mgs); if (rc) { CERROR("%s: MGS filesystem method init failed: rc = %d\n", obd->obd_name, rc); - GOTO(err_ns, rc); + GOTO(err_tgt, rc); } rc = llog_setup(env, obd, &obd->obd_olg, LLOG_CONFIG_ORIG_CTXT, @@ -1065,24 +1333,39 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, ctxt->loc_dir = mgs->mgs_configs_dir; llog_ctxt_put(ctxt); - /* No recovery for MGC's */ - obd->obd_replayable = 0; - /* Internal mgs setup */ mgs_init_fsdb_list(mgs); mutex_init(&mgs->mgs_mutex); - mgs->mgs_start_time = cfs_time_current_sec(); + mgs->mgs_start_time = ktime_get_real_seconds(); spin_lock_init(&mgs->mgs_lock); + mutex_init(&mgs->mgs_health_mutex); + init_rwsem(&mgs->mgs_barrier_rwsem); - /* Setup proc */ - lprocfs_mgs_init_vars(&lvars); - if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { - lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3)); - rc = lprocfs_alloc_md_stats(obd, LPROC_MGS_LAST); - if (rc) - GOTO(err_llog, rc); + rc = mgs_lcfg_rename(env, mgs); + if (rc) + GOTO(err_llog, rc); + + rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3)); + if (rc != 0) { + CERROR("%s: cannot initialize proc entry: rc = %d\n", + obd->obd_name, rc); + GOTO(err_llog, rc); } + /* Setup params fsdb and log, so that other servers can make a local + * copy successfully when they are mounted. See LU-4783 */ + rc = mgs_params_fsdb_setup(env, mgs); + if (rc) + /* params fsdb and log can be setup later */ + CERROR("%s: %s fsdb and log setup failed: rc = %d\n", + obd->obd_name, PARAMS_FILENAME, rc); + + /* Setup _mgs fsdb, useful for srpc */ + mgs__mgs_fsdb_setup(env, mgs); + + ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, + "mgs_ldlm_client", &obd->obd_ldlm_client); + conf = (typeof(conf)) { .psc_name = LUSTRE_MGS_NAME, .psc_watchdog_factor = MGS_SERVICE_WATCHDOG_FACTOR, @@ -1101,15 +1384,18 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, .tc_ctx_tags = LCT_MG_THREAD, }, .psc_ops = { - .so_req_handler = mgs_handle, + .so_req_handler = tgt_request_handle, .so_req_printer = target_print_req, }, }; + /* Start the service threads */ - mgs->mgs_service = ptlrpc_register_service(&conf, obd->obd_proc_entry); + mgs->mgs_service = ptlrpc_register_service(&conf, &obd->obd_kset, + obd->obd_debugfs_entry); if (IS_ERR(mgs->mgs_service)) { rc = PTR_ERR(mgs->mgs_service); - CERROR("failed to start service: %d\n", rc); + CERROR("failed to start mgs service: %d\n", rc); + mgs->mgs_service = NULL; GOTO(err_lproc, rc); } @@ -1120,8 +1406,8 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, /* device stack is not yet fully setup to keep no objects behind */ lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0); RETURN(0); - err_lproc: + mgs_params_fsdb_cleanup(env, mgs); lproc_mgs_cleanup(mgs); err_llog: ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); @@ -1129,6 +1415,8 @@ err_llog: ctxt->loc_dir = NULL; llog_cleanup(env, ctxt); } +err_tgt: + tgt_fini(env, &mgs->mgs_lut); err_fs: /* No extra cleanup needed for llog_init_commit_thread() */ mgs_fs_cleanup(env, mgs); @@ -1138,11 +1426,14 @@ err_ns: err_ops: lu_site_purge(env, mgs2lu_dev(mgs)->ld_site, ~0); if (!cfs_hash_is_empty(mgs2lu_dev(mgs)->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL); lu_site_print(env, mgs2lu_dev(mgs)->ld_site, &msgdata, lu_cdebug_printer); } obd_disconnect(mgs->mgs_bottom_exp); +err_lmi: + if (lmi) + server_put_mount(lustre_cfg_string(lcfg, 0), true); RETURN(rc); } @@ -1186,7 +1477,7 @@ static int mgs_object_init(const struct lu_env *env, struct lu_object *o, else rc = -ENOMEM; - return 0; + return rc; } static void mgs_object_free(const struct lu_env *env, struct lu_object *o) @@ -1207,15 +1498,15 @@ static int mgs_object_print(const struct lu_env *env, void *cookie, return (*p)(env, cookie, LUSTRE_MGS_NAME"-object@%p", o); } -struct lu_object_operations mgs_lu_obj_ops = { +static struct lu_object_operations mgs_lu_obj_ops = { .loo_object_init = mgs_object_init, .loo_object_free = mgs_object_free, .loo_object_print = mgs_object_print, }; -struct lu_object *mgs_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *d) +static struct lu_object *mgs_object_alloc(const struct lu_env *env, + const struct lu_object_header *hdr, + struct lu_device *d) { struct lu_object_header *h; struct mgs_object *o; @@ -1280,14 +1571,22 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, LASSERT(mgs->mgs_bottom); + class_disconnect_exports(obd); + ping_evictor_stop(); + mutex_lock(&mgs->mgs_health_mutex); ptlrpc_unregister_service(mgs->mgs_service); + mutex_unlock(&mgs->mgs_health_mutex); + mgs_params_fsdb_cleanup(env, mgs); + mgs_cleanup_fsdb_list(mgs); + + ldlm_namespace_free_prior(obd->obd_namespace, NULL, 1); obd_exports_barrier(obd); obd_zombie_barrier(); - mgs_cleanup_fsdb_list(mgs); + tgt_fini(env, &mgs->mgs_lut); lproc_mgs_cleanup(mgs); ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT); @@ -1298,19 +1597,19 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, mgs_fs_cleanup(env, mgs); - ldlm_namespace_free(obd->obd_namespace, NULL, 1); + ldlm_namespace_free_post(obd->obd_namespace); obd->obd_namespace = NULL; lu_site_purge(env, d->ld_site, ~0); if (!cfs_hash_is_empty(d->ld_site->ls_obj_hash)) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_OTHER, NULL); lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer); } LASSERT(mgs->mgs_bottom_exp); obd_disconnect(mgs->mgs_bottom_exp); - server_put_mount(obd->obd_name, NULL); + server_put_mount(obd->obd_name, true); RETURN(NULL); } @@ -1342,36 +1641,123 @@ static struct lu_device_type mgs_device_type = { .ldt_ctx_tags = LCT_MG_THREAD }; +static int mgs_obd_reconnect(const struct lu_env *env, struct obd_export *exp, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data, void *localdata) +{ + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + tgt_counter_incr(exp, LPROC_MGS_CONNECT); + + if (data != NULL) { + data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; + + if (data->ocd_connect_flags & OBD_CONNECT_FLAGS2) + data->ocd_connect_flags2 &= MGS_CONNECT_SUPPORTED2; + + exp->exp_connect_data = *data; + data->ocd_version = LUSTRE_VERSION_CODE; + } + + RETURN(mgs_export_stats_init(obd, exp, localdata)); +} + +static int mgs_obd_connect(const struct lu_env *env, struct obd_export **exp, + struct obd_device *obd, struct obd_uuid *cluuid, + struct obd_connect_data *data, void *localdata) +{ + struct obd_export *lexp; + struct lustre_handle conn = { + .cookie = 0, + }; + int rc; + + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + rc = class_connect(&conn, obd, cluuid); + if (rc) + RETURN(rc); + + lexp = class_conn2export(&conn); + if (lexp == NULL) + RETURN(-EFAULT); + + rc = mgs_obd_reconnect(env, lexp, obd, cluuid, data, localdata); + if (rc) + GOTO(out_disconnect, rc); + + *exp = lexp; + + RETURN(rc); + +out_disconnect: + class_disconnect(lexp); + + return rc; +} + +static int mgs_obd_disconnect(struct obd_export *exp) +{ + int rc; + + ENTRY; + + LASSERT(exp); + + mgs_fsc_cleanup(exp); + + class_export_get(exp); + tgt_counter_incr(exp, LPROC_MGS_DISCONNECT); + + rc = server_disconnect_export(exp); + class_export_put(exp); + RETURN(rc); +} + +static int mgs_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct mgs_device *mgs = lu2mgs_dev(obd->obd_lu_dev); + int rc = 0; + + mutex_lock(&mgs->mgs_health_mutex); + rc |= ptlrpc_service_health_check(mgs->mgs_service); + mutex_unlock(&mgs->mgs_health_mutex); + + return rc != 0 ? 1 : 0; +} /* use obd ops to offer management infrastructure */ -static struct obd_ops mgs_obd_ops = { - .o_owner = THIS_MODULE, - .o_connect = mgs_connect, - .o_reconnect = mgs_reconnect, - .o_disconnect = mgs_disconnect, - .o_init_export = mgs_init_export, - .o_destroy_export = mgs_destroy_export, - .o_iocontrol = mgs_iocontrol, +static const struct obd_ops mgs_obd_device_ops = { + .o_owner = THIS_MODULE, + .o_connect = mgs_obd_connect, + .o_reconnect = mgs_obd_reconnect, + .o_disconnect = mgs_obd_disconnect, + .o_init_export = mgs_init_export, + .o_destroy_export = mgs_destroy_export, + .o_iocontrol = mgs_iocontrol, + .o_health_check = mgs_health_check, }; static int __init mgs_init(void) { - struct lprocfs_static_vars lvars; - - lprocfs_mgs_init_vars(&lvars); - class_register_type(&mgs_obd_ops, NULL, lvars.module_vars, - LUSTRE_MGS_NAME, &mgs_device_type); - - return 0; + return class_register_type(&mgs_obd_device_ops, NULL, true, NULL, + LUSTRE_MGS_NAME, &mgs_device_type); } -static void /*__exit*/ mgs_exit(void) +static void __exit mgs_exit(void) { - class_unregister_type(LUSTRE_MGS_NAME); + class_unregister_type(LUSTRE_MGS_NAME); } -MODULE_AUTHOR("Sun Microsystems, Inc. "); -MODULE_DESCRIPTION("Lustre Management Server (MGS)"); +MODULE_AUTHOR("OpenSFS, Inc. "); +MODULE_DESCRIPTION("Lustre Management Server (MGS)"); +MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); module_init(mgs_init);