X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmgs%2Fmgs_handler.c;h=fbbea81cbe49a9166ff79e9a30df3efc3e445e2d;hp=b53339a9fa6368750f36e46b0a499aa546147689;hb=708bc9a21e4437a163e60492a0813a9bdf85dd29;hpb=12c651d87301fae91ad960ea1080655be39e1cd0 diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index b53339a..fbbea81 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -1,33 +1,48 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/mgs/mgs_handler.c - * Lustre Management Server (mgs) request handler + * GPL HEADER START * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Nathan Rutman + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of Lustre, http://www.lustre.org. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/mgs/mgs_handler.c + * + * Author: Nathan Rutman */ #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_MGS -#define D_MGS D_CONFIG/*|D_WARNING*/ +#define D_MGS D_CONFIG #ifdef __KERNEL__ # include @@ -42,69 +57,82 @@ #include #include #include -#include #include #include "mgs_internal.h" /* Establish a connection to the MGS.*/ -static int mgs_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *data) +static int mgs_connect(const struct lu_env *env, + struct obd_export **exp, struct obd_device *obd, + struct obd_uuid *cluuid, struct obd_connect_data *data, + void *localdata) { - struct obd_export *exp; + struct obd_export *lexp; + struct lustre_handle conn = { 0 }; int rc; ENTRY; - if (!conn || !obd || !cluuid) + if (!exp || !obd || !cluuid) RETURN(-EINVAL); - rc = class_connect(conn, obd, cluuid); + rc = class_connect(&conn, obd, cluuid); if (rc) RETURN(rc); - exp = class_conn2export(conn); - LASSERT(exp); + + lexp = class_conn2export(&conn); + LASSERT(lexp); + + mgs_counter_incr(lexp, LPROC_MGS_CONNECT); if (data != NULL) { data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; - exp->exp_connect_flags = data->ocd_connect_flags; + lexp->exp_connect_flags = data->ocd_connect_flags; data->ocd_version = LUSTRE_VERSION_CODE; } + rc = mgs_client_add(obd, lexp, localdata); + if (rc) { - class_disconnect(exp); + class_disconnect(lexp); } else { - class_export_put(exp); + *exp = lexp; } RETURN(rc); } +static int mgs_reconnect(const struct lu_env *env, + struct obd_export *exp, struct obd_device *obd, + struct obd_uuid *cluuid, struct obd_connect_data *data, + void *localdata) +{ + ENTRY; + + if (exp == NULL || obd == NULL || cluuid == NULL) + RETURN(-EINVAL); + + mgs_counter_incr(exp, LPROC_MGS_CONNECT); + + if (data != NULL) { + data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED; + exp->exp_connect_flags = data->ocd_connect_flags; + data->ocd_version = LUSTRE_VERSION_CODE; + } + + RETURN(0); +} + static int mgs_disconnect(struct obd_export *exp) { int rc; ENTRY; LASSERT(exp); - class_export_get(exp); - - /* Disconnect early so that clients can't keep using export */ - rc = class_disconnect(exp); - ldlm_cancel_locks_for_export(exp); - /* complete all outstanding replies */ - spin_lock(&exp->exp_lock); - while (!list_empty(&exp->exp_outstanding_replies)) { - struct ptlrpc_reply_state *rs = - list_entry(exp->exp_outstanding_replies.next, - struct ptlrpc_reply_state, rs_exp_list); - struct ptlrpc_service *svc = rs->rs_service; + class_export_get(exp); + mgs_counter_incr(exp, LPROC_MGS_DISCONNECT); - spin_lock(&svc->srv_lock); - list_del_init(&rs->rs_exp_list); - ptlrpc_schedule_difficult_reply(rs); - spin_unlock(&svc->srv_lock); - } - spin_unlock(&exp->exp_lock); + rc = server_disconnect_export(exp); class_export_put(exp); RETURN(rc); @@ -113,8 +141,33 @@ static int mgs_disconnect(struct obd_export *exp) static int mgs_cleanup(struct obd_device *obd); static int mgs_handle(struct ptlrpc_request *req); +static int mgs_llog_init(struct obd_device *obd, struct obd_llog_group *olg, + struct obd_device *tgt, int *index) +{ + int rc; + ENTRY; + + LASSERT(olg == &obd->obd_olg); + rc = llog_setup(obd, olg, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, + &llog_lvfs_ops); + RETURN(rc); +} + +static int mgs_llog_finish(struct obd_device *obd, int count) +{ + struct llog_ctxt *ctxt; + int rc = 0; + ENTRY; + + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + if (ctxt) + rc = llog_cleanup(ctxt); + + RETURN(rc); +} + /* Start the MGS obd */ -static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) +static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { struct lprocfs_static_vars lvars; struct mgs_obd *mgs = &obd->u.mgs; @@ -128,7 +181,7 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) /* Find our disk */ lmi = server_get_mount(obd->obd_name); - if (!lmi) + if (!lmi) RETURN(rc = -EINVAL); mnt = lmi->lmi_mnt; @@ -137,19 +190,22 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) if (IS_ERR(obd->obd_fsops)) GOTO(err_put, rc = PTR_ERR(obd->obd_fsops)); + if (lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))) { + CERROR("%s: Underlying device is marked as read-only. " + "Setup failed\n", obd->obd_name); + GOTO(err_ops, rc = -EROFS); + } + /* namespace for mgs llog */ - obd->obd_namespace = ldlm_namespace_new("MGS", LDLM_NAMESPACE_SERVER); - if (obd->obd_namespace == NULL) { - mgs_cleanup(obd); + obd->obd_namespace = ldlm_namespace_new(obd ,"MGS", LDLM_NAMESPACE_SERVER, + LDLM_NAMESPACE_MODEST); + if (obd->obd_namespace == NULL) GOTO(err_ops, rc = -ENOMEM); - } /* ldlm setup */ ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "mgs_ldlm_client", &obd->obd_ldlm_client); - LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb))); - rc = mgs_fs_setup(obd, mnt); if (rc) { CERROR("%s: MGS filesystem method init failed: rc = %d\n", @@ -157,12 +213,7 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_ns, rc); } - rc = llog_start_commit_thread(); - if (rc < 0) - GOTO(err_fs, rc); - - rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, - &llog_lvfs_ops); + rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL); if (rc) GOTO(err_fs, rc); @@ -171,33 +222,36 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) /* Internal mgs setup */ mgs_init_fsdb_list(obd); - sema_init(&mgs->mgs_sem, 1); + cfs_sema_init(&mgs->mgs_sem, 1); + + /* Setup proc */ + lprocfs_mgs_init_vars(&lvars); + if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { + lproc_mgs_setup(obd); + rc = lprocfs_alloc_md_stats(obd, LPROC_MGS_LAST); + if (rc) + GOTO(err_llog, rc); + } /* Start the service threads */ mgs->mgs_service = ptlrpc_init_svc(MGS_NBUFS, MGS_BUFSIZE, MGS_MAXREQSIZE, MGS_MAXREPSIZE, MGS_REQUEST_PORTAL, - MGC_REPLY_PORTAL, MGS_SERVICE_WATCHDOG_TIMEOUT, + MGC_REPLY_PORTAL, 2, mgs_handle, LUSTRE_MGS_NAME, - obd->obd_proc_entry, NULL, + obd->obd_proc_entry, target_print_req, MGS_THREADS_AUTO_MIN, MGS_THREADS_AUTO_MAX, - "ll_mgs"); + "ll_mgs", LCT_MD_THREAD, NULL); if (!mgs->mgs_service) { CERROR("failed to start service\n"); - GOTO(err_fs, rc = -ENOMEM); + GOTO(err_llog, rc = -ENOMEM); } rc = ptlrpc_start_threads(obd, mgs->mgs_service); if (rc) GOTO(err_thread, rc); - /* Setup proc */ - lprocfs_init_vars(mgs, &lvars); - if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { - lproc_mgs_setup(obd); - } - ping_evictor_start(); LCONSOLE_INFO("MGS %s started\n", obd->obd_name); @@ -206,11 +260,14 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) err_thread: ptlrpc_unregister_service(mgs->mgs_service); +err_llog: + lproc_mgs_cleanup(obd); + obd_llog_finish(obd, 0); err_fs: /* No extra cleanup needed for llog_init_commit_thread() */ mgs_fs_cleanup(obd); err_ns: - ldlm_namespace_free(obd->obd_namespace, 0); + ldlm_namespace_free(obd->obd_namespace, NULL, 0); obd->obd_namespace = NULL; err_ops: fsfilt_put_ops(obd->obd_fsops); @@ -228,55 +285,36 @@ static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) switch (stage) { case OBD_CLEANUP_EARLY: case OBD_CLEANUP_EXPORTS: - break; - case OBD_CLEANUP_SELF_EXP: - llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); rc = obd_llog_finish(obd, 0); break; - case OBD_CLEANUP_OBD: - break; } RETURN(rc); } -static int mgs_ldlm_nsfree(void *data) -{ - struct ldlm_namespace *ns = (struct ldlm_namespace *)data; - int rc; - ENTRY; - - ptlrpc_daemonize("ll_mgs_nsfree"); - rc = ldlm_namespace_free(ns, 1 /* obd_force should always be on */); - RETURN(rc); -} - +/** + * Performs cleanup procedures for passed \a obd given it is mgs obd. + */ static int mgs_cleanup(struct obd_device *obd) { struct mgs_obd *mgs = &obd->u.mgs; ENTRY; - ping_evictor_stop(); - if (mgs->mgs_sb == NULL) RETURN(0); - - ptlrpc_unregister_service(mgs->mgs_service); - mgs_cleanup_fsdb_list(obd); + ping_evictor_stop(); - lprocfs_obd_cleanup(obd); - mgs->mgs_proc_live = NULL; + ptlrpc_unregister_service(mgs->mgs_service); + mgs_cleanup_fsdb_list(obd); + lproc_mgs_cleanup(obd); mgs_fs_cleanup(obd); server_put_mount(obd->obd_name, mgs->mgs_vfsmnt); mgs->mgs_sb = NULL; - /* Free the namespace in it's own thread, so that if the - ldlm_cancel_handler put the last mgs obd ref, we won't - deadlock here. */ - cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, - CLONE_VM | CLONE_FILES); + ldlm_namespace_free(obd->obd_namespace, NULL, 1); + obd->obd_namespace = NULL; fsfilt_put_ops(obd->obd_fsops); @@ -292,16 +330,16 @@ static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname, int rc, flags = 0; ENTRY; - rc = mgc_logname2resid(fsname, &res_id); - if (!rc) - rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id, + rc = mgc_fsname2resid(fsname, &res_id); + if (!rc) + rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_PLAIN, NULL, LCK_EX, &flags, ldlm_blocking_ast, ldlm_completion_ast, NULL, fsname, 0, NULL, lockh); - if (rc) + if (rc) CERROR("can't take cfg lock for %s (%d)\n", fsname, rc); - + RETURN(rc); } @@ -312,6 +350,26 @@ static int mgs_put_cfg_lock(struct lustre_handle *lockh) RETURN(0); } +void mgs_revoke_lock(struct obd_device *obd, struct fs_db *fsdb) +{ + struct lustre_handle lockh; + int lockrc; + + LASSERT(fsdb->fsdb_name[0] != '\0'); + + if (cfs_test_and_set_bit(1, &fsdb->fsdb_revoking_lock) == 0) { + lockrc = mgs_get_cfg_lock(obd, fsdb->fsdb_name, &lockh); + /* clear the bit before lock put */ + cfs_clear_bit(1, &fsdb->fsdb_revoking_lock); + + if (lockrc != ELDLM_OK) + CERROR("lock error %d for fs %s\n", + lockrc, fsdb->fsdb_name); + else + mgs_put_cfg_lock(&lockh); + } +} + /* rc=0 means ok 1 means update <0 means error */ @@ -322,21 +380,20 @@ static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti) rc = mgs_check_index(obd, mti); if (rc == 0) { - LCONSOLE_ERROR("%s claims to have registered, but this MGS " - "does not know about it. Assuming writeconf.\n", - mti->mti_svname); - mti->mti_flags |= LDD_F_WRITECONF; - rc = 1; + LCONSOLE_ERROR_MSG(0x13b, "%s claims to have registered, but " + "this MGS does not know about it, preventing " + "registration.\n", mti->mti_svname); + rc = -ENOENT; } else if (rc == -1) { - LCONSOLE_ERROR("Client log %s-client has disappeared! " - "Regenerating all logs.\n", - mti->mti_fsname); + LCONSOLE_ERROR_MSG(0x13c, "Client log %s-client has " + "disappeared! Regenerating all logs.\n", + mti->mti_fsname); mti->mti_flags |= LDD_F_WRITECONF; rc = 1; } else { /* Index is correctly marked as used */ - /* If the logs don't contain the mti_nids then add + /* If the logs don't contain the mti_nids then add them as failover nids */ rc = mgs_check_failnid(obd, mti); } @@ -346,17 +403,16 @@ static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti) /* Called whenever a target starts up. Flags indicate first connect, etc. */ static int mgs_handle_target_reg(struct ptlrpc_request *req) -{ +{ struct obd_device *obd = req->rq_export->exp_obd; - struct lustre_handle lockh; struct mgs_target_info *mti, *rep_mti; - int rep_size[] = { sizeof(struct ptlrpc_body), sizeof(*mti) }; - int rc = 0, lockrc; + struct fs_db *fsdb; + int rc = 0; ENTRY; - mti = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*mti), - lustre_swab_mgs_target_info); - + mgs_counter_incr(req->rq_export, LPROC_MGS_TARGET_REG); + + mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO); if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 | LDD_F_UPDATE))) { /* We're just here as a startup ping. */ @@ -364,36 +420,23 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) mti->mti_svname, obd_export_nid2str(req->rq_export)); rc = mgs_check_target(obd, mti); /* above will set appropriate mti flags */ - if (rc <= 0) + if (rc <= 0) /* Nothing wrong, or fatal error */ GOTO(out_nolock, rc); } - /* Revoke the config lock to make sure nobody is reading. */ - /* Although actually I think it should be alright if - someone was reading while we were updating the logs - if we - revoke at the end they will just update from where they left off. */ - lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh); - if (lockrc != ELDLM_OK) { - LCONSOLE_ERROR("%s: Can't signal other nodes to update " - "their configuration (%d). Updating local logs " - "anyhow; you might have to manually restart " - "other nodes to get the latest configuration.\n", - obd->obd_name, lockrc); - } - - OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10); - - /* Log writing contention is handled by the fsdb_sem */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10); if (mti->mti_flags & LDD_F_WRITECONF) { - if (mti->mti_flags & LDD_F_SV_TYPE_MDT) { + if (mti->mti_flags & LDD_F_SV_TYPE_MDT && + mti->mti_stripe_index == 0) { rc = mgs_erase_logs(obd, mti->mti_fsname); LCONSOLE_WARN("%s: Logs for fs %s were removed by user " "request. All servers must be restarted " "in order to regenerate the logs." "\n", obd->obd_name, mti->mti_fsname); - } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) { + } else if (mti->mti_flags & + (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) { rc = mgs_erase_log(obd, mti->mti_svname); LCONSOLE_WARN("%s: Regenerating %s log by user " "request.\n", @@ -401,72 +444,201 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req) } mti->mti_flags |= LDD_F_UPDATE; /* Erased logs means start from scratch. */ - mti->mti_flags &= ~LDD_F_UPGRADE14; + mti->mti_flags &= ~LDD_F_UPGRADE14; } + rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb); + if (rc) { + CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc); + GOTO(out_nolock, rc); + } + + /* + * Log writing contention is handled by the fsdb_sem. + * + * It should be alright if someone was reading while we were + * updating the logs - if we revoke at the end they will just update + * from where they left off. + */ + /* COMPAT_146 */ if (mti->mti_flags & LDD_F_UPGRADE14) { - rc = mgs_upgrade_sv_14(obd, mti); + rc = mgs_upgrade_sv_14(obd, mti, fsdb); if (rc) { CERROR("Can't upgrade from 1.4 (%d)\n", rc); GOTO(out, rc); } - + /* We're good to go */ mti->mti_flags |= LDD_F_UPDATE; } /* end COMPAT_146 */ if (mti->mti_flags & LDD_F_UPDATE) { - CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, + CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname, mti->mti_stripe_index); - - /* create or update the target log + + /* create or update the target log and update the client/mdt logs */ - rc = mgs_write_log_target(obd, mti); + rc = mgs_write_log_target(obd, mti, fsdb); if (rc) { - CERROR("Failed to write %s log (%d)\n", + CERROR("Failed to write %s log (%d)\n", mti->mti_svname, rc); GOTO(out, rc); } - mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | + mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | LDD_F_NEED_INDEX | LDD_F_WRITECONF | LDD_F_UPGRADE14); mti->mti_flags |= LDD_F_REWRITE_LDD; } out: - /* done with log update */ - if (lockrc == ELDLM_OK) - mgs_put_cfg_lock(&lockh); + mgs_revoke_lock(obd, fsdb); + out_nolock: - CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, + CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname, mti->mti_stripe_index, rc); - lustre_pack_reply(req, 2, rep_size, NULL); + rc = req_capsule_server_pack(&req->rq_pill); + if (rc) + RETURN(rc); + /* send back the whole mti in the reply */ - rep_mti = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, - sizeof(*rep_mti)); - memcpy(rep_mti, mti, sizeof(*rep_mti)); + rep_mti = req_capsule_server_get(&req->rq_pill, &RMF_MGS_TARGET_INFO); + *rep_mti = *mti; /* Flush logs to disk */ fsfilt_sync(obd, obd->u.mgs.mgs_sb); RETURN(rc); } +static int mgs_set_info_rpc(struct ptlrpc_request *req) +{ + struct obd_device *obd = req->rq_export->exp_obd; + struct mgs_send_param *msp, *rep_msp; + int rc; + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + char fsname[MTI_NAME_MAXLEN]; + ENTRY; + + msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); + LASSERT(msp); + + /* Construct lustre_cfg structure to pass to function mgs_setparam */ + lustre_cfg_bufs_reset(&bufs, NULL); + lustre_cfg_bufs_set_string(&bufs, 1, msp->mgs_param); + lcfg = lustre_cfg_new(LCFG_PARAM, &bufs); + rc = mgs_setparam(obd, lcfg, fsname); + if (rc) { + CERROR("Error %d in setting the parameter %s for fs %s\n", + rc, msp->mgs_param, fsname); + RETURN(rc); + } + + lustre_cfg_free(lcfg); + + rc = req_capsule_server_pack(&req->rq_pill); + if (rc == 0) { + rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); + rep_msp = msp; + } + RETURN(rc); +} + +/* + * similar as in ost_connect_check_sptlrpc() + */ +static int mgs_connect_check_sptlrpc(struct ptlrpc_request *req) +{ + struct obd_export *exp = req->rq_export; + struct obd_device *obd = exp->exp_obd; + struct fs_db *fsdb; + struct sptlrpc_flavor flvr; + int rc = 0; + + if (exp->exp_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) { + rc = mgs_find_or_make_fsdb(obd, MGSSELF_NAME, &fsdb); + if (rc) + return rc; + + cfs_down(&fsdb->fsdb_sem); + if (sptlrpc_rule_set_choose(&fsdb->fsdb_srpc_gen, + LUSTRE_SP_MGC, LUSTRE_SP_MGS, + req->rq_peer.nid, + &flvr) == 0) { + /* by defualt allow any flavors */ + flvr.sf_rpc = SPTLRPC_FLVR_ANY; + } + cfs_up(&fsdb->fsdb_sem); + + cfs_spin_lock(&exp->exp_lock); + + exp->exp_sp_peer = req->rq_sp_from; + exp->exp_flvr = flvr; + + if (exp->exp_flvr.sf_rpc != SPTLRPC_FLVR_ANY && + exp->exp_flvr.sf_rpc != req->rq_flvr.sf_rpc) { + CERROR("invalid rpc flavor %x, expect %x, from %s\n", + req->rq_flvr.sf_rpc, exp->exp_flvr.sf_rpc, + libcfs_nid2str(req->rq_peer.nid)); + rc = -EACCES; + } + + cfs_spin_unlock(&exp->exp_lock); + } else { + if (exp->exp_sp_peer != req->rq_sp_from) { + CERROR("RPC source %s doesn't match %s\n", + sptlrpc_part2name(req->rq_sp_from), + sptlrpc_part2name(exp->exp_sp_peer)); + rc = -EACCES; + } else { + rc = sptlrpc_target_export_check(exp, req); + } + } + + return rc; +} + +/* Called whenever a target cleans up. */ +/* XXX - Currently unused */ +static int mgs_handle_target_del(struct ptlrpc_request *req) +{ + ENTRY; + mgs_counter_incr(req->rq_export, LPROC_MGS_TARGET_DEL); + RETURN(0); +} + +/* XXX - Currently unused */ +static int mgs_handle_exception(struct ptlrpc_request *req) +{ + ENTRY; + mgs_counter_incr(req->rq_export, LPROC_MGS_EXCEPTION); + RETURN(0); +} + +/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ int mgs_handle(struct ptlrpc_request *req) { int fail = OBD_FAIL_MGS_ALL_REPLY_NET; int opc, rc = 0; ENTRY; - OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_REQUEST_NET, 2); - OBD_FAIL_RETURN(OBD_FAIL_MGS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0); + req_capsule_init(&req->rq_pill, req, RCL_SERVER); + OBD_FAIL_TIMEOUT_MS(OBD_FAIL_MGS_PAUSE_REQ, obd_fail_val); + if (OBD_FAIL_CHECK(OBD_FAIL_MGS_ALL_REQUEST_NET)) + RETURN(0); LASSERT(current->journal_info == NULL); opc = lustre_msg_get_opc(req->rq_reqmsg); + + if (opc == SEC_CTX_INIT || + opc == SEC_CTX_INIT_CONT || + opc == SEC_CTX_FINI) + GOTO(out, rc = 0); + if (opc != MGS_CONNECT) { - if (req->rq_export == NULL) { + if (!class_connected_export(req->rq_export)) { CERROR("lustre_mgs: operation %d on unconnected MGS\n", opc); req->rq_status = -ENOTCONN; @@ -477,7 +649,12 @@ int mgs_handle(struct ptlrpc_request *req) switch (opc) { case MGS_CONNECT: DEBUG_REQ(D_MGS, req, "connect"); - rc = target_handle_connect(req, mgs_handle); + /* MGS and MDS have same request format for connect */ + req_capsule_set(&req->rq_pill, &RQF_MDS_CONNECT); + rc = target_handle_connect(req); + if (rc == 0) + rc = mgs_connect_check_sptlrpc(req); + if (!rc && (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1)) /* Make clients trying to reconnect after a MGS restart happy; also requires obd_replayable */ @@ -486,20 +663,33 @@ int mgs_handle(struct ptlrpc_request *req) break; case MGS_DISCONNECT: DEBUG_REQ(D_MGS, req, "disconnect"); + /* MGS and MDS have same request format for disconnect */ + req_capsule_set(&req->rq_pill, &RQF_MDS_DISCONNECT); rc = target_handle_disconnect(req); req->rq_status = rc; /* superfluous? */ break; + case MGS_EXCEPTION: + DEBUG_REQ(D_MGS, req, "exception"); + rc = mgs_handle_exception(req); + break; case MGS_TARGET_REG: - DEBUG_REQ(D_MGS, req, "target add\n"); + DEBUG_REQ(D_MGS, req, "target add"); + req_capsule_set(&req->rq_pill, &RQF_MGS_TARGET_REG); rc = mgs_handle_target_reg(req); break; case MGS_TARGET_DEL: - DEBUG_REQ(D_MGS, req, "target del\n"); - //rc = mgs_handle_target_del(req); + DEBUG_REQ(D_MGS, req, "target del"); + rc = mgs_handle_target_del(req); + break; + case MGS_SET_INFO: + DEBUG_REQ(D_MGS, req, "set_info"); + req_capsule_set(&req->rq_pill, &RQF_MGS_SET_INFO); + rc = mgs_set_info_rpc(req); break; case LDLM_ENQUEUE: DEBUG_REQ(D_MGS, req, "enqueue"); + req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE); rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, ldlm_server_blocking_ast, NULL); break; @@ -512,23 +702,29 @@ int mgs_handle(struct ptlrpc_request *req) case OBD_PING: DEBUG_REQ(D_INFO, req, "ping"); + req_capsule_set(&req->rq_pill, &RQF_OBD_PING); rc = target_handle_ping(req); break; case OBD_LOG_CANCEL: - DEBUG_REQ(D_MGS, req, "log cancel\n"); + DEBUG_REQ(D_MGS, req, "log cancel"); rc = -ENOTSUPP; /* la la la */ break; case LLOG_ORIGIN_HANDLE_CREATE: DEBUG_REQ(D_MGS, req, "llog_init"); + req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE); rc = llog_origin_handle_create(req); break; case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: DEBUG_REQ(D_MGS, req, "llog next block"); + req_capsule_set(&req->rq_pill, + &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK); rc = llog_origin_handle_next_block(req); break; case LLOG_ORIGIN_HANDLE_READ_HEADER: DEBUG_REQ(D_MGS, req, "llog read header"); + req_capsule_set(&req->rq_pill, + &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER); rc = llog_origin_handle_read_header(req); break; case LLOG_ORIGIN_HANDLE_CLOSE: @@ -537,6 +733,7 @@ int mgs_handle(struct ptlrpc_request *req) break; case LLOG_CATINFO: DEBUG_REQ(D_MGS, req, "llog catinfo"); + req_capsule_set(&req->rq_pill, &RQF_LLOG_CATINFO); rc = llog_catinfo(req); break; default: @@ -546,24 +743,156 @@ int mgs_handle(struct ptlrpc_request *req) } LASSERT(current->journal_info == NULL); - - if (rc) + + if (rc) CERROR("MGS handle cmd=%d rc=%d\n", opc, rc); - out: +out: target_send_reply(req, rc, fail); RETURN(0); } +static inline int mgs_init_export(struct obd_export *exp) +{ + cfs_spin_lock(&exp->exp_lock); + exp->exp_connecting = 1; + cfs_spin_unlock(&exp->exp_lock); + + return ldlm_init_export(exp); +} + static inline int mgs_destroy_export(struct obd_export *exp) { ENTRY; target_destroy_export(exp); + mgs_client_free(exp); + ldlm_destroy_export(exp); RETURN(0); } +static int mgs_extract_fs_pool(char * arg, char *fsname, char *poolname) +{ + char *ptr; + + ENTRY; + for (ptr = arg; (*ptr != '\0') && (*ptr != '.'); ptr++ ) { + *fsname = *ptr; + fsname++; + } + if (*ptr == '\0') + return -EINVAL; + *fsname = '\0'; + ptr++; + strcpy(poolname, ptr); + + RETURN(0); +} + +static int mgs_iocontrol_pool(struct obd_device *obd, + struct obd_ioctl_data *data) +{ + int rc; + struct lustre_cfg *lcfg = NULL; + struct llog_rec_hdr rec; + char *fsname = NULL; + char *poolname = NULL; + ENTRY; + + OBD_ALLOC(fsname, MTI_NAME_MAXLEN); + if (fsname == NULL) + RETURN(-ENOMEM); + + OBD_ALLOC(poolname, LOV_MAXPOOLNAME + 1); + if (poolname == NULL) { + rc = -ENOMEM; + GOTO(out_pool, rc); + } + rec.lrh_len = llog_data_len(data->ioc_plen1); + + if (data->ioc_type == LUSTRE_CFG_TYPE) { + rec.lrh_type = OBD_CFG_REC; + } else { + CERROR("unknown cfg record type:%d \n", data->ioc_type); + rc = -EINVAL; + GOTO(out_pool, rc); + } + + if (data->ioc_plen1 > CFS_PAGE_SIZE) { + rc = -E2BIG; + GOTO(out_pool, rc); + } + + OBD_ALLOC(lcfg, data->ioc_plen1); + if (lcfg == NULL) + GOTO(out_pool, rc = -ENOMEM); + + if (cfs_copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) + GOTO(out_pool, rc = -EFAULT); + + if (lcfg->lcfg_bufcount < 2) { + GOTO(out_pool, rc = -EFAULT); + } + + /* first arg is always . */ + mgs_extract_fs_pool(lustre_cfg_string(lcfg, 1), fsname, + poolname); + + switch (lcfg->lcfg_command) { + case LCFG_POOL_NEW: { + if (lcfg->lcfg_bufcount != 2) + RETURN(-EINVAL); + rc = mgs_pool_cmd(obd, LCFG_POOL_NEW, fsname, + poolname, NULL); + break; + } + case LCFG_POOL_ADD: { + if (lcfg->lcfg_bufcount != 3) + RETURN(-EINVAL); + rc = mgs_pool_cmd(obd, LCFG_POOL_ADD, fsname, poolname, + lustre_cfg_string(lcfg, 2)); + break; + } + case LCFG_POOL_REM: { + if (lcfg->lcfg_bufcount != 3) + RETURN(-EINVAL); + rc = mgs_pool_cmd(obd, LCFG_POOL_REM, fsname, poolname, + lustre_cfg_string(lcfg, 2)); + break; + } + case LCFG_POOL_DEL: { + if (lcfg->lcfg_bufcount != 2) + RETURN(-EINVAL); + rc = mgs_pool_cmd(obd, LCFG_POOL_DEL, fsname, + poolname, NULL); + break; + } + default: { + rc = -EINVAL; + GOTO(out_pool, rc); + } + } + + if (rc) { + CERROR("OBD_IOC_POOL err %d, cmd %X for pool %s.%s\n", + rc, lcfg->lcfg_command, fsname, poolname); + GOTO(out_pool, rc); + } + +out_pool: + if (lcfg != NULL) + OBD_FREE(lcfg, data->ioc_plen1); + + if (fsname != NULL) + OBD_FREE(fsname, MTI_NAME_MAXLEN); + + if (poolname != NULL) + OBD_FREE(poolname, LOV_MAXPOOLNAME + 1); + + RETURN(rc); +} + /* from mdt_iocontrol */ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) @@ -579,11 +908,9 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, switch (cmd) { case OBD_IOC_PARAM: { - struct lustre_handle lockh; struct lustre_cfg *lcfg; struct llog_rec_hdr rec; char fsname[MTI_NAME_MAXLEN]; - int lockrc; rec.lrh_len = llog_data_len(data->ioc_plen1); @@ -597,9 +924,8 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, OBD_ALLOC(lcfg, data->ioc_plen1); if (lcfg == NULL) RETURN(-ENOMEM); - rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1); - if (rc) - GOTO(out_free, rc); + if (cfs_copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) + GOTO(out_free, rc = -EFAULT); if (lcfg->lcfg_bufcount < 1) GOTO(out_free, rc = -EINVAL); @@ -609,33 +935,22 @@ int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len, CERROR("setparam err %d\n", rc); GOTO(out_free, rc); } - - /* Revoke lock so everyone updates. Should be alright if - someone was already reading while we were updating the logs, - so we don't really need to hold the lock while we're - writing (above). */ - if (fsname[0]) { - lockrc = mgs_get_cfg_lock(obd, fsname, &lockh); - if (lockrc != ELDLM_OK) - CERROR("lock error %d for fs %s\n", lockrc, - fsname); - else - mgs_put_cfg_lock(&lockh); - } - out_free: OBD_FREE(lcfg, data->ioc_plen1); RETURN(rc); } + case OBD_IOC_POOL: { + RETURN(mgs_iocontrol_pool(obd, data)); + } + case OBD_IOC_DUMP_LOG: { - struct llog_ctxt *ctxt = - llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + struct llog_ctxt *ctxt; + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - if (rc) - RETURN(rc); + llog_ctxt_put(ctxt); RETURN(rc); } @@ -643,12 +958,13 @@ out_free: case OBD_IOC_LLOG_CHECK: case OBD_IOC_LLOG_INFO: case OBD_IOC_LLOG_PRINT: { - struct llog_ctxt *ctxt = - llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + struct llog_ctxt *ctxt; + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); + llog_ctxt_put(ctxt); RETURN(rc); } @@ -664,20 +980,25 @@ out_free: static struct obd_ops mgs_obd_ops = { .o_owner = THIS_MODULE, .o_connect = mgs_connect, + .o_reconnect = mgs_reconnect, .o_disconnect = mgs_disconnect, .o_setup = mgs_setup, .o_precleanup = mgs_precleanup, .o_cleanup = mgs_cleanup, + .o_init_export = mgs_init_export, .o_destroy_export = mgs_destroy_export, .o_iocontrol = mgs_iocontrol, + .o_llog_init = mgs_llog_init, + .o_llog_finish = mgs_llog_finish }; static int __init mgs_init(void) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(mgs, &lvars); - class_register_type(&mgs_obd_ops, lvars.module_vars, LUSTRE_MGS_NAME); + lprocfs_mgs_init_vars(&lvars); + class_register_type(&mgs_obd_ops, NULL, + lvars.module_vars, LUSTRE_MGS_NAME, NULL); return 0; } @@ -687,7 +1008,7 @@ static void /*__exit*/ mgs_exit(void) class_unregister_type(LUSTRE_MGS_NAME); } -MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_AUTHOR("Sun Microsystems, Inc. "); MODULE_DESCRIPTION("Lustre Management Server (MGS)"); MODULE_LICENSE("GPL");