1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mgs/mgs_handler.c
5 * Lustre Management Server (mgs) request handler
7 * Copyright (C) 2006 Cluster File Systems, Inc.
8 * Author: Nathan Rutman <nathan@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 # define EXPORT_SYMTAB
29 #define DEBUG_SUBSYSTEM S_MGS
30 #define D_MGS D_CONFIG/*|D_WARNING*/
33 # include <linux/module.h>
34 # include <linux/pagemap.h>
35 # include <linux/miscdevice.h>
36 # include <linux/init.h>
38 # include <liblustre.h>
41 #include <obd_class.h>
42 #include <lustre_dlm.h>
43 #include <lprocfs_status.h>
44 #include <lustre_fsfilt.h>
45 #include <lustre_commit_confd.h>
46 #include <lustre_disk.h>
47 #include "mgs_internal.h"
50 /* Establish a connection to the MGS.*/
/*
 * mgs_connect - connect method of the MGS obd (installed as .o_connect
 * in mgs_obd_ops).
 *
 * env:    lu environment; not referenced in the visible lines
 * conn:   handle passed to class_connect() and class_conn2export()
 * obd:    the MGS obd device being connected to
 * cluuid: uuid of the connecting client
 * data:   connect data negotiated with the client; updated in place
 *
 * Visible flow: check conn/obd/cluuid, create the export with
 * class_connect(), look it up with class_conn2export(), set the export
 * RPC flavor to SPTLRPC_FLVR_NULL, mask the requested connect flags
 * down to MGS_CONNECT_SUPPORTED, copy them into the export and report
 * LUSTRE_VERSION_CODE back through data.  A client that did not ask for
 * OBD_CONNECT_FID triggers a CWARN.
 *
 * NOTE(review): braces, the rc declaration and the return statements
 * are not present in this copy, so the exact error codes cannot be
 * confirmed here.  Presumably class_disconnect() runs on the failure
 * path and class_export_put() on the success path - verify upstream.
 * NOTE(review): data is dereferenced below; confirm that a NULL guard
 * precedes these lines.
 */
51 static int mgs_connect(const struct lu_env *env,
52 struct lustre_handle *conn, struct obd_device *obd,
53 struct obd_uuid *cluuid, struct obd_connect_data *data)
55 struct obd_export *exp;
/* Argument sanity check; the action taken on failure is not visible. */
59 if (!conn || !obd || !cluuid)
62 rc = class_connect(conn, obd, cluuid);
65 exp = class_conn2export(conn);
68 exp->exp_flvr.sf_rpc = SPTLRPC_FLVR_NULL;
/* Keep only the connect flags this server supports. */
71 data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
72 exp->exp_connect_flags = data->ocd_connect_flags;
73 data->ocd_version = LUSTRE_VERSION_CODE;
76 if ((exp->exp_connect_flags & OBD_CONNECT_FID) == 0) {
77 CWARN("MGS requires FID support, but client not\n");
82 class_disconnect(exp);
84 class_export_put(exp);
/*
 * mgs_disconnect - disconnect method of the MGS obd (installed as
 * .o_disconnect in mgs_obd_ops).
 *
 * exp: export being torn down
 *
 * Takes an extra export reference for the duration of the call,
 * disconnects the export, cancels its LDLM locks, and then drains
 * exp_outstanding_replies: each reply state is unlinked from the export
 * list and handed to ptlrpc_schedule_difficult_reply() while holding
 * both exp_lock and the owning service srv_lock.
 *
 * NOTE(review): the rc declaration, braces and the return statement are
 * missing from this copy; presumably the class_disconnect() result is
 * returned - verify upstream.
 */
90 static int mgs_disconnect(struct obd_export *exp)
/* Pin the export so it stays valid until the matching put below. */
96 class_export_get(exp);
98 /* Disconnect early so that clients can't keep using export */
99 rc = class_disconnect(exp);
100 ldlm_cancel_locks_for_export(exp);
102 /* complete all outstanding replies */
103 spin_lock(&exp->exp_lock);
104 while (!list_empty(&exp->exp_outstanding_replies)) {
105 struct ptlrpc_reply_state *rs =
106 list_entry(exp->exp_outstanding_replies.next,
107 struct ptlrpc_reply_state, rs_exp_list);
108 struct ptlrpc_service *svc = rs->rs_service;
/* srv_lock nests inside exp_lock here. */
110 spin_lock(&svc->srv_lock);
111 list_del_init(&rs->rs_exp_list);
112 ptlrpc_schedule_difficult_reply(rs);
113 spin_unlock(&svc->srv_lock);
115 spin_unlock(&exp->exp_lock);
117 class_export_put(exp);
/*
 * Forward declarations: mgs_handle is referenced by mgs_setup() before
 * its definition appears in this file.
 * NOTE(review): presumably mgs_cleanup is also called from a
 * mgs_setup() error path that is not visible here - confirm.
 */
121 static int mgs_cleanup(struct obd_device *obd);
122 static int mgs_handle(struct ptlrpc_request *req);
124 /* Start the MGS obd */
/*
 * mgs_setup - setup method of the MGS obd (installed as .o_setup).
 *
 * obd:  the MGS obd device being started
 * lcfg: configuration record; not referenced in the visible lines
 *
 * Visible start-up sequence:
 *   1. look up the server mount registered under obd->obd_name
 *   2. load the fsfilt operations for the backing filesystem type
 *   3. create the MGS server-side LDLM namespace and callback client
 *   4. mgs_fs_setup() on the mount
 *   5. start the llog commit thread and set up the
 *      LLOG_CONFIG_ORIG_CTXT llog context
 *   6. mark the obd non-replayable, init the fsdb list and mgs_sem
 *   7. create the ptlrpc service with mgs_handle as handler and start
 *      its threads
 *   8. register lprocfs entries, start the ping evictor
 * The tail of the function unwinds in reverse order (service,
 * namespace, fsfilt ops, mount).
 *
 * NOTE(review): this copy lacks the braces, the rc declaration, the
 * assignment of mnt, the assignment target of ptlrpc_init_svc() and the
 * err_thread/err_fs/err_ops/err_put labels named by the GOTO calls.
 * Restore them from upstream before building.
 */
125 static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
127 struct lprocfs_static_vars lvars;
128 struct mgs_obd *mgs = &obd->u.mgs;
129 struct lustre_mount_info *lmi;
130 struct lustre_sb_info *lsi;
131 struct vfsmount *mnt;
135 CDEBUG(D_CONFIG, "Starting MGS\n");
/* The mount is looked up by the obd name. */
138 lmi = server_get_mount(obd->obd_name);
140 RETURN(rc = -EINVAL);
143 lsi = s2lsi(lmi->lmi_sb);
144 obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
145 if (IS_ERR(obd->obd_fsops))
146 GOTO(err_put, rc = PTR_ERR(obd->obd_fsops));
148 /* namespace for mgs llog */
149 obd->obd_namespace = ldlm_namespace_new("MGS", LDLM_NAMESPACE_SERVER,
150 LDLM_NAMESPACE_MODEST);
151 if (obd->obd_namespace == NULL)
152 GOTO(err_ops, rc = -ENOMEM);
155 ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
156 "mgs_ldlm_client", &obd->obd_ldlm_client);
/* The backing device must not be flagged read-only at this point. */
158 LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
160 rc = mgs_fs_setup(obd, mnt);
162 CERROR("%s: MGS filesystem method init failed: rc = %d\n",
167 rc = llog_start_commit_thread();
171 rc = llog_setup(obd, NULL, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL,
176 /* No recovery for MGC's */
177 obd->obd_replayable = 0;
179 /* Internal mgs setup */
180 mgs_init_fsdb_list(obd);
181 sema_init(&mgs->mgs_sem, 1);
183 /* Start the service threads */
185 ptlrpc_init_svc(MGS_NBUFS, MGS_BUFSIZE, MGS_MAXREQSIZE,
186 MGS_MAXREPSIZE, MGS_REQUEST_PORTAL,
187 MGC_REPLY_PORTAL, MGS_SERVICE_WATCHDOG_TIMEOUT,
188 mgs_handle, LUSTRE_MGS_NAME,
189 obd->obd_proc_entry, NULL,
190 MGS_THREADS_AUTO_MIN, MGS_THREADS_AUTO_MAX,
191 "ll_mgs", LCT_MD_THREAD);
193 if (!mgs->mgs_service) {
194 CERROR("failed to start service\n");
195 GOTO(err_fs, rc = -ENOMEM);
198 rc = ptlrpc_start_threads(obd, mgs->mgs_service);
200 GOTO(err_thread, rc);
/* Only call lproc_mgs_setup() when lprocfs_obd_setup() returned 0. */
203 lprocfs_mgs_init_vars(&lvars);
204 if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
205 lproc_mgs_setup(obd);
208 ping_evictor_start();
210 LCONSOLE_INFO("MGS %s started\n", obd->obd_name);
/* Error unwinding starts here; the goto labels are missing in this copy. */
215 ptlrpc_unregister_service(mgs->mgs_service);
217 /* No extra cleanup needed for llog_init_commit_thread() */
220 ldlm_namespace_free(obd->obd_namespace, 0);
221 obd->obd_namespace = NULL;
223 fsfilt_put_ops(obd->obd_fsops);
225 server_put_mount(obd->obd_name, mnt);
/*
 * mgs_precleanup - staged pre-cleanup hook (installed as .o_precleanup).
 *
 * obd:   the MGS obd device
 * stage: which cleanup stage is being run
 *
 * Of the visible cases only OBD_CLEANUP_SELF_EXP does work: it cleans
 * up the LLOG_CONFIG_ORIG_CTXT llog context and calls obd_llog_finish().
 * OBD_CLEANUP_EARLY, OBD_CLEANUP_EXPORTS and OBD_CLEANUP_OBD show no
 * statements here.
 *
 * NOTE(review): the switch statement, break statements, braces and the
 * return are missing from this copy - verify upstream.
 */
230 static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
236 case OBD_CLEANUP_EARLY:
237 case OBD_CLEANUP_EXPORTS:
239 case OBD_CLEANUP_SELF_EXP:
240 llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
241 rc = obd_llog_finish(obd, 0);
243 case OBD_CLEANUP_OBD:
/*
 * mgs_ldlm_nsfree - thread body that frees an LDLM namespace.
 *
 * data: the struct ldlm_namespace to free, passed as an opaque pointer
 *
 * Started from mgs_cleanup() via cfs_kernel_thread().  The thread
 * daemonizes under the name ll_mgs_nsfree and frees the namespace with
 * the force argument set to 1.
 *
 * NOTE(review): the rc declaration and return statement are missing
 * from this copy; presumably the ldlm_namespace_free() result is
 * returned - verify upstream.
 */
249 static int mgs_ldlm_nsfree(void *data)
251 struct ldlm_namespace *ns = (struct ldlm_namespace *)data;
255 ptlrpc_daemonize("ll_mgs_nsfree");
256 rc = ldlm_namespace_free(ns, 1 /* obd_force should always be on */);
/*
 * mgs_cleanup - cleanup method of the MGS obd (installed as .o_cleanup).
 *
 * obd: the MGS obd device being stopped
 *
 * Visible teardown sequence: test mgs_sb for NULL, unregister the
 * ptlrpc service, drop the fsdb list, remove the lprocfs entries,
 * release the server mount, hand the LDLM namespace to a helper thread
 * (mgs_ldlm_nsfree) for freeing, and release the fsfilt operations.
 *
 * NOTE(review): braces and return statements are missing from this
 * copy; the action taken when mgs_sb is NULL is not visible (presumably
 * an early return).  The block comment further down has also lost its
 * closing delimiter in this copy - restore from upstream.
 */
260 static int mgs_cleanup(struct obd_device *obd)
262 struct mgs_obd *mgs = &obd->u.mgs;
265 if (mgs->mgs_sb == NULL)
/* Unregister the request service before the state it uses is torn down. */
270 ptlrpc_unregister_service(mgs->mgs_service);
272 mgs_cleanup_fsdb_list(obd);
274 lprocfs_obd_cleanup(obd);
275 mgs->mgs_proc_live = NULL;
279 server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
282 /* Free the namespace in its own thread, so that if the
283 ldlm_cancel_handler put the last mgs obd ref, we won't
285 cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace,
286 CLONE_VM | CLONE_FILES);
289 fsfilt_put_ops(obd->obd_fsops);
291 LCONSOLE_INFO("%s has stopped.\n", obd->obd_name);
295 /* similar to filter_prepare_destroy */
/*
 * mgs_get_cfg_lock - take the per-filesystem config lock locally.
 *
 * obd:    the MGS obd device whose namespace holds the lock
 * fsname: filesystem name; converted to the lock resource id and also
 *         passed as an argument of the enqueue call
 * lockh:  out parameter, receives the handle of the granted lock
 *
 * Enqueues a local LDLM_PLAIN lock in LCK_EX mode using
 * ldlm_blocking_ast and ldlm_completion_ast.  Callers in this file use
 * the enqueue to make other nodes refresh their configuration and
 * compare the result against ELDLM_OK.
 *
 * NOTE(review): the declarations of rc and flags, the checks on rc and
 * the return statement are missing from this copy - verify upstream.
 */
296 static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
297 struct lustre_handle *lockh)
299 struct ldlm_res_id res_id;
303 rc = mgc_fsname2resid(fsname, &res_id);
305 rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id,
306 LDLM_PLAIN, NULL, LCK_EX,
307 &flags, ldlm_blocking_ast,
308 ldlm_completion_ast, NULL,
309 fsname, 0, NULL, lockh);
311 CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
/*
 * mgs_put_cfg_lock - drop the LCK_EX reference taken by
 * mgs_get_cfg_lock().
 *
 * lockh: handle previously filled in by mgs_get_cfg_lock()
 *
 * NOTE(review): braces and the return statement are missing from this
 * copy; the return value cannot be confirmed here.
 */
316 static int mgs_put_cfg_lock(struct lustre_handle *lockh)
319 ldlm_lock_decref(lockh, LCK_EX);
/*
 * mgs_check_target - sanity-check a target that reports itself as
 * already registered.
 *
 * obd: the MGS obd device
 * mti: target info sent by the server; mti_flags may be modified
 *
 * Uses mgs_check_index() to compare the target claim with the logs
 * held by this MGS.  Two outcomes set LDD_F_WRITECONF in mti_flags so
 * that logs get regenerated: the MGS does not know the target, or
 * (rc == -1) the client log has disappeared.  Otherwise the index is
 * considered correctly marked and mgs_check_failnid() adds any nids
 * missing from the logs as failover nids.
 *
 * NOTE(review): the first branch condition, braces, the rc declaration
 * and return statements are missing from this copy, as is the trailing
 * argument of the second console message - verify upstream.
 */
326 static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti)
331 rc = mgs_check_index(obd, mti);
333 LCONSOLE_ERROR_MSG(0x13b, "%s claims to have registered, but "
334 "this MGS does not know about it. Assuming"
335 " writeconf.\n", mti->mti_svname);
336 mti->mti_flags |= LDD_F_WRITECONF;
338 } else if (rc == -1) {
339 LCONSOLE_ERROR_MSG(0x13c, "Client log %s-client has "
340 "disappeared! Regenerating all logs.\n",
342 mti->mti_flags |= LDD_F_WRITECONF;
345 /* Index is correctly marked as used */
347 /* If the logs don't contain the mti_nids then add
348 them as failover nids */
349 rc = mgs_check_failnid(obd, mti);
355 /* Called whenever a target starts up. Flags indicate first connect, etc. */
/*
 * mgs_handle_target_reg - handle a target registration request.
 *
 * req: incoming request; its capsule carries a struct mgs_target_info
 *
 * Visible flow:
 *   - If none of the update-type flags tested below is set, the request
 *     is treated as a startup ping and only mgs_check_target() runs
 *     (which may itself set flags that require a log update).
 *   - Otherwise the config lock for the filesystem is taken so other
 *     nodes notice the change; failure to get it is reported on the
 *     console but does not stop the update.
 *   - LDD_F_WRITECONF erases logs: all logs of the filesystem for an
 *     MDT with stripe index 0, or only the target log for any other
 *     OST/MDT.  LDD_F_UPDATE is then set and LDD_F_UPGRADE14 cleared.
 *   - LDD_F_UPGRADE14 runs mgs_upgrade_sv_14() and sets LDD_F_UPDATE.
 *   - LDD_F_UPDATE writes the target log with mgs_write_log_target(),
 *     clears the one-shot flags and sets LDD_F_REWRITE_LDD.
 *   - The lock is dropped, the reply is packed and the logs are synced
 *     to disk with fsfilt_sync().
 *
 * NOTE(review): this copy lacks braces, the rc/lockrc declarations,
 * the rc checks, the out/out_nolock labels, parts of two flag masks and
 * the copy of mti into rep_mti.  Restore from upstream before building.
 */
356 static int mgs_handle_target_reg(struct ptlrpc_request *req)
358 struct obd_device *obd = req->rq_export->exp_obd;
359 struct lustre_handle lockh;
360 struct mgs_target_info *mti, *rep_mti;
364 mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
365 if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
367 /* We're just here as a startup ping. */
368 CDEBUG(D_MGS, "Server %s is running on %s\n",
369 mti->mti_svname, obd_export_nid2str(req->rq_export));
370 rc = mgs_check_target(obd, mti);
371 /* above will set appropriate mti flags */
373 /* Nothing wrong, or fatal error */
374 GOTO(out_nolock, rc);
377 /* Revoke the config lock to make sure nobody is reading. */
378 /* Although actually I think it should be alright if
379 someone was reading while we were updating the logs - if we
380 revoke at the end they will just update from where they left off. */
381 lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
382 if (lockrc != ELDLM_OK) {
383 LCONSOLE_ERROR_MSG(0x13d, "%s: Can't signal other nodes to "
384 "update their configuration (%d). Updating "
385 "local logs anyhow; you might have to "
386 "manually restart other nodes to get the "
387 "latest configuration.\n",
388 obd->obd_name, lockrc);
/* Fault-injection hook for OBD_FAIL_MGS_SLOW_TARGET_REG. */
391 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10);
393 /* Log writing contention is handled by the fsdb_sem */
395 if (mti->mti_flags & LDD_F_WRITECONF) {
396 if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
397 mti->mti_stripe_index == 0) {
398 rc = mgs_erase_logs(obd, mti->mti_fsname);
399 LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
400 "request. All servers must be restarted "
401 "in order to regenerate the logs."
402 "\n", obd->obd_name, mti->mti_fsname);
403 } else if (mti->mti_flags &
404 (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
405 rc = mgs_erase_log(obd, mti->mti_svname);
406 LCONSOLE_WARN("%s: Regenerating %s log by user "
408 obd->obd_name, mti->mti_svname);
410 mti->mti_flags |= LDD_F_UPDATE;
411 /* Erased logs means start from scratch. */
412 mti->mti_flags &= ~LDD_F_UPGRADE14;
416 if (mti->mti_flags & LDD_F_UPGRADE14) {
417 rc = mgs_upgrade_sv_14(obd, mti);
419 CERROR("Can't upgrade from 1.4 (%d)\n", rc);
423 /* We're good to go */
424 mti->mti_flags |= LDD_F_UPDATE;
428 if (mti->mti_flags & LDD_F_UPDATE) {
429 CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname,
430 mti->mti_stripe_index);
432 /* create or update the target log
433 and update the client/mdt logs */
434 rc = mgs_write_log_target(obd, mti);
436 CERROR("Failed to write %s log (%d)\n",
437 mti->mti_svname, rc);
/* Clear the one-shot flags and set LDD_F_REWRITE_LDD for the reply. */
441 mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
442 LDD_F_NEED_INDEX | LDD_F_WRITECONF |
444 mti->mti_flags |= LDD_F_REWRITE_LDD;
448 /* done with log update */
449 if (lockrc == ELDLM_OK)
450 mgs_put_cfg_lock(&lockh);
452 CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
453 mti->mti_stripe_index, rc);
454 rc = req_capsule_server_pack(&req->rq_pill);
458 /* send back the whole mti in the reply */
459 rep_mti = req_capsule_server_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
462 /* Flush logs to disk */
463 fsfilt_sync(obd, obd->u.mgs.mgs_sb);
/*
 * mgs_set_info_rpc - handle a set_info request that carries a
 * parameter string.
 *
 * req: incoming request; its capsule carries a struct mgs_send_param
 *
 * Wraps msp->mgs_param in an LCFG_PARAM lustre_cfg record, applies it
 * with mgs_setparam() (which also fills in fsname), then takes and
 * releases the config lock of that filesystem so that other nodes
 * refresh their configuration.  Finally the lustre_cfg is freed and the
 * reply is packed.
 *
 * NOTE(review): braces, the rc/lockrc declarations, the rc checks, the
 * return statements and the copy of msp into rep_msp are missing from
 * this copy.
 * NOTE(review): no NULL check on the lustre_cfg_new() result is visible,
 * and the error branch after mgs_setparam() shows no lustre_cfg_free();
 * confirm lcfg is not leaked or dereferenced when NULL.
 * NOTE(review): the last block comment in this function has lost its
 * closing delimiter in this copy.
 */
467 static int mgs_set_info_rpc(struct ptlrpc_request *req)
469 struct obd_device *obd = req->rq_export->exp_obd;
470 struct mgs_send_param *msp, *rep_msp;
471 struct lustre_handle lockh;
473 struct lustre_cfg_bufs bufs;
474 struct lustre_cfg *lcfg;
475 char fsname[MTI_NAME_MAXLEN];
478 msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM);
481 /* Construct lustre_cfg structure to pass to function mgs_setparam */
482 lustre_cfg_bufs_reset(&bufs, NULL);
483 lustre_cfg_bufs_set_string(&bufs, 1, msp->mgs_param);
484 lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
485 rc = mgs_setparam(obd, lcfg, fsname);
487 CERROR("Error %d in setting the parameter %s for fs %s\n",
488 rc, msp->mgs_param, fsname);
492 /* Revoke lock so everyone updates. Should be alright if
493 * someone was already reading while we were updating the logs,
494 * so we don't really need to hold the lock while we're
498 lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
499 if (lockrc != ELDLM_OK)
500 CERROR("lock error %d for fs %s\n", lockrc,
503 mgs_put_cfg_lock(&lockh);
505 lustre_cfg_free(lcfg);
507 rc = req_capsule_server_pack(&req->rq_pill);
509 rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM);
515 /* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
/*
 * mgs_handle - request handler of the MGS ptlrpc service (passed to
 * ptlrpc_init_svc() in mgs_setup()).
 *
 * req: the incoming request
 *
 * Initialises the request capsule, reads the opcode from the request
 * message and dispatches on it.  Any opcode other than MGS_CONNECT on a
 * request without an export is refused with -ENOTCONN.  Visible
 * branches: connect, disconnect, target add, target del (its handler
 * call is commented out), set_info, LDLM enqueue, LDLM callbacks
 * (reported as an error), ping, llog cancel (-ENOTSUPP), the llog
 * origin handle operations and llog catinfo.  The last visible branch
 * sets -ENOTSUPP and calls ptlrpc_error().  The reply is sent through
 * target_send_reply().
 *
 * The two LASSERTs check that no journal handle is attached to the
 * current task on entry or after the dispatch.
 *
 * NOTE(review): the switch statement, most case labels, break
 * statements, braces, the opc/rc declarations and the out label are
 * missing from this copy; the mapping of opcodes to branches above is
 * inferred from the debug strings only - verify upstream.
 */
516 int mgs_handle(struct ptlrpc_request *req)
518 int fail = OBD_FAIL_MGS_ALL_REPLY_NET;
522 req_capsule_init(&req->rq_pill, req, RCL_SERVER);
523 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_REQUEST_NET, 2);
525 LASSERT(current->journal_info == NULL);
526 opc = lustre_msg_get_opc(req->rq_reqmsg);
/* Everything except the connect request needs an export. */
527 if (opc != MGS_CONNECT) {
528 if (req->rq_export == NULL) {
529 CERROR("lustre_mgs: operation %d on unconnected MGS\n",
531 req->rq_status = -ENOTCONN;
532 GOTO(out, rc = -ENOTCONN);
538 DEBUG_REQ(D_MGS, req, "connect");
539 /* MGS and MDS have same request format for connect */
540 req_capsule_set(&req->rq_pill, &RQF_MDS_CONNECT);
541 rc = target_handle_connect(req);
542 if (!rc && (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1))
543 /* Make clients trying to reconnect after a MGS restart
544 happy; also requires obd_replayable */
545 lustre_msg_add_op_flags(req->rq_repmsg,
546 MSG_CONNECT_RECONNECT);
549 DEBUG_REQ(D_MGS, req, "disconnect");
550 /* MGS and MDS have same request format for disconnect */
551 req_capsule_set(&req->rq_pill, &RQF_MDS_DISCONNECT);
552 rc = target_handle_disconnect(req);
553 req->rq_status = rc; /* superfluous? */
556 DEBUG_REQ(D_MGS, req, "target add");
557 req_capsule_set(&req->rq_pill, &RQF_MGS_TARGET_REG);
558 rc = mgs_handle_target_reg(req);
561 DEBUG_REQ(D_MGS, req, "target del");
562 //rc = mgs_handle_target_del(req);
565 DEBUG_REQ(D_MGS, req, "set_info");
566 req_capsule_set(&req->rq_pill, &RQF_MGS_SET_INFO);
567 rc = mgs_set_info_rpc(req);
571 DEBUG_REQ(D_MGS, req, "enqueue");
572 req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
573 rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
574 ldlm_server_blocking_ast, NULL);
576 case LDLM_BL_CALLBACK:
577 case LDLM_CP_CALLBACK:
578 DEBUG_REQ(D_MGS, req, "callback");
579 CERROR("callbacks should not happen on MGS\n");
584 DEBUG_REQ(D_INFO, req, "ping");
585 req_capsule_set(&req->rq_pill, &RQF_OBD_PING);
586 rc = target_handle_ping(req);
589 DEBUG_REQ(D_MGS, req, "log cancel");
590 rc = -ENOTSUPP; /* la la la */
593 case LLOG_ORIGIN_HANDLE_CREATE:
594 DEBUG_REQ(D_MGS, req, "llog_init");
595 req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
596 rc = llog_origin_handle_create(req);
598 case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
599 DEBUG_REQ(D_MGS, req, "llog next block");
600 req_capsule_set(&req->rq_pill,
601 &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
602 rc = llog_origin_handle_next_block(req);
604 case LLOG_ORIGIN_HANDLE_READ_HEADER:
605 DEBUG_REQ(D_MGS, req, "llog read header");
606 req_capsule_set(&req->rq_pill,
607 &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER);
608 rc = llog_origin_handle_read_header(req);
610 case LLOG_ORIGIN_HANDLE_CLOSE:
611 DEBUG_REQ(D_MGS, req, "llog close");
612 rc = llog_origin_handle_close(req);
615 DEBUG_REQ(D_MGS, req, "llog catinfo");
616 req_capsule_set(&req->rq_pill, &RQF_LLOG_CATINFO);
617 rc = llog_catinfo(req);
620 req->rq_status = -ENOTSUPP;
621 rc = ptlrpc_error(req);
625 LASSERT(current->journal_info == NULL);
628 CERROR("MGS handle cmd=%d rc=%d\n", opc, rc);
/* fail was initialised to OBD_FAIL_MGS_ALL_REPLY_NET above. */
631 target_send_reply(req, rc, fail);
/*
 * mgs_destroy_export - export destructor hook (installed as
 * .o_destroy_export); delegates to target_destroy_export().
 *
 * exp: export being destroyed
 *
 * NOTE(review): braces and the return statement are missing from this
 * copy; the return value cannot be confirmed here.
 */
635 static inline int mgs_destroy_export(struct obd_export *exp)
639 target_destroy_export(exp);
644 /* from mdt_iocontrol */
/*
 * mgs_iocontrol - ioctl entry point of the MGS obd (installed as
 * .o_iocontrol).
 *
 * cmd:  ioctl command number
 * exp:  export the ioctl was issued on; supplies the obd device
 * len:  not referenced in the visible lines
 * karg: kernel copy of the ioctl payload, used as struct obd_ioctl_data
 * uarg: not referenced in the visible lines
 *
 * Visible commands:
 *   OBD_IOC_PARAM     - copy a lustre_cfg record from user space,
 *                       apply it with mgs_setparam(), then take and
 *                       release the config lock of the affected
 *                       filesystem so other nodes refresh
 *   OBD_IOC_DUMP_LOG  - dump the config llog named by ioc_inlbuf1
 *   OBD_IOC_LLOG_CHECK, OBD_IOC_LLOG_INFO, OBD_IOC_LLOG_PRINT
 *                     - forwarded to llog_ioctl()
 * Any other command is only logged at D_INFO level in the visible
 * lines.
 *
 * NOTE(review): the switch statement, braces, the rc/lockrc
 * declarations, rc checks, the out_free label and return statements are
 * missing from this copy; the first block comment inside
 * OBD_IOC_PARAM has also lost its closing delimiter.
 * NOTE(review): the copy_from_user() result is stored directly in rc.
 * That function reports the number of bytes left uncopied, not an
 * errno, so confirm it is converted to -EFAULT before being returned.
 * NOTE(review): ioc_plen1 comes from the caller and sizes both the
 * allocation and the copy; only lcfg_bufcount is validated in the
 * visible lines - confirm the record length is sanity-checked.
 */
645 int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
646 void *karg, void *uarg)
648 struct obd_device *obd = exp->exp_obd;
649 struct obd_ioctl_data *data = karg;
650 struct lvfs_run_ctxt saved;
654 CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd);
658 case OBD_IOC_PARAM: {
659 struct lustre_handle lockh;
660 struct lustre_cfg *lcfg;
661 struct llog_rec_hdr rec;
662 char fsname[MTI_NAME_MAXLEN];
665 rec.lrh_len = llog_data_len(data->ioc_plen1);
667 if (data->ioc_type == LUSTRE_CFG_TYPE) {
668 rec.lrh_type = OBD_CFG_REC;
670 CERROR("unknown cfg record type:%d \n", data->ioc_type);
/* Kernel buffer for the user-supplied record; freed with OBD_FREE below. */
674 OBD_ALLOC(lcfg, data->ioc_plen1);
677 rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
681 if (lcfg->lcfg_bufcount < 1)
682 GOTO(out_free, rc = -EINVAL);
684 rc = mgs_setparam(obd, lcfg, fsname);
686 CERROR("setparam err %d\n", rc);
690 /* Revoke lock so everyone updates. Should be alright if
691 someone was already reading while we were updating the logs,
692 so we don't really need to hold the lock while we're
695 lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
696 if (lockrc != ELDLM_OK)
697 CERROR("lock error %d for fs %s\n", lockrc,
700 mgs_put_cfg_lock(&lockh);
704 OBD_FREE(lcfg, data->ioc_plen1);
708 case OBD_IOC_DUMP_LOG: {
709 struct llog_ctxt *ctxt =
710 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
711 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
712 rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
713 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
720 case OBD_IOC_LLOG_CHECK:
721 case OBD_IOC_LLOG_INFO:
722 case OBD_IOC_LLOG_PRINT: {
723 struct llog_ctxt *ctxt =
724 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
726 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
727 rc = llog_ioctl(ctxt, cmd, data);
728 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
734 CDEBUG(D_INFO, "unknown command %x\n", cmd);
740 /* use obd ops to offer management infrastructure */
/*
 * Method table of the MGS obd type; registered with
 * class_register_type() in mgs_init().  Every entry points at a
 * function defined in this file.
 * NOTE(review): the closing brace and semicolon of the initializer are
 * missing from this copy.
 */
741 static struct obd_ops mgs_obd_ops = {
742 .o_owner = THIS_MODULE,
743 .o_connect = mgs_connect,
744 .o_disconnect = mgs_disconnect,
745 .o_setup = mgs_setup,
746 .o_precleanup = mgs_precleanup,
747 .o_cleanup = mgs_cleanup,
748 .o_destroy_export = mgs_destroy_export,
749 .o_iocontrol = mgs_iocontrol,
/*
 * mgs_init - module entry point (see module_init() at the end of the
 * file).
 *
 * Fetches the lprocfs variable tables and registers the MGS obd type
 * under LUSTRE_MGS_NAME with mgs_obd_ops as its method table.
 *
 * NOTE(review): braces and the return statement are missing from this
 * copy, and no use of the class_register_type() result is visible;
 * confirm that a registration failure is propagated to the caller.
 */
752 static int __init mgs_init(void)
754 struct lprocfs_static_vars lvars;
756 lprocfs_mgs_init_vars(&lvars);
757 class_register_type(&mgs_obd_ops, NULL,
758 lvars.module_vars, LUSTRE_MGS_NAME, NULL);
/*
 * mgs_exit - module exit point (see module_exit() at the end of the
 * file); unregisters the obd type that mgs_init() registered.
 * The __exit annotation is commented out in the signature.
 */
763 static void /*__exit*/ mgs_exit(void)
765 class_unregister_type(LUSTRE_MGS_NAME);
/* Kernel module metadata and the init/exit hook registration. */
768 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
769 MODULE_DESCRIPTION("Lustre Management Server (MGS)");
770 MODULE_LICENSE("GPL");
772 module_init(mgs_init);
773 module_exit(mgs_exit);