1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mgs/mgs_handler.c
5 * Lustre Management Server (mgs) request handler
7 * Copyright (C) 2006 Cluster File Systems, Inc.
8 * Author: Nathan Rutman <nathan@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* File-wide preprocessor setup: defines EXPORT_SYMTAB, sets DEBUG_SUBSYSTEM
 * to S_MGS, and makes D_MGS an alias for D_CONFIG (the "|D_WARNING"
 * alternative is left commented out on the D_MGS line). */
27 # define EXPORT_SYMTAB
29 #define DEBUG_SUBSYSTEM S_MGS
30 #define D_MGS D_CONFIG/*|D_WARNING*/
33 # include <linux/module.h>
34 # include <linux/pagemap.h>
35 # include <linux/miscdevice.h>
36 # include <linux/init.h>
38 # include <liblustre.h>
41 #include <obd_class.h>
42 #include <lustre_dlm.h>
43 #include <lprocfs_status.h>
44 #include <lustre_fsfilt.h>
45 #include <lustre_commit_confd.h>
46 #include <lustre_disk.h>
47 #include "mgs_internal.h"
50 /* Establish a connection to the MGS.*/
/*
 * conn   - handle passed to class_connect() and used to look up the export
 * obd    - the MGS obd device being connected to
 * cluuid - UUID of the connecting client
 * data   - connect data; its flags are masked down to the set this MGS
 *          supports and its version is set to LUSTRE_VERSION_CODE
 *
 * NOTE(review): the guards around several statements (the result of the
 * NULL-argument test, any data != NULL test before data is dereferenced,
 * and the condition selecting class_disconnect()) are not evident in the
 * statements below -- confirm the exact control flow.
 */
51 static int mgs_connect(const struct lu_env *env,
52 struct lustre_handle *conn, struct obd_device *obd,
53 struct obd_uuid *cluuid, struct obd_connect_data *data)
55 struct obd_export *exp;
/* All three of conn, obd and cluuid are required. */
59 if (!conn || !obd || !cluuid)
62 rc = class_connect(conn, obd, cluuid);
65 exp = class_conn2export(conn);
/* Keep only the connect flags in MGS_CONNECT_SUPPORTED, record the
 * result on the export, and report our version back in data. */
69 data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
70 exp->exp_connect_flags = data->ocd_connect_flags;
71 data->ocd_version = LUSTRE_VERSION_CODE;
75 class_disconnect(exp);
/* presumably balances a reference taken by class_conn2export() -- TODO confirm */
77 class_export_put(exp);
/*
 * Tear down a client export: hold an extra export reference for the
 * duration, disconnect the export, cancel its DLM locks, and hand every
 * reply still queued on the export to ptlrpc for completion.
 *
 * exp - the export being disconnected
 */
83 static int mgs_disconnect(struct obd_export *exp)
/* Extra reference, released by the class_export_put() at the end. */
89 class_export_get(exp);
91 /* Disconnect early so that clients can't keep using export */
92 rc = class_disconnect(exp);
93 ldlm_cancel_locks_for_export(exp);
95 /* complete all outstanding replies */
/* exp_lock is held across the whole list walk; each entry is unlinked and
 * scheduled under its owning service's srv_lock, nested inside exp_lock. */
96 spin_lock(&exp->exp_lock);
97 while (!list_empty(&exp->exp_outstanding_replies)) {
98 struct ptlrpc_reply_state *rs =
99 list_entry(exp->exp_outstanding_replies.next,
100 struct ptlrpc_reply_state, rs_exp_list);
101 struct ptlrpc_service *svc = rs->rs_service;
103 spin_lock(&svc->srv_lock);
/* Unlinking the head entry is what lets the while loop terminate. */
104 list_del_init(&rs->rs_exp_list);
105 ptlrpc_schedule_difficult_reply(rs);
106 spin_unlock(&svc->srv_lock);
108 spin_unlock(&exp->exp_lock);
110 class_export_put(exp);
/* Forward declarations. mgs_handle is passed to ptlrpc_init_svc() inside
 * mgs_setup(), which appears before mgs_handle's definition. */
114 static int mgs_cleanup(struct obd_device *obd);
115 static int mgs_handle(struct ptlrpc_request *req);
117 /* Start the MGS obd */
/*
 * obd  - the MGS obd device being set up
 * lcfg - setup configuration record (not referenced by the statements below)
 *
 * Order of work: obtain the server mount, resolve the fsfilt operations
 * for the mount's disk type, create the "MGS" DLM namespace, initialise
 * the LDLM callback client, set up the MGS filesystem state, start the
 * llog commit thread and the config llog context, initialise the fsdb
 * list and mgs_sem, create the ptlrpc service and its threads, register
 * procfs entries, and start the ping evictor.
 *
 * The tail of the function is the error unwind; it releases resources in
 * the reverse order of acquisition (service, namespace, fs ops, mount).
 *
 * NOTE(review): the assignment of mnt, the guards in front of several
 * RETURN/GOTO statements, and the unwind labels are not evident in the
 * statements below -- confirm before changing the error paths.
 */
118 static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
120 struct lprocfs_static_vars lvars;
121 struct mgs_obd *mgs = &obd->u.mgs;
122 struct lustre_mount_info *lmi;
123 struct lustre_sb_info *lsi;
124 struct vfsmount *mnt;
128 CDEBUG(D_CONFIG, "Starting MGS\n");
/* The mount is looked up by the obd's own name. */
131 lmi = server_get_mount(obd->obd_name);
133 RETURN(rc = -EINVAL);
/* Choose filesystem operations from the mount type recorded in the
 * superblock's lustre disk data. */
136 lsi = s2lsi(lmi->lmi_sb);
137 obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
138 if (IS_ERR(obd->obd_fsops))
139 GOTO(err_put, rc = PTR_ERR(obd->obd_fsops));
141 /* namespace for mgs llog */
142 obd->obd_namespace = ldlm_namespace_new("MGS", LDLM_NAMESPACE_SERVER);
143 if (obd->obd_namespace == NULL) {
145 GOTO(err_ops, rc = -ENOMEM);
149 ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
150 "mgs_ldlm_client", &obd->obd_ldlm_client);
/* Assert that lvfs_check_rdonly() is false for the mount's device. */
152 LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
154 rc = mgs_fs_setup(obd, mnt);
156 CERROR("%s: MGS filesystem method init failed: rc = %d\n",
161 rc = llog_start_commit_thread();
165 rc = llog_setup(obd, NULL, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL,
170 /* No recovery for MGC's */
171 obd->obd_replayable = 0;
173 /* Internal mgs setup */
174 mgs_init_fsdb_list(obd);
175 sema_init(&mgs->mgs_sem, 1);
177 /* Start the service threads */
/* mgs_handle is registered as the request handler for this service. */
179 ptlrpc_init_svc(MGS_NBUFS, MGS_BUFSIZE, MGS_MAXREQSIZE,
180 MGS_MAXREPSIZE, MGS_REQUEST_PORTAL,
181 MGC_REPLY_PORTAL, MGS_SERVICE_WATCHDOG_TIMEOUT,
182 mgs_handle, LUSTRE_MGS_NAME,
183 obd->obd_proc_entry, NULL,
184 MGS_THREADS_AUTO_MIN, MGS_THREADS_AUTO_MAX,
185 "ll_mgs", LCT_MD_THREAD);
187 if (!mgs->mgs_service) {
188 CERROR("failed to start service\n");
189 GOTO(err_fs, rc = -ENOMEM);
192 rc = ptlrpc_start_threads(obd, mgs->mgs_service);
194 GOTO(err_thread, rc);
/* MGS-specific proc entries are added only if the generic obd proc
 * setup succeeded (returned 0). */
197 lprocfs_init_vars(mgs, &lvars);
198 if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
199 lproc_mgs_setup(obd);
202 ping_evictor_start();
204 LCONSOLE_INFO("MGS %s started\n", obd->obd_name);
/* Error unwind starts here. */
209 ptlrpc_unregister_service(mgs->mgs_service);
211 /* No extra cleanup needed for llog_init_commit_thread() */
214 ldlm_namespace_free(obd->obd_namespace, 0);
215 obd->obd_namespace = NULL;
217 fsfilt_put_ops(obd->obd_fsops);
219 server_put_mount(obd->obd_name, mnt);
/*
 * Staged pre-cleanup hook, dispatched on the cleanup stage.
 *
 * obd   - the MGS obd device
 * stage - which cleanup stage is being run
 *
 * Only OBD_CLEANUP_SELF_EXP has visible work: it cleans up the config
 * llog origin context and then finishes llog handling for the obd. The
 * other listed stages have no statements of their own here.
 */
224 static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
230 case OBD_CLEANUP_EARLY:
231 case OBD_CLEANUP_EXPORTS:
233 case OBD_CLEANUP_SELF_EXP:
234 llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
235 rc = obd_llog_finish(obd, 0);
237 case OBD_CLEANUP_OBD:
/*
 * Thread entry point that frees a DLM namespace; started from
 * mgs_cleanup() via cfs_kernel_thread().
 *
 * data - the struct ldlm_namespace to free, passed as an opaque pointer
 */
243 static int mgs_ldlm_nsfree(void *data)
245 struct ldlm_namespace *ns = (struct ldlm_namespace *)data;
/* Daemonize under the name "ll_mgs_nsfree" before doing the work. */
249 ptlrpc_daemonize("ll_mgs_nsfree");
250 rc = ldlm_namespace_free(ns, 1 /* obd_force should always be on */);
/*
 * Stop the MGS obd: unregister the ptlrpc service, release the fsdb list,
 * remove procfs entries, drop the server mount, free the DLM namespace
 * from a separate kernel thread (mgs_ldlm_nsfree), release the fsfilt
 * operations, and log that the device has stopped.
 *
 * obd - the MGS obd device being cleaned up
 */
254 static int mgs_cleanup(struct obd_device *obd)
256 struct mgs_obd *mgs = &obd->u.mgs;
/* A NULL mgs_sb is special-cased before any teardown; presumably it means
 * setup never completed -- TODO confirm what is returned here. */
261 if (mgs->mgs_sb == NULL)
264 ptlrpc_unregister_service(mgs->mgs_service);
266 mgs_cleanup_fsdb_list(obd);
268 lprocfs_obd_cleanup(obd);
269 mgs->mgs_proc_live = NULL;
273 server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
276 /* Free the namespace in it's own thread, so that if the
277 ldlm_cancel_handler put the last mgs obd ref, we won't
279 cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace,
280 CLONE_VM | CLONE_FILES);
283 fsfilt_put_ops(obd->obd_fsops);
285 LCONSOLE_INFO("%s has stopped.\n", obd->obd_name);
289 /* similar to filter_prepare_destroy */
/*
 * Take the configuration lock for a filesystem in the MGS namespace.
 *
 * obd    - the MGS obd device, whose obd_namespace the lock lives in
 * fsname - filesystem name; converted to a DLM resource id
 * lockh  - receives the handle of the lock that was enqueued
 *
 * The lock is a local LDLM_PLAIN lock in LCK_EX mode. A failure is logged
 * with CERROR; rc carries the enqueue result.
 */
290 static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
291 struct lustre_handle *lockh)
293 struct ldlm_res_id res_id;
297 rc = mgc_logname2resid(fsname, &res_id);
299 rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id,
300 LDLM_PLAIN, NULL, LCK_EX,
301 &flags, ldlm_blocking_ast,
302 ldlm_completion_ast, NULL,
303 fsname, 0, NULL, lockh);
305 CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
/*
 * Release a lock obtained with mgs_get_cfg_lock() by dropping its LCK_EX
 * reference (the same mode it was enqueued with).
 *
 * lockh - handle filled in by mgs_get_cfg_lock()
 */
310 static int mgs_put_cfg_lock(struct lustre_handle *lockh)
313 ldlm_lock_decref(lockh, LCK_EX);
/*
 * Check a target that says it is already registered against what this MGS
 * knows about it, using the result of mgs_check_index().
 *
 * obd - the MGS obd device
 * mti - target info from the request; LDD_F_WRITECONF may be set in
 *       mti_flags by this function
 *
 * Two mismatch outcomes each log a console error and set LDD_F_WRITECONF;
 * otherwise the index is considered correctly in use and the target's
 * NIDs are checked as failover NIDs via mgs_check_failnid().
 *
 * NOTE(review): the condition selecting the first branch is not evident
 * in the statements below -- confirm which mgs_check_index() result it
 * tests.
 */
320 static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti)
325 rc = mgs_check_index(obd, mti);
327 LCONSOLE_ERROR_MSG(0x13b, "%s claims to have registered, but "
328 "this MGS does not know about it. Assuming"
329 " writeconf.\n", mti->mti_svname);
330 mti->mti_flags |= LDD_F_WRITECONF;
332 } else if (rc == -1) {
333 LCONSOLE_ERROR_MSG(0x13c, "Client log %s-client has "
334 "disappeared! Regenerating all logs.\n",
336 mti->mti_flags |= LDD_F_WRITECONF;
339 /* Index is correctly marked as used */
341 /* If the logs don't contain the mti_nids then add
342 them as failover nids */
343 rc = mgs_check_failnid(obd, mti);
349 /* Called whenever a target starts up. Flags indicate first connect, etc. */
/*
 * req - the incoming target registration request; the reply carries the
 *       (possibly modified) mgs_target_info back to the sender
 *
 * Flow:
 *  1. Unpack the mgs_target_info from the request buffer.
 *  2. If none of the tested update flags are set, treat the request as a
 *     startup ping and validate the target with mgs_check_target(), which
 *     may itself set flags that require the steps below.
 *  3. Take the config lock (failure is reported but not fatal), then act
 *     on LDD_F_WRITECONF, LDD_F_UPGRADE14 and LDD_F_UPDATE in that order.
 *  4. Release the config lock if it was taken, pack the reply with a copy
 *     of mti, and sync the MGS filesystem.
 */
350 static int mgs_handle_target_reg(struct ptlrpc_request *req)
352 struct obd_device *obd = req->rq_export->exp_obd;
353 struct lustre_handle lockh;
354 struct mgs_target_info *mti, *rep_mti;
/* Reply layout: ptlrpc body followed by one mgs_target_info. */
355 int rep_size[] = { sizeof(struct ptlrpc_body), sizeof(*mti) };
/* NOTE(review): mti is dereferenced immediately below with no visible
 * NULL check on the lustre_swab_reqbuf() result -- confirm whether a
 * malformed or short request can make it return NULL. */
359 mti = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*mti),
360 lustre_swab_mgs_target_info);
362 if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
364 /* We're just here as a startup ping. */
365 CDEBUG(D_MGS, "Server %s is running on %s\n",
366 mti->mti_svname, obd_export_nid2str(req->rq_export));
367 rc = mgs_check_target(obd, mti);
368 /* above will set appropriate mti flags */
370 /* Nothing wrong, or fatal error */
371 GOTO(out_nolock, rc);
374 /* Revoke the config lock to make sure nobody is reading. */
375 /* Although actually I think it should be alright if
376 someone was reading while we were updating the logs - if we
377 revoke at the end they will just update from where they left off. */
/* lockrc is kept separate from rc so that a lock failure does not abort
 * the log update; it is tested again before the lock is released. */
378 lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
379 if (lockrc != ELDLM_OK) {
380 LCONSOLE_ERROR_MSG(0x13d, "%s: Can't signal other nodes to "
381 "update their configuration (%d). Updating "
382 "local logs anyhow; you might have to "
383 "manually restart other nodes to get the "
384 "latest configuration.\n",
385 obd->obd_name, lockrc);
388 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10);
390 /* Log writing contention is handled by the fsdb_sem */
/* Writeconf: an MDT erases the logs of the whole filesystem, an OST
 * erases only its own log. */
392 if (mti->mti_flags & LDD_F_WRITECONF) {
393 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
394 rc = mgs_erase_logs(obd, mti->mti_fsname);
395 LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
396 "request. All servers must be restarted "
397 "in order to regenerate the logs."
398 "\n", obd->obd_name, mti->mti_fsname);
399 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
400 rc = mgs_erase_log(obd, mti->mti_svname);
401 LCONSOLE_WARN("%s: Regenerating %s log by user "
403 obd->obd_name, mti->mti_svname);
405 mti->mti_flags |= LDD_F_UPDATE;
406 /* Erased logs means start from scratch. */
407 mti->mti_flags &= ~LDD_F_UPGRADE14;
411 if (mti->mti_flags & LDD_F_UPGRADE14) {
412 rc = mgs_upgrade_sv_14(obd, mti);
414 CERROR("Can't upgrade from 1.4 (%d)\n", rc);
418 /* We're good to go */
419 mti->mti_flags |= LDD_F_UPDATE;
423 if (mti->mti_flags & LDD_F_UPDATE) {
424 CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname,
425 mti->mti_stripe_index);
427 /* create or update the target log
428 and update the client/mdt logs */
429 rc = mgs_write_log_target(obd, mti);
431 CERROR("Failed to write %s log (%d)\n",
432 mti->mti_svname, rc);
/* Clear the one-shot request flags and set LDD_F_REWRITE_LDD in the
 * flags that are sent back in the reply. */
436 mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
437 LDD_F_NEED_INDEX | LDD_F_WRITECONF |
439 mti->mti_flags |= LDD_F_REWRITE_LDD;
443 /* done with log update */
444 if (lockrc == ELDLM_OK)
445 mgs_put_cfg_lock(&lockh);
447 CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
448 mti->mti_stripe_index, rc);
/* NOTE(review): the lustre_pack_reply() result is not used here and
 * rep_mti is not visibly checked before the memcpy -- confirm that a
 * failed reply allocation cannot reach the copy. */
449 lustre_pack_reply(req, 2, rep_size, NULL);
450 /* send back the whole mti in the reply */
451 rep_mti = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
453 memcpy(rep_mti, mti, sizeof(*rep_mti));
455 /* Flush logs to disk */
456 fsfilt_sync(obd, obd->u.mgs.mgs_sb);
/*
 * Request handler for the MGS service (registered in mgs_setup()).
 *
 * req - the incoming ptlrpc request
 *
 * Reads the opcode from the request message, rejects any opcode other
 * than MGS_CONNECT that arrives without an export (-ENOTCONN), dispatches
 * to the per-opcode handler, and finally sends the reply through
 * target_send_reply(). Unsupported opcodes are answered with -ENOTSUPP
 * via ptlrpc_error().
 *
 * NOTE(review): most case labels are not evident in the statements
 * below; the DEBUG_REQ strings identify which operation each branch
 * serves.
 */
460 int mgs_handle(struct ptlrpc_request *req)
/* Passed to target_send_reply() at the end of the function. */
462 int fail = OBD_FAIL_MGS_ALL_REPLY_NET;
466 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_REQUEST_NET, 2);
/* The thread must not be inside a journal transaction on entry; the same
 * assertion is repeated after dispatch. */
468 LASSERT(current->journal_info == NULL);
469 opc = lustre_msg_get_opc(req->rq_reqmsg);
470 if (opc != MGS_CONNECT) {
471 if (req->rq_export == NULL) {
472 CERROR("lustre_mgs: operation %d on unconnected MGS\n",
474 req->rq_status = -ENOTCONN;
475 GOTO(out, rc = -ENOTCONN);
481 DEBUG_REQ(D_MGS, req, "connect");
482 rc = target_handle_connect(req);
483 if (!rc && (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1))
484 /* Make clients trying to reconnect after a MGS restart
485 happy; also requires obd_replayable */
486 lustre_msg_add_op_flags(req->rq_repmsg,
487 MSG_CONNECT_RECONNECT);
490 DEBUG_REQ(D_MGS, req, "disconnect");
491 rc = target_handle_disconnect(req);
492 req->rq_status = rc; /* superfluous? */
495 DEBUG_REQ(D_MGS, req, "target add");
496 rc = mgs_handle_target_reg(req);
/* Target deletion has no handler; the call below is commented out. */
499 DEBUG_REQ(D_MGS, req, "target del");
500 //rc = mgs_handle_target_del(req);
504 DEBUG_REQ(D_MGS, req, "enqueue");
505 rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
506 ldlm_server_blocking_ast, NULL);
508 case LDLM_BL_CALLBACK:
509 case LDLM_CP_CALLBACK:
510 DEBUG_REQ(D_MGS, req, "callback");
511 CERROR("callbacks should not happen on MGS\n");
516 DEBUG_REQ(D_INFO, req, "ping");
517 rc = target_handle_ping(req);
520 DEBUG_REQ(D_MGS, req, "log cancel");
521 rc = -ENOTSUPP; /* la la la */
524 case LLOG_ORIGIN_HANDLE_CREATE:
525 DEBUG_REQ(D_MGS, req, "llog_init");
526 rc = llog_origin_handle_create(req);
528 case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
529 DEBUG_REQ(D_MGS, req, "llog next block");
530 rc = llog_origin_handle_next_block(req);
532 case LLOG_ORIGIN_HANDLE_READ_HEADER:
533 DEBUG_REQ(D_MGS, req, "llog read header");
534 rc = llog_origin_handle_read_header(req);
536 case LLOG_ORIGIN_HANDLE_CLOSE:
537 DEBUG_REQ(D_MGS, req, "llog close");
538 rc = llog_origin_handle_close(req);
541 DEBUG_REQ(D_MGS, req, "llog catinfo");
542 rc = llog_catinfo(req);
/* Fallback for opcodes with no handler. */
545 req->rq_status = -ENOTSUPP;
546 rc = ptlrpc_error(req);
550 LASSERT(current->journal_info == NULL);
553 CERROR("MGS handle cmd=%d rc=%d\n", opc, rc);
556 target_send_reply(req, rc, fail);
/*
 * Export destruction hook (installed as .o_destroy_export in mgs_obd_ops).
 * The only work here is delegating to target_destroy_export().
 *
 * exp - the export being destroyed
 */
560 static inline int mgs_destroy_export(struct obd_export *exp)
564 target_destroy_export(exp);
569 /* from mdt_iocontrol */
/*
 * ioctl entry point for the MGS (installed as .o_iocontrol in mgs_obd_ops).
 *
 * cmd  - ioctl command number
 * exp  - export the ioctl was issued on; its exp_obd is the MGS device
 * len  - not referenced by the statements below
 * karg - kernel copy of the ioctl argument, used as struct obd_ioctl_data
 * uarg - not referenced by the statements below
 *
 * Commands handled:
 *  OBD_IOC_PARAM      - copy a lustre_cfg record in from user space, apply
 *                       it with mgs_setparam(), then take and immediately
 *                       release the config lock for the affected
 *                       filesystem; the record buffer is freed afterwards.
 *  OBD_IOC_DUMP_LOG   - dump the config llog named by ioc_inlbuf1, running
 *                       inside the obd's lvfs context.
 *  OBD_IOC_LLOG_CHECK / OBD_IOC_LLOG_INFO / OBD_IOC_LLOG_PRINT
 *                     - forwarded to llog_ioctl() inside the lvfs context
 *                       of the llog context's export obd.
 * Any other command is reported with a D_INFO debug message.
 */
570 int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
571 void *karg, void *uarg)
573 struct obd_device *obd = exp->exp_obd;
574 struct obd_ioctl_data *data = karg;
575 struct lvfs_run_ctxt saved;
579 CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd);
583 case OBD_IOC_PARAM: {
584 struct lustre_handle lockh;
585 struct lustre_cfg *lcfg;
586 struct llog_rec_hdr rec;
/* Filled in by mgs_setparam(); names the filesystem whose config lock is
 * taken afterwards. */
587 char fsname[MTI_NAME_MAXLEN];
590 rec.lrh_len = llog_data_len(data->ioc_plen1);
/* LUSTRE_CFG_TYPE is the only record type accepted here. */
592 if (data->ioc_type == LUSTRE_CFG_TYPE) {
593 rec.lrh_type = OBD_CFG_REC;
595 CERROR("unknown cfg record type:%d \n", data->ioc_type);
/* The buffer is sized by ioc_plen1 and freed with the same size. */
599 OBD_ALLOC(lcfg, data->ioc_plen1);
/* NOTE(review): copy_from_user() returns the number of bytes NOT copied,
 * not a negative errno, so rc is a positive count on failure -- confirm
 * how rc is handled or translated after this call. */
602 rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
/* A record with no buffers is rejected as invalid. */
606 if (lcfg->lcfg_bufcount < 1)
607 GOTO(out_free, rc = -EINVAL);
609 rc = mgs_setparam(obd, lcfg, fsname);
611 CERROR("setparam err %d\n", rc);
615 /* Revoke lock so everyone updates. Should be alright if
616 someone was already reading while we were updating the logs,
617 so we don't really need to hold the lock while we're
620 lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
621 if (lockrc != ELDLM_OK)
622 CERROR("lock error %d for fs %s\n", lockrc,
625 mgs_put_cfg_lock(&lockh);
629 OBD_FREE(lcfg, data->ioc_plen1);
633 case OBD_IOC_DUMP_LOG: {
634 struct llog_ctxt *ctxt =
635 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
636 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
637 rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
638 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
645 case OBD_IOC_LLOG_CHECK:
646 case OBD_IOC_LLOG_INFO:
647 case OBD_IOC_LLOG_PRINT: {
648 struct llog_ctxt *ctxt =
649 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
651 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
652 rc = llog_ioctl(ctxt, cmd, data);
653 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
659 CDEBUG(D_INFO, "unknown command %x\n", cmd);
665 /* use obd ops to offer management infrastructure */
/* Operation table registered for the MGS device type in mgs_init(). Each
 * entry points at the handler of the same purpose defined in this file. */
666 static struct obd_ops mgs_obd_ops = {
667 .o_owner = THIS_MODULE,
668 .o_connect = mgs_connect,
669 .o_disconnect = mgs_disconnect,
670 .o_setup = mgs_setup,
671 .o_precleanup = mgs_precleanup,
672 .o_cleanup = mgs_cleanup,
673 .o_destroy_export = mgs_destroy_export,
674 .o_iocontrol = mgs_iocontrol,
/*
 * Module load hook: fetch the MGS procfs variable tables and register the
 * MGS obd type under LUSTRE_MGS_NAME with mgs_obd_ops.
 *
 * NOTE(review): the class_register_type() result is not used by the call
 * below -- confirm what this function returns on registration failure.
 */
677 static int __init mgs_init(void)
679 struct lprocfs_static_vars lvars;
681 lprocfs_init_vars(mgs, &lvars);
682 class_register_type(&mgs_obd_ops, NULL,
683 lvars.module_vars, LUSTRE_MGS_NAME, NULL);
/* Module unload hook: unregister the obd type that mgs_init() registered
 * under LUSTRE_MGS_NAME. The __exit annotation is commented out in the
 * signature. */
688 static void /*__exit*/ mgs_exit(void)
690 class_unregister_type(LUSTRE_MGS_NAME);
/* Kernel module metadata, followed by the load/unload entry points
 * (mgs_init on load, mgs_exit on unload). */
693 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
694 MODULE_DESCRIPTION("Lustre Management Server (MGS)");
695 MODULE_LICENSE("GPL");
697 module_init(mgs_init);
698 module_exit(mgs_exit);