1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * lustre/mgs/mgs_handler.c
5 * Lustre Management Server (mgs) request handler
7 * Copyright (C) 2006 Cluster File Systems, Inc.
8 * Author: Nathan Rutman <nathan@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 # define EXPORT_SYMTAB
29 #define DEBUG_SUBSYSTEM S_MGS
30 #define D_MGS D_CONFIG/*|D_WARNING*/
33 # include <linux/module.h>
34 # include <linux/pagemap.h>
35 # include <linux/miscdevice.h>
36 # include <linux/init.h>
38 # include <liblustre.h>
41 #include <obd_class.h>
42 #include <lustre_dlm.h>
43 #include <lprocfs_status.h>
44 #include <lustre_fsfilt.h>
45 #include <lustre_commit_confd.h>
46 #include <lustre_disk.h>
47 #include "mgs_internal.h"
50 /* Establish a connection to the MGS.
 *
 * Rejects a NULL conn, obd or cluuid, creates the connection with
 * class_connect(), looks up the resulting export and then negotiates the
 * connect data: the requested flags are masked down to
 * MGS_CONNECT_SUPPORTED, stored on the export, and LUSTRE_VERSION_CODE is
 * reported back through data->ocd_version.
 *
 * NOTE(review): the local declarations, the rc checks and the return
 * statements of this function are not visible in this listing, so the
 * exact error paths cannot be confirmed from here. */
51 static int mgs_connect(struct lustre_handle *conn, struct obd_device *obd,
52 struct obd_uuid *cluuid, struct obd_connect_data *data)
54 struct obd_export *exp;
58 if (!conn || !obd || !cluuid)
61 rc = class_connect(conn, obd, cluuid);
64 exp = class_conn2export(conn);
/* Keep only the connect flags this MGS supports and remember them on the
 * export; data is dereferenced here (any NULL guard is not visible). */
68 data->ocd_connect_flags &= MGS_CONNECT_SUPPORTED;
69 exp->exp_connect_flags = data->ocd_connect_flags;
70 data->ocd_version = LUSTRE_VERSION_CODE;
/* presumably the failure path (its condition is not visible): undo the
 * connection made above */
74 class_disconnect(exp);
/* presumably releases the reference obtained through class_conn2export()
 * -- TODO confirm */
76 class_export_put(exp);
/* Tear down a client's connection to the MGS.
 *
 * An extra reference is held on the export (class_export_get() ...
 * class_export_put()) while the export is disconnected, its DLM locks are
 * cancelled and every reply still queued on exp_outstanding_replies is
 * unlinked and rescheduled. rc receives the result of class_disconnect();
 * the return statement itself is not visible in this listing. */
82 static int mgs_disconnect(struct obd_export *exp)
88 class_export_get(exp);
90 /* Disconnect early so that clients can't keep using export */
91 rc = class_disconnect(exp);
92 ldlm_cancel_locks_for_export(exp);
94 /* complete all outstanding replies */
95 spin_lock(&exp->exp_lock);
96 while (!list_empty(&exp->exp_outstanding_replies)) {
/* Always work on the list head; it is unlinked below, so the loop
 * terminates once the list has drained. */
97 struct ptlrpc_reply_state *rs =
98 list_entry(exp->exp_outstanding_replies.next,
99 struct ptlrpc_reply_state, rs_exp_list);
100 struct ptlrpc_service *svc = rs->rs_service;
/* The unlink and reschedule happen under the owning service's lock,
 * nested inside exp_lock. */
102 spin_lock(&svc->srv_lock);
103 list_del_init(&rs->rs_exp_list);
104 ptlrpc_schedule_difficult_reply(rs);
105 spin_unlock(&svc->srv_lock);
107 spin_unlock(&exp->exp_lock);
109 class_export_put(exp);
/* Forward declarations for functions defined later in this file;
 * mgs_handle is passed to ptlrpc_init_svc() as the request handler. */
113 static int mgs_cleanup(struct obd_device *obd);
114 static int mgs_handle(struct ptlrpc_request *req);
116 /* Start the MGS obd
 *
 * Steps visible in this listing, in order:
 *  - look up the server mount registered under obd->obd_name;
 *  - select the fsfilt operations for the mounted filesystem type;
 *  - create the "MGS" server DLM namespace and the ldlm callback client;
 *  - set up the MGS filesystem state (mgs_fs_setup());
 *  - start the llog commit thread and the config llog context;
 *  - initialise the fsdb list and mgs_sem;
 *  - create the ptlrpc service (handler: mgs_handle) and start its threads;
 *  - register procfs entries and start the ping evictor.
 *
 * The tail of the function releases resources in the reverse order of
 * acquisition. The GOTO targets used here (err_thread, err_fs, err_ops,
 * err_put) label that tail, but the label lines themselves, several rc
 * checks and the return statements are not visible in this listing.
 *
 * len and buf are not referenced on any visible line. */
117 static int mgs_setup(struct obd_device *obd, obd_count len, void *buf)
119 struct lprocfs_static_vars lvars;
120 struct mgs_obd *mgs = &obd->u.mgs;
121 struct lustre_mount_info *lmi;
122 struct lustre_sb_info *lsi;
123 struct vfsmount *mnt;
127 CDEBUG(D_CONFIG, "Starting MGS\n");
130 lmi = server_get_mount(obd->obd_name);
/* presumably taken when no mount was found (the check is not visible) */
132 RETURN(rc = -EINVAL);
/* Pick filesystem operations matching the mount type recorded in the
 * superblock's lustre disk data. */
135 lsi = s2lsi(lmi->lmi_sb);
136 obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
137 if (IS_ERR(obd->obd_fsops))
138 GOTO(err_put, rc = PTR_ERR(obd->obd_fsops));
140 /* namespace for mgs llog */
141 obd->obd_namespace = ldlm_namespace_new("MGS", LDLM_NAMESPACE_SERVER);
142 if (obd->obd_namespace == NULL) {
144 GOTO(err_ops, rc = -ENOMEM);
148 ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
149 "mgs_ldlm_client", &obd->obd_ldlm_client);
/* The backing device must not be read-only. mnt is assigned on a line
 * that is not visible in this listing. */
151 LASSERT(!lvfs_check_rdonly(lvfs_sbdev(mnt->mnt_sb)));
153 rc = mgs_fs_setup(obd, mnt);
155 CERROR("%s: MGS filesystem method init failed: rc = %d\n",
160 rc = llog_start_commit_thread();
164 rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL,
169 /* No recovery for MGC's */
170 obd->obd_replayable = 0;
172 /* Internal mgs setup */
173 mgs_init_fsdb_list(obd);
/* mgs_sem starts with a count of 1 */
174 sema_init(&mgs->mgs_sem, 1);
176 /* Start the service threads */
178 ptlrpc_init_svc(MGS_NBUFS, MGS_BUFSIZE, MGS_MAXREQSIZE,
179 MGS_MAXREPSIZE, MGS_REQUEST_PORTAL,
180 MGC_REPLY_PORTAL, MGS_SERVICE_WATCHDOG_TIMEOUT,
181 mgs_handle, LUSTRE_MGS_NAME,
182 obd->obd_proc_entry, NULL,
183 MGS_THREADS_AUTO_MIN, MGS_THREADS_AUTO_MAX,
186 if (!mgs->mgs_service) {
187 CERROR("failed to start service\n");
188 GOTO(err_fs, rc = -ENOMEM);
191 rc = ptlrpc_start_threads(obd, mgs->mgs_service);
193 GOTO(err_thread, rc);
/* procfs setup: lproc_mgs_setup() runs only when lprocfs_obd_setup()
 * returns 0; no error path is visible for the other outcome. */
196 lprocfs_init_vars(mgs, &lvars);
197 if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) {
198 lproc_mgs_setup(obd);
201 ping_evictor_start();
203 LCONSOLE_INFO("MGS %s started\n", obd->obd_name);
/* Error unwinding starts here (labels not visible in this listing). */
208 ptlrpc_unregister_service(mgs->mgs_service);
210 /* No extra cleanup needed for llog_init_commit_thread() */
/* NOTE(review): the thread is started above with
 * llog_start_commit_thread(); the function name in the comment above
 * differs -- confirm which one is meant. */
213 ldlm_namespace_free(obd->obd_namespace, 0);
214 obd->obd_namespace = NULL;
216 fsfilt_put_ops(obd->obd_fsops);
218 server_put_mount(obd->obd_name, mnt);
/* Staged pre-cleanup hook for the MGS obd.
 *
 * Dispatches on the cleanup stage. On the visible lines only
 * OBD_CLEANUP_SELF_EXP does any work: it cleans up the config llog context
 * and stores the result of obd_llog_finish() in rc. No statements are
 * visible for the other stages (the switch header, breaks and return are
 * not part of this listing). */
223 static int mgs_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
229 case OBD_CLEANUP_EARLY:
230 case OBD_CLEANUP_EXPORTS:
232 case OBD_CLEANUP_SELF_EXP:
233 llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT));
234 rc = obd_llog_finish(obd, 0);
236 case OBD_CLEANUP_OBD:
/* Thread entry point that frees a DLM namespace.
 *
 * data is the struct ldlm_namespace pointer handed over by
 * cfs_kernel_thread() in mgs_cleanup(). The thread daemonizes itself as
 * "ll_mgs_nsfree" and frees the namespace with the force argument set
 * to 1. */
242 static int mgs_ldlm_nsfree(void *data)
244 struct ldlm_namespace *ns = (struct ldlm_namespace *)data;
248 ptlrpc_daemonize("ll_mgs_nsfree");
249 rc = ldlm_namespace_free(ns, 1 /* obd_force should always be on */);
/* Shut down the MGS obd.
 *
 * Visible sequence: check mgs_sb, unregister the ptlrpc service, clean up
 * the fsdb list and procfs entries, release the server mount, hand the DLM
 * namespace to a separate kernel thread (mgs_ldlm_nsfree) for freeing,
 * release the fsfilt operations and log that the obd has stopped.
 *
 * NOTE(review): the body of the mgs_sb check, the return statements and
 * the end of the in-function comment below are not visible in this
 * listing. */
253 static int mgs_cleanup(struct obd_device *obd)
255 struct mgs_obd *mgs = &obd->u.mgs;
/* presumably an early exit when setup never got as far as a superblock
 * (the statement under this check is not visible) */
260 if (mgs->mgs_sb == NULL)
263 ptlrpc_unregister_service(mgs->mgs_service);
265 mgs_cleanup_fsdb_list(obd);
267 lprocfs_obd_cleanup(obd);
268 mgs->mgs_proc_live = NULL;
272 server_put_mount(obd->obd_name, mgs->mgs_vfsmnt);
/* The namespace is not freed inline: it is passed to a new kernel thread
 * running mgs_ldlm_nsfree(). */
275 /* Free the namespace in it's own thread, so that if the
276 ldlm_cancel_handler put the last mgs obd ref, we won't
278 cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace,
279 CLONE_VM | CLONE_FILES);
281 fsfilt_put_ops(obd->obd_fsops);
283 LCONSOLE_INFO("%s has stopped.\n", obd->obd_name);
287 /* similar to filter_prepare_destroy
 *
 * Take the configuration lock for a filesystem.
 *
 * fsname is converted to a DLM resource id with mgc_logname2resid(), then
 * a local LDLM_PLAIN lock in LCK_EX mode is enqueued on that resource in
 * the obd's namespace; fsname is also passed as the data argument of the
 * enqueue. The lock handle is returned through lockh. A CERROR is logged
 * on failure (the rc checks and the return are not visible in this
 * listing). */
288 static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
289 struct lustre_handle *lockh)
291 struct ldlm_res_id res_id;
295 rc = mgc_logname2resid(fsname, &res_id);
297 rc = ldlm_cli_enqueue_local(obd->obd_namespace, res_id,
298 LDLM_PLAIN, NULL, LCK_EX,
299 &flags, ldlm_blocking_ast,
300 ldlm_completion_ast, NULL,
301 fsname, 0, NULL, lockh);
303 CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
/* Release a configuration lock taken by mgs_get_cfg_lock(): drops the
 * LCK_EX reference on the lock identified by lockh. The return statement
 * is not visible in this listing. */
308 static int mgs_put_cfg_lock(struct lustre_handle *lockh)
311 ldlm_lock_decref(lockh, LCK_EX);
/* Check a starting target against what the MGS has recorded for it.
 *
 * mgs_check_index() is consulted first. On the visible lines there are
 * three outcomes:
 *  - the target claims to be registered but the MGS does not know it:
 *    LDD_F_WRITECONF is set in mti->mti_flags (the condition selecting
 *    this branch is not visible in this listing);
 *  - rc == -1: the client log has disappeared, LDD_F_WRITECONF is set so
 *    that all logs are regenerated;
 *  - otherwise the index is correctly marked as used and
 *    mgs_check_failnid() handles the target's nids.
 * The final return is not visible in this listing. */
318 static int mgs_check_target(struct obd_device *obd, struct mgs_target_info *mti)
323 rc = mgs_check_index(obd, mti);
325 LCONSOLE_ERROR("%s claims to have registered, but this MGS "
326 "does not know about it. Assuming writeconf.\n",
328 mti->mti_flags |= LDD_F_WRITECONF;
330 } else if (rc == -1) {
331 LCONSOLE_ERROR("Client log %s-client has disappeared! "
332 "Regenerating all logs.\n",
334 mti->mti_flags |= LDD_F_WRITECONF;
337 /* Index is correctly marked as used */
339 /* If the logs don't contain the mti_nids then add
340 them as failover nids */
341 rc = mgs_check_failnid(obd, mti);
347 /* Called whenever a target starts up. Flags indicate first connect, etc.
 *
 * The request carries a struct mgs_target_info (mti), byte-swapped if
 * necessary. Visible flow:
 *  1. If none of the flags tested first (LDD_F_WRITECONF,
 *     LDD_F_UPGRADE14, ...) are set, this is a startup ping:
 *     mgs_check_target() validates the target and may set further flags.
 *  2. The config lock for the filesystem is taken so other nodes are told
 *     to refresh; a failure here is logged but is not fatal.
 *  3. LDD_F_WRITECONF erases the logs (all logs of the fs for an MDT, only
 *     the target's own log for an OST) and forces LDD_F_UPDATE.
 *  4. LDD_F_UPGRADE14 runs the 1.4 upgrade path and forces LDD_F_UPDATE.
 *  5. LDD_F_UPDATE (re)writes the target log and asks the target to
 *     rewrite its on-disk data (LDD_F_REWRITE_LDD).
 *  6. The lock is dropped, the whole mti is copied into the reply and the
 *     MGS filesystem is synced.
 *
 * NOTE(review): several rc checks, the goto labels (out_nolock among them)
 * and the return are not visible in this listing. */
348 static int mgs_handle_target_reg(struct ptlrpc_request *req)
350 struct obd_device *obd = req->rq_export->exp_obd;
351 struct lustre_handle lockh;
352 struct mgs_target_info *mti, *rep_mti;
/* Reply layout: ptlrpc body followed by one mgs_target_info. */
353 int rep_size[] = { sizeof(struct ptlrpc_body), sizeof(*mti) };
357 mti = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*mti),
358 lustre_swab_mgs_target_info);
360 if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
362 /* We're just here as a startup ping. */
363 CDEBUG(D_MGS, "Server %s is running on %s\n",
364 mti->mti_svname, obd_export_nid2str(req->rq_export));
365 rc = mgs_check_target(obd, mti);
366 /* above will set appropriate mti flags */
368 /* Nothing wrong, or fatal error */
369 GOTO(out_nolock, rc);
372 /* Revoke the config lock to make sure nobody is reading. */
373 /* Although actually I think it should be alright if
374 someone was reading while we were updating the logs - if we
375 revoke at the end they will just update from where they left off. */
376 lockrc = mgs_get_cfg_lock(obd, mti->mti_fsname, &lockh);
377 if (lockrc != ELDLM_OK) {
378 LCONSOLE_ERROR("%s: Can't signal other nodes to update "
379 "their configuration (%d). Updating local logs "
380 "anyhow; you might have to manually restart "
381 "other nodes to get the latest configuration.\n",
382 obd->obd_name, lockrc);
/* Fault-injection hook; presumably delays when the fail point is armed. */
385 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_TARGET_REG, 10);
387 /* Log writing contention is handled by the fsdb_sem */
389 if (mti->mti_flags & LDD_F_WRITECONF) {
390 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
391 rc = mgs_erase_logs(obd, mti->mti_fsname);
392 LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
393 "request. All servers must be restarted "
394 "in order to regenerate the logs."
395 "\n", obd->obd_name, mti->mti_fsname);
396 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
397 rc = mgs_erase_log(obd, mti->mti_svname);
398 LCONSOLE_WARN("%s: Regenerating %s log by user "
400 obd->obd_name, mti->mti_svname);
402 mti->mti_flags |= LDD_F_UPDATE;
403 /* Erased logs means start from scratch. */
404 mti->mti_flags &= ~LDD_F_UPGRADE14;
408 if (mti->mti_flags & LDD_F_UPGRADE14) {
409 rc = mgs_upgrade_sv_14(obd, mti);
411 CERROR("Can't upgrade from 1.4 (%d)\n", rc);
415 /* We're good to go */
416 mti->mti_flags |= LDD_F_UPDATE;
420 if (mti->mti_flags & LDD_F_UPDATE) {
421 CDEBUG(D_MGS, "updating %s, index=%d\n", mti->mti_svname,
422 mti->mti_stripe_index);
424 /* create or update the target log
425 and update the client/mdt logs */
426 rc = mgs_write_log_target(obd, mti);
428 CERROR("Failed to write %s log (%d)\n",
429 mti->mti_svname, rc);
/* The one-shot request flags are cleared; the target is told to rewrite
 * its on-disk data. */
433 mti->mti_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE |
434 LDD_F_NEED_INDEX | LDD_F_WRITECONF);
435 mti->mti_flags |= LDD_F_REWRITE_LDD;
439 /* done with log update */
440 if (lockrc == ELDLM_OK)
441 mgs_put_cfg_lock(&lockh);
443 CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
444 mti->mti_stripe_index, rc);
/* NOTE(review): the result of lustre_pack_reply() is discarded here and
 * rep_mti is used below without a visible NULL check -- confirm that a
 * failed reply allocation cannot reach the memcpy(). */
445 lustre_pack_reply(req, 2, rep_size, NULL);
446 /* send back the whole mti in the reply */
447 rep_mti = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
449 memcpy(rep_mti, mti, sizeof(*rep_mti));
451 /* Flush logs to disk */
452 fsfilt_sync(obd, obd->u.mgs.mgs_sb);
/* Request handler for the MGS ptlrpc service.
 *
 * Reads the opcode from the request message, refuses every opcode other
 * than MGS_CONNECT when the request has no export (-ENOTCONN), then
 * dispatches on the opcode to the connect/disconnect, target
 * registration, DLM enqueue, ping and llog handlers. Afterwards the reply
 * is sent with target_send_reply(req, rc, fail).
 *
 * NOTE(review): most case labels, the break statements, the "out" label
 * and the return are not visible in this listing; the DEBUG_REQ strings
 * identify the operation handled by each group of lines. */
456 int mgs_handle(struct ptlrpc_request *req)
/* fault-injection id handed to target_send_reply() at the end */
458 int fail = OBD_FAIL_MGS_ALL_REPLY_NET;
462 OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_SLOW_REQUEST_NET, 2);
463 OBD_FAIL_RETURN(OBD_FAIL_MGS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0);
/* Sanity check on current->journal_info at entry; the same assertion is
 * repeated after the dispatch. */
465 LASSERT(current->journal_info == NULL);
466 opc = lustre_msg_get_opc(req->rq_reqmsg);
/* Everything except the connect request needs an existing export. */
467 if (opc != MGS_CONNECT) {
468 if (req->rq_export == NULL) {
469 CERROR("lustre_mgs: operation %d on unconnected MGS\n",
471 req->rq_status = -ENOTCONN;
472 GOTO(out, rc = -ENOTCONN);
/* connect */
478 DEBUG_REQ(D_MGS, req, "connect");
479 rc = target_handle_connect(req, mgs_handle);
480 if (!rc && (lustre_msg_get_conn_cnt(req->rq_reqmsg) > 1))
481 /* Make clients trying to reconnect after a MGS restart
482 happy; also requires obd_replayable */
483 lustre_msg_add_op_flags(req->rq_repmsg,
484 MSG_CONNECT_RECONNECT);
/* disconnect */
487 DEBUG_REQ(D_MGS, req, "disconnect");
488 rc = target_handle_disconnect(req);
489 req->rq_status = rc; /* superfluous? */
/* target registration */
492 DEBUG_REQ(D_MGS, req, "target add\n");
493 rc = mgs_handle_target_reg(req);
/* target removal: the handler call is commented out */
496 DEBUG_REQ(D_MGS, req, "target del\n");
497 //rc = mgs_handle_target_del(req);
/* DLM enqueue, served with the server-side completion/blocking ASTs */
501 DEBUG_REQ(D_MGS, req, "enqueue");
502 rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
503 ldlm_server_blocking_ast, NULL);
/* DLM callbacks are not expected on the MGS and are only logged here */
505 case LDLM_BL_CALLBACK:
506 case LDLM_CP_CALLBACK:
507 DEBUG_REQ(D_MGS, req, "callback");
508 CERROR("callbacks should not happen on MGS\n");
513 DEBUG_REQ(D_INFO, req, "ping");
514 rc = target_handle_ping(req);
/* llog cancel is answered with -ENOTSUPP */
517 DEBUG_REQ(D_MGS, req, "log cancel\n");
518 rc = -ENOTSUPP; /* la la la */
/* llog origin operations are forwarded to the generic llog handlers */
521 case LLOG_ORIGIN_HANDLE_CREATE:
522 DEBUG_REQ(D_MGS, req, "llog_init");
523 rc = llog_origin_handle_create(req);
525 case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
526 DEBUG_REQ(D_MGS, req, "llog next block");
527 rc = llog_origin_handle_next_block(req);
529 case LLOG_ORIGIN_HANDLE_READ_HEADER:
530 DEBUG_REQ(D_MGS, req, "llog read header");
531 rc = llog_origin_handle_read_header(req);
533 case LLOG_ORIGIN_HANDLE_CLOSE:
534 DEBUG_REQ(D_MGS, req, "llog close");
535 rc = llog_origin_handle_close(req);
538 DEBUG_REQ(D_MGS, req, "llog catinfo");
539 rc = llog_catinfo(req);
/* presumably the default case (label not visible): unsupported opcode */
542 req->rq_status = -ENOTSUPP;
543 rc = ptlrpc_error(req);
547 LASSERT(current->journal_info == NULL);
550 CERROR("MGS handle cmd=%d rc=%d\n", opc, rc);
553 target_send_reply(req, rc, fail);
/* Export destructor for the MGS: delegates to target_destroy_export().
 * Any further statements and the return are not visible in this
 * listing. */
557 static inline int mgs_destroy_export(struct obd_export *exp)
561 target_destroy_export(exp);
566 /* from mdt_iocontrol
 *
 * ioctl entry point of the MGS obd. karg is interpreted as a
 * struct obd_ioctl_data. Commands visible in this listing:
 *  - OBD_IOC_PARAM: copies a lustre_cfg record of ioc_plen1 bytes from
 *    user space (ioc_pbuf1), applies it with mgs_setparam() (which also
 *    receives the local fsname buffer), then takes and immediately drops
 *    the config lock for that filesystem so that other nodes refresh;
 *    the record buffer is released with OBD_FREE() using the same length;
 *  - OBD_IOC_DUMP_LOG: dumps the config llog named by ioc_inlbuf1 with
 *    class_config_dump_llog(), running inside the obd's lvfs context;
 *  - OBD_IOC_LLOG_CHECK / OBD_IOC_LLOG_INFO / OBD_IOC_LLOG_PRINT: forwarded
 *    to llog_ioctl() inside the lvfs context of the llog context's export;
 *  - anything else is reported at D_INFO level as an unknown command.
 *
 * len and uarg are not referenced on any visible line.
 *
 * NOTE(review): the switch header, most rc checks, the out_free label and
 * the return statements are not visible in this listing, and the end of
 * the lock-revocation comment inside OBD_IOC_PARAM is missing as well. */
567 int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
568 void *karg, void *uarg)
570 struct obd_device *obd = exp->exp_obd;
571 struct obd_ioctl_data *data = karg;
572 struct lvfs_run_ctxt saved;
576 CDEBUG(D_IOCTL, "handling ioctl cmd %#x\n", cmd);
580 case OBD_IOC_PARAM: {
581 struct lustre_handle lockh;
582 struct lustre_cfg *lcfg;
583 struct llog_rec_hdr rec;
584 char fsname[MTI_NAME_MAXLEN];
587 rec.lrh_len = llog_data_len(data->ioc_plen1);
/* Only LUSTRE_CFG_TYPE records are accepted. */
589 if (data->ioc_type == LUSTRE_CFG_TYPE) {
590 rec.lrh_type = OBD_CFG_REC;
592 CERROR("unknown cfg record type:%d \n", data->ioc_type);
/* The buffer is sized by the user-supplied ioc_plen1. */
596 OBD_ALLOC(lcfg, data->ioc_plen1);
/* NOTE(review): copy_from_user() returns the number of bytes that could
 * NOT be copied rather than a negative errno; the check on rc is not
 * visible here -- confirm that a failure is reported as -EFAULT. */
599 rc = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
603 if (lcfg->lcfg_bufcount < 1)
604 GOTO(out_free, rc = -EINVAL);
606 rc = mgs_setparam(obd, lcfg, fsname);
608 CERROR("setparam err %d\n", rc);
612 /* Revoke lock so everyone updates. Should be alright if
613 someone was already reading while we were updating the logs,
614 so we don't really need to hold the lock while we're
617 lockrc = mgs_get_cfg_lock(obd, fsname, &lockh);
618 if (lockrc != ELDLM_OK)
619 CERROR("lock error %d for fs %s\n", lockrc,
622 mgs_put_cfg_lock(&lockh);
626 OBD_FREE(lcfg, data->ioc_plen1);
630 case OBD_IOC_DUMP_LOG: {
631 struct llog_ctxt *ctxt =
632 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
633 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
634 rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
635 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
642 case OBD_IOC_LLOG_CHECK:
643 case OBD_IOC_LLOG_INFO:
644 case OBD_IOC_LLOG_PRINT: {
645 struct llog_ctxt *ctxt =
646 llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
648 push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
649 rc = llog_ioctl(ctxt, cmd, data);
650 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
656 CDEBUG(D_INFO, "unknown command %x\n", cmd);
662 /* use obd ops to offer management infrastructure
 *
 * Method table registered for the MGS obd type in mgs_init(); each entry
 * points at the corresponding function defined in this file. The closing
 * brace of the initializer is not visible in this listing. */
663 static struct obd_ops mgs_obd_ops = {
664 .o_owner = THIS_MODULE,
665 .o_connect = mgs_connect,
666 .o_disconnect = mgs_disconnect,
667 .o_setup = mgs_setup,
668 .o_precleanup = mgs_precleanup,
669 .o_cleanup = mgs_cleanup,
670 .o_destroy_export = mgs_destroy_export,
671 .o_iocontrol = mgs_iocontrol,
/* Module init: registers the MGS obd type (mgs_obd_ops, its module-level
 * procfs variables and the name LUSTRE_MGS_NAME) with the obd class layer.
 *
 * NOTE(review): the result of class_register_type() is not captured on the
 * visible line and the return statement is not part of this listing --
 * confirm that a registration failure is propagated to the module
 * loader. */
674 static int __init mgs_init(void)
676 struct lprocfs_static_vars lvars;
678 lprocfs_init_vars(mgs, &lvars);
679 class_register_type(&mgs_obd_ops, lvars.module_vars, LUSTRE_MGS_NAME);
/* Module exit: unregisters the MGS obd type registered by mgs_init(). */
684 static void /*__exit*/ mgs_exit(void)
686 class_unregister_type(LUSTRE_MGS_NAME);
/* Kernel module metadata and the init/exit entry points. */
689 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
690 MODULE_DESCRIPTION("Lustre Management Server (MGS)");
691 MODULE_LICENSE("GPL");
693 module_init(mgs_init);
694 module_exit(mgs_exit);