4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
34 * lustre/mds/handler.c
36 * Author: Peter Braam <braam@clusterfs.com>
37 * Author: Andreas Dilger <adilger@clusterfs.com>
38 * Author: Phil Schwan <phil@clusterfs.com>
39 * Author: Mike Shaver <shaver@clusterfs.com>
42 #define DEBUG_SUBSYSTEM S_MDS
44 #include <lustre_mds.h>
45 #include <linux/module.h>
46 #include <linux/init.h>
48 #include <linux/jbd.h>
49 #include <linux/buffer_head.h>
50 #include <linux/workqueue.h>
51 #include <linux/mount.h>
53 #include <lustre_acl.h>
54 #include <obd_class.h>
55 #include <lustre_dlm.h>
57 #include <lustre_fsfilt.h>
58 #include <lprocfs_status.h>
59 #include <lustre_disk.h>
60 #include <lustre_param.h>
61 #include <lustre_log.h>
63 #include "mds_internal.h"
/*
 * Module parameter mds_max_ost_index, initialised to 0xFFFF and registered
 * through CFS_MODULE_PARM.
 * NOTE(review): the variable is declared __u32 but registered with the type
 * arguments "i"/int -- confirm the mismatch is intentional.
 * NOTE(review): 0444 is presumably the parameter's permission mode; the rest
 * of the CFS_MODULE_PARM invocation (description string, closing parenthesis)
 * is not visible in this listing.
 */
65 __u32 mds_max_ost_index=0xFFFF;
66 CFS_MODULE_PARM(mds_max_ost_index, "i", int, 0444,
/*
 * Prepare LOV-related MDS state from the setup configuration record.
 *
 * When the record carries at least four buffers and buffer 3 is non-empty,
 * a random UUID is generated and unparsed into mds->mds_lov_uuid, and
 * buffer 3 is copied into a newly allocated mds->mds_profile.
 *
 * NOTE(review): this listing elides lines (braces, the uuid declaration and
 * the return statements), so the exact return values are not visible here.
 * NOTE(review): mds_profile is allocated with LUSTRE_CFG_BUFLEN(lcfg, 3)
 * bytes, but mds_lov_clean() frees it with strlen(mds->mds_profile) + 1, and
 * strncpy() does not NUL-terminate when the source fills the buffer --
 * confirm the buffer length always includes the terminator.
 */
69 static int mds_lov_presetup (struct mds_obd *mds, struct lustre_cfg *lcfg)
74 if (lcfg->lcfg_bufcount >= 4 && LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) {
/* Generate a random UUID and unparse it into mds_lov_uuid. */
77 ll_generate_random_uuid(uuid);
78 class_uuid_unparse(uuid, &mds->mds_lov_uuid);
/* Keep a private copy of config buffer 3 in mds_profile. */
80 OBD_ALLOC(mds->mds_profile, LUSTRE_CFG_BUFLEN(lcfg, 3));
81 if (mds->mds_profile == NULL)
84 strncpy(mds->mds_profile, lustre_cfg_string(lcfg, 3),
85 LUSTRE_CFG_BUFLEN(lcfg, 3));
/*
 * Tear down the LOV used by this MDS device.
 *
 * Deletes and frees the stored profile (if any), registers a NULL observer
 * on the LOV device, copies this device's obd_force/obd_fail flags to it,
 * disconnects mds_lov_exp and runs class_manual_cleanup() on the LOV device.
 *
 * NOTE(review): the condition guarding RETURN(PTR_ERR(osc)) and the final
 * return are on lines elided from this listing.
 */
90 static int mds_lov_clean(struct obd_device *obd)
92 struct mds_obd *mds = &obd->u.mds;
93 struct obd_device *osc = mds->mds_lov_obd;
/* Drop the profile and release the name buffer, sized by its string length. */
96 if (mds->mds_profile) {
97 class_del_profile(mds->mds_profile);
98 OBD_FREE(mds->mds_profile, strlen(mds->mds_profile) + 1);
99 mds->mds_profile = NULL;
102 /* There better be a lov */
106 RETURN(PTR_ERR(osc));
/* Passing NULL presumably clears the observer previously set on the LOV. */
108 obd_register_observer(osc, NULL);
110 /* Give lov our same shutdown flags */
111 osc->obd_force = obd->obd_force;
112 osc->obd_fail = obd->obd_fail;
114 /* Cleanup the lov */
115 obd_disconnect(mds->mds_lov_exp);
116 class_manual_cleanup(osc);
/*
 * Post-setup step for the MDS device.
 *
 * Sets up the LLOG_CONFIG_ORIG_CTXT llog context, initialises the changelog
 * llog and, when a profile name was stored in mds->mds_profile, looks the
 * profile up and connects to the LOV named by its lp_dt field.
 *
 * The two failure paths visible here jump to err_cleanup with rc set
 * (-ENOENT when the profile is missing, otherwise the mds_lov_connect()
 * result).
 *
 * NOTE(review): the rc checks, the err_cleanup label and the return
 * statements are on lines elided from this listing; the trailing
 * llog_get_context()/llog_cleanup() pair is presumably the err_cleanup body.
 */
121 static int mds_postsetup(struct obd_device *obd)
123 struct mds_obd *mds = &obd->u.mds;
124 struct llog_ctxt *ctxt;
128 rc = llog_setup(NULL, obd, &obd->obd_olg, LLOG_CONFIG_ORIG_CTXT, obd,
133 mds_changelog_llog_init(obd, obd);
135 if (mds->mds_profile) {
136 struct lustre_profile *lprof;
137 /* The profile defines which osc and mdc to connect to, for a
138 client. We reuse that here to figure out the name of the
139 lov to use (and ignore lprof->lp_md).
140 The profile was set in the config log with
141 LCFG_MOUNTOPT profilenm oscnm mdcnm */
142 lprof = class_get_profile(mds->mds_profile);
144 CERROR("No profile found: %s\n", mds->mds_profile);
145 GOTO(err_cleanup, rc = -ENOENT);
/* lp_dt is the profile field used as the LOV name to connect to. */
147 rc = mds_lov_connect(obd, lprof->lp_dt);
149 GOTO(err_cleanup, rc);
/* Release the config llog context obtained for this device. */
156 ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
158 llog_cleanup(NULL, ctxt);
/*
 * Post-recovery hook (installed as .o_postrecov in mds_cmd_obd_ops).
 *
 * Asserts that the device is no longer recovering, runs
 * mds_cleanup_pending() for devices whose name does not start with
 * MDD_OBD_NAME, then notifies the LOV device, using
 * OBD_NOTIFY_SYNC_NONBLOCK when obd_async_recov is set and OBD_NOTIFY_SYNC
 * otherwise.
 *
 * NOTE(review): local declarations, rc checks and the return statement are
 * on lines elided from this listing.
 */
162 int mds_postrecov(struct obd_device *obd)
170 LASSERT(!obd->obd_recovering);
171 /* clean PENDING dir */
/* strncmp() is non-zero when obd_name does not begin with MDD_OBD_NAME. */
173 if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME)))
174 rc = mds_cleanup_pending(obd);
178 /* FIXME Does target_finish_recovery really need this to block? */
179 /* Notify the LOV, which will in turn call mds_notify for each tgt */
180 /* This means that we have to hack obd_notify to think we're obd_set_up
181 during mds_lov_connect. */
182 obd_notify(obd->u.mds.mds_lov_obd, NULL,
183 obd->obd_async_recov ? OBD_NOTIFY_SYNC_NONBLOCK :
184 OBD_NOTIFY_SYNC, NULL);
/*
 * Early cleanup of the LOV device: when a LOV device exists and this device
 * is being forced down or failed over (obd_force or obd_fail set), run
 * obd_precleanup() on the LOV with OBD_CLEANUP_EARLY and return its result.
 *
 * NOTE(review): the statement executed when the guard below is true
 * (presumably an early return) is on a line elided from this listing.
 */
189 /* We need to be able to stop an mds_lov_synchronize */
190 static int mds_lov_early_clean(struct obd_device *obd)
192 struct mds_obd *mds = &obd->u.mds;
193 struct obd_device *osc = mds->mds_lov_obd;
/* Guard: no LOV device, or neither obd_force nor obd_fail is set. */
195 if (!osc || (!obd->obd_force && !obd->obd_fail))
198 CDEBUG(D_HA, "abort inflight\n");
199 return (obd_precleanup(osc, OBD_CLEANUP_EARLY));
/*
 * Pre-cleanup hook (installed as .o_precleanup in mds_cmd_obd_ops),
 * dispatching on the cleanup stage.
 *
 * For OBD_CLEANUP_EXPORTS the visible steps are: early-clean the LOV, take
 * mds_notify_lock for writing, disconnect from the LOV, clean up the
 * LLOG_CONFIG_ORIG_CTXT llog context, call obd_llog_finish(), clear
 * mds_lov_exp and release the lock.
 *
 * NOTE(review): the switch header, break statements, the ctxt check and the
 * return are on lines elided from this listing, so whether
 * OBD_CLEANUP_EARLY falls through cannot be confirmed here.
 */
202 static int mds_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
204 struct mds_obd *mds = &obd->u.mds;
205 struct llog_ctxt *ctxt;
210 case OBD_CLEANUP_EARLY:
212 case OBD_CLEANUP_EXPORTS:
213 mds_lov_early_clean(obd);
/* mds_notify_lock is held for writing across the LOV/llog teardown below. */
214 cfs_down_write(&mds->mds_notify_lock);
215 mds_lov_disconnect(obd);
217 ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT);
219 llog_cleanup(NULL, ctxt);
220 rc = obd_llog_finish(obd, 0);
221 mds->mds_lov_exp = NULL;
222 cfs_up_write(&mds->mds_notify_lock);
/*
 * lvfs callback (installed as l_fid2dentry in mds_lvfs_ops) that resolves an
 * id/generation pair to a dentry.
 *
 * The opaque data argument is cast to the obd_device; the lookup itself is
 * delegated to fsfilt_fid2dentry() on the device's obt_vfsmnt mount. One
 * visible path returns ERR_PTR(-ESTALE).
 *
 * NOTE(review): the rest of the parameter list, the -ESTALE condition and
 * the code filling in fid are on lines elided from this listing. The second
 * comment below is also missing its terminator here.
 */
228 /* Look up an entry by inode number. */
229 /* this function ONLY returns valid dget'd dentries with an initialized inode
231 static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
234 struct fsfilt_fid fid;
235 struct obd_device *obd = (struct obd_device *)data;
238 RETURN(ERR_PTR(-ESTALE));
243 RETURN(fsfilt_fid2dentry(obd, obd->u.mds.mds_obt.obt_vfsmnt, &fid, 0));
/*
 * lvfs callback table for the MDS; mds_init_ctxt() copies it into
 * obd_lvfs_ctxt.cb_ops. Only l_fid2dentry is set in the visible lines.
 * NOTE(review): "field:" is the obsolete GCC initializer syntax, whereas
 * mds_cmd_obd_ops in this file uses the C99 ".field =" form.
 */
247 struct lvfs_callback_ops mds_lvfs_ops = {
248 l_fid2dentry: mds_lvfs_fid2dentry,
/*
 * Bind the MDS device to its backing mount and initialise its lvfs run
 * context.
 *
 * Records mnt and its superblock in the device's obt fields, stamps
 * OBT_MAGIC, calls fsfilt_setup() on the superblock, then fills
 * obd_lvfs_ctxt with the mount, its root dentry, get_ds() and the
 * mds_lvfs_ops callbacks.
 *
 * NOTE(review): the check of rc after fsfilt_setup() and the return
 * statement are on lines elided from this listing.
 */
251 static int mds_init_ctxt(struct obd_device *obd, struct vfsmount *mnt)
253 struct mds_obd *mds = &obd->u.mds;
256 mds->mds_obt.obt_vfsmnt = mnt;
257 /* why not mnt->mnt_sb instead of mnt->mnt_root->d_inode->i_sb? */
258 obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb;
259 obd->u.obt.obt_magic = OBT_MAGIC;
260 rc = fsfilt_setup(obd, obd->u.obt.obt_sb);
/* Run context: working directory is the mount root, callbacks are ours. */
264 OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
265 obd->obd_lvfs_ctxt.pwdmnt = mnt;
266 obd->obd_lvfs_ctxt.pwd = mnt->mnt_root;
267 obd->obd_lvfs_ctxt.fs = get_ds();
268 obd->obd_lvfs_ctxt.cb_ops = mds_lvfs_ops;
/*
 * Setup hook (installed as .o_setup in mds_cmd_obd_ops).
 *
 * Visible steps, in order: check the device name against MDD_OBD_NAME,
 * require at least five config buffers, look up the server mount named by
 * buffer 4, drop one lsi_mounts reference, initialise mds_notify_lock, pick
 * the fsfilt ops for the filesystem type, initialise the lvfs context, and
 * then -- inside that context -- create the "OBJECTS" directory, initialise
 * the LOV object ids, run mds_lov_presetup() and mds_postsetup() with
 * obd_async_recov set to 1.
 *
 * The tail of the function releases mds_objects_dir, pops the context and
 * puts the fsfilt ops; these are presumably the error-unwind labels.
 *
 * NOTE(review): this listing elides lines, including the declaration of dev,
 * any assignment to mnt before it is passed to mds_init_ctxt() and
 * simple_mkdir(), the rc checks, the error labels and the returns. Two
 * block comments in the body also lack a visible terminator.
 */
272 /* The MDS still needs the LOV set up here. */
273 static int mds_cmd_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
275 struct mds_obd *mds = &obd->u.mds;
276 struct lvfs_run_ctxt saved;
278 struct vfsmount *mnt;
279 struct lustre_sb_info *lsi;
280 struct lustre_mount_info *lmi;
281 struct dentry *dentry;
285 CDEBUG(D_INFO, "obd %s setup \n", obd->obd_name);
/* strncmp() is non-zero when obd_name does not begin with MDD_OBD_NAME. */
286 if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME)))
289 if (lcfg->lcfg_bufcount < 5) {
290 CERROR("invalid arg for setup %s\n", MDD_OBD_NAME);
/* Config buffer 4 is the name handed to server_get_mount(). */
293 dev = lustre_cfg_string(lcfg, 4);
294 lmi = server_get_mount(dev);
295 LASSERT(lmi != NULL);
297 lsi = s2lsi(lmi->lmi_sb);
299 /* FIXME: MDD LOV initialize objects.
300 * we need only lmi here but not get mount
301 * OSD did mount already, so put mount back
303 cfs_atomic_dec(&lsi->lsi_mounts);
304 cfs_init_rwsem(&mds->mds_notify_lock);
306 obd->obd_fsops = fsfilt_get_ops(lsi->lsi_fstype);
307 rc = mds_init_ctxt(obd, mnt);
311 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
312 dentry = simple_mkdir(cfs_fs_pwd(current->fs), mnt, "OBJECTS", 0777, 1);
313 if (IS_ERR(dentry)) {
314 rc = PTR_ERR(dentry);
315 CERROR("cannot create OBJECTS directory: rc = %d\n", rc);
318 mds->mds_objects_dir = dentry;
320 rc = mds_lov_init_objids(obd);
322 CERROR("cannot init lov objid rc = %d\n", rc);
323 GOTO(err_objects, rc );
326 rc = mds_lov_presetup(mds, lcfg);
328 GOTO(err_objects, rc);
330 /* Don't wait for mds_postrecov trying to clear orphans */
331 obd->obd_async_recov = 1;
332 rc = mds_postsetup(obd);
333 /* Bug 11557 - allow async abort_recov start
334 FIXME can remove most of this obd_async_recov plumbing
335 obd->obd_async_recov = 0;
339 GOTO(err_objects, rc);
341 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
345 dput(mds->mds_objects_dir);
347 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
349 fsfilt_put_ops(obd->obd_fsops);
/*
 * Cleanup hook (installed as .o_cleanup in mds_cmd_obd_ops).
 *
 * Clears mds_lov_exp, may log a failover warning, checks the device name
 * against MDD_OBD_NAME and then, inside the device's lvfs context, destroys
 * the LOV object ids, releases the OBJECTS directory dentry, calls
 * ll_vfs_dq_off() on the superblock, shrinks its dcache and puts the fsfilt
 * ops.
 *
 * NOTE(review): the condition guarding the failover warning, the action
 * taken when the name check is non-zero, and the return statements are on
 * lines elided from this listing.
 */
353 static int mds_cmd_cleanup(struct obd_device *obd)
355 struct mds_obd *mds = &obd->u.mds;
356 struct lvfs_run_ctxt saved;
360 mds->mds_lov_exp = NULL;
363 LCONSOLE_WARN("%s: shutting down for failover; client state "
364 "will be preserved.\n", obd->obd_name);
/* strncmp() is non-zero when obd_name does not begin with MDD_OBD_NAME. */
366 if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME)))
369 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
371 mds_lov_destroy_objids(obd);
/* Drop the reference to the OBJECTS directory taken in mds_cmd_setup(). */
373 if (mds->mds_objects_dir != NULL) {
374 l_dput(mds->mds_objects_dir);
375 mds->mds_objects_dir = NULL;
378 ll_vfs_dq_off(obd->u.obt.obt_sb, 0);
379 shrink_dcache_sb(mds->mds_obt.obt_sb);
380 fsfilt_put_ops(obd->obd_fsops);
382 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
/*
 * Health-check hook for the MDS device. Its .o_health_check entry in
 * mds_cmd_obd_ops is commented out, so it is not installed in that table.
 * NOTE(review): only the signature is visible in this listing; the body is
 * on elided lines.
 */
387 static int mds_cmd_health_check(struct obd_device *obd)
/*
 * obd_ops method table for the MDS device type; mds_cmd_init() registers it
 * under LUSTRE_MDS_NAME. The health-check entry is left commented out.
 */
392 static struct obd_ops mds_cmd_obd_ops = {
393 .o_owner = THIS_MODULE,
394 .o_setup = mds_cmd_setup,
395 .o_cleanup = mds_cmd_cleanup,
396 .o_precleanup = mds_precleanup,
397 .o_create = mds_obd_create,
398 .o_destroy = mds_obd_destroy,
399 .o_llog_init = mds_llog_init,
400 .o_llog_finish = mds_llog_finish,
401 .o_notify = mds_notify,
402 .o_postrecov = mds_postrecov,
403 // .o_health_check = mds_cmd_health_check,
/*
 * Module entry point: fetches the lprocfs variables for the MDS and
 * registers the mds_cmd_obd_ops type under LUSTRE_MDS_NAME.
 * NOTE(review): the result of class_register_type() is not captured on the
 * visible lines, and the return statement is elided from this listing --
 * confirm that a registration failure is reported to the module loader.
 */
406 static int __init mds_cmd_init(void)
408 struct lprocfs_static_vars lvars;
410 lprocfs_mds_init_vars(&lvars);
411 class_register_type(&mds_cmd_obd_ops, NULL, lvars.module_vars,
412 LUSTRE_MDS_NAME, NULL);
/*
 * Module exit point: unregisters the LUSTRE_MDS_NAME type that
 * mds_cmd_init() registered.
 */
417 static void /*__exit*/ mds_cmd_exit(void)
419 class_unregister_type(LUSTRE_MDS_NAME);
/*
 * Kernel module metadata, followed by the hooks that make mds_cmd_init()
 * and mds_cmd_exit() the module's load and unload handlers.
 */
422 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
423 MODULE_DESCRIPTION("Lustre Metadata Server (MDS)");
424 MODULE_LICENSE("GPL");
426 module_init(mds_cmd_init);
427 module_exit(mds_cmd_exit);