Whamcloud - gitweb
Land b1_8_gate onto b1_8 (20081218_1708)
[fs/lustre-release.git] / lustre / obdclass / obd_mount.c
index aa97caa..d240cba 100644 (file)
@@ -1,26 +1,43 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lustre/obdclass/obd_mount.c
- *  Client/server mount routines
+ * GPL HEADER START
  *
- *  Copyright (c) 2006 Cluster File Systems, Inc.
- *   Author: Nathan Rutman <nathan@clusterfs.com>
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   This file is part of Lustre, http://www.lustre.org/
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/obdclass/obd_mount.c
+ *
+ * Client/server mount routines
+ *
+ * Author: Nathan Rutman <nathan@clusterfs.com>
  */
 
 
@@ -150,7 +167,7 @@ struct lustre_mount_info *server_get_mount(char *name)
         lsi = s2lsi(lmi->lmi_sb);
         mntget(lmi->lmi_mnt);
         atomic_inc(&lsi->lsi_mounts);
-
+        
         CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
                lmi->lmi_mnt, name, atomic_read(&lsi->lsi_mounts),
                atomic_read(&lmi->lmi_mnt->mnt_count));
@@ -181,7 +198,7 @@ int server_put_mount(char *name, struct vfsmount *mnt)
 
         /* This might be the last one, can't deref after this */
         unlock_mntput(mnt);
-        
+
         down(&lustre_mount_info_lock);
         lmi = server_find_mount(name);
         up(&lustre_mount_info_lock);
@@ -377,7 +394,7 @@ int lustre_process_log(struct super_block *sb, char *logname,
                                    "communication errors between this node and "
                                    "the MGS, a bad configuration, or other "
                                    "errors. See the syslog for more "
-                                   "information.\n", mgc->obd_name, logname, 
+                                   "information.\n", mgc->obd_name, logname,
                                    rc);
 
         /* class_obd_list(); */
@@ -481,16 +498,18 @@ static int server_start_mgs(struct super_block *sb)
 
         rc = server_register_mount(LUSTRE_MGS_OBDNAME, sb, mnt);
 
-        if (!rc &&
-            ((rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
-                                       LUSTRE_MGS_OBDNAME, 0, 0))))
-                server_deregister_mount(LUSTRE_MGS_OBDNAME);
+        if (!rc) {
+                rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
+                                         LUSTRE_MGS_OBDNAME, 0, 0);
+                /* Do NOT call server_deregister_mount() here. This leads to
+                 * an inability to clean up and free lsi and other stuff when
+                 * mgs calls server_put_mount() in error handling case. -umka */
+        }
 
         if (rc)
                 LCONSOLE_ERROR_MSG(0x15e, "Failed to start MGS '%s' (%d).  Is "
-                                   "the 'mgs' module loaded?\n", 
+                                   "the 'mgs' module loaded?\n",
                                    LUSTRE_MGS_OBDNAME, rc);
-
         RETURN(rc);
 }
 
@@ -517,7 +536,12 @@ static int server_stop_mgs(struct super_block *sb)
 
 DECLARE_MUTEX(mgc_start_lock);
 
-/* Set up a mgcobd to process startup logs */
+/** Set up a mgc obd to process startup logs
+ *
+ * \param sb [in] super block of the mgc obd
+ *
+ * \retval 0 success, otherwise error code
+ */
 static int lustre_start_mgc(struct super_block *sb)
 {
         struct lustre_handle mgc_conn = {0, };
@@ -574,7 +598,7 @@ static int lustre_start_mgc(struct super_block *sb)
         mutex_down(&mgc_start_lock);
 
         obd = class_name2obd(mgcname);
-        if (obd) {
+        if (obd && !obd->obd_stopping) {
                 /* Re-using an existing MGC */
                 atomic_inc(&obd->u.cli.cl_mgc_refcount);
 
@@ -704,7 +728,8 @@ static int lustre_start_mgc(struct super_block *sb)
         OBD_ALLOC_PTR(data);
         if (data == NULL)
                 GOTO(out, rc = -ENOMEM);
-        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT;
+        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
+                                  OBD_CONNECT_FID;
         data->ocd_version = LUSTRE_VERSION_CODE;
         /* We connect to the MGS at setup, and don't disconnect until cleanup */
         rc = obd_connect(&mgc_conn, obd, &(obd->obd_uuid), data, NULL);
@@ -755,7 +780,7 @@ static int lustre_stop_mgc(struct super_block *sb)
                 GOTO(out, rc = -EBUSY);
         }
 
-        /* The MGC has no recoverable data in any case. 
+        /* The MGC has no recoverable data in any case.
          * force shotdown set in umount_begin */
         obd->obd_no_recov = 1;
 
@@ -1069,7 +1094,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
         if (rc == -EINVAL) {
                 LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this "
                                    "server (%s) to start.  Please see messages "
-                                   "on the MGS node.\n", 
+                                   "on the MGS node.\n",
                                    lsi->lsi_ldd->ldd_svname);
                 GOTO(out_mgc, rc);
         }
@@ -1089,6 +1114,9 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
         if (rc) {
                 CERROR("failed to start server %s: %d\n",
                        lsi->lsi_ldd->ldd_svname, rc);
+                /* Do NOT call server_deregister_mount() here. This makes it
+                 * impossible to find mount later in cleanup time and leaves
+                 * @lsi and other stuff leaked. -umka */
                 GOTO(out_mgc, rc);
         }
 
@@ -1148,10 +1176,8 @@ static int lustre_free_lsi(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         ENTRY;
 
-        if (!lsi)
-                RETURN(0);
-
-        CDEBUG(D_MOUNT, "Freeing lsi\n");
+        LASSERT(lsi != NULL);
+        CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
 
         /* someone didn't call server_put_mount. */
         LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
@@ -1191,10 +1217,9 @@ static int lustre_put_lsi(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         ENTRY;
 
-        LASSERT(lsi);
+        LASSERT(lsi != NULL);
 
         CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
-
         if (atomic_dec_and_test(&lsi->lsi_mounts)) {
                 lustre_free_lsi(sb);
                 RETURN(1);
@@ -1226,14 +1251,28 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
            Note ext3/ldiskfs can't be mounted ro. */
         s_flags = sb->s_flags;
 
+        /* allocate memory for options */
+        OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
+        if (!__page)
+                GOTO(out_free, rc = -ENOMEM);
+        page = (unsigned long)cfs_page_address(__page);
+        options = (char *)page;
+        memset(options, 0, CFS_PAGE_SIZE);
+
+        /* mount-line options must be added for pre-mount because it may
+         * contain mount options such as journal_dev which are required
+         * to successfully mount the underlying filesystem */
+        if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0))
+                strncat(options, lmd->lmd_opts, CFS_PAGE_SIZE - 1);
+
         /* Pre-mount ldiskfs to read the MOUNT_DATA_FILE */
         CDEBUG(D_MOUNT, "Pre-mount ldiskfs %s\n", lmd->lmd_dev);
-        mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, 0);
+        mnt = ll_kern_mount("ldiskfs", s_flags, lmd->lmd_dev, (void *)options);
         if (IS_ERR(mnt)) {
                 rc = PTR_ERR(mnt);
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
                 /* 2.6 kernels: if ldiskfs fails, try ldiskfs2 */
-                mnt = ll_kern_mount("ldiskfs2", s_flags, lmd->lmd_dev, 0);
+                mnt = ll_kern_mount("ldiskfs2", s_flags, lmd->lmd_dev,
+                                    (void *)options);
                 if (IS_ERR(mnt)) {
                         int rc2 = PTR_ERR(mnt);
                         CERROR("premount %s:%#lx ldiskfs failed: %d, ldiskfs2 "
@@ -1241,15 +1280,6 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
                                lmd->lmd_dev, s_flags, rc, rc2);
                         GOTO(out_free, rc);
                 }
-#else
-                /* 2.4 kernels: if ldiskfs fails, try ext3 */
-                mnt = ll_kern_mount("ext3", s_flags, lmd->lmd_dev, 0);
-                if (IS_ERR(mnt)) {
-                        rc = PTR_ERR(mnt);
-                        CERROR("premount ext3 failed: rc = %d\n", rc);
-                        GOTO(out_free, rc);
-                }
-#endif
         }
 
         OBD_SET_CTXT_MAGIC(&mount_ctxt);
@@ -1268,12 +1298,6 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
         /* Done with our pre-mount, now do the real mount. */
 
         /* Glom up mount options */
-        OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
-        if (!__page)
-                GOTO(out_free, rc = -ENOMEM);
-        page = (unsigned long)cfs_page_address(__page);
-
-        options = (char *)page;
         memset(options, 0, CFS_PAGE_SIZE);
         strncpy(options, ldd->ldd_mount_opts, CFS_PAGE_SIZE - 2);
 
@@ -1293,18 +1317,24 @@ static struct vfsmount *server_kernel_mount(struct super_block *sb)
                MT_STR(ldd), lmd->lmd_dev, options);
         mnt = ll_kern_mount(MT_STR(ldd), s_flags, lmd->lmd_dev,
                             (void *)options);
-        OBD_PAGE_FREE(__page);
         if (IS_ERR(mnt)) {
                 rc = PTR_ERR(mnt);
                 CERROR("ll_kern_mount failed: rc = %d\n", rc);
                 GOTO(out_free, rc);
         }
 
+        if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
+                simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
+                                LR_CLIENT_START);
+
+        OBD_PAGE_FREE(__page);
         lsi->lsi_ldd = ldd;   /* freed at lsi cleanup */
         CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
         RETURN(mnt);
 
 out_free:
+        if (__page)
+                OBD_PAGE_FREE(__page);
         OBD_FREE(ldd, sizeof(*ldd));
         lsi->lsi_ldd = NULL;
         RETURN(ERR_PTR(rc));
@@ -1322,10 +1352,9 @@ static void server_wait_finished(struct vfsmount *mnt)
                 LCONSOLE_WARN("Mount still busy with %d refs, waiting for "
                               "%d secs...\n",
                               atomic_read(&mnt->mnt_count), retries);
-
                 /* Wait for a bit */
                 retries -= 5;
-                lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
+                lwi = LWI_TIMEOUT(cfs_time_seconds(5), NULL, NULL);
                 l_wait_event(waitq, 0, &lwi);
         }
         if (atomic_read(&mnt->mnt_count) > 1) {
@@ -1354,7 +1383,7 @@ static void server_put_super(struct super_block *sb)
         CDEBUG(D_MOUNT, "server put_super %s\n", tmpname);
 
         /* Stop the target */
-        if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) && 
+        if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
             (IS_MDT(lsi->lsi_ldd) || IS_OST(lsi->lsi_ldd))) {
                 struct lustre_profile *lprof = NULL;
 
@@ -1560,9 +1589,9 @@ static int server_fill_super(struct super_block *sb)
         if (IS_ERR(mnt)) {
                 rc = PTR_ERR(mnt);
                 CERROR("Unable to mount device %s: %d\n",
-                      lsi->lsi_lmd->lmd_dev, rc);
+                       lsi->lsi_lmd->lmd_dev, rc);
                 lustre_put_lsi(sb);
-                GOTO(out, rc);
+                RETURN(rc);
         }
         lsi->lsi_srv_mnt = mnt;
 
@@ -1574,14 +1603,14 @@ static int server_fill_super(struct super_block *sb)
         if (class_name2obd(lsi->lsi_ldd->ldd_svname)) {
                 LCONSOLE_ERROR_MSG(0x161, "The target named %s is already "
                                    "running. Double-mount may have compromised "
-                                   "the disk journal.\n", 
+                                   "the disk journal.\n",
                                    lsi->lsi_ldd->ldd_svname);
-                unlock_mntput(mnt);
                 lustre_put_lsi(sb);
-                GOTO(out, rc = -EALREADY);
+                unlock_mntput(mnt);
+                RETURN(-EALREADY);
         }
 
-        /* start MGS before MGC */
+        /* Start MGS before MGC */
         if (IS_MGS(lsi->lsi_ldd) && !(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOMGS)) {
                 rc = server_start_mgs(sb);
                 if (rc)
@@ -1619,11 +1648,12 @@ static int server_fill_super(struct super_block *sb)
                       lsi->lsi_ldd->ldd_svname, lsi->lsi_lmd->lmd_dev);
 
         RETURN(0);
-
 out_mnt:
+        /* We jump here in case of failure while starting targets or MGS.
+         * In this case we can't just put @mnt and have to do real cleanup
+         * with stopping targets, etc. */
         server_put_super(sb);
-out:
-        RETURN(rc);
+        return rc;
 }
 
 /* Get the index from the obd name.
@@ -1838,8 +1868,11 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                            must be the last one. */
                         *s1 = '\0';
                         break;
+                } else if (strncmp(s1, "loop=", 5) == 0) {
+                        clear++;
                 }
 
+
                 /* Find next opt */
                 s2 = strchr(s1, ',');
                 if (s2 == NULL) {
@@ -1860,8 +1893,9 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                 goto invalid;
         }
 
-        s1 = strrchr(devname, ':');
+        s1 = strstr(devname, ":/");
         if (s1) {
+                ++s1;
                 lmd->lmd_flags = LMD_FLG_CLIENT;
                 /* Remove leading /s from fsname */
                 while (*++s1 == '/') ;
@@ -1919,7 +1953,7 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
         /* Figure out the lmd from the mount options */
         if (lmd_parse((char *)data, lmd)) {
                 lustre_put_lsi(sb);
-                RETURN(-EINVAL);
+                GOTO(out, rc = -EINVAL);
         }
 
         if (lmd_is_client(lmd)) {
@@ -1928,18 +1962,19 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                         LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
                                            "client mount! Is the 'lustre' "
                                            "module loaded?\n");
+                        lustre_put_lsi(sb);
                         rc = -ENODEV;
                 } else {
                         rc = lustre_start_mgc(sb);
                         if (rc) {
                                 lustre_stop_mgc(sb);
-                                goto out;
+                                lustre_put_lsi(sb);
+                                GOTO(out, rc);
                         }
                         /* Connect and start */
                         /* (should always be ll_fill_super) */
                         rc = (*client_fill_super)(sb);
                         /* c_f_s will call lustre_common_put_super on failure */
-
                 }
         } else {
                 CDEBUG(D_MOUNT, "Mounting server from %s\n", lmd->lmd_dev);
@@ -1951,14 +1986,18 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent)
                 /* s_f_s will call server_put_super on failure */
         }
 
+        /* If error happens in fill_super() call, @lsi will be killed there.
+         * This is why we do not put it here. */
+        GOTO(out, rc);
 out:
-        if (rc){
+        if (rc) {
                 CERROR("Unable to mount %s (%d)\n",
                        s2lsi(sb) ? lmd->lmd_dev : "", rc);
         } else {
-                CDEBUG(D_SUPER, "mount %s complete\n", lmd->lmd_dev);
+                CDEBUG(D_SUPER, "Mount %s complete\n", 
+                       lmd->lmd_dev);
         }
-        RETURN(rc);
+        return rc;
 }
 
 
@@ -1976,7 +2015,6 @@ void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
 
 /***************** FS registration ******************/
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 /* 2.5 and later */
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
 struct super_block * lustre_get_sb(struct file_system_type *fs_type,
@@ -2018,28 +2056,6 @@ struct file_system_type lustre_fs_type = {
                         LL_RENAME_DOES_D_MOVE,
 };
 
-#else
-/* 2.4 */
-static struct super_block *lustre_read_super(struct super_block *sb,
-                                             void *data, int silent)
-{
-        int rc;
-        ENTRY;
-
-        rc = lustre_fill_super(sb, data, silent);
-        if (rc)
-                RETURN(NULL);
-        RETURN(sb);
-}
-
-static struct file_system_type lustre_fs_type = {
-        .owner          = THIS_MODULE,
-        .name           = "lustre",
-        .fs_flags       = FS_NFSEXP_FSID,
-        .read_super     = lustre_read_super,
-};
-#endif
-
 int lustre_register_fs(void)
 {
         return register_filesystem(&lustre_fs_type);
@@ -2061,5 +2077,3 @@ EXPORT_SYMBOL(server_register_target);
 EXPORT_SYMBOL(server_name2index);
 EXPORT_SYMBOL(server_mti_print);
 EXPORT_SYMBOL(do_lcfg);
-
-