Whamcloud - gitweb
LU-1711 mount: obd_mount to start osd
author Alex Zhuravlev <bzzz@whamcloud.com>
Tue, 7 Aug 2012 09:56:48 +0000 (13:56 +0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 12 Sep 2012 04:49:29 +0000 (00:49 -0400)
... instead of mounting the underlying ldiskfs directly.
As a result, the OSD can now be of any type, selected by the osd= mount option.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: Iab1ca6ca9f84ba6c708e05cd85c4de0a51d34de6
Reviewed-on: http://review.whamcloud.com/3650
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@whamcloud.com>
16 files changed:
lustre/include/lustre_disk.h
lustre/include/obd.h
lustre/mdd/mdd_device.c
lustre/mds/handler.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/obdclass/genops.c
lustre/obdclass/obd_mount.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_internal.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_lproc.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-zfs/osd_handler.c
lustre/osd-zfs/osd_lproc.c

index fbc945a..f91b21c 100644 (file)
@@ -490,9 +490,13 @@ struct lustre_sb_info {
         struct obd_device        *lsi_mgc;     /* mgc obd */
         struct lustre_mount_data *lsi_lmd;     /* mount command info */
         struct ll_sb_info        *lsi_llsbi;   /* add'l client sbi info */
+       struct dt_device         *lsi_dt_dev;  /* dt device to access disk fs*/
         struct vfsmount          *lsi_srv_mnt; /* the one server mount */
         cfs_atomic_t              lsi_mounts;  /* references to the srv_mnt */
        char                      lsi_svname[MTI_NAME_MAXLEN];
+       char                      lsi_osd_obdname[64];
+       char                      lsi_osd_uuid[64];
+       struct obd_export        *lsi_osd_exp;
        char                      lsi_osd_type[16];
        char                      lsi_fstype[16];
         struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
index bc1883d..6490a04 100644 (file)
@@ -836,7 +836,7 @@ struct niobuf_local {
 
 #define LUSTRE_CMM_NAME         "cmm"
 #define LUSTRE_MDD_NAME         "mdd"
-#define LUSTRE_OSD_NAME         "osd-ldiskfs"
+#define LUSTRE_OSD_LDISKFS_NAME        "osd-ldiskfs"
 #define LUSTRE_OSD_ZFS_NAME     "osd-zfs"
 #define LUSTRE_VVP_NAME         "vvp"
 #define LUSTRE_LMV_NAME         "lmv"
index 34b3091..f61df7c 100644 (file)
@@ -94,7 +94,6 @@ static struct lu_device *mdd_device_fini(const struct lu_env *env,
                                          struct lu_device *d)
 {
         struct mdd_device *mdd = lu2mdd_dev(d);
-        struct lu_device *next = &mdd->mdd_child->dd_lu_dev;
         int rc;
 
         rc = mdd_procfs_fini(mdd);
@@ -102,7 +101,7 @@ static struct lu_device *mdd_device_fini(const struct lu_env *env,
                 CERROR("proc fini error %d \n", rc);
                 return ERR_PTR(rc);
         }
-        return next;
+       return NULL;
 }
 
 static void mdd_changelog_fini(const struct lu_env *env,
@@ -1281,13 +1280,12 @@ static struct lu_device *mdd_device_free(const struct lu_env *env,
                                          struct lu_device *lu)
 {
         struct mdd_device *m = lu2mdd_dev(lu);
-        struct lu_device  *next = &m->mdd_child->dd_lu_dev;
         ENTRY;
 
         LASSERT(cfs_atomic_read(&lu->ld_ref) == 0);
         md_device_fini(&m->mdd_md_dev);
         OBD_FREE_PTR(m);
-        RETURN(next);
+       RETURN(NULL);
 }
 
 static struct obd_ops mdd_obd_device_ops = {
index 71ea401..182100e 100644 (file)
@@ -312,7 +312,6 @@ static int mds_cmd_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
          * OSD did mount already, so put mount back
          */
         cfs_atomic_dec(&lsi->lsi_mounts);
-        mntput(mnt);
         cfs_init_rwsem(&mds->mds_notify_lock);
 
        obd->obd_fsops = fsfilt_get_ops(lsi->lsi_fstype);
index d2e5312..c7e1b68 100644 (file)
@@ -4360,6 +4360,8 @@ static void mdt_stack_fini(const struct lu_env *env,
         lu_stack_fini(env, top);
         m->mdt_child = NULL;
         m->mdt_bottom = NULL;
+
+       obd_disconnect(m->mdt_bottom_exp);
 }
 
 static struct lu_device *mdt_layer_setup(struct lu_env *env,
@@ -4423,6 +4425,41 @@ out:
         return ERR_PTR(rc);
 }
 
+static int mdt_connect_to_next(const struct lu_env *env, struct mdt_device *m,
+                              const char *next, struct obd_export **exp)
+{
+       struct obd_connect_data *data = NULL;
+       struct obd_device       *obd;
+       int                      rc;
+       ENTRY;
+
+       OBD_ALLOC_PTR(data);
+       if (data == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       obd = class_name2obd(next);
+       if (obd == NULL) {
+               CERROR("%s: can't locate next device: %s\n",
+                      m->mdt_md_dev.md_lu_dev.ld_obd->obd_name, next);
+               GOTO(out, rc = -ENOTCONN);
+       }
+
+       data->ocd_connect_flags = OBD_CONNECT_VERSION;
+       data->ocd_version = LUSTRE_VERSION_CODE;
+
+       rc = obd_connect(NULL, exp, obd, &obd->obd_uuid, data, NULL);
+       if (rc) {
+               CERROR("%s: cannot connect to next dev %s (%d)\n",
+                      m->mdt_md_dev.md_lu_dev.ld_obd->obd_name, next, rc);
+               GOTO(out, rc);
+       }
+
+out:
+       if (data)
+               OBD_FREE_PTR(data);
+       RETURN(rc);
+}
+
 static int mdt_stack_init(struct lu_env *env,
                           struct mdt_device *m,
                           struct lustre_cfg *cfg,
@@ -4432,16 +4469,27 @@ static int mdt_stack_init(struct lu_env *env,
         struct lu_device  *tmp;
         struct md_device  *md;
         struct lu_device  *child_lu_dev;
+       char              *osdname;
         int rc;
         ENTRY;
 
-        /* init the stack */
-        tmp = mdt_layer_setup(env, LUSTRE_OSD_NAME, d, cfg);
-        if (IS_ERR(tmp)) {
-                RETURN(PTR_ERR(tmp));
-        }
-        m->mdt_bottom = lu2dt_dev(tmp);
-        d = tmp;
+       /* find bottom osd */
+       OBD_ALLOC(osdname, MTI_NAME_MAXLEN);
+       if (osdname == NULL)
+               RETURN(-ENOMEM);
+
+       snprintf(osdname, MTI_NAME_MAXLEN, "%s-osd", lustre_cfg_string(cfg, 0));
+       rc = mdt_connect_to_next(env, m, osdname, &m->mdt_bottom_exp);
+       OBD_FREE(osdname, MTI_NAME_MAXLEN);
+       if (rc)
+               RETURN(rc);
+
+       tmp = m->mdt_bottom_exp->exp_obd->obd_lu_dev;
+       LASSERT(tmp);
+       m->mdt_bottom = lu2dt_dev(tmp);
+       tmp->ld_site = d->ld_site;
+       d = tmp;
+
         tmp = mdt_layer_setup(env, LUSTRE_MDD_NAME, d, cfg);
         if (IS_ERR(tmp)) {
                 GOTO(out, rc = PTR_ERR(tmp));
@@ -4598,6 +4646,8 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m)
         }
         LASSERT(cfs_atomic_read(&d->ld_ref) == 0);
 
+       server_put_mount(mdt2obd_dev(m)->obd_name, NULL);
+
         EXIT;
 }
 
@@ -4670,7 +4720,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
         m->mdt_som_conf = 0;
 
         m->mdt_opts.mo_cos = MDT_COS_DEFAULT;
-        lmi = server_get_mount_2(dev);
+       lmi = server_get_mount(dev);
         if (lmi == NULL) {
                 CERROR("Cannot get mount info for %s!\n", dev);
                 RETURN(-EFAULT);
@@ -4799,9 +4849,6 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                 GOTO(err_llog_cleanup, rc);
 #endif
 
-        server_put_mount_2(dev, lmi->lmi_mnt);
-        lmi = NULL;
-
         rc = next->md_ops->mdo_iocontrol(env, next, OBD_IOC_GET_MNTOPT, 0,
                                          &mntopts);
         if (rc)
@@ -4898,8 +4945,8 @@ err_lu_site:
 err_free_site:
         OBD_FREE_PTR(mite);
 err_lmi:
-        if (lmi)
-                server_put_mount_2(dev, lmi->lmi_mnt);
+       if (lmi)
+               server_put_mount(dev, lmi->lmi_mnt);
         return (rc);
 }
 
index 52ba781..e15524d 100644 (file)
@@ -111,8 +111,10 @@ struct mdt_device {
         /* ptlrpc handle for MDS->client connections (for lock ASTs). */
         struct ptlrpc_client      *mdt_ldlm_client;
         /* underlying device */
+       struct obd_export         *mdt_child_exp;
         struct md_device          *mdt_child;
         struct dt_device          *mdt_bottom;
+       struct obd_export         *mdt_bottom_exp;
         /** target device */
         struct lu_target           mdt_lut;
         /*
index 8a141ab..3d6a145 100644 (file)
@@ -112,6 +112,7 @@ struct obd_type *class_search_type(const char *name)
         cfs_spin_unlock(&obd_types_lock);
         return NULL;
 }
+EXPORT_SYMBOL(class_search_type);
 
 struct obd_type *class_get_type(const char *name)
 {
index f844734..368256a 100644 (file)
@@ -172,8 +172,6 @@ struct lustre_mount_info *server_get_mount(const char *name)
         }
         lsi = s2lsi(lmi->lmi_sb);
 
-       if (lmi->lmi_mnt)
-               mntget(lmi->lmi_mnt);
         cfs_atomic_inc(&lsi->lsi_mounts);
 
         CDEBUG(D_MOUNT, "get_mnt %p from %s, refs=%d, vfscount=%d\n",
@@ -204,22 +202,6 @@ struct lustre_mount_info *server_get_mount_2(const char *name)
 }
 EXPORT_SYMBOL(server_get_mount_2);
 
-static void unlock_mntput(struct vfsmount *mnt)
-{
-#ifdef HAVE_KERNEL_LOCKED
-       /* for kernel < 2.6.37 */
-       if (kernel_locked()) {
-               unlock_kernel();
-               mntput(mnt);
-               lock_kernel();
-       } else {
-               mntput(mnt);
-       }
-#else
-       mntput(mnt);
-#endif
-}
-
 static int lustre_put_lsi(struct super_block *sb);
 
 /* to be called from obd_cleanup methods */
@@ -230,12 +212,6 @@ int server_put_mount(const char *name, struct vfsmount *mnt)
        int count = 0;
         ENTRY;
 
-        /* This might be the last one, can't deref after this */
-       if (mnt) {
-               count = mnt_get_count(mnt) - 1;
-               unlock_mntput(mnt);
-       }
-
         cfs_mutex_lock(&lustre_mount_info_lock);
         lmi = server_find_mount(name);
         cfs_mutex_unlock(&lustre_mount_info_lock);
@@ -244,7 +220,6 @@ int server_put_mount(const char *name, struct vfsmount *mnt)
                 RETURN(-ENOENT);
         }
         lsi = s2lsi(lmi->lmi_sb);
-        LASSERT(lmi->lmi_mnt == mnt);
 
         CDEBUG(D_MOUNT, "put_mnt %p from %s, refs=%d, vfscount=%d\n",
                lmi->lmi_mnt, name, cfs_atomic_read(&lsi->lsi_mounts), count);
@@ -398,22 +373,22 @@ EXPORT_SYMBOL(do_lcfg);
  * obd type-specific methods.
  */
 static int lustre_start_simple(char *obdname, char *type, char *uuid,
-                               char *s1, char *s2)
+                              char *s1, char *s2, char *s3, char *s4)
 {
-        int rc;
-        CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
+       int rc;
+       CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
 
-        rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
-        if (rc) {
-                CERROR("%s attach error %d\n", obdname, rc);
-                return(rc);
-        }
-        rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, 0, 0);
-        if (rc) {
-                CERROR("%s setup error %d\n", obdname, rc);
-                do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
-        }
-        return rc;
+       rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, 0, 0);
+       if (rc) {
+               CERROR("%s attach error %d\n", obdname, rc);
+               return(rc);
+       }
+       rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
+       if (rc) {
+               CERROR("%s setup error %d\n", obdname, rc);
+               do_lcfg(obdname, 0, LCFG_DETACH, 0, 0, 0, 0);
+       }
+       return rc;
 }
 
 /* Set up a MGS to serve startup logs */
@@ -442,7 +417,7 @@ static int server_start_mgs(struct super_block *sb)
 
         if (!rc) {
                 rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME,
-                                         LUSTRE_MGS_OBDNAME, 0, 0);
+                                        LUSTRE_MGS_OBDNAME, 0, 0, 0, 0);
                 /* Do NOT call server_deregister_mount() here. This leads to
                  * inability cleanup cleanly and free lsi and other stuff when
                  * mgs calls server_put_mount() in error handling case. -umka */
@@ -658,7 +633,7 @@ static int lustre_start_mgc(struct super_block *sb)
         /* Start the MGC */
         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
                                  (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
-                                 niduuid);
+                                niduuid, 0, 0);
         OBD_FREE_PTR(uuid);
         if (rc)
                 GOTO(out_free, rc);
@@ -1201,7 +1176,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
                         rc = lustre_start_simple(LUSTRE_OSS_OBDNAME,
                                                  LUSTRE_OSS_NAME,
                                                  LUSTRE_OSS_OBDNAME"_uuid",
-                                                 0, 0);
+                                                0, 0, 0, 0);
                         if (rc) {
                                 cfs_mutex_unlock(&server_start_lock);
                                 CERROR("failed to start OSS: %d\n", rc);
@@ -1360,6 +1335,11 @@ static int lustre_put_lsi(struct super_block *sb)
 
         CDEBUG(D_MOUNT, "put %p %d\n", sb, cfs_atomic_read(&lsi->lsi_mounts));
         if (cfs_atomic_dec_and_test(&lsi->lsi_mounts)) {
+               if (IS_SERVER(lsi) && lsi->lsi_osd_exp) {
+                       obd_disconnect(lsi->lsi_osd_exp);
+                       /* wait till OSD is gone */
+                       obd_zombie_barrier();
+               }
                 lustre_free_lsi(sb);
                 RETURN(1);
         }
@@ -1394,7 +1374,7 @@ static int lsi_prepare(struct lustre_sb_info *lsi)
 
                strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type);
        } else {
-               strcpy(lsi->lsi_osd_type, LUSTRE_OSD_NAME);
+               strcpy(lsi->lsi_osd_type, LUSTRE_OSD_LDISKFS_NAME);
        }
 
        /* XXX: a temp. solution for components using fsfilt
@@ -1439,151 +1419,12 @@ static int lsi_prepare(struct lustre_sb_info *lsi)
 
 /*************** server mount ******************/
 
-/** Kernel mount using mount options in MOUNT_DATA_FILE.
- * Since this file lives on the disk, we pre-mount using a common
- * type, read the file, then re-mount using the type specified in the
- * file.
- */
-static struct vfsmount *server_kernel_mount(struct super_block *sb)
-{
-        struct lustre_sb_info *lsi = s2lsi(sb);
-        struct lustre_mount_data *lmd = lsi->lsi_lmd;
-        struct vfsmount *mnt;
-        struct file_system_type *type;
-        char *options = NULL;
-        unsigned long page, s_flags;
-        struct page *__page;
-        int len;
-        int rc;
-        ENTRY;
-
-       rc = lsi_prepare(lsi);
-       if (rc)
-               RETURN(ERR_PTR(rc));
-
-       if (strcmp(lmd->lmd_osd_type, "osd-ldiskfs") == 0) {
-               /* with ldiskfs we're still mounting in the kernel space */
-               OBD_FREE(lmd->lmd_osd_type,
-                        strlen(lmd->lmd_osd_type) + 1);
-               lmd->lmd_osd_type = NULL;
-       } else {
-               /* non-ldiskfs backends (zfs) do mounting internally */
-               RETURN(NULL);
-       }
-
-        /* In the past, we have always used flags = 0.
-           Note ext3/ldiskfs can't be mounted ro. */
-        s_flags = sb->s_flags;
-
-        /* allocate memory for options */
-        OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
-        if (!__page)
-                GOTO(out_free, rc = -ENOMEM);
-        page = (unsigned long)cfs_page_address(__page);
-        options = (char *)page;
-        memset(options, 0, CFS_PAGE_SIZE);
-
-        /* Glom up mount options */
-        memset(options, 0, CFS_PAGE_SIZE);
-       strncpy(options, lsi->lsi_lmd->lmd_opts, CFS_PAGE_SIZE - 2);
-
-        len = CFS_PAGE_SIZE - strlen(options) - 2;
-        if (*options != 0)
-                strcat(options, ",");
-        strncat(options, "no_mbcache", len);
-
-        /* Add in any mount-line options */
-        if (lmd->lmd_opts && (*(lmd->lmd_opts) != 0)) {
-                len = CFS_PAGE_SIZE - strlen(options) - 2;
-                strcat(options, ",");
-                strncat(options, lmd->lmd_opts, len);
-        }
-
-        /* Special permanent mount flags */
-       if (IS_OST(lsi))
-            s_flags |= MS_NOATIME | MS_NODIRATIME;
-
-        CDEBUG(D_MOUNT, "kern_mount: %s %s %s\n",
-              lsi->lsi_osd_type, lmd->lmd_dev, options);
-       type = get_fs_type(lsi->lsi_fstype);
-        if (!type) {
-                CERROR("get_fs_type failed\n");
-                GOTO(out_free, rc = -ENODEV);
-        }
-        mnt = vfs_kern_mount(type, s_flags, lmd->lmd_dev, (void *)options);
-        cfs_module_put(type->owner);
-        if (IS_ERR(mnt)) {
-                rc = PTR_ERR(mnt);
-                CERROR("vfs_kern_mount failed: rc = %d\n", rc);
-                GOTO(out_free, rc);
-        }
-
-        if (lmd->lmd_flags & LMD_FLG_ABORT_RECOV)
-                simple_truncate(mnt->mnt_sb->s_root, mnt, LAST_RCVD,
-                                LR_CLIENT_START);
-
-        OBD_PAGE_FREE(__page);
-        CDEBUG(D_SUPER, "%s: mnt = %p\n", lmd->lmd_dev, mnt);
-        RETURN(mnt);
-
-out_free:
-        if (__page)
-                OBD_PAGE_FREE(__page);
-        RETURN(ERR_PTR(rc));
-}
-
-/** Wait here forever until the mount refcount is 0 before completing umount,
- * else we risk dereferencing a null pointer.
- * LNET may take e.g. 165s before killing zombies.
- */
-static void server_wait_finished(struct vfsmount *mnt)
-{
-       cfs_waitq_t             waitq;
-       int                     rc, waited = 0;
-       cfs_sigset_t            blocked;
-
-       if (mnt == NULL) {
-               cfs_waitq_init(&waitq);
-               cfs_waitq_wait_event_interruptible_timeout(waitq, 0,
-                                               cfs_time_seconds(3), rc);
-               return;
-       }
-
-       LASSERT(mnt);
-       cfs_waitq_init(&waitq);
-
-       while (mnt_get_count(mnt) > 1) {
-               if (waited && (waited % 30 == 0))
-                       LCONSOLE_WARN("Mount still busy with %d refs after "
-                                      "%d secs.\n",
-                                      mnt_get_count(mnt),
-                                      waited);
-               /* Cannot use l_event_wait() for an interruptible sleep. */
-               waited += 3;
-               blocked = cfs_block_sigsinv(sigmask(SIGKILL));
-               cfs_waitq_wait_event_interruptible_timeout(
-                       waitq,
-                       (mnt_get_count(mnt) == 1),
-                       cfs_time_seconds(3),
-                       rc);
-               cfs_restore_sigs(blocked);
-               if (rc < 0) {
-                       LCONSOLE_EMERG("Danger: interrupted umount %s with "
-                               "%d refs!\n", mnt_get_devname(mnt),
-                               mnt_get_count(mnt));
-                       break;
-               }
-
-       }
-}
-
 /** Start the shutdown of servers at umount.
  */
 static void server_put_super(struct super_block *sb)
 {
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct obd_device     *obd;
-        struct vfsmount       *mnt = lsi->lsi_srv_mnt;
         char *tmpname, *extraname = NULL;
         int tmpname_sz;
         int lsiflags = lsi->lsi_flags;
@@ -1641,13 +1482,10 @@ static void server_put_super(struct super_block *sb)
         /* Clean the mgc and sb */
         lustre_common_put_super(sb);
 
-        /* Wait for the targets to really clean up - can't exit (and let the
-           sb get destroyed) while the mount is still in use */
-        server_wait_finished(mnt);
-
-        /* drop the One True Mount */
-       if (mnt)
-               unlock_mntput(mnt);
+       /* wait till all in-progress cleanups are done
+        * specifically we're interested in ofd cleanup
+        * as it pins OSS */
+       obd_zombie_barrier();
 
        /* Stop the servers (MDS, OSS) if no longer needed.  We must wait
           until the target is really gone so that our type refcount check
@@ -1772,6 +1610,58 @@ static int server_fill_super_common(struct super_block *sb)
         RETURN(0);
 }
 
+static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags)
+{
+       struct lustre_mount_data *lmd = lsi->lsi_lmd;
+       struct obd_device        *obd;
+       struct dt_device_param    p;
+       char                      flagstr[16];
+       int                       rc;
+       ENTRY;
+
+       CDEBUG(D_MOUNT,
+              "Attempting to start %s, type=%s, lsifl=%x, mountfl=%lx\n",
+              lsi->lsi_svname, lsi->lsi_osd_type, lsi->lsi_flags, mflags);
+
+       sprintf(lsi->lsi_osd_obdname, "%s-osd", lsi->lsi_svname);
+       strcpy(lsi->lsi_osd_uuid, lsi->lsi_osd_obdname);
+       strcat(lsi->lsi_osd_uuid, "_UUID");
+       sprintf(flagstr, "%lu:%lu", mflags, (unsigned long) lmd->lmd_flags);
+
+       obd = class_name2obd(lsi->lsi_osd_obdname);
+       if (obd == NULL) {
+               rc = lustre_start_simple(lsi->lsi_osd_obdname,
+                               lsi->lsi_osd_type,
+                               lsi->lsi_osd_uuid, lmd->lmd_dev,
+                               flagstr, lsi->lsi_lmd->lmd_opts,
+                               lsi->lsi_svname);
+               if (rc)
+                       GOTO(out, rc);
+               obd = class_name2obd(lsi->lsi_osd_obdname);
+               LASSERT(obd);
+       }
+
+       rc = obd_connect(NULL, &lsi->lsi_osd_exp, obd, &obd->obd_uuid, NULL, NULL);
+       if (rc) {
+               obd->obd_force = 1;
+               class_manual_cleanup(obd);
+               lsi->lsi_dt_dev = NULL;
+       }
+
+       /* XXX: to keep support old components relying on lsi_srv_mnt
+        *      we get this info from OSD just started */
+       LASSERT(obd->obd_lu_dev);
+       lsi->lsi_dt_dev = lu2dt_dev(obd->obd_lu_dev);
+       LASSERT(lsi->lsi_dt_dev);
+
+       dt_conf_get(NULL, lsi->lsi_dt_dev, &p);
+
+       lsi->lsi_srv_mnt = p.ddp_mnt;
+
+out:
+       RETURN(rc);
+}
+
 /** Fill in the superblock info for a Lustre server.
  * Mount the device with the correct options.
  * Read the on-disk config file.
@@ -1780,20 +1670,21 @@ static int server_fill_super_common(struct super_block *sb)
 static int server_fill_super(struct super_block *sb)
 {
         struct lustre_sb_info *lsi = s2lsi(sb);
-        struct vfsmount *mnt;
         int rc;
         ENTRY;
 
-        /* the One True Mount */
-        mnt = server_kernel_mount(sb);
-        if (IS_ERR(mnt)) {
-                rc = PTR_ERR(mnt);
-                CERROR("Unable to mount device %s: %d\n",
-                       lsi->lsi_lmd->lmd_dev, rc);
+       rc = lsi_prepare(lsi);
+       if (rc)
+               RETURN(rc);
+
+       /* Start low level OSD */
+       rc = osd_start(lsi, sb->s_flags);
+       if (rc) {
+               CERROR("Unable to start osd on %s: %d\n",
+                      lsi->lsi_lmd->lmd_dev, rc);
                 lustre_put_lsi(sb);
                 RETURN(rc);
-        }
-        lsi->lsi_srv_mnt = mnt;
+       }
 
        CDEBUG(D_MOUNT, "Found service %s on device %s\n",
               lsi->lsi_svname, lsi->lsi_lmd->lmd_dev);
@@ -1804,7 +1695,6 @@ static int server_fill_super(struct super_block *sb)
                                    " the disk journal.\n",
                                   lsi->lsi_svname);
                 lustre_put_lsi(sb);
-                unlock_mntput(mnt);
                 RETURN(-EALREADY);
         }
 
@@ -1823,7 +1713,7 @@ static int server_fill_super(struct super_block *sb)
         /* Set up all obd devices for service */
         if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOSVC) &&
                        (IS_OST(lsi) || IS_MDT(lsi))) {
-                rc = server_start_targets(sb, mnt);
+               rc = server_start_targets(sb, lsi->lsi_srv_mnt);
                 if (rc < 0) {
                         CERROR("Unable to start targets: %d\n", rc);
                         GOTO(out_mnt, rc);
index ac87ce5..b06c04b 100644 (file)
@@ -61,46 +61,74 @@ static struct lu_kmem_descr ofd_caches[] = {
        }
 };
 
+static int ofd_connect_to_next(const struct lu_env *env, struct ofd_device *m,
+                              const char *next, struct obd_export **exp)
+{
+       struct obd_connect_data *data = NULL;
+       struct obd_device       *obd;
+       int                      rc;
+       ENTRY;
+
+       OBD_ALLOC_PTR(data);
+       if (data == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       obd = class_name2obd(next);
+       if (obd == NULL) {
+               CERROR("%s: can't locate next device: %s\n",
+                      m->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name, next);
+               GOTO(out, rc = -ENOTCONN);
+       }
+
+       data->ocd_connect_flags = OBD_CONNECT_VERSION;
+       data->ocd_version = LUSTRE_VERSION_CODE;
+
+       rc = obd_connect(NULL, exp, obd, &obd->obd_uuid, data, NULL);
+       if (rc) {
+               CERROR("%s: cannot connect to next dev %s: rc = %d\n",
+                      m->ofd_dt_dev.dd_lu_dev.ld_obd->obd_name, next, rc);
+               GOTO(out, rc);
+       }
+
+out:
+       if (data)
+               OBD_FREE_PTR(data);
+       RETURN(rc);
+}
+
 static int ofd_stack_init(const struct lu_env *env,
                          struct ofd_device *m, struct lustre_cfg *cfg)
 {
        struct lu_device        *ofd_lu = &m->ofd_dt_dev.dd_lu_dev;
        const char              *dev = lustre_cfg_string(cfg, 0);
-       struct obd_type         *type;
-       struct lu_device_type   *ldt;
        struct lu_device        *d;
        struct ofd_thread_info  *info = ofd_info(env);
        struct lustre_mount_info *lmi;
        int                      rc;
+       char                    *osdname;
 
        ENTRY;
 
-       lmi = server_get_mount_2(dev);
+       lmi = server_get_mount(dev);
        if (lmi == NULL) {
                CERROR("Cannot get mount info for %s!\n", dev);
                RETURN(-ENODEV);
        }
 
-       type = class_get_type(s2lsi(lmi->lmi_sb)->lsi_osd_type);
-       if (!type) {
-               CERROR("Unknown type: '%s'\n",
-                      s2lsi(lmi->lmi_sb)->lsi_osd_type);
-               RETURN(-ENODEV);
-       }
+       /* find bottom osd */
+       OBD_ALLOC(osdname, MTI_NAME_MAXLEN);
+       if (osdname == NULL)
+               RETURN(-ENOMEM);
 
-       ldt = type->typ_lu;
-       if (ldt == NULL) {
-               CERROR("type: '%s'\n", s2lsi(lmi->lmi_sb)->lsi_osd_type);
-               GOTO(out_type, rc = -EINVAL);
-       }
+       snprintf(osdname, MTI_NAME_MAXLEN, "%s-osd", dev);
+       rc = ofd_connect_to_next(env, m, osdname, &m->ofd_osd_exp);
+       OBD_FREE(osdname, MTI_NAME_MAXLEN);
+       if (rc)
+               RETURN(rc);
 
-       ldt->ldt_obd_type = type;
-       d = ldt->ldt_ops->ldto_device_alloc(env, ldt, cfg);
-       if (IS_ERR(d)) {
-               CERROR("Cannot allocate device: '%s'\n",
-                      s2lsi(lmi->lmi_sb)->lsi_osd_type);
-               GOTO(out_type, rc = -ENODEV);
-       }
+       d = m->ofd_osd_exp->exp_obd->obd_lu_dev;
+       LASSERT(d);
+       m->ofd_osd = lu2dt_dev(d);
 
        LASSERT(ofd_lu->ld_site);
        d->ld_site = ofd_lu->ld_site;
@@ -108,39 +136,6 @@ static int ofd_stack_init(const struct lu_env *env,
        snprintf(info->fti_u.name, sizeof(info->fti_u.name),
                 "%s-osd", lustre_cfg_string(cfg, 0));
 
-       type->typ_refcnt++;
-
-       rc = lu_env_refill((struct lu_env *)env);
-       if (rc != 0) {
-               CERROR("Failure to refill session: '%d'\n", rc);
-               GOTO(out_free, rc);
-       }
-
-       rc = ldt->ldt_ops->ldto_device_init(env, d, dev, NULL);
-       if (rc) {
-               CERROR("can't init device '%s', rc = %d\n",
-                      s2lsi(lmi->lmi_sb)->lsi_osd_type, rc);
-               GOTO(out_free, rc);
-       }
-       lu_device_get(d);
-       lu_ref_add(&d->ld_reference, "lu-stack", &lu_site_init);
-
-       m->ofd_osd = lu2dt_dev(d);
-
-       /* process setup config */
-       rc = d->ld_ops->ldo_process_config(env, d, cfg);
-       if (rc)
-               GOTO(out_fini, rc);
-
-       RETURN(rc);
-
-out_fini:
-       ldt->ldt_ops->ldto_device_fini(env, d);
-out_free:
-       type->typ_refcnt--;
-       ldt->ldt_ops->ldto_device_free(env, d);
-out_type:
-       class_put_type(type);
        RETURN(rc);
 }
 
@@ -173,7 +168,10 @@ static void ofd_stack_fini(const struct lu_env *env, struct ofd_device *m,
        top->ld_ops->ldo_process_config(env, top, lcfg);
        lustre_cfg_free(lcfg);
 
-       lu_stack_fini(env, &m->ofd_osd->dd_lu_dev);
+       lu_site_purge(env, top->ld_site, ~0);
+
+       LASSERT(m->ofd_osd_exp);
+       obd_disconnect(m->ofd_osd_exp);
        m->ofd_osd = NULL;
 
        EXIT;
@@ -601,10 +599,11 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m)
                d->ld_obd->obd_namespace = m->ofd_namespace = NULL;
        }
 
-       ofd_stack_fini(env, m, m->ofd_site.ls_top_dev);
+       ofd_stack_fini(env, m, &m->ofd_dt_dev.dd_lu_dev);
        lu_site_fini(&m->ofd_site);
        ofd_procfs_fini(m);
        LASSERT(cfs_atomic_read(&d->ld_ref) == 0);
+       server_put_mount(obd->obd_name, NULL);
        EXIT;
 }
 
index b5563cf..c94a7a2 100644 (file)
@@ -75,6 +75,7 @@ enum {
 struct ofd_device {
        struct dt_device         ofd_dt_dev;
        struct dt_device        *ofd_osd;
+       struct obd_export       *ofd_osd_exp;
        struct dt_device_param   ofd_dt_conf;
        /* DLM name-space for meta-data locks maintained by this server */
        struct ldlm_namespace   *ofd_namespace;
index b50d865..f1c0506 100644 (file)
@@ -976,7 +976,8 @@ static int osd_object_print(const struct lu_env *env, void *cookie,
                 d = o->oo_dir->od_container.ic_descr;
         else
                 d = NULL;
-        return (*p)(env, cookie, LUSTRE_OSD_NAME"-object@%p(i:%p:%lu/%u)[%s]",
+       return (*p)(env, cookie,
+                   LUSTRE_OSD_LDISKFS_NAME"-object@%p(i:%p:%lu/%u)[%s]",
                     o, o->oo_inode,
                     o->oo_inode ? o->oo_inode->i_ino : 0UL,
                     o->oo_inode ? o->oo_inode->i_generation : 0,
@@ -994,7 +995,7 @@ int osd_statfs(const struct lu_env *env, struct dt_device *d,
         struct kstatfs     *ksfs;
         int result = 0;
 
-       if (unlikely(osd->od_mount == NULL))
+       if (unlikely(osd->od_mnt == NULL))
                return -EINPROGRESS;
 
         /* osd_lproc.c call this without env, allocate ksfs for that case */
@@ -1038,6 +1039,7 @@ static void osd_conf_get(const struct lu_env *env,
         /*
          * XXX should be taken from not-yet-existing fs abstraction layer.
          */
+       param->ddp_mnt = osd_dt_dev(dev)->od_mnt;
         param->ddp_max_name_len = LDISKFS_NAME_LEN;
         param->ddp_max_nlink    = LDISKFS_LINK_MAX;
        param->ddp_block_shift  = sb->s_blocksize_bits;
@@ -1074,7 +1076,7 @@ static struct lu_buf *osd_buf_get(const struct lu_env *env, void *area, ssize_t
  */
 static int osd_sync(const struct lu_env *env, struct dt_device *d)
 {
-        CDEBUG(D_HA, "syncing OSD %s\n", LUSTRE_OSD_NAME);
+       CDEBUG(D_HA, "syncing OSD %s\n", LUSTRE_OSD_LDISKFS_NAME);
         return ldiskfs_force_commit(osd_sb(osd_dt_dev(d)));
 }
 
@@ -1096,7 +1098,7 @@ static int osd_commit_async(const struct lu_env *env,
         struct super_block *s = osd_sb(osd_dt_dev(d));
         ENTRY;
 
-        CDEBUG(D_HA, "async commit OSD %s\n", LUSTRE_OSD_NAME);
+       CDEBUG(D_HA, "async commit OSD %s\n", LUSTRE_OSD_LDISKFS_NAME);
         RETURN(s->s_op->sync_fs(s, 0));
 }
 
@@ -1110,7 +1112,7 @@ static int osd_ro(const struct lu_env *env, struct dt_device *d)
         int rc;
         ENTRY;
 
-        CERROR("*** setting device %s read-only ***\n", LUSTRE_OSD_NAME);
+       CERROR("*** setting %s read-only ***\n", osd_dt_dev(d)->od_svname);
 
         rc = __lvfs_set_rdonly(sb->s_bdev, LDISKFS_SB(sb)->journal_bdev);
         RETURN(rc);
@@ -4331,49 +4333,92 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 static int osd_mount(const struct lu_env *env,
                      struct osd_device *o, struct lustre_cfg *cfg)
 {
-        struct lustre_mount_info *lmi;
-        const char               *dev  = lustre_cfg_string(cfg, 0);
-        struct lustre_sb_info    *lsi;
-        int                       rc = 0;
-
+       const char              *name  = lustre_cfg_string(cfg, 0);
+       const char              *dev  = lustre_cfg_string(cfg, 1);
+       const char              *opts;
+       unsigned long            page, s_flags, lmd_flags = 0;
+       struct page             *__page;
+       struct file_system_type *type;
+       char                    *options = NULL;
+       char                    *str;
+       int                       rc = 0;
         ENTRY;
 
+       if (o->od_mnt != NULL)
+               RETURN(0);
+
         o->od_fsops = fsfilt_get_ops(mt_str(LDD_MT_LDISKFS));
         if (o->od_fsops == NULL) {
                 CERROR("Can't find fsfilt_ldiskfs\n");
                 RETURN(-ENOTSUPP);
         }
 
-        if (o->od_mount != NULL) {
-                CERROR("Already mounted (%s)\n", dev);
-                RETURN(-EEXIST);
-        }
+       OBD_PAGE_ALLOC(__page, CFS_ALLOC_STD);
+       if (__page == NULL)
+               RETURN(-ENOMEM);
+
+       str = lustre_cfg_string(cfg, 2);
+       s_flags = simple_strtoul(str, NULL, 0);
+       str = strstr(str, ":");
+       if (str)
+               lmd_flags = simple_strtoul(str + 1, NULL, 0);
+       opts = lustre_cfg_string(cfg, 3);
+       page = (unsigned long)cfs_page_address(__page);
+       options = (char *)page;
+       *options = '\0';
+       if (opts == NULL)
+               strcat(options, "user_xattr,acl");
+       else
+               strcat(options, opts);
 
-        /* get mount */
-        lmi = server_get_mount(dev);
-        if (lmi == NULL) {
-                CERROR("Cannot get mount info for %s!\n", dev);
-                RETURN(-EFAULT);
-        }
+       /* Glom up mount options */
+       if (*options != '\0')
+               strcat(options, ",");
+       strlcat(options, "no_mbcache", CFS_PAGE_SIZE);
 
-        LASSERT(lmi != NULL);
-        /* save lustre_mount_info in dt_device */
-        o->od_mount = lmi;
-        o->od_mnt = lmi->lmi_mnt;
+       type = get_fs_type("ldiskfs");
+       if (!type) {
+               CERROR("%s: cannot find ldiskfs module\n", name);
+               GOTO(out, rc = -ENODEV);
+       }
 
-        lsi = s2lsi(lmi->lmi_sb);
+       o->od_mnt = vfs_kern_mount(type, s_flags, dev, options);
+       cfs_module_put(type->owner);
 
-       if (lsi->lsi_flags & LDD_F_IAM_DIR) {
+       if (IS_ERR(o->od_mnt)) {
+               rc = PTR_ERR(o->od_mnt);
+               CERROR("%s: can't mount %s: %d\n", name, dev, rc);
+               o->od_mnt = NULL;
+               GOTO(out, rc);
+       }
+
+       if (lvfs_check_rdonly(o->od_mnt->mnt_sb->s_bdev)) {
+               CERROR("%s: underlying device %s is marked as read-only. "
+                      "Setup failed\n", name, dev);
+               mntput(o->od_mnt);
+               o->od_mnt = NULL;
+               GOTO(out, rc = -EROFS);
+       }
+
+       if (!LDISKFS_HAS_COMPAT_FEATURE(o->od_mnt->mnt_sb,
+           LDISKFS_FEATURE_COMPAT_HAS_JOURNAL)) {
+               CERROR("%s: device %s is mounted w/o journal\n", name, dev);
+               mntput(o->od_mnt);
+               o->od_mnt = NULL;
+               GOTO(out, rc = -EINVAL);
+       }
+
+       if (lmd_flags & LMD_FLG_IAM) {
                 o->od_iop_mode = 0;
-                LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", dev);
+               LCONSOLE_WARN("%s: OSD: IAM mode enabled\n", name);
         } else
                 o->od_iop_mode = 1;
+       if (lmd_flags & LMD_FLG_NOSCRUB)
+               o->od_scrub.os_no_scrub = 1;
 
-       if (lsi->lsi_flags & LDD_F_SV_TYPE_OST) {
-                rc = osd_compat_init(o);
-                if (rc)
-                        CERROR("%s: can't initialize compats: %d\n", dev, rc);
-        }
+out:
+       if (__page)
+               OBD_PAGE_FREE(__page);
 
         RETURN(rc);
 }
@@ -4384,6 +4429,8 @@ static struct lu_device *osd_device_fini(const struct lu_env *env,
         int rc;
         ENTRY;
 
+       rc = osd_shutdown(env, osd_dev(d));
+
         osd_compat_fini(osd_dev(d));
 
         shrink_dcache_sb(osd_sb(osd_dev(d)));
@@ -4395,46 +4442,104 @@ static struct lu_device *osd_device_fini(const struct lu_env *env,
                 RETURN (ERR_PTR(rc));
         }
 
-        if (osd_dev(d)->od_mount)
-                server_put_mount(osd_dev(d)->od_mount->lmi_name,
-                                 osd_dev(d)->od_mount->lmi_mnt);
-        osd_dev(d)->od_mount = NULL;
+       if (osd_dev(d)->od_mnt) {
+               mntput(osd_dev(d)->od_mnt);
+               osd_dev(d)->od_mnt = NULL;
+       }
 
         RETURN(NULL);
 }
 
+static int osd_device_init0(const struct lu_env *env,
+                           struct osd_device *o,
+                           struct lustre_cfg *cfg)
+{
+       struct lu_device        *l = osd2lu_dev(o);
+       struct osd_thread_info *info;
+       int                     rc;
+
+       /* if the module was re-loaded, env can lose its keys */
+       rc = lu_env_refill((struct lu_env *) env);
+       if (rc)
+               GOTO(out, rc);
+       info = osd_oti_get(env);
+       LASSERT(info);
+
+       l->ld_ops = &osd_lu_ops;
+       o->od_dt_dev.dd_ops = &osd_dt_ops;
+
+       cfs_spin_lock_init(&o->od_osfs_lock);
+       cfs_mutex_init(&o->od_otable_mutex);
+       o->od_osfs_age = cfs_time_shift_64(-1000);
+
+       o->od_capa_hash = init_capa_hash();
+       if (o->od_capa_hash == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       o->od_read_cache = 1;
+       o->od_writethrough_cache = 1;
+
+       rc = osd_mount(env, o, cfg);
+       if (rc)
+               GOTO(out_capa, rc);
+
+       /* setup scrub, including OI files initialization */
+       rc = osd_scrub_setup(env, o);
+       if (rc < 0)
+               GOTO(out_mnt, rc);
+
+       strncpy(o->od_svname, lustre_cfg_string(cfg, 4),
+                       sizeof(o->od_svname) - 1);
+
+       if (strstr(o->od_svname, "-OST")) {
+               rc = osd_compat_init(o);
+               if (rc != 0)
+                       GOTO(out_mnt, rc);
+       }
+
+       rc = osd_procfs_init(o, o->od_svname);
+       if (rc != 0) {
+               CERROR("%s: can't initialize procfs: rc = %d\n",
+                      o->od_svname, rc);
+               GOTO(out_compat, rc);
+       }
+
+       RETURN(0);
+out_compat:
+       osd_compat_fini(o);
+out_mnt:
+       osd_oi_fini(info, o);
+       osd_shutdown(env, o);
+       mntput(o->od_mnt);
+       o->od_mnt = NULL;
+out_capa:
+       cleanup_capa_hash(o->od_capa_hash);
+out:
+       RETURN(rc);
+}
+
 static struct lu_device *osd_device_alloc(const struct lu_env *env,
                                           struct lu_device_type *t,
                                           struct lustre_cfg *cfg)
 {
-        struct lu_device  *l;
-        struct osd_device *o;
-
-        OBD_ALLOC_PTR(o);
-        if (o != NULL) {
-                int result;
-
-                result = dt_device_init(&o->od_dt_dev, t);
-                if (result == 0) {
-                        l = osd2lu_dev(o);
-                        l->ld_ops = &osd_lu_ops;
-                        o->od_dt_dev.dd_ops = &osd_dt_ops;
-                        cfs_spin_lock_init(&o->od_osfs_lock);
-                       cfs_mutex_init(&o->od_otable_mutex);
-                        o->od_osfs_age = cfs_time_shift_64(-1000);
-                        o->od_capa_hash = init_capa_hash();
-                        if (o->od_capa_hash == NULL) {
-                                dt_device_fini(&o->od_dt_dev);
-                                l = ERR_PTR(-ENOMEM);
-                        }
-                } else
-                        l = ERR_PTR(result);
+       struct osd_device *o;
+       int                rc;
 
-                if (IS_ERR(l))
-                        OBD_FREE_PTR(o);
-        } else
-                l = ERR_PTR(-ENOMEM);
-        return l;
+       OBD_ALLOC_PTR(o);
+       if (o == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       rc = dt_device_init(&o->od_dt_dev, t);
+       if (rc == 0) {
+               rc = osd_device_init0(env, o, cfg);
+               if (rc)
+                       dt_device_fini(&o->od_dt_dev);
+       }
+
+       if (unlikely(rc != 0))
+               OBD_FREE_PTR(o);
+
+       return rc == 0 ? osd2lu_dev(o) : ERR_PTR(rc);
 }
 
 static struct lu_device *osd_device_free(const struct lu_env *env,
@@ -4462,7 +4567,7 @@ static int osd_process_config(const struct lu_env *env,
                 break;
         case LCFG_CLEANUP:
                lu_dev_del_linkage(d->ld_site, d);
-                err = osd_shutdown(env, o);
+               err = 0;
                 break;
         default:
                 err = -ENOSYS;
@@ -4477,6 +4582,58 @@ static int osd_recovery_complete(const struct lu_env *env,
         RETURN(0);
 }
 
+/*
+ * we use exports to track all osd users
+ */
+static int osd_obd_connect(const struct lu_env *env, struct obd_export **exp,
+                          struct obd_device *obd, struct obd_uuid *cluuid,
+                          struct obd_connect_data *data, void *localdata)
+{
+       struct osd_device    *osd = osd_dev(obd->obd_lu_dev);
+       struct lustre_handle  conn;
+       int                   rc;
+       ENTRY;
+
+       CDEBUG(D_CONFIG, "connect #%d\n", osd->od_connects);
+
+       rc = class_connect(&conn, obd, cluuid);
+       if (rc)
+               RETURN(rc);
+
+       *exp = class_conn2export(&conn);
+
+       cfs_spin_lock(&osd->od_osfs_lock);
+       osd->od_connects++;
+       cfs_spin_unlock(&osd->od_osfs_lock);
+
+       RETURN(0);
+}
+
+/*
+ * once the last export (not counting the self-export) has disappeared,
+ * the osd can be released
+ */
+static int osd_obd_disconnect(struct obd_export *exp)
+{
+       struct obd_device *obd = exp->exp_obd;
+       struct osd_device *osd = osd_dev(obd->obd_lu_dev);
+       int                rc, release = 0;
+       ENTRY;
+
+       /* Only disconnect the underlying layers on the final disconnect. */
+       cfs_spin_lock(&osd->od_osfs_lock);
+       osd->od_connects--;
+       if (osd->od_connects == 0)
+               release = 1;
+       cfs_spin_unlock(&osd->od_osfs_lock);
+
+       rc = class_disconnect(exp); /* bz 9811 */
+
+       if (rc == 0 && release)
+               class_manual_cleanup(obd);
+       RETURN(rc);
+}
+
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                        struct lu_device *dev)
 {
@@ -4484,11 +4641,6 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
        int                result;
        ENTRY;
 
-       /* 1. setup scrub, including OI files initialization */
-       result = osd_scrub_setup(env, osd);
-        if (result < 0)
-                RETURN(result);
-
        /* 2. setup quota slave instance */
        osd->od_quota_slave = qsd_init(env, osd->od_svname, &osd->od_dt_dev,
                                       osd->od_proc_entry);
@@ -4553,9 +4705,9 @@ static const struct lu_device_type_operations osd_device_type_ops = {
         .ldto_device_fini    = osd_device_fini
 };
 
-static struct lu_device_type osd_device_type = {
+struct lu_device_type osd_device_type = {
         .ldt_tags     = LU_DEVICE_DT,
-        .ldt_name     = LUSTRE_OSD_NAME,
+       .ldt_name     = LUSTRE_OSD_LDISKFS_NAME,
         .ldt_ops      = &osd_device_type_ops,
         .ldt_ctx_tags = LCT_LOCAL,
 };
@@ -4564,7 +4716,9 @@ static struct lu_device_type osd_device_type = {
  * lprocfs legacy support.
  */
 static struct obd_ops osd_obd_device_ops = {
-        .o_owner = THIS_MODULE
+       .o_owner = THIS_MODULE,
+       .o_connect      = osd_obd_connect,
+       .o_disconnect   = osd_obd_disconnect
 };
 
 static int __init osd_mod_init(void)
@@ -4574,16 +4728,16 @@ static int __init osd_mod_init(void)
         osd_oi_mod_init();
         lprocfs_osd_init_vars(&lvars);
         return class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars,
-                                   LUSTRE_OSD_NAME, &osd_device_type);
+                                  LUSTRE_OSD_LDISKFS_NAME, &osd_device_type);
 }
 
 static void __exit osd_mod_exit(void)
 {
-        class_unregister_type(LUSTRE_OSD_NAME);
+       class_unregister_type(LUSTRE_OSD_LDISKFS_NAME);
 }
 
 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_NAME")");
+MODULE_DESCRIPTION("Lustre Object Storage Device ("LUSTRE_OSD_LDISKFS_NAME")");
 MODULE_LICENSE("GPL");
 
 cfs_module(osd, "0.1.0", osd_mod_init, osd_mod_exit);
index 3c5406b..939d539 100644 (file)
@@ -255,7 +255,6 @@ struct osd_device {
         /* super-class */
         struct dt_device          od_dt_dev;
         /* information about underlying file system */
-        struct lustre_mount_info *od_mount;
         struct vfsmount          *od_mnt;
         /* object index */
         struct osd_oi           **od_oi_table;
@@ -287,6 +286,7 @@ struct osd_device {
         __u32                     od_iop_mode;
 
         struct fsfilt_operations *od_fsops;
+       int                       od_connects;
 
         /*
          * mapping for legacy OST objids
@@ -751,7 +751,7 @@ static inline struct osd_device *osd_obj2dev(const struct osd_object *o)
 
 static inline struct super_block *osd_sb(const struct osd_device *dev)
 {
-        return dev->od_mount->lmi_mnt->mnt_sb;
+       return dev->od_mnt->mnt_sb;
 }
 
 static inline int osd_object_is_root(const struct osd_object *obj)
index d05bac6..6cb96a7 100644 (file)
@@ -250,12 +250,13 @@ static const char *osd_counter_names[] = {
 int osd_procfs_init(struct osd_device *osd, const char *name)
 {
         struct lprocfs_static_vars lvars;
-        struct lu_device    *ld = &osd->od_dt_dev.dd_lu_dev;
         struct obd_type     *type;
         int                  rc;
         ENTRY;
 
-        type = ld->ld_type->ldt_obd_type;
+       /* at the moment there is no linkage between lu_type
+        * and obd_type, so we look up obd_type this way */
+       type = class_search_type(LUSTRE_OSD_LDISKFS_NAME);
 
         LASSERT(name != NULL);
         LASSERT(type != NULL);
@@ -325,14 +326,14 @@ static int lprocfs_osd_rd_mntdev(char *page, char **start, off_t off, int count,
         struct osd_device *osd = osd_dt_dev(data);
 
         LASSERT(osd != NULL);
-        if (unlikely(osd->od_mount == NULL))
+       if (unlikely(osd->od_mnt == NULL))
                 return -EINPROGRESS;
 
-       LASSERT(mnt_get_devname(osd->od_mount->lmi_mnt));
+       LASSERT(mnt_get_devname(osd->od_mnt));
        *eof = 1;
 
        return snprintf(page, count, "%s\n",
-                       mnt_get_devname(osd->od_mount->lmi_mnt));
+                       mnt_get_devname(osd->od_mnt));
 }
 
 #ifdef HAVE_LDISKFS_PDO
@@ -366,7 +367,7 @@ static int lprocfs_osd_rd_auto_scrub(char *page, char **start, off_t off,
        struct osd_device *dev = data;
 
        LASSERT(dev != NULL);
-       if (unlikely(dev->od_mount == NULL))
+       if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
        *eof = 1;
@@ -380,7 +381,7 @@ static int lprocfs_osd_wr_auto_scrub(struct file *file, const char *buffer,
        int val, rc;
 
        LASSERT(dev != NULL);
-       if (unlikely(dev->od_mount == NULL))
+       if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
        rc = lprocfs_write_helper(buffer, count, &val);
@@ -397,7 +398,7 @@ static int lprocfs_osd_rd_oi_scrub(char *page, char **start, off_t off,
        struct osd_device *dev = data;
 
        LASSERT(dev != NULL);
-       if (unlikely(dev->od_mount == NULL))
+       if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
        *eof = 1;
index 65157f4..98827d7 100644 (file)
@@ -1042,8 +1042,6 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        cfs_init_rwsem(&scrub->os_rwsem);
        cfs_spin_lock_init(&scrub->os_lock);
        CFS_INIT_LIST_HEAD(&scrub->os_inconsistent_items);
-       if (get_mount_flags(dev->od_mount->lmi_sb) & LMD_FLG_NOSCRUB)
-               scrub->os_no_scrub = 1;
 
        push_ctxt(&saved, ctxt, NULL);
        filp = filp_open(osd_scrub_name, O_RDWR | O_CREAT, 0644);
index e4f97cc..fedf1e9 100644 (file)
@@ -340,6 +340,8 @@ static void osd_conf_get(const struct lu_env *env,
        param->ddp_inodespace = OSD_DNODE_EST_COUNT;
        /* per-fragment overhead to be used by the client code */
        param->ddp_grant_frag = udmu_blk_insert_cost();
+
+       param->ddp_mnt = NULL;
 }
 
 /*
@@ -483,30 +485,20 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o)
 static int osd_mount(const struct lu_env *env,
                     struct osd_device *o, struct lustre_cfg *cfg)
 {
-       char                            *dev  = lustre_cfg_string(cfg, 0);
-       struct lustre_mount_info        *lmi;
-       struct lustre_sb_info           *lsi;
-       dmu_buf_t                       *rootdb;
-       int                              rc;
+       char      *dev  = lustre_cfg_string(cfg, 1);
+       dmu_buf_t *rootdb;
+       int        rc;
        ENTRY;
 
        if (o->od_objset.os != NULL)
                RETURN(0);
 
-       lmi = server_get_mount(dev);
-       if (lmi == NULL) {
-               CERROR("Unknown mount point: '%s'\n", dev);
-               RETURN(-ENODEV);
-       }
-
-       lsi = s2lsi(lmi->lmi_sb);
-       dev = lsi->lsi_lmd->lmd_dev;
-
        if (strlen(dev) >= sizeof(o->od_mntdev))
                RETURN(-E2BIG);
 
        strcpy(o->od_mntdev, dev);
-       strcpy(o->od_svname, lsi->lsi_svname);
+       strncpy(o->od_svname, lustre_cfg_string(cfg, 4),
+               sizeof(o->od_svname) - 1);
 
        rc = -udmu_objset_open(o->od_mntdev, &o->od_objset);
        if (rc) {
@@ -587,28 +579,36 @@ out:
        RETURN(rc);
 }
 
+static struct lu_device *osd_device_fini(const struct lu_env *env,
+                                        struct lu_device *dev);
+
 static struct lu_device *osd_device_alloc(const struct lu_env *env,
-                                         struct lu_device_type *t,
+                                         struct lu_device_type *type,
                                          struct lustre_cfg *cfg)
 {
-       struct osd_device       *o;
-       int                      rc;
+       struct osd_device *dev;
+       int                rc;
 
-       OBD_ALLOC_PTR(o);
-       if (o == NULL)
+       OBD_ALLOC_PTR(dev);
+       if (dev == NULL)
                return ERR_PTR(-ENOMEM);
 
-       rc = dt_device_init(&o->od_dt_dev, t);
+       rc = dt_device_init(&dev->od_dt_dev, type);
        if (rc == 0) {
-               rc = osd_device_init0(env, o, cfg);
+               rc = osd_device_init0(env, dev, cfg);
+               if (rc == 0) {
+                       rc = osd_mount(env, dev, cfg);
+                       if (rc)
+                               osd_device_fini(env, osd2lu_dev(dev));
+               }
                if (rc)
-                       dt_device_fini(&o->od_dt_dev);
+                       dt_device_fini(&dev->od_dt_dev);
        }
 
        if (unlikely(rc != 0))
-               OBD_FREE_PTR(o);
+               OBD_FREE_PTR(dev);
 
-       return rc == 0 ? osd2lu_dev(o) : ERR_PTR(rc);
+       return rc == 0 ? osd2lu_dev(dev) : ERR_PTR(rc);
 }
 
 static struct lu_device *osd_device_free(const struct lu_env *env,
@@ -631,9 +631,8 @@ static struct lu_device *osd_device_free(const struct lu_env *env,
 static struct lu_device *osd_device_fini(const struct lu_env *env,
                                         struct lu_device *d)
 {
-       struct osd_device        *o = osd_dev(d);
-       struct lustre_mount_info *lmi;
-       int rc;
+       struct osd_device *o = osd_dev(d);
+       int                rc;
        ENTRY;
 
 
@@ -655,10 +654,6 @@ static struct lu_device *osd_device_fini(const struct lu_env *env,
        if (o->od_objset.os)
                osd_umount(env, o);
 
-       lmi = server_get_mount_2(o->od_svname);
-       LASSERT(lmi);
-       server_put_mount(lmi->lmi_name, lmi->lmi_mnt);
-
        RETURN(NULL);
 }
 
@@ -699,6 +694,58 @@ static int osd_recovery_complete(const struct lu_env *env, struct lu_device *d)
        RETURN(0);
 }
 
+/*
+ * we use exports to track all osd users
+ */
+static int osd_obd_connect(const struct lu_env *env, struct obd_export **exp,
+                          struct obd_device *obd, struct obd_uuid *cluuid,
+                          struct obd_connect_data *data, void *localdata)
+{
+       struct osd_device    *osd = osd_dev(obd->obd_lu_dev);
+       struct lustre_handle  conn;
+       int                   rc;
+       ENTRY;
+
+       CDEBUG(D_CONFIG, "connect #%d\n", osd->od_connects);
+
+       rc = class_connect(&conn, obd, cluuid);
+       if (rc)
+               RETURN(rc);
+
+       *exp = class_conn2export(&conn);
+
+       cfs_spin_lock(&osd->od_objset.lock);
+       osd->od_connects++;
+       cfs_spin_unlock(&osd->od_objset.lock);
+
+       RETURN(0);
+}
+
+/*
+ * once the last export (not counting the self-export) has disappeared,
+ * the osd can be released
+ */
+static int osd_obd_disconnect(struct obd_export *exp)
+{
+       struct obd_device *obd = exp->exp_obd;
+       struct osd_device *osd = osd_dev(obd->obd_lu_dev);
+       int                rc, release = 0;
+       ENTRY;
+
+       /* Only disconnect the underlying layers on the final disconnect. */
+       cfs_spin_lock(&osd->od_objset.lock);
+       osd->od_connects--;
+       if (osd->od_connects == 0)
+               release = 1;
+       cfs_spin_unlock(&osd->od_objset.lock);
+
+       rc = class_disconnect(exp); /* bz 9811 */
+
+       if (rc == 0 && release)
+               class_manual_cleanup(obd);
+       RETURN(rc);
+}
+
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                       struct lu_device *dev)
 {
@@ -756,6 +803,8 @@ static struct lu_device_type osd_device_type = {
 
 static struct obd_ops osd_obd_device_ops = {
        .o_owner       = THIS_MODULE,
+       .o_connect      = osd_obd_connect,
+       .o_disconnect   = osd_obd_disconnect
 };
 
 int __init osd_init(void)
index c050acb..6ce8a3b 100644 (file)
@@ -44,6 +44,7 @@
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <obd.h>
+#include <obd_class.h>
 #include <lprocfs_status.h>
 #include <lu_time.h>
 
@@ -190,14 +191,17 @@ struct lprocfs_vars lprocfs_osd_module_vars[] = {
 
 int osd_procfs_init(struct osd_device *osd, const char *name)
 {
-       struct lu_device    *ld = &osd->od_dt_dev.dd_lu_dev;
-       struct obd_type     *type = ld->ld_type->ldt_obd_type;
-       int                  rc;
+       struct obd_type *type;
+       int              rc;
        ENTRY;
 
        if (osd->od_proc_entry)
                RETURN(0);
 
+       /* at the moment there is no linkage between lu_type
+        * and obd_type, so we look up obd_type this way */
+       type = class_search_type(LUSTRE_OSD_ZFS_NAME);
+
        LASSERT(name != NULL);
        LASSERT(type != NULL);